summaryrefslogtreecommitdiff
path: root/iconv/gconv_trans.c
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2000-06-16 00:39:38 +0000
committerUlrich Drepper <drepper@redhat.com>2000-06-16 00:39:38 +0000
commitf1d5c60ddef851078544e6b8456b18534b9a2a95 (patch)
treee75b7ab28ae1b6b8276d663556ddda7eb640ce5d /iconv/gconv_trans.c
parentf5361098c526e43f0437a491ed48122794b35451 (diff)
Update.
2000-06-15 Ulrich Drepper <drepper@redhat.com> * iconv/gconv.h (__gconv_fct): Change type of fifth parameter to unsigned char **. (__gconv_init_fct): Remove two parameters. * iconv/gconv_int.h (__gconv_transliterate): Renamed from gconv_transliterate. Remove two parameters. Change prototypes of builtin functions according to __gconv_fct change. * iconv/skeleton.c: Change type of fifth parameter. make sure it is != NULL only during error handling. Stop in this case after the conversion. * iconv/gconv_trans.c: Replace with real implementation for __gconv_transliterate. * iconv/gconv_open.c: Adjust for renaming of __gconv_transliterate. * iconv/gconv.c: Change calls to downstream functions once again. Use NULL for the fifth parameter instead of pointer to output buffer. * libio/iofwide.c: Likewise. * wcsmbs/btowc.c: Likewise. * wcsmbs/mbrtowc.c: Likewise. * wcsmbs/mbsnrtowcs.c: Likewise. * wcsmbs/mbsrtowcs.c: Likewise. * wcsmbs/wcrtomb.c: Likewise. * wcsmbs/wcsnrtombs.c: Likewise. * wcsmbs/wcsrtombs.c: Likewise. * wcsmbs/wctob.c: Likewise. * iconv/gconv_simple.c: Remove two parameters from error handling function call. * iconvdata/8bit-gap.c: Likewise. * iconvdata/8bit-generic.c: Likewise. * iconvdata/ansi_x3.110.c: Likewise. * iconvdata/big5.c: Likewise. * iconvdata/big5hkscs.c: Likewise. * iconvdata/euc-cn.c: Likewise. * iconvdata/euc-jp.c: Likewise. * iconvdata/euc-kr.c: Likewise. * iconvdata/euc-tw.c: Likewise. * iconvdata/gbgbk.c: Likewise. * iconvdata/gbk.c: Likewise. * iconvdata/iso-2022-cn.c: Likewise. * iconvdata/iso-2022-jp.c: Likewise. * iconvdata/iso-2022-kr.c: Likewise. * iconvdata/iso646.c: Likewise. * iconvdata/iso8859-1.c: Likewise. * iconvdata/iso_6937-2.c: Likewise. * iconvdata/iso_6937.c: Likewise. * iconvdata/johab.c: Likewise. * iconvdata/sjis.c: Likewise. * iconvdata/t.61.c: Likewise. * iconvdata/uhc.c: Likewise. * iconvdata/unicode.c: Likewise. * iconvdata/utf-16.c: Likewise.
Diffstat (limited to 'iconv/gconv_trans.c')
-rw-r--r--iconv/gconv_trans.c122
1 files changed, 109 insertions, 13 deletions
diff --git a/iconv/gconv_trans.c b/iconv/gconv_trans.c
index 56c3ff6be6..829ff5f981 100644
--- a/iconv/gconv_trans.c
+++ b/iconv/gconv_trans.c
@@ -18,6 +18,7 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <dlfcn.h>
#include <stdint.h>
#include "gconv_int.h"
@@ -25,26 +26,121 @@
int
-gconv_transliterate (struct __gconv_step *step,
- struct __gconv_step_data *step_data,
- const unsigned char *inbufstart,
- const unsigned char **inbufp,
- const unsigned char *inbufend,
- unsigned char *outbufstart,
- unsigned char **outbufp, unsigned char *outbufend,
- size_t *irreversible)
+__gconv_transliterate (struct __gconv_step *step,
+ struct __gconv_step_data *step_data,
+ const unsigned char *inbufstart,
+ const unsigned char **inbufp,
+ const unsigned char *inbufend,
+ unsigned char **outbufstart, size_t *irreversible)
{
/* Find out about the locale's transliteration. */
- uint_fast32_t size = _NL_CURRENT_WORD (LC_CTYPE,
- _NL_CTYPE_TRANSLIT_HASH_SIZE);
- uint_fast32_t layers = _NL_CURRENT_WORD (LC_CTYPE,
- _NL_CTYPE_TRANSLIT_HASH_LAYERS);
+ uint_fast32_t size;
+ uint_fast32_t layers;
+ uint32_t *from_idx;
+ uint32_t *from_tbl;
+ uint32_t *to_idx;
+ uint32_t *to_tbl;
+ uint32_t *winbuf;
+ uint32_t *winbufend;
+ uint_fast32_t low;
+ uint_fast32_t high;
/* If there is no transliteration information in the locale don't do
anything and return the error. */
+ size = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_SIZE);
if (size == 0)
return __GCONV_ILLEGAL_INPUT;
- /* XXX For now we don't do anything. */
+ /* Get the rest of the values. */
+ layers = _NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_TRANSLIT_HASH_LAYERS);
+ from_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_IDX);
+ from_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_FROM_TBL);
+ to_idx = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_IDX);
+ to_tbl = (uint32_t *) _NL_CURRENT (LC_CTYPE, _NL_CTYPE_TRANSLIT_TO_TBL);
+
+ /* The input buffer. There are actually 4-byte values. */
+ winbuf = (uint32_t *) *inbufp;
+ winbufend = (uint32_t *) inbufend;
+
+ /* Test whether there is enough input. */
+ if (winbuf + 1 > winbufend)
+ return (winbuf == winbufend
+ ? __GCONV_EMPTY_INPUT : __GCONV_INCOMPLETE_INPUT);
+
+ /* The array starting at FROM_IDX contains indeces to the string table
+ in FROM_TBL. The indeces are sorted wrt to the strings. I.e., we
+ are doing binary search. */
+ low = 0;
+ high = size;
+ while (low < high)
+ {
+ uint_fast32_t med = (low + high) / 2;
+ uint32_t idx;
+ int cnt;
+
+ /* Compare the string at this index with the string at the current
+ position in the input buffer. */
+ idx = from_idx[med];
+ cnt = 0;
+ do
+ {
+ if (from_tbl[idx + cnt] != winbuf[cnt])
+ /* Does not match. */
+ break;
+ ++cnt;
+ }
+ while (from_tbl[idx + cnt] != L'\0' && winbuf + cnt < winbufend);
+
+ if (cnt > 0 && from_tbl[idx + cnt] == L'\0')
+ {
+ /* Found a matching input sequence. Now try to convert the
+ possible replacements. */
+ uint32_t idx2 = to_idx[med];
+
+ do
+ {
+ /* Determine length of replacement. */
+ uint_fast32_t len = 0;
+ int res;
+ const unsigned char *toinptr;
+
+ while (to_tbl[idx2 + len] != L'\0')
+ ++len;
+
+ /* Try this input text. */
+ toinptr = (const unsigned char *) &to_tbl[idx2];
+ res = DL_CALL_FCT (step->__fct,
+ (step, step_data, &toinptr,
+ (const unsigned char *) &to_tbl[idx2 + len],
+ (unsigned char **) outbufstart,
+ irreversible, 0, 0));
+ if (res != __GCONV_ILLEGAL_INPUT)
+ {
+ /* If the conversion succeeds we have to increment the
+ input buffer. */
+ if (res == __GCONV_EMPTY_INPUT)
+ {
+ *inbufp += cnt * sizeof (uint32_t);
+ ++*irreversible;
+ }
+
+ return res;
+ }
+
+ /* Next replacement. */
+ idx2 += len + 1;
+ }
+ while (to_tbl[idx2] != L'\0');
+
+ /* Nothing found, continue searching. */
+ }
+
+ if (winbuf + cnt >= winbufend || from_tbl[idx + cnt] < winbuf[cnt])
+ low = idx;
+ else
+ high = idx;
+ }
+
+ /* Haven't found a match. */
return __GCONV_ILLEGAL_INPUT;
}