summary | refs | log | tree | commit | diff
path: root/iconv
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 1998-04-05 11:23:12 +0000
committer: Ulrich Drepper <drepper@redhat.com> 1998-04-05 11:23:12 +0000
commit: a904b5d93a2b54c611beb9c50ed274c5d77ec7b4 (patch)
tree: c2e6f86a2f3ffe62e3317467b91a716fc178fd6f /iconv
parent: 923609d1497f3116d57b297e3e84fc07b2b15b20 (diff)
Update.
1998-04-05  Ulrich Drepper  <drepper@cygnus.com>

	* iconv/gconv_simple.c: Rewrite to not make use of the mb*towc* and
	wc*tomb* functions.

1998-04-04 15:16  Philip Blundell  <Philip.Blundell@pobox.com>

	* sysdeps/unix/start.c: Fix typo.

1998-04-04  Ulrich Drepper  <drepper@cygnus.com>

	* iconv/gconv_db.c (__gconv_find_transform): Fix typo.
Diffstat (limited to 'iconv')
-rw-r--r--  iconv/gconv_db.c        6
-rw-r--r--  iconv/gconv_simple.c  183
2 files changed, 161 insertions, 28 deletions
diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c
index e0a94e41c3..b3bdd7abc0 100644
--- a/iconv/gconv_db.c
+++ b/iconv/gconv_db.c
@@ -534,9 +534,9 @@ __gconv_find_transform (const char *toset, const char *fromset,
do
if (steps[--cnt].counter++ == 0)
{
- steps[--cnt].shlib_handle =
- __gconv_find_shlib (steps[--cnt].modname);
- if (steps[--cnt].shlib_handle == NULL)
+ steps[cnt].shlib_handle =
+ __gconv_find_shlib (steps[cnt].modname);
+ if (steps[cnt].shlib_handle == NULL)
{
/* Oops, this is the second time we use this module (after
unloading) and this time loading failed!? */
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 7fbdfbacf3..197a3d1e9c 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -26,6 +26,24 @@
#include <wchar.h>
#include <sys/param.h>
+#ifndef EILSEQ
+# define EILSEQ EINVAL
+#endif
+
+
+/* These are definitions used by some of the functions for handling
+ UTF-8 encoding below. */
+static const wchar_t encoding_mask[] =
+{
+ ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
+};
+
+static const unsigned char encoding_byte[] =
+{
+ 0xc0, 0xe0, 0xf0, 0xf8, 0xfc
+};
+
+
int
__gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
@@ -97,30 +115,70 @@ __gconv_transform_ucs4_utf8 (struct gconv_step *step,
int save_errno = errno;
do_write = 0;
+ result = GCONV_OK;
do
{
- const char *newinbuf = inbuf;
- size_t actually;
+ const wchar_t *newinbuf = (const wchar_t *) inbuf;
+ size_t actually = 0;
+ size_t cnt = 0;
- errno = 0;
- actually = __wmemrtombs (&data->outbuf[data->outbufavail],
- (const wchar_t **) &newinbuf,
- *inlen / sizeof (wchar_t),
- data->outbufsize - data->outbufavail,
- data->statep);
+ while (data->outbufavail < data->outbufsize
+ && cnt * sizeof (wchar_t) <= *inlen)
+ {
+ wchar_t wc = newinbuf[cnt];
+
+ if (wc < 0 && wc > 0x7fffffff)
+ {
+ /* This is no correct ISO 10646 character. */
+ result = GCONV_ILLEGAL_INPUT;
+ break;
+ }
+
+ if (wc < 0x80)
+ {
+ /* It's a one-byte sequence. */
+ data->outbuf[data->outbufavail++] = (char) wc;
+ ++actually;
+ }
+ else
+ {
+ size_t step;
+ size_t start;
+
+ for (step = 2; step < 6; ++step)
+ if ((wc & encoding_mask[step - 2]) == 0)
+ break;
+
+ if (data->outbufavail + step >= data->outbufsize)
+ /* Too long. */
+ break;
+
+ start = data->outbufavail;
+ data->outbufavail += step;
+ actually += step;
+ data->outbuf[start] = encoding_byte[step - 2];
+ --step;
+ do
+ {
+ data->outbuf[start + step] = 0x80 | (wc & 0x3f);
+ wc >>= 6;
+ }
+ while (--step > 0);
+ data->outbuf[start] |= wc;
+ }
+
+ ++cnt;
+ }
/* Remember how much we converted. */
- do_write += newinbuf - inbuf;
- *inlen -= newinbuf - inbuf;
+ do_write += cnt * sizeof (wchar_t);
+ *inlen -= cnt * sizeof (wchar_t);
data->outbufavail += actually;
/* Check whether an illegal character appeared. */
- if (errno != 0)
- {
- result = GCONV_ILLEGAL_INPUT;
- break;
- }
+ if (result != GCONV_OK)
+ break;
if (data->is_last)
{
@@ -199,26 +257,101 @@ __gconv_transform_utf8_ucs4 (struct gconv_step *step,
int save_errno = errno;
do_write = 0;
+ result = GCONV_OK;
do
{
- const char *newinbuf = inbuf;
- size_t actually;
+ wchar_t *outbuf = (wchar_t *) &data->outbuf[data->outbufavail];
+ size_t cnt = 0;
+ size_t actually = 0;
- errno = 0;
- actually = __wmemrtowcs ((wchar_t *) &data->outbuf[data->outbufavail],
- &newinbuf, *inlen,
- ((data->outbufsize
- - data->outbufavail) / sizeof (wchar_t)),
- data->statep);
+ while (data->outbufavail + sizeof (wchar_t) <= data->outbufsize
+ && cnt < *inlen)
+ {
+ size_t start = cnt;
+ wchar_t value;
+ unsigned char byte;
+ int count;
+
+ /* Next input byte. */
+ byte = inbuf[cnt++];
+
+ if (byte < 0x80)
+ {
+ /* One byte sequence. */
+ count = 0;
+ value = byte;
+ }
+ else if ((byte & 0xe0) == 0xc0)
+ {
+ count = 1;
+ value = byte & 0x1f;
+ }
+ else if ((byte & 0xf0) == 0xe0)
+ {
+ /* We expect three bytes. */
+ count = 2;
+ value = byte & 0x0f;
+ }
+ else if ((byte & 0xf8) == 0xf0)
+ {
+ /* We expect four bytes. */
+ count = 3;
+ value = byte & 0x07;
+ }
+ else if ((byte & 0xfc) == 0xf8)
+ {
+ /* We expect five bytes. */
+ count = 4;
+ value = byte & 0x03;
+ }
+ else if ((byte & 0xfe) == 0xfc)
+ {
+ /* We expect six bytes. */
+ count = 5;
+ value = byte & 0x01;
+ }
+ else
+ {
+ /* This is an illegal encoding. */
+ result = GCONV_ILLEGAL_INPUT;
+ break;
+ }
+
+ /* Read the possible remaining bytes. */
+ while (cnt < *inbuf && count > 0)
+ {
+ byte = inbuf[cnt++];
+ --count;
+
+ if ((byte & 0xc0) != 0x80)
+ {
+ /* This is an illegal encoding. */
+ result = GCONV_ILLEGAL_INPUT;
+ break;
+ }
+
+ value <<= 6;
+ value |= byte & 0x3f;
+ }
+
+ if (result != GCONV_OK)
+ {
+ cnt = start;
+ break;
+ }
+
+ *outbuf++ = value;
+ ++actually;
+ }
/* Remember how much we converted. */
do_write += actually;
- *inlen -= newinbuf - inbuf;
+ *inlen -= cnt;
data->outbufavail += actually * sizeof (wchar_t);
/* Check whether an illegal character appeared. */
- if (errno != 0)
+ if (result != GCONV_OK)
{
result = GCONV_ILLEGAL_INPUT;
break;