summaryrefslogtreecommitdiff
path: root/wcsmbs/mbsrtowcs.c
diff options
context:
space:
mode:
Diffstat (limited to 'wcsmbs/mbsrtowcs.c')
-rw-r--r--wcsmbs/mbsrtowcs.c114
1 files changed, 94 insertions, 20 deletions
diff --git a/wcsmbs/mbsrtowcs.c b/wcsmbs/mbsrtowcs.c
index dc026b7252..712b199271 100644
--- a/wcsmbs/mbsrtowcs.c
+++ b/wcsmbs/mbsrtowcs.c
@@ -1,6 +1,6 @@
/* Copyright (C) 1996 Free Software Foundation, Inc.
This file is part of the GNU C Library.
-Contributed by Ulrich Drepper, <drepper@gnu.ai.mit.edu>
+Contributed by Ulrich Drepper <drepper@gnu.ai.mit.edu>, 1996.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Library General Public License as
@@ -17,9 +17,16 @@ License along with the GNU C Library; see the file COPYING.LIB. If
not, write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <errno.h>
#include <wchar.h>
+#ifndef EILSEQ
+#define EILSEQ EINVAL
+#endif
+
+/* We don't need the state really because we don't have shift states
+ to maintain between calls to this function. */
static mbstate_t internal;
size_t
@@ -29,35 +36,102 @@ mbsrtowcs (dst, src, len, ps)
size_t len;
mbstate_t *ps;
{
- size_t result = 0;
+ size_t written = 0;
+ const char *run = *src;
if (ps == NULL)
ps = &internal;
- /*************************************************************\
- |* This is no complete implementation. While the multi-byte *|
- |* character handling is not finished this will do. *|
- \*************************************************************/
+ if (dst == NULL)
+ /* The LEN parameter has to be ignored if we don't actually write
+ anything. */
+ len = ~0;
- while (len > 0 && **src != '\0')
+ /* Copy all words. */
+ while (written < len)
{
- /* For now there is no possibly illegal MB char sequence. */
- if (dst != NULL)
- dst[result] = (wchar_t) **src;
- ++result;
- ++(*src);
- --len;
- }
+ wchar_t value;
+ size_t count;
+ unsigned char byte = *run++;
- if (len > 0)
- {
+ /* We expect a start of a new multibyte character. */
+ if (byte < 0x80)
+ {
+ /* One byte sequence. */
+ count = 0;
+ value = byte;
+ }
+ else if ((byte & 0xe0) == 0xc0)
+ {
+ count = 1;
+ value = byte & 0x1f;
+ }
+ else if ((byte & 0xf0) == 0xe0)
+ {
+ /* We expect three bytes. */
+ count = 2;
+ value = byte & 0x0f;
+ }
+ else if ((byte & 0xf8) == 0xf0)
+ {
+ /* We expect four bytes. */
+ count = 3;
+ value = byte & 0x07;
+ }
+ else if ((byte & 0xfc) == 0xf8)
+ {
+ /* We expect five bytes. */
+ count = 4;
+ value = byte & 0x03;
+ }
+ else if ((byte & 0xfe) == 0xfc)
+ {
+ /* We expect six bytes. */
+ count = 5;
+ value = byte & 0x01;
+ }
+ else
+ {
+ /* This is an illegal encoding. */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
+
+ /* Read the possible remaining bytes. */
+ while (count-- > 0)
+ {
+ byte = *run++;
+
+ if ((byte & 0xc0) != 0x80)
+ {
+ /* This is an illegal encoding. */
+ errno = EILSEQ;
+ return (size_t) -1;
+ }
+
+ value <<= 6;
+ value |= byte & 0x3f;
+ }
+
+ /* Store value is required. */
if (dst != NULL)
+ *dst++ = value;
+
+ /* The whole sequence is read. Check whether end of string is
+ reached. */
+ if (value == L'\0')
{
- dst[result] = L'\0';
- *ps = 0;
+ /* Found the end of the string. */
+ *src = NULL;
+ return written;
}
- *src = NULL;
+
+ /* Increment counter of produced words. */
+ ++written;
}
- return result;
+ /* Store address of next byte to process. */
+ *src = run;
+
+ return written;
}