Simplify char16_t implementation

author: Ulrich Drepper <drepper@gmail.com> 2012-01-08 07:19:21 -0500
committer: Ulrich Drepper <drepper@gmail.com> 2012-01-08 07:19:21 -0500
commit: d3ed722566f42d3f614b1221a8e4f19092976531 (patch)
tree: 4a63e059ef599167cf407311188551fe72221d8d
parent: a0da5fe1e49b819b4d90b77915e21cddd397d064 (diff)
10 files changed, 59 insertions, 642 deletions
diff --git a/ChangeLog b/ChangeLog
index d33e6554ed..55d8069dc0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,17 @@
 2012-01-08  Ulrich Drepper  <drepper@gmail.com>
 
+	* wcsmbs/mbrtoc16.c: Implement using towc function.
+	* wcsmbs/wcsmbsload.h: No need for toc16 and fromc16 functions.
+	* wcsmbs/wcsmbsload.c: Likewise.
+	* iconv/gconv_simple.c: Likewise.
+	* iconv/gconv_int.h: Likewise.
+	* iconv/gconv_builtin.h: Likewise.
+	* iconv/iconv_prog.c: Remove CHAR16 handling.
+
+	* wcsmbs/c16rtomb.c: Remove #if 0'ed code.
+
+	* wcsmbs/mbrtowc.c: Better check for invalid inputs.
+
 	* configure.in: Remove --with-elf and --enable-bounded options.
 	Dont set base_machine for ia64.  More non-ELF conditions removed.
 	Remove testing and setting of leading underscore information.
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 6820f828ec..271a4be87c 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -122,17 +122,3 @@ BUILTIN_TRANSFORMATION ("INTERNAL", "UNICODEBIG//", 1,
 			__gconv_transform_internal_ucs2reverse, NULL,
 			4, 4, 2, 2)
 #endif
-
-
-BUILTIN_TRANSFORMATION ("ANSI_X3.4-1968//", "CHAR16", 1, "=ascii->CHAR16",
-			__gconv_transform_ascii_char16, NULL, 1, 1, 2, 4)
-
-BUILTIN_TRANSFORMATION ("CHAR16", "ANSI_X3.4-1968//", 1, "=CHAR16->ascii",
-			__gconv_transform_char16_ascii, NULL, 2, 4, 1, 1)
-
-
-BUILTIN_TRANSFORMATION ("ISO-10646/UTF8/", "CHAR16", 1, "=utf8->CHAR16",
-			__gconv_transform_utf8_char16, NULL, 1, 6, 2, 4)
-
-BUILTIN_TRANSFORMATION ("CHAR16", "ISO-10646/UTF8/", 1, "=CHAR16->utf8",
-			__gconv_transform_char16_utf8, NULL, 2, 4, 1, 6)
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index 7508372707..a2fcb93740 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -303,10 +303,6 @@ __BUILTIN_TRANSFORM (__gconv_transform_internal_ucs4le);
 __BUILTIN_TRANSFORM (__gconv_transform_ucs4le_internal);
 __BUILTIN_TRANSFORM (__gconv_transform_internal_utf16);
 __BUILTIN_TRANSFORM (__gconv_transform_utf16_internal);
-__BUILTIN_TRANSFORM (__gconv_transform_ascii_char16);
-__BUILTIN_TRANSFORM (__gconv_transform_char16_ascii);
-__BUILTIN_TRANSFORM (__gconv_transform_utf8_char16);
-__BUILTIN_TRANSFORM (__gconv_transform_char16_utf8);
 # undef __BUITLIN_TRANSFORM
 
 /* Specialized conversion function for a single byte to INTERNAL, recognizing
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 028822c918..67761603f9 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -1321,391 +1321,3 @@ ucs4le_internal_loop_single (struct __gconv_step *step,
 #define LOOP_NEED_FLAGS
 #include <iconv/loop.c>
 #include <iconv/skeleton.c>
-
-
-/* Convert from ISO 646-IRV to the char16_t format.  */
-#define DEFINE_INIT		0
-#define DEFINE_FINI		0
-#define MIN_NEEDED_FROM		1
-#define MIN_NEEDED_TO		2
-#define FROM_DIRECTION		1
-#define FROM_LOOP		ascii_char16_loop
-#define TO_LOOP			ascii_char16_loop /* This is not used.  */
-#define FUNCTION_NAME		__gconv_transform_ascii_char16
-#define ONE_DIRECTION		1
-
-#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
-#define LOOPFCT			FROM_LOOP
-#define BODY \
-  {									      \
-    if (__builtin_expect (*inptr > '\x7f', 0))				      \
-      {									      \
-	/* The value is too large.  We don't try transliteration here since   \
-	   this is not an error because of the lack of possibilities to	      \
-	   represent the result.  This is a genuine bug in the input since    \
-	   ASCII does not allow such values.  */			      \
-	STANDARD_FROM_LOOP_ERR_HANDLER (1);				      \
-      }									      \
-    else								      \
-      {									      \
-	/* It's an one byte sequence.  */				      \
-	*((uint16_t *) outptr) = *inptr++;				      \
-	outptr += sizeof (uint16_t);					      \
-      }									      \
-  }
-#define LOOP_NEED_FLAGS
-#include <iconv/loop.c>
-#include <iconv/skeleton.c>
-
-
-/* Convert from the char16_t format to ISO 646-IRV.  */
-#define DEFINE_INIT		0
-#define DEFINE_FINI		0
-#define MIN_NEEDED_FROM		2
-#define MIN_NEEDED_TO		1
-#define FROM_DIRECTION		1
-#define FROM_LOOP		char16_ascii_loop
-#define TO_LOOP			char16_ascii_loop /* This is not used.  */
-#define FUNCTION_NAME		__gconv_transform_char16_ascii
-#define ONE_DIRECTION		1
-
-#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
-#define LOOPFCT			FROM_LOOP
-#define BODY \
-  {									      \
-    if (__builtin_expect (*((const uint16_t *) inptr) > 0x7f, 0))	      \
-      {									      \
-	UNICODE_TAG_HANDLER (*((const uint16_t *) inptr), 2);		      \
-	STANDARD_TO_LOOP_ERR_HANDLER (2);				      \
-      }									      \
-    else								      \
-      {									      \
-	/* It's an one byte sequence.  */				      \
-	*outptr++ = *((const uint16_t *) inptr);			      \
-	inptr += 2;							      \
-      }									      \
-  }
-#define LOOP_NEED_FLAGS
-#include <iconv/loop.c>
-#include <iconv/skeleton.c>
-
-
-/* Convert from the char16_t format to UTF-8.  */
-#define DEFINE_INIT		0
-#define DEFINE_FINI		0
-#define MIN_NEEDED_FROM		2
-#define MAX_NEEDED_FROM		4
-#define MIN_NEEDED_TO		1
-#define MAX_NEEDED_TO		6
-#define FROM_DIRECTION		1
-#define FROM_LOOP		char16_utf8_loop
-#define TO_LOOP			char16_utf8_loop /* This is not used.  */
-#define FUNCTION_NAME		__gconv_transform_char16_utf8
-#define ONE_DIRECTION		1
-
-#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
-#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
-#define MAX_NEEDED_OUTPUT	MAX_NEEDED_TO
-#define LOOPFCT			FROM_LOOP
-#define BODY \
-  {									      \
-    /* Yes, reading a 16-bit number and storing it as 32-bit is correct.  */  \
-    uint32_t wc = *((const uint16_t *) inptr);				      \
-    inptr += 2;								      \
-									      \
-    if (__builtin_expect (wc < 0x80, 1))				      \
-      /* It's an one byte sequence.  */					      \
-      *outptr++ = (unsigned char) wc;					      \
-    else								      \
-      {									      \
-	size_t step;							      \
-									      \
-	if (__builtin_expect (wc < 0xd800 || wc > 0xdfff, 1))		      \
-	  step = wc < 0x800 ? 2 : 3;					      \
-	else								      \
-	  {								      \
-	    if (__builtin_expect (inptr + 2 > inend, 0))		      \
-	      {                                                               \
-		/* We don't have enough input for another complete input      \
-		   character.  */                                             \
-		inptr -= 2;						      \
-		result = __GCONV_INCOMPLETE_INPUT;                            \
-		break;                                                        \
-	      }								      \
-									      \
-	    uint32_t sec = *((const uint16_t *) inptr);			      \
-	    if (__builtin_expect (sec < 0xdc00, 0)			      \
-		|| __builtin_expect (sec > 0xdfff, 0))			      \
-	      {								      \
-		/* This is no valid second word for a surrogate.  */	      \
-		STANDARD_FROM_LOOP_ERR_HANDLER (2);			      \
-	      }								      \
-	    inptr += 2;							      \
-	    wc = ((wc - 0xd7c0) << 10) + (sec - 0xdc00);		      \
-									      \
-	    step = wc < 0x200000 ? 4 : 5;				      \
-	  }								      \
-									      \
-	if (__builtin_expect (outptr + step > outend, 0))		      \
-	  {								      \
-	    /* Too long.  */						      \
-	    result = __GCONV_FULL_OUTPUT;				      \
-	    inptr -= step >= 4 ? 4 : 2;					      \
-	    break;							      \
-	  }								      \
-									      \
-	unsigned char *start = outptr;					      \
-	*outptr = (unsigned char) (~0xff >> step);			      \
-	outptr += step;							      \
-	do								      \
-	  {								      \
-	    start[--step] = 0x80 | (wc & 0x3f);				      \
-	    wc >>= 6;							      \
-	  }								      \
-	while (step > 1);						      \
-	start[0] |= wc;							      \
-      }									      \
-  }
-#define LOOP_NEED_FLAGS
-#include <iconv/loop.c>
-#include <iconv/skeleton.c>
-
-
-/* Convert from UTF-8 to the char16_t format.  */
-#define DEFINE_INIT		0
-#define DEFINE_FINI		0
-#define MIN_NEEDED_FROM		1
-#define MAX_NEEDED_FROM		6
-#define MIN_NEEDED_TO		2
-#define MAX_NEEDED_TO		4
-#define FROM_DIRECTION		1
-#define FROM_LOOP		utf8_char16_loop
-#define TO_LOOP			utf8_char16_loop /* This is not used.  */
-#define FUNCTION_NAME		__gconv_transform_utf8_char16
-#define ONE_DIRECTION		1
-
-#define MIN_NEEDED_INPUT	MIN_NEEDED_FROM
-#define MAX_NEEDED_INPUT	MAX_NEEDED_FROM
-#define MIN_NEEDED_OUTPUT	MIN_NEEDED_TO
-#define LOOPFCT			FROM_LOOP
-#define BODY \
-  {									      \
-    /* Next input byte.  */						      \
-    uint32_t ch = *inptr;						      \
-									      \
-    if (__builtin_expect (ch < 0x80, 1))				      \
-      {									      \
-	/* One byte sequence.  */					      \
-	*((uint16_t *) outptr) = ch;					      \
-	outptr += 2;							      \
-	++inptr;							      \
-      }									      \
-    else								      \
-      {									      \
-	uint_fast32_t cnt;						      \
-	uint_fast32_t i;						      \
-									      \
-	if (ch >= 0xc2 && ch < 0xe0)					      \
-	  {								      \
-	    /* We expect two bytes.  The first byte cannot be 0xc0 or 0xc1,   \
-	       otherwise the wide character could have been represented	      \
-	       using a single byte.  */					      \
-	    cnt = 2;							      \
-	    ch &= 0x1f;							      \
-	  }								      \
-	else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))		      \
-	  {								      \
-	    /* We expect three bytes.  */				      \
-	    cnt = 3;							      \
-	    ch &= 0x0f;							      \
-	  }								      \
-	else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))		      \
-	  {								      \
-	    /* We expect four bytes.  */				      \
-	    cnt = 4;							      \
-	    ch &= 0x07;							      \
-	  }								      \
-	else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))		      \
-	  {								      \
-	    /* We expect five bytes.  */				      \
-	    cnt = 5;							      \
-	    ch &= 0x03;							      \
-	  }								      \
-	else if (__builtin_expect ((ch & 0xfe) == 0xfc, 1))		      \
-	  {								      \
-	    /* We expect six bytes.  */					      \
-	    cnt = 6;							      \
-	    ch &= 0x01;							      \
-	  }								      \
-	else								      \
-	  {								      \
-	    /* Search the end of this ill-formed UTF-8 character.  This	      \
-	       is the next byte with (x & 0xc0) != 0x80.  */		      \
-	    i = 0;							      \
-	    do								      \
-	      ++i;							      \
-	    while (inptr + i < inend					      \
-		   && (*(inptr + i) & 0xc0) == 0x80			      \
-		   && i < 5);						      \
-									      \
-	  errout:							      \
-	    STANDARD_FROM_LOOP_ERR_HANDLER (i);				      \
-	  }								      \
-									      \
-	if (__builtin_expect (inptr + cnt > inend, 0))			      \
-	  {								      \
-	    /* We don't have enough input.  But before we report that check   \
-	       that all the bytes are correct.  */			      \
-	    for (i = 1; inptr + i < inend; ++i)				      \
-	      if ((inptr[i] & 0xc0) != 0x80)				      \
-		break;							      \
-									      \
-	    if (__builtin_expect (inptr + i == inend, 1))		      \
-	      {								      \
-		result = __GCONV_INCOMPLETE_INPUT;			      \
-		break;							      \
-	      }								      \
-									      \
-	    goto errout;						      \
-	  }								      \
-									      \
-	/* Read the possible remaining bytes.  */			      \
-	for (i = 1; i < cnt; ++i)					      \
-	  {								      \
-	    uint32_t byte = inptr[i];					      \
-									      \
-	    if ((byte & 0xc0) != 0x80)					      \
-	      /* This is an illegal encoding.  */			      \
-	      break;							      \
-									      \
-	    ch <<= 6;							      \
-	    ch |= byte & 0x3f;						      \
-	  }								      \
-									      \
-	/* If i < cnt, some trail byte was not >= 0x80, < 0xc0.		      \
-	   If cnt > 2 and ch < 2^(5*cnt-4), the wide character ch could	      \
-	   have been represented with fewer than cnt bytes.  */		      \
-	if (i < cnt || (cnt > 2 && (ch >> (5 * cnt - 4)) == 0)		      \
-	    /* Do not accept UTF-16 surrogates.  */			      \
-	    || (ch >= 0xd800 && ch <= 0xdfff))				      \
-	  {								      \
-	    /* This is an illegal encoding.  */				      \
-	    goto errout;						      \
-	  }								      \
-									      \
-	/* Now adjust the pointers and store the result.  */		      \
-	if (ch < 0x10000)						      \
-	  *((uint16_t *) outptr) = ch;					      \
-	else								      \
-	  {								      \
-	    if (__builtin_expect (outptr + 4 > outend, 0))		      \
-	      {								      \
-		result = __GCONV_FULL_OUTPUT;				      \
-		break;							      \
-	      }								      \
-									      \
-	    *((uint16_t *) outptr) = 0xd7c0 + (ch >> 10);		      \
-	    outptr += 2;						      \
-	    *((uint16_t *) outptr) = 0xdc00 + (ch & 0x3ff);		      \
-	  }								      \
-									      \
-	outptr += 2;							      \
-	inptr += cnt;							      \
-      }									      \
-  }
-#define LOOP_NEED_FLAGS
-
-#define STORE_REST \
-  {									      \
-    /* We store the remaining bytes while converting them into the UCS4	      \
-       format.  We can assume that the first byte in the buffer is	      \
-       correct and that it requires a larger number of bytes than there	      \
-       are in the input buffer.  */					      \
-    wint_t ch = **inptrp;						      \
-    size_t cnt, r;							      \
-									      \
-    state->__count = inend - *inptrp;					      \
-									      \
-    assert (ch != 0xc0 && ch != 0xc1);					      \
-    if (ch >= 0xc2 && ch < 0xe0)					      \
-      {									      \
-	/* We expect two bytes.  The first byte cannot be 0xc0 or	      \
-	   0xc1, otherwise the wide character could have been		      \
-	   represented using a single byte.  */				      \
-	cnt = 2;							      \
-	ch &= 0x1f;							      \
-      }									      \
-    else if (__builtin_expect ((ch & 0xf0) == 0xe0, 1))			      \
-      {									      \
-	/* We expect three bytes.  */					      \
-	cnt = 3;							      \
-	ch &= 0x0f;							      \
-      }									      \
-    else if (__builtin_expect ((ch & 0xf8) == 0xf0, 1))			      \
-      {									      \
-	/* We expect four bytes.  */					      \
-	cnt = 4;							      \
-	ch &= 0x07;							      \
-      }									      \
-    else if (__builtin_expect ((ch & 0xfc) == 0xf8, 1))			      \
-      {									      \
-	/* We expect five bytes.  */					      \
-	cnt = 5;							      \
-	ch &= 0x03;							      \
-      }									      \
-    else								      \
-      {									      \
-	/* We expect six bytes.  */					      \
-	cnt = 6;							      \
-	ch &= 0x01;							      \
-      }									      \
-									      \
-    /* The first byte is already consumed.  */				      \
-    r = cnt - 1;							      \
-    while (++(*inptrp) < inend)						      \
-      {									      \
-	ch <<= 6;							      \
-	ch |= **inptrp & 0x3f;						      \
-	--r;								      \
-      }									      \
-									      \
-    /* Shift for the so far missing bytes.  */				      \
-    ch <<= r * 6;							      \
-									      \
-    /* Store the number of bytes expected for the entire sequence.  */	      \
-    state->__count |= cnt << 8;						      \
-									      \
-    /* Store the value.  */						      \
-    state->__value.__wch = ch;						      \
-  }
-
-#define UNPACK_BYTES \
-  {									      \
-    static const unsigned char inmask[5] = { 0xc0, 0xe0, 0xf0, 0xf8, 0xfc };  \
-    wint_t wch = state->__value.__wch;					      \
-    size_t ntotal = state->__count >> 8;				      \
-									      \
-    inlen = state->__count & 255;					      \
-									      \
-    bytebuf[0] = inmask[ntotal - 2];					      \
-									      \
-    do									      \
-      {									      \
-	if (--ntotal < inlen)						      \
-	  bytebuf[ntotal] = 0x80 | (wch & 0x3f);			      \
-	wch >>= 6;							      \
-      }									      \
-    while (ntotal > 1);							      \
-									      \
-    bytebuf[0] |= wch;							      \
-  }
-
-#define CLEAR_STATE \
-  state->__count = 0
-
-
-#include <iconv/loop.c>
-#include <iconv/skeleton.c>
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 13facc8235..0d62a07147 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -719,12 +719,9 @@ add_known_names (struct gconv_module *node)
     add_known_names (node->right);
   do
     {
-      if (strcmp (node->from_string, "INTERNAL") != 0
-	  && strcmp (node->from_string, "CHAR16") != 0)
-	tsearch (node->from_string, &printlist,
-		 (__compar_fn_t) strverscmp);
-      if (strcmp (node->to_string, "INTERNAL") != 0
-	  && strcmp (node->to_string, "CHAR16") != 0)
+      if (strcmp (node->from_string, "INTERNAL") != 0)
+	tsearch (node->from_string, &printlist, (__compar_fn_t) strverscmp);
+      if (strcmp (node->to_string, "INTERNAL") != 0)
 	tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
 
       node = node->same;
@@ -750,8 +747,7 @@ insert_cache (void)
       {
 	const char *str = strtab + hashtab[cnt].string_offset;
 
-	if (strcmp (str, "INTERNAL") != 0
-	    && strcmp (str, "CHAR16") != 0)
+	if (strcmp (str, "INTERNAL") != 0)
 	  tsearch (str, &printlist, (__compar_fn_t) strverscmp);
       }
 }
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
index 3fed0b5d63..5374c755cc 100644
--- a/wcsmbs/c16rtomb.c
+++ b/wcsmbs/c16rtomb.c
@@ -17,25 +17,8 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-#include <assert.h>
-#include <dlfcn.h>
-#include <errno.h>
-#include <gconv.h>
-#include <stdlib.h>
 #include <uchar.h>
-#include <wcsmbsload.h>
-
-#include <sysdep.h>
-
-#ifndef EILSEQ
-# define EILSEQ EINVAL
-#endif
-
-#if __STDC__ >= 201000L
-# define u(c) U##c
-#else
-# define u(c) L##c
-#endif
+#include <wchar.h>
 
 
 /* This is the private state used if PS is NULL.  */
@@ -44,85 +27,7 @@ static mbstate_t state;
 size_t
 c16rtomb (char *s, char16_t c16, mbstate_t *ps)
 {
-#if 1
   // XXX The ISO C 11 spec I have does not say anything about handling
   // XXX surrogates in this interface.
   return wcrtomb (s, c16, ps ?: &state);
-#else
-  char buf[MB_LEN_MAX];
-  struct __gconv_step_data data;
-  int status;
-  size_t result;
-  size_t dummy;
-  const struct gconv_fcts *fcts;
-
-  /* Set information for this step.  */
-  data.__invocation_counter = 0;
-  data.__internal_use = 1;
-  data.__flags = __GCONV_IS_LAST;
-  data.__statep = ps ?: &state;
-  data.__trans = NULL;
-
-  /* A first special case is if S is NULL.  This means put PS in the
-     initial state.  */
-  if (s == NULL)
-    {
-      s = buf;
-      c16 = u('\0');
-    }
-
-  /* Tell where we want to have the result.  */
-  data.__outbuf = (unsigned char *) s;
-  data.__outbufend = (unsigned char *) s + MB_CUR_MAX;
-
-  /* Get the conversion functions.  */
-  fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
-  __gconv_fct fct = fcts->fromc16->__fct;
-#ifdef PTR_DEMANGLE
-  if (fcts->tomb->__shlib_handle != NULL)
-    PTR_DEMANGLE (fct);
-#endif
-
-  /* If C16 is the NUL character we write into the output buffer
-     the byte sequence necessary for PS to get into the initial
-     state, followed by a NUL byte.  */
-  if (c16 == L'\0')
-    {
-      status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL,
-				  NULL, &dummy, 1, 1));
-
-      if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
-	*data.__outbuf++ = '\0';
-    }
-  else
-    {
-      /* Do a normal conversion.  */
-      const unsigned char *inbuf = (const unsigned char *) &c16;
-
-      status = DL_CALL_FCT (fct,
-			    (fcts->fromc16, &data, &inbuf,
-			     inbuf + sizeof (char16_t), NULL, &dummy,
-			     0, 1));
-    }
-
-  /* There must not be any problems with the conversion but illegal input
-     characters.  The output buffer must be large enough, otherwise the
-     definition of MB_CUR_MAX is not correct.  All the other possible
-     errors also must not happen.  */
-  assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
-	  || status == __GCONV_ILLEGAL_INPUT
-	  || status == __GCONV_INCOMPLETE_INPUT
-	  || status == __GCONV_FULL_OUTPUT);
-
-  if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
-      || status == __GCONV_FULL_OUTPUT)
-    result = data.__outbuf - (unsigned char *) s;
-  else
-    {
-      result = (size_t) -1;
-      __set_errno (EILSEQ);
-    }
-
-  return result;
-#endif
 }
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
index df970fba4f..f5ed2b4ac9 100644
--- a/wcsmbs/mbrtoc16.c
+++ b/wcsmbs/mbrtoc16.c
@@ -30,12 +30,6 @@
 # define EILSEQ EINVAL
 #endif
 
-#if __STDC__ >= 201000L
-# define U(c) U##c
-#else
-# define U(c) L##c
-#endif
-
 
 /* This is the private state used if PS is NULL.  */
 static mbstate_t state;
@@ -46,6 +40,11 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
   if (ps == NULL)
     ps = &state;
 
+  /* The standard text does not say that S being NULL means the state
+     is reset even if the second half of a surrogate still have to be
+     returned.  In fact, the error code description indicates
+     otherwise.  Therefore always first try to return a second
+     half.  */
   if (ps->__count & 0x80000000)
     {
       /* We have to return the second word for a surrogate.  */
@@ -55,13 +54,13 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
       return (size_t) -3;
     }
 
-  char16_t buf[2];
+  wchar_t wc;
   struct __gconv_step_data data;
   int status;
   size_t result;
   size_t dummy;
   const unsigned char *inbuf, *endbuf;
-  unsigned char *outbuf = (unsigned char *) buf;
+  unsigned char *outbuf = (unsigned char *) &wc;
   const struct gconv_fcts *fcts;
 
   /* Set information for this step.  */
@@ -75,14 +74,14 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
      initial state.  */
   if (s == NULL)
     {
-      outbuf = (unsigned char *) buf;
+      pc16 = NULL;
       s = "";
       n = 1;
     }
 
   /* Tell where we want the result.  */
   data.__outbuf = outbuf;
-  data.__outbufend = outbuf + sizeof (char16_t);
+  data.__outbufend = outbuf + sizeof (wchar_t);
 
   /* Get the conversion functions.  */
   fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
@@ -91,28 +90,20 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
   inbuf = (const unsigned char *) s;
   endbuf = inbuf + n;
   if (__builtin_expect (endbuf < inbuf, 0))
-    endbuf = (const unsigned char *) ~(uintptr_t) 0;
-  __gconv_fct fct = fcts->toc16->__fct;
+    {
+      endbuf = (const unsigned char *) ~(uintptr_t) 0;
+      if (endbuf == inbuf)
+	goto ilseq;
+    }
+  __gconv_fct fct = fcts->towc->__fct;
 #ifdef PTR_DEMANGLE
-  if (fcts->toc16->__shlib_handle != NULL)
+  if (fcts->towc->__shlib_handle != NULL)
     PTR_DEMANGLE (fct);
 #endif
 
-  /* We first have to check whether the character can be represented
-     without a surrogate.  If we immediately pass in a buffer large
-     enough to hold two char16_t values and the first character does
-     not require a surrogate the routine will try to convert more
-     input if N is larger then needed for the first character.  */
-  status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+  status = DL_CALL_FCT (fct, (fcts->towc, &data, &inbuf, endbuf,
 			      NULL, &dummy, 0, 1));
 
-  if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf)
-    {
-      data.__outbufend = outbuf + 2 * sizeof (char16_t);
-      status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
-				  NULL, &dummy, 0, 1));
-    }
-
   /* There must not be any problems with the conversion but illegal input
      characters.  The output buffer must be large enough, otherwise the
      definition of MB_CUR_MAX is not correct.  All the other possible
@@ -125,33 +116,35 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
   if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
       || status == __GCONV_FULL_OUTPUT)
     {
-      if (pc16 != NULL)
-	*pc16 = buf[0];
+      result = inbuf - (const unsigned char *) s;
 
-      if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0'))
+      if (wc < 0x10000)
 	{
-	  /* The converted character is the NUL character.  */
-	  assert (__mbsinit (data.__statep));
-	  result = 0;
+	  if (pc16 != NULL)
+	    *pc16 = wc;
+
+	  if (data.__outbuf != outbuf && wc == L'\0')
+	    {
+	      /* The converted character is the NUL character.  */
+	      assert (__mbsinit (data.__statep));
+	      result = 0;
+	    }
 	}
       else
 	{
-	  result = inbuf - (const unsigned char *) s;
+	  /* This is a surrogate.  */
+	  if (pc16 != NULL)
+	    *pc16 = 0xd7c0 + (wc >> 10);
 
-	  if (data.__outbuf != outbuf + 2)
-	    {
-	      /* This is a surrogate.  */
-	      assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff);
-	      assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff);
-	      ps->__count |= 0x80000000;
-	      ps->__value.__wch = buf[1];
-	    }
+	  ps->__count |= 0x80000000;
+	  ps->__value.__wch = 0xdc00 + (wc & 0x3ff);
 	}
     }
   else if (status == __GCONV_INCOMPLETE_INPUT)
     result = (size_t) -2;
   else
     {
+    ilseq:
       result = (size_t) -1;
       __set_errno (EILSEQ);
     }
diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c
index 03b8348d30..0c99b7401d 100644
--- a/wcsmbs/mbrtowc.c
+++ b/wcsmbs/mbrtowc.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011, 2012
    Free Software Foundation, Inc.
    This file is part of the GNU C Library.
    Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
@@ -73,7 +73,11 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
   inbuf = (const unsigned char *) s;
   endbuf = inbuf + n;
   if (__builtin_expect (endbuf < inbuf, 0))
-    endbuf = (const unsigned char *) ~(uintptr_t) 0;
+    {
+      endbuf = (const unsigned char *) ~(uintptr_t) 0;
+      if (endbuf == inbuf)
+	goto ilseq;
+    }
   __gconv_fct fct = fcts->towc->__fct;
 #ifdef PTR_DEMANGLE
   if (fcts->towc->__shlib_handle != NULL)
@@ -108,6 +112,7 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
     result = (size_t) -2;
   else
     {
+    ilseq:
       result = (size_t) -1;
       __set_errno (EILSEQ);
     }
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index 9ce26f1dc0..27ea442d40 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -68,44 +68,6 @@ static const struct __gconv_step to_mb =
   .__data = NULL
 };
 
-static const struct __gconv_step to_c16 =
-{
-  .__shlib_handle = NULL,
-  .__modname = NULL,
-  .__counter = INT_MAX,
-  .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
-  .__to_name = (char *) "UTF-16//",
-  .__fct = __gconv_transform_ascii_char16,
-  .__btowc_fct = NULL,
-  .__init_fct = NULL,
-  .__end_fct = NULL,
-  .__min_needed_from = 1,
-  .__max_needed_from = 1,
-  .__min_needed_to = 4,
-  .__max_needed_to = 4,
-  .__stateful = 0,
-  .__data = NULL
-};
-
-static const struct __gconv_step from_c16 =
-{
-  .__shlib_handle = NULL,
-  .__modname = NULL,
-  .__counter = INT_MAX,
-  .__from_name = (char *) "UTF-16//",
-  .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
-  .__fct = __gconv_transform_char16_ascii,
-  .__btowc_fct = NULL,
-  .__init_fct = NULL,
-  .__end_fct = NULL,
-  .__min_needed_from = 4,
-  .__max_needed_from = 4,
-  .__min_needed_to = 1,
-  .__max_needed_to = 1,
-  .__stateful = 0,
-  .__data = NULL
-};
-
 
 /* For the default locale we only have to handle ANSI_X3.4-1968.  */
 const struct gconv_fcts __wcsmbs_gconv_fcts_c =
@@ -114,11 +76,6 @@ const struct gconv_fcts __wcsmbs_gconv_fcts_c =
   .towc_nsteps = 1,
   .tomb = (struct __gconv_step *) &to_mb,
   .tomb_nsteps = 1,
-
-  .toc16 = (struct __gconv_step *) &to_c16,
-  .toc16_nsteps = 1,
-  .fromc16 = (struct __gconv_step *) &from_c16,
-  .fromc16_nsteps = 1,
 };
 
 
@@ -234,24 +191,9 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
 	new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL",
 					  &new_fcts->tomb_nsteps);
 
-      if (new_fcts->tomb != NULL)
-	{
-	  new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name,
-					     &new_fcts->toc16_nsteps);
-
-	  if (new_fcts->toc16 != NULL)
-	    new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16",
-						 &new_fcts->fromc16_nsteps);
-	  else
-	    {
-	      __gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps);
-	      new_fcts->toc16 = NULL;
-	    }
-	}
-
       /* If any of the conversion functions is not available we don't
 	 use any since this would mean we cannot convert back and
-	 forth.*/
+	 forth.  NB: NEW_FCTS was allocated with calloc.  */
       if (new_fcts->tomb == NULL)
 	{
 	  if (new_fcts->towc != NULL)
@@ -264,12 +206,6 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
 	}
       else
 	{
-	  // XXX At least for now we live with the CHAR16 not being available.
-	  if (new_fcts->toc16 == NULL)
-	    new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16;
-	  if (new_fcts->fromc16 == NULL)
-	    new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16;
-
 	  new_category->private.ctype = new_fcts;
 	  new_category->private.cleanup = &_nl_cleanup_ctype;
 	}
@@ -297,10 +233,6 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy)
     ++copy->towc->__counter;
   if (copy->tomb->__shlib_handle != NULL)
     ++copy->tomb->__counter;
-  if (copy->toc16->__shlib_handle != NULL)
-    ++copy->toc16->__counter;
-  if (copy->fromc16->__shlib_handle != NULL)
-    ++copy->fromc16->__counter;
 }
 
 
@@ -320,19 +252,6 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
       return 1;
     }
 
-  copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", &copy->fromc16_nsteps);
-  if (copy->fromc16 == NULL)
-    copy->toc16 = NULL;
-  else
-    {
-      copy->toc16 = __wcsmbs_getfct ("CHAR16", name, &copy->toc16_nsteps);
-      if (copy->toc16 == NULL)
-	{
-	  __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
-	  copy->fromc16 = NULL;
-	}
-    }
-
   return 0;
 }
 
@@ -348,8 +267,6 @@ _nl_cleanup_ctype (struct __locale_data *locale)
       /* Free the old conversions.  */
       __gconv_close_transform (data->tomb, data->tomb_nsteps);
       __gconv_close_transform (data->towc, data->towc_nsteps);
-      __gconv_close_transform (data->fromc16, data->fromc16_nsteps);
-      __gconv_close_transform (data->toc16, data->toc16_nsteps);
       free ((char *) data);
     }
 }
diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
index 064c41c82f..98f53bcc4c 100644
--- a/wcsmbs/wcsmbsload.h
+++ b/wcsmbs/wcsmbsload.h
@@ -32,11 +32,6 @@ struct gconv_fcts
     size_t towc_nsteps;
     struct __gconv_step *tomb;
     size_t tomb_nsteps;
-
-    struct __gconv_step *toc16;
-    size_t toc16_nsteps;
-    struct __gconv_step *fromc16;
-    size_t fromc16_nsteps;
   };
 
 /* Set of currently active conversion functions.  */
author	Ulrich Drepper <drepper@gmail.com>	2012-01-08 07:19:21 -0500
committer	Ulrich Drepper <drepper@gmail.com>	2012-01-08 07:19:21 -0500
commit	d3ed722566f42d3f614b1221a8e4f19092976531 (patch)
tree	4a63e059ef599167cf407311188551fe72221d8d
parent	a0da5fe1e49b819b4d90b77915e21cddd397d064 (diff)