summary refs log tree commit diff
path: root/wcsmbs
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@gmail.com> 2012-01-07 10:52:53 -0500
committer Ulrich Drepper <drepper@gmail.com> 2012-01-07 10:52:53 -0500
commit 9954432e309c8fddaec2fe53e601702a5c981624 (patch)
tree 3eb7513694e25391b3393afbb847dbd85ebf097a /wcsmbs
parent c3a87236702cb73be1dada3438bbd3c3934e83f8 (diff)
More char16_t and char32_t support
It works now for UTF-8 locales
Diffstat (limited to 'wcsmbs')
-rw-r--r-- wcsmbs/Makefile 3
-rw-r--r-- wcsmbs/c16rtomb.c 19
-rw-r--r-- wcsmbs/mbrtoc16.c 52
-rw-r--r-- wcsmbs/tst-c16c32-1.c 131
-rw-r--r-- wcsmbs/wcrtomb.c 5
-rw-r--r-- wcsmbs/wcsmbsload.c 84
6 files changed, 241 insertions, 53 deletions
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile
index 8c446e1fd3..010e0c8d03 100644
--- a/wcsmbs/Makefile
+++ b/wcsmbs/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011
+# Copyright (C) 1995-2000,2002,2003,2004,2005,2006,2007,2011,2012
# Free Software Foundation, Inc.
# This file is part of the GNU C Library.
@@ -46,6 +46,7 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \
strop-tests := wcscmp wmemcmp wcslen wcschr wcsrchr wcscpy
tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
tst-wcrtomb tst-wcpncpy tst-mbsrtowcs tst-wchar-h tst-mbrtowc2 \
+ tst-c16c32-1 \
wcsatcliff $(addprefix test-,$(strop-tests))
include ../Rules
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
index c75ca3bf21..3fed0b5d63 100644
--- a/wcsmbs/c16rtomb.c
+++ b/wcsmbs/c16rtomb.c
@@ -1,6 +1,6 @@
/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@cygnus.com>, 2011.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -44,7 +44,12 @@ static mbstate_t state;
size_t
c16rtomb (char *s, char16_t c16, mbstate_t *ps)
{
- char buf[MB_CUR_MAX];
+#if 1
+ // XXX The ISO C 11 spec I have does not say anything about handling
+ // XXX surrogates in this interface.
+ return wcrtomb (s, c16, ps ?: &state);
+#else
+ char buf[MB_LEN_MAX];
struct __gconv_step_data data;
int status;
size_t result;
@@ -78,9 +83,9 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
PTR_DEMANGLE (fct);
#endif
- /* If C16 is the NUL character we write into the output buffer the byte
- sequence necessary for PS to get into the initial state, followed
- by a NUL byte. */
+ /* If C16 is the NUL character we write into the output buffer
+ the byte sequence necessary for PS to get into the initial
+ state, followed by a NUL byte. */
if (c16 == L'\0')
{
status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL,
@@ -96,7 +101,8 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
status = DL_CALL_FCT (fct,
(fcts->fromc16, &data, &inbuf,
- inbuf + sizeof (char16_t), NULL, &dummy, 0, 1));
+ inbuf + sizeof (char16_t), NULL, &dummy,
+ 0, 1));
}
/* There must not be any problems with the conversion but illegal input
@@ -118,4 +124,5 @@ c16rtomb (char *s, char16_t c16, mbstate_t *ps)
}
return result;
+#endif
}
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
index 7b5822d690..df970fba4f 100644
--- a/wcsmbs/mbrtoc16.c
+++ b/wcsmbs/mbrtoc16.c
@@ -1,6 +1,6 @@
/* Copyright (C) 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU C Library.
- Contributed by Ulrich Drepper <drepper@gnu.org>, 2011.
+ Contributed by Ulrich Drepper <drepper@gmail.com>, 2011.
The GNU C Library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -43,20 +43,32 @@ static mbstate_t state;
size_t
mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
{
- char16_t buf[1];
+ if (ps == NULL)
+ ps = &state;
+
+ if (ps->__count & 0x80000000)
+ {
+ /* We have to return the second word for a surrogate. */
+ ps->__count &= 0x7fffffff;
+ *pc16 = ps->__value.__wch;
+ ps->__value.__wch = L'\0';
+ return (size_t) -3;
+ }
+
+ char16_t buf[2];
struct __gconv_step_data data;
int status;
size_t result;
size_t dummy;
const unsigned char *inbuf, *endbuf;
- unsigned char *outbuf = (unsigned char *) (pc16 ?: buf);
+ unsigned char *outbuf = (unsigned char *) buf;
const struct gconv_fcts *fcts;
/* Set information for this step. */
data.__invocation_counter = 0;
data.__internal_use = 1;
data.__flags = __GCONV_IS_LAST;
- data.__statep = ps ?: &state;
+ data.__statep = ps;
data.__trans = NULL;
/* A first special case is if S is NULL. This means put PS in the
@@ -85,9 +97,22 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
if (fcts->toc16->__shlib_handle != NULL)
PTR_DEMANGLE (fct);
#endif
+
+ /* We first have to check whether the character can be represented
+ without a surrogate. If we immediately pass in a buffer large
+ enough to hold two char16_t values and the first character does
+ not require a surrogate the routine will try to convert more
+ input if N is larger than needed for the first character. */
status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
NULL, &dummy, 0, 1));
+ if (status == __GCONV_FULL_OUTPUT && data.__outbuf == outbuf)
+ {
+ data.__outbufend = outbuf + 2 * sizeof (char16_t);
+ status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+ NULL, &dummy, 0, 1));
+ }
+
/* There must not be any problems with the conversion but illegal input
characters. The output buffer must be large enough, otherwise the
definition of MB_CUR_MAX is not correct. All the other possible
@@ -100,15 +125,28 @@ mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
|| status == __GCONV_FULL_OUTPUT)
{
- if (data.__outbuf != (unsigned char *) outbuf
- && *(char16_t *) outbuf == U('\0'))
+ if (pc16 != NULL)
+ *pc16 = buf[0];
+
+ if (data.__outbuf != outbuf && *(char16_t *) outbuf == U('\0'))
{
/* The converted character is the NUL character. */
assert (__mbsinit (data.__statep));
result = 0;
}
else
- result = inbuf - (const unsigned char *) s;
+ {
+ result = inbuf - (const unsigned char *) s;
+
+ if (data.__outbuf != outbuf + 2)
+ {
+ /* This is a surrogate. */
+ assert (buf[0] >= 0xd800 && buf[0] <= 0xdfff);
+ assert (buf[1] >= 0xdc00 && buf[1] <= 0xdfff);
+ ps->__count |= 0x80000000;
+ ps->__value.__wch = buf[1];
+ }
+ }
}
else if (status == __GCONV_INCOMPLETE_INPUT)
result = (size_t) -2;
diff --git a/wcsmbs/tst-c16c32-1.c b/wcsmbs/tst-c16c32-1.c
new file mode 100644
index 0000000000..f4534c5d93
--- /dev/null
+++ b/wcsmbs/tst-c16c32-1.c
@@ -0,0 +1,131 @@
+#include <inttypes.h>
+#include <locale.h>
+#include <stdio.h>
+#include <uchar.h>
+
+
+static int
+do_test (void)
+{
+ if (setlocale (LC_ALL, "de_DE.UTF-8") == NULL)
+ {
+ puts ("cannot set locale");
+ return 1;
+ }
+
+ int result = 0;
+
+ char32_t c32 = 48;
+ do
+ {
+ if (c32 >= 0xd800 && c32 <= 0xe000)
+ continue;
+
+ char buf[20];
+ size_t n1 = c32rtomb (buf, c32, NULL);
+ if (n1 <= 0)
+ {
+ printf ("c32rtomb for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
+ result = 1;
+ continue;
+ }
+
+ char32_t c32out;
+ size_t n2 = mbrtoc32 (&c32out, buf, n1, NULL);
+ if ((ssize_t) n2 < 0)
+ {
+ printf ("mbrtoc32 for U'\\x%" PRIx32 "' failed\n", (uint32_t) c32);
+ result = 1;
+ continue;
+ }
+ if (n2 != n1)
+ {
+ printf ("mbrtoc32 for U'\\x%" PRIx32 "' consumed %zu bytes, not %zu\n",
+ (uint32_t) c32, n2, n1);
+ result = 1;
+ }
+ else if (c32out != c32)
+ {
+ printf ("mbrtoc32 for U'\\x%" PRIx32 "' produced U'\\x%" PRIx32 "\n",
+ (uint32_t) c32, (uint32_t) c32out);
+ result = 1;
+ }
+
+ char16_t c16;
+ size_t n3 = mbrtoc16 (&c16, buf, n1, NULL);
+ if (n3 != n1)
+ {
+ printf ("mbrtoc16 for U'\\x%" PRIx32 "' did not consume all bytes\n",
+ (uint32_t) c32);
+ result = 1;
+ continue;
+ }
+ if (c32 < 0x10000)
+ {
+ if (c16 != c32)
+ {
+ printf ("mbrtoc16 for U'\\x%" PRIx32 "' produce u'\\x%" PRIx16 "'\n",
+ (uint32_t) c32, (uint16_t) c16);
+ result = 1;
+ continue;
+ }
+ }
+ else
+ {
+ buf[0] = '1';
+ char16_t c16_2;
+ size_t n4 = mbrtoc16 (&c16_2, buf, 1, NULL);
+ if (n4 != (size_t) -3)
+ {
+ printf ("second mbrtoc16 for U'\\x%" PRIx32 "' did not return -3\n",
+ (uint32_t) c32);
+ result = 1;
+ continue;
+ }
+
+ if (c32 != (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00))
+ {
+ printf ("mbrtoc16 for U'\\x%" PRIx32 "' returns U'\\x%" PRIx32 "\n",
+ (uint32_t) c32,
+ (((uint32_t) (c16 - 0xd7c0)) << 10) + (c16_2 - 0xdc00));
+ result = 1;
+ continue;
+ }
+ }
+
+ buf[0] = '\0';
+ char16_t c16_nul;
+ n3 = mbrtoc16 (&c16_nul, buf, n1, NULL);
+ if (n3 != 0)
+ {
+ printf ("mbrtoc16 for '\\0' returns %zd\n", n3);
+ result = 1;
+ continue;
+ }
+
+ if (c32 < 0x10000)
+ {
+ size_t n5 = c16rtomb (buf, c16, NULL);
+ if ((ssize_t) n5 < 0)
+ {
+ printf ("c16rtomb for U'\\x%" PRIx32 "' failed with %zd\n",
+ (uint32_t) c32, n5);
+ result = 1;
+ continue;
+ }
+ if (n5 != n1)
+ {
+ printf ("c16rtomb for U'\\x%" PRIx32 "' produced %zu bytes instead of %zu bytes\n",
+ (uint32_t) c32, n5, n1);
+ result = 1;
+ continue;
+ }
+ }
+ }
+ while ((c32 += 0x1111) <= U'\x12000');
+
+ return result;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c
index 547b05aa9c..946fdaf47f 100644
--- a/wcsmbs/wcrtomb.c
+++ b/wcsmbs/wcrtomb.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1996-1998,2000,2002,2005,2011 Free Software Foundation, Inc.
+/* Copyright (C) 1996-1998,2000,2002,2005,2011,2012
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -38,7 +39,7 @@ static mbstate_t state;
size_t
__wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
{
- char buf[MB_CUR_MAX];
+ char buf[MB_LEN_MAX];
struct __gconv_step_data data;
int status;
size_t result;
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index 212a6c8135..9ce26f1dc0 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -1,4 +1,5 @@
-/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011,2012
+ Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
@@ -74,7 +75,7 @@ static const struct __gconv_step to_c16 =
.__counter = INT_MAX,
.__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
.__to_name = (char *) "UTF-16//",
- .__fct = __gconv_transform_ascii_utf16,
+ .__fct = __gconv_transform_ascii_char16,
.__btowc_fct = NULL,
.__init_fct = NULL,
.__end_fct = NULL,
@@ -93,7 +94,7 @@ static const struct __gconv_step from_c16 =
.__counter = INT_MAX,
.__from_name = (char *) "UTF-16//",
.__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
- .__fct = __gconv_transform_utf16_ascii,
+ .__fct = __gconv_transform_char16_ascii,
.__btowc_fct = NULL,
.__init_fct = NULL,
.__end_fct = NULL,
@@ -209,7 +210,7 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
int use_translit;
/* Allocate the gconv_fcts structure. */
- new_fcts = malloc (sizeof *new_fcts);
+ new_fcts = calloc (1, sizeof *new_fcts);
if (new_fcts == NULL)
goto failed;
@@ -229,16 +230,24 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
represent all others. */
new_fcts->towc = __wcsmbs_getfct ("INTERNAL", complete_name,
&new_fcts->towc_nsteps);
- new_fcts->tomb = (new_fcts->towc != NULL
- ? __wcsmbs_getfct (complete_name, "INTERNAL",
- &new_fcts->tomb_nsteps)
- : NULL);
+ if (new_fcts->towc != NULL)
+ new_fcts->tomb = __wcsmbs_getfct (complete_name, "INTERNAL",
+ &new_fcts->tomb_nsteps);
- // XXX
- new_fcts->toc16 = (struct __gconv_step *) &to_c16;
- new_fcts->toc16_nsteps = 1;
- new_fcts->fromc16 = (struct __gconv_step *) &from_c16;
- new_fcts->fromc16_nsteps = 1;
+ if (new_fcts->tomb != NULL)
+ {
+ new_fcts->toc16 = __wcsmbs_getfct ("CHAR16", complete_name,
+ &new_fcts->toc16_nsteps);
+
+ if (new_fcts->toc16 != NULL)
+ new_fcts->fromc16 = __wcsmbs_getfct (complete_name, "CHAR16",
+ &new_fcts->fromc16_nsteps);
+ else
+ {
+ __gconv_close_transform (new_fcts->toc16, new_fcts->toc16_nsteps);
+ new_fcts->toc16 = NULL;
+ }
+ }
/* If any of the conversion functions is not available we don't
use any since this would mean we cannot convert back and
@@ -255,6 +264,12 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
}
else
{
+ // XXX At least for now we live with the CHAR16 not being available.
+ if (new_fcts->toc16 == NULL)
+ new_fcts->toc16 = __wcsmbs_gconv_fcts_c.toc16;
+ if (new_fcts->fromc16 == NULL)
+ new_fcts->fromc16 = __wcsmbs_gconv_fcts_c.fromc16;
+
new_category->private.ctype = new_fcts;
new_category->private.cleanup = &_nl_cleanup_ctype;
}
@@ -277,11 +292,15 @@ __wcsmbs_clone_conv (struct gconv_fcts *copy)
*copy = *orig;
/* Now increment the usage counters.
- Note: This assumes copy->towc_nsteps == 1 and copy->tomb_nsteps == 1. */
+ Note: This assumes copy->*_nsteps == 1. */
if (copy->towc->__shlib_handle != NULL)
++copy->towc->__counter;
if (copy->tomb->__shlib_handle != NULL)
++copy->tomb->__counter;
+ if (copy->toc16->__shlib_handle != NULL)
+ ++copy->toc16->__counter;
+ if (copy->fromc16->__shlib_handle != NULL)
+ ++copy->fromc16->__counter;
}
@@ -296,30 +315,24 @@ __wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps);
if (copy->tomb == NULL)
- goto out_mb;
-
-#if 0
- copy->fromc16 = __wcsmbs_getfct (name, "UTF-16//", &copy->fromc16_nsteps);
- if (copy->fromc16 == NULL)
- goto out_fromc16;
-
- copy->toc16 = __wcsmbs_getfct ("UTF-16//", name, &copy->toc16_nsteps);
- if (copy->toc16 == NULL)
-#else
- if (0)
-#endif
{
-#if 0
- __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
- out_fromc16:
- __gconv_close_transform (copy->tomb, copy->tomb_nsteps);
-#endif
- out_mb:
__gconv_close_transform (copy->towc, copy->towc_nsteps);
- out_wc:
return 1;
}
+ copy->fromc16 = __wcsmbs_getfct (name, "CHAR16", &copy->fromc16_nsteps);
+ if (copy->fromc16 == NULL)
+ copy->toc16 = NULL;
+ else
+ {
+ copy->toc16 = __wcsmbs_getfct ("CHAR16", name, &copy->toc16_nsteps);
+ if (copy->toc16 == NULL)
+ {
+ __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
+ copy->fromc16 = NULL;
+ }
+ }
+
return 0;
}
@@ -335,11 +348,8 @@ _nl_cleanup_ctype (struct __locale_data *locale)
/* Free the old conversions. */
__gconv_close_transform (data->tomb, data->tomb_nsteps);
__gconv_close_transform (data->towc, data->towc_nsteps);
-#if 0
- // XXX
__gconv_close_transform (data->fromc16, data->fromc16_nsteps);
- __gconv_close_transform (data->toc16, data->toc16c_nsteps);
-#endif
+ __gconv_close_transform (data->toc16, data->toc16_nsteps);
free ((char *) data);
}
}