summaryrefslogtreecommitdiff
path: root/wcsmbs
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@gmail.com>2011-12-28 06:19:42 -0500
committerUlrich Drepper <drepper@gmail.com>2012-01-01 07:17:22 -0500
commitdb6af3ebf46a83b885455dc03a3c2c1c2c2dedec (patch)
tree942a59c7de0033cf9ab3231523130c323fa4b80c /wcsmbs
parent8ea79a616e43093f403927e425c197afe39196b7 (diff)
Add uchar.h support, part 1
c16 support for locales other than the C locale is still missing.
Diffstat (limited to 'wcsmbs')
-rw-r--r--wcsmbs/Makefile3
-rw-r--r--wcsmbs/Versions3
-rw-r--r--wcsmbs/c16rtomb.c121
-rw-r--r--wcsmbs/mbrtoc16.c122
-rw-r--r--wcsmbs/mbrtowc.c7
-rw-r--r--wcsmbs/uchar.h8
-rw-r--r--wcsmbs/wchar.h8
-rw-r--r--wcsmbs/wcrtomb.c7
-rw-r--r--wcsmbs/wcsmbsload.c90
-rw-r--r--wcsmbs/wcsmbsload.h7
10 files changed, 363 insertions, 13 deletions
diff --git a/wcsmbs/Makefile b/wcsmbs/Makefile
index 0bb1740838..8c446e1fd3 100644
--- a/wcsmbs/Makefile
+++ b/wcsmbs/Makefile
@@ -40,7 +40,8 @@ routines := wcscat wcschr wcscmp wcscpy wcscspn wcsdup wcslen wcsncat \
wcscasecmp wcsncase wcscasecmp_l wcsncase_l \
wcsmbsload mbsrtowcs_l \
isoc99_wscanf isoc99_vwscanf isoc99_fwscanf isoc99_vfwscanf \
- isoc99_swscanf isoc99_vswscanf
+ isoc99_swscanf isoc99_vswscanf \
+ mbrtoc16 c16rtomb
strop-tests := wcscmp wmemcmp wcslen wcschr wcsrchr wcscpy
tests := tst-wcstof wcsmbs-tst1 tst-wcsnlen tst-btowc tst-mbrtowc \
diff --git a/wcsmbs/Versions b/wcsmbs/Versions
index b6dfa85a40..10bccc9539 100644
--- a/wcsmbs/Versions
+++ b/wcsmbs/Versions
@@ -28,4 +28,7 @@ libc {
__isoc99_wscanf; __isoc99_vwscanf; __isoc99_fwscanf; __isoc99_vfwscanf;
__isoc99_swscanf; __isoc99_vswscanf;
}
+ GLIBC_2.16 {
+ mbrtoc16; c16rtomb; mbrtoc32; c32rtomb;
+ }
}
diff --git a/wcsmbs/c16rtomb.c b/wcsmbs/c16rtomb.c
new file mode 100644
index 0000000000..33e6b92d02
--- /dev/null
+++ b/wcsmbs/c16rtomb.c
@@ -0,0 +1,121 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@cygnus.com>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <gconv.h>
+#include <stdlib.h>
+#include <uchar.h>
+#include <wcsmbsload.h>
+
+#include <sysdep.h>
+
+#ifndef EILSEQ
+# define EILSEQ EINVAL
+#endif
+
+/* Build a character literal suitable for comparison with a char16_t.
+   __STDC__ only ever expands to 1, so the language level has to be read
+   from __STDC_VERSION__.  With C11 the u'' prefix yields a char16_t
+   literal; older compilers fall back to a wide character literal.  */
+#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L
+# define u(c) u##c
+#else
+# define u(c) L##c
+#endif
+
+
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
+
+/* Convert the 16-bit character C16 into its multibyte representation
+   and store it in S, using the fromc16 conversion step of the current
+   LC_CTYPE locale.
+
+   S    destination buffer; at most MB_CUR_MAX bytes are made available
+        to the conversion.  If S is NULL an internal buffer is used and
+        C16 is treated as the NUL character.
+   C16  the character to convert.
+   PS   conversion state; if NULL a private static state object is used.
+
+   Returns the number of bytes stored, or (size_t) -1 with errno set to
+   EILSEQ if the conversion step reports illegal or incomplete input.  */
+size_t
+c16rtomb (char *s, char16_t c16, mbstate_t *ps)
+{
+  char buf[MB_CUR_MAX];
+  struct __gconv_step_data data;
+  int status;
+  size_t result;
+  size_t dummy;
+  const struct gconv_fcts *fcts;
+
+  /* Set information for this step.  */
+  data.__invocation_counter = 0;
+  data.__internal_use = 1;
+  data.__flags = __GCONV_IS_LAST;
+  data.__statep = ps ?: &state;
+  data.__trans = NULL;
+
+  /* A first special case is if S is NULL.  This means put PS in the
+     initial state.  */
+  if (s == NULL)
+    {
+      s = buf;
+      c16 = u('\0');
+    }
+
+  /* Tell where we want to have the result.  */
+  data.__outbuf = (unsigned char *) s;
+  data.__outbufend = (unsigned char *) s + MB_CUR_MAX;
+
+  /* Get the conversion functions.  */
+  fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
+  __gconv_fct fct = fcts->fromc16->__fct;
+#ifdef PTR_DEMANGLE
+  /* Whether the pointer needs demangling depends on the step that
+     provides it, i.e. fromc16 and not the unrelated tomb step.  */
+  if (fcts->fromc16->__shlib_handle != NULL)
+    PTR_DEMANGLE (fct);
+#endif
+
+  /* If C16 is the NUL character we write into the output buffer the byte
+     sequence necessary for PS to get into the initial state, followed
+     by a NUL byte.  */
+  if (c16 == u('\0'))
+    {
+      status = DL_CALL_FCT (fct, (fcts->fromc16, &data, NULL, NULL,
+				  NULL, &dummy, 1, 1));
+
+      if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT)
+	*data.__outbuf++ = '\0';
+    }
+  else
+    {
+      /* Do a normal conversion.  The input is the single code unit
+	 stored in C16.  */
+      const unsigned char *inbuf = (const unsigned char *) &c16;
+
+      status = DL_CALL_FCT (fct,
+			    (fcts->fromc16, &data, &inbuf,
+			     inbuf + sizeof (char16_t), NULL, &dummy, 0, 1));
+    }
+
+  /* There must not be any problems with the conversion but illegal input
+     characters.  The output buffer must be large enough, otherwise the
+     definition of MB_CUR_MAX is not correct.  All the other possible
+     errors also must not happen.  */
+  assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+	  || status == __GCONV_ILLEGAL_INPUT
+	  || status == __GCONV_INCOMPLETE_INPUT
+	  || status == __GCONV_FULL_OUTPUT);
+
+  if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+      || status == __GCONV_FULL_OUTPUT)
+    /* Number of bytes the step stored in S.  */
+    result = data.__outbuf - (unsigned char *) s;
+  else
+    {
+      result = (size_t) -1;
+      __set_errno (EILSEQ);
+    }
+
+  return result;
+}
diff --git a/wcsmbs/mbrtoc16.c b/wcsmbs/mbrtoc16.c
new file mode 100644
index 0000000000..3a3a45ce1a
--- /dev/null
+++ b/wcsmbs/mbrtoc16.c
@@ -0,0 +1,122 @@
+/* Copyright (C) 2011 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@gnu.org>, 2011.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <errno.h>
+#include <gconv.h>
+#include <uchar.h>
+#include <wcsmbsload.h>
+
+#include <sysdep.h>
+
+#ifndef EILSEQ
+# define EILSEQ EINVAL
+#endif
+
+/* Build a character literal suitable for comparison with a char16_t.
+   __STDC__ only ever expands to 1, so the language level has to be read
+   from __STDC_VERSION__.  With C11 the u'' prefix yields a char16_t
+   literal; older compilers fall back to a wide character literal.  */
+#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 201112L
+# define U(c) u##c
+#else
+# define U(c) L##c
+#endif
+
+
+/* This is the private state used if PS is NULL. */
+static mbstate_t state;
+
+/* Convert the multibyte sequence starting at S (at most N bytes) into
+   one 16-bit character using the toc16 conversion step of the current
+   LC_CTYPE locale.
+
+   PC16  where the result is stored; if NULL a local buffer is used.
+   S     input bytes; if NULL the call behaves as if S were "" and N
+         were 1, and the result goes to the local buffer.
+   N     number of input bytes available.
+   PS    conversion state; if NULL a private static state object is used.
+
+   Returns 0 if the converted character is NUL, otherwise the number of
+   input bytes consumed; (size_t) -2 if the input is incomplete; and
+   (size_t) -1 with errno set to EILSEQ for any other failure status.  */
+size_t
+mbrtoc16 (char16_t *pc16, const char *s, size_t n, mbstate_t *ps)
+{
+ char16_t buf[1];
+ struct __gconv_step_data data;
+ int status;
+ size_t result;
+ size_t dummy;
+ const unsigned char *inbuf, *endbuf;
+ unsigned char *outbuf = (unsigned char *) (pc16 ?: buf);
+ const struct gconv_fcts *fcts;
+
+ /* Set information for this step. */
+ data.__invocation_counter = 0;
+ data.__internal_use = 1;
+ data.__flags = __GCONV_IS_LAST;
+ data.__statep = ps ?: &state;
+ data.__trans = NULL;
+
+ /* A first special case is if S is NULL. This means put PS in the
+ initial state. */
+ if (s == NULL)
+ {
+ outbuf = (unsigned char *) buf;
+ s = "";
+ n = 1;
+ }
+
+ /* Tell where we want the result. Room is provided for exactly one
+ char16_t. */
+ data.__outbuf = outbuf;
+ data.__outbufend = outbuf + sizeof (char16_t);
+
+ /* Get the conversion functions. */
+ fcts = get_gconv_fcts (_NL_CURRENT_DATA (LC_CTYPE));
+
+ /* Do a normal conversion. */
+ inbuf = (const unsigned char *) s;
+ endbuf = inbuf + n;
+ /* If S + N wrapped around the address space, clamp the end of the
+ input to the highest possible address. */
+ if (__builtin_expect (endbuf < inbuf, 0))
+ endbuf = (const unsigned char *) ~(uintptr_t) 0;
+ __gconv_fct fct = fcts->toc16->__fct;
+#ifdef PTR_DEMANGLE
+ /* Demangle the function pointer only when the step has a shared
+ object handle. */
+ if (fcts->toc16->__shlib_handle != NULL)
+ PTR_DEMANGLE (fct);
+#endif
+ status = DL_CALL_FCT (fct, (fcts->toc16, &data, &inbuf, endbuf,
+ NULL, &dummy, 0, 1));
+
+ /* There must not be any problems with the conversion but illegal input
+ characters. The output buffer must be large enough, otherwise the
+ definition of MB_CUR_MAX is not correct. All the other possible
+ errors also must not happen. */
+ assert (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+ || status == __GCONV_ILLEGAL_INPUT
+ || status == __GCONV_INCOMPLETE_INPUT
+ || status == __GCONV_FULL_OUTPUT);
+
+ if (status == __GCONV_OK || status == __GCONV_EMPTY_INPUT
+ || status == __GCONV_FULL_OUTPUT)
+ {
+ /* Something was written (the output pointer moved) and it is NUL. */
+ if (data.__outbuf != (unsigned char *) outbuf
+ && *(char16_t *) outbuf == U('\0'))
+ {
+ /* The converted character is the NUL character. */
+ assert (__mbsinit (data.__statep));
+ result = 0;
+ }
+ else
+ /* Number of input bytes the conversion step consumed. */
+ result = inbuf - (const unsigned char *) s;
+ }
+ else if (status == __GCONV_INCOMPLETE_INPUT)
+ result = (size_t) -2;
+ else
+ {
+ result = (size_t) -1;
+ __set_errno (EILSEQ);
+ }
+
+ return result;
+}
diff --git a/wcsmbs/mbrtowc.c b/wcsmbs/mbrtowc.c
index b534571736..03b8348d30 100644
--- a/wcsmbs/mbrtowc.c
+++ b/wcsmbs/mbrtowc.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005
+/* Copyright (C) 1996, 1997, 1998, 1999, 2000, 2002, 2004, 2005, 2011
Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 1996.
@@ -117,3 +117,8 @@ __mbrtowc (wchar_t *pwc, const char *s, size_t n, mbstate_t *ps)
libc_hidden_def (__mbrtowc)
weak_alias (__mbrtowc, mbrtowc)
libc_hidden_weak (mbrtowc)
+
+/* There should be no difference between the UTF-32 handling required
+ by mbrtoc32 and the wchar_t handling which has long since been
+ implemented in mbrtowc. */
+weak_alias (__mbrtowc, mbrtoc32)
diff --git a/wcsmbs/uchar.h b/wcsmbs/uchar.h
index 44637c3396..bb5f3ba35c 100644
--- a/wcsmbs/uchar.h
+++ b/wcsmbs/uchar.h
@@ -31,6 +31,14 @@
#define __need_mbstate_t
#include <wchar.h>
+#ifndef __mbstate_t_defined
+__BEGIN_NAMESPACE_C99
+/* Public type. */
+typedef __mbstate_t mbstate_t;
+__END_NAMESPACE_C99
+# define __mbstate_t_defined 1
+#endif
+
#ifdef __GNUC__
/* Define the 16-bit and 32-bit character types. Use the information
diff --git a/wcsmbs/wchar.h b/wcsmbs/wchar.h
index 2b35f51ad6..ccaaed8f49 100644
--- a/wcsmbs/wchar.h
+++ b/wcsmbs/wchar.h
@@ -77,8 +77,8 @@ __END_NAMESPACE_STD
# endif
#endif
-#if (defined _WCHAR_H || defined __need_mbstate_t) && !defined __mbstate_t_defined
-# define __mbstate_t_defined 1
+#if (defined _WCHAR_H || defined __need_mbstate_t) && !defined ____mbstate_t_defined
+# define ____mbstate_t_defined 1
/* Conversion state information. */
typedef struct
{
@@ -101,10 +101,14 @@ typedef struct
defined. */
#ifdef _WCHAR_H
+# ifndef __mbstate_t_defined
__BEGIN_NAMESPACE_C99
/* Public type. */
typedef __mbstate_t mbstate_t;
__END_NAMESPACE_C99
+# define __mbstate_t_defined 1
+# endif
+
#ifdef __USE_GNU
__USING_NAMESPACE_C99(mbstate_t)
#endif
diff --git a/wcsmbs/wcrtomb.c b/wcsmbs/wcrtomb.c
index aa51b6891b..547b05aa9c 100644
--- a/wcsmbs/wcrtomb.c
+++ b/wcsmbs/wcrtomb.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1996,1997,1998,2000,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1996-1998,2000,2002,2005,2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1996.
@@ -115,3 +115,8 @@ __wcrtomb (char *s, wchar_t wc, mbstate_t *ps)
}
weak_alias (__wcrtomb, wcrtomb)
libc_hidden_weak (wcrtomb)
+
+/* There should be no difference between the UTF-32 handling required
+ by c32rtomb and the wchar_t handling which has long since been
+ implemented in wcrtomb. */
+weak_alias (__wcrtomb, c32rtomb)
diff --git a/wcsmbs/wcsmbsload.c b/wcsmbs/wcsmbsload.c
index 328f16497c..212a6c8135 100644
--- a/wcsmbs/wcsmbsload.c
+++ b/wcsmbs/wcsmbsload.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2002,2004,2005,2008,2010 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2002,2004,2005,2008,2010,2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
@@ -67,6 +67,44 @@ static const struct __gconv_step to_mb =
.__data = NULL
};
+/* Conversion step from ANSI_X3.4-1968 to UTF-16, implemented by
+ __gconv_transform_ascii_utf16. It is installed as the toc16 step of
+ __wcsmbs_gconv_fcts_c.
+ NOTE(review): UTF-16 code units are two bytes wide, so
+ __min_needed_to = 4 looks carried over from a four-byte target
+ encoding -- confirm against the transform function. */
+static const struct __gconv_step to_c16 =
+{
+ .__shlib_handle = NULL,
+ .__modname = NULL,
+ .__counter = INT_MAX,
+ .__from_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
+ .__to_name = (char *) "UTF-16//",
+ .__fct = __gconv_transform_ascii_utf16,
+ .__btowc_fct = NULL,
+ .__init_fct = NULL,
+ .__end_fct = NULL,
+ .__min_needed_from = 1,
+ .__max_needed_from = 1,
+ .__min_needed_to = 4,
+ .__max_needed_to = 4,
+ .__stateful = 0,
+ .__data = NULL
+};
+
+/* Conversion step from UTF-16 to ANSI_X3.4-1968, implemented by
+ __gconv_transform_utf16_ascii. It is installed as the fromc16 step of
+ __wcsmbs_gconv_fcts_c.
+ NOTE(review): UTF-16 code units are two bytes wide, so
+ __min_needed_from = 4 looks carried over from a four-byte source
+ encoding -- confirm against the transform function. */
+static const struct __gconv_step from_c16 =
+{
+ .__shlib_handle = NULL,
+ .__modname = NULL,
+ .__counter = INT_MAX,
+ .__from_name = (char *) "UTF-16//",
+ .__to_name = (char *) "ANSI_X3.4-1968//TRANSLIT",
+ .__fct = __gconv_transform_utf16_ascii,
+ .__btowc_fct = NULL,
+ .__init_fct = NULL,
+ .__end_fct = NULL,
+ .__min_needed_from = 4,
+ .__max_needed_from = 4,
+ .__min_needed_to = 1,
+ .__max_needed_to = 1,
+ .__stateful = 0,
+ .__data = NULL
+};
+
/* For the default locale we only have to handle ANSI_X3.4-1968. */
const struct gconv_fcts __wcsmbs_gconv_fcts_c =
@@ -74,7 +112,12 @@ const struct gconv_fcts __wcsmbs_gconv_fcts_c =
.towc = (struct __gconv_step *) &to_wc,
.towc_nsteps = 1,
.tomb = (struct __gconv_step *) &to_mb,
- .tomb_nsteps = 1
+ .tomb_nsteps = 1,
+
+ .toc16 = (struct __gconv_step *) &to_c16,
+ .toc16_nsteps = 1,
+ .fromc16 = (struct __gconv_step *) &from_c16,
+ .fromc16_nsteps = 1,
};
@@ -191,6 +234,12 @@ __wcsmbs_load_conv (struct __locale_data *new_category)
&new_fcts->tomb_nsteps)
: NULL);
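+ // XXX c16 conversion is not yet locale-specific: always use the
+ // built-in ASCII <-> UTF-16 steps, whatever charset was loaded above.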
+ new_fcts->toc16 = (struct __gconv_step *) &to_c16;
+ new_fcts->toc16_nsteps = 1;
+ new_fcts->fromc16 = (struct __gconv_step *) &from_c16;
+ new_fcts->fromc16_nsteps = 1;
+
/* If any of the conversion functions is not available we don't
use any since this would mean we cannot convert back and
forth.*/
@@ -242,14 +291,36 @@ internal_function
__wcsmbs_named_conv (struct gconv_fcts *copy, const char *name)
{
copy->towc = __wcsmbs_getfct ("INTERNAL", name, &copy->towc_nsteps);
- if (copy->towc != NULL)
+ if (copy->towc == NULL)
+ return 1;
+
+ copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps);
+ if (copy->tomb == NULL)
+ goto out_mb;
+
+#if 0
+ copy->fromc16 = __wcsmbs_getfct (name, "UTF-16//", &copy->fromc16_nsteps);
+ if (copy->fromc16 == NULL)
+ goto out_fromc16;
+
+ copy->toc16 = __wcsmbs_getfct ("UTF-16//", name, &copy->toc16_nsteps);
+ if (copy->toc16 == NULL)
+#else
+ if (0)
+#endif
{
- copy->tomb = __wcsmbs_getfct (name, "INTERNAL", &copy->tomb_nsteps);
- if (copy->tomb == NULL)
- __gconv_close_transform (copy->towc, copy->towc_nsteps);
+#if 0
+ __gconv_close_transform (copy->fromc16, copy->fromc16_nsteps);
+ out_fromc16:
+ __gconv_close_transform (copy->tomb, copy->tomb_nsteps);
+#endif
+ out_mb:
+ __gconv_close_transform (copy->towc, copy->towc_nsteps);
+ out_wc:
+ return 1;
}
- return copy->towc == NULL || copy->tomb == NULL ? 1 : 0;
+ return 0;
}
void internal_function
@@ -264,6 +335,11 @@ _nl_cleanup_ctype (struct __locale_data *locale)
/* Free the old conversions. */
__gconv_close_transform (data->tomb, data->tomb_nsteps);
__gconv_close_transform (data->towc, data->towc_nsteps);
+#if 0
+ // XXX
+ __gconv_close_transform (data->fromc16, data->fromc16_nsteps);
+ __gconv_close_transform (data->toc16, data->toc16c_nsteps);
+#endif
free ((char *) data);
}
}
diff --git a/wcsmbs/wcsmbsload.h b/wcsmbs/wcsmbsload.h
index e2b1bfa9c8..064c41c82f 100644
--- a/wcsmbs/wcsmbsload.h
+++ b/wcsmbs/wcsmbsload.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1998-2002, 2010 Free Software Foundation, Inc.
+/* Copyright (C) 1998-2002, 2010, 2011 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1998.
@@ -32,6 +32,11 @@ struct gconv_fcts
size_t towc_nsteps;
struct __gconv_step *tomb;
size_t tomb_nsteps;
+
+ struct __gconv_step *toc16;
+ size_t toc16_nsteps;
+ struct __gconv_step *fromc16;
+ size_t fromc16_nsteps;
};
/* Set of currently active conversion functions. */