summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ChangeLog8
-rw-r--r--iconv/gconv_builtin.h14
-rw-r--r--iconv/gconv_int.h2
-rw-r--r--iconv/gconv_simple.c172
4 files changed, 193 insertions, 3 deletions
diff --git a/ChangeLog b/ChangeLog
index 498b0a3784..e4027a7209 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,9 @@
+1999-04-24 Ulrich Drepper <drepper@cygnus.com>
+
+ * iconv/gconv_builtin.h: Add definitions for UTF16 builtins.
+ * iconv/gconv_int.h: Declare UTF16 functions.
+ * iconv/gconv_simple.c: Add UTF16 conversion functions.
+
1999-04-20 Andreas Jaeger <aj@arthur.rhein-neckar.de>
* posix/wordexp.c (parse_param): Fix type of offset to allow it to
@@ -37,7 +43,7 @@
1999-04-17 Thorsten Kukuk <kukuk@suse.de>
- * timezone/zic.c (dolink): Append complete path ot only filename
+ * timezone/zic.c (dolink): Append complete path not only filename
to ../ list.
1999-04-17 Andreas Jaeger <aj@arthur.rhein-neckar.de>
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index e12f1e46ee..0c31890a37 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -1,5 +1,5 @@
/* Builtin transformations.
- Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -79,3 +79,15 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "UNICODELITTLE//",
1, "=INTERNAL->ucs2little",
__gconv_transform_internal_ucs2little, NULL, NULL,
4, 4, 2, 2)
+
+BUILTIN_ALIAS ("UTF-16//", "UTF16//") /* Presumably maps the spelling
+   "UTF-16//" onto "UTF16//", the name the two entries below use; the
+   macro itself is defined elsewhere -- confirm the argument order.  */
+
+BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "UTF16//", /* 8 matches the
+   length of "INTERNAL"; "UTF16//" is the other endpoint.  */
+ 1, "=INTERNAL->utf16",
+ __gconv_transform_internal_utf16, NULL, NULL, /* Function generated in
+   gconv_simple.c via FUNCTION_NAME.  */
+ 4, 4, 2, 4) /* Agrees with gconv_simple.c: MIN_NEEDED_FROM 4,
+   MIN_NEEDED_TO 2, MAX_NEEDED_TO 4.  Presumably ordered min/max bytes
+   per character for the source, then for the target -- confirm against
+   the macro definition.  */
+
+BUILTIN_TRANSFORMATION (NULL, "UTF16//", 7, "INTERNAL", /* 7 matches the
+   length of "UTF16//".  */
+ 1, "=utf16->INTERNAL",
+ __gconv_transform_utf16_internal, NULL, NULL, /* Function generated in
+   gconv_simple.c via FUNCTION_NAME.  */
+ 2, 4, 4, 4) /* Agrees with gconv_simple.c: MIN_NEEDED_FROM 2,
+   MAX_NEEDED_FROM 4, MIN_NEEDED_TO 4.  */
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index d4ffa551d6..9b00e6522c 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -168,6 +168,8 @@ __BUILTIN_TRANS (__gconv_transform_internal_ucs2);
__BUILTIN_TRANS (__gconv_transform_ucs2little_internal);
__BUILTIN_TRANS (__gconv_transform_internal_ucs2little);
__BUILTIN_TRANS (__gconv_transform_internal_ucs4);
+__BUILTIN_TRANS (__gconv_transform_internal_utf16); /* internal -> UTF-16,
+   instantiated in gconv_simple.c.  */
+__BUILTIN_TRANS (__gconv_transform_utf16_internal); /* UTF-16 -> internal,
+   instantiated in gconv_simple.c.  */
# undef __BUITLIN_TRANS
#endif
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index 4084d04b44..74dbfc0356 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -1,5 +1,5 @@
/* Simple transformations functions.
- Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -451,3 +451,173 @@ internal_ucs4_loop (const unsigned char **inptrp, const unsigned char *inend,
#endif
#include <iconv/loop.c>
#include <iconv/skeleton.c>
+
+
+/* Convert from the internal (UCS4-like) format to UTF-16.
+   The macros below parameterize <iconv/loop.c> and <iconv/skeleton.c>,
+   which are included once BODY has been defined.  The UTF-16 produced is
+   big-endian: the little-endian BODY byte-swaps every 16-bit unit with
+   bswap_16, the big-endian BODY stores the units unchanged.  */
+#define DEFINE_INIT 0 /* Presumably: no init function is generated by
+   skeleton.c -- confirm there.  */
+#define DEFINE_FINI 0 /* Presumably: no cleanup function is generated by
+   skeleton.c -- confirm there.  */
+#define MIN_NEEDED_FROM 4 /* BODY reads one 32-bit value per character.  */
+#define MIN_NEEDED_TO 2 /* A value below 0x10000 becomes one 16-bit unit.  */
+#define MAX_NEEDED_TO 4 /* A surrogate pair occupies two 16-bit units.  */
+#define FROM_DIRECTION 1
+#define FROM_LOOP internal_utf16_loop
+#define TO_LOOP internal_utf16_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_internal_utf16 /* Name referenced
+   by the INTERNAL -> "UTF16//" entry in gconv_builtin.h.  */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM /* The *_INPUT / *_OUTPUT / LOOPFCT
+   names simply forward the values above; presumably they are what
+   loop.c reads -- confirm there.  */
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define MAX_NEEDED_OUTPUT MAX_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+/* Loop body for internal -> UTF-16, consumed by the <iconv/loop.c> include
+   that follows.  It handles exactly one input character:
+
+     - read a 32-bit value in host byte order from `inptr';
+     - reject surrogate code points (0xd800..0xdfff) and values above
+       0x10ffff with GCONV_ILLEGAL_INPUT, since neither can be expressed
+       as well-formed UTF-16;
+     - store a value below 0x10000 as one 16-bit unit, anything else as a
+       high/low surrogate pair, reporting GCONV_FULL_OUTPUT if the pair
+       does not fit;
+     - advance `inptr' by four bytes on every successful path.
+
+   On an error `result' is set and `break' leaves the enclosing loop
+   without consuming the character.  Units are written big-endian, so
+   only the little-endian variant needs bswap_16.  Room for a single
+   16-bit unit is not tested here; presumably loop.c guarantees
+   MIN_NEEDED_OUTPUT bytes before expanding BODY -- confirm there.
+
+   The constant 0xd7c0 is 0xd800 - (0x10000 >> 10): adding (c >> 10) to it
+   subtracts the 0x10000 bias and adds the high-surrogate base in one
+   step.  */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define BODY \
+  { \
+    const uint32_t c = *((const uint32_t *) inptr); \
+ \
+    if (c >= 0x10000) \
+      { \
+        if (c >= 0x110000) \
+          { \
+            /* Beyond the range UTF-16 can address. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        /* Generate a surrogate pair. */ \
+        if (NEED_LENGTH_TEST && outptr + 4 > outend) \
+          { \
+            /* Overflow in the output buffer. */ \
+            result = GCONV_FULL_OUTPUT; \
+            break; \
+          } \
+ \
+        ((uint16_t *) outptr)[0] = bswap_16 (0xd7c0 + (c >> 10)); \
+        ((uint16_t *) outptr)[1] = bswap_16 (0xdc00 + (c & 0x3ff)); \
+        outptr += 4; \
+      } \
+    else \
+      { \
+        if (c >= 0xd800 && c < 0xe000) \
+          { \
+            /* A lone surrogate code point would yield ill-formed \
+               UTF-16 that the reverse conversion misinterprets. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        *((uint16_t *) outptr) = bswap_16 ((uint16_t) c); \
+        outptr += 2; \
+      } \
+    inptr += 4; \
+  }
+#else
+# define BODY \
+  { \
+    const uint32_t c = *((const uint32_t *) inptr); \
+ \
+    if (c >= 0x10000) \
+      { \
+        if (c >= 0x110000) \
+          { \
+            /* Beyond the range UTF-16 can address. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        /* Generate a surrogate pair. */ \
+        if (NEED_LENGTH_TEST && outptr + 4 > outend) \
+          { \
+            /* Overflow in the output buffer. */ \
+            result = GCONV_FULL_OUTPUT; \
+            break; \
+          } \
+ \
+        ((uint16_t *) outptr)[0] = 0xd7c0 + (c >> 10); \
+        ((uint16_t *) outptr)[1] = 0xdc00 + (c & 0x3ff); \
+        outptr += 4; \
+      } \
+    else \
+      { \
+        if (c >= 0xd800 && c < 0xe000) \
+          { \
+            /* A lone surrogate code point would yield ill-formed \
+               UTF-16 that the reverse conversion misinterprets. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        *((uint16_t *) outptr) = (uint16_t) c; \
+        outptr += 2; \
+      } \
+    /* Consume the character on both paths; without this the surrogate \
+       branch would convert the same input again on every iteration. */ \
+    inptr += 4; \
+  }
+#endif
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>
+
+
+/* Convert from UTF-16 to the internal (UCS4-like) format.
+   The macros below parameterize <iconv/loop.c> and <iconv/skeleton.c>,
+   which are included once BODY has been defined.  The input is read as
+   big-endian UTF-16: the little-endian BODY byte-swaps every 16-bit unit
+   with bswap_16, the big-endian BODY reads the units unchanged.  */
+#define DEFINE_INIT 0 /* Presumably: no init function is generated by
+   skeleton.c -- confirm there.  */
+#define DEFINE_FINI 0 /* Presumably: no cleanup function is generated by
+   skeleton.c -- confirm there.  */
+#define MIN_NEEDED_FROM 2 /* A character outside the surrogate range is one
+   16-bit unit.  */
+#define MAX_NEEDED_FROM 4 /* A surrogate pair is two 16-bit units.  */
+#define MIN_NEEDED_TO 4 /* BODY always stores one 32-bit value.  */
+#define FROM_DIRECTION 1
+#define FROM_LOOP utf16_internal_loop
+#define TO_LOOP utf16_internal_loop /* This is not used. */
+#define FUNCTION_NAME __gconv_transform_utf16_internal /* Name referenced
+   by the "UTF16//" -> INTERNAL entry in gconv_builtin.h.  */
+
+#define MIN_NEEDED_INPUT MIN_NEEDED_FROM /* The *_INPUT / *_OUTPUT / LOOPFCT
+   names simply forward the values above; presumably they are what
+   loop.c reads -- confirm there.  */
+#define MAX_NEEDED_INPUT MAX_NEEDED_FROM
+#define MIN_NEEDED_OUTPUT MIN_NEEDED_TO
+#define LOOPFCT FROM_LOOP
+/* Loop body for UTF-16 -> internal, consumed by the <iconv/loop.c> include
+   that follows.  It handles exactly one input character:
+
+     - read the first 16-bit unit (big-endian on the wire, hence the
+       bswap_16 in the little-endian variant);
+     - a unit outside 0xd800..0xdfff is the character itself: store it as
+       a 32-bit value and consume two bytes;
+     - a unit in 0xd800..0xdbff is a high surrogate and must be followed
+       by a low surrogate in 0xdc00..0xdfff (both bounds inclusive); the
+       pair is combined into one value in 0x10000..0x10ffff and four bytes
+       are consumed;
+     - a unit in 0xdc00..0xdfff in lead position, or a high surrogate not
+       followed by a low surrogate, is GCONV_ILLEGAL_INPUT;
+     - a high surrogate whose partner is not yet available is
+       GCONV_INCOMPLETE_INPUT.
+
+   On an error `result' is set and `break' leaves the enclosing loop
+   without consuming input.  Room for the 32-bit result is not tested
+   here; presumably loop.c guarantees MIN_NEEDED_OUTPUT bytes before
+   expanding BODY -- confirm there.
+
+   The constant 0xd7c0 is 0xd800 - (0x10000 >> 10): subtracting it from
+   the high surrogate removes the surrogate base and adds the 0x10000
+   bias in one step.  */
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+# define BODY \
+  { \
+    const uint16_t u1 = bswap_16 (*((const uint16_t *) inptr)); \
+ \
+    if (u1 < 0xd800 || u1 > 0xdfff) \
+      { \
+        /* No surrogate. */ \
+        *((uint32_t *) outptr) = u1; \
+        outptr += 4; \
+        inptr += 2; \
+      } \
+    else \
+      { \
+        uint16_t u2; \
+ \
+        if (u1 >= 0xdc00) \
+          { \
+            /* A low surrogate cannot start a pair; decoding it as a \
+               lead word would give a value of 0x110000 or more. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        /* It's a surrogate character.  At least the first word says \
+           it is. */ \
+        if (NEED_LENGTH_TEST && inptr + 4 > inend) \
+          { \
+            /* We don't have enough input for another complete input \
+               character. */ \
+            result = GCONV_INCOMPLETE_INPUT; \
+            break; \
+          } \
+ \
+        u2 = bswap_16 (((const uint16_t *) inptr)[1]); \
+        if (u2 < 0xdc00 || u2 > 0xdfff) \
+          { \
+            /* This is no valid second word for a surrogate. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        *((uint32_t *) outptr) = (((uint32_t) (u1 - 0xd7c0)) << 10) \
+                                 + (u2 - 0xdc00); \
+        outptr += 4; \
+        inptr += 4; \
+      } \
+  }
+#else
+# define BODY \
+  { \
+    const uint16_t u1 = *((const uint16_t *) inptr); \
+ \
+    if (u1 < 0xd800 || u1 > 0xdfff) \
+      { \
+        /* No surrogate. */ \
+        *((uint32_t *) outptr) = u1; \
+        outptr += 4; \
+        inptr += 2; \
+      } \
+    else \
+      { \
+        uint16_t u2; \
+ \
+        if (u1 >= 0xdc00) \
+          { \
+            /* A low surrogate cannot start a pair; decoding it as a \
+               lead word would give a value of 0x110000 or more. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        /* It's a surrogate character.  At least the first word says \
+           it is. */ \
+        if (NEED_LENGTH_TEST && inptr + 4 > inend) \
+          { \
+            /* We don't have enough input for another complete input \
+               character. */ \
+            result = GCONV_INCOMPLETE_INPUT; \
+            break; \
+          } \
+ \
+        u2 = ((const uint16_t *) inptr)[1]; \
+        if (u2 < 0xdc00 || u2 > 0xdfff) \
+          { \
+            /* This is no valid second word for a surrogate. */ \
+            result = GCONV_ILLEGAL_INPUT; \
+            break; \
+          } \
+ \
+        *((uint32_t *) outptr) = (((uint32_t) (u1 - 0xd7c0)) << 10) \
+                                 + (u2 - 0xdc00); \
+        outptr += 4; \
+        inptr += 4; \
+      } \
+  }
+#endif
+#include <iconv/loop.c>
+#include <iconv/skeleton.c>