summaryrefslogtreecommitdiff
path: root/locale
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>2000-07-22 21:22:08 +0000
committerUlrich Drepper <drepper@redhat.com>2000-07-22 21:22:08 +0000
commit04fbc779fe06ebb697c7dfe02493ad2fc0f8e1e5 (patch)
tree66b7b352ede61f7a7ead4935386de558d0e17b93 /locale
parentfcc10ffab6d696cdda8a1a33b8e1720d90f7a15b (diff)
Update.
* iconv/gconv_trans.c: Correct a few bugs in the search loop. Remove remainders of hash table. * locale/categories.def: Remove remainders of transliteration hash table. * locale/langinfo.h: Likewise. * locale/programs/ld-ctype.c: Likewise. Fix code to write out transliteration tables. * locale/gen-translit.pl: New file. * locale/C-translit.h.in: New file. * locale/C-ctype.c: Include C-translit.h. Initialize transliteration data pointers with data from this file. * locale/Makefile (distribute): Add C-translit.h.in, C-translit.h, and gen-translit.pl. Add rule to generate C-translit.h.
Diffstat (limited to 'locale')
-rw-r--r--locale/C-ctype.c13
-rw-r--r--locale/C-translit.h21
-rw-r--r--locale/C-translit.h.in97
-rw-r--r--locale/Makefile8
-rw-r--r--locale/categories.def3
-rw-r--r--locale/gen-translit.pl142
-rw-r--r--locale/langinfo.h3
-rw-r--r--locale/programs/ld-ctype.c18
8 files changed, 284 insertions, 21 deletions
diff --git a/locale/C-ctype.c b/locale/C-ctype.c
index e93a585538..fe1e8ac1fb 100644
--- a/locale/C-ctype.c
+++ b/locale/C-ctype.c
@@ -20,6 +20,8 @@
#include "localeinfo.h"
#include <endian.h>
+#include "C-translit.h"
+
/* This table's entries are taken from POSIX.2 Table 2-6
``LC_CTYPE Category Definition in the POSIX Locale''.
@@ -420,12 +422,11 @@ const struct locale_data _nl_C_LC_CTYPE =
{ word: L'7' },
{ word: L'8' },
{ word: L'9' },
- { word: 0 },
- { word: 0 },
- { string: NULL },
- { string: NULL },
- { string: NULL },
- { string: NULL },
+ { word: NTRANSLIT },
+ { wstr: translit_from_idx },
+ { wstr: (uint32_t *) translit_from_tbl },
+ { wstr: translit_to_idx },
+ { wstr: (uint32_t *) translit_to_tbl },
{ word: 1 },
{ wstr: (uint32_t *) L"?" },
{ word: 0 },
diff --git a/locale/C-translit.h b/locale/C-translit.h
new file mode 100644
index 0000000000..2d42133b0e
--- /dev/null
+++ b/locale/C-translit.h
@@ -0,0 +1,21 @@
+/* Transliteration tables for the C locale.  This file is produced from
+   C-translit.h.in by gen-translit.pl (see the C-translit.h rule in
+   locale/Makefile); regenerate it rather than editing it by hand.  */
+/* Number of transliteration entries.  Each of the two index arrays
+   below has this many elements.  */
+#define NTRANSLIT 20
+/* translit_from_idx[i] is the offset in translit_from_tbl at which the
+   i-th source string starts.  */
+static const uint32_t translit_from_idx[] =
+{
+ 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22,
+ 24, 26, 28, 30, 32, 34, 36, 38
+};
+/* The source strings, stored back to back in the order of
+   C-translit.h.in, with a single L"\0" between neighbours.  */
+static const wchar_t translit_from_tbl[] =
+ L"\xa9" L"\0" L"\xab" L"\0" L"\xae" L"\0" L"\xbb" L"\0" L"\xbc" L"\0"
+ L"\xbd" L"\0" L"\xbe" L"\0" L"\xc4" L"\0" L"\xc5" L"\0" L"\xc6" L"\0"
+ L"\xd6" L"\0" L"\xdc" L"\0" L"\xdf" L"\0" L"\xe4" L"\0" L"\xe5" L"\0"
+ L"\xe6" L"\0" L"\xf6" L"\0" L"\xfc" L"\0" L"\x201c" L"\0" L"\x201d";
+/* translit_to_idx[i] is the offset in translit_to_tbl at which the
+   replacement for the i-th source string starts.  */
+static const uint32_t translit_to_idx[] =
+{
+ 0, 5, 9, 14, 18, 23, 28, 33, 37, 41, 45, 49,
+ 53, 57, 61, 65, 69, 73, 77, 80
+};
+/* The replacement strings.  Every replacement is followed by two NUL
+   characters (its own terminator plus a separator), which is why the
+   offsets above advance by the string length plus two.
+   NOTE(review): presumably the double NUL lets an entry carry several
+   NUL-terminated alternatives ended by an empty string -- confirm
+   against the consumer in iconv/gconv_trans.c.  */
+static const wchar_t translit_to_tbl[] =
+ L"(C)\0" L"\0" L"<<\0" L"\0" L"(R)\0" L"\0" L">>\0" L"\0" L"1/4\0" L"\0"
+ L"1/2\0" L"\0" L"3/4\0" L"\0" L"AE\0" L"\0" L"AA\0" L"\0" L"AE\0" L"\0"
+ L"OE\0" L"\0" L"UE\0" L"\0" L"ss\0" L"\0" L"ae\0" L"\0" L"aa\0" L"\0"
+ L"ae\0" L"\0" L"oe\0" L"\0" L"ue\0" L"\0" L"\"\0" L"\0" L"\"\0";
diff --git a/locale/C-translit.h.in b/locale/C-translit.h.in
new file mode 100644
index 0000000000..e2f711ea59
--- /dev/null
+++ b/locale/C-translit.h.in
@@ -0,0 +1,97 @@
+/* Transliteration for the C locale.
+ Copyright (C) 2000 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Ulrich Drepper <drepper@redhat.com>, 2000.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
+
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
+
+/* The entries here have to be sorted relative to the input string. */
+
+/* <U00A9> COPYRIGHT SIGN. */
+"\xa9" "(C)"
+
+/* <U00AB> LEFT-POINTING DOUBLE ANGLE QUOTATION MARK. */
+"\xab" "<<"
+
+/* <U00AE> REGISTERED SIGN. */
+"\xae" "(R)"
+
+/* <U00BB> RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK. */
+"\xbb" ">>"
+
+/* <U00BC> VULGAR FRACTION ONE QUARTER. */
+"\xbc" "1/4"
+
+/* <U00BD> VULGAR FRACTION ONE HALF. */
+"\xbd" "1/2"
+
+/* <U00BE> VULGAR FRACTION THREE QUARTERS. */
+"\xbe" "3/4"
+
+/* <U00C4> LATIN CAPITAL LETTER A WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xc4" "AE"
+
+/* <U00C5> LATIN CAPITAL LETTER A WITH RING ABOVE. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xc5" "AA"
+
+/* <U00C6> LATIN CAPITAL LETTER AE. */
+"\xc6" "AE"
+
+/* <U00D6> LATIN CAPITAL LETTER O WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xd6" "OE"
+
+/* <U00DC> LATIN CAPITAL LETTER U WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xdc" "UE"
+
+/* <U00DF> LATIN SMALL LETTER SHARP S. */
+"\xdf" "ss"
+
+/* <U00E4> LATIN SMALL LETTER A WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xe4" "ae"
+
+/* <U00E5> LATIN SMALL LETTER A WITH RING ABOVE. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xe5" "aa"
+
+/* <U00E6> LATIN SMALL LETTER AE. */
+"\xe6" "ae"
+
+/* <U00F6> LATIN SMALL LETTER O WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xf6" "oe"
+
+/* <U00FC> LATIN SMALL LETTER U WITH DIAERESIS. */
+/* XXX It is not clear whether this is the best transliteration for
+ all locales. If not, we probably have to take it out completely. */
+"\xfc" "ue"
+
+/* <U201C> LEFT DOUBLE QUOTATION MARK. */
+"\x201c" "\""
+
+/* <U201D> RIGHT DOUBLE QUOTATION MARK. */
+"\x201d" "\""
diff --git a/locale/Makefile b/locale/Makefile
index db71cc2422..2825a697c2 100644
--- a/locale/Makefile
+++ b/locale/Makefile
@@ -25,6 +25,7 @@ headers = locale.h langinfo.h xlocale.h
distribute = localeinfo.h categories.def iso-639.def iso-3166.def \
iso-4217.def weight.h weightwc.h strlen-hash.h elem-hash.h \
indigits.h indigitswc.h outdigits.h outdigitswc.h \
+ C-translit.h.in C-translit.h gen-translit.pl \
$(addprefix programs/, \
locale.c localedef.c \
$(localedef-modules:=.c) $(locale-modules:=.c) \
@@ -73,6 +74,13 @@ $(objpfx)localedef: $(localedef-modules:%=$(objpfx)%.o)
$(objpfx)locale: $(locale-modules:%=$(objpfx)%.o)
$(objpfx)localedef $(objpfx)locale: $(lib-modules:%=$(objpfx)%.o)
+# C-translit.h is a generated header: gen-translit.pl turns the table in
+# C-translit.h.in into C arrays.  The output is written to a temporary
+# file first and then handed to $(move-if-change).  When $(with-cvs) is
+# `yes' and a CVS directory exists, the regenerated file is committed.
+# NOTE(review): gen-translit.pl opens C-translit.h.in by name itself, so
+# the stdin redirection below looks redundant and the rule presumably only
+# works when make runs in the locale directory -- confirm.
+C-translit.h: C-translit.h.in gen-translit.pl
+ $(PERL) gen-translit.pl < $< > $@.tmp
+ $(move-if-change) $@.tmp $@
+ifeq ($(with-cvs),yes)
+ test ! -d CVS || cvs $(CVSOPTS) commit -mRegenerated $@
+endif
+
localepath = "$(localedir):$(i18ndir)"
locale-CPPFLAGS := -DLOCALE_PATH='$(localepath)' \
diff --git a/locale/categories.def b/locale/categories.def
index 8e5e65a878..a8fa30e575 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -126,8 +126,7 @@ DEFINE_CATEGORY
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT7_WC, "ctype-outdigit7_wc", std, word)
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT8_WC, "ctype-outdigit8_wc", std, word)
DEFINE_ELEMENT (_NL_CTYPE_OUTDIGIT9_WC, "ctype-outdigit9_wc", std, word)
- DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_SIZE, "ctype-translit-hash-size", std, word)
- DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_HASH_LAYERS, "ctype-translit-hash-layers", std, word)
+ DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TAB_SIZE, "ctype-translit-tab-size", std, word)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_IDX, "ctype-translit-from-idx", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_FROM_TBL, "ctype-translit-from-tbl", std, string)
DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_TO_IDX, "ctype-translit-to-idx", std, string)
diff --git a/locale/gen-translit.pl b/locale/gen-translit.pl
new file mode 100644
index 0000000000..b6fba77c80
--- /dev/null
+++ b/locale/gen-translit.pl
@@ -0,0 +1,142 @@
+#! /usr/bin/perl -w
+# Run the transliteration table through the C preprocessor so that the
+# comments are stripped and only the `"FROM" "TO"' lines remain.
+# The low-precedence `or' is required here: with `||' the alternative
+# binds to the (always true) command string and the die can never fire.
+open(F, "cat C-translit.h.in | gcc -E - |")
+  or die "Cannot preprocess input file";
+
+
+# Return the number of characters denoted by STR, the body of a C string
+# literal (without the surrounding quotes).  Every escape sequence counts
+# as one character:
+#   - `\x' followed by up to eight hexadecimal digits;
+#   - a backslash followed by any other single character.
+# Dies with "invalid input" if STR ends in a lone backslash.
+sub cstrlen {
+  my($str) = @_;
+  my($len) = length($str);
+  my($cnt);
+  my($res) = 0;
+
+  for ($cnt = 0; $cnt < $len; ++$cnt) {
+    if (substr($str, $cnt, 1) eq '\\') {
+      # Recognize the escape sequence.
+      if (substr($str, $cnt + 1, 1) eq 'x') {
+        my($inner);
+        # Skip over the hexadecimal digits that belong to the escape.
+        for ($inner = $cnt + 2; $inner < $len && $inner < $cnt + 10; ++$inner) {
+          my($ch) = substr($str, $inner, 1);
+          next if (($ch ge '0' && $ch le '9')
+                   || ($ch ge 'a' && $ch le 'f')
+                   || ($ch ge 'A' && $ch le 'F'));
+          last;
+        }
+        # $inner now indexes the first character after the escape.  The
+        # enclosing loop still increments $cnt, so step back by one;
+        # otherwise the character following the escape would be skipped
+        # and left uncounted.
+        $cnt = $inner - 1;
+        ++$res;
+      } else {
+        die "invalid input" if ($cnt + 1 >= $len);
+        ++$res;
+        # Skip the escaped character as well.
+        ++$cnt;
+      }
+    } else {
+      ++$res;
+    }
+  }
+
+  return $res;
+}
+
+# Collect every `"FROM" "TO"' pair from the preprocessed input.  The
+# arrays @froms and @tos receive the literal text with escapes intact;
+# @fromlens and @tolens receive the matching character counts as computed
+# by cstrlen.  Preprocessor line markers and blank lines are ignored.
+while (<F>) {
+  next if (/^#/);
+  next if (/^[ ]*$/);
+  # chomp only strips a trailing newline; chop would also eat the closing
+  # quote of a final line that lacks one.
+  chomp;
+
+  if (/"([^\"]*)"[ ]*"(.*)"/) {
+    my($from) = $1;
+    my($to) = $2;
+    my($fromlen) = cstrlen($from);
+    my($tolen) = cstrlen($to);
+
+    push(@froms, $from);
+    push(@fromlens, $fromlen);
+    push(@tos, $to);
+    push(@tolens, $tolen);
+  }
+}
+
+# Closing a piped handle reports a non-zero exit status of the command.
+# Stop here in that case instead of emitting an empty table.
+close(F) or die "Preprocessing of input file failed";
+
+# Emit the number of entries, then translit_from_idx: for every source
+# string the running total of the preceding lengths, each length being
+# counted with one extra element for the separator.  $col tracks the
+# output column so that lines are wrapped before column 79.
+printf "#define NTRANSLIT %d\n", scalar(@froms);
+
+printf "static const uint32_t translit_from_idx[] =\n{\n ";
+$col = 2;
+$total = 0;
+$cnt = 0;
+foreach my $fromlen (@fromlens) {
+  if ($cnt > 0) {
+    if ($col + 7 < 79) {
+      printf(", ");
+      $col += 2;
+    } else {
+      # No room for another number: start a new line.
+      printf(",\n ");
+      $col = 2;
+    }
+  }
+  printf("%4d", $total);
+  $col += 4;
+  $total += $fromlen + 1;
+  ++$cnt;
+}
+printf("\n};\n");
+
+# Emit translit_from_tbl: the source strings as adjacent wide string
+# literals, with an L"\0" literal between consecutive entries.  $col
+# tracks the output column so that lines are wrapped before column 79.
+printf "static const wchar_t translit_from_tbl[] =\n ";
+$col = 1;
+for ($cnt = 0; $cnt <= $#froms; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 6 >= 79) {
+      printf("\n ");
+      $col = 1;
+    }
+    printf(" L\"\\0\"");
+    $col += 6;
+  }
+  if ($col > 2 && $col + length($froms[$cnt]) + 4 >= 79) {
+    printf("\n ");
+    $col = 2;
+  } else {
+    printf(" ");
+    ++$col;
+  }
+  # The table text is data, not a format: pass it as an argument so that
+  # a `%' in an entry is written out verbatim.
+  printf("L\"%s\"", $froms[$cnt]);
+  $col += length($froms[$cnt]) + 3;
+}
+printf(";\n");
+
+# Emit translit_to_idx: for every replacement string the running total of
+# the preceding lengths, each length being counted with two extra
+# elements for the two NULs that follow a replacement in the table.
+# $col tracks the output column so that lines are wrapped before
+# column 79.
+printf "static const uint32_t translit_to_idx[] =\n{\n ";
+$col = 2;
+$total = 0;
+$cnt = 0;
+foreach my $tolen (@tolens) {
+  if ($cnt > 0) {
+    if ($col + 7 < 79) {
+      printf(", ");
+      $col += 2;
+    } else {
+      # No room for another number: start a new line.
+      printf(",\n ");
+      $col = 2;
+    }
+  }
+  printf("%4d", $total);
+  $col += 4;
+  $total += $tolen + 2;
+  ++$cnt;
+}
+printf("\n};\n");
+
+# Emit translit_to_tbl: the replacement strings as adjacent wide string
+# literals.  Each literal gets an explicit \0 appended and consecutive
+# entries are separated by a further L"\0" literal.  $col tracks the
+# output column so that lines are wrapped before column 79.
+printf "static const wchar_t translit_to_tbl[] =\n ";
+$col = 1;
+for ($cnt = 0; $cnt <= $#tos; ++$cnt) {
+  if ($cnt != 0) {
+    if ($col + 6 >= 79) {
+      printf("\n ");
+      $col = 1;
+    }
+    printf(" L\"\\0\"");
+    $col += 6;
+  }
+  if ($col > 2 && $col + length($tos[$cnt]) + 6 >= 79) {
+    printf("\n ");
+    $col = 2;
+  } else {
+    printf(" ");
+    ++$col;
+  }
+  # The table text is data, not a format: pass it as an argument so that
+  # a `%' in a replacement is written out verbatim.
+  printf("L\"%s\\0\"", $tos[$cnt]);
+  $col += length($tos[$cnt]) + 5;
+}
+printf(";\n");
+
+exit 0;
diff --git a/locale/langinfo.h b/locale/langinfo.h
index d2cc2a8672..69d7292765 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -316,8 +316,7 @@ enum
_NL_CTYPE_OUTDIGIT7_WC,
_NL_CTYPE_OUTDIGIT8_WC,
_NL_CTYPE_OUTDIGIT9_WC,
- _NL_CTYPE_TRANSLIT_HASH_SIZE,
- _NL_CTYPE_TRANSLIT_HASH_LAYERS,
+ _NL_CTYPE_TRANSLIT_TAB_SIZE,
_NL_CTYPE_TRANSLIT_FROM_IDX,
_NL_CTYPE_TRANSLIT_FROM_TBL,
_NL_CTYPE_TRANSLIT_TO_IDX,
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index e297aeb254..5dfcec3339 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -173,13 +173,11 @@ struct locale_ctype_t
unsigned char *width;
uint32_t mb_cur_max;
const char *codeset_name;
- uint32_t translit_hash_size;
- uint32_t translit_hash_layers;
uint32_t *translit_from_idx;
uint32_t *translit_from_tbl;
uint32_t *translit_to_idx;
uint32_t *translit_to_tbl;
- size_t translit_idx_size;
+ uint32_t translit_idx_size;
size_t translit_from_tbl_size;
size_t translit_to_tbl_size;
@@ -866,7 +864,7 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
{
#define CTYPE_EMPTY(name) \
case name: \
- iov[2 + elem + offset].iov_base = ""; \
+ iov[2 + elem + offset].iov_base = (void *) ""; \
iov[2 + elem + offset].iov_len = 0; \
idx[elem + 1] = idx[elem]; \
break
@@ -911,14 +909,12 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
ctype->names, (ctype->plane_size * ctype->plane_cnt
* sizeof (uint32_t)));
- CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_SIZE,
- &ctype->translit_hash_size, sizeof (uint32_t));
- CTYPE_DATA (_NL_CTYPE_TRANSLIT_HASH_LAYERS,
- &ctype->translit_hash_layers, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_TRANSLIT_TAB_SIZE,
+ &ctype->translit_idx_size, sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_IDX,
ctype->translit_from_idx,
- ctype->translit_idx_size);
+ ctype->translit_idx_size * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_FROM_TBL,
ctype->translit_from_tbl,
@@ -926,7 +922,7 @@ ctype_output (struct localedef_t *locale, struct charmap_t *charmap,
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_IDX,
ctype->translit_to_idx,
- ctype->translit_idx_size);
+ ctype->translit_idx_size * sizeof (uint32_t));
CTYPE_DATA (_NL_CTYPE_TRANSLIT_TO_TBL,
ctype->translit_to_tbl, ctype->translit_to_tbl_size);
@@ -3664,7 +3660,7 @@ Computing table size for character classes might take a while..."),
}
/* Store the information about the length. */
- ctype->translit_idx_size = number * sizeof (uint32_t);
+ ctype->translit_idx_size = number;
ctype->translit_from_tbl_size = from_len * sizeof (uint32_t);
ctype->translit_to_tbl_size = to_len * sizeof (uint32_t);
}