summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2009-07-20 20:04:42 -0700
committer: Ulrich Drepper <drepper@redhat.com> 2009-07-20 20:04:42 -0700
commit: 8a4494506d9175a2c205ff8d39dc58abd83682eb (patch)
tree: f8a40477eef8ff0f54ceaab5a2f358b3d6f47b9a
parent: c3db953c165baa444d01ee6c04ef0c51eba42522 (diff)
Check generated locale for non-ASCII 8-bit characters with case conversion.
If a locale has no 8-bit characters whose case conversion differs from the ASCII conversion (±0x20), then we can perform some optimizations. These will follow later.
-rw-r--r-- ChangeLog 13
-rw-r--r-- locale/C-ctype.c 6
-rw-r--r-- locale/langinfo.h 1
-rw-r--r-- locale/localeinfo.h 4
-rw-r--r-- locale/programs/ld-ctype.c 27
5 files changed, 47 insertions, 4 deletions
diff --git a/ChangeLog b/ChangeLog
index da91742d21..9d6b6d3d10 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2009-07-20 Ulrich Drepper <drepper@redhat.com>
+
+ * locale/localeinfo.h (LIMAGIC): Update value for LC_CTYPE.
+ * locale/langinfo.h: Define _NL_CTYPE_NONASCII_CASE.
+ * locale/C-ctype.c (_nl_C_LC_CTYPE): Add initializer for
+ _NL_CTYPE_NONASCII_CASE.
+ * locale/programs/ld-ctype.c (locale_ctype_t): Add nonascii_case
+ field.
+ (ctype_finish): Check whether any 8-bit character outside the ASCII
+ letters has a case mapping, or whether the ASCII letters do not map
+ by the ASCII rule (±0x20). Set nonascii_case appropriately.
+ (ctype_output): Add output handler for nonascii_case.
+
2009-07-17 Ulrich Drepper <drepper@redhat.com>
* sysdeps/generic/sysdep.h: Define cfi_personality, cfi_lsda,
diff --git a/locale/C-ctype.c b/locale/C-ctype.c
index 85f3d2addb..2044fb77b6 100644
--- a/locale/C-ctype.c
+++ b/locale/C-ctype.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc.
+/* Copyright (C) 1995-2002, 2003, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1995.
@@ -682,6 +682,8 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden =
{ .string = (const char *) _nl_C_LC_CTYPE_class_alnum.header },
/* NR_MAPS wctrans_tables */
{ .string = (const char *) _nl_C_LC_CTYPE_map_toupper.header },
- { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header }
+ { .string = (const char *) _nl_C_LC_CTYPE_map_tolower.header },
+ /* _NL_CTYPE_NONASCII_CASE */
+ { .word = 0 }
}
};
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 59017b31c8..d7ef6f60f4 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -348,6 +348,7 @@ enum
_NL_CTYPE_EXTRA_MAP_12,
_NL_CTYPE_EXTRA_MAP_13,
_NL_CTYPE_EXTRA_MAP_14,
+ _NL_CTYPE_NONASCII_CASE,
_NL_NUM_LC_CTYPE,
/* LC_MONETARY category: formatting of monetary quantities.
diff --git a/locale/localeinfo.h b/locale/localeinfo.h
index 3661080bb2..19ea41ae6d 100644
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@@ -1,5 +1,5 @@
/* Declarations for internal libc locale interfaces
- Copyright (C) 1995-2003, 2005, 2006, 2007, 2008
+ Copyright (C) 1995-2003, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
This file is part of the GNU C Library.
@@ -35,6 +35,8 @@
#define LIMAGIC(category) \
(category == LC_COLLATE \
? ((unsigned int) (0x20051014 ^ (category))) \
+ : category == LC_CTYPE \
+ ? ((unsigned int) (0x20090720 ^ (category))) \
: ((unsigned int) (0x20031115 ^ (category))))
/* Two special weight constants for the collation data. */
diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c
index d4474bf1a2..376a02c2f0 100644
--- a/locale/programs/ld-ctype.c
+++ b/locale/programs/ld-ctype.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1995-2006, 2007 Free Software Foundation, Inc.
+/* Copyright (C) 1995-2006, 2007, 2009 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@gnu.org>, 1995.
@@ -181,6 +181,7 @@ struct locale_ctype_t
size_t default_missing_lineno;
uint32_t to_nonascii;
+ uint32_t nonascii_case;
/* The arrays for the binary representation. */
char_class_t *ctype_b;
@@ -625,6 +626,27 @@ character <SP> not defined in character map")));
else
ctype->class256_collection[space_seq->bytes[0]] |= BIT (tok_print);
+ /* Check whether all single-byte characters map to their upper/lowercase
+ equivalent according to the ASCII rules. */
+ for (cnt = 'A'; cnt <= 'Z'; ++cnt)
+ {
+ uint32_t uppval = ctype->map256_collection[0][cnt];
+ uint32_t lowval = ctype->map256_collection[1][cnt];
+ uint32_t lowuppval = ctype->map256_collection[0][lowval];
+ uint32_t lowlowval = ctype->map256_collection[1][lowval];
+
+ if (uppval != cnt
+ || lowval != cnt + 0x20
+ || lowuppval != cnt
+ || lowlowval != cnt + 0x20)
+ ctype->nonascii_case = 1;
+ }
+ for (cnt = 0; cnt < 256; ++cnt)
+ if (cnt < 'A' || (cnt > 'Z' && cnt < 'a') || cnt > 'z')
+ if (ctype->map256_collection[0][cnt] != cnt
+ || ctype->map256_collection[1][cnt] != cnt)
+ ctype->nonascii_case = 1;
+
/* Now that the tests are done make sure the name array contains all
characters which are handled in the WIDTH section of the
character set definition file. */
@@ -1045,6 +1067,9 @@ ctype_output (struct localedef_t *locale, const struct charmap_t *charmap,
CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII,
&ctype->to_nonascii, sizeof (uint32_t));
+ CTYPE_DATA (_NL_CTYPE_NONASCII_CASE,
+ &ctype->nonascii_case, sizeof (uint32_t));
+
case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN):
iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t));
iov[2 + elem + offset].iov_len = sizeof (uint32_t);