summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2005-04-04 22:37:38 +0000
committer Ulrich Drepper <drepper@redhat.com> 2005-04-04 22:37:38 +0000
commit 4e82c610255f6a186c20c73e74f8e71dcda98efc (patch)
tree b55a3e6692a0261c576330950793bde88524e0e1
parent 7a50b1f6d1bd959ae5dfb5539d9cd8935eb8d926 (diff)
* intl/tst-gettext3.c: New file.
* intl/tst-gettext3.sh: New file. * intl/Makefile (distribute): Add tst-gettext3.sh. (test-srcs): Add tst-gettext3. (tests): Depend on tst-gettext3.out. (tst-gettext3.out): New rule. (CFLAGS-tst-gettext3.c): New variable. Fix bug exposed by tst-gettext3. * intl/gettextP.h (struct converted_domain): New type. (struct loaded_domain): Remove the conv, conv_tab fields. Add conversions, nconversions fields. (_nl_init_domain_conv): Remove declaration. (_nl_free_domain_conv): Remove declaration. (_nl_find_msg): Add convert argument. * intl/dcigettext.c (DCIGETTEXT): Call _nl_find_msg with convert=1. (_nl_find_msg): Add convert argument. When a conversion to a different charset is needed, create a new converted_domain element, instead of throwing away the old converted translations. (get_output_charset): New function. * intl/loadmsgcat.c (_nl_init_domain_conv): Remove function. (_nl_free_domain_conv): Remove function. (_nl_load_domain): Initialize the conversions array to empty. Use _nl_find_msg instead of _nl_init_domain_conv to retrieve the header entry. (_nl_unload_domain): Free the conversions array and its contents. * intl/gettextP.h (struct loaded_domain): Remove codeset_cntr field. (struct binding): Likewise. * intl/bindtextdom.c (set_binding_values): Drop codeset_cntr modifications.
-rw-r--r-- ChangeLog 34
-rw-r--r-- intl/bindtextdom.c 4
-rw-r--r-- intl/dcigettext.c 485
-rw-r--r-- intl/gettextP.h 45
-rw-r--r-- intl/loadmsgcat.c 172
-rw-r--r-- intl/tst-gettext3.c 60
-rw-r--r-- intl/tst-gettext3.sh 45
7 files changed, 523 insertions, 322 deletions
diff --git a/ChangeLog b/ChangeLog
index fbad32cd7b..dc9e0741c0 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,37 @@
+2005-03-27 Bruno Haible <bruno@clisp.org>
+
+ * intl/tst-gettext3.c: New file.
+ * intl/tst-gettext3.sh: New file.
+ * intl/Makefile (distribute): Add tst-gettext3.sh.
+ (test-srcs): Add tst-gettext3.
+ (tests): Depend on tst-gettext3.out.
+ (tst-gettext3.out): New rule.
+ (CFLAGS-tst-gettext3.c): New variable.
+
+ Fix bug exposed by tst-gettext3.
+ * intl/gettextP.h (struct converted_domain): New type.
+ (struct loaded_domain): Remove the conv, conv_tab fields. Add
+ conversions, nconversions fields.
+ (_nl_init_domain_conv): Remove declaration.
+ (_nl_free_domain_conv): Remove declaration.
+ (_nl_find_msg): Add convert argument.
+ * intl/dcigettext.c (DCIGETTEXT): Call _nl_find_msg with convert=1.
+ (_nl_find_msg): Add convert argument. When a conversion to a different
+ charset is needed, create a new converted_domain element, instead of
+ throwing away the old converted translations.
+ (get_output_charset): New function.
+ * intl/loadmsgcat.c (_nl_init_domain_conv): Remove function.
+ (_nl_free_domain_conv): Remove function.
+ (_nl_load_domain): Initialize the conversions array to empty. Use
+ _nl_find_msg instead of _nl_init_domain_conv to retrieve the header
+ entry.
+ (_nl_unload_domain): Free the conversions array and its contents.
+
+ * intl/gettextP.h (struct loaded_domain): Remove codeset_cntr field.
+ (struct binding): Likewise.
+ * intl/bindtextdom.c (set_binding_values): Drop codeset_cntr
+ modifications.
+
2005-04-04 Jakub Jelinek <jakub@redhat.com>
* sunrpc/pmap_rmt.c (xdr_rmtcall_args): Use a dummy arglen instead
diff --git a/intl/bindtextdom.c b/intl/bindtextdom.c
index 546da8e29d..fd527a180a 100644
--- a/intl/bindtextdom.c
+++ b/intl/bindtextdom.c
@@ -207,7 +207,6 @@ set_binding_values (domainname, dirnamep, codesetp)
free (binding->codeset);
binding->codeset = result;
- ++binding->codeset_cntr;
modified = 1;
}
}
@@ -271,8 +270,6 @@ set_binding_values (domainname, dirnamep, codesetp)
/* The default value. */
new_binding->dirname = (char *) _nl_default_dirname;
- new_binding->codeset_cntr = 0;
-
if (codesetp)
{
const char *codeset = *codesetp;
@@ -293,7 +290,6 @@ set_binding_values (domainname, dirnamep, codesetp)
memcpy (result, codeset, len);
#endif
codeset = result;
- ++new_binding->codeset_cntr;
}
*codesetp = codeset;
new_binding->codeset = (char *) codeset;
diff --git a/intl/dcigettext.c b/intl/dcigettext.c
index 8163064edc..c73c719d94 100644
--- a/intl/dcigettext.c
+++ b/intl/dcigettext.c
@@ -326,6 +326,10 @@ static struct transmem_list *transmem_list;
#else
typedef unsigned char transmem_block_t;
#endif
+#if defined _LIBC || HAVE_ICONV
+static const char *get_output_charset PARAMS ((struct binding *domainbinding))
+ internal_function;
+#endif
/* Names for the libintl functions are a problem. They must not clash
@@ -597,7 +601,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category)
if (domain != NULL)
{
- retval = _nl_find_msg (domain, binding, msgid1, &retlen);
+ retval = _nl_find_msg (domain, binding, msgid1, 1, &retlen);
if (retval == NULL)
{
@@ -606,7 +610,7 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category)
for (cnt = 0; domain->successor[cnt] != NULL; ++cnt)
{
retval = _nl_find_msg (domain->successor[cnt], binding,
- msgid1, &retlen);
+ msgid1, 1, &retlen);
if (retval != NULL)
{
@@ -683,10 +687,11 @@ DCIGETTEXT (domainname, msgid1, msgid2, plural, n, category)
char *
internal_function
-_nl_find_msg (domain_file, domainbinding, msgid, lengthp)
+_nl_find_msg (domain_file, domainbinding, msgid, convert, lengthp)
struct loaded_l10nfile *domain_file;
struct binding *domainbinding;
const char *msgid;
+ int convert;
size_t *lengthp;
{
struct loaded_domain *domain;
@@ -793,192 +798,317 @@ _nl_find_msg (domain_file, domainbinding, msgid, lengthp)
}
#if defined _LIBC || HAVE_ICONV
- if (domain->codeset_cntr
- != (domainbinding != NULL ? domainbinding->codeset_cntr : 0))
+ if (convert)
{
- /* The domain's codeset has changed through bind_textdomain_codeset()
- since the message catalog was initialized or last accessed. We
- have to reinitialize the converter. */
- _nl_free_domain_conv (domain);
- _nl_init_domain_conv (domain_file, domain, domainbinding);
- }
+ /* We are supposed to do a conversion. */
+ const char *encoding = get_output_charset (domainbinding);
+
+ /* Search whether a table with converted translations for this
+ encoding has already been allocated. */
+ size_t nconversions = domain->nconversions;
+ struct converted_domain *convd = NULL;
+ size_t i;
+
+ for (i = nconversions; i > 0; )
+ {
+ i--;
+ if (strcmp (domain->conversions[i].encoding, encoding) == 0)
+ {
+ convd = &domain->conversions[i];
+ break;
+ }
+ }
- if (
+ if (convd == NULL)
+ {
+ /* Allocate a table for the converted translations for this
+ encoding. */
+ struct converted_domain *new_conversions =
+ (struct converted_domain *)
+ (domain->conversions != NULL
+ ? realloc (domain->conversions,
+ (nconversions + 1) * sizeof (struct converted_domain))
+ : malloc ((nconversions + 1) * sizeof (struct converted_domain)));
+
+ if (__builtin_expect (new_conversions == NULL, 0))
+ /* Nothing we can do, no more memory. */
+ goto converted;
+ domain->conversions = new_conversions;
+
+ /* Copy the 'encoding' string to permanent storage. */
+ encoding = strdup (encoding);
+ if (__builtin_expect (encoding == NULL, 0))
+ /* Nothing we can do, no more memory. */
+ goto converted;
+
+ convd = &new_conversions[nconversions];
+ convd->encoding = encoding;
+
+ /* Find out about the character set the file is encoded with.
+ This can be found (in textual form) in the entry "". If this
+ entry does not exist or if this does not contain the 'charset='
+ information, we will assume the charset matches the one the
+ current locale and we don't have to perform any conversion. */
# ifdef _LIBC
- domain->conv != (__gconv_t) -1
+ convd->conv = (__gconv_t) -1;
# else
# if HAVE_ICONV
- domain->conv != (iconv_t) -1
+ convd->conv = (iconv_t) -1;
# endif
# endif
- )
- {
- /* We are supposed to do a conversion. First allocate an
- appropriate table with the same structure as the table
- of translations in the file, where we can put the pointers
- to the converted strings in.
- There is a slight complication with plural entries. They
- are represented by consecutive NUL terminated strings. We
- handle this case by converting RESULTLEN bytes, including
- NULs. */
-
- if (domain->conv_tab == NULL
- && ((domain->conv_tab =
- (char **) calloc (nstrings + domain->n_sysdep_strings,
- sizeof (char *)))
- == NULL))
- /* Mark that we didn't succeed allocating a table. */
- domain->conv_tab = (char **) -1;
-
- if (__builtin_expect (domain->conv_tab == (char **) -1, 0))
- /* Nothing we can do, no more memory. */
- goto converted;
-
- if (domain->conv_tab[act] == NULL)
+ {
+ char *nullentry;
+ size_t nullentrylen;
+
+ /* Get the header entry. This is a recursion, but it doesn't
+ reallocate domain->conversions because we pass convert = 0. */
+ nullentry =
+ _nl_find_msg (domain_file, domainbinding, "", 0, &nullentrylen);
+
+ if (nullentry != NULL)
+ {
+ const char *charsetstr;
+
+ charsetstr = strstr (nullentry, "charset=");
+ if (charsetstr != NULL)
+ {
+ size_t len;
+ char *charset;
+ const char *outcharset;
+
+ charsetstr += strlen ("charset=");
+ len = strcspn (charsetstr, " \t\n");
+
+ charset = (char *) alloca (len + 1);
+# if defined _LIBC || HAVE_MEMPCPY
+ *((char *) mempcpy (charset, charsetstr, len)) = '\0';
+# else
+ memcpy (charset, charsetstr, len);
+ charset[len] = '\0';
+# endif
+
+ outcharset = encoding;
+
+# ifdef _LIBC
+ /* We always want to use transliteration. */
+ outcharset = norm_add_slashes (outcharset, "TRANSLIT");
+ charset = norm_add_slashes (charset, "");
+ if (__gconv_open (outcharset, charset, &convd->conv,
+ GCONV_AVOID_NOCONV)
+ != __GCONV_OK)
+ convd->conv = (__gconv_t) -1;
+# else
+# if HAVE_ICONV
+ /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
+ we want to use transliteration. */
+# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \
+ || _LIBICONV_VERSION >= 0x0105
+ if (strchr (outcharset, '/') == NULL)
+ {
+ char *tmp;
+
+ len = strlen (outcharset);
+ tmp = (char *) alloca (len + 10 + 1);
+ memcpy (tmp, outcharset, len);
+ memcpy (tmp + len, "//TRANSLIT", 10 + 1);
+ outcharset = tmp;
+
+ convd->conv = iconv_open (outcharset, charset);
+
+ freea (outcharset);
+ }
+ else
+# endif
+ convd->conv = iconv_open (outcharset, charset);
+# endif
+# endif
+
+ freea (charset);
+ }
+ }
+ }
+ convd->conv_tab = NULL;
+ /* Here domain->conversions is still == new_conversions. */
+ domain->nconversions++;
+ }
+
+ if (
+# ifdef _LIBC
+ convd->conv != (__gconv_t) -1
+# else
+# if HAVE_ICONV
+ convd->conv != (iconv_t) -1
+# endif
+# endif
+ )
{
- /* We haven't used this string so far, so it is not
- translated yet. Do this now. */
- /* We use a bit more efficient memory handling.
- We allocate always larger blocks which get used over
- time. This is faster than many small allocations. */
- __libc_lock_define_initialized (static, lock)
+ /* We are supposed to do a conversion. First allocate an
+ appropriate table with the same structure as the table
+ of translations in the file, where we can put the pointers
+ to the converted strings in.
+ There is a slight complication with plural entries. They
+ are represented by consecutive NUL terminated strings. We
+ handle this case by converting RESULTLEN bytes, including
+ NULs. */
+
+ if (convd->conv_tab == NULL
+ && ((convd->conv_tab =
+ (char **) calloc (nstrings + domain->n_sysdep_strings,
+ sizeof (char *)))
+ == NULL))
+ /* Mark that we didn't succeed allocating a table. */
+ convd->conv_tab = (char **) -1;
+
+ if (__builtin_expect (convd->conv_tab == (char **) -1, 0))
+ /* Nothing we can do, no more memory. */
+ goto converted;
+
+ if (convd->conv_tab[act] == NULL)
+ {
+ /* We haven't used this string so far, so it is not
+ translated yet. Do this now. */
+ /* We use a bit more efficient memory handling.
+ We allocate always larger blocks which get used over
+ time. This is faster than many small allocations. */
+ __libc_lock_define_initialized (static, lock)
# define INITIAL_BLOCK_SIZE 4080
- static unsigned char *freemem;
- static size_t freemem_size;
+ static unsigned char *freemem;
+ static size_t freemem_size;
- const unsigned char *inbuf;
- unsigned char *outbuf;
- int malloc_count;
+ const unsigned char *inbuf;
+ unsigned char *outbuf;
+ int malloc_count;
# ifndef _LIBC
- transmem_block_t *transmem_list = NULL;
+ transmem_block_t *transmem_list = NULL;
# endif
- __libc_lock_lock (lock);
+ __libc_lock_lock (lock);
- inbuf = (const unsigned char *) result;
- outbuf = freemem + sizeof (size_t);
+ inbuf = (const unsigned char *) result;
+ outbuf = freemem + sizeof (size_t);
- malloc_count = 0;
- while (1)
- {
- transmem_block_t *newmem;
+ malloc_count = 0;
+ while (1)
+ {
+ transmem_block_t *newmem;
# ifdef _LIBC
- size_t non_reversible;
- int res;
+ size_t non_reversible;
+ int res;
- if (freemem_size < sizeof (size_t))
- goto resize_freemem;
+ if (freemem_size < sizeof (size_t))
+ goto resize_freemem;
- res = __gconv (domain->conv,
- &inbuf, inbuf + resultlen,
- &outbuf,
- outbuf + freemem_size - sizeof (size_t),
- &non_reversible);
+ res = __gconv (convd->conv,
+ &inbuf, inbuf + resultlen,
+ &outbuf,
+ outbuf + freemem_size - sizeof (size_t),
+ &non_reversible);
- if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT)
- break;
+ if (res == __GCONV_OK || res == __GCONV_EMPTY_INPUT)
+ break;
- if (res != __GCONV_FULL_OUTPUT)
- {
- __libc_lock_unlock (lock);
- goto converted;
- }
+ if (res != __GCONV_FULL_OUTPUT)
+ {
+ __libc_lock_unlock (lock);
+ goto converted;
+ }
- inbuf = (const unsigned char *) result;
+ inbuf = (const unsigned char *) result;
# else
# if HAVE_ICONV
- const char *inptr = (const char *) inbuf;
- size_t inleft = resultlen;
- char *outptr = (char *) outbuf;
- size_t outleft;
-
- if (freemem_size < sizeof (size_t))
- goto resize_freemem;
-
- outleft = freemem_size - sizeof (size_t);
- if (iconv (domain->conv,
- (ICONV_CONST char **) &inptr, &inleft,
- &outptr, &outleft)
- != (size_t) (-1))
- {
- outbuf = (unsigned char *) outptr;
- break;
- }
- if (errno != E2BIG)
- {
- __libc_lock_unlock (lock);
- goto converted;
- }
+ const char *inptr = (const char *) inbuf;
+ size_t inleft = resultlen;
+ char *outptr = (char *) outbuf;
+ size_t outleft;
+
+ if (freemem_size < sizeof (size_t))
+ goto resize_freemem;
+
+ outleft = freemem_size - sizeof (size_t);
+ if (iconv (convd->conv,
+ (ICONV_CONST char **) &inptr, &inleft,
+ &outptr, &outleft)
+ != (size_t) (-1))
+ {
+ outbuf = (unsigned char *) outptr;
+ break;
+ }
+ if (errno != E2BIG)
+ {
+ __libc_lock_unlock (lock);
+ goto converted;
+ }
# endif
# endif
- resize_freemem:
- /* We must allocate a new buffer or resize the old one. */
- if (malloc_count > 0)
- {
- ++malloc_count;
- freemem_size = malloc_count * INITIAL_BLOCK_SIZE;
- newmem = (transmem_block_t *) realloc (transmem_list,
- freemem_size);
+ resize_freemem:
+ /* We must allocate a new buffer or resize the old one. */
+ if (malloc_count > 0)
+ {
+ ++malloc_count;
+ freemem_size = malloc_count * INITIAL_BLOCK_SIZE;
+ newmem = (transmem_block_t *) realloc (transmem_list,
+ freemem_size);
# ifdef _LIBC
- if (newmem != NULL)
- transmem_list = transmem_list->next;
+ if (newmem != NULL)
+ transmem_list = transmem_list->next;
+ else
+ {
+ struct transmem_list *old = transmem_list;
+
+ transmem_list = transmem_list->next;
+ free (old);
+ }
+# endif
+ }
else
{
- struct transmem_list *old = transmem_list;
-
- transmem_list = transmem_list->next;
- free (old);
+ malloc_count = 1;
+ freemem_size = INITIAL_BLOCK_SIZE;
+ newmem = (transmem_block_t *) malloc (freemem_size);
+ }
+ if (__builtin_expect (newmem == NULL, 0))
+ {
+ freemem = NULL;
+ freemem_size = 0;
+ __libc_lock_unlock (lock);
+ goto converted;
}
-# endif
- }
- else
- {
- malloc_count = 1;
- freemem_size = INITIAL_BLOCK_SIZE;
- newmem = (transmem_block_t *) malloc (freemem_size);
- }
- if (__builtin_expect (newmem == NULL, 0))
- {
- freemem = NULL;
- freemem_size = 0;
- __libc_lock_unlock (lock);
- goto converted;
- }
# ifdef _LIBC
- /* Add the block to the list of blocks we have to free
- at some point. */
- newmem->next = transmem_list;
- transmem_list = newmem;
+ /* Add the block to the list of blocks we have to free
+ at some point. */
+ newmem->next = transmem_list;
+ transmem_list = newmem;
- freemem = (unsigned char *) newmem->data;
- freemem_size -= offsetof (struct transmem_list, data);
+ freemem = (unsigned char *) newmem->data;
+ freemem_size -= offsetof (struct transmem_list, data);
# else
- transmem_list = newmem;
- freemem = newmem;
+ transmem_list = newmem;
+ freemem = newmem;
# endif
- outbuf = freemem + sizeof (size_t);
+ outbuf = freemem + sizeof (size_t);
+ }
+
+ /* We have now in our buffer a converted string. Put this
+ into the table of conversions. */
+ *(size_t *) freemem = outbuf - freemem - sizeof (size_t);
+ convd->conv_tab[act] = (char *) freemem;
+ /* Shrink freemem, but keep it aligned. */
+ freemem_size -= outbuf - freemem;
+ freemem = outbuf;
+ freemem += freemem_size & (alignof (size_t) - 1);
+ freemem_size = freemem_size & ~ (alignof (size_t) - 1);
+
+ __libc_lock_unlock (lock);
}
- /* We have now in our buffer a converted string. Put this
- into the table of conversions. */
- *(size_t *) freemem = outbuf - freemem - sizeof (size_t);
- domain->conv_tab[act] = (char *) freemem;
- /* Shrink freemem, but keep it aligned. */
- freemem_size -= outbuf - freemem;
- freemem = outbuf;
- freemem += freemem_size & (alignof (size_t) - 1);
- freemem_size = freemem_size & ~ (alignof (size_t) - 1);
-
- __libc_lock_unlock (lock);
+ /* Now convd->conv_tab[act] contains the translation of all
+ the plural variants. */
+ result = convd->conv_tab[act] + sizeof (size_t);
+ resultlen = *(size_t *) convd->conv_tab[act];
}
-
- /* Now domain->conv_tab[act] contains the translation of all
- the plural variants. */
- result = domain->conv_tab[act] + sizeof (size_t);
- resultlen = *(size_t *) domain->conv_tab[act];
}
converted:
@@ -1122,6 +1252,61 @@ guess_category_value (category, categoryname)
return language != NULL && strcmp (retval, "C") != 0 ? language : retval;
}
+#if defined _LIBC || HAVE_ICONV
+/* Returns the output charset: binding codeset, OUTPUT_CHARSET or locale's. */
+static const char *
+internal_function
+get_output_charset (domainbinding)
+ struct binding *domainbinding;
+{
+ /* The output charset should normally be determined by the locale. But
+ sometimes the locale is not used or not correctly set up, so we provide
+ a possibility for the user to override this: the OUTPUT_CHARSET
+ environment variable. Moreover, the value specified through
+ bind_textdomain_codeset overrides both. */
+ if (domainbinding != NULL && domainbinding->codeset != NULL)
+ return domainbinding->codeset;
+ else
+ {
+ /* For speed reasons, we look at the value of OUTPUT_CHARSET only
+ once. This is a user variable that is not supposed to change
+ during a program run. */
+ static char *output_charset_cache;
+ static int output_charset_cached;
+
+ if (!output_charset_cached)
+ {
+ const char *value = getenv ("OUTPUT_CHARSET");
+
+ if (value != NULL && value[0] != '\0')
+ {
+ size_t len = strlen (value) + 1;
+ char *value_copy = (char *) malloc (len);
+
+ if (value_copy != NULL)
+ memcpy (value_copy, value, len);
+ output_charset_cache = value_copy;
+ }
+ output_charset_cached = 1;
+ }
+
+ if (output_charset_cache != NULL)
+ return output_charset_cache;
+ else
+ {
+# ifdef _LIBC
+ return _NL_CURRENT (LC_CTYPE, CODESET);
+# else
+# if HAVE_ICONV
+ extern const char *locale_charset PARAMS ((void));
+ return locale_charset ();
+# endif
+# endif
+ }
+ }
+}
+#endif
+
/* @@ begin of epilog @@ */
/* We don't want libintl.a to depend on any other library. So we
diff --git a/intl/gettextP.h b/intl/gettextP.h
index 46b51e1008..f18535a5b3 100644
--- a/intl/gettextP.h
+++ b/intl/gettextP.h
@@ -1,5 +1,5 @@
/* Header describing internals of libintl library.
- Copyright (C) 1995-1999, 2000, 2001, 2004 Free Software Foundation, Inc.
+ Copyright (C) 1995-1999, 2000, 2001, 2004-2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Written by Ulrich Drepper <drepper@cygnus.com>, 1995.
@@ -88,6 +88,26 @@ struct sysdep_string_desc
const char *pointer;
};
+/* Cache of translated strings after charset conversion.
+ Note: The strings are converted to the target encoding only on an as-needed
+ basis. */
+struct converted_domain
+{
+ /* The target encoding name. */
+ const char *encoding;
+ /* The descriptor for conversion from the message catalog's encoding to
+ this target encoding. */
+#ifdef _LIBC
+ __gconv_t conv;
+#else
+# if HAVE_ICONV
+ iconv_t conv;
+# endif
+#endif
+ /* The table of translated strings after charset conversion. */
+ char **conv_tab;
+};
+
/* The representation of an opened message catalog. */
struct loaded_domain
{
@@ -123,15 +143,9 @@ struct loaded_domain
/* 1 if the hash table uses a different endianness than this machine. */
int must_swap_hash_tab;
- int codeset_cntr;
-#ifdef _LIBC
- __gconv_t conv;
-#else
-# if HAVE_ICONV
- iconv_t conv;
-# endif
-#endif
- char **conv_tab;
+ /* Cache of charset conversions of the translated strings. */
+ struct converted_domain *conversions;
+ size_t nconversions;
struct expression *plural;
unsigned long int nplurals;
@@ -151,7 +165,6 @@ struct binding
{
struct binding *next;
char *dirname;
- int codeset_cntr; /* Incremented each time codeset changes. */
char *codeset;
char domainname[ZERO];
};
@@ -173,16 +186,10 @@ struct loaded_l10nfile *_nl_find_domain PARAMS ((const char *__dirname,
void _nl_load_domain PARAMS ((struct loaded_l10nfile *__domain,
struct binding *__domainbinding))
internal_function;
-const char *_nl_init_domain_conv PARAMS ((struct loaded_l10nfile *__domain_file,
- struct loaded_domain *__domain,
- struct binding *__domainbinding))
- internal_function;
-void _nl_free_domain_conv PARAMS ((struct loaded_domain *__domain))
- internal_function;
char *_nl_find_msg PARAMS ((struct loaded_l10nfile *domain_file,
- struct binding *domainbinding,
- const char *msgid, size_t *lengthp))
+ struct binding *domainbinding, const char *msgid,
+ int convert, size_t *lengthp))
internal_function;
#ifdef _LIBC
diff --git a/intl/loadmsgcat.c b/intl/loadmsgcat.c
index efefc69a43..1f55531097 100644
--- a/intl/loadmsgcat.c
+++ b/intl/loadmsgcat.c
@@ -1,5 +1,5 @@
/* Load needed message catalogs.
- Copyright (C) 1995-2004 Free Software Foundation, Inc.
+ Copyright (C) 1995-2005 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -752,146 +752,6 @@ get_sysdep_segment_value (name)
return NULL;
}
-/* Initialize the codeset dependent parts of an opened message catalog.
- Return the header entry. */
-const char *
-internal_function
-_nl_init_domain_conv (domain_file, domain, domainbinding)
- struct loaded_l10nfile *domain_file;
- struct loaded_domain *domain;
- struct binding *domainbinding;
-{
- /* Find out about the character set the file is encoded with.
- This can be found (in textual form) in the entry "". If this
- entry does not exist or if this does not contain the `charset='
- information, we will assume the charset matches the one the
- current locale and we don't have to perform any conversion. */
- char *nullentry;
- size_t nullentrylen;
-
- /* Preinitialize fields, to avoid recursion during _nl_find_msg. */
- domain->codeset_cntr =
- (domainbinding != NULL ? domainbinding->codeset_cntr : 0);
-#ifdef _LIBC
- domain->conv = (__gconv_t) -1;
-#else
-# if HAVE_ICONV
- domain->conv = (iconv_t) -1;
-# endif
-#endif
- domain->conv_tab = NULL;
-
- /* Get the header entry. */
- nullentry = _nl_find_msg (domain_file, domainbinding, "", &nullentrylen);
-
- if (nullentry != NULL)
- {
-#if defined _LIBC || HAVE_ICONV
- const char *charsetstr;
-
- charsetstr = strstr (nullentry, "charset=");
- if (charsetstr != NULL)
- {
- size_t len;
- char *charset;
- const char *outcharset;
-
- charsetstr += strlen ("charset=");
- len = strcspn (charsetstr, " \t\n");
-
- charset = (char *) alloca (len + 1);
-# if defined _LIBC || HAVE_MEMPCPY
- *((char *) mempcpy (charset, charsetstr, len)) = '\0';
-# else
- memcpy (charset, charsetstr, len);
- charset[len] = '\0';
-# endif
-
- /* The output charset should normally be determined by the
- locale. But sometimes the locale is not used or not correctly
- set up, so we provide a possibility for the user to override
- this. Moreover, the value specified through
- bind_textdomain_codeset overrides both. */
- if (domainbinding != NULL && domainbinding->codeset != NULL)
- outcharset = domainbinding->codeset;
- else
- {
- outcharset = getenv ("OUTPUT_CHARSET");
- if (outcharset == NULL || outcharset[0] == '\0')
- {
-# ifdef _LIBC
- outcharset = _NL_CURRENT (LC_CTYPE, CODESET);
-# else
-# if HAVE_ICONV
- extern const char *locale_charset PARAMS ((void));
- outcharset = locale_charset ();
-# endif
-# endif
- }
- }
-
-# ifdef _LIBC
- /* We always want to use transliteration. */
- outcharset = norm_add_slashes (outcharset, "TRANSLIT");
- charset = norm_add_slashes (charset, "");
- if (__gconv_open (outcharset, charset, &domain->conv,
- GCONV_AVOID_NOCONV)
- != __GCONV_OK)
- domain->conv = (__gconv_t) -1;
-# else
-# if HAVE_ICONV
- /* When using GNU libc >= 2.2 or GNU libiconv >= 1.5,
- we want to use transliteration. */
-# if (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2) || __GLIBC__ > 2 \
- || _LIBICONV_VERSION >= 0x0105
- if (strchr (outcharset, '/') == NULL)
- {
- char *tmp;
-
- len = strlen (outcharset);
- tmp = (char *) alloca (len + 10 + 1);
- memcpy (tmp, outcharset, len);
- memcpy (tmp + len, "//TRANSLIT", 10 + 1);
- outcharset = tmp;
-
- domain->conv = iconv_open (outcharset, charset);
-
- freea (outcharset);
- }
- else
-# endif
- domain->conv = iconv_open (outcharset, charset);
-# endif
-# endif
-
- freea (charset);
- }
-#endif /* _LIBC || HAVE_ICONV */
- }
-
- return nullentry;
-}
-
-/* Frees the codeset dependent parts of an opened message catalog. */
-void
-internal_function
-_nl_free_domain_conv (domain)
- struct loaded_domain *domain;
-{
- if (domain->conv_tab != NULL && domain->conv_tab != (char **) -1)
- free (domain->conv_tab);
-
-#ifdef _LIBC
- if (domain->conv != (__gconv_t) -1)
- __gconv_close (domain->conv);
-#else
-# if HAVE_ICONV
- if (domain->conv != (iconv_t) -1)
- iconv_close (domain->conv);
-# endif
-#endif
-}
-
/* Load the message catalogs specified by FILENAME. If it is no valid
message catalog do nothing. */
void
@@ -913,6 +773,7 @@ _nl_load_domain (domain_file, domainbinding)
struct loaded_domain *domain;
int revision;
const char *nullentry;
+ size_t nullentrylen;
__libc_lock_lock_recursive (lock);
if (domain_file->decided != 0)
@@ -920,8 +781,8 @@ _nl_load_domain (domain_file, domainbinding)
/* There are two possibilities:
+ this is the same thread calling again during this
- initialization via _nl_init_domain_conv and _nl_find_msg. We
- have initialized everything this call needs.
+ initialization via _nl_find_msg. We have initialized
+ everything this call needs.
+ this is another thread which tried to initialize this object.
Not necessary anymore since if the lock is available this
@@ -1388,12 +1249,12 @@ _nl_load_domain (domain_file, domainbinding)
goto out;
}
- /* Now initialize the character set converter from the character set
- the file is encoded with (found in the header entry) to the domain's
- specified character set or the locale's character set. */
- nullentry = _nl_init_domain_conv (domain_file, domain, domainbinding);
+ /* No caches of converted translations so far. */
+ domain->conversions = NULL;
+ domain->nconversions = 0;
- /* Also look for a plural specification. */
+ /* Get the header entry and look for a plural specification. */
+ nullentry = _nl_find_msg (domain_file, domainbinding, "", 0, &nullentrylen);
EXTRACT_PLURAL_EXPRESSION (nullentry, &domain->plural, &domain->nplurals);
out:
@@ -1412,10 +1273,23 @@ internal_function __libc_freeres_fn_section
_nl_unload_domain (domain)
struct loaded_domain *domain;
{
+ size_t i;
+
if (domain->plural != &__gettext_germanic_plural)
__gettext_free_exp (domain->plural);
- _nl_free_domain_conv (domain);
+ for (i = 0; i < domain->nconversions; i++)
+ {
+ struct converted_domain *convd = &domain->conversions[i];
+
+ free ((char *) convd->encoding);
+ if (convd->conv_tab != NULL && convd->conv_tab != (char **) -1)
+ free (convd->conv_tab);
+ if (convd->conv != (__gconv_t) -1)
+ __gconv_close (convd->conv);
+ }
+ if (domain->conversions != NULL)
+ free (domain->conversions);
if (domain->malloced)
free (domain->malloced);
diff --git a/intl/tst-gettext3.c b/intl/tst-gettext3.c
new file mode 100644
index 0000000000..917967b383
--- /dev/null
+++ b/intl/tst-gettext3.c
@@ -0,0 +1,60 @@
+/* Test that the gettext() results come out in the correct encoding for
+ locales that differ only in their encoding.
+ Copyright (C) 2001, 2005 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+ Contributed by Bruno Haible <bruno@clisp.org>, 2001, 2005.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, write to the Free
+ Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+ 02111-1307 USA. */
+
+#include <libintl.h>
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+int
+main (void)
+{
+ char *s;
+ int result = 0;
+
+ unsetenv ("LANGUAGE");
+ unsetenv ("OUTPUT_CHARSET");
+ textdomain ("codeset");
+ bindtextdomain ("codeset", OBJPFX "domaindir");
+
+ setlocale (LC_ALL, "de_DE.ISO-8859-1");
+
+ /* Here we expect output in ISO-8859-1. */
+ s = gettext ("cheese");
+ if (strcmp (s, "K\344se"))
+ {
+ printf ("call 1 returned: %s\n", s);
+ result = 1;
+ }
+
+ setlocale (LC_ALL, "de_DE.UTF-8");
+
+ /* Here we expect output in UTF-8. */
+ s = gettext ("cheese");
+ if (strcmp (s, "K\303\244se"))
+ {
+ printf ("call 2 returned: %s\n", s);
+ result = 1;
+ }
+
+ return result;
+}
diff --git a/intl/tst-gettext3.sh b/intl/tst-gettext3.sh
new file mode 100644
index 0000000000..7bd977cf2d
--- /dev/null
+++ b/intl/tst-gettext3.sh
@@ -0,0 +1,45 @@
+#! /bin/sh
+# Test that the gettext() results come out in the correct encoding for
+# locales that differ only in their encoding.
+# Copyright (C) 2001, 2002, 2005 Free Software Foundation, Inc.
+# This file is part of the GNU C Library.
+#
+
+# The GNU C Library is free software; you can redistribute it and/or
+# modify it under the terms of the GNU Lesser General Public
+# License as published by the Free Software Foundation; either
+# version 2.1 of the License, or (at your option) any later version.
+
+# The GNU C Library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+# Lesser General Public License for more details.
+
+# You should have received a copy of the GNU Lesser General Public
+# License along with the GNU C Library; if not, write to the Free
+# Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
+# 02111-1307 USA.
+
+common_objpfx=$1
+objpfx=$2
+
+LC_ALL=C
+export LC_ALL
+
+# Generate the test data.
+test -d ${objpfx}domaindir || mkdir ${objpfx}domaindir
+# Create the domain directories.
+test -d ${objpfx}domaindir/de_DE || mkdir ${objpfx}domaindir/de_DE
+test -d ${objpfx}domaindir/de_DE/LC_MESSAGES || mkdir ${objpfx}domaindir/de_DE/LC_MESSAGES
+# Populate them.
+msgfmt -o ${objpfx}domaindir/de_DE/LC_MESSAGES/codeset.mo tstcodeset.po
+
+GCONV_PATH=${common_objpfx}iconvdata
+export GCONV_PATH
+LOCPATH=${common_objpfx}localedata
+export LOCPATH
+
+${common_objpfx}elf/ld.so --library-path $common_objpfx \
+${objpfx}tst-gettext3 > ${objpfx}tst-gettext3.out
+
+exit $?