Update.

1999-12-23 Ulrich Drepper <drepper@cygnus.com> * locale/en_BW: New file. * locale/en_ZW: New file. Contributed by Schalk W. Cronjé <schalkc@ntaba.co.za>. Contributed by Schalk W. Cronjé <schalkc@ntaba.co.za>.
author: Ulrich Drepper <drepper@redhat.com> 1999-12-24 05:42:34 +0000
committer: Ulrich Drepper <drepper@redhat.com> 1999-12-24 05:42:34 +0000
commit: 575b273bdc8b2fe0be70396486e9fbc400b2e641 (patch)
tree: 6df0b1bb75edc18a3786b5f7fe470ac7bce3ea73 /locale
parent: 46fd4f671c45b70909e03160aa4568db5f6c8556 (diff)
9 files changed, 477 insertions, 143 deletions
diff --git a/locale/C-collate.c b/locale/C-collate.c
index 6fd0e721be..94f6e0f60f 100644
--- a/locale/C-collate.c
+++ b/locale/C-collate.c
@@ -20,11 +20,12 @@
 #include <endian.h>
 #include "localeinfo.h"
 
+#if 0
 /* These tables' entries contain values which make the function behave
    according to POSIX.2 Table 2-8 ``LC_COLLATE Category Definition in
    the POSIX Locale''.  */
 
-const u_int32_t _nl_C_LC_COLLATE_symbol_hash[446] =
+const uint32_t _nl_C_LC_COLLATE_symbol_hash[446] =
 {
   0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu, 0xffffffffu,
   0xffffffffu, 0xffffffffu, 0x00000154u, 0x00000060u, 0xffffffffu, 0xffffffffu,
@@ -123,7 +124,7 @@ const char _nl_C_LC_COLLATE_symbol_strings[732] =
   "r\0" "s\0" "t\0" "u\0" "v\0" "w\0" "x\0" "y\0" "z\0" "left-curly-bracket\0"
   "vertical-line\0" "right-curly-bracket\0" "tilde\0" "DEL\0";
 
-const u_int32_t _nl_C_LC_COLLATE_symbol_classes[256] =
+const uint32_t _nl_C_LC_COLLATE_symbol_classes[256] =
 {
   1,   0, 1,   1, 1,   2, 1,   3, 1,   4, 1,   5, 1,   6, 1,   7,
   1,   8, 1,   9, 1,  10, 1,  11, 1,  12, 1,  13, 1,  14, 1,  15,
@@ -142,43 +143,19 @@ const u_int32_t _nl_C_LC_COLLATE_symbol_classes[256] =
   1, 112, 1, 113, 1, 114, 1, 115, 1, 116, 1, 117, 1, 118, 1, 119,
   1, 120, 1, 121, 1, 122, 1, 123, 1, 124, 1, 125, 1, 126, 1, 127
 };
+#endif
 
 const struct locale_data _nl_C_LC_COLLATE =
 {
   _nl_C_name,
   NULL, 0, 0, /* no file mapped */
   UNDELETABLE,
-  30,
+  5,
   {
     { word: 0 },
     { string: NULL },
-    { word: 0 },
-    { word: 0 },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { word: 0 },
-    { word: 0 },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { word: 0 },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { string: NULL },
-    { word: 223 },
-    { string: (const char *) _nl_C_LC_COLLATE_symbol_hash },
-    { string: _nl_C_LC_COLLATE_symbol_strings },
     { string: NULL },
     { string: NULL },
-    { string: (const char *) _nl_C_LC_COLLATE_symbol_classes }
+    { string: NULL }
   }
 };
diff --git a/locale/categories.def b/locale/categories.def
index d94840c5c2..06d79ed202 100644
--- a/locale/categories.def
+++ b/locale/categories.def
@@ -43,6 +43,10 @@ DEFINE_CATEGORY
  LC_COLLATE, "LC_COLLATE",
  (
   DEFINE_ELEMENT (_NL_COLLATE_NRULES,         "collate-nrules",           std, word)
+  DEFINE_ELEMENT (_NL_COLLATE_RULESETS,       "collate-rulesets",         std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_TABLEMB,        "collate-tablemb",          std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_WEIGHTMB,       "collate-weightmb",         std, string)
+  DEFINE_ELEMENT (_NL_COLLATE_EXTRAMB,        "collate-extramb",          std, string)
   ), _nl_postload_collate)
 
 
diff --git a/locale/iso-4217.def b/locale/iso-4217.def
index bd777639e0..d35def61a2 100644
--- a/locale/iso-4217.def
+++ b/locale/iso-4217.def
@@ -12,14 +12,13 @@ DEFINE_INT_CURR("AFA ")		/* Afghanistan Afgani  */
 DEFINE_INT_CURR("ALL ")		/* Albanian Lek  */
 DEFINE_INT_CURR("AMD ")		/* Armenia Dram  */
 DEFINE_INT_CURR("ANG ")		/* Netherlands Antilles  */
-DEFINE_INT_CURR("AOK ")		/* Angolan Kwanza  */ /* obsolete */
 DEFINE_INT_CURR("AON ")		/* Angolan New Kwanza  */
 DEFINE_INT_CURR("ARS ")		/* Argentine Peso  */
 DEFINE_INT_CURR("ATS ")		/* Austrian Schilling  */
 DEFINE_INT_CURR("AUD ")		/* Australian Dollar  */
 DEFINE_INT_CURR("AWG ")		/* Aruba Guilder  */
 DEFINE_INT_CURR("AZM ")		/* Azerbaijan Manat  */
-DEFINE_INT_CURR("BAD ")		/* Bosnian Dinar  */
+DEFINE_INT_CURR("BAK ")		/* Bosnian and Herzegovina Convertible Mark  */
 DEFINE_INT_CURR("BBD ")		/* Barbados Dollar  */
 DEFINE_INT_CURR("BDT ")		/* Bangladesh Taka  */
 DEFINE_INT_CURR("BEF ")		/* Belgian Franc  */
@@ -32,7 +31,6 @@ DEFINE_INT_CURR("BOB ")		/* Bolivian Boliviano  */
 DEFINE_INT_CURR("BPS ")		/* Canton and Enderbury Islands  */ /* ?? */
 DEFINE_INT_CURR("BRE ")		/* Brazil Real  */ /* obsolete */
 DEFINE_INT_CURR("BRL ")		/* Brazil Cruzeiro  */
-DEFINE_INT_CURR("BRR ")		/* Brazil Real  */ /* obsolete */
 DEFINE_INT_CURR("BSD ")		/* Bahamas Dollar  */
 DEFINE_INT_CURR("BTN ")		/* Bhutan Ngultrum  */
 DEFINE_INT_CURR("BUK ")		/* Burma  */ /* obsolete */
@@ -129,6 +127,7 @@ DEFINE_INT_CURR("MWK ")		/* Malawi Kwacha  */
 DEFINE_INT_CURR("MXP ")		/* Mexican Peso  */
 DEFINE_INT_CURR("MYR ")		/* Malaysian Ringgit  */
 DEFINE_INT_CURR("MZM ")		/* Mozambique Metical  */
+DEFINE_INT_CURR("NAD ")		/* Namibia Dollar  */
 DEFINE_INT_CURR("NGN ")		/* Nigeria Naira  */
 DEFINE_INT_CURR("NIC ")		/* Nicaragua Cordoba  */
 DEFINE_INT_CURR("NLG ")		/* Netherlands Guilder  */
diff --git a/locale/langinfo.h b/locale/langinfo.h
index 44f4064708..ff48fab35f 100644
--- a/locale/langinfo.h
+++ b/locale/langinfo.h
@@ -231,26 +231,10 @@ enum
      This information is accessed by the strcoll and strxfrm functions.
      These `nl_langinfo' names are used only internally.  */
   _NL_COLLATE_NRULES = _NL_ITEM (LC_COLLATE, 0),
-#if 0
+  _NL_COLLATE_RULESETS,
   _NL_COLLATE_TABLEMB,
-  _NL_COLLATE_HASH_SIZE,
-  _NL_COLLATE_HASH_LAYERS,
-  _NL_COLLATE_TABLEWC,
-  _NL_COLLATE_UNDEFINED_WC,
+  _NL_COLLATE_WEIGHTMB,
   _NL_COLLATE_EXTRAMB,
-  _NL_COLLATE_EXTRAWC,
-  _NL_COLLATE_ELEM_HASH_SIZE,
-  _NL_COLLATE_ELEM_HASH,
-  _NL_COLLATE_ELEM_STR_POOL,
-  _NL_COLLATE_ELEM_VAL,
-  _NL_COLLATE_ELEM_VALMB,
-  _NL_COLLATE_ELEM_VALWC,
-  _NL_COLLATE_SYMB_HASH_SIZE,
-  _NL_COLLATE_SYMB_HASH,
-  _NL_COLLATE_SYMB_STR_POOL,
-  _NL_COLLATE_SYMB_CLASSMB,
-  _NL_COLLATE_SYMB_CLASSWC,
-#endif
   _NL_NUM_LC_COLLATE,
 
   /* LC_CTYPE category: character classification.
diff --git a/locale/lc-collate.c b/locale/lc-collate.c
index c448e1079d..02262b5ce2 100644
--- a/locale/lc-collate.c
+++ b/locale/lc-collate.c
@@ -21,47 +21,22 @@
 #include <endian.h>
 
 
-extern const u_int32_t _nl_C_LC_COLLATE_symbol_hash[];
-extern const char _nl_C_LC_COLLATE_symbol_strings[];
-extern const u_int32_t _nl_C_LC_COLLATE_symbol_classes[];
-
-
 _NL_CURRENT_DEFINE (LC_COLLATE);
 
-const u_int32_t *__collate_tablewc;
-const u_int32_t *__collate_extrawc;
-
-const u_int32_t *__collate_element_hash;
-const char *__collate_element_strings;
-const uint32_t *__collate_element_values;
-
-const u_int32_t *__collate_symbol_hash = _nl_C_LC_COLLATE_symbol_hash;
-const char *__collate_symbol_strings = _nl_C_LC_COLLATE_symbol_strings;
-const u_int32_t *__collate_symbol_classeswc = _nl_C_LC_COLLATE_symbol_classes;
-
+const int32_t *__collate_tablemb;
+const unsigned char *__collate_weightmb;
+const unsigned char *__collate_extramb;
 
 /* We are called after loading LC_CTYPE data to load it into
    the variables used by the collation functions and regex.  */
 void
 _nl_postload_collate (void)
 {
-#if 0
-  /* XXX For now */
 #define paste(a,b) paste1(a,b)
 #define paste1(a,b) a##b
+#define current(x) _NL_CURRENT (LC_COLLATE, paste(_NL_COLLATE_,x))
 
-#define current(x)							      \
-  ((const unsigned int *) _NL_CURRENT (LC_COLLATE, paste(_NL_COLLATE_,x)))
-
-  __collate_tablewc = current (TABLEWC);
-  __collate_extrawc = current (EXTRAWC);
-
-  __collate_element_hash = current (ELEM_HASH);
-  __collate_element_strings = (const char *) current (ELEM_STR_POOL);
-  __collate_element_values = (const uint32_t *) current (ELEM_VAL);
-
-  __collate_symbol_hash = current (SYMB_HASH);
-  __collate_symbol_strings = (const char *) current (SYMB_STR_POOL);
-  __collate_symbol_classeswc = current (SYMB_CLASSWC);
-#endif
+  __collate_tablemb = (const int32_t *) current (TABLEMB);
+  __collate_weightmb = (const unsigned char *) current (WEIGHTMB);
+  __collate_extramb = (const unsigned char *) current (EXTRAMB);
 }
diff --git a/locale/localeinfo.h b/locale/localeinfo.h
index c72ea9d84d..078e205f4f 100644
--- a/locale/localeinfo.h
+++ b/locale/localeinfo.h
@@ -36,9 +36,7 @@
 #define	LIMAGIC(category)	(0x980505 ^ (category))
 
 /* Two special weight constants for the collation data.  */
-#define FORWARD_CHAR ((uint32_t) 0xfffffffd)
-#define ELLIPSIS_CHAR ((uint32_t) 0xfffffffe)
-#define IGNORE_CHAR ((uint32_t) 0xffffffff)
+#define IGNORE_CHAR	2
 
 /* We use a special value for the usage counter in `locale_data' to
    signal that this data must never be removed anymore.  */
@@ -184,13 +182,8 @@ extern const wchar_t *_nl_get_walt_digit (unsigned int number);
 
 
 /* Global variables for LC_COLLATE category data.  */
-extern const uint32_t *__collate_tablewc;
-extern const uint32_t *__collate_extrawc;
-extern const uint32_t *__collate_element_hash;
-extern const char *__collate_element_strings;
-extern const uint32_t *__collate_element_values;
-extern const uint32_t *__collate_symbol_hash;
-extern const char *__collate_symbol_strings;
-extern const uint32_t *__collate_symbol_classes;
+extern const int32_t *__collate_tablemb;
+extern const unsigned char *__collate_extrweightmb;
+extern const unsigned char *__collate_extramb;
 
 #endif	/* localeinfo.h */
diff --git a/locale/programs/ld-collate.c b/locale/programs/ld-collate.c
index 52c4e3c6a1..a0f1139cde 100644
--- a/locale/programs/ld-collate.c
+++ b/locale/programs/ld-collate.c
@@ -54,6 +54,8 @@ struct section_list
   struct element_t *last;
   /* These are the rules for this section.  */
   enum coll_sort_rule *rules;
+  /* Index of the rule set in the appropriate section of the output file.  */
+  int ruleidx;
 };
 
 struct element_t;
@@ -72,7 +74,9 @@ struct element_t
   const char *name;
 
   const char *mbs;
+  size_t nmbs;
   const uint32_t *wcs;
+  size_t nwcs;
   int *mborder;
   int wcorder;
 
@@ -175,6 +179,55 @@ struct locale_collate_t
 static int nrules;
 
 
+/* These are definitions used by some of the functions for handling
+   UTF-8 encoding below.  */
+static const uint32_t encoding_mask[] =
+{
+  ~0x7ff, ~0xffff, ~0x1fffff, ~0x3ffffff
+};
+
+static const unsigned char encoding_byte[] =
+{
+  0xc0, 0xe0, 0xf0, 0xf8, 0xfc
+};
+
+
+/* We need UTF-8 encoding of numbers.  */
+static inline int
+utf8_encode (char *buf, int val)
+{
+  char *startp = buf;
+  int retval;
+
+  if (val < 0x80)
+    {
+      *buf++ = (char) val;
+      retval = 1;
+    }
+  else
+    {
+      int step;
+
+      for (step = 2; step < 6; ++step)
+	if ((val & encoding_mask[step - 2]) == 0)
+	  break;
+      retval = step;
+
+      *buf = encoding_byte[step - 2];
+      --step;
+      do
+	{
+	  buf[step] = 0x80 | (val & 0x3f);
+	  val >>= 6;
+	}
+      while (--step > 0);
+      *buf |= val;
+    }
+
+  return buf - startp;
+}
+
+
 static struct section_list *
 make_seclist_elem (struct locale_collate_t *collate, const char *string,
 		   struct section_list *next)
@@ -202,19 +255,29 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
   newp->name = name == NULL ? NULL : obstack_copy0 (&collate->mempool,
 						    name, namelen);
   if (mbs != NULL)
-    newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
+    {
+      newp->mbs = obstack_copy0 (&collate->mempool, mbs, mbslen);
+      newp->nmbs = mbslen;
+    }
   else
-    newp->mbs = NULL;
+    {
+      newp->mbs = NULL;
+      newp->nmbs = 0;
+    }
   if (wcs != NULL)
     {
-      size_t nwcs = wcslen ((wchar_t *) wcs) + 1;
+      size_t nwcs = wcslen ((wchar_t *) wcs);
       uint32_t zero = 0;
       obstack_grow (&collate->mempool, wcs, nwcs * sizeof (uint32_t));
       obstack_grow (&collate->mempool, &zero, sizeof (uint32_t));
       newp->wcs = (uint32_t *) obstack_finish (&collate->mempool);
+      newp->nwcs = nwcs;
     }
   else
-    newp->wcs = NULL;
+    {
+      newp->wcs = NULL;
+      newp->nwcs = 0;
+    }
   newp->mborder = NULL;
   newp->wcorder = 0;
   newp->used_in_level = 0;
@@ -225,7 +288,7 @@ new_element (struct locale_collate_t *collate, const char *mbs, size_t mbslen,
   newp->file = NULL;
   newp->line = 0;
 
-  newp->section = NULL;
+  newp->section = collate->current_section;
 
   newp->last = NULL;
   newp->next = NULL;
@@ -532,6 +595,7 @@ insert_weights (struct linereader *ldfile, struct element_t *elem,
   elem->line = ldfile->lineno;
   elem->last = collate->cursor;
   elem->next = collate->cursor ? collate->cursor->next : NULL;
+  elem->section = collate->current_section;
   if (collate->cursor != NULL)
     collate->cursor->next = elem;
   if (collate->start == NULL)
@@ -801,6 +865,25 @@ insert_value (struct linereader *ldfile, struct token *arg,
 	    /* This cannot happen.  */
 	    assert (! "Internal error");
 	}
+      else
+	{
+	  /* Maybe the character was used before the definition.  In this case
+	     we have to insert the byte sequences now.  */
+	  if (elem->mbs == NULL && seq != NULL)
+	    {
+	      elem->mbs = obstack_copy0 (&collate->mempool,
+					 seq->bytes, seq->nbytes);
+	      elem->nmbs = seq->nbytes;
+	    }
+
+	  if (elem->wcs == NULL && seq != ILLEGAL_CHAR_VALUE)
+	    {
+	      uint32_t wcs[2] = { wc, 0 };
+
+	      elem->wcs = obstack_copy (&collate->mempool, wcs, sizeof (wcs));
+	      elem->nwcs = 1;
+	    }
+	}
     }
 
   /* Test whether this element is not already in the list.  */
@@ -808,7 +891,7 @@ insert_value (struct linereader *ldfile, struct token *arg,
 			     && elem->next == collate->cursor))
     {
       lr_error (ldfile, _("order for `%.*s' already defined at %s:%zu"),
-		arg->val.str.lenmb, arg->val.str.startmb,
+		(int) arg->val.str.lenmb, arg->val.str.startmb,
 		elem->file, elem->line);
       lr_ignore_rest (ldfile, 0);
       return 1;
@@ -866,8 +949,8 @@ handle_ellipsis (struct linereader *ldfile, struct token *arg,
 	 sequences for the first and end character must be the same.
 	 This is mainly to prevent unwanted effects and this is often
 	 not what is wanted.  */
-      size_t len = (startp->mbs != NULL ? strlen (startp->mbs)
-		    : (endp->mbs != NULL ? strlen (endp->mbs) : 0));
+      size_t len = (startp->mbs != NULL ? startp->nmbs
+		    : (endp->mbs != NULL ? endp->nmbs : 0));
       char mbcnt[len + 1];
       char mbend[len + 1];
 
@@ -878,7 +961,7 @@ handle_ellipsis (struct linereader *ldfile, struct token *arg,
 
       if (startp != NULL && endp != NULL
 	  && startp->mbs != NULL && endp->mbs != NULL
-	  && strlen (startp->mbs) != strlen (endp->mbs))
+	  && startp->nmbs != endp->nmbs)
 	{
 	  lr_error (ldfile, _("\
 %s: byte sequences of first and last character must have the same length"),
@@ -974,7 +1057,8 @@ sequence is not lower than that of the last character"), "LC_COLLATE");
 		    {
 		      lr_error (ldfile, _("\
 order for `%.*s' already defined at %s:%zu"),
-				namelen, seq->name, elem->file, elem->line);
+				(int) namelen, seq->name,
+				elem->file, elem->line);
 		      goto increment;
 		    }
 
@@ -1066,7 +1150,7 @@ order for `%.*s' already defined at %s:%zu"),
 	    invalid_range:
 	      lr_error (ldfile, _("\
 `%s' and `%.*s' are no valid names for symbolic range"),
-			startp->name, lento, endp->name);
+			startp->name, (int) lento, endp->name);
 	      return;
 	    }
 
@@ -1112,7 +1196,7 @@ order for `%.*s' already defined at %s:%zu"),
 		    {
 		      lr_error (ldfile, _("\
 %s: order for `%.*s' already defined at %s:%zu"),
-				"LC_COLLATE", lenfrom, buf,
+				"LC_COLLATE", (int) lenfrom, buf,
 				elem->file, elem->line);
 		      continue;
 		    }
@@ -1160,8 +1244,11 @@ order for `%.*s' already defined at %s:%zu"),
 		    {
 		      /* Update the element.  */
 		      if (seq != NULL)
-			elem->mbs = obstack_copy0 (&collate->mempool,
-						   seq->bytes, seq->nbytes);
+			{
+			  elem->mbs = obstack_copy0 (&collate->mempool,
+						     seq->bytes, seq->nbytes);
+			  elem->nmbs = seq->nbytes;
+			}
 
 		      if (wc != ILLEGAL_CHAR_VALUE)
 			{
@@ -1172,11 +1259,13 @@ order for `%.*s' already defined at %s:%zu"),
 			  obstack_grow (&collate->mempool,
 					&zero, sizeof (uint32_t));
 			  elem->wcs = obstack_finish (&collate->mempool);
+			  elem->nwcs = 1;
 			}
 		    }
 
 		  elem->file = ldfile->fname;
 		  elem->line = ldfile->lineno;
+		  elem->section = collate->current_section;
 		}
 
 	      /* Enqueue the new element.  */
@@ -1281,6 +1370,8 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
   struct element_t *runp;
   int i;
   int need_undefined = 0;
+  struct section_list *sect;
+  int ruleidx;
 
   /* If this assertion is hit change the type in `element_t'.  */
   assert (nrules <= sizeof (runp->used_in_level) * 8);
@@ -1327,7 +1418,7 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
      Since at each time only the weights for each of the rules are
      only compared to other weights for this rule it is possible to
      assign more compact weight values than simply counting all
-     weights in sequence.  We can assign weights from 2 one for each
+     weights in sequence.  We can assign weights from 3, one for each
      rule individually and only for those elements, which are actually
      used for this rule.
 
@@ -1336,44 +1427,64 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
      be encoded to make it possible to emit the value as a byte
      string.  */
   for (i = 0; i < nrules; ++i)
-    mbact[i] = 2;
-  wcact = 2;
+    mbact[i] = 3;
+  wcact = 3;
   runp = collate->start;
   while (runp != NULL)
     {
-      if (runp->mbs != NULL)
+      /* Determine the order.  */
+      if (runp->used_in_level != 0)
 	{
-	  struct element_t **eptr;
+	  runp->mborder = (int *) obstack_alloc (&collate->mempool,
+						 nrules * sizeof (int));
 
-	  /* Determine the order.  */
-	  if (runp->used_in_level != 0)
-	    {
-	      runp->mborder = (int *) obstack_alloc (&collate->mempool,
-						     nrules * sizeof (int));
+	  for (i = 0; i < nrules; ++i)
+	    if ((runp->used_in_level & (1 << i)) != 0)
+	      runp->mborder[i] = mbact[i]++;
+	    else
+	      runp->mborder[i] = 0;
+	}
 
-	      for (i = 0; i < nrules; ++i)
-		if ((runp->used_in_level & (1 << i)) != 0)
-		  runp->mborder[i] = mbact[i]++;
-		else
-		  runp->mborder[i] = 0;
-	    }
+      if (runp->mbs != NULL)
+	{
+	  struct element_t **eptr;
 
 	  /* Find the point where to insert in the list.  */
 	  eptr = &collate->mbheads[((unsigned char *) runp->mbs)[0]];
 	  while (*eptr != NULL)
 	    {
-	      /* Check which string is larger, the one we want to insert
-		 or the current element of the list we are looking at.  */
-	      assert (runp->mbs[0] == (*eptr)->mbs[0]);
-	      if (strcmp (runp->mbs, (*eptr)->mbs) > 0)
+	      if ((*eptr)->nmbs < runp->nmbs)
 		break;
 
+	      if ((*eptr)->nmbs == runp->nmbs)
+		{
+		  int c = memcmp ((*eptr)->mbs, runp->mbs, runp->nmbs);
+
+		  if (c == 0)
+		    {
+		      /* This should not happen.  It means that we have
+			 to symbols with the same byte sequence.  It is
+			 of course an error.  */
+		      error_at_line (0, 0, (*eptr)->file, (*eptr)->line,
+				     _("symbol `%s' has same encoding as"),
+				     (*eptr)->name);
+		      error_at_line (0, 0, runp->file, runp->line,
+				     _("symbol `%s'"), runp->name);
+		      goto dont_insert;
+		    }
+		  else if (c < 0)
+		    /* Insert it here.  */
+		    break;
+		}
+
+	      /* To the next entry.  */
 	      eptr = &(*eptr)->mbnext;
 	    }
 
 	  /* Set the pointers.  */
 	  runp->mbnext = *eptr;
 	  *eptr = runp;
+	dont_insert:
 	}
 
       if (runp->wcs != NULL)
@@ -1394,6 +1505,7 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
 
   /* Now determine whether the UNDEFINED entry is needed and if yes,
      whether it was defined.  */
+  collate->undefined.used_in_level = need_undefined ? ~0ul : 0;
   if (need_undefined && collate->undefined.file == NULL)
     {
       error (0, 0, _("no definition of `UNDEFINED'"));
@@ -1407,6 +1519,81 @@ collate_finish (struct localedef_t *locale, struct charmap_t *charmap)
 
       collate->undefined.wcorder = wcact++;
     }
+
+  /* Finally, try to unify the rules for the sections.  Whenever the rules
+     for a section are the same as those for another section give the
+     ruleset the same index.  Since there are never many section we can
+     use an O(n^2) algorithm here.  */
+  sect = collate->sections;
+  assert (sect != NULL);
+  ruleidx = 0;
+  do
+    {
+      struct section_list *osect = collate->sections;
+
+      while (osect != sect)
+	if (memcmp (osect->rules, sect->rules, nrules) == 0)
+	  break;
+	else
+	  osect = osect->next;
+
+      if (osect == sect)
+	sect->ruleidx = ruleidx++;
+      else
+	sect->ruleidx = osect->ruleidx;
+
+      /* Next section.  */
+      sect = sect->next;
+    }
+  while (sect != NULL);
+  /* We are currently not prepared for more than 256 rulesets.  But this
+     should never really be a problem.  */
+  assert (ruleidx <= 256);
+}
+
+
+static inline int32_t
+output_weight (struct obstack *pool, struct locale_collate_t *collate,
+	       struct element_t *elem)
+{
+  size_t cnt;
+  int32_t retval;
+
+  /* Optimize the use of UNDEFINED.  */
+  if (elem == &collate->undefined)
+    /* The weights are already inserted.  */
+    return 0;
+
+  /* This byte can start exactly one collation element and this is
+     a single byte.  We can directly give the index to the weights.  */
+  retval = obstack_object_size (pool);
+
+  /* Construct the weight.  */
+  for (cnt = 0; cnt < nrules; ++cnt)
+    {
+      char buf[elem->weights[cnt].cnt * 7];
+      int len = 0;
+      int i;
+
+      /* Add the direction.  */
+      obstack_1grow (pool, elem->section->rules[cnt]);
+
+      for (i = 0; i < elem->weights[cnt].cnt; ++i)
+	/* Encode the weight value.  */
+	if (elem->weights[cnt].w[i] == NULL)
+	  {
+	    /* This entry was IGNORE.  */
+	    buf[len++] = '\3';
+	  }
+	else
+	  len += utf8_encode (&buf[len],
+			      elem->weights[cnt].w[i]->mborder[cnt]);
+
+      /* And add the buffer content.  */
+      obstack_grow (pool, buf, len);
+    }
+
+  return retval;
 }
 
 
@@ -1420,6 +1607,15 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
   struct locale_file data;
   uint32_t idx[nelems];
   size_t cnt;
+  size_t ch;
+  int32_t tablemb[256];
+  struct obstack weightpool;
+  struct obstack extrapool;
+  struct section_list *sect;
+  int i;
+
+  obstack_init (&weightpool);
+  obstack_init (&extrapool);
 
   data.magic = LIMAGIC (LC_COLLATE);
   data.n = nelems;
@@ -1438,6 +1634,208 @@ collate_output (struct localedef_t *locale, struct charmap_t *charmap,
   idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
   ++cnt;
 
+  /* Prepare the ruleset table.  */
+  for (sect = collate->sections, i = 0; sect != NULL; sect = sect->next)
+    if (sect->ruleidx == i)
+      {
+	obstack_grow (&weightpool, sect->rules, nrules);
+	++i;
+      }
+  /* And align the output.  */
+  i = (nrules * i) % __alignof__ (int32_t);
+  if (i > 0)
+    do
+      obstack_1grow (&weightpool, '\0');
+    while (++i < __alignof__ (int32_t));
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_RULESETS));
+  iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
+  iov[2 + cnt].iov_base = obstack_finish (&weightpool);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  /* Generate the 8-bit table.  Walk through the lists of sequences
+     starting with the same byte and add them one after the other to
+     the table.  In case we have more than one sequence starting with
+     the same byte we have to use extra indirection.
+
+     First add a record for the NUL byte.  This entry will never be used
+     so it does not matter.  */
+  tablemb[0] = 0;
+
+  /* Now insert the `UNDEFINED' value if it is used.  Since this value
+     will probably be used more than once it is good to store the
+     weights only once.  */
+  if (collate->undefined.used_in_level != 0)
+    output_weight (&weightpool, collate, &collate->undefined);
+
+  for (ch = 1; ch < 256; ++ch)
+    if (collate->mbheads[ch]->mbnext == NULL
+	&& collate->mbheads[ch]->nmbs == 1)
+      {
+	tablemb[ch] = output_weight (&weightpool, collate,
+				      collate->mbheads[ch]);
+      }
+    else
+      {
+	/* The entries in the list are sorted by length and then
+           alphabetically.  This is the order in which we will add the
+           elements to the collation table.  This allows to simply
+           walk the table in sequence and stop at the first matching
+           entry.  Since the longer sequences are coming first in the
+           list they have the possibility to match first, just as it
+           has to be.  In the worst case we are walking to the end of
+           the list where we put, if no singlebyte sequence is defined
+           in the locale definition, the weights for UNDEFINED.
+
+	   To reduce the length of the search list we compress them a bit.
+	   This happens by collecting sequences of consecutive byte
+	   sequences in one entry (having and begin and end byte sequence)
+	   and add only one index into the weight table.  We can find the
+	   consecutive entries since they are also consecutive in the list.  */
+	struct element_t *runp = collate->mbheads[ch];
+	struct element_t *lastp;
+
+	tablemb[ch] = -obstack_object_size (&extrapool);
+
+	do
+	  {
+	    /* Store the current index in the weight table.  We know that
+	       the current position in the `extrapool' is aligned on a
+	       32-bit address.  */
+	    int32_t weightidx;
+	    int added;
+
+	    /* Output the weight info.  */
+	    weightidx = output_weight (&weightpool, collate, runp);
+
+	    /* Find out wether this is a single entry or we have more than
+	       one consecutive entry.  */
+	    if (runp->mbnext != NULL
+		&& runp->nmbs == runp->mbnext->nmbs
+		&& memcmp (runp->mbs, runp->mbnext->mbs, runp->nmbs - 1) == 0
+		&& (runp->mbs[runp->nmbs - 1] + 1
+		    == runp->mbnext->mbs[runp->nmbs - 1]))
+	      {
+		int i;
+
+		/* More than one consecutive entry.  We mark this by having
+		   a negative index into the weight table.  */
+		weightidx = -weightidx;
+
+		/* Now add first the initial byte sequence.  */
+		added = ((sizeof (int32_t) + 1 + 1 + 2 * (runp->nmbs - 1)
+			  + __alignof__ (int32_t) - 1)
+			 & ~(__alignof__ (int32_t) - 1));
+		obstack_make_room (&extrapool, added);
+
+		if (sizeof (int32_t) == sizeof (int))
+		  obstack_int_grow_fast (&extrapool, weightidx);
+		else
+		  obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
+		obstack_1grow_fast (&extrapool, runp->section->ruleidx);
+		obstack_1grow_fast (&extrapool, runp->nmbs - 1);
+		for (i = 1; i < runp->nmbs; ++i)
+		  obstack_1grow_fast (&extrapool, runp->mbs[i]);
+
+		/* Now find the end of the consecutive sequence.  */
+		do
+		  runp = runp->next;
+		while (runp->mbnext != NULL
+		       && runp->nmbs == runp->mbnext->nmbs
+		       && memcmp (runp->mbs, runp->mbnext->mbs,
+				  runp->nmbs - 1) == 0
+		       && (runp->mbs[runp->nmbs - 1] + 1
+			   == runp->mbnext->mbs[runp->nmbs - 1]));
+
+		/* And add the end by sequence.  Without length this time.  */
+		for (i = 1; i < runp->nmbs; ++i)
+		  obstack_1grow_fast (&extrapool, runp->mbs[i]);
+	      }
+	    else
+	      {
+		/* A single entry.  Simply add the index and the length and
+		   string (except for the first character which is already
+		   tested for).  */
+		int i;
+
+		added = ((sizeof (int32_t) + 1 + 1 + runp->nmbs - 1
+			  + __alignof__ (int32_t) - 1)
+			 & ~(__alignof__ (int32_t) - 1));
+		obstack_make_room (&extrapool, added);
+
+		if (sizeof (int32_t) == sizeof (int))
+		  obstack_int_grow_fast (&extrapool, weightidx);
+		else
+		  obstack_grow (&extrapool, &weightidx, sizeof (int32_t));
+		obstack_1grow_fast (&extrapool, runp->section->ruleidx);
+		obstack_1grow_fast (&extrapool, runp->nmbs - 1);
+		for (i = 1; i < runp->nmbs; ++i)
+		  obstack_1grow_fast (&extrapool, runp->mbs[i]);
+	      }
+
+	    /* Add alignment bytes if necessary.  */
+	    i = added % __alignof__ (int32_t);
+	    if (i > 0)
+	      do
+		obstack_1grow_fast (&extrapool, '\0');
+	      while (++i != __alignof__ (int32_t));
+
+	    /* Next entry.  */
+	    lastp = runp;
+	    runp = runp->mbnext;
+	  }
+	while (runp != NULL);
+
+	/* If the final entry in the list is not a single character we
+           add an UNDEFINED entry here.  */
+	if (lastp->nmbs != 1)
+	  {
+	    int added = ((sizeof (int32_t) + 1 + 1 + __alignof__ (int32_t))
+			 & ~(__alignof__ (int32_t) - 1));
+	    obstack_make_room (&extrapool, added);
+
+	    if (sizeof (int32_t) == sizeof (int))
+	      obstack_int_grow_fast (&extrapool, 0);
+	    else
+	      {
+		int32_t zero = 0;
+		obstack_grow (&extrapool, &zero, sizeof (int32_t));
+	      }
+	    /* XXX What rule? We just pick the first.  */
+	    obstack_1grow_fast (&extrapool, 0);
+	    /* Length is zero.  */
+	    obstack_1grow_fast (&extrapool, 0);
+
+	    /* Add alignment bytes if necessary.  */
+	    i = added % __alignof__ (int32_t);
+	    if (i > 0)
+	      do
+		obstack_1grow_fast (&extrapool, '\0');
+	      while (++i != __alignof__ (int32_t));
+	  }
+      }
+
+  /* Now add the three tables.  */
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_TABLEMB));
+  iov[2 + cnt].iov_base = tablemb;
+  iov[2 + cnt].iov_len = sizeof (tablemb);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_WEIGHTMB));
+  iov[2 + cnt].iov_len = obstack_object_size (&weightpool);
+  iov[2 + cnt].iov_base = obstack_finish (&weightpool);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+  assert (cnt == _NL_ITEM_INDEX (_NL_COLLATE_EXTRAMB));
+  iov[2 + cnt].iov_len = obstack_object_size (&extrapool);
+  iov[2 + cnt].iov_base = obstack_finish (&extrapool);
+  idx[1 + cnt] = idx[cnt] + iov[2 + cnt].iov_len;
+  ++cnt;
+
+
   assert (cnt == _NL_ITEM_INDEX (_NL_NUM_LC_COLLATE));
 
   write_locale_data (output_path, "LC_COLLATE", 2 + cnt, iov);
@@ -1859,6 +2257,12 @@ error while adding equivalent collating symbol"));
 			    "LC_COLLATE", arg->val.str.startmb);
 		  /* We use the error section.  */
 		  collate->current_section = &collate->error_section;
+
+		  if (collate->error_section.first == NULL)
+		    {
+		      collate->error_section.next = collate->sections;
+		      collate->sections = &collate->error_section;
+		    }
 		}
 	      else
 		{
@@ -1871,6 +2275,11 @@ error while adding equivalent collating symbol"));
 		    lr_error (ldfile, _("\
 %s: multiple order definitions for section `%s'"),
 			      "LC_COLLATE", sp->name);
+		  else
+		    {
+		      sp->next = collate->sections;
+		      collate->sections = sp;
+		    }
 
 		  /* Next should come the end of the line or a semicolon.  */
 		  arg = lr_token (ldfile, charmap, repertoire);
@@ -1909,6 +2318,11 @@ error while adding equivalent collating symbol"));
 		lr_error (ldfile, _("\
 %s: multiple order definitions for unnamed section"),
 			  "LC_COLLATE");
+	      else
+		{
+		  collate->unnamed_section.next = collate->sections;
+		  collate->sections = &collate->unnamed_section;
+		}
 	    }
 
 	  /* Now read the direction names.  */
@@ -1987,7 +2401,7 @@ error while adding equivalent collating symbol"));
                      insert does not exist.  */
 		  lr_error (ldfile, _("\
 %s: cannot reorder after %.*s: symbol not known"),
-			    "LC_COLLATE", arg->val.str.lenmb,
+			    "LC_COLLATE", (int) arg->val.str.lenmb,
 			    arg->val.str.startmb);
 		  collate->cursor = NULL;
 		  no_error = 0;
@@ -2072,7 +2486,7 @@ error while adding equivalent collating symbol"));
                      process the whole rest of this reorder
                      specification.  */
 		  lr_error (ldfile, _("%s: section `%.*s' not known"),
-			    "LC_COLLATE", arg->val.str.lenmb,
+			    "LC_COLLATE", (int) arg->val.str.lenmb,
 			    arg->val.str.startmb);
 
 		  do
@@ -2194,7 +2608,7 @@ error while adding equivalent collating symbol"));
 	      if (runp == NULL)
 		{
 		  lr_error (ldfile, _("%s: section `%.*s' not known"),
-			    "LC_COLLATE", arg->val.str.lenmb,
+			    "LC_COLLATE", (int) arg->val.str.lenmb,
 			    arg->val.str.startmb);
 		  lr_ignore_rest (ldfile, 0);
 		}
diff --git a/locale/programs/ld-monetary.c b/locale/programs/ld-monetary.c
index afad481f0a..a56d7a7b8b 100644
--- a/locale/programs/ld-monetary.c
+++ b/locale/programs/ld-monetary.c
@@ -247,7 +247,7 @@ not correspond to a valid name in ISO 4217"),
 	error (0, 0, _("%s: field `%s' not defined"),
 	       "LC_MONETARY", "mon_grouping");
 
-      monetary->mon_grouping = "\177";
+      monetary->mon_grouping = (char *) "\177";
       monetary->mon_grouping_len = 1;
     }
 
diff --git a/locale/setlocale.c b/locale/setlocale.c
index 8f40520c6e..1fdc94ff5b 100644
--- a/locale/setlocale.c
+++ b/locale/setlocale.c
@@ -307,13 +307,6 @@ setlocale (int category, const char *locale)
       while (category-- > 0)
 	if (category != LC_ALL)
 	  {
-	    /* XXX hack.  Remove when collation works.  */
-	    if (category == LC_COLLATE)
-	      {
-		newdata[category] = NULL;
-		continue;
-	      }
-
 	    newdata[category] = _nl_find_locale (locale_path, locale_path_len,
 						 category,
 						 &newnames[category]);
@@ -350,11 +343,6 @@ setlocale (int category, const char *locale)
 
       return composite;
     }
-  else if (category == LC_COLLATE)
-    {
-      /* XXX Remove when LC_COLLATE works.  */
-      return NULL;
-    }
   else
     {
       struct locale_data *newdata = NULL;
author	Ulrich Drepper <drepper@redhat.com>	1999-12-24 05:42:34 +0000
committer	Ulrich Drepper <drepper@redhat.com>	1999-12-24 05:42:34 +0000
commit	575b273bdc8b2fe0be70396486e9fbc400b2e641 (patch)
tree	6df0b1bb75edc18a3786b5f7fe470ac7bce3ea73 /locale
parent	46fd4f671c45b70909e03160aa4568db5f6c8556 (diff)