Fix test cases tst-fnmatch and tst-regexloc for the new iso14651_t1_common file.

See: http://pubs.opengroup.org/onlinepubs/7908799/xbd/re.html > A range expression represents the set of collating elements that fall > between two elements in the current collation sequence, > inclusively. It is expressed as the starting point and the ending > point separated by a hyphen (-). > > Range expressions must not be used in portable applications because > their behaviour is dependent on the collating sequence. Ranges will be > treated according to the current collating sequence, and include such > characters that fall within the range based on that collating > sequence, regardless of character values. This, however, means that > the interpretation will differ depending on collating sequence. If, > for instance, one collating sequence defines ä as a variant of a, > while another defines it as a letter following z, then the expression > [ä-z] is valid in the first language and invalid in the second. Therefore, using [a-z] does not make much sense except in the C/POSIX locale. The new iso14651_t1_common lists upper case and lower case Latin characters in a different order than the old one which causes surprising results for example in the de_DE locale: [a-z] now includes A because A comes after a in iso14651_t1_common but does not include Z because that comes after z in iso14651_t1_common. * posix/tst-fnmatch.input: Fix results for range expressions for non C locales. * posix/tst-regexloc.c: Do not use a range expression for de_DE.ISO-8859-1 locale.
author: Mike FABIAN <mfabian@redhat.com> 2018-01-23 17:29:36 +0100
committer: Mike FABIAN <mfabian@redhat.com> 2018-02-27 17:00:21 +0100
commit: ac3a3b4b0d561d776b60317d6a926050c8541655 (patch)
tree: 7e70a988722d787f4056db70e74df57be458eb55 /posix
parent: 770cbe147cf33580e05ba6de78993c3070c5c2f8 (diff)
2 files changed, 44 insertions, 18 deletions
diff --git a/posix/tst-fnmatch.input b/posix/tst-fnmatch.input
index 88b3f739a5..589fb2a940 100644
--- a/posix/tst-fnmatch.input
+++ b/posix/tst-fnmatch.input
@@ -418,21 +418,47 @@ C		"-"			"[Z-\\]]"	       NOMATCH
 # Following are tests outside the scope of IEEE 2003.2 since they are using
 # locales other than the C locale.  The main focus of the tests is on the
 # handling of ranges and the recognition of character (vs bytes).
+#
+# See:
+#
+# http://pubs.opengroup.org/onlinepubs/7908799/xbd/re.html
+#
+# > A range expression represents the set of collating elements that fall
+# > between two elements in the current collation sequence,
+# > inclusively. It is expressed as the starting point and the ending
+# > point separated by a hyphen (-).
+# >
+# > Range expressions must not be used in portable applications because
+# > their behaviour is dependent on the collating sequence. Ranges will be
+# > treated according to the current collating sequence, and include such
+# > characters that fall within the range based on that collating
+# > sequence, regardless of character values. This, however, means that
+# > the interpretation will differ depending on collating sequence. If,
+# > for instance, one collating sequence defines ä as a variant of a,
+# > while another defines it as a letter following z, then the expression
+# > [ä-z] is valid in the first language and invalid in the second.
+#
+# Therefore, using [a-z] does not make much sense except in the C/POSIX locale.
+# The new iso14651_t1_common lists upper case and lower case Latin characters
+# in a different order than the old one which causes surprising results
+# for example in the de_DE locale: [a-z] now includes A because A comes
+# after a in iso14651_t1_common but does not include Z because that comes
+# after z in iso14651_t1_common.
 de_DE.ISO-8859-1 "a"			"[a-z]"		       0
 de_DE.ISO-8859-1 "z"			"[a-z]"		       0
 de_DE.ISO-8859-1 "ä"			"[a-z]"		       0
 de_DE.ISO-8859-1 "ö"			"[a-z]"		       0
 de_DE.ISO-8859-1 "ü"			"[a-z]"		       0
-de_DE.ISO-8859-1 "A"			"[a-z]"		       NOMATCH
+de_DE.ISO-8859-1 "A"			"[a-z]"		       0 # surprising but correct!
 de_DE.ISO-8859-1 "Z"			"[a-z]"		       NOMATCH
-de_DE.ISO-8859-1 "Ä"			"[a-z]"		       NOMATCH
-de_DE.ISO-8859-1 "Ö"			"[a-z]"		       NOMATCH
-de_DE.ISO-8859-1 "Ü"			"[a-z]"		       NOMATCH
+de_DE.ISO-8859-1 "Ä"			"[a-z]"		       0 # surprising but correct!
+de_DE.ISO-8859-1 "Ö"			"[a-z]"		       0 # surprising but correct!
+de_DE.ISO-8859-1 "Ü"			"[a-z]"		       0 # surprising but correct!
 de_DE.ISO-8859-1 "a"			"[A-Z]"		       NOMATCH
-de_DE.ISO-8859-1 "z"			"[A-Z]"		       NOMATCH
-de_DE.ISO-8859-1 "ä"			"[A-Z]"		       NOMATCH
-de_DE.ISO-8859-1 "ö"			"[A-Z]"		       NOMATCH
-de_DE.ISO-8859-1 "ü"			"[A-Z]"		       NOMATCH
+de_DE.ISO-8859-1 "z"			"[A-Z]"		       0 # surprising but correct!
+de_DE.ISO-8859-1 "ä"			"[A-Z]"		       0 # surprising but correct!
+de_DE.ISO-8859-1 "ö"			"[A-Z]"		       0 # surprising but correct!
+de_DE.ISO-8859-1 "ü"			"[A-Z]"		       0 # surprising but correct!
 de_DE.ISO-8859-1 "A"			"[A-Z]"		       0
 de_DE.ISO-8859-1 "Z"			"[A-Z]"		       0
 de_DE.ISO-8859-1 "Ä"			"[A-Z]"		       0
@@ -515,16 +541,16 @@ de_DE.UTF-8	 "z"			"[a-z]"		       0
 de_DE.UTF-8	 "Ã¤"			"[a-z]"		       0
 de_DE.UTF-8	 "Ã¶"			"[a-z]"		       0
 de_DE.UTF-8	 "Ã¼"			"[a-z]"		       0
-de_DE.UTF-8	 "A"			"[a-z]"		       NOMATCH
+de_DE.UTF-8	 "A"			"[a-z]"		       0 # surprising but correct!
 de_DE.UTF-8	 "Z"			"[a-z]"		       NOMATCH
-de_DE.UTF-8	 "Ã„"			"[a-z]"		       NOMATCH
-de_DE.UTF-8	 "Ã–"			"[a-z]"		       NOMATCH
-de_DE.UTF-8	 "Ãœ"			"[a-z]"		       NOMATCH
+de_DE.UTF-8	 "Ã„"			"[a-z]"	       0 # surprising but correct!
+de_DE.UTF-8	 "Ã–"			"[a-z]"	       0 # surprising but correct!
+de_DE.UTF-8	 "Ãœ"			"[a-z]"	       0 # surprising but correct!
 de_DE.UTF-8	 "a"			"[A-Z]"		       NOMATCH
-de_DE.UTF-8	 "z"			"[A-Z]"		       NOMATCH
-de_DE.UTF-8	 "Ã¤"			"[A-Z]"		       NOMATCH
-de_DE.UTF-8	 "Ã¶"			"[A-Z]"		       NOMATCH
-de_DE.UTF-8	 "Ã¼"			"[A-Z]"		       NOMATCH
+de_DE.UTF-8	 "z"			"[A-Z]"		       0 # surprising but correct!
+de_DE.UTF-8	 "Ã¤"			"[A-Z]"	       0 # surprising but correct!
+de_DE.UTF-8	 "Ã¶"			"[A-Z]"	       0 # surprising but correct!
+de_DE.UTF-8	 "Ã¼"			"[A-Z]"	       0 # surprising but correct!
 de_DE.UTF-8	 "A"			"[A-Z]"		       0
 de_DE.UTF-8	 "Z"			"[A-Z]"		       0
 de_DE.UTF-8	 "Ã„"			"[A-Z]"		       0
diff --git a/posix/tst-regexloc.c b/posix/tst-regexloc.c
index 60235b4d3b..7fbc496d0c 100644
--- a/posix/tst-regexloc.c
+++ b/posix/tst-regexloc.c
@@ -29,8 +29,8 @@ do_test (void)
 
   if (setlocale (LC_ALL, "de_DE.ISO-8859-1") == NULL)
     puts ("cannot set locale");
-  else if (regcomp (&re, "[a-f]*", 0) != REG_NOERROR)
-    puts ("cannot compile expression \"[a-f]*\"");
+  else if (regcomp (&re, "[abcdef]*", 0) != REG_NOERROR)
+    puts ("cannot compile expression \"[abcdef]*\"");
   else if (regexec (&re, "abcdefCDEF", 1, mat, 0) == REG_NOMATCH)
     puts ("no match");
   else
author	Mike FABIAN <mfabian@redhat.com>	2018-01-23 17:29:36 +0100
committer	Mike FABIAN <mfabian@redhat.com>	2018-02-27 17:00:21 +0100
commit	ac3a3b4b0d561d776b60317d6a926050c8541655 (patch)
tree	7e70a988722d787f4056db70e74df57be458eb55 /posix
parent	770cbe147cf33580e05ba6de78993c3070c5c2f8 (diff)