summary | refs | log | tree | commit | diff
path: root/localedata/unicode-gen
diff options
context:
space:
mode:
Diffstat (limited to 'localedata/unicode-gen')
-rw-r--r-- localedata/unicode-gen/Makefile 10
-rwxr-xr-x localedata/unicode-gen/gen_unicode_ctype.py 18
-rw-r--r-- localedata/unicode-gen/unicode_utils.py 14
3 files changed, 37 insertions, 5 deletions
diff --git a/localedata/unicode-gen/Makefile b/localedata/unicode-gen/Makefile
index 5b7305d54e..4cb1fabc77 100644
--- a/localedata/unicode-gen/Makefile
+++ b/localedata/unicode-gen/Makefile
@@ -41,7 +41,7 @@ PYTHON3 = python3
WGET = wget
DOWNLOADS = UnicodeData.txt DerivedCoreProperties.txt EastAsianWidth.txt
-GENERATED = i18n UTF-8 translit_combining translit_compat translit_circle translit_cjk_compat translit_font translit_fraction
+GENERATED = i18n tr_TR UTF-8 translit_combining translit_compat translit_circle translit_cjk_compat translit_font translit_fraction
REPORTS = i18n-report UTF-8-report
all: $(GENERATED)
@@ -50,6 +50,7 @@ check: check-i18n check-UTF-8
install:
cp -p i18n ../locales/i18n
+ cp -p tr_TR ../locales/tr_TR
cp -p UTF-8 ../charmaps/UTF-8
cp -p translit_combining ../locales/translit_combining
cp -p translit_compat ../locales/translit_compat
@@ -82,6 +83,13 @@ check-i18n: i18n-report
i18n-report; \
then echo manual verification required; false; else true; fi
+tr_TR: UnicodeData.txt DerivedCoreProperties.txt
+tr_TR: ../locales/tr_TR # Preserve non-ctype information.
+tr_TR: gen_unicode_ctype.py
+ $(PYTHON3) gen_unicode_ctype.py -u UnicodeData.txt \
+ -d DerivedCoreProperties.txt -i ../locales/tr_TR -o $@ \
+ --unicode_version $(UNICODE_VERSION) --turkish
+
UTF-8: UnicodeData.txt EastAsianWidth.txt
UTF-8: utf8_gen.py
$(PYTHON3) utf8_gen.py UnicodeData.txt EastAsianWidth.txt
diff --git a/localedata/unicode-gen/gen_unicode_ctype.py b/localedata/unicode-gen/gen_unicode_ctype.py
index 0f064f5ba5..bcb50bf9a5 100755
--- a/localedata/unicode-gen/gen_unicode_ctype.py
+++ b/localedata/unicode-gen/gen_unicode_ctype.py
@@ -196,7 +196,7 @@ def output_tail(i18n_file, tail=''):
else:
i18n_file.write('END LC_CTYPE\n')
-def output_tables(i18n_file, unicode_version):
+def output_tables(i18n_file, unicode_version, turkish):
'''Write the new LC_CTYPE character classes to the output file'''
i18n_file.write('% The following is the 14652 i18n fdcc-set '
+ 'LC_CTYPE category.\n')
@@ -240,8 +240,14 @@ def output_tables(i18n_file, unicode_version):
+ '(sections 7.25.2.1.12 and 6.4.4.1).\n')
output_charclass(i18n_file, 'xdigit', unicode_utils.is_xdigit)
output_charclass(i18n_file, 'blank', unicode_utils.is_blank)
- output_charmap(i18n_file, 'toupper', unicode_utils.to_upper)
- output_charmap(i18n_file, 'tolower', unicode_utils.to_lower)
+ if turkish:
+ i18n_file.write('% The case conversions reflect '
+ + 'Turkish conventions.\n')
+ output_charmap(i18n_file, 'toupper', unicode_utils.to_upper_turkish)
+ output_charmap(i18n_file, 'tolower', unicode_utils.to_lower_turkish)
+ else:
+ output_charmap(i18n_file, 'toupper', unicode_utils.to_upper)
+ output_charmap(i18n_file, 'tolower', unicode_utils.to_lower)
output_charmap(i18n_file, 'map "totitle";', unicode_utils.to_title)
i18n_file.write('% The "combining" class reflects ISO/IEC 10646-1 '
+ 'annex B.1\n')
@@ -298,6 +304,10 @@ if __name__ == "__main__":
required=True,
type=str,
help='The Unicode version of the input files used.')
+ PARSER.add_argument(
+ '--turkish',
+ action='store_true',
+ help='Use Turkish case conversions.')
ARGS = PARSER.parse_args()
unicode_utils.fill_attributes(
@@ -310,5 +320,5 @@ if __name__ == "__main__":
(HEAD, TAIL) = read_input_file(ARGS.input_file)
with open(ARGS.output_file, mode='w') as I18N_FILE:
output_head(I18N_FILE, ARGS.unicode_version, head=HEAD)
- output_tables(I18N_FILE, ARGS.unicode_version)
+ output_tables(I18N_FILE, ARGS.unicode_version, ARGS.turkish)
output_tail(I18N_FILE, tail=TAIL)
diff --git a/localedata/unicode-gen/unicode_utils.py b/localedata/unicode-gen/unicode_utils.py
index ee91582823..26a57ef293 100644
--- a/localedata/unicode-gen/unicode_utils.py
+++ b/localedata/unicode-gen/unicode_utils.py
@@ -220,6 +220,20 @@ def to_lower(code_point):
else:
return code_point
+def to_upper_turkish(code_point):
+ '''Returns the code point of the Turkish uppercase version
+ of the given code point'''
+ if code_point == 0x0069:
+ return 0x0130
+ return to_upper(code_point)
+
+def to_lower_turkish(code_point):
+ '''Returns the code point of the Turkish lowercase version
+ of the given code point'''
+ if code_point == 0x0049:
+ return 0x0131
+ return to_lower(code_point)
+
def to_title(code_point):
'''Returns the code point of the titlecase version
of the given code point'''