From f0c7c524bb92cdc42cc4e0f7ba1ddda865a4494c Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Sun, 16 Nov 2003 07:14:28 +0000 Subject: Update. * posix/regex_internal.h: Add forward declaration of re_dfa_t. Replace last two parameters of re_string_allocate and re_string_construct with pointer to DFA. (re_dfa_t): Add map_notascii field. * posix/regcomp.c (re_compile_internal): Add call of re_string_construct. (init_dfa): Initialize map_notascii. * posix/regex_internal.c: Adjust definitions of re_string_allocate and re_string_construct. Pass DFA to re_string_construct. Adjust definition. Initialize map_notascii field. (build_wcs_upper_buffer): If map_notascii is zero use simplified method to map ASCII values to upper case. * posix/regex.c: Include localeinfo.h. * posix/regexec.c: Adjust call of re_string_allocate. * locale/langinfo.h: Add _NL_CTYPE_MAP_TO_NONASCII. * locale/localeinfo.h (LIMAGIC): Change value. * locale/categories.def: Add entry for _NL_CTYPE_MAP_TO_NONASCII. * locale/C-ctype.c: Likewise. * locale/programs/ld-ctype.c: Compute whether any mapping maps from ASCII to non-ASCII value. Write out that value. --- ChangeLog | 23 +++++++ locale/C-ctype.c | 6 +- locale/categories.def | 3 +- locale/langinfo.h | 3 +- locale/localeinfo.h | 4 +- locale/programs/ld-ctype.c | 16 ++++- localedata/ChangeLog | 4 ++ localedata/Makefile | 3 +- posix/regcomp.c | 5 +- posix/regex.c | 26 +++---- posix/regex_internal.c | 166 +++++++++++++++++++++++++++++++-------------- posix/regex_internal.h | 18 ++--- posix/regexec.c | 9 ++- 13 files changed, 200 insertions(+), 86 deletions(-) diff --git a/ChangeLog b/ChangeLog index 8df65fb210..5d49a9ee0f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,5 +1,28 @@ 2003-11-15 Ulrich Drepper + * posix/regex_internal.h: Add forward declaration of re_dfa_t. + Replace last two parameters of re_string_allocate and + re_string_construct with pointer to DFA. + (re_dfa_t): Add map_notascii field. 
+ * posix/regcomp.c (re_compile_internal): Add call of + re_string_construct. + (init_dfa): Initialize map_notascii. + * posix/regex_internal.c: Adjust definitions of re_string_allocate + and re_string_construct. + Pass DFA to re_string_construct. Adjust definition. Initialize + map_notascii field. + (build_wcs_upper_buffer): If map_notascii is zero use simplified + method to map ASCII values to upper case. + * posix/regex.c: Include localeinfo.h. + * posix/regexec.c: Adjust call of re_string_allocate. + + * locale/langinfo.h: Add _NL_CTYPE_MAP_TO_NONASCII. + * locale/localeinfo.h (LIMAGIC): Change value. + * locale/categories.def: Add entry for _NL_CTYPE_MAP_TO_NONASCII. + * locale/C-ctype.c: Likewise. + * locale/programs/ld-ctype.c: Compute whether any mapping maps from + ASCII to non-ASCII value. Write out that value. + * wcsmbs/mbsinit.c: Undef mbsinit and __mbsinit. * include/wchar.h: Provide inline versions of mbsinit and __mbsinit. diff --git a/locale/C-ctype.c b/locale/C-ctype.c index ff56258e58..85f3d2addb 100644 --- a/locale/C-ctype.c +++ b/locale/C-ctype.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc. +/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1995. @@ -528,7 +528,7 @@ _nl_C_LC_CTYPE_width attribute_hidden = }; /* Number of fields with fixed meanings, starting at 0. */ -#define NR_FIXED 70 +#define NR_FIXED 71 /* Number of class fields, starting at CLASS_OFFSET. */ #define NR_CLASSES 12 /* Number of map fields, starting at MAP_OFFSET. 
*/ @@ -665,6 +665,8 @@ const struct locale_data _nl_C_LC_CTYPE attribute_hidden = { .word = 0 }, /* _NL_CTYPE_TRANSLIT_IGNORE */ { .wstr = NULL }, + /* _NL_CTYPE_MAP_TO_NONASCII */ + { .word = 0 }, /* NR_CLASSES wctype_tables */ { .string = (const char *) _nl_C_LC_CTYPE_class_upper.header }, { .string = (const char *) _nl_C_LC_CTYPE_class_lower.header }, diff --git a/locale/categories.def b/locale/categories.def index dc01a977ce..c4831f61a8 100644 --- a/locale/categories.def +++ b/locale/categories.def @@ -1,5 +1,5 @@ /* Definition of all available locale categories and their items. -*- C -*- - Copyright (C) 1995-2001, 2002 Free Software Foundation, Inc. + Copyright (C) 1995-2001, 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -133,6 +133,7 @@ DEFINE_CATEGORY DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_DEFAULT_MISSING, "ctype-translit-default-missing", std, wstring) DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE_LEN, "ctype-translit-ignore-len", std, word) DEFINE_ELEMENT (_NL_CTYPE_TRANSLIT_IGNORE, "ctype-translit-ignore", std, string) + DEFINE_ELEMENT (_NL_CTYPE_MAP_TO_NONASCII, "map-to-nonascii", std, word) ), _nl_postload_ctype) diff --git a/locale/langinfo.h b/locale/langinfo.h index ef17b15ba2..6d7c25c6c2 100644 --- a/locale/langinfo.h +++ b/locale/langinfo.h @@ -1,5 +1,5 @@ /* Access to locale-dependent parameters. - Copyright (C) 1995-99,2000,01,02 Free Software Foundation, Inc. + Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. 
The GNU C Library is free software; you can redistribute it and/or @@ -334,6 +334,7 @@ enum _NL_CTYPE_TRANSLIT_DEFAULT_MISSING, _NL_CTYPE_TRANSLIT_IGNORE_LEN, _NL_CTYPE_TRANSLIT_IGNORE, + _NL_CTYPE_MAP_TO_NONASCII, _NL_CTYPE_EXTRA_MAP_1, _NL_CTYPE_EXTRA_MAP_2, _NL_CTYPE_EXTRA_MAP_3, diff --git a/locale/localeinfo.h b/locale/localeinfo.h index 466c16015c..e7b8f84da8 100644 --- a/locale/localeinfo.h +++ b/locale/localeinfo.h @@ -1,5 +1,5 @@ /* Declarations for internal libc locale interfaces - Copyright (C) 1995-2001, 2002 Free Software Foundation, Inc. + Copyright (C) 1995-2001, 2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. The GNU C Library is free software; you can redistribute it and/or @@ -31,7 +31,7 @@ #include <intl/loadinfo.h> /* For loaded_l10nfile definition. */ /* Magic number at the beginning of a locale data file for CATEGORY. */ -#define LIMAGIC(category) ((unsigned int) (0x20000828 ^ (category))) +#define LIMAGIC(category) ((unsigned int) (0x20031115 ^ (category))) /* Two special weight constants for the collation data. */ #define IGNORE_CHAR 2 diff --git a/locale/programs/ld-ctype.c b/locale/programs/ld-ctype.c index 2a2c831481..499868237b 100644 --- a/locale/programs/ld-ctype.c +++ b/locale/programs/ld-ctype.c @@ -1,4 +1,4 @@ -/* Copyright (C) 1995-1999, 2000, 2001, 2002 Free Software Foundation, Inc. +/* Copyright (C) 1995-2002, 2003 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Ulrich Drepper , 1995. @@ -181,6 +181,8 @@ struct locale_ctype_t const char *default_missing_file; size_t default_missing_lineno; + uint32_t to_nonascii; + /* The arrays for the binary representation. 
*/ char_class_t *ctype_b; char_class32_t *ctype32_b; @@ -1035,6 +1037,10 @@ ctype_output (struct localedef_t *locale, const struct charmap_t *charmap, idx[elem + 1] = idx[elem] + iov[2 + elem + offset].iov_len; break; + + CTYPE_DATA (_NL_CTYPE_MAP_TO_NONASCII, + &ctype->to_nonascii, sizeof (uint32_t)); + case _NL_ITEM_INDEX (_NL_CTYPE_INDIGITS_MB_LEN): iov[2 + elem + offset].iov_base = alloca (sizeof (uint32_t)); iov[2 + elem + offset].iov_len = sizeof (uint32_t); @@ -2706,6 +2712,14 @@ with character code range values one must use the absolute ellipsis `...'")); if (!ignore_content) { + /* Check whether the mapping converts from an ASCII value + to a non-ASCII value. */ + if (from_seq != NULL && from_seq->nbytes == 1 + && isascii (from_seq->bytes[0]) + && to_seq != NULL && (to_seq->nbytes != 1 + || !isascii (to_seq->bytes[0]))) + ctype->to_nonascii = 1; + if (mapidx < 2 && from_seq != NULL && to_seq != NULL && from_seq->nbytes == 1 && to_seq->nbytes == 1) /* We can use this value. */ diff --git a/localedata/ChangeLog b/localedata/ChangeLog index 46a63cf422..77ace9d0ce 100644 --- a/localedata/ChangeLog +++ b/localedata/ChangeLog @@ -1,3 +1,7 @@ +2003-11-15 Ulrich Drepper + + * Makefile (tst-leaks-ENV): Add LOCPATH. + 2003-11-11 Jakub Jelinek * Makefile (LOCALES): Add tr_TR.UTF-8. 
diff --git a/localedata/Makefile b/localedata/Makefile index 1437af403d..ebba83a42a 100644 --- a/localedata/Makefile +++ b/localedata/Makefile @@ -287,6 +287,7 @@ tst-setlocale-ENV = LOCPATH=$(common-objpfx)localedata LC_ALL=ja_JP.EUC-JP bug-iconv-trans-ENV = LOCPATH=$(common-objpfx)localedata -tst-leaks-ENV = MALLOC_TRACE=$(objpfx)tst-leaks.mtrace +tst-leaks-ENV = MALLOC_TRACE=$(objpfx)tst-leaks.mtrace \ + LOCPATH=$(common-objpfx)localedata $(objpfx)mtrace-tst-leaks: $(objpfx)tst-leaks.out $(common-objpfx)malloc/mtrace $(objpfx)tst-leaks.mtrace > $@ diff --git a/posix/regcomp.c b/posix/regcomp.c index 9f56b389df..68ce551c3a 100644 --- a/posix/regcomp.c +++ b/posix/regcomp.c @@ -748,8 +748,7 @@ re_compile_internal (preg, pattern, length, syntax) #endif err = re_string_construct (&regexp, pattern, length, preg->translate, - syntax & RE_ICASE, dfa->mb_cur_max, - dfa->is_utf8); + syntax & RE_ICASE, dfa); if (BE (err != REG_NOERROR, 0)) { re_free (dfa); @@ -828,6 +827,8 @@ init_dfa (dfa, pat_len) if (dfa->mb_cur_max > 1 && strcmp (_NL_CURRENT (LC_CTYPE, _NL_CTYPE_CODESET_NAME), "UTF-8") == 0) dfa->is_utf8 = 1; + dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII) + != 0); #endif if (BE (dfa->nodes == NULL || dfa->state_table == NULL diff --git a/posix/regex.c b/posix/regex.c index 98d86e1b80..f18178a479 100644 --- a/posix/regex.c +++ b/posix/regex.c @@ -20,25 +20,27 @@ #ifdef _LIBC /* We have to keep the namespace clean. 
*/ -# define regfree(preg) __regfree (preg) -# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) -# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) -# define regerror(errcode, preg, errbuf, errbuf_size) \ +# define regfree(preg) __regfree (preg) +# define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef) +# define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags) +# define regerror(errcode, preg, errbuf, errbuf_size) \ __regerror(errcode, preg, errbuf, errbuf_size) -# define re_set_registers(bu, re, nu, st, en) \ +# define re_set_registers(bu, re, nu, st, en) \ __re_set_registers (bu, re, nu, st, en) -# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ +# define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \ __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop) -# define re_match(bufp, string, size, pos, regs) \ +# define re_match(bufp, string, size, pos, regs) \ __re_match (bufp, string, size, pos, regs) -# define re_search(bufp, string, size, startpos, range, regs) \ +# define re_search(bufp, string, size, startpos, range, regs) \ __re_search (bufp, string, size, startpos, range, regs) -# define re_compile_pattern(pattern, length, bufp) \ +# define re_compile_pattern(pattern, length, bufp) \ __re_compile_pattern (pattern, length, bufp) -# define re_set_syntax(syntax) __re_set_syntax (syntax) -# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ +# define re_set_syntax(syntax) __re_set_syntax (syntax) +# define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \ __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop) -# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) +# define re_compile_fastmap(bufp) __re_compile_fastmap (bufp) + +# include "../locale/localeinfo.h" #endif /* POSIX says that <sys/types.h> must be included (by the caller) before diff --git a/posix/regex_internal.c 
b/posix/regex_internal.c index 329fc81de9..6f07bd4dd1 100644 --- a/posix/regex_internal.c +++ b/posix/regex_internal.c @@ -21,7 +21,7 @@ static void re_string_construct_common (const char *str, int len, re_string_t *pstr, RE_TRANSLATE_TYPE trans, int icase, - int mb_cur_max, int is_utf8); + const re_dfa_t *dfa); #ifdef RE_ENABLE_I18N static int re_string_skip_chars (re_string_t *pstr, int new_raw_idx, wint_t *last_wc); @@ -47,17 +47,16 @@ static unsigned int inline calc_state_hash (const re_node_set *nodes, re_string_reconstruct before using the object. */ static reg_errcode_t -re_string_allocate (pstr, str, len, init_len, trans, icase, - mb_cur_max, is_utf8) +re_string_allocate (pstr, str, len, init_len, trans, icase, dfa) re_string_t *pstr; const char *str; - int len, init_len, icase, mb_cur_max, is_utf8; + int len, init_len, icase; RE_TRANSLATE_TYPE trans; + const re_dfa_t *dfa; { reg_errcode_t ret; int init_buf_len = (len + 1 < init_len) ? len + 1: init_len; - re_string_construct_common (str, len, pstr, trans, icase, - mb_cur_max, is_utf8); + re_string_construct_common (str, len, pstr, trans, icase, dfa); pstr->stop = pstr->len; ret = re_string_realloc_buffers (pstr, init_buf_len); @@ -68,22 +67,22 @@ re_string_allocate (pstr, str, len, init_len, trans, icase, : (unsigned char *) str); pstr->mbs = MBS_ALLOCATED (pstr) ? pstr->mbs : pstr->mbs_case; pstr->valid_len = (MBS_CASE_ALLOCATED (pstr) || MBS_ALLOCATED (pstr) - || mb_cur_max > 1) ? pstr->valid_len : len; + || dfa->mb_cur_max > 1) ? pstr->valid_len : len; return REG_NOERROR; } /* This function allocate the buffers, and initialize them. 
*/ static reg_errcode_t -re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8) +re_string_construct (pstr, str, len, trans, icase, dfa) re_string_t *pstr; const char *str; - int len, icase, mb_cur_max, is_utf8; + int len, icase; RE_TRANSLATE_TYPE trans; + const re_dfa_t *dfa; { reg_errcode_t ret; - re_string_construct_common (str, len, pstr, trans, icase, - mb_cur_max, is_utf8); + re_string_construct_common (str, len, pstr, trans, icase, dfa); pstr->stop = pstr->len; /* Set 0 so that this function can initialize whole buffers. */ pstr->valid_len = 0; @@ -101,7 +100,7 @@ re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8) if (icase) { #ifdef RE_ENABLE_I18N - if (mb_cur_max > 1) + if (dfa->mb_cur_max > 1) build_wcs_upper_buffer (pstr); else #endif /* RE_ENABLE_I18N */ @@ -110,7 +109,7 @@ re_string_construct (pstr, str, len, trans, icase, mb_cur_max, is_utf8) else { #ifdef RE_ENABLE_I18N - if (mb_cur_max > 1) + if (dfa->mb_cur_max > 1) build_wcs_buffer (pstr); else #endif /* RE_ENABLE_I18N */ @@ -167,20 +166,22 @@ re_string_realloc_buffers (pstr, new_buf_len) static void -re_string_construct_common (str, len, pstr, trans, icase, mb_cur_max, is_utf8) +re_string_construct_common (str, len, pstr, trans, icase, dfa) const char *str; int len; re_string_t *pstr; RE_TRANSLATE_TYPE trans; - int icase, mb_cur_max, is_utf8; + int icase; + const re_dfa_t *dfa; { memset (pstr, '\0', sizeof (re_string_t)); pstr->raw_mbs = (const unsigned char *) str; pstr->len = len; pstr->trans = trans; pstr->icase = icase ? 1 : 0; - pstr->mb_cur_max = mb_cur_max; - pstr->is_utf8 = is_utf8; + pstr->mb_cur_max = dfa->mb_cur_max; + pstr->is_utf8 = dfa->is_utf8; + pstr->map_notascii = dfa->map_notascii; } #ifdef RE_ENABLE_I18N @@ -253,47 +254,110 @@ build_wcs_upper_buffer (pstr) /* Build the buffers from pstr->valid_len to either pstr->len or pstr->bufs_len. */ end_idx = (pstr->bufs_len > pstr->len)? 
pstr->len : pstr->bufs_len; - for (byte_idx = pstr->valid_len; byte_idx < end_idx;) - { - wchar_t wc; - remain_len = end_idx - byte_idx; - prev_st = pstr->cur_state; - mbclen = mbrtowc (&wc, ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx - + byte_idx), remain_len, &pstr->cur_state); - if (BE (mbclen == (size_t) -2, 0)) + +#ifdef _LIBC + /* The following optimization assumes that the wchar_t encoding is + always ISO 10646. */ + if (! pstr->map_notascii && pstr->trans == NULL) + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + if (isascii (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]) + && mbsinit (&pstr->cur_state)) { - /* The buffer doesn't have enough space, finish to build. */ - pstr->cur_state = prev_st; - break; + /* In case of a singlebyte character. */ + pstr->mbs[byte_idx] + = toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]); + /* The next step uses the assumption that wchar_t is encoded + with ISO 10646: all ASCII values can be converted like this. */ + pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx]; + ++byte_idx; } - else if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) + else { - /* In case of a singlebyte character. */ - int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; - /* Apply the translation if we need. */ - if (pstr->trans != NULL && mbclen == 1) + wchar_t wc; + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); + if (BE (mbclen > 1, 1)) { - ch = pstr->trans[ch]; - pstr->mbs_case[byte_idx] = ch; + if (iswlower (wc)) + wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), + &prev_st); + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = towupper (wc); + /* Write paddings. 
*/ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else if (mbclen == (size_t) -1 || mbclen == 0) + { + /* In case of a singlebyte character. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + /* Apply the translation if we need. */ + if (BE (pstr->trans != NULL, 0) && mbclen == 1) + { + ch = pstr->trans[ch]; + pstr->mbs_case[byte_idx] = ch; + } + pstr->wcs[byte_idx] = towupper (wc); + pstr->mbs[byte_idx++] = toupper (ch); + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; } - pstr->wcs[byte_idx] = iswlower (wc) ? towupper (wc) : wc; - pstr->mbs[byte_idx++] = islower (ch) ? toupper (ch) : ch; - if (BE (mbclen == (size_t) -1, 0)) - pstr->cur_state = prev_st; - } - else /* mbclen > 1 */ - { - if (iswlower (wc)) - wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); else - memcpy (pstr->mbs + byte_idx, - pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); - pstr->wcs[byte_idx++] = iswlower (wc) ? towupper (wc) : wc; - /* Write paddings. */ - for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) - pstr->wcs[byte_idx++] = WEOF; + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } } - } + else +#endif + for (byte_idx = pstr->valid_len; byte_idx < end_idx;) + { + wchar_t wc; + remain_len = end_idx - byte_idx; + prev_st = pstr->cur_state; + mbclen = mbrtowc (&wc, + ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx + + byte_idx), remain_len, &pstr->cur_state); + if (mbclen == 1 || mbclen == (size_t) -1 || mbclen == 0) + { + /* In case of a singlebyte character. */ + int ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]; + /* Apply the translation if we need. 
*/ + if (BE (pstr->trans != NULL, 0) && mbclen == 1) + { + ch = pstr->trans[ch]; + pstr->mbs_case[byte_idx] = ch; + } + pstr->wcs[byte_idx] = towupper (wc); + pstr->mbs[byte_idx++] = toupper (ch); + if (BE (mbclen == (size_t) -1, 0)) + pstr->cur_state = prev_st; + } + else if (BE (mbclen != (size_t) -2, 1)) + { + if (iswlower (wc)) + wcrtomb ((char *) pstr->mbs + byte_idx, towupper (wc), &prev_st); + else + memcpy (pstr->mbs + byte_idx, + pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen); + pstr->wcs[byte_idx++] = towupper (wc); + /* Write paddings. */ + for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;) + pstr->wcs[byte_idx++] = WEOF; + } + else + { + /* The buffer doesn't have enough space, finish to build. */ + pstr->cur_state = prev_st; + break; + } + } pstr->valid_len = byte_idx; } diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 18622cad5c..9fcf865f65 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -335,6 +335,7 @@ struct re_string_t /* 1 if REG_ICASE. 
*/ unsigned int icase : 1; unsigned int is_utf8 : 1; + unsigned int map_notascii : 1; int mb_cur_max; }; typedef struct re_string_t re_string_t; @@ -345,31 +346,32 @@ typedef struct re_string_t re_string_t; #define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL) +struct re_dfa_t; +typedef struct re_dfa_t re_dfa_t; #ifndef RE_NO_INTERNAL_PROTOTYPES static reg_errcode_t re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len, RE_TRANSLATE_TYPE trans, int icase, - int mb_cur_max, int is_utf8); + const re_dfa_t *dfa); static reg_errcode_t re_string_construct (re_string_t *pstr, const char *str, int len, RE_TRANSLATE_TYPE trans, - int icase, int mb_cur_max, - int is_utf8); + int icase, const re_dfa_t *dfa); static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx, int eflags, int newline); static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr, int new_buf_len); -#ifdef RE_ENABLE_I18N +# ifdef RE_ENABLE_I18N static void build_wcs_buffer (re_string_t *pstr); static void build_wcs_upper_buffer (re_string_t *pstr); -#endif /* RE_ENABLE_I18N */ +# endif /* RE_ENABLE_I18N */ static void build_upper_buffer (re_string_t *pstr); static void re_string_translate_buffer (re_string_t *pstr); static void re_string_destruct (re_string_t *pstr); -#ifdef RE_ENABLE_I18N +# ifdef RE_ENABLE_I18N static int re_string_elem_size_at (const re_string_t *pstr, int idx); static inline int re_string_char_size_at (const re_string_t *pstr, int idx); static inline wint_t re_string_wchar_at (const re_string_t *pstr, int idx); -#endif /* RE_ENABLE_I18N */ +# endif /* RE_ENABLE_I18N */ static unsigned int re_string_context_at (const re_string_t *input, int idx, int eflags, int newline_anchor); #endif @@ -610,9 +612,9 @@ struct re_dfa_t collating element. 
*/ unsigned int has_mb_node : 1; unsigned int is_utf8 : 1; + unsigned int map_notascii : 1; int mb_cur_max; }; -typedef struct re_dfa_t re_dfa_t; #ifndef RE_NO_INTERNAL_PROTOTYPES static reg_errcode_t re_node_set_alloc (re_node_set *set, int size); diff --git a/posix/regexec.c b/posix/regexec.c index e962275ba3..7470197506 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -605,8 +605,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch, fl_longest_match = (nmatch != 0 || dfa->nbackref); err = re_string_allocate (&input, string, length, dfa->nodes_len + 1, - preg->translate, preg->syntax & RE_ICASE, - dfa->mb_cur_max, dfa->is_utf8); + preg->translate, preg->syntax & RE_ICASE, dfa); if (BE (err != REG_NOERROR, 0)) goto free_return; input.stop = stop; @@ -1760,7 +1759,7 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node, { struct re_backref_cache_entry *ent = mctx->bkref_ents + bi; int dst, cpos; - + /* If this backreference goes beyond the point we're examining, don't go any further. */ if (ent->str_idx > str_idx) @@ -1797,12 +1796,12 @@ check_dst_limits_calc_pos (dfa, mctx, limit, eclosures, subexp_idx, from_node, } break; } - + case OP_OPEN_SUBEXP: if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx) return -1; break; - + case OP_CLOSE_SUBEXP: if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx) return 0; -- cgit v1.2.3