From 7db612081aa9c2d0b7e6205582a80aa2e9342f8f Mon Sep 17 00:00:00 2001 From: Ulrich Drepper Date: Fri, 12 Nov 2004 09:45:05 +0000 Subject: Update. 2004-11-12 Ulrich Drepper * posix/Makefile (tests): Add bug-regex24. * posix/bug-regex24.c: New file. 2004-11-12 Paolo Bonzini * posix/regexec.c (check_dst_limits_calc_pos_1): Use the map to cut recursive paths. Make exit condition more precise. (match_ctx_add_entry): Initialize the map. * posix/regex_internal.h (struct re_backref_cache_entry): Add a map of reachable subexpression nodes from each backreference cache entry. --- ChangeLog | 13 +++++++++++ posix/Makefile | 3 ++- posix/bug-regex24.c | 59 ++++++++++++++++++++++++++++++++++++++++++++++++++ posix/regex_internal.h | 6 ++--- posix/regexec.c | 25 ++++++++++++++++++--- 5 files changed, 99 insertions(+), 7 deletions(-) create mode 100644 posix/bug-regex24.c diff --git a/ChangeLog b/ChangeLog index b02b433c88..d883905126 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,16 @@ +2004-11-12 Ulrich Drepper + + * posix/Makefile (tests): Add bug-regex24. + * posix/bug-regex24.c: New file. + +2004-11-12 Paolo Bonzini + + * posix/regexec.c (check_dst_limits_calc_pos_1): Use the map to + cut recursive paths. Make exit condition more precise. + (match_ctx_add_entry): Initialize the map. + * posix/regex_internal.h (struct re_backref_cache_entry): Add a map of + reachable subexpression nodes from each backreference cache entry. + 2004-11-10 Jakub Jelinek * sysdeps/unix/sysv/linux/setreuid.c: Remove sys/syscall.h, diff --git a/posix/Makefile b/posix/Makefile index cd6a52c098..8bc15ad215 100644 --- a/posix/Makefile +++ b/posix/Makefile @@ -79,7 +79,8 @@ tests := tstgetopt testfnm runtests runptests \ bug-regex8 bug-regex9 bug-regex10 bug-regex11 bug-regex12 \ bug-regex13 bug-regex14 bug-regex15 bug-regex16 \ bug-regex17 bug-regex18 bug-regex19 bug-regex20 \ - bug-regex21 bug-regex22 bug-regex23 tst-nice tst-nanosleep \ + bug-regex21 bug-regex22 bug-regex23 bug-regex24 \ + tst-nice tst-nanosleep \ transbug tst-rxspencer tst-pcre tst-boost \ bug-ga1 tst-vfork1 tst-vfork2 tst-waitid \ tst-getaddrinfo2 bug-glob1 bug-glob2 diff --git a/posix/bug-regex24.c b/posix/bug-regex24.c new file mode 100644 index 0000000000..83ea10bb62 --- /dev/null +++ b/posix/bug-regex24.c @@ -0,0 +1,59 @@ +#include +#include + +#define str "civic" + +#define N 10 +static const char *expected[N] = + { + str, "c", "i", "", "", "", "", "", "", "" + }; + +static int +do_test (void) +{ + regex_t rbuf; + static const char pat[] = "\ +^(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?)(.?).?\\9\\8\\7\\6\\5\\4\\3\\2\\1$"; + + int err = regcomp (&rbuf, pat, REG_EXTENDED); + if (err != 0) + { + char errstr[300]; + regerror (err, &rbuf, errstr, sizeof (errstr)); + puts (errstr); + return err; + } + + regmatch_t m[N]; + err = regexec (&rbuf, str, N, m, 0); + if (err != 0) + { + puts ("regexec failed"); + return 1; + } + + int result = 0; + for (int i = 0; i < N; ++i) + if (m[i].rm_so == -1) + { + printf ("m[%d] unused\n", i); + result = 1; + } + else + { + int len = m[i].rm_eo - m[i].rm_so; + + printf ("m[%d] = \"%.*s\"\n", i, len, str + m[i].rm_so); + + if (strlen (expected[i]) != len + || memcmp (expected[i], str + m[i].rm_so, len) != 0) + result = 1; + } + + return result; +} + +#define TIMEOUT 30 +#define TEST_FUNCTION do_test () +#include "../test-skeleton.c" diff --git a/posix/regex_internal.h b/posix/regex_internal.h index 023056c028..14d95a5b84 100644 --- a/posix/regex_internal.h +++ b/posix/regex_internal.h @@ -548,9 +548,9 @@ struct re_backref_cache_entry int str_idx; int subexp_from; int subexp_to; - /* We need only one byte from the following field. If other small - fields are added the type could be changed to 'char'. */ - int more; + char more; + char unused; + unsigned short int eps_reachable_subexps_map; }; typedef struct diff --git a/posix/regexec.c b/posix/regexec.c index 79104119be..a03df2636a 100644 --- a/posix/regexec.c +++ b/posix/regexec.c @@ -1885,7 +1885,12 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) { int dst, cpos; - if (ent->node != node || ent->subexp_from != ent->subexp_to) + if (ent->node != node) + continue; + + if (subexp_idx <= 8 * sizeof (ent->eps_reachable_subexps_map) + && (ent->eps_reachable_subexps_map + & (1 << (subexp_idx - 1))) == 0) continue; /* Recurse trying to reach the OP_OPEN_SUBEXP and @@ -1906,11 +1911,13 @@ check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx) cpos = check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, dst, bkref_idx); - if (cpos == -1 && (boundaries & 1)) + if (cpos == -1 /* && (boundaries & 1) */) return -1; - if (cpos == 0 /* && (boundaries & 2) */) + if (cpos == 0 && (boundaries & 2)) return 0; + + ent->eps_reachable_subexps_map &= ~(1 << (subexp_idx - 1)); } while (ent++->more); break; @@ -4169,6 +4176,18 @@ match_ctx_add_entry (mctx, node, str_idx, from, to) mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx; mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from; mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to; + + /* This is a cache that saves negative results of check_dst_limits_calc_pos. + If bit N is clear, means that this entry won't epsilon-transition to + an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression. If + it is set, check_dst_limits_calc_pos_1 will recurse and try to find one + such node. + + A backreference does not epsilon-transition unless it is empty, so set + to all zeros if FROM != TO. */ + mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map + = (from == to ? ~0 : 0); + mctx->bkref_ents[mctx->nbkref_ents++].more = 0; if (mctx->max_mb_elem_len < to - from) mctx->max_mb_elem_len = to - from; -- cgit v1.2.3