summaryrefslogtreecommitdiff
path: root/posix/regexec.c
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2003-11-29 06:13:09 +0000
committer Ulrich Drepper <drepper@redhat.com> 2003-11-29 06:13:09 +0000
commit bb3f4825c411e676c51479fea59643af540810b5 (patch)
tree c16c5849da218c04f66b8ccc47b1e911d673493d /posix/regexec.c
parent 46bf9de7b17d4539a19b69bd8407d5d2987b034b (diff)
Update.
2003-11-28 Ulrich Drepper <drepper@redhat.com> * sysdeps/x86_64/fpu/libm-test-ulps: Add some more minor changes to compensate other setup. 2003-11-27 Andreas Jaeger <aj@suse.de> * sysdeps/x86_64/fpu/libm-test-ulps: Add ulps for new atan2 test. * math/libm-test.inc (atan2_test): Add test that runs infinitely. Reported by "Willus" <etc231etc231@willus.com>. 2003-11-27 Michael Matz <matz@suse.de> * sysdeps/ieee754/dbl-64/mpsqrt.c (fastiroot): Fix 64-bit problem with wrong types. 2003-11-28 Jakub Jelinek <jakub@redhat.com> * posix/regexec.c (acquire_init_state_context): Make inline. Add always_inline attribute. (check_matching): Add BE macro. Move if (cur_state->has_backref) into if (dfa->nbackref). (sift_states_backward): Fix comment. (transit_state): Add BE macro. Move if (next_state->has_backref) into if (dfa->nbackref && next_state). Don't check for next_state != NULL twice. * posix/regcomp.c (peek_token): Use opr.ctx_type instead of opr.idx for ANCHOR. (parse_expression): Only call init_word_char if word context will be needed. * posix/bug-regex11.c (tests): Add new tests. * posix/tst-regex.c: Include getopt.h. (timing): New variable. (main): Set timing to 1 if --timing argument is present. Add 2 new tests. (run_test, run_test_backwards): Handle timing. 2003-11-27 Jakub Jelinek <jakub@redhat.com> * posix/regex_internal.h (re_string_t): Remove mbs_case field. Add offsets, valid_raw_len, raw_len, raw_stop, mbs_allocated and offsets_needed fields. Change icase, is_utf8 and map_notascii type from int bitfield to unsigned char. (MBS_ALLOCATED, MBS_CASE_ALLOCATED): Remove. (build_wcs_upper_buffer): Change prototype to return int. (re_string_peek_byte_case, re_string_fetch_byte_case): Remove defines, add prototypes. * posix/regex_internal.c (re_string_allocate): Don't initialize stop here. Don't initialize mbs_case. Set valid_raw_len. Use mbs_allocated instead of MBS_* macros. (re_string_construct): Don't initialize stop and valid_len here. Don't initialize mbs_case.
Use mbs_allocated instead of MBS_* macros. Reallocate buffers if build_wcs_upper_buffer converted too few bytes. Set valid_len to bufs_len only for single byte no translation and set in that case valid_raw_len as well. (re_string_realloc_buffers): Reallocate offsets if not NULL. Use mbs_allocated instead of MBS_ALLOCATED. Don't reallocate mbs_case. (re_string_construct_common): Initialize raw_len, mbs_allocated, stop and raw_stop. (build_wcs_buffer): Apply pstr->trans before mbrtowc instead of after it. Set valid_raw_len. Don't set mbs_case. (build_wcs_upper_buffer): Return REG_NOERROR or REG_ESPACE. Only use the fast path if !pstr->offsets_needed. Apply pstr->trans before mbrtowc instead of after it. If upper case character uses different number of bytes than lower case, go to the slow path. Don't call towupper unnecessarily twice. Set valid_raw_len as well. Handle in the slow path the case if lower and upper case use different number of characters. Don't set mbs_case. (re_string_skip_chars): Use valid_raw_len instead of valid_len. (build_upper_buffer): Don't set mbs_case. Add BE macro. Set valid_raw_len. (re_string_translate_buffer): Set mbs instead of mbs_case. Set valid_raw_len. (re_string_reconstruct): Use raw_len/raw_stop to initialize len/stop. Clear valid_raw_len and offsets_needed when clearing valid_len. Use mbs_allocated instead of MBS_* macros. Check original offset against valid_raw_len instead of valid_len. Remove mbs_case handling. Adjust valid_raw_len together with valid_len. If is_utf8 and looking for tip context, apply pstr->trans first. If buffers start with partial multi-byte character, initialize mbs array as well if mbs_allocated. Check return value of build_wcs_upper_buffer. (re_string_peek_byte_case): New function. (re_string_fetch_byte_case): New function. (re_string_destruct): Use mbs_allocated instead of MBS_ALLOCATED. Don't free mbs_case. Free offsets.
* posix/regcomp.c (init_dfa): Only check if charset name is UTF-8 if mb_cur_max == 6. * posix/regexec.c (re_search_internal): Initialize input.raw_stop as well. Use valid_raw_len instead of valid_len when looking through fastmap. Adjust registers through input.offsets. (extend_buffers): Allow build_wcs_upper_buffer to fail. * posix/bug-regex18.c (tests): Enable #ifdefed out tests. Add new tests.
Diffstat (limited to 'posix/regexec.c')
-rw-r--r--posix/regexec.c75
1 files changed, 45 insertions, 30 deletions
diff --git a/posix/regexec.c b/posix/regexec.c
index 53f49ea972..1942a7fee9 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -50,10 +50,9 @@ static int re_search_stub (struct re_pattern_buffer *bufp,
int ret_len);
static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
int nregs, int regs_allocated);
-static re_dfastate_t *acquire_init_state_context (reg_errcode_t *err,
- const regex_t *preg,
- const re_match_context_t *mctx,
- int idx);
+static inline re_dfastate_t *acquire_init_state_context
+ (reg_errcode_t *err, const regex_t *preg, const re_match_context_t *mctx,
+ int idx) __attribute ((always_inline));
static reg_errcode_t prune_impossible_nodes (const regex_t *preg,
re_match_context_t *mctx);
static int check_matching (const regex_t *preg, re_match_context_t *mctx,
@@ -609,6 +608,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
if (BE (err != REG_NOERROR, 0))
goto free_return;
input.stop = stop;
+ input.raw_stop = stop;
err = match_ctx_init (&mctx, eflags, &input, dfa->nbackref * 2);
if (BE (err != REG_NOERROR, 0))
@@ -703,7 +703,7 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
instead. */
/* If MATCH_FIRST is out of the valid range, reconstruct the
buffers. */
- if (input.raw_mbs_idx + input.valid_len <= match_first
+ if (input.raw_mbs_idx + input.valid_raw_len <= match_first
|| match_first < input.raw_mbs_idx)
{
err = re_string_reconstruct (&input, match_first, eflags,
@@ -807,6 +807,17 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
if (pmatch[reg_idx].rm_so != -1)
{
+ if (BE (input.offsets_needed != 0, 0))
+ {
+ if (pmatch[reg_idx].rm_so == input.valid_len)
+ pmatch[reg_idx].rm_so += input.valid_raw_len - input.valid_len;
+ else
+ pmatch[reg_idx].rm_so = input.offsets[pmatch[reg_idx].rm_so];
+ if (pmatch[reg_idx].rm_eo == input.valid_len)
+ pmatch[reg_idx].rm_eo += input.valid_raw_len - input.valid_len;
+ else
+ pmatch[reg_idx].rm_eo = input.offsets[pmatch[reg_idx].rm_eo];
+ }
pmatch[reg_idx].rm_so += match_first;
pmatch[reg_idx].rm_eo += match_first;
}
@@ -909,7 +920,7 @@ prune_impossible_nodes (preg, mctx)
We must select appropriate initial state depending on the context,
since initial states may have constraints like "\<", "^", etc.. */
-static re_dfastate_t *
+static inline re_dfastate_t *
acquire_init_state_context (err, preg, mctx, idx)
reg_errcode_t *err;
const regex_t *preg;
@@ -976,22 +987,22 @@ check_matching (preg, mctx, fl_longest_match)
/* Check OP_OPEN_SUBEXP in the initial state in case that we use them
later. E.g. Processing back references. */
- if (dfa->nbackref)
+ if (BE (dfa->nbackref, 0))
{
err = check_subexp_matching_top (dfa, mctx, &cur_state->nodes, 0);
if (BE (err != REG_NOERROR, 0))
return err;
- }
- if (cur_state->has_backref)
- {
- err = transit_state_bkref (preg, &cur_state->nodes, mctx);
- if (BE (err != REG_NOERROR, 0))
- return err;
+ if (cur_state->has_backref)
+ {
+ err = transit_state_bkref (preg, &cur_state->nodes, mctx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
}
/* If the RE accepts NULL string. */
- if (cur_state->halt)
+ if (BE (cur_state->halt, 0))
{
if (!cur_state->has_constraint
|| check_halt_state_context (preg, cur_state, mctx, cur_str_idx))
@@ -1372,11 +1383,11 @@ update_regs (dfa, pmatch, cur_node, cur_idx, nmatch)
i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
away the node `a'.
ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
- throwed away, we throw away the node `a'.
+ thrown away, we throw away the node `a'.
3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
node `a'.
- ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is throwed away,
+ ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
we throw away the node `a'. */
#define STATE_NODE_CONTAINS(state,node) \
@@ -2041,7 +2052,7 @@ sift_states_iter_mb (preg, mctx, sctx, node_idx, str_idx, max_str_idx)
!STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
dfa->nexts[node_idx]))
/* The node can't accept the `multi byte', or the
- destination was already throwed away, then the node
+ destination was already thrown away, then the node
could't accept the current input `multi byte'. */
naccepted = 0;
/* Otherwise, it is sure that the node could accept
@@ -2188,24 +2199,24 @@ transit_state (err, preg, mctx, state)
}
}
- /* Check OP_OPEN_SUBEXP in the current state in case that we use them
- later. We must check them here, since the back references in the
- next state might use them. */
- if (dfa->nbackref && next_state/* && fl_process_bkref */)
+ if (BE (dfa->nbackref, 0) && next_state != NULL)
{
+ /* Check OP_OPEN_SUBEXP in the current state in case that we use them
+ later. We must check them here, since the back references in the
+ next state might use them. */
*err = check_subexp_matching_top (dfa, mctx, &next_state->nodes,
cur_idx);
if (BE (*err != REG_NOERROR, 0))
return NULL;
- }
- /* If the next state has back references. */
- if (next_state != NULL && next_state->has_backref)
- {
- *err = transit_state_bkref (preg, &next_state->nodes, mctx);
- if (BE (*err != REG_NOERROR, 0))
- return NULL;
- next_state = mctx->state_log[cur_idx];
+ /* If the next state has back references. */
+ if (next_state->has_backref)
+ {
+ *err = transit_state_bkref (preg, &next_state->nodes, mctx);
+ if (BE (*err != REG_NOERROR, 0))
+ return NULL;
+ next_state = mctx->state_log[cur_idx];
+ }
}
return next_state;
}
@@ -3858,7 +3869,11 @@ extend_buffers (mctx)
{
#ifdef RE_ENABLE_I18N
if (pstr->mb_cur_max > 1)
- build_wcs_upper_buffer (pstr);
+ {
+ ret = build_wcs_upper_buffer (pstr);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
+ }
else
#endif /* RE_ENABLE_I18N */
build_upper_buffer (pstr);