summaryrefslogtreecommitdiff
path: root/posix
diff options
context:
space:
mode:
authorJakub Jelinek <jakub@redhat.com>2004-11-10 09:02:52 +0000
committerJakub Jelinek <jakub@redhat.com>2004-11-10 09:02:52 +0000
commit3504bb650f48534549bbd0313dc15fa71455e302 (patch)
tree742dd9cbcee1147fb36bcc02816bec415544597e /posix
parentcbf4bcd2b3d53de274548dbf4c28017d1f07d5b2 (diff)
Updated to fedora-glibc-20041110T0839
Diffstat (limited to 'posix')
-rw-r--r--posix/regcomp.c84
-rw-r--r--posix/regex_internal.c16
-rw-r--r--posix/regex_internal.h16
-rw-r--r--posix/regexec.c519
-rw-r--r--posix/rxspencer/tests5
5 files changed, 346 insertions, 294 deletions
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 9b435a885e..ba7a1cc5d4 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -566,6 +566,23 @@ weak_alias (__regerror, regerror)
#endif
+#ifdef RE_ENABLE_I18N
+/* This static array is used for the map to single-byte characters when
+ UTF-8 is used. Otherwise we would allocate memory just to initialize
+ it the same all the time. UTF-8 is the preferred encoding so this is
+ a worthwhile optimization. */
+static const bitset utf8_sb_map =
+{
+ /* Set the first 128 bits. */
+# if UINT_MAX == 0xffffffff
+ 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff
+# else
+# error "Add case for new unsigned int size"
+# endif
+};
+#endif
+
+
static void
free_dfa_content (re_dfa_t *dfa)
{
@@ -613,7 +630,8 @@ free_dfa_content (re_dfa_t *dfa)
}
re_free (dfa->state_table);
#ifdef RE_ENABLE_I18N
- re_free (dfa->sb_char);
+ if (dfa->sb_char != utf8_sb_map)
+ re_free (dfa->sb_char);
#endif
#ifdef DEBUG
re_free (dfa->re_str);
@@ -824,6 +842,9 @@ init_dfa (dfa, pat_len)
int pat_len;
{
int table_size;
+#ifndef _LIBC
+ char *codeset_name;
+#endif
memset (dfa, '\0', sizeof (re_dfa_t));
@@ -853,22 +874,59 @@ init_dfa (dfa, pat_len)
dfa->is_utf8 = 1;
dfa->map_notascii = (_NL_CURRENT_WORD (LC_CTYPE, _NL_CTYPE_MAP_TO_NONASCII)
!= 0);
+#else
+# ifdef HAVE_LANGINFO_CODESET
+ codeset_name = nl_langinfo (CODESET);
+# else
+ codeset_name = getenv ("LC_ALL");
+ if (codeset_name == NULL || codeset[0] == '\0')
+ codeset_name = getenv ("LC_CTYPE");
+ if (codeset_name == NULL || codeset[0] == '\0')
+ codeset_name = getenv ("LANG");
+ if (codeset_name == NULL)
+ codeset_name = "";
+ else if (strchr (codeset_name, '.') != NULL)
+ codeset_name = strchr (codeset_name, '.') + 1;
+# endif
+
+ if (strcasecmp (codeset_name, "UTF-8") == 0
+ || strcasecmp (codeset_name, "UTF8") == 0)
+ dfa->is_utf8 = 1;
+
+ /* We check exhaustively in the loop below if this charset is a
+ superset of ASCII. */
+ dfa->map_notascii = 0;
#endif
+
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
- int i, j, ch;
-
- dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
- if (BE (dfa->sb_char == NULL, 0))
- return REG_ESPACE;
if (dfa->is_utf8)
- memset (dfa->sb_char, 255, sizeof (unsigned int) * BITSET_UINTS / 2);
+ dfa->sb_char = (re_bitset_ptr_t) utf8_sb_map;
else
- for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
- for (j = 0; j < UINT_BITS; ++j, ++ch)
- if (__btowc (ch) != WEOF)
- dfa->sb_char[i] |= 1 << j;
+ {
+ int i, j, ch;
+
+ dfa->sb_char = (re_bitset_ptr_t) calloc (sizeof (bitset), 1);
+ if (BE (dfa->sb_char == NULL, 0))
+ return REG_ESPACE;
+
+ /* Clear all bits by, then set those corresponding to single
+ byte chars. */
+ bitset_empty (dfa->sb_char);
+
+ for (i = 0, ch = 0; i < BITSET_UINTS; ++i)
+ for (j = 0; j < UINT_BITS; ++j, ++ch)
+ {
+ wchar_t wch = __btowc (ch);
+ if (wch != WEOF)
+ dfa->sb_char[i] |= 1 << j;
+# ifndef _LIBC
+ if (isascii (ch) && wch != (wchar_t) ch)
+ dfa->map_notascii = 1;
+# endif
+ }
+ }
}
#endif
@@ -1544,7 +1602,9 @@ calc_eclosure_iter (new_set, dfa, node, root)
? dfa->nodes[node].opr.ctx_type : 0);
/* If the current node has constraints, duplicate all nodes.
Since they must inherit the constraints. */
- if (constraint && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
+ if (constraint
+ && dfa->edests[node].nelem
+ && !dfa->nodes[dfa->edests[node].elems[0]].duplicated)
{
int org_node, cur_node;
org_node = cur_node = node;
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index 96f113745f..609719f79c 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -293,9 +293,8 @@ build_wcs_upper_buffer (pstr)
byte_idx = pstr->valid_len;
end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
-#ifdef _LIBC
- /* The following optimization assumes that the wchar_t encoding is
- always ISO 10646. */
+ /* The following optimization assumes that ASCII characters can be
+ mapped to wide characters with a simple cast. */
if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
{
while (byte_idx < end_idx)
@@ -309,8 +308,7 @@ build_wcs_upper_buffer (pstr)
pstr->mbs[byte_idx]
= toupper (pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx]);
/* The next step uses the assumption that wchar_t is encoded
- with ISO 10646: all ASCII values can be converted like
- this. */
+ ASCII-safe: all ASCII values can be converted like this. */
pstr->wcs[byte_idx] = (wchar_t) pstr->mbs[byte_idx];
++byte_idx;
continue;
@@ -368,14 +366,11 @@ build_wcs_upper_buffer (pstr)
return REG_NOERROR;
}
else
-#endif
for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
{
wchar_t wc;
const char *p;
-#ifdef _LIBC
-offsets_needed:
-#endif
+ offsets_needed:
remain_len = end_idx - byte_idx;
prev_st = pstr->cur_state;
if (BE (pstr->trans != NULL, 0))
@@ -647,7 +642,6 @@ re_string_reconstruct (pstr, idx, eflags)
int wcs_idx;
wint_t wc = WEOF;
-#ifdef _LIBC
if (pstr->is_utf8)
{
const unsigned char *raw, *p, *q, *end;
@@ -687,7 +681,7 @@ re_string_reconstruct (pstr, idx, eflags)
break;
}
}
-#endif
+
if (wc == WEOF)
pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
if (BE (pstr->valid_len, 0))
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index d7d7d9a8b1..023056c028 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -27,6 +27,9 @@
#include <stdlib.h>
#include <string.h>
+#if defined HAVE_LANGINFO_H || defined HAVE_LANGINFO_CODESET || defined _LIBC
+# include <langinfo.h>
+#endif
#if defined HAVE_LOCALE_H || defined _LIBC
# include <locale.h>
#endif
@@ -98,6 +101,7 @@
# define __btowc btowc
# define __mempcpy mempcpy
# define __wcrtomb wcrtomb
+# define __regfree regfree
# define attribute_hidden
#endif /* not _LIBC */
@@ -544,7 +548,9 @@ struct re_backref_cache_entry
int str_idx;
int subexp_from;
int subexp_to;
- int flag;
+ /* We need only one byte from the following field. If other small
+ fields are added the type could be changed to 'char'. */
+ int more;
};
typedef struct
@@ -576,17 +582,11 @@ typedef struct
typedef struct
{
- int cur_bkref;
- int cls_subexp_idx;
-
re_dfastate_t **sifted_states;
re_dfastate_t **limited_states;
-
- re_node_set limits;
-
int last_node;
int last_str_idx;
- int check_subexp;
+ re_node_set limits;
} re_sift_context_t;
struct re_fail_stack_ent_t
diff --git a/posix/regexec.c b/posix/regexec.c
index b7b5b1a6d4..72ae70b916 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -29,7 +29,6 @@ static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, int node,
internal_function;
static int search_cur_bkref_entry (re_match_context_t *mctx, int str_idx)
internal_function;
-static void match_ctx_clear_flag (re_match_context_t *mctx) internal_function;
static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, int node,
int str_idx) internal_function;
static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
@@ -37,7 +36,7 @@ static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
internal_function;
static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
re_dfastate_t **limited_sts, int last_node,
- int last_str_idx, int check_subexp)
+ int last_str_idx)
internal_function;
static reg_errcode_t re_search_internal (const regex_t *preg,
const char *string, int length,
@@ -94,6 +93,9 @@ static int sift_states_iter_mb (const re_match_context_t *mctx,
#endif /* RE_ENABLE_I18N */
static reg_errcode_t sift_states_backward (re_match_context_t *mctx,
re_sift_context_t *sctx) internal_function;
+static reg_errcode_t build_sifted_states (re_match_context_t *mctx,
+ re_sift_context_t *sctx, int str_idx,
+ re_node_set *cur_dest) internal_function;
static reg_errcode_t update_cur_sifted_state (re_match_context_t *mctx,
re_sift_context_t *sctx,
int str_idx,
@@ -107,9 +109,13 @@ static reg_errcode_t sub_epsilon_src_nodes (re_dfa_t *dfa, int node,
static int check_dst_limits (re_match_context_t *mctx, re_node_set *limits,
int dst_node, int dst_idx, int src_node,
int src_idx) internal_function;
+static int check_dst_limits_calc_pos_1 (re_match_context_t *mctx,
+ int boundaries, int subexp_idx,
+ int from_node, int bkref_idx) internal_function;
static int check_dst_limits_calc_pos (re_match_context_t *mctx,
- int limit, re_node_set *eclosures,
- int subexp_idx, int node, int str_idx) internal_function;
+ int limit, int subexp_idx,
+ int node, int str_idx,
+ int bkref_idx) internal_function;
static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
re_node_set *dest_nodes,
const re_node_set *candidates,
@@ -118,7 +124,7 @@ static reg_errcode_t check_subexp_limits (re_dfa_t *dfa,
int str_idx) internal_function;
static reg_errcode_t sift_states_bkref (re_match_context_t *mctx,
re_sift_context_t *sctx,
- int str_idx, re_node_set *dest_nodes) internal_function;
+ int str_idx, const re_node_set *candidates) internal_function;
static reg_errcode_t clean_state_log_if_needed (re_match_context_t *mctx,
int next_state_log_idx) internal_function;
static reg_errcode_t merge_state_array (re_dfa_t *dfa, re_dfastate_t **dst,
@@ -170,8 +176,7 @@ static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
int type) internal_function;
static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
re_node_set *cur_nodes, int cur_str,
- int last_str, int subexp_num,
- int type) internal_function;
+ int subexp_num, int type) internal_function;
static re_dfastate_t **build_trtable (re_dfa_t *dfa,
re_dfastate_t *state) internal_function;
#ifdef RE_ENABLE_I18N
@@ -578,8 +583,6 @@ re_exec (s)
}
#endif /* _REGEX_RE_COMP */
-static re_node_set empty_set;
-
/* Internal entry point. */
/* Searches for a compiled pattern PREG in the string STRING, whose
@@ -642,8 +645,6 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
start = range = 0;
}
- re_node_set_init_empty (&empty_set);
-
/* We must check the longest matching, if nmatch > 0. */
fl_longest_match = (nmatch != 0 || dfa->nbackref);
@@ -910,9 +911,8 @@ prune_impossible_nodes (mctx)
{
memset (lim_states, '\0',
sizeof (re_dfastate_t *) * (match_last + 1));
- match_ctx_clear_flag (mctx);
sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
- match_last, 0);
+ match_last);
ret = sift_states_backward (mctx, &sctx);
re_node_set_free (&sctx.limits);
if (BE (ret != REG_NOERROR, 0))
@@ -942,8 +942,7 @@ prune_impossible_nodes (mctx)
}
else
{
- sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
- match_last, 0);
+ sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
ret = sift_states_backward (mctx, &sctx);
re_node_set_free (&sctx.limits);
if (BE (ret != REG_NOERROR, 0))
@@ -1496,17 +1495,14 @@ sift_states_backward (mctx, sctx)
re_match_context_t *mctx;
re_sift_context_t *sctx;
{
- re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int null_cnt = 0;
int str_idx = sctx->last_str_idx;
re_node_set cur_dest;
- re_node_set *cur_src; /* Points the state_log[str_idx]->nodes */
#ifdef DEBUG
assert (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
#endif
- cur_src = &mctx->state_log[str_idx]->nodes;
/* Build sifted state_log[str_idx]. It has the nodes which can epsilon
transit to the last_node and the last_node itself. */
@@ -1520,7 +1516,6 @@ sift_states_backward (mctx, sctx)
/* Then check each states in the state_log. */
while (str_idx > 0)
{
- int i, ret;
/* Update counters. */
null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
if (null_cnt > mctx->max_mb_elem_len)
@@ -1532,56 +1527,12 @@ sift_states_backward (mctx, sctx)
}
re_node_set_empty (&cur_dest);
--str_idx;
- cur_src = ((mctx->state_log[str_idx] == NULL) ? &empty_set
- : &mctx->state_log[str_idx]->nodes);
-
- /* Then build the next sifted state.
- We build the next sifted state on `cur_dest', and update
- `sifted_states[str_idx]' with `cur_dest'.
- Note:
- `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
- `cur_src' points the node_set of the old `state_log[str_idx]'. */
- for (i = 0; i < cur_src->nelem; i++)
- {
- int prev_node = cur_src->elems[i];
- int naccepted = 0;
- re_token_type_t type = dfa->nodes[prev_node].type;
-
- if (IS_EPSILON_NODE (type))
- continue;
-#ifdef RE_ENABLE_I18N
- /* If the node may accept `multi byte'. */
- if (ACCEPT_MB_NODE (type))
- naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
- str_idx, sctx->last_str_idx);
-
-#endif /* RE_ENABLE_I18N */
- /* We don't check backreferences here.
- See update_cur_sifted_state(). */
-
- if (!naccepted
- && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
- && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
- dfa->nexts[prev_node]))
- naccepted = 1;
-
- if (naccepted == 0)
- continue;
- if (sctx->limits.nelem)
- {
- int to_idx = str_idx + naccepted;
- if (check_dst_limits (mctx, &sctx->limits,
- dfa->nexts[prev_node], to_idx,
- prev_node, str_idx))
- continue;
- }
- ret = re_node_set_insert (&cur_dest, prev_node);
- if (BE (ret == -1, 0))
- {
- err = REG_ESPACE;
- goto free_return;
- }
+ if (mctx->state_log[str_idx])
+ {
+ err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
}
/* Add all the nodes which satisfy the following conditions:
@@ -1598,6 +1549,66 @@ sift_states_backward (mctx, sctx)
return err;
}
+static reg_errcode_t
+build_sifted_states (mctx, sctx, str_idx, cur_dest)
+ re_match_context_t *mctx;
+ re_sift_context_t *sctx;
+ int str_idx;
+ re_node_set *cur_dest;
+{
+ re_dfa_t *const dfa = mctx->dfa;
+ re_node_set *cur_src = &mctx->state_log[str_idx]->nodes;
+ int i;
+
+ /* Then build the next sifted state.
+ We build the next sifted state on `cur_dest', and update
+ `sifted_states[str_idx]' with `cur_dest'.
+ Note:
+ `cur_dest' is the sifted state from `state_log[str_idx + 1]'.
+ `cur_src' points the node_set of the old `state_log[str_idx]'. */
+ for (i = 0; i < cur_src->nelem; i++)
+ {
+ int prev_node = cur_src->elems[i];
+ int naccepted = 0;
+ re_token_type_t type = dfa->nodes[prev_node].type;
+ int ret;
+
+ if (IS_EPSILON_NODE (type))
+ continue;
+#ifdef RE_ENABLE_I18N
+ /* If the node may accept `multi byte'. */
+ if (ACCEPT_MB_NODE (type))
+ naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
+ str_idx, sctx->last_str_idx);
+#endif /* RE_ENABLE_I18N */
+
+ /* We don't check backreferences here.
+ See update_cur_sifted_state(). */
+ if (!naccepted
+ && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
+ && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
+ dfa->nexts[prev_node]))
+ naccepted = 1;
+
+ if (naccepted == 0)
+ continue;
+
+ if (sctx->limits.nelem)
+ {
+ int to_idx = str_idx + naccepted;
+ if (check_dst_limits (mctx, &sctx->limits,
+ dfa->nexts[prev_node], to_idx,
+ prev_node, str_idx))
+ continue;
+ }
+ ret = re_node_set_insert (cur_dest, prev_node);
+ if (BE (ret == -1, 0))
+ return REG_ESPACE;
+ }
+
+ return REG_NOERROR;
+}
+
/* Helper functions. */
static reg_errcode_t
@@ -1665,36 +1676,39 @@ update_cur_sifted_state (mctx, sctx, str_idx, dest_nodes)
re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
const re_node_set *candidates;
- candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
+ candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
: &mctx->state_log[str_idx]->nodes);
- /* At first, add the nodes which can epsilon transit to a node in
- DEST_NODE. */
- if (dest_nodes->nelem)
+ if (dest_nodes->nelem == 0)
+ sctx->sifted_states[str_idx] = NULL;
+ else
{
- err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
- if (BE (err != REG_NOERROR, 0))
- return err;
- }
+ if (candidates)
+ {
+ /* At first, add the nodes which can epsilon transit to a node in
+ DEST_NODE. */
+ err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
- /* Then, check the limitations in the current sift_context. */
- if (dest_nodes->nelem && sctx->limits.nelem)
- {
- err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
- mctx->bkref_ents, str_idx);
+ /* Then, check the limitations in the current sift_context. */
+ if (sctx->limits.nelem)
+ {
+ err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
+ mctx->bkref_ents, str_idx);
+ if (BE (err != REG_NOERROR, 0))
+ return err;
+ }
+ }
+
+ sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
if (BE (err != REG_NOERROR, 0))
return err;
}
- /* Update state_log. */
- sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
- if (BE (sctx->sifted_states[str_idx] == NULL && err != REG_NOERROR, 0))
- return err;
-
- if ((mctx->state_log[str_idx] != NULL
- && mctx->state_log[str_idx]->has_backref))
+ if (candidates && mctx->state_log[str_idx]->has_backref)
{
- err = sift_states_bkref (mctx, sctx, str_idx, dest_nodes);
+ err = sift_states_bkref (mctx, sctx, str_idx, candidates);
if (BE (err != REG_NOERROR, 0))
return err;
}
@@ -1789,6 +1803,8 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
re_dfa_t *const dfa = mctx->dfa;
int lim_idx, src_pos, dst_pos;
+ int dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
+ int src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
{
int subexp_idx;
@@ -1797,11 +1813,11 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
subexp_idx = dfa->nodes[ent->node].opr.idx - 1;
dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
- dfa->eclosures + dst_node,
- subexp_idx, dst_node, dst_idx);
+ subexp_idx, dst_node, dst_idx,
+ dst_bkref_idx);
src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
- dfa->eclosures + src_node,
- subexp_idx, src_node, src_idx);
+ subexp_idx, src_node, src_idx,
+ src_bkref_idx);
/* In case of:
<src> <dst> ( <subexp> )
@@ -1816,27 +1832,14 @@ check_dst_limits (mctx, limits, dst_node, dst_idx, src_node, src_idx)
}
static int
-check_dst_limits_calc_pos (mctx, limit, eclosures, subexp_idx, from_node,
- str_idx)
+check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx, from_node, bkref_idx)
re_match_context_t *mctx;
- re_node_set *eclosures;
- int limit, subexp_idx, from_node, str_idx;
+ int boundaries, subexp_idx, from_node, bkref_idx;
{
re_dfa_t *const dfa = mctx->dfa;
- struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+ re_node_set *eclosures = dfa->eclosures + from_node;
int node_idx;
- /* If we are outside the range of the subexpression, return -1 or 1. */
- if (str_idx < lim->subexp_from)
- return -1;
-
- if (lim->subexp_to < str_idx)
- return 1;
-
- /* If we are within the subexpression, return 0. */
- if (str_idx != lim->subexp_from && str_idx != lim->subexp_to)
- return 0;
-
/* Else, we are on the boundary: examine the nodes on the epsilon
closure. */
for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
@@ -1846,17 +1849,11 @@ check_dst_limits_calc_pos (mctx, limit, eclosures, subexp_idx, from_node,
{
case OP_BACK_REF:
{
- int bi = search_cur_bkref_entry (mctx, str_idx);
- for (; bi < mctx->nbkref_ents; ++bi)
+ struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
+ do
{
- struct re_backref_cache_entry *ent = mctx->bkref_ents + bi;
int dst, cpos;
- /* If this backreference goes beyond the point we're
- examining, don't go any further. */
- if (ent->str_idx > str_idx)
- break;
-
if (ent->node != node || ent->subexp_from != ent->subexp_to)
continue;
@@ -1869,33 +1866,32 @@ check_dst_limits_calc_pos (mctx, limit, eclosures, subexp_idx, from_node,
dst = dfa->edests[node].elems[0];
if (dst == from_node)
{
- if (str_idx == lim->subexp_from)
+ if (boundaries & 1)
return -1;
- else /* if (str_idx == lim->subexp_to) */
+ else /* if (boundaries & 2) */
return 0;
}
- cpos = check_dst_limits_calc_pos (mctx, limit,
- dfa->eclosures + dst,
- subexp_idx, dst,
- str_idx);
+ cpos = check_dst_limits_calc_pos_1 (mctx, boundaries,
+ subexp_idx, dst, bkref_idx);
- if (cpos == -1 && str_idx == lim->subexp_from)
+ if (cpos == -1 && (boundaries & 1))
return -1;
- if (cpos == 0 /* && str_idx == lim->lim->subexp_to */)
+ if (cpos == 0 /* && (boundaries & 2) */)
return 0;
}
- break;
- }
+ while (ent++->more);
+ break;
+ }
case OP_OPEN_SUBEXP:
- if (str_idx == lim->subexp_from && subexp_idx == dfa->nodes[node].opr.idx)
+ if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
return -1;
break;
case OP_CLOSE_SUBEXP:
- if (str_idx == lim->subexp_to && subexp_idx == dfa->nodes[node].opr.idx)
+ if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
return 0;
break;
@@ -1904,10 +1900,33 @@ check_dst_limits_calc_pos (mctx, limit, eclosures, subexp_idx, from_node,
}
}
- if (str_idx == lim->subexp_to)
+ return (boundaries & 2) ? 1 : 0;
+}
+
+static int
+check_dst_limits_calc_pos (mctx, limit, subexp_idx, from_node, str_idx, bkref_idx)
+ re_match_context_t *mctx;
+ int limit, subexp_idx, from_node, str_idx, bkref_idx;
+{
+ struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
+ int boundaries;
+
+ /* If we are outside the range of the subexpression, return -1 or 1. */
+ if (str_idx < lim->subexp_from)
+ return -1;
+
+ if (lim->subexp_to < str_idx)
return 1;
- else
+
+ /* If we are within the subexpression, return 0. */
+ boundaries = (str_idx == lim->subexp_from);
+ boundaries |= (str_idx == lim->subexp_to) << 1;
+ if (boundaries == 0)
return 0;
+
+ /* Else, examine epsilon closure. */
+ return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
+ from_node, bkref_idx);
}
/* Check the limitations of sub expressions LIMITS, and remove the nodes
@@ -2009,115 +2028,91 @@ check_subexp_limits (dfa, dest_nodes, candidates, limits, bkref_ents, str_idx)
}
static reg_errcode_t
-sift_states_bkref (mctx, sctx, str_idx, dest_nodes)
+sift_states_bkref (mctx, sctx, str_idx, candidates)
re_match_context_t *mctx;
re_sift_context_t *sctx;
int str_idx;
- re_node_set *dest_nodes;
+ const re_node_set *candidates;
{
re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
int node_idx, node;
re_sift_context_t local_sctx;
- const re_node_set *candidates;
- candidates = ((mctx->state_log[str_idx] == NULL) ? &empty_set
- : &mctx->state_log[str_idx]->nodes);
+ int first_idx = search_cur_bkref_entry (mctx, str_idx);
+
+ if (first_idx == -1)
+ return REG_NOERROR;
+
local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized. */
for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
{
- int cur_bkref_idx = re_string_cur_idx (&mctx->input);
+ int enabled_idx;
re_token_type_t type;
+ struct re_backref_cache_entry *entry;
node = candidates->elems[node_idx];
type = dfa->nodes[node].type;
- if (node == sctx->cur_bkref && str_idx == cur_bkref_idx)
- continue;
/* Avoid infinite loop for the REs like "()\1+". */
if (node == sctx->last_node && str_idx == sctx->last_str_idx)
continue;
- if (type == OP_BACK_REF)
+ if (type != OP_BACK_REF)
+ continue;
+
+ entry = mctx->bkref_ents + first_idx;
+ enabled_idx = first_idx;
+ do
{
- int enabled_idx = search_cur_bkref_entry (mctx, str_idx);
- for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
- {
- int disabled_idx, subexp_len, to_idx, dst_node;
- struct re_backref_cache_entry *entry;
- entry = mctx->bkref_ents + enabled_idx;
- if (entry->str_idx > str_idx)
- break;
- if (entry->node != node)
- continue;
- subexp_len = entry->subexp_to - entry->subexp_from;
- to_idx = str_idx + subexp_len;
- dst_node = (subexp_len ? dfa->nexts[node]
- : dfa->edests[node].elems[0]);
-
- if (to_idx > sctx->last_str_idx
- || sctx->sifted_states[to_idx] == NULL
- || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx],
- dst_node)
- || check_dst_limits (mctx, &sctx->limits, node,
- str_idx, dst_node, to_idx))
- continue;
- {
- re_dfastate_t *cur_state;
- entry->flag = 0;
- for (disabled_idx = enabled_idx + 1;
- disabled_idx < mctx->nbkref_ents; ++disabled_idx)
- {
- struct re_backref_cache_entry *entry2;
- entry2 = mctx->bkref_ents + disabled_idx;
- if (entry2->str_idx > str_idx)
- break;
- entry2->flag = (entry2->node == node) ? 1 : entry2->flag;
- }
+ int subexp_len, to_idx, dst_node;
+ re_dfastate_t *cur_state;
- if (local_sctx.sifted_states == NULL)
- {
- local_sctx = *sctx;
- err = re_node_set_init_copy (&local_sctx.limits,
- &sctx->limits);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- local_sctx.last_node = node;
- local_sctx.last_str_idx = str_idx;
- err = re_node_set_insert (&local_sctx.limits, enabled_idx);
- if (BE (err < 0, 0))
- {
- err = REG_ESPACE;
- goto free_return;
- }
- cur_state = local_sctx.sifted_states[str_idx];
- err = sift_states_backward (mctx, &local_sctx);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- if (sctx->limited_states != NULL)
- {
- err = merge_state_array (dfa, sctx->limited_states,
- local_sctx.sifted_states,
- str_idx + 1);
- if (BE (err != REG_NOERROR, 0))
- goto free_return;
- }
- local_sctx.sifted_states[str_idx] = cur_state;
- re_node_set_remove (&local_sctx.limits, enabled_idx);
- /* We must not use the variable entry here, since
- mctx->bkref_ents might be realloced. */
- mctx->bkref_ents[enabled_idx].flag = 1;
- }
+ if (entry->node != node)
+ continue;
+ subexp_len = entry->subexp_to - entry->subexp_from;
+ to_idx = str_idx + subexp_len;
+ dst_node = (subexp_len ? dfa->nexts[node]
+ : dfa->edests[node].elems[0]);
+
+ if (to_idx > sctx->last_str_idx
+ || sctx->sifted_states[to_idx] == NULL
+ || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
+ || check_dst_limits (mctx, &sctx->limits, node,
+ str_idx, dst_node, to_idx))
+ continue;
+
+ if (local_sctx.sifted_states == NULL)
+ {
+ local_sctx = *sctx;
+ err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
}
- enabled_idx = search_cur_bkref_entry (mctx, str_idx);
- for (; enabled_idx < mctx->nbkref_ents; ++enabled_idx)
+ local_sctx.last_node = node;
+ local_sctx.last_str_idx = str_idx;
+ err = re_node_set_insert (&local_sctx.limits, enabled_idx);
+ if (BE (err < 0, 0))
{
- struct re_backref_cache_entry *entry;
- entry = mctx->bkref_ents + enabled_idx;
- if (entry->str_idx > str_idx)
- break;
- if (entry->node == node)
- entry->flag = 0;
+ err = REG_ESPACE;
+ goto free_return;
}
+ cur_state = local_sctx.sifted_states[str_idx];
+ err = sift_states_backward (mctx, &local_sctx);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ if (sctx->limited_states != NULL)
+ {
+ err = merge_state_array (dfa, sctx->limited_states,
+ local_sctx.sifted_states,
+ str_idx + 1);
+ if (BE (err != REG_NOERROR, 0))
+ goto free_return;
+ }
+ local_sctx.sifted_states[str_idx] = cur_state;
+ re_node_set_remove (&local_sctx.limits, enabled_idx);
+
+ /* mctx->bkref_ents may have changed, reload the pointer. */
+ entry = mctx->bkref_ents + enabled_idx;
}
+ while (enabled_idx++, entry++->more);
}
err = REG_NOERROR;
free_return:
@@ -2611,15 +2606,15 @@ get_subexp (mctx, bkref_node, bkref_str_idx)
const char *buf = (const char *) re_string_get_buffer (&mctx->input);
/* Return if we have already checked BKREF_NODE at BKREF_STR_IDX. */
int cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
- for (; cache_idx < mctx->nbkref_ents; ++cache_idx)
+ if (cache_idx != -1)
{
- const struct re_backref_cache_entry *entry
- = &mctx->bkref_ents[cache_idx];
- if (entry->str_idx > bkref_str_idx)
- break;
- if (entry->node == bkref_node)
- return REG_NOERROR; /* We already checked it. */
+ const struct re_backref_cache_entry *entry = mctx->bkref_ents + cache_idx;
+ do
+ if (entry->node == bkref_node)
+ return REG_NOERROR; /* We already checked it. */
+ while (entry++->more);
}
+
subexp_num = dfa->nodes[bkref_node].opr.idx - 1;
/* For each sub expression */
@@ -2871,7 +2866,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
{
if (next_nodes.nelem)
{
- err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
+ err = expand_bkref_cache (mctx, &next_nodes, str_idx,
subexp_num, type);
if (BE ( err != REG_NOERROR, 0))
{
@@ -2920,7 +2915,7 @@ check_arrival (mctx, path, top_node, top_str, last_node, last_str,
re_node_set_free (&next_nodes);
return err;
}
- err = expand_bkref_cache (mctx, &next_nodes, str_idx, last_str,
+ err = expand_bkref_cache (mctx, &next_nodes, str_idx,
subexp_num, type);
if (BE ( err != REG_NOERROR, 0))
{
@@ -2969,6 +2964,7 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
re_node_set *cur_nodes, *next_nodes;
{
re_dfa_t *const dfa = mctx->dfa;
+ int result;
int cur_idx;
reg_errcode_t err;
re_node_set union_set;
@@ -3002,8 +2998,8 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
return err;
}
}
- err = re_node_set_insert (&union_set, next_node);
- if (BE (err < 0, 0))
+ result = re_node_set_insert (&union_set, next_node);
+ if (BE (result < 0, 0))
{
re_node_set_free (&union_set);
return REG_ESPACE;
@@ -3022,8 +3018,8 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
if (naccepted
|| check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
{
- err = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
- if (BE (err < 0, 0))
+ result = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
+ if (BE (result < 0, 0))
{
re_node_set_free (&union_set);
return REG_ESPACE;
@@ -3140,24 +3136,26 @@ check_arrival_expand_ecl_sub (dfa, dst_nodes, target, ex_subexp, type)
in MCTX->BKREF_ENTS. */
static reg_errcode_t
-expand_bkref_cache (mctx, cur_nodes, cur_str, last_str, subexp_num,
+expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
type)
re_match_context_t *mctx;
- int cur_str, last_str, subexp_num, type;
+ int cur_str, subexp_num, type;
re_node_set *cur_nodes;
{
re_dfa_t *const dfa = mctx->dfa;
reg_errcode_t err;
- int cache_idx, cache_idx_start;
- /* The current state. */
+ int cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
+ struct re_backref_cache_entry *ent;
- cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
- for (cache_idx = cache_idx_start; cache_idx < mctx->nbkref_ents; ++cache_idx)
+ if (cache_idx_start == -1)
+ return REG_NOERROR;
+
+ restart:
+ ent = mctx->bkref_ents + cache_idx_start;
+ do
{
int to_idx, next_node;
- struct re_backref_cache_entry *ent = mctx->bkref_ents + cache_idx;
- if (ent->str_idx > cur_str)
- break;
+
/* Is this entry ENT is appropriate? */
if (!re_node_set_contains (cur_nodes, ent->node))
continue; /* No. */
@@ -3186,8 +3184,7 @@ expand_bkref_cache (mctx, cur_nodes, cur_str, last_str, subexp_num,
return err;
}
/* TODO: It is still inefficient... */
- cache_idx = cache_idx_start - 1;
- continue;
+ goto restart;
}
else
{
@@ -3222,6 +3219,7 @@ expand_bkref_cache (mctx, cur_nodes, cur_str, last_str, subexp_num,
return err;
}
}
+ while (ent++->more);
return REG_NOERROR;
}
@@ -3463,6 +3461,7 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
bitset *dests_ch;
{
reg_errcode_t err;
+ int result;
int i, j, k;
int ndests; /* Number of the destinations from `state'. */
bitset accepts; /* Characters a node can accept. */
@@ -3611,8 +3610,8 @@ group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch)
}
/* Put the position in the current group. */
- err = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
- if (BE (err < 0, 0))
+ result = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
+ if (BE (result < 0, 0))
goto error_return;
/* If all characters are consumed, go to next node. */
@@ -4147,26 +4146,30 @@ match_ctx_add_entry (mctx, node, str_idx, from, to)
sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
mctx->abkref_ents *= 2;
}
+ if (mctx->nbkref_ents > 0
+ && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
+ mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
+
mctx->bkref_ents[mctx->nbkref_ents].node = node;
mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
- mctx->bkref_ents[mctx->nbkref_ents++].flag = 0;
+ mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
if (mctx->max_mb_elem_len < to - from)
mctx->max_mb_elem_len = to - from;
return REG_NOERROR;
}
-/* Search for the first entry which has the same str_idx.
- Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
+/* Search for the first entry which has the same str_idx, or -1 if none is
+ found. Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX. */
static int
search_cur_bkref_entry (mctx, str_idx)
re_match_context_t *mctx;
int str_idx;
{
- int left, right, mid;
- right = mctx->nbkref_ents;
+ int left, right, mid, last;
+ last = right = mctx->nbkref_ents;
for (left = 0; left < right;)
{
mid = (left + right) / 2;
@@ -4175,16 +4178,10 @@ search_cur_bkref_entry (mctx, str_idx)
else
right = mid;
}
- return left;
-}
-
-static void
-match_ctx_clear_flag (mctx)
- re_match_context_t *mctx;
-{
- int i;
- for (i = 0; i < mctx->nbkref_ents; ++i)
- mctx->bkref_ents[i].flag = 0;
+ if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
+ return left;
+ else
+ return -1;
}
/* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
@@ -4250,18 +4247,14 @@ match_ctx_add_sublast (subtop, node, str_idx)
}
static void
-sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx,
- check_subexp)
+sift_ctx_init (sctx, sifted_sts, limited_sts, last_node, last_str_idx)
re_sift_context_t *sctx;
re_dfastate_t **sifted_sts, **limited_sts;
- int last_node, last_str_idx, check_subexp;
+ int last_node, last_str_idx;
{
sctx->sifted_states = sifted_sts;
sctx->limited_states = limited_sts;
sctx->last_node = last_node;
sctx->last_str_idx = last_str_idx;
- sctx->check_subexp = check_subexp;
- sctx->cur_bkref = -1;
- sctx->cls_subexp_idx = -1;
re_node_set_init_empty (&sctx->limits);
}
diff --git a/posix/rxspencer/tests b/posix/rxspencer/tests
index f4a3fb3df5..30fff15946 100644
--- a/posix/rxspencer/tests
+++ b/posix/rxspencer/tests
@@ -505,3 +505,8 @@ Char \([a-z0-9_]*\)\[.* b Char xyz[k Char xyz[k xyz
a?b - ab ab
-\{0,1\}[0-9]*$ b -5 -5
a*a*a*a*a*a*a* & aaaaaa aaaaaa
+(\b){0} - x @x -
+\(\b\)\{0,0\} b abc @abc -
+a(\b){0}c - ac ac -
+a(.*)b(\0){0}c - abc abc @bc,-
+a(.*)b(\0){0}c - axbc axbc x,-