summary | refs | log | tree | commit | diff
path: root/posix/regex_internal.h
diff options
context:
space:
mode:
author Ulrich Drepper <drepper@redhat.com> 2003-11-18 23:40:59 +0000
committer Ulrich Drepper <drepper@redhat.com> 2003-11-18 23:40:59 +0000
commit ad7f28c29d06ddb4506d0d75e089732740b5bd2b (patch)
tree de9ee40c2d213a4113e3da2b8cedc3d505386bc1 /posix/regex_internal.h
parent 5146ec9a2c092cb74b5cd0eb8b5e938b46f1631b (diff)
Update.
* posix/regex_internal.h (re_token_type_t): Remove unused ALT, END_OF_RE_TOKEN_T and SUBEXP. Reorder values. Add OP_UTF8_PERIOD and EPSILON_BIT.
  (IS_EPSILON_NODE): Just test if EPSILON_BIT is set.
  (ACCEPT_MB_NODE): Return 1 for OP_UTF8_PERIOD as well.
* posix/regex_internal.c (create_ci_newstate, create_cd_newstate): Handle OP_UTF8_PERIOD.
  (re_string_reconstruct): Set valid_len for single byte char searching with no translation and case sensitivity.
* posix/regcomp.c (re_compile_fastmap_iter, calc_first): Handle OP_UTF8_PERIOD.
  (re_compile_internal): Don't call optimize_utf8 if preg->translate != NULL.
  (optimize_utf8): Remove BACK_SLASH case. Transform OP_PERIOD into OP_UTF8_PERIOD if the searching can be optimized.
  (parse_bracket_exp): Don't create SIMPLE_BRACKET if it doesn't have any bits set and COMPLEX_BRACKET is used.
* posix/regexec.c (transit_state_mb): Fix comment typo.
  (group_nodes_into_DFAstates, check_node_accept): Handle OP_UTF8_PERIOD.
  (check_node_accept_bytes): Likewise. Reorder slightly so that re_string_char_size_at and re_string_elem_size_at are called only when needed.
* posix/bug-regex20.c (BRE, ERE): Define.
  (tests): Use them to make lines shorter. Expect . to be optimized. Add lots of new tests.
  (main): Run (ATM just case sensitive) test with backwards searching as well.
2003-11-18 Jakub Jelinek <jakub@redhat.com>
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r-- posix/regex_internal.h 59
1 files changed, 27 insertions, 32 deletions
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index 9fcf865f65..f905d2b510 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -167,8 +167,31 @@ typedef enum
{
NON_TYPE = 0,
+ /* Node type, These are used by token, node, tree. */
+ CHARACTER = 1,
+ END_OF_RE = 2,
+ SIMPLE_BRACKET = 3,
+ OP_BACK_REF = 4,
+ OP_PERIOD = 5,
+#ifdef RE_ENABLE_I18N
+ COMPLEX_BRACKET = 6,
+ OP_UTF8_PERIOD = 7,
+#endif /* RE_ENABLE_I18N */
+
+ EPSILON_BIT = 8,
+ OP_OPEN_SUBEXP = EPSILON_BIT | 0,
+ OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
+ OP_ALT = EPSILON_BIT | 2,
+ OP_DUP_ASTERISK = EPSILON_BIT | 3,
+ OP_DUP_PLUS = EPSILON_BIT | 4,
+ OP_DUP_QUESTION = EPSILON_BIT | 5,
+ ANCHOR = EPSILON_BIT | 6,
+
+ /* Tree type, these are used only by tree. */
+ CONCAT = 16,
+
/* Token type, these are used only by token. */
- OP_OPEN_BRACKET,
+ OP_OPEN_BRACKET = 17,
OP_CLOSE_BRACKET,
OP_CHARSET_RANGE,
OP_OPEN_DUP_NUM,
@@ -184,32 +207,8 @@ typedef enum
OP_NOTWORD,
OP_SPACE,
OP_NOTSPACE,
- BACK_SLASH,
+ BACK_SLASH
- /* Tree type, these are used only by tree. */
- CONCAT,
- ALT,
- SUBEXP,
- SIMPLE_BRACKET,
-#ifdef RE_ENABLE_I18N
- COMPLEX_BRACKET,
-#endif /* RE_ENABLE_I18N */
-
- /* Node type, These are used by token, node, tree. */
- OP_OPEN_SUBEXP,
- OP_CLOSE_SUBEXP,
- OP_PERIOD,
- CHARACTER,
- END_OF_RE,
- OP_ALT,
- OP_DUP_ASTERISK,
- OP_DUP_PLUS,
- OP_DUP_QUESTION,
- OP_BACK_REF,
- ANCHOR,
-
- /* Dummy marker. */
- END_OF_RE_TOKEN_T
} re_token_type_t;
#ifdef RE_ENABLE_I18N
@@ -284,13 +283,9 @@ typedef struct
#endif
} re_token_t;
-#define IS_EPSILON_NODE(type) \
- ((type) == OP_ALT || (type) == OP_DUP_ASTERISK || (type) == OP_DUP_PLUS \
- || (type) == OP_DUP_QUESTION || (type) == ANCHOR \
- || (type) == OP_OPEN_SUBEXP || (type) == OP_CLOSE_SUBEXP)
-
+#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
#define ACCEPT_MB_NODE(type) \
- ((type) == COMPLEX_BRACKET || (type) == OP_PERIOD)
+ ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
struct re_string_t
{