summaryrefslogtreecommitdiff
path: root/posix/regex_internal.h
diff options
context:
space:
mode:
author: Ulrich Drepper <drepper@redhat.com> 2002-04-24 21:54:53 +0000
committer: Ulrich Drepper <drepper@redhat.com> 2002-04-24 21:54:53 +0000
commit: 612546c60dd28d7af44fbb2bc98c69c33b4a0c49 (patch)
tree: 11dc6bf94bd7beb3271366b33a86be162b3bfa6e /posix/regex_internal.h
parent: be479a6dfe81c5b426e2cbabd62be2c042ab2d45 (diff)
Update.
2002-04-22  Isamu Hasegawa  <isamu@yamato.ibm.com>

    * posix/regcomp.c (re_compile_internal): Adapt it to new interface of
    buffer building functions.
    * posix/regex_internal.c (re_string_allocate): New function.
    (re_string_realloc_buffers): New function.
    (re_string_skip_chars): New function.
    (re_string_reconstruct): New function.
    (re_string_construct): Adapt it to new interface of buffer building
    functions.
    (re_string_construct_common): Likewise.
    (build_wcs_buffer): Likewise.
    (build_wcs_upper_buffer): Likewise.
    (build_upper_buffer): Likewise.
    (re_string_translate_buffer): Likewise.
    (re_string_context_at): Adapt it to variable length buffers.
    * posix/regex_internal.h (re_string_t): Add new fields to handle
    variable length buffers.
    (re_match_context_t): Likewise.
    * posix/regexec.c (re_search_internal): Adapt it to new interface of
    re_string_t and re_match_context_t.
    (acquire_init_state_context): Likewise.
    (check_matching): Likewise.
    (check_halt_state_context): Likewise.
    (proceed_next_node): Likewise.
    (set_regs): Likewise.
    (sift_states_backward): Likewise.
    (clean_state_log_if_need): Likewise.
    (sift_states_iter_mb): Likewise.
    (sift_states_iter_bkref): Likewise.
    (add_epsilon_backreference): Likewise.
    (transit_state): Likewise.
    (transit_state_sb): Likewise.
    (transit_state_mb): Likewise.
    (transit_state_bkref): Likewise.
    (transit_state_bkref_loop): Likewise.
    (check_node_accept): Likewise.
    (match_ctx_init): Likewise.
    (extend_buffers): New function.

2002-04-21  Bruno Haible  <bruno@clisp.org>

    * iconvdata/tst-table.sh: For the second check, use the truncated
    GB18030 charmap table, like for the first check.
Diffstat (limited to 'posix/regex_internal.h')
-rw-r--r-- posix/regex_internal.h 112
1 files changed, 75 insertions, 37 deletions
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index bb28102cc9..f676ae2746 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -201,33 +201,67 @@ typedef struct
struct re_string_t
{
+ /* Points to the raw buffer, i.e. the original string passed as an
+ argument to regexec(), re_search(), etc. */
+ const unsigned char *raw_mbs;
+ /* Index in RAW_MBS. Each character mbs[i] corresponds to
+ raw_mbs[raw_mbs_idx + i]. */
+ int raw_mbs_idx;
/* Store the multibyte string. In case of "case insensitive mode" like
- REG_ICASE, upper cases of the string are stored. */
- const unsigned char *mbs;
+ REG_ICASE, the upper-cased string is stored; otherwise MBS points
+ to the same address as RAW_MBS. */
+ unsigned char *mbs;
/* Store the case sensitive multibyte string. In case of
"case insensitive mode", the original string are stored,
otherwise MBS_CASE points the same address that MBS points. */
- const unsigned char *mbs_case;
- int cur_idx;
- int len;
+ unsigned char *mbs_case;
#ifdef RE_ENABLE_I18N
/* Store the wide character string which is corresponding to MBS. */
wchar_t *wcs;
+ mbstate_t cur_state;
#endif
- /* 1 if mbs is allocated by regex library. */
- unsigned int mbs_alloc : 1;
- /* 1 if mbs_case is allocated by regex library. */
- unsigned int mbs_case_alloc : 1;
+ /* The length of the valid characters in the buffers. */
+ int valid_len;
+ /* The length of the buffers MBS, MBS_CASE, and WCS. */
+ int bufs_len;
+ /* The index in MBS, which is updated by re_string_fetch_byte. */
+ int cur_idx;
+ /* This is length_of_RAW_MBS - RAW_MBS_IDX. */
+ int len;
+ /* The context of mbs[0]. We store the context independently, since
+ the context of mbs[0] may differ from that of raw_mbs[0], which is
+ the beginning of the input string. */
+ unsigned int tip_context;
+ /* The translation passed as a part of an argument of re_compile_pattern. */
+ RE_TRANSLATE_TYPE trans;
+ /* 1 if REG_ICASE. */
+ unsigned int icase : 1;
};
typedef struct re_string_t re_string_t;
+/* In case of REG_ICASE, we allocate the buffer dynamically for mbs. */
+#define MBS_ALLOCATED(pstr) (pstr->icase)
+/* In case that we need translation, we allocate the buffer dynamically
+ for mbs_case. Note that mbs == mbs_case if not REG_ICASE. */
+#define MBS_CASE_ALLOCATED(pstr) (pstr->trans != NULL)
+
+static reg_errcode_t re_string_allocate (re_string_t *pstr,
+ const unsigned char *str, int len,
+ int init_len,
+ RE_TRANSLATE_TYPE trans, int icase);
static reg_errcode_t re_string_construct (re_string_t *pstr,
const unsigned char *str, int len,
- RE_TRANSLATE_TYPE trans);
-static reg_errcode_t re_string_construct_toupper (re_string_t *pstr,
- const unsigned char *str,
- int len,
- RE_TRANSLATE_TYPE trans);
+ RE_TRANSLATE_TYPE trans, int icase);
+static reg_errcode_t re_string_reconstruct (re_string_t *pstr, int idx,
+ int eflags, int newline);
+static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
+ int new_buf_len);
+#ifdef RE_ENABLE_I18N
+static void build_wcs_buffer (re_string_t *pstr);
+static void build_wcs_upper_buffer (re_string_t *pstr);
+#endif /* RE_ENABLE_I18N */
+static void build_upper_buffer (re_string_t *pstr);
+static void re_string_translate_buffer (re_string_t *pstr);
static void re_string_destruct (re_string_t *pstr);
#ifdef RE_ENABLE_I18N
static int re_string_elem_size_at (const re_string_t *pstr, int idx);
@@ -253,8 +287,7 @@ static unsigned int re_string_context_at (const re_string_t *input, int idx,
#define re_string_cur_idx(pstr) ((pstr)->cur_idx)
#define re_string_get_buffer(pstr) ((pstr)->mbs)
#define re_string_length(pstr) ((pstr)->len)
-#define re_string_byte_at(pstr,idx) \
- ((pstr)->mbs[idx])
+#define re_string_byte_at(pstr,idx) ((pstr)->mbs[idx])
#define re_string_skip_bytes(pstr,idx) ((pstr)->cur_idx += (idx))
#define re_string_set_index(pstr,idx) ((pstr)->cur_idx = (idx))
@@ -279,27 +312,6 @@ struct bin_tree_t
};
typedef struct bin_tree_t bin_tree_t;
-struct re_backref_cache_entry
-{
- int node;
- int from;
- int to;
- int flag;
-};
-
-typedef struct
-{
- int eflags;
- int match_first;
- int match_last;
- int state_log_top;
- /* Back reference cache. */
- int nbkref_ents;
- int abkref_ents;
- struct re_backref_cache_entry *bkref_ents;
- int max_bkref_len;
-} re_match_context_t;
-
#define CONTEXT_WORD 1
#define CONTEXT_NEWLINE (CONTEXT_WORD << 1)
@@ -363,6 +375,32 @@ struct re_state_table_entry
re_dfastate_t **array;
};
+struct re_backref_cache_entry
+{
+ int node;
+ int from;
+ int to;
+ int flag;
+};
+
+typedef struct
+{
+ /* EFLAGS of the argument of regexec. */
+ int eflags;
+ /* Where the matching ends. */
+ int match_last;
+ /* The string object corresponding to the input string. */
+ re_string_t *input;
+ /* The state log used by the matcher. */
+ re_dfastate_t **state_log;
+ int state_log_top;
+ /* Back reference cache. */
+ int nbkref_ents;
+ int abkref_ents;
+ struct re_backref_cache_entry *bkref_ents;
+ int max_bkref_len;
+} re_match_context_t;
+
struct re_dfa_t
{
re_bitset_ptr_t word_char;