summaryrefslogtreecommitdiff
path: root/posix
diff options
context:
space:
mode:
authorRoland McGrath <roland@gnu.org>2005-02-16 12:31:10 +0000
committerRoland McGrath <roland@gnu.org>2005-02-16 12:31:10 +0000
commit833861be818bb5d45ab0c47370b84068dfb2fedf (patch)
tree2f1754a415c378f6b067f9158cc42df24d4641d2 /posix
parentc397a0064061e28a00eea873669e59f3983db791 (diff)
import later fedora-branch tweaks
Diffstat (limited to 'posix')
-rw-r--r--posix/Makefile13
-rw-r--r--posix/bug-regex19.c22
-rw-r--r--posix/execl.c50
-rw-r--r--posix/execle.c53
-rw-r--r--posix/execlp.c50
-rw-r--r--posix/execvp.c98
-rw-r--r--posix/getconf.c4
-rw-r--r--posix/regcomp.c970
-rw-r--r--posix/regex_internal.c73
-rw-r--r--posix/regex_internal.h36
-rw-r--r--posix/regexec.c144
-rw-r--r--posix/rxspencer/tests9
-rw-r--r--posix/tst-rxspencer.c20
-rw-r--r--posix/unistd.h114
14 files changed, 797 insertions, 859 deletions
diff --git a/posix/Makefile b/posix/Makefile
index 3af9e6681d..149283c65d 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -1,4 +1,4 @@
-# Copyright (C) 1991-1999, 2000-2003, 2004, 2005 Free Software Foundation, Inc.
+# Copyright (C) 1991-1999, 2000-2003, 2004 Free Software Foundation, Inc.
# This file is part of the GNU C Library.
# The GNU C Library is free software; you can redistribute it and/or
@@ -140,27 +140,16 @@ CFLAGS-waitid.c = -fexceptions
CFLAGS-waitpid.c = -fexceptions -fasynchronous-unwind-tables
CFLAGS-getopt.c = -fexceptions
CFLAGS-wordexp.c = -fexceptions
-CFLAGS-wordexp.os = -fomit-frame-pointer
CFLAGS-sysconf.c = -fexceptions -DGETCONF_DIR='"$(libexecdir)/getconf"'
CFLAGS-pathconf.c = -fexceptions
CFLAGS-fpathconf.c = -fexceptions
CFLAGS-spawn.c = -fexceptions
-CFLAGS-spawn.os = -fomit-frame-pointer
CFLAGS-spawnp.c = -fexceptions
-CFLAGS-spawnp.os = -fomit-frame-pointer
CFLAGS-spawni.c = -fexceptions
-CFLAGS-spawni.os = -fomit-frame-pointer
CFLAGS-pause.c = -fexceptions
CFLAGS-glob.c = $(uses-callbacks) -fexceptions
CFLAGS-glob64.c = $(uses-callbacks) -fexceptions
CFLAGS-getconf.c = -DGETCONF_DIR='"$(libexecdir)/getconf"'
-CFLAGS-execve.os = -fomit-frame-pointer
-CFLAGS-fexecve.os = -fomit-frame-pointer
-CFLAGS-execv.os = -fomit-frame-pointer
-CFLAGS-execle.os = -fomit-frame-pointer
-CFLAGS-execl.os = -fomit-frame-pointer
-CFLAGS-execvp.os = -fomit-frame-pointer
-CFLAGS-execlp.os = -fomit-frame-pointer
tstgetopt-ARGS = -a -b -cfoobar --required foobar --optional=bazbug \
--none random --col --color --colour
diff --git a/posix/bug-regex19.c b/posix/bug-regex19.c
index 3a173a6ca0..4000b19b4d 100644
--- a/posix/bug-regex19.c
+++ b/posix/bug-regex19.c
@@ -1,5 +1,5 @@
/* Regular expression tests.
- Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
@@ -170,22 +170,22 @@ static struct test_s
{ERE, "[^k]\\B[^k]", "kBk", 0, -1},
{ERE, "[^C]\\B[^C]", "CCCABA", 0, 3},
{ERE, "[^C]\\B[^C]", "CBC", 0, -1},
- {ERE, ".(\\b|\\B).", "=~AB", 0, 0},
+ {ERE, ".(\\b|\\B).", "=~AB", 0, 1},
{ERE, ".(\\b|\\B).", "A=C", 0, 0},
{ERE, ".(\\b|\\B).", "ABC", 0, 0},
- {ERE, ".(\\b|\\B).", "=~\\!", 0, 0},
- {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 0},
+ {ERE, ".(\\b|\\B).", "=~\\!", 0, -1},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~AB", 0, 1},
{ERE, "[^k](\\b|\\B)[^k]", "A=C", 0, 0},
{ERE, "[^k](\\b|\\B)[^k]", "ABC", 0, 0},
- {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 0},
- {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, 0},
- {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, 0},
- {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 0},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~kBD", 0, 3},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~\\!", 0, -1},
+ {ERE, "[^k](\\b|\\B)[^k]", "=~kB", 0, -1},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~AB", 0, 1},
{ERE, "[^C](\\b|\\B)[^C]", "A=C", 0, 0},
{ERE, "[^C](\\b|\\B)[^C]", "ABC", 0, 0},
- {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 0},
- {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, 0},
- {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, 0},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~CBD", 0, 3},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~\\!", 0, -1},
+ {ERE, "[^C](\\b|\\B)[^C]", "=~CB", 0, -1},
{ERE, "\\b([A]|[!]|.B)", "A=AC", 0, 0},
{ERE, "\\b([A]|[!]|.B)", "=AC", 0, 1},
{ERE, "\\b([A]|[!]|.B)", "!AC", 0, 1},
diff --git a/posix/execl.c b/posix/execl.c
index 12b59f9de3..62fd45db58 100644
--- a/posix/execl.c
+++ b/posix/execl.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,92,94,97,98,99,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,94,97,98,99,2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include <alloca.h>
#include <unistd.h>
#include <stdarg.h>
#include <stddef.h>
-#include <stdlib.h>
#include <string.h>
#include <stackinfo.h>
@@ -33,44 +33,46 @@
int
execl (const char *path, const char *arg, ...)
{
-#define INITIAL_ARGV_MAX 1024
- size_t argv_max = INITIAL_ARGV_MAX;
- const char *initial_argv[INITIAL_ARGV_MAX];
- const char **argv = initial_argv;
+ size_t argv_max = 1024;
+ const char **argv = alloca (argv_max * sizeof (const char *));
+ unsigned int i;
va_list args;
argv[0] = arg;
va_start (args, arg);
- unsigned int i = 0;
+ i = 0;
while (argv[i++] != NULL)
{
if (i == argv_max)
{
- argv_max *= 2;
- const char **nptr = realloc (argv == initial_argv ? NULL : argv,
- argv_max * sizeof (const char *));
- if (nptr == NULL)
+ const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
+
+#ifndef _STACK_GROWS_UP
+ if ((char *) nptr + argv_max == (char *) argv)
{
- if (argv != initial_argv)
- free (argv);
- return -1;
+ /* Stack grows down. */
+ argv = (const char **) memcpy (nptr, argv,
+ i * sizeof (const char *));
+ argv_max += i;
}
- if (argv == initial_argv)
- /* We have to copy the already filled-in data ourselves. */
- memcpy (nptr, argv, i * sizeof (const char *));
-
- argv = nptr;
+ else
+#endif
+#ifndef _STACK_GROWS_DOWN
+ if ((char *) argv + i == (char *) nptr)
+ /* Stack grows up. */
+ argv_max += i;
+ else
+#endif
+ /* We have a hole in the stack. */
+ argv = (const char **) memcpy (nptr, argv,
+ i * sizeof (const char *));
}
argv[i] = va_arg (args, const char *);
}
va_end (args);
- int ret = __execve (path, (char *const *) argv, __environ);
- if (argv != initial_argv)
- free (argv);
-
- return ret;
+ return __execve (path, (char *const *) argv, __environ);
}
libc_hidden_def (execl)
diff --git a/posix/execle.c b/posix/execle.c
index 70522ad2e5..2199ebeb74 100644
--- a/posix/execle.c
+++ b/posix/execle.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,97,98,99,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,97,98,99,2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include <alloca.h>
#include <unistd.h>
#include <stdarg.h>
#include <stddef.h>
-#include <stdlib.h>
#include <string.h>
#include <stackinfo.h>
@@ -29,45 +29,48 @@
int
execle (const char *path, const char *arg, ...)
{
-#define INITIAL_ARGV_MAX 1024
- size_t argv_max = INITIAL_ARGV_MAX;
- const char *initial_argv[INITIAL_ARGV_MAX];
- const char **argv = initial_argv;
+ size_t argv_max = 1024;
+ const char **argv = alloca (argv_max * sizeof (const char *));
+ const char *const *envp;
+ unsigned int i;
va_list args;
argv[0] = arg;
va_start (args, arg);
- unsigned int i = 0;
+ i = 0;
while (argv[i++] != NULL)
{
if (i == argv_max)
{
- argv_max *= 2;
- const char **nptr = realloc (argv == initial_argv ? NULL : argv,
- argv_max * sizeof (const char *));
- if (nptr == NULL)
+ const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
+
+#ifndef _STACK_GROWS_UP
+ if ((char *) nptr + argv_max == (char *) argv)
{
- if (argv != initial_argv)
- free (argv);
- return -1;
+ /* Stack grows down. */
+ argv = (const char **) memcpy (nptr, argv,
+ i * sizeof (const char *));
+ argv_max += i;
}
- if (argv == initial_argv)
- /* We have to copy the already filled-in data ourselves. */
- memcpy (nptr, argv, i * sizeof (const char *));
-
- argv = nptr;
+ else
+#endif
+#ifndef _STACK_GROWS_DOWN
+ if ((char *) argv + i == (char *) nptr)
+ /* Stack grows up. */
+ argv_max += i;
+ else
+#endif
+ /* We have a hole in the stack. */
+ argv = (const char **) memcpy (nptr, argv,
+ i * sizeof (const char *));
}
argv[i] = va_arg (args, const char *);
}
- const char *const *envp = va_arg (args, const char *const *);
+ envp = va_arg (args, const char *const *);
va_end (args);
- int ret = __execve (path, (char *const *) argv, (char *const *) envp);
- if (argv != initial_argv)
- free (argv);
-
- return ret;
+ return __execve (path, (char *const *) argv, (char *const *) envp);
}
libc_hidden_def (execle)
diff --git a/posix/execlp.c b/posix/execlp.c
index 66996a9367..ba8fc74c90 100644
--- a/posix/execlp.c
+++ b/posix/execlp.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,93,96,97,98,99,2002,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,93,96,97,98,99,2002 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -16,10 +16,10 @@
Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307 USA. */
+#include <alloca.h>
#include <unistd.h>
#include <stdarg.h>
#include <stddef.h>
-#include <stdlib.h>
#include <string.h>
#include <stackinfo.h>
@@ -30,44 +30,46 @@
int
execlp (const char *file, const char *arg, ...)
{
-#define INITIAL_ARGV_MAX 1024
- size_t argv_max = INITIAL_ARGV_MAX;
- const char *initial_argv[INITIAL_ARGV_MAX];
- const char **argv = initial_argv;
+ size_t argv_max = 1024;
+ const char **argv = alloca (argv_max * sizeof (const char *));
+ unsigned int i;
va_list args;
argv[0] = arg;
va_start (args, arg);
- unsigned int i = 0;
+ i = 0;
while (argv[i++] != NULL)
{
if (i == argv_max)
{
- argv_max *= 2;
- const char **nptr = realloc (argv == initial_argv ? NULL : argv,
- argv_max * sizeof (const char *));
- if (nptr == NULL)
+ const char **nptr = alloca ((argv_max *= 2) * sizeof (const char *));
+
+#ifndef _STACK_GROWS_UP
+ if ((char *) nptr + argv_max == (char *) argv)
{
- if (argv != initial_argv)
- free (argv);
- return -1;
+ /* Stack grows down. */
+ argv = (const char **) memcpy (nptr, argv,
+ i * sizeof (const char *));
+ argv_max += i;
}
- if (argv == initial_argv)
- /* We have to copy the already filled-in data ourselves. */
- memcpy (nptr, argv, i * sizeof (const char *));
-
- argv = nptr;
+ else
+#endif
+#ifndef _STACK_GROWS_DOWN
+ if ((char *) argv + i == (char *) nptr)
+ /* Stack grows up. */
+ argv_max += i;
+ else
+#endif
+ /* We have a hole in the stack. */
+ argv = (const char **) memcpy (nptr, argv,
+ i * sizeof (const char *));
}
argv[i] = va_arg (args, const char *);
}
va_end (args);
- int ret = execvp (file, (char *const *) argv);
- if (argv != initial_argv)
- free (argv);
-
- return ret;
+ return execvp (file, (char *const *) argv);
}
libc_hidden_def (execlp)
diff --git a/posix/execvp.c b/posix/execvp.c
index 9ccfd7fc22..d6f60c02e7 100644
--- a/posix/execvp.c
+++ b/posix/execvp.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991,92,1995-99,2002,2004,2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991,92,1995-99,2002,2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -18,7 +18,6 @@
#include <unistd.h>
#include <stdarg.h>
-#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
@@ -27,9 +26,9 @@
/* The file is accessible but it is not an executable file. Invoke
the shell to interpret it as a script. */
-static char **
+static void
internal_function
-allocate_scripts_argv (const char *file, char *const argv[])
+script_execute (const char *file, char *const argv[])
{
/* Count the arguments. */
int argc = 0;
@@ -37,19 +36,19 @@ allocate_scripts_argv (const char *file, char *const argv[])
;
/* Construct an argument list for the shell. */
- char **new_argv = (char **) malloc ((argc + 1) * sizeof (char *));
- if (new_argv != NULL)
- {
- new_argv[0] = (char *) _PATH_BSHELL;
- new_argv[1] = (char *) file;
- while (argc > 1)
- {
- new_argv[argc] = argv[argc - 1];
- --argc;
- }
- }
-
- return new_argv;
+ {
+ char *new_argv[argc + 1];
+ new_argv[0] = (char *) _PATH_BSHELL;
+ new_argv[1] = (char *) file;
+ while (argc > 1)
+ {
+ new_argv[argc] = argv[argc - 1];
+ --argc;
+ }
+
+ /* Execute the shell. */
+ __execve (new_argv[0], new_argv, __environ);
+ }
}
@@ -67,58 +66,42 @@ execvp (file, argv)
return -1;
}
- char **script_argv = NULL;
-
if (strchr (file, '/') != NULL)
{
/* Don't search when it contains a slash. */
__execve (file, argv, __environ);
if (errno == ENOEXEC)
- {
- script_argv = allocate_scripts_argv (file, argv);
- if (script_argv != NULL)
- {
- __execve (script_argv[0], script_argv, __environ);
-
- free (script_argv);
- }
- }
+ script_execute (file, argv);
}
else
{
- char *path = getenv ("PATH");
- bool path_malloc = false;
+ int got_eacces = 0;
+ char *path, *p, *name;
+ size_t len;
+ size_t pathlen;
+
+ path = getenv ("PATH");
if (path == NULL)
{
/* There is no `PATH' in the environment.
The default search path is the current directory
followed by the path `confstr' returns for `_CS_PATH'. */
- size_t len = confstr (_CS_PATH, (char *) NULL, 0);
- path = (char *) malloc (1 + len);
- if (path == NULL)
- return -1;
+ len = confstr (_CS_PATH, (char *) NULL, 0);
+ path = (char *) __alloca (1 + len);
path[0] = ':';
(void) confstr (_CS_PATH, path + 1, len);
- path_malloc = true;
}
- size_t len = strlen (file) + 1;
- size_t pathlen = strlen (path);
- char *name = malloc (pathlen + len + 1);
- if (name == NULL)
- {
- if (path_malloc)
- free (path);
- return -1;
- }
+ len = strlen (file) + 1;
+ pathlen = strlen (path);
+ name = __alloca (pathlen + len + 1);
/* Copy the file name at the top. */
name = (char *) memcpy (name + pathlen + 1, file, len);
/* And add the slash. */
*--name = '/';
- bool got_eacces = false;
- char *p = path;
+ p = path;
do
{
char *startp;
@@ -137,21 +120,7 @@ execvp (file, argv)
__execve (startp, argv, __environ);
if (errno == ENOEXEC)
- {
- if (script_argv == NULL)
- {
- script_argv = allocate_scripts_argv (file, argv);
- if (script_argv == NULL)
- {
- /* A possible EACCES error is not as important as
- the ENOMEM. */
- got_eacces = false;
- break;
- }
- }
-
- __execve (script_argv[0], script_argv, __environ);
- }
+ script_execute (startp, argv);
switch (errno)
{
@@ -159,7 +128,7 @@ execvp (file, argv)
/* Record the we got a `Permission denied' error. If we end
up finding no executable we can use, we want to diagnose
that we did find one but were denied access. */
- got_eacces = true;
+ got_eacces = 1;
case ENOENT:
case ESTALE:
case ENOTDIR:
@@ -187,11 +156,6 @@ execvp (file, argv)
/* At least one failure was due to permissions, so report that
error. */
__set_errno (EACCES);
-
- free (script_argv);
- free (name);
- if (path_malloc)
- free (path);
}
/* Return the error from the last attempt (probably ENOENT). */
diff --git a/posix/getconf.c b/posix/getconf.c
index 0cc0c0d7b5..4ce4f8e413 100644
--- a/posix/getconf.c
+++ b/posix/getconf.c
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991, 92, 1995-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991, 92, 1995-2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -964,7 +964,7 @@ main (int argc, char *argv[])
Copyright (C) %s Free Software Foundation, Inc.\n\
This is free software; see the source for copying conditions. There is NO\n\
warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
-"), "2005");
+"), "2004");
fprintf (stderr, gettext ("Written by %s.\n"), "Roland McGrath");
return 0;
}
diff --git a/posix/regcomp.c b/posix/regcomp.c
index 1a5f7952c3..5de5bf725a 100644
--- a/posix/regcomp.c
+++ b/posix/regcomp.c
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -33,21 +33,19 @@ static reg_errcode_t create_initial_state (re_dfa_t *dfa);
#ifdef RE_ENABLE_I18N
static void optimize_utf8 (re_dfa_t *dfa);
#endif
-static reg_errcode_t analyze (regex_t *preg);
-static reg_errcode_t create_initial_state (re_dfa_t *dfa);
-static reg_errcode_t preorder (bin_tree_t *root,
- reg_errcode_t (fn (void *, bin_tree_t *)),
- void *extra);
-static reg_errcode_t postorder (bin_tree_t *root,
- reg_errcode_t (fn (void *, bin_tree_t *)),
- void *extra);
-static reg_errcode_t optimize_subexps (void *extra, bin_tree_t *node);
-static reg_errcode_t lower_subexps (void *extra, bin_tree_t *node);
-static bin_tree_t *lower_subexp (reg_errcode_t *err, regex_t *preg,
- bin_tree_t *node);
-static reg_errcode_t calc_first (void *extra, bin_tree_t *node);
-static reg_errcode_t calc_next (void *extra, bin_tree_t *node);
-static reg_errcode_t link_nfa_nodes (void *extra, bin_tree_t *node);
+struct subexp_optimize
+{
+ re_dfa_t *dfa;
+ re_token_t *nodes;
+ int no_sub, re_nsub;
+};
+static bin_tree_t *optimize_subexps (struct subexp_optimize *so,
+ bin_tree_t *node, int sidx, int depth);
+static reg_errcode_t analyze (re_dfa_t *dfa);
+static reg_errcode_t analyze_tree (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_first (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_next (re_dfa_t *dfa, bin_tree_t *node);
+static void calc_epsdest (re_dfa_t *dfa, bin_tree_t *node);
static reg_errcode_t duplicate_node_closure (re_dfa_t *dfa, int top_org_node,
int top_clone_node, int root_node,
unsigned int constraint);
@@ -58,7 +56,7 @@ static int search_duplicated_node (re_dfa_t *dfa, int org_node,
static reg_errcode_t calc_eclosure (re_dfa_t *dfa);
static reg_errcode_t calc_eclosure_iter (re_node_set *new_set, re_dfa_t *dfa,
int node, int root);
-static reg_errcode_t calc_inveclosure (re_dfa_t *dfa);
+static void calc_inveclosure (re_dfa_t *dfa);
static int fetch_number (re_string_t *input, re_token_t *token,
reg_syntax_t syntax);
static void fetch_token (re_token_t *result, re_string_t *input,
@@ -140,14 +138,14 @@ static bin_tree_t *build_charclass_op (re_dfa_t *dfa,
int non_match, reg_errcode_t *err);
static bin_tree_t *create_tree (re_dfa_t *dfa,
bin_tree_t *left, bin_tree_t *right,
- re_token_type_t type);
-static bin_tree_t *create_token_tree (re_dfa_t *dfa,
- bin_tree_t *left, bin_tree_t *right,
- const re_token_t *token);
+ re_token_type_t type, int index);
+static bin_tree_t *re_dfa_add_tree_node (re_dfa_t *dfa,
+ bin_tree_t *left, bin_tree_t *right,
+ const re_token_t *token)
+ __attribute ((noinline));
static bin_tree_t *duplicate_tree (const bin_tree_t *src, re_dfa_t *dfa);
-static void free_token (re_token_t *node);
-static reg_errcode_t free_tree (void *extra, bin_tree_t *node);
-static reg_errcode_t mark_opt_subexp (void *extra, bin_tree_t *node);
+static void mark_opt_subexp (const bin_tree_t *src, re_dfa_t *dfa);
+static void mark_opt_subexp_iter (const bin_tree_t *src, re_dfa_t *dfa, int idx);
/* This table gives an error message for each of the error codes listed
in regex.h. Obviously the order here has to be same as there.
@@ -600,7 +598,16 @@ free_dfa_content (re_dfa_t *dfa)
if (dfa->nodes)
for (i = 0; i < dfa->nodes_len; ++i)
- free_token (dfa->nodes + i);
+ {
+ re_token_t *node = dfa->nodes + i;
+#ifdef RE_ENABLE_I18N
+ if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
+ free_charset (node->opr.mbcset);
+ else
+#endif /* RE_ENABLE_I18N */
+ if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
+ re_free (node->opr.sbcset);
+ }
re_free (dfa->nexts);
for (i = 0; i < dfa->nodes_len; ++i)
{
@@ -804,17 +811,29 @@ re_compile_internal (preg, pattern, length, syntax)
if (BE (dfa->str_tree == NULL, 0))
goto re_compile_internal_free_return;
- /* Analyze the tree and create the nfa. */
- err = analyze (preg);
- if (BE (err != REG_NOERROR, 0))
- goto re_compile_internal_free_return;
-
#ifdef RE_ENABLE_I18N
/* If possible, do searching in single byte encoding to speed things up. */
if (dfa->is_utf8 && !(syntax & RE_ICASE) && preg->translate == NULL)
optimize_utf8 (dfa);
#endif
+ if (preg->re_nsub > 0)
+ {
+ struct subexp_optimize so;
+
+ so.dfa = dfa;
+ so.nodes = dfa->nodes;
+ so.no_sub = preg->no_sub;
+ so.re_nsub = preg->re_nsub;
+ dfa->str_tree = optimize_subexps (&so, dfa->str_tree, -1, 0);
+ }
+
+ /* Analyze the tree and collect information which is necessary to
+ create the dfa. */
+ err = analyze (dfa);
+ if (BE (err != REG_NOERROR, 0))
+ goto re_compile_internal_free_return;
+
/* Then create the initial state of the dfa. */
err = create_initial_state (dfa);
@@ -875,9 +894,9 @@ init_dfa (dfa, pat_len)
codeset_name = nl_langinfo (CODESET);
# else
codeset_name = getenv ("LC_ALL");
- if (codeset_name == NULL || codeset_name[0] == '\0')
+ if (codeset_name == NULL || codeset[0] == '\0')
codeset_name = getenv ("LC_CTYPE");
- if (codeset_name == NULL || codeset_name[0] == '\0')
+ if (codeset_name == NULL || codeset[0] == '\0')
codeset_name = getenv ("LANG");
if (codeset_name == NULL)
codeset_name = "";
@@ -979,7 +998,7 @@ create_initial_state (dfa)
/* Initial states have the epsilon closure of the node which is
the first node of the regular expression. */
- first = dfa->str_tree->first->node_idx;
+ first = dfa->str_tree->first;
dfa->init_node = first;
err = re_node_set_init_copy (&init_nodes, dfa->eclosures + first);
if (BE (err != REG_NOERROR, 0))
@@ -1085,11 +1104,10 @@ optimize_utf8 (dfa)
case OP_ALT:
case END_OF_RE:
case OP_DUP_ASTERISK:
+ case OP_DUP_QUESTION:
case OP_OPEN_SUBEXP:
case OP_CLOSE_SUBEXP:
break;
- case COMPLEX_BRACKET:
- return;
case SIMPLE_BRACKET:
/* Just double check. */
for (i = 0x80 / UINT_BITS; i < BITSET_UINTS; ++i)
@@ -1097,7 +1115,7 @@ optimize_utf8 (dfa)
return;
break;
default:
- abort ();
+ return;
}
if (mb_chars || has_period)
@@ -1117,14 +1135,90 @@ optimize_utf8 (dfa)
}
#endif
+static bin_tree_t *
+optimize_subexps (so, node, sidx, depth)
+ struct subexp_optimize *so;
+ bin_tree_t *node;
+ int sidx, depth;
+{
+ int idx, new_depth, new_sidx;
+ bin_tree_t *ret;
+ if (node == NULL)
+ return NULL;
+
+ new_depth = 0;
+ new_sidx = sidx;
+ if ((depth & 1) && node->type == CONCAT
+ && node->right && node->right->type == 0
+ && so->nodes[idx = node->right->node_idx].type == OP_CLOSE_SUBEXP)
+ {
+ new_depth = depth + 1;
+ if (new_depth == 2
+ || (so->nodes[idx].opr.idx < 8 * sizeof (so->dfa->used_bkref_map)
+ && so->dfa->used_bkref_map & (1 << so->nodes[idx].opr.idx)))
+ new_sidx = so->nodes[idx].opr.idx;
+ }
+ node->left = optimize_subexps (so, node->left, new_sidx, new_depth);
+ new_depth = (depth & 1) == 0 && node->type == CONCAT
+ && node->left && node->left->type == 0
+ && so->nodes[node->left->node_idx].type == OP_OPEN_SUBEXP
+ ? depth + 1 : 0;
+ node->right = optimize_subexps (so, node->right, sidx, new_depth);
+
+ if (node->type != CONCAT)
+ return node;
+ if ((depth & 1) == 0
+ && node->left
+ && node->left->type == 0
+ && so->nodes[idx = node->left->node_idx].type == OP_OPEN_SUBEXP)
+ ret = node->right;
+ else if ((depth & 1)
+ && node->right
+ && node->right->type == 0
+ && so->nodes[idx = node->right->node_idx].type == OP_CLOSE_SUBEXP)
+ ret = node->left;
+ else
+ return node;
+
+ if (so->nodes[idx].opr.idx < 8 * sizeof (so->dfa->used_bkref_map)
+ && so->dfa->used_bkref_map & (1 << so->nodes[idx].opr.idx))
+ return node;
+
+ if (!so->no_sub)
+ {
+ int i;
+
+ if (depth < 2)
+ return node;
+
+ if (so->dfa->subexp_map == NULL)
+ {
+ so->dfa->subexp_map = re_malloc (int, so->re_nsub);
+ if (so->dfa->subexp_map == NULL)
+ return node;
+
+ for (i = 0; i < so->re_nsub; i++)
+ so->dfa->subexp_map[i] = i;
+ }
+
+ i = so->nodes[idx].opr.idx;
+ assert (sidx < i);
+ so->dfa->subexp_map[i] = sidx;
+ }
+
+ so->nodes[idx].type = OP_DELETED_SUBEXP;
+ ret->parent = node->parent;
+ return ret;
+}
+
/* Analyze the structure tree, and calculate "first", "next", "edest",
"eclosure", and "inveclosure". */
static reg_errcode_t
-analyze (preg)
- regex_t *preg;
+analyze (dfa)
+ re_dfa_t *dfa;
{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
+ int i;
reg_errcode_t ret;
/* Allocate arrays. */
@@ -1132,321 +1226,225 @@ analyze (preg)
dfa->org_indices = re_malloc (int, dfa->nodes_alloc);
dfa->edests = re_malloc (re_node_set, dfa->nodes_alloc);
dfa->eclosures = re_malloc (re_node_set, dfa->nodes_alloc);
+ dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_alloc);
if (BE (dfa->nexts == NULL || dfa->org_indices == NULL || dfa->edests == NULL
- || dfa->eclosures == NULL, 0))
+ || dfa->eclosures == NULL || dfa->inveclosures == NULL, 0))
return REG_ESPACE;
-
- dfa->subexp_map = re_malloc (int, preg->re_nsub);
- if (dfa->subexp_map != NULL)
+ /* Initialize them. */
+ for (i = 0; i < dfa->nodes_len; ++i)
{
- int i;
- for (i = 0; i < preg->re_nsub; i++)
- dfa->subexp_map[i] = i;
- preorder (dfa->str_tree, optimize_subexps, dfa);
- for (i = 0; i < preg->re_nsub; i++)
- if (dfa->subexp_map[i] != i)
- break;
- if (i == preg->re_nsub)
- {
- free (dfa->subexp_map);
- dfa->subexp_map = NULL;
- }
+ dfa->nexts[i] = -1;
+ re_node_set_init_empty (dfa->edests + i);
+ re_node_set_init_empty (dfa->eclosures + i);
+ re_node_set_init_empty (dfa->inveclosures + i);
}
- ret = postorder (dfa->str_tree, lower_subexps, preg);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- ret = postorder (dfa->str_tree, calc_first, dfa);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- preorder (dfa->str_tree, calc_next, dfa);
- ret = preorder (dfa->str_tree, link_nfa_nodes, dfa);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
- ret = calc_eclosure (dfa);
- if (BE (ret != REG_NOERROR, 0))
- return ret;
-
- /* We only need this during the prune_impossible_nodes pass in regexec.c;
- skip it if p_i_n will not run, as calc_inveclosure can be quadratic. */
- if ((!preg->no_sub && preg->re_nsub > 0 && dfa->has_plural_match)
- || dfa->nbackref)
+ ret = analyze_tree (dfa, dfa->str_tree);
+ if (BE (ret == REG_NOERROR, 1))
{
- dfa->inveclosures = re_malloc (re_node_set, dfa->nodes_len);
- if (BE (dfa->inveclosures == NULL, 0))
- return REG_ESPACE;
- ret = calc_inveclosure (dfa);
+ ret = calc_eclosure (dfa);
+ if (ret == REG_NOERROR)
+ calc_inveclosure (dfa);
}
-
return ret;
}
-/* Our parse trees are very unbalanced, so we cannot use a stack to
- implement parse tree visits. Instead, we use parent pointers and
- some hairy code in these two functions. */
-static reg_errcode_t
-postorder (root, fn, extra)
- bin_tree_t *root;
- reg_errcode_t (fn (void *, bin_tree_t *));
- void *extra;
-{
- bin_tree_t *node, *prev;
-
- for (node = root; ; )
- {
- /* Descend down the tree, preferably to the left (or to the right
- if that's the only child). */
- while (node->left || node->right)
- if (node->left)
- node = node->left;
- else
- node = node->right;
-
- do
- {
- reg_errcode_t err = fn (extra, node);
- if (BE (err != REG_NOERROR, 0))
- return err;
- if (node->parent == NULL)
- return REG_NOERROR;
- prev = node;
- node = node->parent;
- }
- /* Go up while we have a node that is reached from the right. */
- while (node->right == prev || node->right == NULL);
- node = node->right;
- }
-}
-
-static reg_errcode_t
-preorder (root, fn, extra)
- bin_tree_t *root;
- reg_errcode_t (fn (void *, bin_tree_t *));
- void *extra;
-{
- bin_tree_t *node;
-
- for (node = root; ; )
- {
- reg_errcode_t err = fn (extra, node);
- if (BE (err != REG_NOERROR, 0))
- return err;
-
- /* Go to the left node, or up and to the right. */
- if (node->left)
- node = node->left;
- else
- {
- bin_tree_t *prev = NULL;
- while (node->right == prev || node->right == NULL)
- {
- prev = node;
- node = node->parent;
- if (!node)
- return REG_NOERROR;
- }
- node = node->right;
- }
- }
-}
+/* Helper functions for analyze.
+ This function calculate "first", "next", and "edest" for the subtree
+ whose root is NODE. */
-/* Optimization pass: if a SUBEXP is entirely contained, strip it and tell
- re_search_internal to map the inner one's opr.idx to this one's. Adjust
- backreferences as well. Requires a preorder visit. */
static reg_errcode_t
-optimize_subexps (extra, node)
- void *extra;
+analyze_tree (dfa, node)
+ re_dfa_t *dfa;
bin_tree_t *node;
{
- re_dfa_t *dfa = (re_dfa_t *) extra;
+ reg_errcode_t ret;
+ if (node->first == -1)
+ calc_first (dfa, node);
+ if (node->next == -1)
+ calc_next (dfa, node);
+ calc_epsdest (dfa, node);
- if (node->token.type == OP_BACK_REF && dfa->subexp_map)
+ /* Calculate "first" etc. for the left child. */
+ if (node->left != NULL)
{
- int idx = node->token.opr.idx;
- node->token.opr.idx = dfa->subexp_map[idx];
- dfa->used_bkref_map |= 1 << node->token.opr.idx;
+ ret = analyze_tree (dfa, node->left);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
}
-
- else if (node->token.type == SUBEXP
- && node->left && node->left->token.type == SUBEXP)
+ /* Calculate "first" etc. for the right child. */
+ if (node->right != NULL)
{
- int other_idx = node->left->token.opr.idx;
-
- node->left = node->left->left;
- if (node->left)
- node->left->parent = node;
-
- dfa->subexp_map[other_idx] = dfa->subexp_map[node->token.opr.idx];
- if (other_idx < 8 * sizeof (dfa->used_bkref_map))
- dfa->used_bkref_map &= ~(1 << other_idx);
+ ret = analyze_tree (dfa, node->right);
+ if (BE (ret != REG_NOERROR, 0))
+ return ret;
}
-
return REG_NOERROR;
}
-/* Lowering pass: Turn each SUBEXP node into the appropriate concatenation
- of OP_OPEN_SUBEXP, the body of the SUBEXP (if any) and OP_CLOSE_SUBEXP. */
-static reg_errcode_t
-lower_subexps (extra, node)
- void *extra;
+/* Calculate "first" for the node NODE. */
+static void
+calc_first (dfa, node)
+ re_dfa_t *dfa;
bin_tree_t *node;
{
- regex_t *preg = (regex_t *) extra;
- reg_errcode_t err = REG_NOERROR;
+ int idx, type;
+ idx = node->node_idx;
+ type = (node->type == 0) ? dfa->nodes[idx].type : node->type;
- if (node->left && node->left->token.type == SUBEXP)
+ switch (type)
{
- node->left = lower_subexp (&err, preg, node->left);
- if (node->left)
- node->left->parent = node;
- }
- if (node->right && node->right->token.type == SUBEXP)
- {
- node->right = lower_subexp (&err, preg, node->right);
- if (node->right)
- node->right->parent = node;
+#ifdef DEBUG
+ case OP_OPEN_BRACKET:
+ case OP_CLOSE_BRACKET:
+ case OP_OPEN_DUP_NUM:
+ case OP_CLOSE_DUP_NUM:
+ case OP_DUP_PLUS:
+ case OP_NON_MATCH_LIST:
+ case OP_OPEN_COLL_ELEM:
+ case OP_CLOSE_COLL_ELEM:
+ case OP_OPEN_EQUIV_CLASS:
+ case OP_CLOSE_EQUIV_CLASS:
+ case OP_OPEN_CHAR_CLASS:
+ case OP_CLOSE_CHAR_CLASS:
+ /* These must not appear here. */
+ assert (0);
+#endif
+ case END_OF_RE:
+ case CHARACTER:
+ case OP_PERIOD:
+ case OP_DUP_ASTERISK:
+ case OP_DUP_QUESTION:
+#ifdef RE_ENABLE_I18N
+ case OP_UTF8_PERIOD:
+ case COMPLEX_BRACKET:
+#endif /* RE_ENABLE_I18N */
+ case SIMPLE_BRACKET:
+ case OP_BACK_REF:
+ case ANCHOR:
+ case OP_OPEN_SUBEXP:
+ case OP_CLOSE_SUBEXP:
+ node->first = idx;
+ break;
+ case OP_ALT:
+ node->first = idx;
+ break;
+ /* else fall through */
+ default:
+#ifdef DEBUG
+ assert (node->left != NULL);
+#endif
+ if (node->left->first == -1)
+ calc_first (dfa, node->left);
+ node->first = node->left->first;
+ break;
}
-
- return err;
}
-static bin_tree_t *
-lower_subexp (err, preg, node)
- reg_errcode_t *err;
- regex_t *preg;
- bin_tree_t *node;
-{
- re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- bin_tree_t *body = node->left;
- bin_tree_t *op, *cls, *tree1, *tree;
-
- if (preg->no_sub
- && (node->token.opr.idx >= 8 * sizeof (dfa->used_bkref_map)
- || !(dfa->used_bkref_map & (1 << node->token.opr.idx))))
- return node->left;
-
- /* Convert the SUBEXP node to the concatenation of an
- OP_OPEN_SUBEXP, the contents, and an OP_CLOSE_SUBEXP. */
- op = create_tree (dfa, NULL, NULL, OP_OPEN_SUBEXP);
- cls = create_tree (dfa, NULL, NULL, OP_CLOSE_SUBEXP);
- tree1 = body ? create_tree (dfa, body, cls, CONCAT) : cls;
- tree = create_tree (dfa, op, tree1, CONCAT);
- if (BE (tree == NULL || tree1 == NULL || op == NULL || cls == NULL, 0))
- {
- *err = REG_ESPACE;
- return NULL;
- }
-
- op->token.opr.idx = cls->token.opr.idx = node->token.opr.idx;
- op->token.opt_subexp = cls->token.opt_subexp = node->token.opt_subexp;
- return tree;
-}
+/* Calculate "next" for the node NODE. */
-/* Pass 1 in building the NFA: compute FIRST and create unlinked automaton
- nodes. Requires a postorder visit. */
-static reg_errcode_t
-calc_first (extra, node)
- void *extra;
+static void
+calc_next (dfa, node)
+ re_dfa_t *dfa;
bin_tree_t *node;
{
- re_dfa_t *dfa = (re_dfa_t *) extra;
- if (node->token.type == CONCAT)
- {
- node->first = node->left->first;
- node->node_idx = node->left->node_idx;
- }
- else
+ int idx, type;
+ bin_tree_t *parent = node->parent;
+ if (parent == NULL)
{
- node->first = node;
- node->node_idx = re_dfa_add_node (dfa, node->token);
- if (BE (node->node_idx == -1, 0))
- return REG_ESPACE;
+ node->next = -1;
+ idx = node->node_idx;
+ if (node->type == 0)
+ dfa->nexts[idx] = node->next;
+ return;
}
- return REG_NOERROR;
-}
-/* Pass 2: compute NEXT on the tree. Preorder visit. */
-static reg_errcode_t
-calc_next (extra, node)
- void *extra;
- bin_tree_t *node;
-{
- switch (node->token.type)
+ idx = parent->node_idx;
+ type = (parent->type == 0) ? dfa->nodes[idx].type : parent->type;
+
+ switch (type)
{
case OP_DUP_ASTERISK:
- node->left->next = node;
+ node->next = idx;
break;
case CONCAT:
- node->left->next = node->right->first;
- node->right->next = node->next;
- break;
+ if (parent->left == node)
+ {
+ if (parent->right->first == -1)
+ calc_first (dfa, parent->right);
+ node->next = parent->right->first;
+ break;
+ }
+ /* else fall through */
default:
- if (node->left)
- node->left->next = node->next;
- if (node->right)
- node->right->next = node->next;
+ if (parent->next == -1)
+ calc_next (dfa, parent);
+ node->next = parent->next;
break;
}
- return REG_NOERROR;
+ idx = node->node_idx;
+ if (node->type == 0)
+ dfa->nexts[idx] = node->next;
}
-/* Pass 3: link all DFA nodes to their NEXT node (any order will do). */
-static reg_errcode_t
-link_nfa_nodes (extra, node)
- void *extra;
+/* Calculate "edest" for the node NODE. */
+
+static void
+calc_epsdest (dfa, node)
+ re_dfa_t *dfa;
bin_tree_t *node;
{
- re_dfa_t *dfa = (re_dfa_t *) extra;
- int idx = node->node_idx;
- reg_errcode_t err = REG_NOERROR;
-
- switch (node->token.type)
+ int idx;
+ idx = node->node_idx;
+ if (node->type == 0)
{
- case CONCAT:
- break;
-
- case END_OF_RE:
- assert (node->next == NULL);
- break;
-
- case OP_DUP_ASTERISK:
- case OP_ALT:
- {
- int left, right;
- dfa->has_plural_match = 1;
- if (node->left != NULL)
- left = node->left->first->node_idx;
- else
- left = node->next->node_idx;
- if (node->right != NULL)
- right = node->right->first->node_idx;
- else
- right = node->next->node_idx;
- assert (left > -1);
- assert (right > -1);
- err = re_node_set_init_2 (dfa->edests + idx, left, right);
- }
- break;
-
- case ANCHOR:
- case OP_OPEN_SUBEXP:
- case OP_CLOSE_SUBEXP:
- err = re_node_set_init_1 (dfa->edests + idx, node->next->node_idx);
- break;
-
- case OP_BACK_REF:
- dfa->nexts[idx] = node->next->node_idx;
- if (node->token.type == OP_BACK_REF)
- re_node_set_init_1 (dfa->edests + idx, dfa->nexts[idx]);
- break;
-
- default:
- assert (!IS_EPSILON_NODE (node->token.type));
- dfa->nexts[idx] = node->next->node_idx;
- break;
+ if (dfa->nodes[idx].type == OP_DUP_ASTERISK
+ || dfa->nodes[idx].type == OP_DUP_QUESTION)
+ {
+ if (node->left->first == -1)
+ calc_first (dfa, node->left);
+ if (node->next == -1)
+ calc_next (dfa, node);
+ re_node_set_init_2 (dfa->edests + idx, node->left->first,
+ node->next);
+ }
+ else if (dfa->nodes[idx].type == OP_ALT)
+ {
+ int left, right;
+ if (node->left != NULL)
+ {
+ if (node->left->first == -1)
+ calc_first (dfa, node->left);
+ left = node->left->first;
+ }
+ else
+ {
+ if (node->next == -1)
+ calc_next (dfa, node);
+ left = node->next;
+ }
+ if (node->right != NULL)
+ {
+ if (node->right->first == -1)
+ calc_first (dfa, node->right);
+ right = node->right->first;
+ }
+ else
+ {
+ if (node->next == -1)
+ calc_next (dfa, node);
+ right = node->next;
+ }
+ re_node_set_init_2 (dfa->edests + idx, left, right);
+ }
+ else if (dfa->nodes[idx].type == ANCHOR
+ || dfa->nodes[idx].type == OP_OPEN_SUBEXP
+ || dfa->nodes[idx].type == OP_CLOSE_SUBEXP
+ || dfa->nodes[idx].type == OP_BACK_REF)
+ re_node_set_init_1 (dfa->edests + idx, node->next);
+ else
+ assert (!IS_EPSILON_NODE (dfa->nodes[idx].type));
}
-
- return err;
}
/* Duplicate the epsilon closure of the node ROOT_NODE.
@@ -1522,7 +1520,7 @@ duplicate_node_closure (dfa, top_org_node, top_clone_node, root_node,
else /* dfa->edests[org_node].nelem == 2 */
{
/* In case of the node can epsilon-transit, and it has two
- destinations. In the bin_tree_t and DFA, that's '|' and '*'. */
+ destinations. E.g. '|', '*', '+', '?'. */
org_dest = dfa->edests[org_node].elems[0];
re_node_set_empty (dfa->edests + clone_node);
/* Search for a duplicated node which satisfies the constraint. */
@@ -1593,13 +1591,16 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
int *new_idx, org_idx;
unsigned int constraint;
{
- int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx]);
+ int dup_idx = re_dfa_add_node (dfa, dfa->nodes[org_idx], 1);
if (BE (dup_idx == -1, 0))
return REG_ESPACE;
dfa->nodes[dup_idx].constraint = constraint;
if (dfa->nodes[org_idx].type == ANCHOR)
dfa->nodes[dup_idx].constraint |= dfa->nodes[org_idx].opr.ctx_type;
dfa->nodes[dup_idx].duplicated = 1;
+ re_node_set_init_empty (dfa->edests + dup_idx);
+ re_node_set_init_empty (dfa->eclosures + dup_idx);
+ re_node_set_init_empty (dfa->inveclosures + dup_idx);
/* Store the index of the original node. */
dfa->org_indices[dup_idx] = org_idx;
@@ -1607,26 +1608,21 @@ duplicate_node (new_idx, dfa, org_idx, constraint)
return REG_NOERROR;
}
-static reg_errcode_t
+static void
calc_inveclosure (dfa)
re_dfa_t *dfa;
{
- int src, idx, ret;
- for (idx = 0; idx < dfa->nodes_len; ++idx)
- re_node_set_init_empty (dfa->inveclosures + idx);
-
+ int src, idx, dest;
for (src = 0; src < dfa->nodes_len; ++src)
{
- int *elems = dfa->eclosures[src].elems;
+ if (dfa->nodes[src].type == OP_DELETED_SUBEXP)
+ continue;
for (idx = 0; idx < dfa->eclosures[src].nelem; ++idx)
{
- ret = re_node_set_insert_last (dfa->inveclosures + elems[idx], src);
- if (BE (ret == -1, 0))
- return REG_ESPACE;
+ dest = dfa->eclosures[src].elems[idx];
+ re_node_set_insert_last (dfa->inveclosures + dest, src);
}
}
-
- return REG_NOERROR;
}
/* Calculate "eclosure" for all the node in DFA. */
@@ -1656,6 +1652,8 @@ calc_eclosure (dfa)
#ifdef DEBUG
assert (dfa->eclosures[node_idx].nelem != -1);
#endif
+ if (dfa->nodes[node_idx].type == OP_DELETED_SUBEXP)
+ continue;
/* If we have already calculated, skip it. */
if (dfa->eclosures[node_idx].nelem != 0)
@@ -1861,7 +1859,7 @@ peek_token (token, input, syntax)
if (!(syntax & RE_NO_GNU_OPS))
{
token->type = ANCHOR;
- token->opr.ctx_type = NOT_WORD_DELIM;
+ token->opr.ctx_type = INSIDE_WORD;
}
break;
case 'w':
@@ -2126,9 +2124,9 @@ parse (regexp, preg, syntax, err)
tree = parse_reg_exp (regexp, preg, &current_token, syntax, 0, err);
if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
- eor = create_tree (dfa, NULL, NULL, END_OF_RE);
+ eor = re_dfa_add_tree_node (dfa, NULL, NULL, &current_token);
if (tree != NULL)
- root = create_tree (dfa, tree, eor, CONCAT);
+ root = create_tree (dfa, tree, eor, CONCAT, 0);
else
root = eor;
if (BE (eor == NULL || root == NULL, 0))
@@ -2165,6 +2163,7 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
while (token->type == OP_ALT)
{
+ re_token_t alt_token = *token;
fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
if (token->type != OP_ALT && token->type != END_OF_RE
&& (nest == 0 || token->type != OP_CLOSE_SUBEXP))
@@ -2175,12 +2174,13 @@ parse_reg_exp (regexp, preg, token, syntax, nest, err)
}
else
branch = NULL;
- tree = create_tree (dfa, tree, branch, OP_ALT);
+ tree = re_dfa_add_tree_node (dfa, tree, branch, &alt_token);
if (BE (tree == NULL, 0))
{
*err = REG_ESPACE;
return NULL;
}
+ dfa->has_plural_match = 1;
}
return tree;
}
@@ -2219,7 +2219,7 @@ parse_branch (regexp, preg, token, syntax, nest, err)
}
if (tree != NULL && exp != NULL)
{
- tree = create_tree (dfa, tree, exp, CONCAT);
+ tree = create_tree (dfa, tree, exp, CONCAT, 0);
if (tree == NULL)
{
*err = REG_ESPACE;
@@ -2253,7 +2253,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
switch (token->type)
{
case CHARACTER:
- tree = create_token_tree (dfa, NULL, NULL, token);
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
if (BE (tree == NULL, 0))
{
*err = REG_ESPACE;
@@ -2267,8 +2267,8 @@ parse_expression (regexp, preg, token, syntax, nest, err)
{
bin_tree_t *mbc_remain;
fetch_token (token, regexp, syntax);
- mbc_remain = create_token_tree (dfa, NULL, NULL, token);
- tree = create_tree (dfa, tree, mbc_remain, CONCAT);
+ mbc_remain = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ tree = create_tree (dfa, tree, mbc_remain, CONCAT, 0);
if (BE (mbc_remain == NULL || tree == NULL, 0))
{
*err = REG_ESPACE;
@@ -2295,7 +2295,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
return NULL;
}
dfa->used_bkref_map |= 1 << token->opr.idx;
- tree = create_token_tree (dfa, NULL, NULL, token);
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
if (BE (tree == NULL, 0))
{
*err = REG_ESPACE;
@@ -2340,7 +2340,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
token->type = CHARACTER;
/* mb_partial and word_char bits should be initialized already
by peek_token. */
- tree = create_token_tree (dfa, NULL, NULL, token);
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
if (BE (tree == NULL, 0))
{
*err = REG_ESPACE;
@@ -2349,27 +2349,18 @@ parse_expression (regexp, preg, token, syntax, nest, err)
break;
case ANCHOR:
if ((token->opr.ctx_type
- & (WORD_DELIM | NOT_WORD_DELIM | WORD_FIRST | WORD_LAST))
+ & (WORD_DELIM | INSIDE_WORD | WORD_FIRST | WORD_LAST))
&& dfa->word_ops_used == 0)
init_word_char (dfa);
- if (token->opr.ctx_type == WORD_DELIM
- || token->opr.ctx_type == NOT_WORD_DELIM)
+ if (token->opr.ctx_type == WORD_DELIM)
{
bin_tree_t *tree_first, *tree_last;
- if (token->opr.ctx_type == WORD_DELIM)
- {
- token->opr.ctx_type = WORD_FIRST;
- tree_first = create_token_tree (dfa, NULL, NULL, token);
- token->opr.ctx_type = WORD_LAST;
- }
- else
- {
- token->opr.ctx_type = INSIDE_WORD;
- tree_first = create_token_tree (dfa, NULL, NULL, token);
- token->opr.ctx_type = INSIDE_NOTWORD;
- }
- tree_last = create_token_tree (dfa, NULL, NULL, token);
- tree = create_tree (dfa, tree_first, tree_last, OP_ALT);
+ token->opr.ctx_type = WORD_FIRST;
+ tree_first = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ token->opr.ctx_type = WORD_LAST;
+ tree_last = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ token->type = OP_ALT;
+ tree = re_dfa_add_tree_node (dfa, tree_first, tree_last, token);
if (BE (tree_first == NULL || tree_last == NULL || tree == NULL, 0))
{
*err = REG_ESPACE;
@@ -2378,7 +2369,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
}
else
{
- tree = create_token_tree (dfa, NULL, NULL, token);
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
if (BE (tree == NULL, 0))
{
*err = REG_ESPACE;
@@ -2392,7 +2383,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
fetch_token (token, regexp, syntax);
return tree;
case OP_PERIOD:
- tree = create_token_tree (dfa, NULL, NULL, token);
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, token);
if (BE (tree == NULL, 0))
{
*err = REG_ESPACE;
@@ -2448,6 +2439,7 @@ parse_expression (regexp, preg, token, syntax, nest, err)
*err = REG_BADRPT;
return NULL;
}
+ dfa->has_plural_match = 1;
}
return tree;
@@ -2470,10 +2462,17 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
reg_errcode_t *err;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- bin_tree_t *tree;
+ bin_tree_t *tree, *left_par, *right_par;
size_t cur_nsub;
cur_nsub = preg->re_nsub++;
+ left_par = re_dfa_add_tree_node (dfa, NULL, NULL, token);
+ if (BE (left_par == NULL, 0))
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ dfa->nodes[left_par->node_idx].opr.idx = cur_nsub;
fetch_token (token, regexp, syntax | RE_CARET_ANCHORS_HERE);
/* The subexpression may be a null string. */
@@ -2482,20 +2481,26 @@ parse_sub_exp (regexp, preg, token, syntax, nest, err)
else
{
tree = parse_reg_exp (regexp, preg, token, syntax, nest, err);
- if (BE (*err == REG_NOERROR && token->type != OP_CLOSE_SUBEXP, 0))
- *err = REG_EPAREN;
- if (BE (*err != REG_NOERROR, 0))
+ if (BE (*err != REG_NOERROR && tree == NULL, 0))
return NULL;
}
+ if (BE (token->type != OP_CLOSE_SUBEXP, 0))
+ {
+ *err = REG_EPAREN;
+ return NULL;
+ }
+ right_par = re_dfa_add_tree_node (dfa, NULL, NULL, token);
dfa->completed_bkref_map |= 1 << cur_nsub;
-
- tree = create_tree (dfa, tree, NULL, SUBEXP);
- if (BE (tree == NULL, 0))
+ tree = ((tree == NULL) ? right_par
+ : create_tree (dfa, tree, right_par, CONCAT, 0));
+ tree = create_tree (dfa, left_par, tree, CONCAT, 0);
+ if (BE (right_par == NULL || tree == NULL, 0))
{
*err = REG_ESPACE;
return NULL;
}
- tree->token.opr.idx = cur_nsub;
+ dfa->nodes[right_par->node_idx].opr.idx = cur_nsub;
+
return tree;
}
@@ -2510,6 +2515,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
reg_syntax_t syntax;
reg_errcode_t *err;
{
+ re_token_t dup_token;
bin_tree_t *tree = NULL, *old_tree = NULL;
int i, start, end, start_idx = re_string_cur_idx (regexp);
re_token_t start_token = *token;
@@ -2572,13 +2578,9 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
fetch_token (token, regexp, syntax);
- if (BE (elem == NULL, 0))
+ /* Treat "<re>{0}*" etc. as "<re>{0}". */
+ if (BE (elem == NULL || (start == 0 && end == 0), 0))
return NULL;
- if (BE (start == 0 && end == 0, 0))
- {
- postorder (elem, free_tree, NULL);
- return NULL;
- }
/* Extract "<re>{n,m}" to "<re><re>...<re><re>{0,<m-n>}". */
if (BE (start > 0, 0))
@@ -2587,7 +2589,7 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
for (i = 2; i <= start; ++i)
{
elem = duplicate_tree (elem, dfa);
- tree = create_tree (dfa, tree, elem, CONCAT);
+ tree = create_tree (dfa, tree, elem, CONCAT, 0);
if (BE (elem == NULL || tree == NULL, 0))
goto parse_dup_op_espace;
}
@@ -2602,10 +2604,9 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
else
old_tree = NULL;
- if (elem->token.type == SUBEXP)
- postorder (elem, mark_opt_subexp, (void *) (long) elem->token.opr.idx);
-
- tree = create_tree (dfa, elem, NULL, (end == -1 ? OP_DUP_ASTERISK : OP_ALT));
+ mark_opt_subexp (elem, dfa);
+ dup_token.type = (end == -1 ? OP_DUP_ASTERISK : OP_DUP_QUESTION);
+ tree = re_dfa_add_tree_node (dfa, elem, NULL, &dup_token);
if (BE (tree == NULL, 0))
goto parse_dup_op_espace;
@@ -2615,17 +2616,17 @@ parse_dup_op (elem, regexp, dfa, token, syntax, err)
for (i = start + 2; i <= end; ++i)
{
elem = duplicate_tree (elem, dfa);
- tree = create_tree (dfa, tree, elem, CONCAT);
+ tree = create_tree (dfa, tree, elem, CONCAT, 0);
if (BE (elem == NULL || tree == NULL, 0))
goto parse_dup_op_espace;
- tree = create_tree (dfa, tree, NULL, OP_ALT);
+ tree = re_dfa_add_tree_node (dfa, tree, NULL, &dup_token);
if (BE (tree == NULL, 0))
goto parse_dup_op_espace;
}
if (old_tree)
- tree = create_tree (dfa, old_tree, tree, CONCAT);
+ tree = create_tree (dfa, old_tree, tree, CONCAT, 0);
return tree;
@@ -3281,59 +3282,57 @@ parse_bracket_exp (regexp, dfa, token, syntax, err)
/* Ensure only single byte characters are set. */
if (dfa->mb_cur_max > 1)
bitset_mask (sbcset, dfa->sb_char);
+#endif /* RE_ENABLE_I18N */
+ /* Build a tree for simple bracket. */
+ br_token.type = SIMPLE_BRACKET;
+ br_token.opr.sbcset = sbcset;
+ work_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+ if (BE (work_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+
+#ifdef RE_ENABLE_I18N
if (mbcset->nmbchars || mbcset->ncoll_syms || mbcset->nequiv_classes
|| mbcset->nranges || (dfa->mb_cur_max > 1 && (mbcset->nchar_classes
|| mbcset->non_match)))
{
+ re_token_t alt_token;
bin_tree_t *mbc_tree;
int sbc_idx;
/* Build a tree for complex bracket. */
dfa->has_mb_node = 1;
- br_token.type = COMPLEX_BRACKET;
- br_token.opr.mbcset = mbcset;
- mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
- if (BE (mbc_tree == NULL, 0))
- goto parse_bracket_exp_espace;
for (sbc_idx = 0; sbc_idx < BITSET_UINTS; ++sbc_idx)
if (sbcset[sbc_idx])
break;
/* If there are no bits set in sbcset, there is no point
of having both SIMPLE_BRACKET and COMPLEX_BRACKET. */
- if (sbc_idx < BITSET_UINTS)
- {
- /* Build a tree for simple bracket. */
- br_token.type = SIMPLE_BRACKET;
- br_token.opr.sbcset = sbcset;
- work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
- if (BE (work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
-
- /* Then join them by ALT node. */
- work_tree = create_tree (dfa, work_tree, mbc_tree, OP_ALT);
- if (BE (work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
- }
- else
+ if (sbc_idx == BITSET_UINTS)
{
re_free (sbcset);
- work_tree = mbc_tree;
+ dfa->nodes[work_tree->node_idx].type = COMPLEX_BRACKET;
+ dfa->nodes[work_tree->node_idx].opr.mbcset = mbcset;
+ return work_tree;
}
+ br_token.type = COMPLEX_BRACKET;
+ br_token.opr.mbcset = mbcset;
+ mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
+ if (BE (mbc_tree == NULL, 0))
+ goto parse_bracket_exp_espace;
+ /* Then join them by ALT node. */
+ alt_token.type = OP_ALT;
+ dfa->has_plural_match = 1;
+ work_tree = re_dfa_add_tree_node (dfa, work_tree, mbc_tree, &alt_token);
+ if (BE (mbc_tree != NULL, 1))
+ return work_tree;
}
else
-#endif /* not RE_ENABLE_I18N */
{
-#ifdef RE_ENABLE_I18N
free_charset (mbcset);
-#endif
- /* Build a tree for simple bracket. */
- br_token.type = SIMPLE_BRACKET;
- br_token.opr.sbcset = sbcset;
- work_tree = create_token_tree (dfa, NULL, NULL, &br_token);
- if (BE (work_tree == NULL, 0))
- goto parse_bracket_exp_espace;
+ return work_tree;
}
+#else /* not RE_ENABLE_I18N */
return work_tree;
+#endif /* not RE_ENABLE_I18N */
parse_bracket_exp_espace:
*err = REG_ESPACE;
@@ -3694,23 +3693,26 @@ build_charclass_op (dfa, trans, class_name, extra, non_match, err)
/* Build a tree for simple bracket. */
br_token.type = SIMPLE_BRACKET;
br_token.opr.sbcset = sbcset;
- tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
if (BE (tree == NULL, 0))
goto build_word_op_espace;
#ifdef RE_ENABLE_I18N
if (dfa->mb_cur_max > 1)
{
+ re_token_t alt_token;
bin_tree_t *mbc_tree;
/* Build a tree for complex bracket. */
br_token.type = COMPLEX_BRACKET;
br_token.opr.mbcset = mbcset;
dfa->has_mb_node = 1;
- mbc_tree = create_token_tree (dfa, NULL, NULL, &br_token);
+ mbc_tree = re_dfa_add_tree_node (dfa, NULL, NULL, &br_token);
if (BE (mbc_tree == NULL, 0))
goto build_word_op_espace;
/* Then join them by ALT node. */
- tree = create_tree (dfa, tree, mbc_tree, OP_ALT);
+ alt_token.type = OP_ALT;
+ dfa->has_plural_match = 1;
+ tree = re_dfa_add_tree_node (dfa, tree, mbc_tree, &alt_token);
if (BE (mbc_tree != NULL, 1))
return tree;
}
@@ -3781,23 +3783,12 @@ free_charset (re_charset_t *cset)
/* Create a tree node. */
static bin_tree_t *
-create_tree (dfa, left, right, type)
+create_tree (dfa, left, right, type, index)
re_dfa_t *dfa;
bin_tree_t *left;
bin_tree_t *right;
re_token_type_t type;
-{
- re_token_t t;
- t.type = type;
- return create_token_tree (dfa, left, right, &t);
-}
-
-static bin_tree_t *
-create_token_tree (dfa, left, right, token)
- re_dfa_t *dfa;
- bin_tree_t *left;
- bin_tree_t *right;
- const re_token_t *token;
+ int index;
{
bin_tree_t *tree;
if (BE (dfa->str_tree_storage_idx == BIN_TREE_STORAGE_SIZE, 0))
@@ -3815,12 +3806,11 @@ create_token_tree (dfa, left, right, token)
tree->parent = NULL;
tree->left = left;
tree->right = right;
- tree->token = *token;
- tree->token.duplicated = 0;
- tree->token.opt_subexp = 0;
- tree->first = NULL;
- tree->next = NULL;
- tree->node_idx = -1;
+ tree->type = type;
+ tree->node_idx = index;
+ tree->first = -1;
+ tree->next = -1;
+ re_node_set_init_empty (&tree->eclosure);
if (left != NULL)
left->parent = tree;
@@ -3829,89 +3819,103 @@ create_token_tree (dfa, left, right, token)
return tree;
}
-/* Mark the tree SRC as an optional subexpression.
- To be called from preorder or postorder. */
+/* Create both a DFA node and a tree for it. */
-static reg_errcode_t
-mark_opt_subexp (extra, node)
- void *extra;
- bin_tree_t *node;
+static bin_tree_t *
+re_dfa_add_tree_node (dfa, left, right, token)
+ re_dfa_t *dfa;
+ bin_tree_t *left;
+ bin_tree_t *right;
+ const re_token_t *token;
{
- int idx = (int) (long) extra;
- if (node->token.type == SUBEXP && node->token.opr.idx == idx)
- node->token.opt_subexp = 1;
+ int new_idx = re_dfa_add_node (dfa, *token, 0);
- return REG_NOERROR;
+ if (new_idx == -1)
+ return NULL;
+
+ return create_tree (dfa, left, right, 0, new_idx);
}
-/* Free the allocated memory inside NODE. */
+/* Mark the tree SRC as an optional subexpression. */
static void
-free_token (re_token_t *node)
+mark_opt_subexp (src, dfa)
+ const bin_tree_t *src;
+ re_dfa_t *dfa;
{
-#ifdef RE_ENABLE_I18N
- if (node->type == COMPLEX_BRACKET && node->duplicated == 0)
- free_charset (node->opr.mbcset);
- else
-#endif /* RE_ENABLE_I18N */
- if (node->type == SIMPLE_BRACKET && node->duplicated == 0)
- re_free (node->opr.sbcset);
+ /* Pass an OPT_SUBEXP_IDX which is != 1 if the duplicated tree is
+ a subexpression. */
+ if (src->type == CONCAT
+ && src->left->type == NON_TYPE
+ && dfa->nodes[src->left->node_idx].type == OP_OPEN_SUBEXP)
+ mark_opt_subexp_iter (src, dfa, dfa->nodes[src->left->node_idx].opr.idx);
}
-/* Worker function for tree walking. Free the allocated memory inside NODE
- and its children. */
-static reg_errcode_t
-free_tree (void *extra, bin_tree_t *node)
+/* Recursive tree walker for mark_opt_subexp. */
+
+static void
+mark_opt_subexp_iter (src, dfa, idx)
+ const bin_tree_t *src;
+ re_dfa_t *dfa;
+ int idx;
{
- free_token (&node->token);
- return REG_NOERROR;
+ int node_idx;
+
+ if (src->type == NON_TYPE)
+ {
+ node_idx = src->node_idx;
+ if ((dfa->nodes[node_idx].type == OP_OPEN_SUBEXP
+ || dfa->nodes[node_idx].type == OP_CLOSE_SUBEXP)
+ && dfa->nodes[node_idx].opr.idx == idx)
+ dfa->nodes[node_idx].opt_subexp = 1;
+ }
+
+ if (src->left != NULL)
+ mark_opt_subexp_iter (src->left, dfa, idx);
+
+ if (src->right != NULL)
+ mark_opt_subexp_iter (src->right, dfa, idx);
}
-/* Duplicate the node SRC, and return new node. This is a preorder
- visit similar to the one implemented by the generic visitor, but
- we need more infrastructure to maintain two parallel trees --- so,
- it's easier to duplicate. */
+/* Duplicate the node SRC, and return new node. */
static bin_tree_t *
-duplicate_tree (root, dfa)
- const bin_tree_t *root;
+duplicate_tree (src, dfa)
+ const bin_tree_t *src;
re_dfa_t *dfa;
{
- const bin_tree_t *node;
- bin_tree_t *dup_root;
- bin_tree_t **p_new = &dup_root, *dup_node = root->parent;
+ bin_tree_t *left = NULL, *right = NULL, *new_tree;
+ int new_node_idx;
+ /* Since node indies must be according to Post-order of the tree,
+ we must duplicate the left at first. */
+ if (src->left != NULL)
+ {
+ left = duplicate_tree (src->left, dfa);
+ if (left == NULL)
+ return NULL;
+ }
- for (node = root; ; )
+ /* Secondaly, duplicate the right. */
+ if (src->right != NULL)
{
- /* Create a new tree and link it back to the current parent. */
- *p_new = create_token_tree (dfa, NULL, NULL, &node->token);
- if (*p_new == NULL)
+ right = duplicate_tree (src->right, dfa);
+ if (right == NULL)
return NULL;
- (*p_new)->parent = dup_node;
- (*p_new)->token.duplicated = 1;
- dup_node = *p_new;
+ }
- /* Go to the left node, or up and to the right. */
- if (node->left)
- {
- node = node->left;
- p_new = &dup_node->left;
- }
- else
- {
- const bin_tree_t *prev = NULL;
- while (node->right == prev || node->right == NULL)
- {
- prev = node;
- node = node->parent;
- dup_node = dup_node->parent;
- if (!node)
- return dup_root;
- }
- node = node->right;
- p_new = &dup_node->right;
- }
+ /* At last, duplicate itself. */
+ if (src->type == NON_TYPE)
+ {
+ new_node_idx = re_dfa_add_node (dfa, dfa->nodes[src->node_idx], 0);
+ dfa->nodes[new_node_idx].duplicated = 1;
+ if (BE (new_node_idx == -1, 0))
+ return NULL;
}
+ else
+ new_node_idx = src->type;
+
+ new_tree = create_tree (dfa, left, right, src->type, new_node_idx);
+ return new_tree;
}
diff --git a/posix/regex_internal.c b/posix/regex_internal.c
index c3295a851c..001b50b134 100644
--- a/posix/regex_internal.c
+++ b/posix/regex_internal.c
@@ -1330,44 +1330,47 @@ re_node_set_remove_at (set, idx)
Or return -1, if an error will be occured. */
static int
-re_dfa_add_node (dfa, token)
+re_dfa_add_node (dfa, token, mode)
re_dfa_t *dfa;
re_token_t token;
+ int mode;
{
- int type = token.type;
if (BE (dfa->nodes_len >= dfa->nodes_alloc, 0))
{
int new_nodes_alloc = dfa->nodes_alloc * 2;
- int *new_nexts, *new_indices;
- re_node_set *new_edests, *new_eclosures;
-
re_token_t *new_array = re_realloc (dfa->nodes, re_token_t,
new_nodes_alloc);
if (BE (new_array == NULL, 0))
return -1;
dfa->nodes = new_array;
- new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
- new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
- new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
- new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
- if (BE (new_nexts == NULL || new_indices == NULL
- || new_edests == NULL || new_eclosures == NULL, 0))
- return -1;
- dfa->nexts = new_nexts;
- dfa->org_indices = new_indices;
- dfa->edests = new_edests;
- dfa->eclosures = new_eclosures;
+ if (mode)
+ {
+ int *new_nexts, *new_indices;
+ re_node_set *new_edests, *new_eclosures, *new_inveclosures;
+
+ new_nexts = re_realloc (dfa->nexts, int, new_nodes_alloc);
+ new_indices = re_realloc (dfa->org_indices, int, new_nodes_alloc);
+ new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
+ new_eclosures = re_realloc (dfa->eclosures, re_node_set,
+ new_nodes_alloc);
+ new_inveclosures = re_realloc (dfa->inveclosures, re_node_set,
+ new_nodes_alloc);
+ if (BE (new_nexts == NULL || new_indices == NULL
+ || new_edests == NULL || new_eclosures == NULL
+ || new_inveclosures == NULL, 0))
+ return -1;
+ dfa->nexts = new_nexts;
+ dfa->org_indices = new_indices;
+ dfa->edests = new_edests;
+ dfa->eclosures = new_eclosures;
+ dfa->inveclosures = new_inveclosures;
+ }
dfa->nodes_alloc = new_nodes_alloc;
}
dfa->nodes[dfa->nodes_len] = token;
+ dfa->nodes[dfa->nodes_len].opt_subexp = 0;
+ dfa->nodes[dfa->nodes_len].duplicated = 0;
dfa->nodes[dfa->nodes_len].constraint = 0;
-#ifdef RE_ENABLE_I18N
- dfa->nodes[dfa->nodes_len].accept_mb =
- (type == OP_PERIOD && dfa->mb_cur_max > 1) || type == COMPLEX_BRACKET;
-#endif
- dfa->nexts[dfa->nodes_len] = -1;
- re_node_set_init_empty (dfa->edests + dfa->nodes_len);
- re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
return dfa->nodes_len++;
}
@@ -1548,13 +1551,16 @@ create_ci_newstate (dfa, nodes, hash)
re_token_type_t type = node->type;
if (type == CHARACTER && !node->constraint)
continue;
-#ifdef RE_ENABLE_I18N
- newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
/* If the state has the halt node, the state is a halt state. */
- if (type == END_OF_RE)
+ else if (type == END_OF_RE)
newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+ else if (type == COMPLEX_BRACKET
+ || type == OP_UTF8_PERIOD
+ || (type == OP_PERIOD && dfa->mb_cur_max > 1))
+ newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
else if (type == ANCHOR || node->constraint)
@@ -1605,13 +1611,15 @@ create_cd_newstate (dfa, nodes, context, hash)
if (type == CHARACTER && !constraint)
continue;
-#ifdef RE_ENABLE_I18N
- newstate->accept_mb |= node->accept_mb;
-#endif /* RE_ENABLE_I18N */
-
/* If the state has the halt node, the state is a halt state. */
- if (type == END_OF_RE)
+ else if (type == END_OF_RE)
newstate->halt = 1;
+#ifdef RE_ENABLE_I18N
+ else if (type == COMPLEX_BRACKET
+ || type == OP_UTF8_PERIOD
+ || (type == OP_PERIOD && dfa->mb_cur_max > 1))
+ newstate->accept_mb = 1;
+#endif /* RE_ENABLE_I18N */
else if (type == OP_BACK_REF)
newstate->has_backref = 1;
else if (type == ANCHOR)
@@ -1660,7 +1668,6 @@ free_state (state)
re_free (state->entrance_nodes);
}
re_node_set_free (&state->nodes);
- re_free (state->word_trtable);
re_free (state->trtable);
re_free (state);
}
diff --git a/posix/regex_internal.h b/posix/regex_internal.h
index f065cf449d..0ccd8d3665 100644
--- a/posix/regex_internal.h
+++ b/posix/regex_internal.h
@@ -1,5 +1,5 @@
/* Extended regular expression matching and search library.
- Copyright (C) 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
@@ -143,21 +143,18 @@ static inline void bitset_mask (bitset dest, const bitset src);
#define NEXT_NEWLINE_CONSTRAINT 0x0020
#define PREV_BEGBUF_CONSTRAINT 0x0040
#define NEXT_ENDBUF_CONSTRAINT 0x0080
-#define WORD_DELIM_CONSTRAINT 0x0100
-#define NOT_WORD_DELIM_CONSTRAINT 0x0200
+#define DUMMY_CONSTRAINT 0x0100
typedef enum
{
INSIDE_WORD = PREV_WORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_FIRST = PREV_NOTWORD_CONSTRAINT | NEXT_WORD_CONSTRAINT,
WORD_LAST = PREV_WORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
- INSIDE_NOTWORD = PREV_NOTWORD_CONSTRAINT | NEXT_NOTWORD_CONSTRAINT,
LINE_FIRST = PREV_NEWLINE_CONSTRAINT,
LINE_LAST = NEXT_NEWLINE_CONSTRAINT,
BUF_FIRST = PREV_BEGBUF_CONSTRAINT,
BUF_LAST = NEXT_ENDBUF_CONSTRAINT,
- WORD_DELIM = WORD_DELIM_CONSTRAINT,
- NOT_WORD_DELIM = NOT_WORD_DELIM_CONSTRAINT
+ WORD_DELIM = DUMMY_CONSTRAINT
} re_context_type;
typedef struct
@@ -189,16 +186,16 @@ typedef enum
OP_CLOSE_SUBEXP = EPSILON_BIT | 1,
OP_ALT = EPSILON_BIT | 2,
OP_DUP_ASTERISK = EPSILON_BIT | 3,
- ANCHOR = EPSILON_BIT | 4,
+ OP_DUP_PLUS = EPSILON_BIT | 4,
+ OP_DUP_QUESTION = EPSILON_BIT | 5,
+ ANCHOR = EPSILON_BIT | 6,
+ OP_DELETED_SUBEXP = EPSILON_BIT | 7,
/* Tree type, these are used only by tree. */
CONCAT = 16,
- SUBEXP = 17,
/* Token type, these are used only by token. */
- OP_DUP_PLUS = 18,
- OP_DUP_QUESTION,
- OP_OPEN_BRACKET,
+ OP_OPEN_BRACKET = 17,
OP_CLOSE_BRACKET,
OP_CHARSET_RANGE,
OP_OPEN_DUP_NUM,
@@ -287,7 +284,6 @@ typedef struct
unsigned int duplicated : 1;
unsigned int opt_subexp : 1;
#ifdef RE_ENABLE_I18N
- unsigned int accept_mb : 1;
/* These 2 bits can be moved into the union if needed (e.g. if running out
of bits; move opr.c to opr.c.c and move the flags to opr.c.flags). */
unsigned int mb_partial : 1;
@@ -296,6 +292,8 @@ typedef struct
} re_token_t;
#define IS_EPSILON_NODE(type) ((type) & EPSILON_BIT)
+#define ACCEPT_MB_NODE(type) \
+ ((type) >= OP_PERIOD && (type) <= OP_UTF8_PERIOD)
struct re_string_t
{
@@ -431,14 +429,15 @@ struct bin_tree_t
struct bin_tree_t *parent;
struct bin_tree_t *left;
struct bin_tree_t *right;
- struct bin_tree_t *first;
- struct bin_tree_t *next;
-
- re_token_t token;
/* `node_idx' is the index in dfa->nodes, if `type' == 0.
Otherwise `type' indicate the type of this node. */
+ re_token_type_t type;
int node_idx;
+
+ int first;
+ int next;
+ re_node_set eclosure;
};
typedef struct bin_tree_t bin_tree_t;
@@ -487,7 +486,7 @@ struct re_dfastate_t
re_node_set non_eps_nodes;
re_node_set inveclosure;
re_node_set *entrance_nodes;
- struct re_dfastate_t **trtable, **word_trtable;
+ struct re_dfastate_t **trtable;
unsigned int context : 4;
unsigned int halt : 1;
/* If this state can accept `multi byte'.
@@ -497,6 +496,7 @@ struct re_dfastate_t
/* If this state has backreference node(s). */
unsigned int has_backref : 1;
unsigned int has_constraint : 1;
+ unsigned int word_trtable : 1;
};
typedef struct re_dfastate_t re_dfastate_t;
@@ -678,7 +678,7 @@ static void re_node_set_remove_at (re_node_set *set, int idx) internal_function;
(re_node_set_remove_at (set, re_node_set_contains (set, id) - 1))
#define re_node_set_empty(p) ((p)->nelem = 0)
#define re_node_set_free(set) re_free ((set)->elems)
-static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token) internal_function;
+static int re_dfa_add_node (re_dfa_t *dfa, re_token_t token, int mode) internal_function;
static re_dfastate_t *re_acquire_state (reg_errcode_t *err, re_dfa_t *dfa,
const re_node_set *nodes) internal_function;
static re_dfastate_t *re_acquire_state_context (reg_errcode_t *err,
diff --git a/posix/regexec.c b/posix/regexec.c
index 636396e6f7..91b48dd4a2 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -175,8 +175,8 @@ static reg_errcode_t check_arrival_expand_ecl_sub (re_dfa_t *dfa,
static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
re_node_set *cur_nodes, int cur_str,
int subexp_num, int type) internal_function;
-static int build_trtable (re_dfa_t *dfa,
- re_dfastate_t *state) internal_function;
+static re_dfastate_t **build_trtable (re_dfa_t *dfa,
+ re_dfastate_t *state) internal_function;
#ifdef RE_ENABLE_I18N
static int check_node_accept_bytes (re_dfa_t *dfa, int node_idx,
const re_string_t *input, int idx) internal_function;
@@ -605,7 +605,6 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
re_dfa_t *dfa = (re_dfa_t *)preg->buffer;
int left_lim, right_lim, incr;
int fl_longest_match, match_first, match_kind, match_last = -1;
- int extra_nmatch;
int sb, ch;
#if defined _LIBC || (defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L)
re_match_context_t mctx = { .dfa = dfa };
@@ -621,9 +620,6 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
mctx.dfa = dfa;
#endif
- extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
- nmatch -= extra_nmatch;
-
/* Check if the DFA haven't been compiled. */
if (BE (preg->used == 0 || dfa->init_state == NULL
|| dfa->init_state_word == NULL || dfa->init_state_nl == NULL
@@ -886,14 +882,11 @@ re_search_internal (preg, string, length, start, range, stop, nmatch, pmatch,
pmatch[reg_idx].rm_so += match_first;
pmatch[reg_idx].rm_eo += match_first;
}
- for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
- {
- pmatch[nmatch + reg_idx].rm_so = -1;
- pmatch[nmatch + reg_idx].rm_eo = -1;
- }
if (dfa->subexp_map)
- for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
+ for (reg_idx = 0;
+ reg_idx + 1 < nmatch && reg_idx < preg->re_nsub;
+ reg_idx++)
if (dfa->subexp_map[reg_idx] != reg_idx)
{
pmatch[reg_idx + 1].rm_so
@@ -1269,7 +1262,7 @@ proceed_next_node (mctx, nregs, regs, pidx, node, eps_via_nodes, fs)
re_token_type_t type = dfa->nodes[node].type;
#ifdef RE_ENABLE_I18N
- if (dfa->nodes[node].accept_mb)
+ if (ACCEPT_MB_NODE (type))
naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
else
#endif /* RE_ENABLE_I18N */
@@ -1378,7 +1371,7 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
int fl_backtrack;
{
re_dfa_t *dfa = (re_dfa_t *) preg->buffer;
- int idx, cur_node;
+ int idx, cur_node, real_nmatch;
re_node_set eps_via_nodes;
struct re_fail_stack_t *fs;
struct re_fail_stack_t fs_body = { 0, 2, NULL };
@@ -1399,14 +1392,15 @@ set_regs (preg, mctx, nmatch, pmatch, fl_backtrack)
fs = NULL;
cur_node = dfa->init_node;
+ real_nmatch = (nmatch <= preg->re_nsub) ? nmatch : preg->re_nsub + 1;
re_node_set_init_empty (&eps_via_nodes);
- prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * nmatch);
- memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
+ prev_idx_match = (regmatch_t *) alloca (sizeof (regmatch_t) * real_nmatch);
+ memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * real_nmatch);
for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
{
- update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
+ update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, real_nmatch);
if (idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
{
@@ -1630,13 +1624,15 @@ build_sifted_states (mctx, sctx, str_idx, cur_dest)
int naccepted = 0;
int ret;
-#ifdef DEBUG
+#if defined DEBUG || defined RE_ENABLE_I18N
re_token_type_t type = dfa->nodes[prev_node].type;
+#endif
+#ifdef DEBUG
assert (!IS_EPSILON_NODE (type));
#endif
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
- if (dfa->nodes[prev_node].accept_mb)
+ if (ACCEPT_MB_NODE (type))
naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
str_idx, sctx->last_str_idx);
#endif /* RE_ENABLE_I18N */
@@ -2222,6 +2218,7 @@ transit_state (err, mctx, state)
re_match_context_t *mctx;
re_dfastate_t *state;
{
+ re_dfa_t *const dfa = mctx->dfa;
re_dfastate_t **trtable;
unsigned char ch;
@@ -2236,22 +2233,21 @@ transit_state (err, mctx, state)
#endif /* RE_ENABLE_I18N */
/* Then decide the next state with the single byte. */
-#if 0
- if (0)
- /* don't use transition table */
- return transit_state_sb (err, mctx, state);
-#endif
-
- /* Use transition table */
- ch = re_string_fetch_byte (&mctx->input);
- for (;;)
+ if (1)
{
+ /* Use transition table */
+ ch = re_string_fetch_byte (&mctx->input);
trtable = state->trtable;
- if (BE (trtable != NULL, 1))
- return trtable[ch];
-
- trtable = state->word_trtable;
- if (BE (trtable != NULL, 1))
+ if (trtable == NULL)
+ {
+ trtable = build_trtable (dfa, state);
+ if (trtable == NULL)
+ {
+ *err = REG_ESPACE;
+ return NULL;
+ }
+ }
+ if (BE (state->word_trtable, 0))
{
unsigned int context;
context
@@ -2263,15 +2259,14 @@ transit_state (err, mctx, state)
else
return trtable[ch];
}
-
- if (!build_trtable (mctx->dfa, state))
- {
- *err = REG_ESPACE;
- return NULL;
- }
-
- /* Retry, we now have a transition table. */
+ else
+ return trtable[ch];
}
+#if 0
+ else
+ /* don't use transition table */
+ return transit_state_sb (err, mctx, state);
+#endif
}
/* Update the state_log if we need */
@@ -2475,13 +2470,10 @@ transit_state_mb (mctx, pstate)
{
re_node_set dest_nodes, *new_nodes;
int cur_node_idx = pstate->nodes.elems[i];
- int naccepted, dest_idx;
+ int naccepted = 0, dest_idx;
unsigned int context;
re_dfastate_t *dest_state;
- if (!dfa->nodes[cur_node_idx].accept_mb)
- continue;
-
if (dfa->nodes[cur_node_idx].constraint)
{
context = re_string_context_at (&mctx->input,
@@ -2493,8 +2485,9 @@ transit_state_mb (mctx, pstate)
}
/* How many bytes the node can accept? */
- naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
- re_string_cur_idx (&mctx->input));
+ if (ACCEPT_MB_NODE (dfa->nodes[cur_node_idx].type))
+ naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
+ re_string_cur_idx (&mctx->input));
if (naccepted == 0)
continue;
@@ -2508,7 +2501,9 @@ transit_state_mb (mctx, pstate)
#ifdef DEBUG
assert (dfa->nexts[cur_node_idx] != -1);
#endif
- new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
+ /* `cur_node_idx' may point the entity of the OP_CONTEXT_NODE,
+ then we use pstate->nodes.elems[i] instead. */
+ new_nodes = dfa->eclosures + dfa->nexts[pstate->nodes.elems[i]];
dest_state = mctx->state_log[dest_idx];
if (dest_state == NULL)
@@ -3024,13 +3019,15 @@ check_arrival_add_next_nodes (mctx, str_idx, cur_nodes, next_nodes)
{
int naccepted = 0;
int cur_node = cur_nodes->elems[cur_idx];
-#ifdef DEBUG
+#if defined DEBUG || defined RE_ENABLE_I18N
re_token_type_t type = dfa->nodes[cur_node].type;
+#endif
+#ifdef DEBUG
assert (!IS_EPSILON_NODE (type));
#endif
#ifdef RE_ENABLE_I18N
/* If the node may accept `multi byte'. */
- if (dfa->nodes[cur_node].accept_mb)
+ if (ACCEPT_MB_NODE (type))
{
naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
str_idx);
@@ -3276,15 +3273,15 @@ expand_bkref_cache (mctx, cur_nodes, cur_str, subexp_num,
}
/* Build transition table for the state.
- Return 1 if succeeded, otherwise return NULL. */
+ Return the new table if succeeded, otherwise return NULL. */
-static int
+static re_dfastate_t **
build_trtable (dfa, state)
re_dfa_t *dfa;
re_dfastate_t *state;
{
reg_errcode_t err;
- int i, j, ch, need_word_trtable = 0;
+ int i, j, ch;
unsigned int elem, mask;
int dests_node_malloced = 0, dest_states_malloced = 0;
int ndests; /* Number of the destination states from `state'. */
@@ -3301,20 +3298,20 @@ build_trtable (dfa, state)
#ifdef _LIBC
if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX))
dests_node = (re_node_set *)
- alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
else
#endif
{
dests_node = (re_node_set *)
- malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
+ malloc ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX);
if (BE (dests_node == NULL, 0))
- return 0;
+ return NULL;
dests_node_malloced = 1;
}
dests_ch = (bitset *) (dests_node + SBC_MAX);
/* Initialize transiton table. */
- state->word_trtable = state->trtable = NULL;
+ state->word_trtable = 0;
/* At first, group all nodes belonging to `state' into several
destinations. */
@@ -3323,14 +3320,14 @@ build_trtable (dfa, state)
{
if (dests_node_malloced)
free (dests_node);
- /* Return 0 in case of an error, 1 otherwise. */
+ /* Return NULL in case of an error, trtable otherwise. */
if (ndests == 0)
{
state->trtable = (re_dfastate_t **)
- calloc (sizeof (re_dfastate_t *), SBC_MAX);
- return 1;
+ calloc (sizeof (re_dfastate_t *), SBC_MAX);;
+ return state->trtable;
}
- return 0;
+ return NULL;
}
err = re_node_set_alloc (&follows, ndests + 1);
@@ -3341,12 +3338,12 @@ build_trtable (dfa, state)
if (__libc_use_alloca ((sizeof (re_node_set) + sizeof (bitset)) * SBC_MAX
+ ndests * 3 * sizeof (re_dfastate_t *)))
dest_states = (re_dfastate_t **)
- alloca (ndests * 3 * sizeof (re_dfastate_t *));
+ alloca (ndests * 3 * sizeof (re_dfastate_t *));
else
#endif
{
dest_states = (re_dfastate_t **)
- malloc (ndests * 3 * sizeof (re_dfastate_t *));
+ malloc (ndests * 3 * sizeof (re_dfastate_t *));
if (BE (dest_states == NULL, 0))
{
out_free:
@@ -3357,7 +3354,7 @@ out_free:
re_node_set_free (dests_node + i);
if (dests_node_malloced)
free (dests_node);
- return 0;
+ return NULL;
}
dest_states_malloced = 1;
}
@@ -3393,8 +3390,9 @@ out_free:
if (BE (dest_states_word[i] == NULL && err != REG_NOERROR, 0))
goto out_free;
- if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
- need_word_trtable = 1;
+ if (dest_states[i] != dest_states_word[i]
+ && dfa->mb_cur_max > 1)
+ state->word_trtable = 1;
dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
CONTEXT_NEWLINE);
@@ -3409,14 +3407,13 @@ out_free:
bitset_merge (acceptable, dests_ch[i]);
}
- if (!BE (need_word_trtable, 0))
+ if (!BE (state->word_trtable, 0))
{
/* We don't care about whether the following character is a word
character, or we are in a single-byte character set so we can
discern by looking at the character code: allocate a
256-entry transition table. */
- trtable = state->trtable =
- (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
+ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
if (BE (trtable == NULL, 0))
goto out_free;
@@ -3446,8 +3443,8 @@ out_free:
by looking at the character code: build two 256-entry
transition tables, one starting at trtable[0] and one
starting at trtable[SBC_MAX]. */
- trtable = state->word_trtable =
- (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
+ trtable = (re_dfastate_t **) calloc (sizeof (re_dfastate_t *),
+ 2 * SBC_MAX);
if (BE (trtable == NULL, 0))
goto out_free;
@@ -3478,7 +3475,7 @@ out_free:
{
/* k-th destination accepts newline character. */
trtable[NEWLINE_CHAR] = dest_states_nl[j];
- if (need_word_trtable)
+ if (state->word_trtable)
trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
/* There must be only one destination which accepts
newline. See group_nodes_into_DFAstates. */
@@ -3496,7 +3493,8 @@ out_free:
if (dests_node_malloced)
free (dests_node);
- return 1;
+ state->trtable = trtable;
+ return trtable;
}
/* Group all nodes belonging to STATE into several destinations.
diff --git a/posix/rxspencer/tests b/posix/rxspencer/tests
index a8b6e4baa8..a724252d8c 100644
--- a/posix/rxspencer/tests
+++ b/posix/rxspencer/tests
@@ -526,12 +526,3 @@ a((b+|((c)*)))+d - abcd abcd c,c,c,c
(((\b))){0} - x @x -,-,-
a(((.*)))b((\2)){0}c - abc abc @bc,@bc,@bc,-,-
a(((.*)))b((\1)){0}c - axbc axbc x,x,x,-,-
-
-\b & SaT @aT
-\b & aT @aT
-a.*\b & abT ab
-\b & STSS
-\B & abc @bc
-\B & aSbTc
-\B & SaT @SaT
-\B & aSTSb @TSb
diff --git a/posix/tst-rxspencer.c b/posix/tst-rxspencer.c
index a68bab2de9..cb40421797 100644
--- a/posix/tst-rxspencer.c
+++ b/posix/tst-rxspencer.c
@@ -1,5 +1,5 @@
/* Regular expression tests.
- Copyright (C) 2003, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2003 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Jakub Jelinek <jakub@redhat.com>, 2003.
@@ -127,15 +127,14 @@ mb_frob_string (const char *str, const char *letters)
}
/* Like mb_frob_string, but don't replace anything between
- [: and :], [. and .] or [= and =] or characters escaped
- with a backslash. */
+ [: and :], [. and .] or [= and =]. */
static char *
mb_frob_pattern (const char *str, const char *letters)
{
char *ret, *dst;
const char *src;
- int in_class = 0, escaped = 0;
+ int in_class = 0;
if (str == NULL)
return NULL;
@@ -145,18 +144,7 @@ mb_frob_pattern (const char *str, const char *letters)
return NULL;
for (src = str, dst = ret; *src; ++src)
- if (*src == '\\')
- {
- escaped ^= 1;
- *dst++ = *src;
- }
- else if (escaped)
- {
- escaped = 0;
- *dst++ = *src;
- continue;
- }
- else if (!in_class && strchr (letters, *src))
+ if (!in_class && strchr (letters, *src))
dst = mb_replace (dst, *src);
else
{
diff --git a/posix/unistd.h b/posix/unistd.h
index 744c10c50b..5d42169e82 100644
--- a/posix/unistd.h
+++ b/posix/unistd.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1991-2003, 2004, 2005 Free Software Foundation, Inc.
+/* Copyright (C) 1991-2002, 2003, 2004 Free Software Foundation, Inc.
This file is part of the GNU C Library.
The GNU C Library is free software; you can redistribute it and/or
@@ -295,8 +295,7 @@ extern __off64_t __REDIRECT_NTH (lseek,
# endif
#endif
#ifdef __USE_LARGEFILE64
-extern __off64_t lseek64 (int __fd, __off64_t __offset, int __whence)
- __THROW;
+extern __off64_t lseek64 (int __fd, __off64_t __offset, int __whence) __THROW;
#endif
/* Close the file descriptor FD.
@@ -310,13 +309,13 @@ extern int close (int __fd);
This function is a cancellation point and therefore not marked with
__THROW. */
-extern ssize_t read (int __fd, void *__buf, size_t __nbytes) __wur;
+extern ssize_t read (int __fd, void *__buf, size_t __nbytes);
/* Write N bytes of BUF to FD. Return the number written, or -1.
This function is a cancellation point and therefore not marked with
__THROW. */
-extern ssize_t write (int __fd, __const void *__buf, size_t __n) __wur;
+extern ssize_t write (int __fd, __const void *__buf, size_t __n);
#ifdef __USE_UNIX98
# ifndef __USE_FILE_OFFSET64
@@ -327,7 +326,7 @@ extern ssize_t write (int __fd, __const void *__buf, size_t __n) __wur;
This function is a cancellation point and therefore not marked with
__THROW. */
extern ssize_t pread (int __fd, void *__buf, size_t __nbytes,
- __off_t __offset) __wur;
+ __off_t __offset);
/* Write N bytes of BUF to FD at the given position OFFSET without
changing the file pointer. Return the number written, or -1.
@@ -335,15 +334,15 @@ extern ssize_t pread (int __fd, void *__buf, size_t __nbytes,
This function is a cancellation point and therefore not marked with
__THROW. */
extern ssize_t pwrite (int __fd, __const void *__buf, size_t __n,
- __off_t __offset) __wur;
+ __off_t __offset);
# else
# ifdef __REDIRECT
extern ssize_t __REDIRECT (pread, (int __fd, void *__buf, size_t __nbytes,
__off64_t __offset),
- pread64) __wur;
+ pread64);
extern ssize_t __REDIRECT (pwrite, (int __fd, __const void *__buf,
size_t __nbytes, __off64_t __offset),
- pwrite64) __wur;
+ pwrite64);
# else
# define pread pread64
# define pwrite pwrite64
@@ -355,11 +354,11 @@ extern ssize_t __REDIRECT (pwrite, (int __fd, __const void *__buf,
changing the file pointer. Return the number read, -1 for errors
or 0 for EOF. */
extern ssize_t pread64 (int __fd, void *__buf, size_t __nbytes,
- __off64_t __offset) __wur;
+ __off64_t __offset);
/* Write N bytes of BUF to FD at the given position OFFSET without
changing the file pointer. Return the number written, or -1. */
extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n,
- __off64_t __offset) __wur;
+ __off64_t __offset);
# endif
#endif
@@ -367,7 +366,7 @@ extern ssize_t pwrite64 (int __fd, __const void *__buf, size_t __n,
If successful, two file descriptors are stored in PIPEDES;
bytes written on PIPEDES[1] can be read from PIPEDES[0].
Returns 0 if successful, -1 if not. */
-extern int pipe (int __pipedes[2]) __THROW __wur;
+extern int pipe (int __pipedes[2]) __THROW;
/* Schedule an alarm. In SECONDS seconds, the process will get a SIGALRM.
If SECONDS is zero, any currently scheduled alarm will be cancelled.
@@ -417,26 +416,26 @@ extern int pause (void);
/* Change the owner and group of FILE. */
extern int chown (__const char *__file, __uid_t __owner, __gid_t __group)
- __THROW __nonnull ((1)) __wur;
+ __THROW __nonnull ((1));
#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
/* Change the owner and group of the file that FD is open on. */
-extern int fchown (int __fd, __uid_t __owner, __gid_t __group) __THROW __wur;
+extern int fchown (int __fd, __uid_t __owner, __gid_t __group) __THROW;
/* Change owner and group of FILE, if it is a symbolic
link the ownership of the symbolic link is changed. */
extern int lchown (__const char *__file, __uid_t __owner, __gid_t __group)
- __THROW __nonnull ((1)) __wur;
+ __THROW __nonnull ((1));
#endif /* Use BSD || X/Open Unix. */
/* Change the process's working directory to PATH. */
-extern int chdir (__const char *__path) __THROW __nonnull ((1)) __wur;
+extern int chdir (__const char *__path) __THROW __nonnull ((1));
#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
/* Change the process's working directory to the one FD is open on. */
-extern int fchdir (int __fd) __THROW __wur;
+extern int fchdir (int __fd) __THROW;
#endif
/* Get the pathname of the current working directory,
@@ -446,7 +445,7 @@ extern int fchdir (int __fd) __THROW __wur;
an array is allocated with `malloc'; the array is SIZE
bytes long, unless SIZE == 0, in which case it is as
big as necessary. */
-extern char *getcwd (char *__buf, size_t __size) __THROW __wur;
+extern char *getcwd (char *__buf, size_t __size) __THROW;
#ifdef __USE_GNU
/* Return a malloc'd string containing the current directory name.
@@ -459,13 +458,12 @@ extern char *get_current_dir_name (void) __THROW;
/* Put the absolute pathname of the current working directory in BUF.
If successful, return BUF. If not, put an error message in
BUF and return NULL. BUF should be at least PATH_MAX bytes long. */
-extern char *getwd (char *__buf)
- __THROW __nonnull ((1)) __attribute_deprecated__ __wur;
+extern char *getwd (char *__buf) __THROW __nonnull ((1));
#endif
/* Duplicate FD, returning a new file descriptor on the same file. */
-extern int dup (int __fd) __THROW __wur;
+extern int dup (int __fd) __THROW;
/* Duplicate FD to FD2, closing FD2 and making it open on the same file. */
extern int dup2 (int __fd, int __fd2) __THROW;
@@ -518,7 +516,7 @@ extern int execlp (__const char *__file, __const char *__arg, ...)
#if defined __USE_MISC || defined __USE_XOPEN
/* Add INC to priority of the current process. */
-extern int nice (int __inc) __THROW __wur;
+extern int nice (int __inc) __THROW;
#endif
@@ -631,7 +629,7 @@ extern __gid_t getegid (void) __THROW;
/* If SIZE is zero, return the number of supplementary groups
the calling process is in. Otherwise, fill in the group IDs
of its supplementary groups in LIST and return the number written. */
-extern int getgroups (int __size, __gid_t __list[]) __THROW __wur;
+extern int getgroups (int __size, __gid_t __list[]) __THROW;
#ifdef __USE_GNU
/* Return nonzero iff the calling process is in group GID. */
@@ -675,23 +673,19 @@ extern int setegid (__gid_t __gid) __THROW;
#ifdef __USE_GNU
/* Fetch the effective user ID, real user ID, and saved-set user ID,
of the calling process. */
-extern int getresuid (__uid_t *__euid, __uid_t *__ruid, __uid_t *__suid)
- __THROW;
+extern int getresuid (__uid_t *__euid, __uid_t *__ruid, __uid_t *__suid);
/* Fetch the effective group ID, real group ID, and saved-set group ID,
of the calling process. */
-extern int getresgid (__gid_t *__egid, __gid_t *__rgid, __gid_t *__sgid)
- __THROW;
+extern int getresgid (__gid_t *__egid, __gid_t *__rgid, __gid_t *__sgid);
/* Set the effective user ID, real user ID, and saved-set user ID,
of the calling process to EUID, RUID, and SUID, respectively. */
-extern int setresuid (__uid_t __euid, __uid_t __ruid, __uid_t __suid)
- __THROW;
+extern int setresuid (__uid_t __euid, __uid_t __ruid, __uid_t __suid);
/* Set the effective group ID, real group ID, and saved-set group ID,
of the calling process to EGID, RGID, and SGID, respectively. */
-extern int setresgid (__gid_t __egid, __gid_t __rgid, __gid_t __sgid)
- __THROW;
+extern int setresgid (__gid_t __egid, __gid_t __rgid, __gid_t __sgid);
#endif
@@ -716,7 +710,7 @@ extern char *ttyname (int __fd) __THROW;
/* Store at most BUFLEN characters of the pathname of the terminal FD is
open on in BUF. Return 0 on success, otherwise an error number. */
extern int ttyname_r (int __fd, char *__buf, size_t __buflen)
- __THROW __nonnull ((2)) __wur;
+ __THROW __nonnull ((2));
/* Return 1 if FD is a valid descriptor associated
with a terminal, zero if not. */
@@ -732,18 +726,18 @@ extern int ttyslot (void) __THROW;
/* Make a link to FROM named TO. */
extern int link (__const char *__from, __const char *__to)
- __THROW __nonnull ((1, 2)) __wur;
+ __THROW __nonnull ((1, 2));
#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED
/* Make a symbolic link to FROM named TO. */
extern int symlink (__const char *__from, __const char *__to)
- __THROW __nonnull ((1, 2)) __wur;
+ __THROW __nonnull ((1, 2));
/* Read the contents of the symbolic link PATH into no more than
LEN bytes of BUF. The contents are not null-terminated.
Returns the number of characters read, or -1 for errors. */
extern int readlink (__const char *__restrict __path, char *__restrict __buf,
- size_t __len) __THROW __nonnull ((1, 2)) __wur;
+ size_t __len) __THROW __nonnull ((1, 2));
#endif /* Use BSD. */
/* Remove the link NAME. */
@@ -802,20 +796,20 @@ extern int gethostname (char *__name, size_t __len) __THROW __nonnull ((1));
/* Set the name of the current host to NAME, which is LEN bytes long.
This call is restricted to the super-user. */
extern int sethostname (__const char *__name, size_t __len)
- __THROW __nonnull ((1)) __wur;
+ __THROW __nonnull ((1));
/* Set the current machine's Internet number to ID.
This call is restricted to the super-user. */
-extern int sethostid (long int __id) __THROW __wur;
+extern int sethostid (long int __id) __THROW;
/* Get and set the NIS (aka YP) domain name, if any.
Called just like `gethostname' and `sethostname'.
The NIS domain name is usually the empty string when not using NIS. */
extern int getdomainname (char *__name, size_t __len)
- __THROW __nonnull ((1)) __wur;
+ __THROW __nonnull ((1));
extern int setdomainname (__const char *__name, size_t __len)
- __THROW __nonnull ((1)) __wur;
+ __THROW __nonnull ((1));
/* Revoke access permissions to all processes currently communicating
@@ -824,7 +818,7 @@ extern int setdomainname (__const char *__name, size_t __len)
extern int vhangup (void) __THROW;
/* Revoke the access of all descriptors currently open on FILE. */
-extern int revoke (__const char *__file) __THROW __nonnull ((1)) __wur;
+extern int revoke (__const char *__file) __THROW __nonnull ((1));
/* Enable statistical profiling, writing samples of the PC into at most
@@ -852,14 +846,14 @@ extern void setusershell (void) __THROW; /* Rewind and re-read the file. */
/* Put the program in the background, and dissociate from the controlling
terminal. If NOCHDIR is zero, do `chdir ("/")'. If NOCLOSE is zero,
redirects stdin, stdout, and stderr to /dev/null. */
-extern int daemon (int __nochdir, int __noclose) __THROW __wur;
+extern int daemon (int __nochdir, int __noclose) __THROW;
#endif /* Use BSD || X/Open. */
#if defined __USE_BSD || (defined __USE_XOPEN && !defined __USE_XOPEN2K)
/* Make PATH be the root directory (the starting point for absolute paths).
This call is restricted to the super-user. */
-extern int chroot (__const char *__path) __THROW __nonnull ((1)) __wur;
+extern int chroot (__const char *__path) __THROW __nonnull ((1));
/* Prompt with PROMPT and read a string from the terminal without echoing.
Uses /dev/tty if possible; otherwise stderr and stdin. */
@@ -890,56 +884,52 @@ extern void sync (void) __THROW;
extern int getpagesize (void) __THROW __attribute__ ((__const__));
-/* Return the maximum number of file descriptors
- the current process could possibly have. */
-extern int getdtablesize (void) __THROW;
-
-
/* Truncate FILE to LENGTH bytes. */
# ifndef __USE_FILE_OFFSET64
extern int truncate (__const char *__file, __off_t __length)
- __THROW __nonnull ((1)) __wur;
+ __THROW __nonnull ((1));
# else
# ifdef __REDIRECT_NTH
extern int __REDIRECT_NTH (truncate,
(__const char *__file, __off64_t __length),
- truncate64) __nonnull ((1)) __wur;
+ truncate64) __nonnull ((1));
# else
# define truncate truncate64
# endif
# endif
# ifdef __USE_LARGEFILE64
extern int truncate64 (__const char *__file, __off64_t __length)
- __THROW __nonnull ((1)) __wur;
+ __THROW __nonnull ((1));
# endif
-#endif /* Use BSD || X/Open Unix. */
-
-#if defined __USE_BSD || defined __USE_XOPEN_EXTENDED || defined __USE_XOPEN2K
-
/* Truncate the file FD is open on to LENGTH bytes. */
# ifndef __USE_FILE_OFFSET64
-extern int ftruncate (int __fd, __off_t __length) __THROW __wur;
+extern int ftruncate (int __fd, __off_t __length) __THROW;
# else
# ifdef __REDIRECT_NTH
extern int __REDIRECT_NTH (ftruncate, (int __fd, __off64_t __length),
- ftruncate64) __wur;
+ ftruncate64);
# else
# define ftruncate ftruncate64
# endif
# endif
# ifdef __USE_LARGEFILE64
-extern int ftruncate64 (int __fd, __off64_t __length) __THROW __wur;
+extern int ftruncate64 (int __fd, __off64_t __length) __THROW;
# endif
-#endif /* Use BSD || X/Open Unix || POSIX 2003. */
+
+/* Return the maximum number of file descriptors
+ the current process could possibly have. */
+extern int getdtablesize (void) __THROW;
+
+#endif /* Use BSD || X/Open Unix. */
#if defined __USE_MISC || defined __USE_XOPEN_EXTENDED
/* Set the end of accessible data space (aka "the break") to ADDR.
Returns zero on success and -1 for errors (with errno set). */
-extern int brk (void *__addr) __THROW __wur;
+extern int brk (void *__addr) __THROW;
/* Increase or decrease the end of accessible data space by DELTA bytes.
If successful, returns the address the previous end of data space
@@ -983,17 +973,17 @@ extern long int syscall (long int __sysno, ...) __THROW;
# define F_TEST 3 /* Test a region for other processes locks. */
# ifndef __USE_FILE_OFFSET64
-extern int lockf (int __fd, int __cmd, __off_t __len) __wur;
+extern int lockf (int __fd, int __cmd, __off_t __len);
# else
# ifdef __REDIRECT
extern int __REDIRECT (lockf, (int __fd, int __cmd, __off64_t __len),
- lockf64) __wur;
+ lockf64);
# else
# define lockf lockf64
# endif
# endif
# ifdef __USE_LARGEFILE64
-extern int lockf64 (int __fd, int __cmd, __off64_t __len) __wur;
+extern int lockf64 (int __fd, int __cmd, __off64_t __len);
# endif
#endif /* Use misc and F_LOCK not already defined. */