summaryrefslogtreecommitdiff
path: root/iconv
diff options
context:
space:
mode:
authorUlrich Drepper <drepper@redhat.com>1999-01-18 23:15:16 +0000
committerUlrich Drepper <drepper@redhat.com>1999-01-18 23:15:16 +0000
commit2bd608801708e2fa3d0e39f1220604a81a036a78 (patch)
tree8e941c6a4cd4d12302fcaa0a78dd1c6a88dad16f /iconv
parent464d646f3e667e42742a995b841080ec7b6a1540 (diff)
Update.
1999-01-18 Ulrich Drepper <drepper@cygnus.com> * iconv/gconv_conf.c (add_module): Complete rewrite. Use cleverer data structures and avoid creating intermediate representations first. Rewrite also all helper functions. * iconv/gconv_db.c (find_derivation): Use new data structure for module database. * iconv/Versions: Remove __gconv_nmodules. * iconv/iconv_prog.c: Rewrite generation of charset name list to use new data structure. * iconv/gconv_int.h (struct gconv_module): Add new elements for database data structure. (__gconv_modules_db): Update type. (__gconv_transform_dummy): Removed. * iconv/gconv_builtin.h: Remove dummy transformation. * iconv/gconv_simple.c: Remove __gconv_transform_dummy. * sysdeps/unix/sysv/linux/sparc/sparc32/syscalls.list: Remove __syscall_vfork, add vfork. * sysdeps/unix/sysv/linux/sparc/sparc64/syscalls.list: Likewise. * Rules: Add dummp.c and dummy.o to common-generated.
Diffstat (limited to 'iconv')
-rw-r--r--iconv/Versions2
-rw-r--r--iconv/gconv_builtin.h5
-rw-r--r--iconv/gconv_conf.c308
-rw-r--r--iconv/gconv_db.c453
-rw-r--r--iconv/gconv_int.h12
-rw-r--r--iconv/gconv_simple.c32
-rw-r--r--iconv/iconv_prog.c50
7 files changed, 482 insertions, 380 deletions
diff --git a/iconv/Versions b/iconv/Versions
index 25d16429f8..577a54867d 100644
--- a/iconv/Versions
+++ b/iconv/Versions
@@ -1,7 +1,7 @@
libc {
GLIBC_2.1 {
# global variables
- __gconv_alias_db; __gconv_nmodules; __gconv_modules_db;
+ __gconv_alias_db; __gconv_modules_db;
# i*
iconv_open; iconv; iconv_close;
diff --git a/iconv/gconv_builtin.h b/iconv/gconv_builtin.h
index 554989be6b..e12f1e46ee 100644
--- a/iconv/gconv_builtin.h
+++ b/iconv/gconv_builtin.h
@@ -79,8 +79,3 @@ BUILTIN_TRANSFORMATION (NULL, "INTERNAL", 8, "UNICODELITTLE//",
1, "=INTERNAL->ucs2little",
__gconv_transform_internal_ucs2little, NULL, NULL,
4, 4, 2, 2)
-
-
-BUILTIN_TRANSFORMATION ("(.*)", NULL, 0, "\\1", 1, "=dummy",
- __gconv_transform_dummy, NULL, NULL,
- 1, 1, 1, 1)
diff --git a/iconv/gconv_conf.c b/iconv/gconv_conf.c
index d3f516effa..24ec14aea8 100644
--- a/iconv/gconv_conf.c
+++ b/iconv/gconv_conf.c
@@ -1,5 +1,5 @@
/* Handle configuration data.
- Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -83,99 +83,84 @@ builtin_aliases[] =
#endif
-/* Function for searching module. */
+/* Test whether there is already a matching module known. */
static int
-module_compare (const void *p1, const void *p2)
+internal_function
+detect_conflict (const char *alias, size_t alias_len)
{
- const struct gconv_module *s1 = (const struct gconv_module *) p1;
- const struct gconv_module *s2 = (const struct gconv_module *) p2;
- int result;
+ struct gconv_module *node = __gconv_modules_db;
- if (s1->from_pattern == NULL)
+ while (node != NULL)
{
- if (s2->from_pattern == NULL)
- result = strcmp (s1->from_constpfx, s2->from_constpfx);
- else
- result = -1;
- }
- else if (s2->from_pattern == NULL)
- result = 1;
- else
- result = strcmp (s1->from_pattern, s2->from_pattern);
-
- if (result == 0)
- result = strcmp (s1->to_string, s2->to_string);
-
- return result;
-}
-
-
-/* This function is used to test for a conflict which could be introduced
- if adding a new alias.
+ int cmpres = strncmp (alias, node->from_constpfx,
+ MIN (alias_len, node->from_constpfx_len));
- This function is a *very* ugly hack. The action-function is not
- supposed to alter the parameter. But we have to do this. We will if
- necessary compile the regular expression so that we can see whether it
- matches the alias name. This is safe in this environment and for the
- sake of performance we do it this way. The alternative would be to
- compile all regular expressions right from the start or to forget about
- the compilation though we might need it later.
-
- The second ugliness is that we have no possibility to pass parameters
- to the function. Therefore we use a global variable. This is no problem
- since we are for sure alone even in multi-threaded applications. */
-
-/* This is alias we want to check. */
-static const char *alias_to_test;
-
-/* This variable is set to a nonzero value once we have found a matching
- entry. */
-static int abort_conflict_search;
-
-static void
-detect_conflict (const void *p, VISIT value, int level)
-{
- struct gconv_module *s = *(struct gconv_module **) p;
-
- if ((value != endorder && value != leaf) || s->from_constpfx == NULL
- || abort_conflict_search)
- return;
-
- /* Before we test the whole expression (if this is a regular expression)
- make sure the constant prefix matches. In case this is no regular
- expression this is the whole string. */
- if (strcmp (alias_to_test, s->from_constpfx) == 0)
- {
- if (s->from_pattern == NULL)
- /* This is a simple string and therefore we have a conflict. */
- abort_conflict_search = 1;
- else
+ if (cmpres == 0)
{
- /* Make sure the regular expression is compiled (if possible). */
- if (s->from_regex == NULL)
+ struct gconv_module *runp;
+
+ if (alias_len < node->from_constpfx_len)
+ /* Cannot possibly match. */
+ return 0;
+
+ /* This means the prefix and the alias are identical. If
+ there is now a simple extry or a regular expression
+ matching this name we have found a conflict. If there is
+ no conflict with the elements in the `same' list there
+ cannot be a conflict. */
+ runp = node;
+ do
{
- /* Beware, this is what I warned you about in the comment
- above. We are modifying the object. */
- if (__regcomp (&s->from_regex_mem, s->from_pattern,
- REG_EXTENDED | REG_ICASE) != 0)
- /* Something is wrong. Remember this. */
- s->from_regex = (regex_t *) -1L;
+ if (runp->from_pattern == NULL)
+ {
+ /* This is a simple entry and therefore we have a
+ conflict if the strings are really the same. */
+ if (alias_len == node->from_constpfx_len)
+ return 1;
+ }
else
- s->from_regex = &s->from_regex_mem;
+ {
+ /* Compile the regular expression if necessary. */
+ if (runp->from_regex == NULL)
+ {
+ if (__regcomp (&runp->from_regex_mem,
+ runp->from_pattern,
+ REG_EXTENDED | REG_ICASE) != 0)
+ /* Something is wrong. Remember this. */
+ runp->from_regex = (regex_t *) -1L;
+ else
+ runp->from_regex = &runp->from_regex_mem;
+ }
+
+ if (runp->from_regex != (regex_t *) -1L)
+ {
+ regmatch_t match[1];
+
+ /* Try to match the regular expression. */
+ if (__regexec (runp->from_regex, alias, 1, match, 0) == 0
+ && match[0].rm_so == 0
+ && alias[match[0].rm_eo] == '\0')
+ /* They match, therefore it is a conflict. */
+ return 1;
+ }
+ }
+
+ runp = runp->same;
}
+ while (runp != NULL);
- if (s->from_regex != (regex_t *) -1L)
- {
- regmatch_t match[1];
+ if (alias_len == node->from_constpfx_len)
+ return 0;
- if (__regexec (s->from_regex, alias_to_test, 1, match, 0) == 0
- && match[0].rm_so == 0
- && alias_to_test[match[0].rm_eo] == '\0')
- /* The whole string matched. This is also a conflict. */
- abort_conflict_search = 1;
- }
+ node = node->matching;
}
+ else if (cmpres < 0)
+ node = node->left;
+ else
+ node = node->right;
}
+
+ return node != NULL;
}
@@ -207,13 +192,8 @@ add_alias (char *rp, void *modules)
return;
*wp++ = '\0';
- /* Test whether this alias conflicts with any available module. See
- the comment before the function `detect_conflict' for a description
- of this ugly hack. */
- alias_to_test = from;
- abort_conflict_search = 0;
- __twalk (modules, detect_conflict);
- if (abort_conflict_search)
+ /* Test whether this alias conflicts with any available module. */
+ if (detect_conflict (from, to - from - 1))
/* It does conflict, don't add the alias. */
return;
@@ -234,8 +214,74 @@ add_alias (char *rp, void *modules)
}
+/* Insert a data structure for a new module in the search tree. */
+static inline void
+internal_function
+insert_module (struct gconv_module *newp)
+{
+ struct gconv_module **rootp = &__gconv_modules_db;
+
+ while (*rootp != NULL)
+ {
+ struct gconv_module *root = *rootp;
+ size_t minlen = MIN (newp->from_constpfx_len, root->from_constpfx_len);
+ int cmpres;
+
+ cmpres = strncmp (newp->from_constpfx, root->from_constpfx, minlen);
+ if (cmpres == 0)
+ {
+ /* This can mean two things: the prefix is entirely the same or
+ it matches only for the minimum length of both strings. */
+ if (newp->from_constpfx_len == root->from_constpfx_len)
+ {
+ /* Both prefixes are identical. Insert the string at the
+ end of the `same' list if it is not already there. */
+ const char *from_pattern = (newp->from_pattern
+ ?: newp->from_constpfx);
+
+ while (strcmp (from_pattern,
+ root->from_pattern ?: root->from_constpfx) != 0
+ || strcmp (newp->to_string, root->to_string) != 0)
+ {
+ rootp = &root->same;
+ root = *rootp;
+ if (root == NULL)
+ break;
+ }
+
+ if (root != NULL)
+ /* This is a no new conversion. */
+ return;
+
+ break;
+ }
+
+ /* The new element either has a prefix which is itself a
+ prefix for the prefix of the current node or vice verse.
+ In the first case we insert the node right here. Otherwise
+ we have to descent further. */
+ if (newp->from_constpfx_len < root->from_constpfx_len)
+ {
+ newp->matching = root;
+ break;
+ }
+
+ rootp = &root->matching;
+ }
+ else if (cmpres < 0)
+ rootp = &root->left;
+ else
+ rootp = &root->right;
+ }
+
+ /* Plug in the new node here. */
+ *rootp = newp;
+}
+
+
/* Add new module. */
static inline void
+internal_function
add_module (char *rp, const char *directory, size_t dir_len, void **modules,
size_t *nmodules, int modcounter)
{
@@ -259,7 +305,7 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
while (*rp != '\0' && !isspace (*rp))
{
if (!isalnum (*rp) && *rp != '-' && *rp != '/' && *rp != '.'
- && *rp != '_')
+ && *rp != '_' && *rp != '(' && *rp != ')')
from_is_regex = 1;
++rp;
}
@@ -293,7 +339,7 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
*wp++ = '\0';
cost_hi = strtol (rp, &endp, 10);
- if (rp == endp)
+ if (rp == endp || cost_hi < 1)
/* No useful information. */
cost_hi = 1;
}
@@ -328,7 +374,8 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
else
const_len = to - from - 1;
- new_module = (struct gconv_module *) malloc (sizeof (struct gconv_module)
+ new_module = (struct gconv_module *) calloc (1,
+ sizeof (struct gconv_module)
+ (wp - from)
+ dir_len + need_ext);
if (new_module != NULL)
@@ -340,13 +387,9 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
from, to - from);
if (from_is_regex)
new_module->from_pattern = new_module->from_constpfx;
- else
- new_module->from_pattern = NULL;
new_module->from_constpfx_len = const_len;
- new_module->from_regex = NULL;
-
new_module->to_string = memcpy ((char *) new_module->from_constpfx
+ (to - from), to, module - to);
@@ -388,31 +431,12 @@ add_module (char *rp, const char *directory, size_t dir_len, void **modules,
}
}
- if (__tfind (new_module, modules, module_compare) == NULL)
- {
- if (__tsearch (new_module, modules, module_compare) == NULL)
- /* Something went wrong while inserting the new module. */
- free (new_module);
- else
- ++*nmodules;
- }
+ /* Now insert the new module data structure in our search tree. */
+ insert_module (new_module);
}
}
-static void
-insert_module (const void *nodep, VISIT value, int level)
-{
- if (value == preorder || value == leaf)
- __gconv_modules_db[__gconv_nmodules++] = *(struct gconv_module **) nodep;
-}
-
-static void
-nothing (void *unused __attribute__ ((unused)))
-{
-}
-
-
/* Read the next configuration file. */
static void
internal_function
@@ -495,15 +519,14 @@ __gconv_read_conf (void)
{
/* Append the default path to the user-defined path. */
size_t user_len = strlen (user_path);
- char *tmp;
gconv_path = alloca (user_len + 1 + sizeof (default_gconv_path));
- tmp = __mempcpy (gconv_path, user_path, user_len);
- *tmp++ = ':';
- __mempcpy (tmp, default_gconv_path, sizeof (default_gconv_path));
+ __mempcpy (__mempcpy (__mempcpy (gconv_path, user_path, user_len),
+ ":", 1),
+ default_gconv_path, sizeof (default_gconv_path));
}
- elem = strtok_r (gconv_path, ":", &gconv_path);
+ elem = __strtok_r (gconv_path, ":", &gconv_path);
while (elem != NULL)
{
#ifndef MAXPATHLEN
@@ -515,43 +538,38 @@ __gconv_read_conf (void)
if (__realpath (elem, real_elem) != NULL)
{
size_t elem_len = strlen (real_elem);
- char *filename, *tmp;
+ char *filename;
filename = alloca (elem_len + 1 + sizeof (gconv_conf_filename));
- tmp = __mempcpy (filename, real_elem, elem_len);
- *tmp++ = '/';
- __mempcpy (tmp, gconv_conf_filename, sizeof (gconv_conf_filename));
+ __mempcpy (__mempcpy (__mempcpy (filename, real_elem, elem_len),
+ "/", 1),
+ gconv_conf_filename, sizeof (gconv_conf_filename));
/* Read the next configuration file. */
read_conf_file (filename, real_elem, elem_len, &modules, &nmodules);
}
/* Get next element in the path. */
- elem = strtok_r (NULL, ":", &gconv_path);
+ elem = __strtok_r (NULL, ":", &gconv_path);
}
- /* If the configuration files do not contain any valid module specification
- remember this by setting the pointer to the module array to NULL. */
- nmodules += sizeof (builtin_modules) / sizeof (builtin_modules[0]);
- if (nmodules == 0)
- __gconv_modules_db = NULL;
- else
+ /* Add the internal modules. */
+ for (cnt = 0; cnt < sizeof (builtin_modules) / sizeof (builtin_modules[0]);
+ ++cnt)
{
- __gconv_modules_db =
- (struct gconv_module **) malloc (nmodules
- * sizeof (struct gconv_module));
- if (__gconv_modules_db != NULL)
+ if (builtin_modules[cnt].from_pattern == NULL)
{
- size_t cnt;
+ struct gconv_alias fake_alias;
- /* Insert all module entries into the array. */
- __twalk (modules, insert_module);
+ fake_alias.fromname = builtin_modules[cnt].from_constpfx;
- /* Finally insert the builtin transformations. */
- for (cnt = 0; cnt < (sizeof (builtin_modules)
- / sizeof (struct gconv_module)); ++cnt)
- __gconv_modules_db[__gconv_nmodules++] = &builtin_modules[cnt];
+ if (__tfind (&fake_alias, &__gconv_alias_db, __gconv_alias_compare)
+ != NULL)
+ /* It'll conflict so don't add it. */
+ continue;
}
+
+ insert_module (&builtin_modules[cnt]);
}
/* Add aliases for builtin conversions. */
@@ -561,10 +579,6 @@ __gconv_read_conf (void)
char *copy = strdupa (builtin_aliases[--cnt]);
add_alias (copy, modules);
}
-
- if (nmodules != 0)
- /* Now remove the tree data structure. */
- __tdestroy (modules, nothing);
/* Restore the error number. */
__set_errno (save_errno);
diff --git a/iconv/gconv_db.c b/iconv/gconv_db.c
index e6253b8380..5269d792f6 100644
--- a/iconv/gconv_db.c
+++ b/iconv/gconv_db.c
@@ -1,5 +1,5 @@
/* Provide access to the collection of available transformation modules.
- Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -18,9 +18,11 @@
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA. */
+#include <limits.h>
#include <search.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/param.h>
#include <bits/libc-lock.h>
#include <ldsodefs.h>
@@ -32,8 +34,7 @@
void *__gconv_alias_db;
/* Array with available modules. */
-size_t __gconv_nmodules;
-struct gconv_module **__gconv_modules_db;
+struct gconv_module *__gconv_modules_db;
/* We modify global data. */
__libc_lock_define_initialized (static, lock)
@@ -55,14 +56,20 @@ __gconv_alias_compare (const void *p1, const void *p2)
struct derivation_step
{
const char *result_set;
+ size_t result_set_len;
+ int cost_lo;
+ int cost_hi;
struct gconv_module *code;
struct derivation_step *last;
struct derivation_step *next;
};
-#define NEW_STEP(result, module, last_mod) \
+#define NEW_STEP(result, hi, lo, module, last_mod) \
({ struct derivation_step *newp = alloca (sizeof (struct derivation_step)); \
newp->result_set = result; \
+ newp->result_set_len = strlen (result); \
+ newp->cost_hi = hi; \
+ newp->cost_lo = lo; \
newp->code = module; \
newp->last = last_mod; \
newp->next = NULL; \
@@ -224,7 +231,7 @@ gen_steps (struct derivation_step *best, const char *toset,
{
status = _CALL_DL_FCT (result[step_cnt].init_fct,
(&result[step_cnt]));
-
+
if (status != GCONV_OK)
{
failed = 1;
@@ -276,9 +283,9 @@ find_derivation (const char *toset, const char *toset_expand,
struct gconv_step **handle, size_t *nsteps)
{
__libc_lock_define_initialized (static, lock)
- struct derivation_step *first, *current, **lastp, *best = NULL;
- int best_cost_hi = 0;
- int best_cost_lo = 0;
+ struct derivation_step *first, *current, **lastp, *solution = NULL;
+ int best_cost_hi = INT_MAX;
+ int best_cost_lo = INT_MAX;
int result;
result = derivation_lookup (fromset_expand ?: fromset, toset_expand ?: toset,
@@ -299,202 +306,287 @@ find_derivation (const char *toset, const char *toset_expand,
return result;
}
- /* ### TODO
- For now we use a simple algorithm with quadratic runtime behaviour.
+ /* For now we use a simple algorithm with quadratic runtime behaviour.
The task is to match the `toset' with any of the available rules,
starting from FROMSET. */
if (fromset_expand != NULL)
{
- first = NEW_STEP (fromset_expand, NULL, NULL);
- first->next = NEW_STEP (fromset, NULL, NULL);
+ first = NEW_STEP (fromset_expand, 0, 0, NULL, NULL);
+ first->next = NEW_STEP (fromset, 0, 0, NULL, NULL);
lastp = &first->next->next;
}
else
{
- first = NEW_STEP (fromset, NULL, NULL);
+ first = NEW_STEP (fromset, 0, 0, NULL, NULL);
lastp = &first->next;
}
- current = first;
- while (current != NULL)
+ for (current = first; current != NULL; current = current->next)
{
/* Now match all the available module specifications against the
current charset name. If any of them matches check whether
we already have a derivation for this charset. If yes, use the
one with the lower costs. Otherwise add the new charset at the
- end. */
- size_t cnt;
-
- for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
- {
- const char *result_set = NULL;
+ end.
+
+ The module database is organized in a tree form which allows to
+ search for prefixes. So we search for the first entry with a
+ matching prefix and any other matching entry can be found from
+ this place. */
+ struct gconv_module *node = __gconv_modules_db;
+
+ /* Maybe it is not necessary anymore to look for a solution for
+ this entry since the cost is already as high (or heigher) as
+ the cost for the best solution so far. */
+ if (current->cost_hi > best_cost_hi
+ || (current->cost_hi == best_cost_hi
+ && current->cost_lo >= best_cost_lo))
+ continue;
+
+ while (node != NULL)
+ {
+ int cmpres = strncmp (current->result_set, node->from_constpfx,
+ MIN (current->result_set_len,
+ node->from_constpfx_len));
- if (__gconv_modules_db[cnt]->from_pattern == NULL)
+ if (cmpres == 0)
{
- if (__strcasecmp (current->result_set,
- __gconv_modules_db[cnt]->from_constpfx) == 0)
+ /* Walk through the list of modules with this prefix and
+ try to match the name. */
+ struct gconv_module *runp;
+
+ if (current->result_set_len < node->from_constpfx_len)
+ /* Cannot possibly match. */
+ break;
+
+ /* Check all the modules with this prefix. */
+ runp = node;
+ do
{
- if (strcmp (__gconv_modules_db[cnt]->to_string, "-") == 0)
- result_set = toset_expand ?: toset;
+ const char *result_set = NULL;
+
+ if (runp->from_pattern == NULL)
+ {
+ /* This is a simple entry and therefore we have a
+ found an matching entry if the strings are really
+ equal. */
+ if (current->result_set_len == runp->from_constpfx_len)
+ {
+ if (strcmp (runp->to_string, "-") == 0)
+ result_set = toset_expand ?: toset;
+ else
+ result_set = runp->to_string;
+ }
+ }
else
- result_set = __gconv_modules_db[cnt]->to_string;
- }
- }
- else
- /* We have a regular expression. First see if the prefix
- matches. */
- if (__strncasecmp (current->result_set,
- __gconv_modules_db[cnt]->from_constpfx,
- __gconv_modules_db[cnt]->from_constpfx_len)
- == 0)
- {
- /* First compile the regex if not already done. */
- if (__gconv_modules_db[cnt]->from_regex == NULL)
- {
- if (__regcomp (&__gconv_modules_db[cnt]->from_regex_mem,
- __gconv_modules_db[cnt]->from_pattern,
- REG_EXTENDED | REG_ICASE) != 0)
- /* Something is wrong. Remember this. */
- __gconv_modules_db[cnt]->from_regex = (regex_t *) -1L;
- else
- __gconv_modules_db[cnt]->from_regex
- = &__gconv_modules_db[cnt]->from_regex_mem;
- }
-
- if (__gconv_modules_db[cnt]->from_regex != (regex_t *) -1L)
- {
- /* Try to match the from name. */
- regmatch_t match[4];
-
- if (__regexec (__gconv_modules_db[cnt]->from_regex,
- current->result_set, 4, match, 0) == 0
- && match[0].rm_so == 0
- && current->result_set[match[0].rm_eo] == '\0')
- {
- /* At least the whole <from> string is matched.
- We must now match sed-like possible
- subexpressions from the match to the
- toset expression. */
+ {
+ /* Compile the regular expression if necessary. */
+ if (runp->from_regex == NULL)
+ {
+ if (__regcomp (&runp->from_regex_mem,
+ runp->from_pattern,
+ REG_EXTENDED | REG_ICASE) != 0)
+ /* Something is wrong. Remember this. */
+ runp->from_regex = (regex_t *) -1L;
+ else
+ runp->from_regex = &runp->from_regex_mem;
+ }
+
+ if (runp->from_regex != (regex_t *) -1L)
+ {
+ regmatch_t match[4];
+
+ /* Try to match the regular expression. */
+ if (__regexec (runp->from_regex, current->result_set,
+ 4, match, 0) == 0
+ && match[0].rm_so == 0
+ && current->result_set[match[0].rm_eo] == '\0')
+ {
+ /* At least the whole <from> string is matched.
+ We must now match sed-like possible
+ subexpressions from the match to the
+ toset expression. */
#define ENSURE_LEN(LEN) \
if (wp + (LEN) >= constr + len - 1) \
{ \
char *newp = alloca (len += 128); \
- memcpy (newp, constr, wp - constr); \
- wp = newp + (wp - constr); \
+ wp = __mempcpy (newp, constr, wp - constr); \
constr = newp; \
}
- size_t len = 128;
- char *constr = alloca (len);
- char *wp = constr;
- const char *cp = __gconv_modules_db[cnt]->to_string;
-
- while (*cp != '\0')
- {
- if (*cp != '\\')
- {
- ENSURE_LEN (1);
- *wp++ = *cp++;
- }
- else if (cp[1] == '\0')
- /* Backslash at end of string. */
- break;
- else
- {
- ++cp;
- if (*cp == '\\')
- {
- *wp++ = *cp++;
- ENSURE_LEN (1);
- }
- else if (*cp < '1' || *cp > '3')
- break;
- else
- {
- int idx = *cp - '0';
- if (match[idx].rm_so == -1)
- /* No match. */
- break;
-
- ENSURE_LEN (match[idx].rm_eo
- - match[idx].rm_so);
- wp = __mempcpy (wp,
- &current->result_set[match[idx].rm_so],
- match[idx].rm_eo
- - match[idx].rm_so);
- ++cp;
- }
- }
- }
- if (*cp == '\0' && wp != constr)
- {
- /* Terminate the constructed string. */
- *wp = '\0';
- result_set = constr;
- }
- }
- }
- }
-
- if (result_set != NULL)
- {
- /* We managed to find a derivation. First see whether
- this is what we are looking for. */
- if (__strcasecmp (result_set, toset) == 0
- || (toset_expand != NULL
- && __strcasecmp (result_set, toset_expand) == 0))
- {
- /* Determine the costs. If they are lower than the
- previous solution (or this is the first solution)
- remember this solution. */
- int cost_hi = __gconv_modules_db[cnt]->cost_hi;
- int cost_lo = __gconv_modules_db[cnt]->cost_lo;
- struct derivation_step *runp = current;
- while (runp->code != NULL)
- {
- cost_hi += runp->code->cost_hi;
- cost_lo += runp->code->cost_lo;
- runp = runp->last;
+ size_t len = 128;
+ char *constr = alloca (len);
+ char *wp = constr;
+ const char *cp = runp->to_string;
+
+ while (*cp != '\0')
+ {
+ if (*cp != '\\')
+ {
+ ENSURE_LEN (1);
+ *wp++ = *cp++;
+ }
+ else if (cp[1] == '\0')
+ /* Backslash at end of string. */
+ break;
+ else
+ {
+ ++cp;
+ if (*cp == '\\')
+ {
+ *wp++ = *cp++;
+ ENSURE_LEN (1);
+ }
+ else if (*cp < '1' || *cp > '3')
+ break;
+ else
+ {
+ int idx = *cp - '0';
+ if (match[idx].rm_so == -1)
+ /* No match. */
+ break;
+
+ ENSURE_LEN (match[idx].rm_eo
+ - match[idx].rm_so);
+ wp = __mempcpy (wp,
+ &current->result_set[match[idx].rm_so],
+ match[idx].rm_eo
+ - match[idx].rm_so);
+ ++cp;
+ }
+ }
+ }
+ if (*cp == '\0' && wp != constr)
+ {
+ /* Terminate the constructed string. */
+ *wp = '\0';
+ result_set = constr;
+ }
+ }
+ }
}
- if (best == NULL || cost_hi < best_cost_hi
- || (cost_hi == best_cost_hi && cost_lo < best_cost_lo))
+
+ if (result_set != NULL)
{
- best = NEW_STEP (result_set, __gconv_modules_db[cnt],
- current);
- best_cost_hi = cost_hi;
- best_cost_lo = cost_lo;
+ int cost_hi = runp->cost_hi + current->cost_hi;
+ int cost_lo = runp->cost_lo + current->cost_lo;
+ struct derivation_step *step;
+
+ /* We managed to find a derivation. First see whether
+ this is what we are looking for. */
+ if (__strcasecmp (result_set, toset) == 0
+ || (toset_expand != NULL
+ && __strcasecmp (result_set, toset_expand) == 0))
+ {
+ if (solution == NULL || cost_hi < best_cost_hi
+ || (cost_hi == best_cost_hi
+ && cost_lo < best_cost_lo))
+ {
+ best_cost_hi = cost_hi;
+ best_cost_lo = cost_lo;
+ }
+
+ /* Append this solution to list. */
+ if (solution == NULL)
+ solution = NEW_STEP (result_set, 0, 0, runp,
+ current);
+ else
+ {
+ while (solution->next != NULL)
+ solution = solution->next;
+
+ solution->next = NEW_STEP (result_set, 0, 0,
+ runp, current);
+ }
+ }
+ else if (cost_hi < best_cost_hi
+ || (cost_hi == best_cost_hi
+ && cost_lo < best_cost_lo))
+ {
+ /* Append at the end if there is no entry with
+ this name. */
+ for (step = first; step != NULL; step = step->next)
+ if (__strcasecmp (result_set, step->result_set)
+ == 0)
+ break;
+
+ if (step == NULL)
+ {
+ *lastp = NEW_STEP (result_set,
+ cost_hi, cost_lo,
+ runp, current);
+ lastp = &(*lastp)->next;
+ }
+ else if (step->cost_hi > cost_hi
+ || (step->cost_hi == cost_hi
+ && step->cost_lo > cost_lo))
+ {
+ step->code = runp;
+ step->last = current;
+
+ /* Update the cost for all steps. */
+ for (step = first; step != NULL;
+ step = step->next)
+ {
+ struct derivation_step *back;
+
+ if (step->code == NULL)
+ /* This is one of the entries we started
+ from. */
+ continue;
+
+ step->cost_hi = step->code->cost_hi;
+ step->cost_lo = step->code->cost_lo;
+
+ for (back = step->last; back->code != NULL;
+ back = back->last)
+ {
+ step->cost_hi += back->code->cost_hi;
+ step->cost_lo += back->code->cost_lo;
+ }
+ }
+
+ for (step = solution; step != NULL;
+ step = step->next)
+ {
+ step->cost_hi = (step->code->cost_hi
+ + step->last->cost_hi);
+ step->cost_lo = (step->code->cost_lo
+ + step->last->cost_lo);
+
+ if (step->cost_hi < best_cost_hi
+ || (step->cost_hi == best_cost_hi
+ && step->cost_lo < best_cost_lo))
+ {
+ solution = step;
+ best_cost_hi = step->cost_hi;
+ best_cost_lo = step->cost_lo;
+ }
+ }
+ }
+ }
}
+
+ runp = runp->same;
}
- else
- {
- /* Append at the end if there is no entry with this name. */
- struct derivation_step *runp = first;
+ while (runp != NULL);
- while (runp != NULL)
- {
- if (__strcasecmp (result_set, runp->result_set) == 0)
- break;
- runp = runp->next;
- }
+ if (current->result_set_len == node->from_constpfx_len)
+ break;
- if (runp == NULL)
- {
- *lastp = NEW_STEP (result_set, __gconv_modules_db[cnt],
- current);
- lastp = &(*lastp)->next;
- }
- }
+ node = node->matching;
}
+ else if (cmpres < 0)
+ node = node->left;
+ else
+ node = node->right;
}
-
- /* Go on with the next entry. */
- current = current->next;
}
- if (best != NULL)
+ if (solution != NULL)
/* We really found a way to do the transformation. Now build a data
structure describing the transformation steps.*/
- result = gen_steps (best, toset_expand ?: toset, fromset_expand ?: fromset,
- handle, nsteps);
+ result = gen_steps (solution, toset_expand ?: toset,
+ fromset_expand ?: fromset, handle, nsteps);
else
{
/* We haven't found a transformation. Clear the result values. */
@@ -620,20 +712,35 @@ __gconv_close_transform (struct gconv_step *steps, size_t nsteps)
}
+/* Free the modules mentioned. */
+static void
+internal_function
+free_modules_db (struct gconv_module *node)
+{
+ if (node->left != NULL)
+ free_modules_db (node->left);
+ if (node->right != NULL)
+ free_modules_db (node->right);
+ if (node->same != NULL)
+ free_modules_db (node->same);
+ do
+ {
+ struct gconv_module *act = node;
+ node = node->matching;
+ free (act);
+ }
+ while (node != NULL);
+}
+
+
/* Free all resources if necessary. */
static void __attribute__ ((unused))
free_mem (void)
{
- size_t cnt;
-
if (__gconv_alias_db != NULL)
__tdestroy (__gconv_alias_db, free);
- for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
- /* Modules which names do not start with a slash are builtin
- transformations and the memory is not allocated dynamically. */
- if (__gconv_modules_db[cnt]->module_name[0] == '/')
- free (__gconv_modules_db[cnt]);
+ free_modules_db (__gconv_modules_db);
if (known_derivations != NULL)
__tdestroy (known_derivations, free_derivation);
diff --git a/iconv/gconv_int.h b/iconv/gconv_int.h
index bc67b0b050..c78d0bb98b 100644
--- a/iconv/gconv_int.h
+++ b/iconv/gconv_int.h
@@ -1,4 +1,4 @@
-/* Copyright (C) 1997, 1998 Free Software Foundation, Inc.
+/* Copyright (C) 1997, 1998, 1999 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Ulrich Drepper <drepper@cygnus.com>, 1997.
@@ -75,6 +75,11 @@ struct gconv_module
int cost_lo;
const char *module_name;
+
+ struct gconv_module *left; /* Prefix smaller. */
+ struct gconv_module *same; /* List of entries with identical prefix. */
+ struct gconv_module *matching;/* Next node with more specific prefix. */
+ struct gconv_module *right; /* Prefix larger. */
};
@@ -85,7 +90,7 @@ extern void *__gconv_alias_db;
/* Array with available modules. */
extern size_t __gconv_nmodules;
-extern struct gconv_module **__gconv_modules_db;
+extern struct gconv_module *__gconv_modules_db;
/* Return in *HANDLE decriptor for transformation from FROMSET to TOSET. */
@@ -102,7 +107,7 @@ extern int __gconv_close (gconv_t cd)
bytes in buffer starting at *OUTBUF. Return number of written
characters in *CONVERTED if this pointer is not null. */
extern int __gconv (gconv_t __cd, const char **__inbuf, const char *inbufend,
- char **__outbuf, char *outbufend, size_t *__converted)
+ char **__outbuf, char *outbufend, size_t *converted)
internal_function;
/* Return in *HANDLE a pointer to an array with *NSTEPS elements describing
@@ -152,7 +157,6 @@ extern void __gconv_get_builtin_trans (const char *__name,
const char **__inbuf, const char *__inbufend, \
size_t *__written, int __do_flush)
-__BUILTIN_TRANS (__gconv_transform_dummy);
__BUILTIN_TRANS (__gconv_transform_ascii_internal);
__BUILTIN_TRANS (__gconv_transform_internal_ascii);
__BUILTIN_TRANS (__gconv_transform_utf8_internal);
diff --git a/iconv/gconv_simple.c b/iconv/gconv_simple.c
index c71c5ed0a4..4084d04b44 100644
--- a/iconv/gconv_simple.c
+++ b/iconv/gconv_simple.c
@@ -46,38 +46,6 @@ static const unsigned char encoding_byte[] =
};
-
-int
-__gconv_transform_dummy (struct gconv_step *step, struct gconv_step_data *data,
- const char **inbuf, const char *inbufend,
- size_t *written, int do_flush)
-{
- size_t do_write;
-
- /* We have no stateful encoding. So we don't have to do anything
- special. */
- if (do_flush)
- do_write = 0;
- else
- {
- do_write = MIN (inbufend - *inbuf, data->outbufend - data->outbuf);
-
- memcpy (data->outbuf, inbuf, do_write);
-
- *inbuf -= do_write;
- *data->outbuf += do_write;
- }
-
- /* ### TODO Actually, this number must be devided according to the
- size of the input charset. I.e., if the input is in UCS4 the
- number of copied bytes must be divided by 4. */
- if (written != NULL)
- *written = do_write;
-
- return GCONV_OK;
-}
-
-
/* Transform from the internal, UCS4-like format, to UCS4. The
difference between the internal ucs4 format and the real UCS4
format is, if any, the endianess. The Unicode/ISO 10646 says that
diff --git a/iconv/iconv_prog.c b/iconv/iconv_prog.c
index 8f05d57a18..78fa6234f4 100644
--- a/iconv/iconv_prog.c
+++ b/iconv/iconv_prog.c
@@ -99,7 +99,7 @@ static int process_block (iconv_t cd, const char *addr, size_t len,
FILE *output);
static int process_fd (iconv_t cd, int fd, FILE *output);
static int process_file (iconv_t cd, FILE *input, FILE *output);
-static void print_known_names (void);
+static void print_known_names (void) internal_function;
int
@@ -503,9 +503,38 @@ do_print (const void *nodep, VISIT value, int level)
}
static void
+internal_function
+add_known_names (struct gconv_module *node)
+{
+ if (node->left != NULL)
+ add_known_names (node->left);
+ if (node->right != NULL)
+ add_known_names (node->right);
+ if (node->same != NULL)
+ add_known_names (node->same);
+ do
+ {
+ if (node->from_pattern == NULL)
+ {
+ if (strcmp (node->from_constpfx, "INTERNAL"))
+ tsearch (node->from_constpfx, &printlist,
+ (__compar_fn_t) strverscmp);
+ if (strcmp (node->to_string, "INTERNAL"))
+ tsearch (node->to_string, &printlist, (__compar_fn_t) strverscmp);
+ }
+ else
+ if (strcmp (node->from_pattern, "INTERNAL"))
+ tsearch (node->from_pattern, &printlist, (__compar_fn_t) strverscmp);
+
+ node = node->matching;
+ }
+ while (node != NULL);
+}
+
+static void
+internal_function
print_known_names (void)
{
- size_t cnt;
iconv_t h;
/* We must initialize the internal databases first. */
@@ -516,22 +545,7 @@ print_known_names (void)
twalk (__gconv_alias_db, insert_print_list);
/* Add the from- and to-names from the known modules. */
- for (cnt = 0; cnt < __gconv_nmodules; ++cnt)
- {
- if (__gconv_modules_db[cnt]->from_pattern == NULL)
- {
- if (strcmp (__gconv_modules_db[cnt]->from_constpfx, "INTERNAL"))
- tsearch (__gconv_modules_db[cnt]->from_constpfx, &printlist,
- (__compar_fn_t) strverscmp);
- if (strcmp (__gconv_modules_db[cnt]->to_string, "INTERNAL"))
- tsearch (__gconv_modules_db[cnt]->to_string, &printlist,
- (__compar_fn_t) strverscmp);
- }
- else
- if (strcmp (__gconv_modules_db[cnt]->from_pattern, "INTERNAL"))
- tsearch (__gconv_modules_db[cnt]->from_pattern, &printlist,
- (__compar_fn_t) strverscmp);
- }
+ add_known_names (__gconv_modules_db);
fputs (_("\
The following list contain all the coded character sets known. This does\n\