/* Copyright (C) 1995 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include #include #include #include #include #include "localedef.h" #include "hash.h" /* Data structure for representing charmap database. */ struct charmap charmap_data; /* Line number in charmap file. */ static unsigned int line_no; /* Prototypes for local functions. */ static void read_prolog (FILE *infile); static unsigned long read_body (FILE *infile); /* Read complete table of symbolic names for character set from file. If this file does not exist or is not readable a default file is tried. If this also is not readable no character map is defined. */ void charmap_read (const char *filename) { unsigned long max_char; long path_max = pathconf (".", _PC_PATH_MAX); char buf[path_max]; FILE *infile = NULL; /* Initialize charmap data. */ charmap_data.codeset_name = NULL; charmap_data.mb_cur_max = -1; charmap_data.mb_cur_min = -1; charmap_data.escape_char = '\\'; charmap_data.comment_char = '#'; if (filename != NULL) { strcpy (buf, filename); infile = fopen (filename, "r"); if (infile == NULL && filename[0] != '/') { snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, filename); infile = fopen (buf, "r"); } } if (infile == NULL) { if (filename != NULL) error (0, errno, gettext ("input file `%s' not found"), filename); snprintf (buf, path_max, "%s/%s", CHARMAP_PATH, DEFAULT_CHARMAP); infile = fopen (buf, "r"); if (infile == NULL) error (4, errno, gettext ("input file `%s' not found"), filename); } charmap_data.filename = buf; init_hash (&charmap_data.table, 500); line_no = 0; /* Read the prolog of the charmap file. */ read_prolog (infile); /* Last works on the charmap tables global data. */ if (charmap_data.mb_cur_max == -1) charmap_data.mb_cur_max = 1; if (charmap_data.mb_cur_min == -1) charmap_data.mb_cur_min = charmap_data.mb_cur_max; if ((size_t) charmap_data.mb_cur_max > sizeof (long)) { error (2, 0, gettext ("program limitation: for now only upto %Zu " "bytes per character are allowed"), sizeof (long)); } /* Now process all entries. */ max_char = read_body (infile); /* We don't need the file anymore. */ fclose (infile); /* Determine the optimal table size when using the simple modulo hashing function. */ if (max_char >= 256) { int size; /* Current best values, initialized to some never reached high value. */ int best_count = 10000; int best_size = 10000; int best_product = best_count * best_size; /* Give warning. */ error (-1, 0, gettext ("computing character table size: this may take " "a while")); for (size = 256; size <= best_product; ++size) { /* Array with slot counters. */ int cnt[size]; /* Current character. */ int ch; /* Maximal number of characters in any slot. */ int maxcnt = 0; /* Product of current size and maximal count. */ int product = 0; /* Iteration pointer through hashing table. */ char *ptr = NULL; /* Initializes counters to zero. */ memset(cnt, 0, size * sizeof (int)); /* Iterate through whole hashing table. */ while (product < best_product && iterate_table (&charmap_data.table, (void **) &ptr, (void **) &ch)) { /* Increment slot counter. */ ++cnt[ch % size]; /* Test for current maximum. */ if (cnt[ch % size] > maxcnt) { maxcnt = cnt[ch % size]; product = maxcnt * size; } } if (product < best_product) { best_count = maxcnt; best_size = size; best_product = best_count * best_size; } } charmap_data.hash_size = best_size; charmap_data.hash_layers = best_count; } else { charmap_data.hash_size = 256; charmap_data.hash_layers = 1; } } #define SYNTAX_ERROR \ do { error (0, 0, gettext ("%s:%u: syntax error in charmap file"), \ charmap_data.filename, line_no); \ goto end_of_loop; } while (0) /* Read the prolog of the charmap file until the line containing `CHARMAP'. All possible entries are processed. */ static void read_prolog (FILE *infile) { size_t bufsize = sysconf (_SC_LINE_MAX); char buf[bufsize]; while (1) { char *cp = buf; char len; /* Read the next line. */ fgets (buf, bufsize, infile); len = strlen (buf); /* On EOF simply return. */ if (len == 0 || buf[len - 1] != '\n') error (4, 0, gettext ("%s: unexpected end of file in charmap"), charmap_data.filename); /* This is the next line. */ ++line_no; /* Comments and empty lines are ignored. */ if (len == 1 || buf[0] == charmap_data.comment_char) continue; buf[len - 1] = '\0'; /* Throw away leading white spaces. This is not defined in POSIX.2 so don't do it if conformance is requested. */ if (!posix_conformance) while (isspace (*cp)) ++cp; /* If `CHARMAP' is read the prolog is over. */ if (strncmp (cp, "CHARMAP", 7) == 0 && (!posix_conformance || cp[7] == '\0')) return; /* Now it can be only one of special symbols defining the charmap parameters. All are beginning with '<'. */ if (*cp != '<') SYNTAX_ERROR; ++cp; if (strncmp (cp, "code_set_name>", 14) == 0) { char *startp; #define cp_to_arg(no,pred) \ cp += no; \ while (isspace (*cp)) \ ++cp; \ if (*cp == '\0' || !pred (*cp)) \ SYNTAX_ERROR; cp_to_arg (14,isgraph) if (charmap_data.codeset_name != NULL) { error (0, 0, gettext ("%s:%u: duplicate code set name " "specification"), charmap_data.filename, line_no); free (charmap_data.codeset_name); } startp = cp; while (*cp != '\0' && isgraph (*cp) && !isspace (*cp)) ++cp; charmap_data.codeset_name = (char *) xmalloc (cp - startp + 1); strncpy (startp, startp, cp - startp); } else if (strncmp (cp, "mb_cur_max>", 11) == 0) { int new_val; cp_to_arg (11,isdigit) if (charmap_data.mb_cur_max != -1) error (0, 0, gettext ("%s:%u: duplicate definition of mb_cur_max"), charmap_data.filename, line_no); new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0); if (new_val < 1) error (0, 0, gettext ("%s:%u: illegal value for mb_cur_max: %d"), charmap_data.filename, line_no, new_val); else charmap_data.mb_cur_max = new_val; } else if (strncmp (cp, "mb_cur_min>", 11) == 0) { int new_val; cp_to_arg (11,isdigit) if (charmap_data.mb_cur_max != -1) error (0, 0, gettext ("%s:%u: duplicate definition of mb_cur_min"), charmap_data.filename, line_no); new_val = (int) strtol (cp, &cp, posix_conformance ? 10 : 0); if (new_val < 1) error (0, 0, gettext ("%s:%u: illegal value for mb_cur_min: %d"), charmap_data.filename, line_no, new_val); else charmap_data.mb_cur_min = new_val; } else if (strncmp (cp, "escape_char>", 12) == 0) { cp_to_arg (12, isgraph) charmap_data.escape_char = *cp; } else if (strncmp (cp, "comment_char>", 13) == 0) { cp_to_arg (13, isgraph) charmap_data.comment_char = *cp; } else SYNTAX_ERROR; end_of_loop: } } #undef cp_to_arg static unsigned long read_body (FILE *infile) { unsigned long max_char = 0; size_t bufsize = sysconf (_SC_LINE_MAX); char buf[bufsize]; char name_str[bufsize / 2]; char code_str[bufsize / 2]; while (1) { char *cp = buf; size_t len; /* Read the next line. */ fgets (buf, bufsize, infile); len = strlen (buf); /* On EOF simply return. */ if (len == 0) error (0, 0, gettext ("%s: `END CHARMAP' is missing"), charmap_data.filename); /* This is the next line. */ ++line_no; if (len == bufsize - 1) { error (0, 0, gettext ("%s:%u: line too long; use `getconf " "LINE_MAX' to get the current maximum line" "length"), charmap_data.filename, line_no); do { fgets (buf, bufsize, infile); len = strlen (buf); } while (len == bufsize - 1); continue; } /* Comments and empty lines are ignored. */ if (len == 1 || buf[0] == charmap_data.comment_char) continue; buf[len - 1] = '\0'; /* Throw away leading white spaces. This is not defined in POSIX.2 so don't do it if conformance is requested. */ if (!posix_conformance) while (isspace (*cp)) ++cp; if (*cp == '<') { char *end1p, *end2p, *start2p; size_t cnt = 0; unsigned long char_value = 0; if (sscanf (cp + 1, "%s %s", name_str, code_str) != 2) SYNTAX_ERROR; end1p = cp = name_str; while (*cp != '\0' && *cp != '>') { if (*cp == charmap_data.escape_char) if (*++cp == '\0') SYNTAX_ERROR; *end1p++ = *cp++; } if (*cp == '\0') /* No final '>'. Make error condition. */ end1p = name_str; else ++cp; *end1p = '\0'; if (*cp == '.' && *++cp == '.' && *++cp == '.' && *++cp == '<') { /* This might be the alternate form. */ start2p = end2p = ++cp; while (*cp != '\0' && *cp != '>') { if (*cp == charmap_data.escape_char) if (*++cp == '\0') SYNTAX_ERROR; *end2p = *cp++; } if (*cp == '\0') /* NO final '>'. Make error condition. */ end2p = start2p; else ++cp; } else start2p = end2p = NULL; if (end1p == name_str || (start2p != NULL && start2p != end2p) || *cp != '\0' || *code_str != charmap_data.escape_char) SYNTAX_ERROR; cp = code_str; do { char *begin; long val; switch (*++cp) { case 'd': val = strtol ((begin = cp + 1), &cp, 10); break; case 'x': val = strtol ((begin = cp + 1), &cp, 16); break; default: val = strtol ((begin = cp), &cp, 8); break; } if (begin == cp) SYNTAX_ERROR; if (posix_conformance && cp - begin < 2) error (0, 0, gettext ("%s:%u: byte constant has less than " "two digits"), charmap_data.filename, line_no); if (val < 0 || val > 255) { error (0, 0, gettext ("%s:%u: character encoding must be " "given in 8-bit bytes"), charmap_data.filename, line_no); goto end_of_loop; } if (cnt < (size_t) charmap_data.mb_cur_max) { if (cnt < sizeof (long)) /* FIXME */ char_value = (char_value << 8) | val; } else { error (0, 0, gettext ("%s:%u: number of bytes in character " "definition exceeds `mb_cur_max'"), charmap_data.filename, line_no); break; } ++cnt; } while (*cp == charmap_data.escape_char); /* Ignore the rest of the line (comment). */ if (end2p == NULL) { if (insert_entry (&charmap_data.table, name_str, end1p - name_str, (void *) char_value)) error (0, 0, gettext ("%s:%u: duplicate entry"), charmap_data.filename, line_no); max_char = MAX (max_char, char_value); } else { char *en1, *en2, *start1p; long n1, n2, n; start1p = name_str; while (*start1p == *start2p && !isdigit (*start1p) && start1p < end1p) ++start1p, ++start2p; n1 = strtol (start1p, &en1, 10); n2 = strtol (start2p, &en2, 10); if (en1 - start1p != en2 - start2p || en1 != end1p || en2 != end2p) SYNTAX_ERROR; if (n1 > n2) error (0, 0, gettext ("%s:%u: starting character is bigger " "than last"), charmap_data.filename, line_no); n = n1; while (n <= n2) { snprintf(start1p, en1 - start1p, "%0*d", en1 - start1p, n); if (insert_entry (&charmap_data.table, name_str, en1 - name_str, (void *) (char_value + n - n1))) error (0, 0, gettext ("%s:%u: duplicate entry"), charmap_data.filename, line_no); max_char = MAX (max_char, char_value + n - n1); ++n; } } } else { if (strncmp (cp, "END CHARMAP", 11) == 0) return max_char; SYNTAX_ERROR; } end_of_loop: } return max_char; } /* * Local Variables: * mode:c * c-basic-offset:2 * End: */