/* Copyright (C) 1995 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for more details. You should have received a copy of the GNU Library General Public License along with the GNU C Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #include #include #include #include #include #include #include "localedef.h" #include "token.h" /* Include the hashing table for the keywords. */ const struct locale_keyword* in_word_set (register const char *str, register int len); #include "keyword.h" /* Contains the status of reading the locale definition file. */ struct locfile_data locfile_data; /* This is a flag used while collation input. This is the only place where element names beside the ones defined in the character map are allowed. There we must not give error messages. */ int reject_new_char = 1; /* Prototypes for local functions. */ static int get_char (void); #define LD locfile_data /* Opens the locale definition file and initializes the status data structure for following calls of `locfile_lex'. */ void locfile_open (const char *fname) { if (fname == NULL) /* We read from stdin. */ LD.filename = ""; else { if (freopen (fname, "r", stdin) == NULL) error (4, 0, gettext ("input file `%s' not found"), fname); LD.filename = fname; } /* Set default values. */ LD.escape_char = '\\'; LD.comment_char = '#'; LD.bufsize = sysconf (_SC_LINE_MAX); LD.buf = (char *) xmalloc (LD.bufsize); LD.strbuf = (char *) xmalloc (LD.bufsize); LD.buf_ptr = LD.returned_tokens = LD.line_no = 0; /* Now sign that we want immediately read a line. */ LD.continue_line = 1; LD.buf[LD.buf_ptr] = '\0'; } int xlocfile_lex (char **token, int *token_len) { int retval = locfile_lex (token, token_len); if (retval == 0) /* I.e. end of file. */ error (4, 0, gettext ("%s: unexpected end of file in locale defintion " "file"), locfile_data.filename); return retval; } int locfile_lex (char **token, int *token_len) { int start_again; int retval = 0; do { int start_ptr; start_again = 0; /* Read the next line. Skip over empty lines and comments. */ if ((LD.buf[LD.buf_ptr] == '\0' && LD.continue_line != 0) || LD.buf_ptr >= LD.bufsize || (posix_conformance == 0 && LD.buf[LD.buf_ptr] == LD.comment_char)) do { size_t linelen; LD.buf_ptr = 0; if (fgets (LD.buf, LD.bufsize, stdin) == NULL) { /* This makes subsequent calls also return EOF. */ LD.buf[0] = '\0'; return 0; } /* Increment line number counter. */ ++LD.line_no; /* We now have to look whether this line is continued and whether it at all fits into our buffer. */ linelen = strlen (LD.buf); if (linelen == LD.bufsize - 1) /* The did not fit into the buffer. */ error (2, 0, gettext ("%s:%Zd: line too long; use " "`getconf LINE_MAX' to get the maximum " "line length"), LD.filename, LD.line_no); /* Remove '\n' at end of line. */ if (LD.buf[linelen - 1] == '\n') LD.buf[--linelen] = '\0'; if (linelen > 0 && LD.buf[linelen - 1] == LD.escape_char) { LD.buf[--linelen] = '\0'; LD.continue_line = 1; } else LD.continue_line = 0; while (isspace (LD.buf[LD.buf_ptr])) ++LD.buf_ptr; /* We are not so restrictive and allow white spaces before a comment. */ if (posix_conformance == 0 && LD.buf[LD.buf_ptr] == LD.comment_char && LD.buf_ptr != 0) error (0, 0, gettext ("%s:%Zd: comment does not start in " "column 1"), LD.filename, LD.line_no); } while (LD.buf[LD.buf_ptr] == '\0' || LD.buf[LD.buf_ptr] == LD.comment_char); /* Get information for return values. */ *token = LD.buf + LD.buf_ptr; start_ptr = LD.buf_ptr; /* If no further character is in the line this is the end of a logical line. This information is needed in the parser. */ if (LD.buf[LD.buf_ptr] == '\0') { LD.buf_ptr = LD.bufsize; retval = TOK_ENDOFLINE; } else if (isalpha (LD.buf[LD.buf_ptr])) /* The token is an identifier. The POSIX standard does not say what characters might be contained but offical POSIX locale definition files contain beside alnum characters '_', '-' and '+'. */ { const struct locale_keyword *kw; do ++LD.buf_ptr; while (isalnum (LD.buf[LD.buf_ptr]) || LD.buf[LD.buf_ptr] == '_' || LD.buf[LD.buf_ptr] == '-' || LD.buf[LD.buf_ptr] == '+'); /* Look in table of keywords. */ kw = in_word_set (*token, LD.buf_ptr - start_ptr); if (kw == NULL) retval = TOK_IDENT; else { if (kw->token_id == TOK_ESCAPE_CHAR || kw->token_id == TOK_COMMENT_CHAR) /* `escape_char' and `comment_char' are keywords for the lexer. Do not give them to the parser. */ { start_again = 1; if (!isspace (LD.buf[LD.buf_ptr]) || (posix_conformance && LD.returned_tokens > 0)) error (0, 0, gettext ("%s:%Zd: syntax error in locale " "definition file"), LD.filename, LD.line_no); do ++LD.buf_ptr; while (isspace (LD.buf[LD.buf_ptr])); kw->token_id == TOK_ESCAPE_CHAR ? LD.escape_char : LD.comment_char = LD.buf[LD.buf_ptr++]; ignore_to_eol (0, posix_conformance); } else /* It is one of the normal keywords. */ retval = kw->token_id; } *token_len = LD.buf_ptr - start_ptr; } else if (LD.buf[LD.buf_ptr] == '"') /* Read a string. All symbolic character descriptions are expanded. This has to be done in a local buffer because a simple symbolic character like may expand to upto 6 bytes. */ { char *last = LD.strbuf; ++LD.buf_ptr; while (LD.buf[LD.buf_ptr] != '"') { int pre = LD.buf_ptr; int char_val = get_char (); /* token, token_len); */ if (char_val == 0) { error (4, 0, gettext ("%s:%Zd: unterminated string at end " "of line"), LD.filename, LD.line_no); /* NOTREACHED */ } if (char_val > 0) /* Unknown characters are simply not stored. */ last += char_to_utf (last, char_val); else { char tmp[LD.buf_ptr - pre + 1]; memcpy (tmp, &LD.buf[pre], LD.buf_ptr - pre); tmp[LD.buf_ptr - pre] = '\0'; error (0, 0, gettext ("%s:%Zd: character `%s' not defined"), LD.filename, LD.line_no, tmp); } } if (LD.buf[LD.buf_ptr] != '\0') ++LD.buf_ptr; *last = '\0'; *token = LD.strbuf; *token_len = last - LD.strbuf; retval = TOK_STRING; } else if (LD.buf[LD.buf_ptr] == '.' && LD.buf[LD.buf_ptr + 1] == '.' && LD.buf[LD.buf_ptr + 2] == '.') { LD.buf_ptr += 3; retval = TOK_ELLIPSIS; } else if (LD.buf[LD.buf_ptr] == LD.escape_char) { char *endp; ++LD.buf_ptr; switch (LD.buf[LD.buf_ptr]) { case 'x': if (isdigit (LD.buf[++LD.buf_ptr])) { retval = strtol (&LD.buf[LD.buf_ptr], &endp, 16); if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255) retval = 'x'; else LD.buf_ptr = endp - LD.buf; } else retval = 'x'; break; case 'd': if (isdigit (LD.buf[++LD.buf_ptr])) { retval = strtol (&LD.buf[LD.buf_ptr], &endp, 10); if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255) retval = 'd'; else LD.buf_ptr = endp - LD.buf; } else retval = 'd'; break; case '0'...'9': retval = strtol (&LD.buf[LD.buf_ptr], &endp, 8); if (endp - (LD.buf + LD.buf_ptr) < 2 || retval > 255) retval = LD.buf[LD.buf_ptr++]; else LD.buf_ptr = endp - LD.buf; break; case 'a': retval = '\a'; ++LD.buf_ptr; break; case 'b': retval = '\b'; ++LD.buf_ptr; break; case 'f': retval = '\f'; ++LD.buf_ptr; break; case 'n': retval = '\n'; ++LD.buf_ptr; break; case 'r': retval = '\r'; ++LD.buf_ptr; break; case 't': retval = '\t'; ++LD.buf_ptr; break; case 'v': retval = '\v'; ++LD.buf_ptr; break; default: retval = LD.buf[LD.buf_ptr++]; break; } } else if (isdigit (LD.buf[LD.buf_ptr])) { char *endp; *token_len = strtol (&LD.buf[LD.buf_ptr], &endp, 10); LD.buf_ptr = endp - LD.buf; retval = TOK_NUMBER; } else if (LD.buf[LD.buf_ptr] == '-' && LD.buf[LD.buf_ptr + 1] == '1') { LD.buf_ptr += 2; retval = TOK_MINUS1; } else { int ch = get_char (); /* token, token_len); */ if (ch != -1) { *token_len = ch; retval = TOK_CHAR; } else retval = TOK_ILL_CHAR; } /* Ignore white space. */ while (isspace (LD.buf[LD.buf_ptr])) ++LD.buf_ptr; } while (start_again != 0); ++LD.returned_tokens; return retval; } /* Code a character with UTF-8 if the character map has multi-byte characters. */ int char_to_utf (char *buf, int char_val) { if (charmap_data.mb_cur_max == 1) { *buf++ = char_val; return 1; } else { /* The number of bits coded in each character. */ #define CBPC 6 static struct coding_tab { int mask; int val; } tab[] = { { 0x7f, 0x00 }, { 0x7ff, 0xc0 }, { 0xffff, 0xe0 }, { 0x1fffff, 0xf0 }, { 0x3ffffff, 0xf8 }, { 0x7fffffff, 0xfc }, { 0, } }; struct coding_tab *t; int c; int cnt = 1; for (t = tab; char_val > t->mask; ++t, ++cnt) ; c = cnt; buf += cnt; while (c > 1) { *--buf = 0x80 | (char_val & ((1 << CBPC) - 1)); char_val >>= CBPC; --c; } *--buf = t->val | char_val; return cnt; } } /* Ignore rest of line upto ENDOFLINE token, starting with given token. If WARN_FLAG is set warn about any token but ENDOFLINE. */ void ignore_to_eol (int token, int warn_flag) { if (token == TOK_ENDOFLINE) return; if (LD.buf[LD.buf_ptr] != '\0' && warn_flag) error (0, 0, gettext ("%s:%Zd: trailing garbage at end of line"), locfile_data.filename, locfile_data.line_no); while (LD.continue_line) { LD.continue_line = 0; /* Increment line number counter. */ ++LD.line_no; if (fgets (LD.buf, LD.bufsize, stdin) != NULL) { /* We now have to look whether this line is continued and whether it at all fits into our buffer. */ int linelen = strlen (LD.buf); if (linelen == LD.bufsize - 1) /* The did not fit into the buffer. */ error (2, 0, gettext ("%s:%Zd: line too long; use `getconf " "LINE_MAX' to get the current maximum " "line length"), LD.filename, LD.line_no); /* Remove '\n' at end of line. */ if (LD.buf[linelen - 1] == '\n') --linelen; if (LD.buf[linelen - 1] == LD.escape_char) LD.continue_line = 1; } } /* This causes to begin the next line. */ LD.buf_ptr = LD.bufsize; } /* Return the value of the character at the beginning of the input buffer. Symbolic character constants are expanded. */ static int get_char (void) { if (LD.buf[LD.buf_ptr] == '<') /* This is a symbolic character name. */ { int char_val; char *startp = LD.buf + (++LD.buf_ptr); char *endp = startp; while (LD.buf[LD.buf_ptr] != '>' && isprint (LD.buf[LD.buf_ptr])) { if (LD.buf[LD.buf_ptr] == '\0' || (LD.buf[LD.buf_ptr] == LD.escape_char && LD.buf[++LD.buf_ptr] == '\0')) break; *endp++ = LD.buf[LD.buf_ptr++]; } if (LD.buf[LD.buf_ptr] != '>' && LD.buf[LD.buf_ptr] == '\0') { error (0, 0, gettext ("%s:%Zd: end of line in character symbol"), LD.filename, LD.line_no); if (startp == endp) return -1; } else ++LD.buf_ptr; char_val = find_char (startp, endp - startp); if (char_val == -1 && verbose != 0 && reject_new_char != 0) { /* Locale defintions are often given very general. Missing characters are only reported when explicitely requested. */ char tmp[endp - startp + 3]; tmp[0] = '<'; memcpy (tmp + 1, startp, endp - startp); tmp[endp - startp + 1] = '>'; tmp[endp - startp + 2] = '\0'; error (0, 0, gettext ("%s:%Zd: character `%s' not defined"), LD.filename, LD.line_no, tmp); } return char_val; } else return (int) LD.buf[LD.buf_ptr++]; } /* * Local Variables: * mode:c * c-basic-offset:2 * End: */