summaryrefslogtreecommitdiff
path: root/stdio-common/vfscanf.c
diff options
context:
space:
mode:
Diffstat (limited to 'stdio-common/vfscanf.c')
-rw-r--r--stdio-common/vfscanf.c345
1 files changed, 272 insertions, 73 deletions
diff --git a/stdio-common/vfscanf.c b/stdio-common/vfscanf.c
index 28f8ceb820..70d8bf0b18 100644
--- a/stdio-common/vfscanf.c
+++ b/stdio-common/vfscanf.c
@@ -1,20 +1,20 @@
/* Copyright (C) 1991, 92, 93, 94, 95, 96 Free Software Foundation, Inc.
-This file is part of the GNU C Library.
+ This file is part of the GNU C Library.
-The GNU C Library is free software; you can redistribute it and/or
-modify it under the terms of the GNU Library General Public License as
-published by the Free Software Foundation; either version 2 of the
-License, or (at your option) any later version.
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Library General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
-The GNU C Library is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
-Library General Public License for more details.
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Library General Public License for more details.
-You should have received a copy of the GNU Library General Public
-License along with the GNU C Library; see the file COPYING.LIB. If
-not, write to the Free Software Foundation, Inc., 675 Mass Ave,
-Cambridge, MA 02139, USA. */
+ You should have received a copy of the GNU Library General Public
+ License along with the GNU C Library; see the file COPYING.LIB. If not,
+ write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ Boston, MA 02111-1307, USA. */
#include "../locale/localeinfo.h"
#include <errno.h>
@@ -24,6 +24,7 @@ Cambridge, MA 02139, USA. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <wctype.h>
#include <libc-lock.h>
#ifdef __GNUC__
@@ -55,6 +56,12 @@ Cambridge, MA 02139, USA. */
# define va_list _IO_va_list
# define ungetc(c, s) (--read_in, _IO_ungetc (c, s))
# define inchar() ((c = _IO_getc_unlocked (s)), (void) ++read_in, c)
+# define encode_error() do { \
+ if (errp != NULL) *errp |= 4; \
+ _IO_funlockfile (s); \
+ __set_errno (EILSEQ); \
+ return done; \
+ } while (0)
# define conv_error() do { \
if (errp != NULL) *errp |= 2; \
_IO_funlockfile (s); \
@@ -88,6 +95,11 @@ Cambridge, MA 02139, USA. */
#else
# define ungetc(c, s) (--read_in, ungetc (c, s))
# define inchar() ((c = getc (s)), (void) ++read_in, c)
+# define encode_error() do { \
+ _IO_funlockfile (s); \
+ __set_errno (EILSEQ); \
+ return done; \
+ } while (0)
# define conv_error() do { \
funlockfile (s); \
return done; \
@@ -173,6 +185,7 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
} num;
/* Character-buffer pointer. */
char *str = NULL;
+ wchar_t *wstr = NULL;
char **strptr = NULL;
size_t strsize = 0;
/* We must not react on white spaces immediately because they can
@@ -435,40 +448,138 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
break;
case 'c': /* Match characters. */
- if (!(flags & SUPPRESS))
+ if ((flags & LONG) == 0)
{
- str = ARG (char *);
- if (str == NULL)
- conv_error ();
- }
+ if (!(flags & SUPPRESS))
+ {
+ str = ARG (char *);
+ if (str == NULL)
+ conv_error ();
+ }
- c = inchar ();
- if (c == EOF)
- input_error ();
+ c = inchar ();
+ if (c == EOF)
+ input_error ();
- if (width == -1)
- width = 1;
+ if (width == -1)
+ width = 1;
- if (!(flags & SUPPRESS))
- {
- do
- *str++ = c;
- while (--width > 0 && inchar () != EOF);
+ if (!(flags & SUPPRESS))
+ {
+ do
+ *str++ = c;
+ while (--width > 0 && inchar () != EOF);
+ }
+ else
+ while (--width > 0 && inchar () != EOF);
+
+ if (width > 0)
+ /* I.e., EOF was read. */
+ --read_in;
+
+ if (!(flags & SUPPRESS))
+ ++done;
+
+ break;
}
- else
- while (--width > 0 && inchar () != EOF);
+ /* FALLTHROUGH */
+ case 'C':
+ /* Get UTF-8 encoded wide character. Here we assume (as in
+ other parts of the libc) that we only have to handle
+ UTF-8. */
+ {
+ wint_t val;
+ size_t cnt = 0;
+ int first = 1;
- if (width > 0)
- /* I.e., EOF was read. */
- --read_in;
+ if (!(flags & SUPPRESS))
+ {
+ wstr = ARG (wchar_t *);
+ if (str == NULL)
+ conv_error ();
+ }
- if (!(flags & SUPPRESS))
- ++done;
+ do
+ {
+#define NEXT_WIDE_CHAR(First) \
+ c = inchar (); \
+ if (c == EOF) \
+ /* EOF is only an error for the first character. */ \
+ if (First) \
+ input_error (); \
+ else \
+ break; \
+ val = c; \
+ if (val >= 0x80) \
+ { \
+ if ((c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
+ encode_error (); \
+ if ((c & 0xe0) == 0xc0) \
+ { \
+ /* We expect two bytes. */ \
+ cnt = 1; \
+ val &= 0x1f; \
+ } \
+ else if ((c & 0xf0) == 0xe0) \
+ { \
+ /* We expect three bytes. */ \
+ cnt = 2; \
+ val &= 0x0f; \
+ } \
+ else if ((c & 0xf8) == 0xf0) \
+ { \
+ /* We expect four bytes. */ \
+ cnt = 3; \
+ val &= 0x07; \
+ } \
+ else if ((c & 0xfc) == 0xf8) \
+ { \
+ /* We expect five bytes. */ \
+ cnt = 4; \
+ val &= 0x03; \
+ } \
+ else \
+ { \
+ /* We expect six bytes. */ \
+ cnt = 5; \
+ val &= 0x01; \
+ } \
+ \
+ do \
+ { \
+ c = inchar (); \
+ if (c == EOF \
+ || (c & 0xc0) == 0x80 || (c & 0xfe) == 0xfe) \
+ encode_error (); \
+ val <<= 6; \
+ val |= c & 0x3f; \
+ } \
+ while (--cnt > 0); \
+ } \
+ \
+ if (!(flags & SUPPRESS)) \
+ *wstr++ = val; \
+ first = 0
+
+ NEXT_WIDE_CHAR (first);
+ }
+ while (--width > 0);
+ if (width > 0)
+ /* I.e., EOF was read. */
+ --read_in;
+
+ if (!(flags & SUPPRESS))
+ ++done;
+ }
break;
case 's': /* Read a string. */
-#define STRING_ARG \
+ if (flags & LONG)
+ /* We have to process a wide character string. */
+ goto wide_char_string;
+
+#define STRING_ARG(Str, Type) \
if (!(flags & SUPPRESS)) \
{ \
if (flags & MALLOC) \
@@ -479,14 +590,15 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
conv_error (); \
/* Allocate an initial buffer. */ \
strsize = 100; \
- *strptr = str = malloc (strsize); \
+ *strptr = malloc (strsize * sizeof (Type)); \
+ Str = (Type *) *strptr; \
} \
else \
- str = ARG (char *); \
- if (str == NULL) \
+ Str = ARG (Type *); \
+ if (Str == NULL) \
conv_error (); \
}
- STRING_ARG;
+ STRING_ARG (str, char);
c = inchar ();
if (c == EOF)
@@ -499,43 +611,44 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
ungetc (c, s);
break;
}
-#define STRING_ADD_CHAR(c) \
+#define STRING_ADD_CHAR(Str, c, Type) \
if (!(flags & SUPPRESS)) \
{ \
- *str++ = c; \
- if ((flags & MALLOC) && str == *strptr + strsize) \
+ *Str++ = c; \
+ if ((flags & MALLOC) && (char *) Str == *strptr + strsize) \
{ \
/* Enlarge the buffer. */ \
- str = realloc (*strptr, strsize * 2); \
- if (str == NULL) \
+ Str = realloc (*strptr, strsize * 2 * sizeof (Type)); \
+ if (Str == NULL) \
{ \
/* Can't allocate that much. Last-ditch effort. */\
- str = realloc (*strptr, strsize + 1); \
- if (str == NULL) \
+ Str = realloc (*strptr, \
+ (strsize + 1) * sizeof (Type)); \
+ if (Str == NULL) \
{ \
/* We lose. Oh well. \
Terminate the string and stop converting, \
so at least we don't skip any input. */ \
- (*strptr)[strsize] = '\0'; \
+ ((Type *) (*strptr))[strsize] = '\0'; \
++done; \
conv_error (); \
} \
else \
{ \
- *strptr = str; \
- str += strsize; \
+ *strptr = (char *) Str; \
+ Str = ((Type *) *strptr) + strsize; \
++strsize; \
} \
} \
else \
{ \
- *strptr = str; \
- str += strsize; \
+ *strptr = (char *) Str; \
+ Str = ((Type *) *strptr) + strsize; \
strsize *= 2; \
} \
} \
}
- STRING_ADD_CHAR (c);
+ STRING_ADD_CHAR (str, c, char);
} while ((width <= 0 || --width > 0) && inchar () != EOF);
if (!(flags & SUPPRESS))
@@ -545,6 +658,43 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
}
break;
+ case 'S':
+ /* Wide character string. */
+ wide_char_string:
+ {
+ wint_t val;
+ int first = 1;
+ STRING_ARG (wstr, wchar_t);
+
+ do
+ {
+ size_t cnt = 0;
+ NEXT_WIDE_CHAR (first);
+
+ if (iswspace (val))
+ {
+ /* XXX We would have to push back the whole wide char
+ with possibly many bytes. But since scanf does
+ not make a difference for white space characters
+ we can simply push back a simple <SP> which is
+ guaranteed to be in the [:space:] class. */
+ ungetc (' ', s);
+ break;
+ }
+
+ STRING_ADD_CHAR (wstr, val, wchar_t);
+ first = 0;
+ }
+ while (width <= 0 || --width > 0);
+
+ if (!(flags & SUPPRESS))
+ {
+ *wstr = L'\0';
+ ++done;
+ }
+ }
+ break;
+
case 'x': /* Hexadecimal integer. */
case 'X': /* Ditto. */
base = 16;
@@ -765,11 +915,19 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
break;
case '[': /* Character class. */
- STRING_ARG;
+ if (flags & LONG)
+ {
+ STRING_ARG (wstr, wchar_t);
+ c = '\0'; /* This is to keep gcc quiet. */
+ }
+ else
+ {
+ STRING_ARG (str, char);
- c = inchar ();
- if (c == EOF)
- input_error ();
+ c = inchar ();
+ if (c == EOF)
+ input_error ();
+ }
if (*f == '^')
{
@@ -814,31 +972,72 @@ __vfscanf (FILE *s, const char *format, va_list argptr)
}
if (fc == '\0')
{
- ungetc (c, s);
+ if (!(flags & LONG))
+ ungetc (c, s);
conv_error();
}
- num.ul = read_in - 1; /* -1 because we already read one char. */
- do
+ if (flags & LONG)
{
- if (wp[c] == not_in)
+ wint_t val;
+ int first = 1;
+
+ do
{
- ungetc (c, s);
- break;
+ size_t cnt = 0;
+ NEXT_WIDE_CHAR (first);
+ if (val > 255 || wp[val] == not_in)
+ {
+ /* XXX We have a problem here. We read a wide
+ character and this possibly took several
+ bytes. But we can only push back one single
+ character. To be sure we don't create wrong
+ input we push it back only in case it is
+ representable within one byte. */
+ if (val < 0x80)
+ ungetc (val, s);
+ break;
+ }
+ STRING_ADD_CHAR (wstr, val, wchar_t);
+ if (width > 0)
+ --width;
+ first = 0;
}
- STRING_ADD_CHAR (c);
- if (width > 0)
- --width;
- }
- while (width != 0 && inchar () != EOF);
+ while (width != 0);
- if (read_in == num.ul)
- conv_error ();
+ if (first)
+ conv_error ();
- if (!(flags & SUPPRESS))
+ if (!(flags & SUPPRESS))
+ {
+ *wstr = L'\0';
+ ++done;
+ }
+ }
+ else
{
- *str = '\0';
- ++done;
+ num.ul = read_in - 1; /* -1 because we already read one char. */
+ do
+ {
+ if (wp[c] == not_in)
+ {
+ ungetc (c, s);
+ break;
+ }
+ STRING_ADD_CHAR (str, c, char);
+ if (width > 0)
+ --width;
+ }
+ while (width != 0 && inchar () != EOF);
+
+ if (read_in == num.ul)
+ conv_error ();
+
+ if (!(flags & SUPPRESS))
+ {
+ *str = '\0';
+ ++done;
+ }
}
break;