From be503db4d0f217bb1bb4b3e3639402d386378d1b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 15 Dec 2024 10:44:15 -0800 Subject: vsprintf: simplify number handling Instead of dealing with all the different special types (size_t, unsigned char, ptrdiff_t..) just deal with the size of the integer type and the sign. This avoids a lot of unnecessary case statements, and the games we play with the value of the 'SIGN' flags value Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 144 +++++++++++++++++---------------------------------------- 1 file changed, 42 insertions(+), 102 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 9d3dac38a3f4a..4ed9a37b5e168 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -407,7 +407,7 @@ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) return len + width; } -#define SIGN 1 /* unsigned/signed, must be 1 */ +#define SIGN 1 /* unsigned/signed */ #define LEFT 2 /* left justified */ #define PLUS 4 /* show plus */ #define SPACE 8 /* space if plus */ @@ -415,12 +415,15 @@ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) #define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ #define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ -static_assert(SIGN == 1); static_assert(ZEROPAD == ('0' - ' ')); static_assert(SMALL == ('a' ^ 'A')); enum format_type { FORMAT_TYPE_NONE, /* Just a string part */ + FORMAT_TYPE_1BYTE = 1, /* char/short/int are their own sizes */ + FORMAT_TYPE_2BYTE = 2, + FORMAT_TYPE_8BYTE = 3, + FORMAT_TYPE_4BYTE = 4, FORMAT_TYPE_WIDTH, FORMAT_TYPE_PRECISION, FORMAT_TYPE_CHAR, @@ -428,19 +431,10 @@ enum format_type { FORMAT_TYPE_PTR, FORMAT_TYPE_PERCENT_CHAR, FORMAT_TYPE_INVALID, - FORMAT_TYPE_LONG_LONG, - FORMAT_TYPE_ULONG, - FORMAT_TYPE_LONG, - FORMAT_TYPE_UBYTE, - FORMAT_TYPE_BYTE, - FORMAT_TYPE_USHORT, - FORMAT_TYPE_SHORT, - FORMAT_TYPE_UINT, - FORMAT_TYPE_INT, - FORMAT_TYPE_SIZE_T, - FORMAT_TYPE_PTRDIFF }; +#define FORMAT_TYPE_SIZE(type) (sizeof(type) <= 4 ? sizeof(type) : FORMAT_TYPE_8BYTE) + struct printf_spec { unsigned int type:8; /* format_type enum */ signed int field_width:24; /* width of output field */ @@ -2707,23 +2701,19 @@ qualifier: } if (qualifier == 'L') - spec->type = FORMAT_TYPE_LONG_LONG; + spec->type = FORMAT_TYPE_SIZE(long long); else if (qualifier == 'l') { - BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG); - spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN); + spec->type = FORMAT_TYPE_SIZE(long); } else if (qualifier == 'z') { - spec->type = FORMAT_TYPE_SIZE_T; + spec->type = FORMAT_TYPE_SIZE(size_t); } else if (qualifier == 't') { - spec->type = FORMAT_TYPE_PTRDIFF; + spec->type = FORMAT_TYPE_SIZE(ptrdiff_t); } else if (qualifier == 'H') { - BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE); - spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN); + spec->type = FORMAT_TYPE_SIZE(char); } else if (qualifier == 'h') { - BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT); - spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN); + spec->type = FORMAT_TYPE_SIZE(short); } else { - BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT); - spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN); + spec->type = FORMAT_TYPE_SIZE(int); } return ++fmt - start; @@ -2747,6 +2737,22 @@ set_precision(struct printf_spec *spec, int prec) } } +/* + * Turn a 1/2/4-byte value into a 64-bit one for printing: truncate + * as necessary and deal with signedness. + * + * The 'spec.type' is the size of the value in bytes. + */ +static unsigned long long convert_num_spec(unsigned int val, struct printf_spec spec) +{ + unsigned int shift = 32 - spec.type*8; + + val <<= shift; + if (!(spec.flags & SIGN)) + return val >> shift; + return (int)val >> shift; +} + /** * vsnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into @@ -2873,43 +2879,10 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) goto out; default: - switch (spec.type) { - case FORMAT_TYPE_LONG_LONG: + if (spec.type == FORMAT_TYPE_8BYTE) num = va_arg(args, long long); - break; - case FORMAT_TYPE_ULONG: - num = va_arg(args, unsigned long); - break; - case FORMAT_TYPE_LONG: - num = va_arg(args, long); - break; - case FORMAT_TYPE_SIZE_T: - if (spec.flags & SIGN) - num = va_arg(args, ssize_t); - else - num = va_arg(args, size_t); - break; - case FORMAT_TYPE_PTRDIFF: - num = va_arg(args, ptrdiff_t); - break; - case FORMAT_TYPE_UBYTE: - num = (unsigned char) va_arg(args, int); - break; - case FORMAT_TYPE_BYTE: - num = (signed char) va_arg(args, int); - break; - case FORMAT_TYPE_USHORT: - num = (unsigned short) va_arg(args, int); - break; - case FORMAT_TYPE_SHORT: - num = (short) va_arg(args, int); - break; - case FORMAT_TYPE_INT: - num = (int) va_arg(args, int); - break; - default: - num = va_arg(args, unsigned int); - } + else + num = convert_num_spec(va_arg(args, int), spec); str = number(str, end, num, spec); } @@ -3183,26 +3156,13 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) default: switch (spec.type) { - - case FORMAT_TYPE_LONG_LONG: + case FORMAT_TYPE_8BYTE: save_arg(long long); break; - case FORMAT_TYPE_ULONG: - case FORMAT_TYPE_LONG: - save_arg(unsigned long); - break; - case FORMAT_TYPE_SIZE_T: - save_arg(size_t); - break; - case FORMAT_TYPE_PTRDIFF: - save_arg(ptrdiff_t); - break; - case FORMAT_TYPE_UBYTE: - case FORMAT_TYPE_BYTE: + case FORMAT_TYPE_1BYTE: save_arg(char); break; - case FORMAT_TYPE_USHORT: - case FORMAT_TYPE_SHORT: + case FORMAT_TYPE_2BYTE: save_arg(short); break; default: @@ -3375,37 +3335,17 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) unsigned long long num; switch (spec.type) { - - case FORMAT_TYPE_LONG_LONG: + case FORMAT_TYPE_8BYTE: num = get_arg(long long); break; - case FORMAT_TYPE_ULONG: - case FORMAT_TYPE_LONG: - num = get_arg(unsigned long); - break; - case FORMAT_TYPE_SIZE_T: - num = get_arg(size_t); - break; - case FORMAT_TYPE_PTRDIFF: - num = get_arg(ptrdiff_t); - break; - case FORMAT_TYPE_UBYTE: - num = get_arg(unsigned char); - break; - case FORMAT_TYPE_BYTE: - num = get_arg(signed char); - break; - case FORMAT_TYPE_USHORT: - num = get_arg(unsigned short); - break; - case FORMAT_TYPE_SHORT: - num = get_arg(short); + case FORMAT_TYPE_2BYTE: + num = convert_num_spec(get_arg(short), spec); break; - case FORMAT_TYPE_UINT: - num = get_arg(unsigned int); + case FORMAT_TYPE_1BYTE: + num = convert_num_spec(get_arg(char), spec); break; default: - num = get_arg(int); + num = convert_num_spec(get_arg(int), spec); } str = number(str, end, num, spec); -- cgit v1.2.3 From 03d23941bf03eecd8560e40238decb1515f264f6 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 16 Dec 2024 10:26:53 -0800 Subject: vsprintf: avoid nested switch statement on same variable Now that we have simplified the number format types, the top-level switch table can easily just handle all the remaining cases, and we don't need to have a case statement with a conditional on the same expression as the switch statement. We do want to fall through to the common 'number()' case, but that's trivially done by making the other case statements use 'continue' instead of 'break'. They are just looping back to the top, after all. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 99 ++++++++++++++++++++++++++++------------------------------ 1 file changed, 47 insertions(+), 52 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4ed9a37b5e168..befd0075113f4 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2816,16 +2816,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) memcpy(str, old_fmt, copy); } str += read; - break; + continue; } case FORMAT_TYPE_WIDTH: set_field_width(&spec, va_arg(args, int)); - break; + continue; case FORMAT_TYPE_PRECISION: set_precision(&spec, va_arg(args, int)); - break; + continue; case FORMAT_TYPE_CHAR: { char c; @@ -2847,25 +2847,25 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) *str = ' '; ++str; } - break; + continue; } case FORMAT_TYPE_STR: str = string(str, end, va_arg(args, char *), spec); - break; + continue; case FORMAT_TYPE_PTR: str = pointer(fmt, str, end, va_arg(args, void *), spec); while (isalnum(*fmt)) fmt++; - break; + continue; case FORMAT_TYPE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; - break; + continue; case FORMAT_TYPE_INVALID: /* @@ -2878,14 +2878,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) */ goto out; - default: - if (spec.type == FORMAT_TYPE_8BYTE) - num = va_arg(args, long long); - else - num = convert_num_spec(va_arg(args, int), spec); + case FORMAT_TYPE_8BYTE: + num = va_arg(args, long long); + break; - str = number(str, end, num, spec); + default: + num = convert_num_spec(va_arg(args, int), spec); + break; } + + str = number(str, end, num, spec); } out: @@ -3154,20 +3156,17 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) fmt++; break; + case FORMAT_TYPE_8BYTE: + save_arg(long long); + break; + case FORMAT_TYPE_1BYTE: + save_arg(char); + break; + case FORMAT_TYPE_2BYTE: + save_arg(short); + break; default: - switch (spec.type) { - case FORMAT_TYPE_8BYTE: - save_arg(long long); - break; - case FORMAT_TYPE_1BYTE: - save_arg(char); - break; - case FORMAT_TYPE_2BYTE: - save_arg(short); - break; - default: - save_arg(int); - } + save_arg(int); } } @@ -3235,6 +3234,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) while (*fmt) { const char *old_fmt = fmt; int read = format_decode(fmt, &spec); + unsigned long long num; fmt += read; @@ -3247,16 +3247,16 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) memcpy(str, old_fmt, copy); } str += read; - break; + continue; } case FORMAT_TYPE_WIDTH: set_field_width(&spec, get_arg(int)); - break; + continue; case FORMAT_TYPE_PRECISION: set_precision(&spec, get_arg(int)); - break; + continue; case FORMAT_TYPE_CHAR: { char c; @@ -3277,14 +3277,14 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) *str = ' '; ++str; } - break; + continue; } case FORMAT_TYPE_STR: { const char *str_arg = args; args += strlen(str_arg) + 1; str = string(str, end, (char *)str_arg, spec); - break; + continue; } case FORMAT_TYPE_PTR: { @@ -3319,38 +3319,33 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) while (isalnum(*fmt)) fmt++; - break; + continue; } case FORMAT_TYPE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; - break; + continue; case FORMAT_TYPE_INVALID: goto out; - default: { - unsigned long long num; - - switch (spec.type) { - case FORMAT_TYPE_8BYTE: - num = get_arg(long long); - break; - case FORMAT_TYPE_2BYTE: - num = convert_num_spec(get_arg(short), spec); - break; - case FORMAT_TYPE_1BYTE: - num = convert_num_spec(get_arg(char), spec); - break; - default: - num = convert_num_spec(get_arg(int), spec); - } + case FORMAT_TYPE_8BYTE: + num = get_arg(long long); + break; + case FORMAT_TYPE_2BYTE: + num = convert_num_spec(get_arg(short), spec); + break; + case FORMAT_TYPE_1BYTE: + num = convert_num_spec(get_arg(char), spec); + break; + default: + num = convert_num_spec(get_arg(int), spec); + break; + } - str = number(str, end, num, spec); - } /* default: */ - } /* switch(spec.type) */ + str = number(str, end, num, spec); } /* while(*fmt) */ out: -- cgit v1.2.3 From 9e0e6d8a3268e805e061ae8b22f14e37b157102a Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 16 Dec 2024 15:37:06 -0800 Subject: vsprintf: fix calling convention for format_decode() Every single caller wants to know what the next format location is, but instead the function returned the length of the processed part and so every single return statement in the format_decode() function was instead subtracting the start of the format string. The callers that that did want to know the length (in addition to the end of the format processing) already had to save off the start of the format string anyway. So this was all just doing extra processing both on the caller and callee sides. Just change the calling convention to return the end of the format processing, making everything simpler (and preparing for yet more simplification to come). Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index befd0075113f4..d9f749cf04e70 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2546,7 +2546,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * @qualifier: qualifier of a number (long, size_t, ...) */ static noinline_for_stack -int format_decode(const char *fmt, struct printf_spec *spec) +const char *format_decode(const char *fmt, struct printf_spec *spec) { const char *start = fmt; char qualifier; @@ -2580,7 +2580,7 @@ int format_decode(const char *fmt, struct printf_spec *spec) /* Return the current non-format string */ if (fmt != start || !*fmt) - return fmt - start; + return fmt; /* Process flags */ spec->flags = 0; @@ -2611,7 +2611,7 @@ int format_decode(const char *fmt, struct printf_spec *spec) else if (*fmt == '*') { /* it's the next argument */ spec->type = FORMAT_TYPE_WIDTH; - return ++fmt - start; + return ++fmt; } precision: @@ -2626,7 +2626,7 @@ precision: } else if (*fmt == '*') { /* it's the next argument */ spec->type = FORMAT_TYPE_PRECISION; - return ++fmt - start; + return ++fmt; } } @@ -2652,19 +2652,19 @@ qualifier: switch (*fmt) { case 'c': spec->type = FORMAT_TYPE_CHAR; - return ++fmt - start; + return ++fmt; case 's': spec->type = FORMAT_TYPE_STR; - return ++fmt - start; + return ++fmt; case 'p': spec->type = FORMAT_TYPE_PTR; - return ++fmt - start; + return ++fmt; case '%': spec->type = FORMAT_TYPE_PERCENT_CHAR; - return ++fmt - start; + return ++fmt; /* integer number formats - set up the flags and "break" */ case 'o': @@ -2697,7 +2697,7 @@ qualifier: default: WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt); spec->type = FORMAT_TYPE_INVALID; - return fmt - start; + return fmt; } if (qualifier == 'L') @@ -2716,7 +2716,7 @@ qualifier: spec->type = FORMAT_TYPE_SIZE(int); } - return ++fmt - start; + return ++fmt; } static void @@ -2803,14 +2803,14 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) while (*fmt) { const char *old_fmt = fmt; - int read = format_decode(fmt, &spec); - fmt += read; + fmt = format_decode(fmt, &spec); switch (spec.type) { case FORMAT_TYPE_NONE: { - int copy = read; + int read = fmt - old_fmt; if (str < end) { + int copy = read; if (copy > end - str) copy = end - str; memcpy(str, old_fmt, copy); @@ -3089,9 +3089,7 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) }) while (*fmt) { - int read = format_decode(fmt, &spec); - - fmt += read; + fmt = format_decode(fmt, &spec); switch (spec.type) { case FORMAT_TYPE_NONE: @@ -3233,15 +3231,14 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) while (*fmt) { const char *old_fmt = fmt; - int read = format_decode(fmt, &spec); unsigned long long num; - fmt += read; - + fmt = format_decode(fmt, &spec); switch (spec.type) { case FORMAT_TYPE_NONE: { - int copy = read; + int read = fmt - old_fmt; if (str < end) { + int copy = read; if (copy > end - str) copy = end - str; memcpy(str, old_fmt, copy); -- cgit v1.2.3 From 938df695e98db7e0df540cce3b12da8c382955b4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 16 Dec 2024 15:44:10 -0800 Subject: vsprintf: associate the format state with the format pointer The vsnprintf() code is written as a state machine as it walks the format pointer, but for various historical reasons the state is oddly named and was encoded as the 'type' field in the 'struct printf_spec'. That naming came from the fact that the states used to not just encode the state of the state machine, but also the various integer types that would then be printed out. Let's make the state machine more obvious, and actually call it 'state', and associate it with the format pointer itself, rather than the 'printf_spec' that contains the currently decoded formatting specs. This also removes the bit packing from printf_spec, which makes it much easier on the compiler. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 292 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 157 insertions(+), 135 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index d9f749cf04e70..e6bd223b0184a 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -418,29 +418,28 @@ int num_to_str(char *buf, int size, unsigned long long num, unsigned int width) static_assert(ZEROPAD == ('0' - ' ')); static_assert(SMALL == ('a' ^ 'A')); -enum format_type { - FORMAT_TYPE_NONE, /* Just a string part */ - FORMAT_TYPE_1BYTE = 1, /* char/short/int are their own sizes */ - FORMAT_TYPE_2BYTE = 2, - FORMAT_TYPE_8BYTE = 3, - FORMAT_TYPE_4BYTE = 4, - FORMAT_TYPE_WIDTH, - FORMAT_TYPE_PRECISION, - FORMAT_TYPE_CHAR, - FORMAT_TYPE_STR, - FORMAT_TYPE_PTR, - FORMAT_TYPE_PERCENT_CHAR, - FORMAT_TYPE_INVALID, +enum format_state { + FORMAT_STATE_NONE, /* Just a string part */ + FORMAT_STATE_1BYTE = 1, /* char/short/int are their own sizes */ + FORMAT_STATE_2BYTE = 2, + FORMAT_STATE_8BYTE = 3, + FORMAT_STATE_4BYTE = 4, + FORMAT_STATE_WIDTH, + FORMAT_STATE_PRECISION, + FORMAT_STATE_CHAR, + FORMAT_STATE_STR, + FORMAT_STATE_PTR, + FORMAT_STATE_PERCENT_CHAR, + FORMAT_STATE_INVALID, }; -#define FORMAT_TYPE_SIZE(type) (sizeof(type) <= 4 ? sizeof(type) : FORMAT_TYPE_8BYTE) +#define FORMAT_STATE_SIZE(type) (sizeof(type) <= 4 ? sizeof(type) : FORMAT_STATE_8BYTE) struct printf_spec { - unsigned int type:8; /* format_type enum */ - signed int field_width:24; /* width of output field */ - unsigned int flags:8; /* flags to number() */ - unsigned int base:8; /* number base, 8, 10 or 16 only */ - signed int precision:16; /* # of digits/chars */ + unsigned char flags; /* flags to number() */ + unsigned char base; /* number base, 8, 10 or 16 only */ + short precision; /* # of digits/chars */ + int field_width; /* width of output field */ } __packed; static_assert(sizeof(struct printf_spec) == 8); @@ -573,7 +572,6 @@ char *special_hex_number(char *buf, char *end, unsigned long long num, int size) { struct printf_spec spec; - spec.type = FORMAT_TYPE_PTR; spec.field_width = 2 + 2 * size; /* 0x + hex */ spec.flags = SPECIAL | SMALL | ZEROPAD; spec.base = 16; @@ -2524,6 +2522,11 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } } +struct fmt { + const char *str; + enum format_state state; +}; + /* * Helper function to decode printf style format. * Each call decode a token from the format and return the @@ -2546,40 +2549,40 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, * @qualifier: qualifier of a number (long, size_t, ...) */ static noinline_for_stack -const char *format_decode(const char *fmt, struct printf_spec *spec) +struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) { - const char *start = fmt; + const char *start = fmt.str; char qualifier; /* we finished early by reading the field width */ - if (spec->type == FORMAT_TYPE_WIDTH) { + if (fmt.state == FORMAT_STATE_WIDTH) { if (spec->field_width < 0) { spec->field_width = -spec->field_width; spec->flags |= LEFT; } - spec->type = FORMAT_TYPE_NONE; + fmt.state = FORMAT_STATE_NONE; goto precision; } /* we finished early by reading the precision */ - if (spec->type == FORMAT_TYPE_PRECISION) { + if (fmt.state == FORMAT_STATE_PRECISION) { if (spec->precision < 0) spec->precision = 0; - spec->type = FORMAT_TYPE_NONE; + fmt.state = FORMAT_STATE_NONE; goto qualifier; } /* By default */ - spec->type = FORMAT_TYPE_NONE; + fmt.state = FORMAT_STATE_NONE; - for (; *fmt ; ++fmt) { - if (*fmt == '%') + for (; *fmt.str ; fmt.str++) { + if (*fmt.str == '%') break; } /* Return the current non-format string */ - if (fmt != start || !*fmt) + if (fmt.str != start || !*fmt.str) return fmt; /* Process flags */ @@ -2588,9 +2591,9 @@ const char *format_decode(const char *fmt, struct printf_spec *spec) while (1) { /* this also skips first '%' */ bool found = true; - ++fmt; + fmt.str++; - switch (*fmt) { + switch (*fmt.str) { case '-': spec->flags |= LEFT; break; case '+': spec->flags |= PLUS; break; case ' ': spec->flags |= SPACE; break; @@ -2606,65 +2609,71 @@ const char *format_decode(const char *fmt, struct printf_spec *spec) /* get field width */ spec->field_width = -1; - if (isdigit(*fmt)) - spec->field_width = skip_atoi(&fmt); - else if (*fmt == '*') { + if (isdigit(*fmt.str)) + spec->field_width = skip_atoi(&fmt.str); + else if (*fmt.str == '*') { /* it's the next argument */ - spec->type = FORMAT_TYPE_WIDTH; - return ++fmt; + fmt.state = FORMAT_STATE_WIDTH; + fmt.str++; + return fmt; } precision: /* get the precision */ spec->precision = -1; - if (*fmt == '.') { - ++fmt; - if (isdigit(*fmt)) { - spec->precision = skip_atoi(&fmt); + if (*fmt.str == '.') { + fmt.str++; + if (isdigit(*fmt.str)) { + spec->precision = skip_atoi(&fmt.str); if (spec->precision < 0) spec->precision = 0; - } else if (*fmt == '*') { + } else if (*fmt.str == '*') { /* it's the next argument */ - spec->type = FORMAT_TYPE_PRECISION; - return ++fmt; + fmt.state = FORMAT_STATE_PRECISION; + fmt.str++; + return fmt; } } qualifier: /* get the conversion qualifier */ qualifier = 0; - if (*fmt == 'h' || _tolower(*fmt) == 'l' || - *fmt == 'z' || *fmt == 't') { - qualifier = *fmt++; - if (unlikely(qualifier == *fmt)) { + if (*fmt.str == 'h' || _tolower(*fmt.str) == 'l' || + *fmt.str == 'z' || *fmt.str == 't') { + qualifier = *fmt.str++; + if (unlikely(qualifier == *fmt.str)) { if (qualifier == 'l') { qualifier = 'L'; - ++fmt; + fmt.str++; } else if (qualifier == 'h') { qualifier = 'H'; - ++fmt; + fmt.str++; } } } /* default base */ spec->base = 10; - switch (*fmt) { + switch (*fmt.str) { case 'c': - spec->type = FORMAT_TYPE_CHAR; - return ++fmt; + fmt.state = FORMAT_STATE_CHAR; + fmt.str++; + return fmt; case 's': - spec->type = FORMAT_TYPE_STR; - return ++fmt; + fmt.state = FORMAT_STATE_STR; + fmt.str++; + return fmt; case 'p': - spec->type = FORMAT_TYPE_PTR; - return ++fmt; + fmt.state = FORMAT_STATE_PTR; + fmt.str++; + return fmt; case '%': - spec->type = FORMAT_TYPE_PERCENT_CHAR; - return ++fmt; + fmt.state = FORMAT_STATE_PERCENT_CHAR; + fmt.str++; + return fmt; /* integer number formats - set up the flags and "break" */ case 'o': @@ -2695,28 +2704,29 @@ qualifier: fallthrough; default: - WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt); - spec->type = FORMAT_TYPE_INVALID; + WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt.str); + fmt.state = FORMAT_STATE_INVALID; return fmt; } if (qualifier == 'L') - spec->type = FORMAT_TYPE_SIZE(long long); + fmt.state = FORMAT_STATE_SIZE(long long); else if (qualifier == 'l') { - spec->type = FORMAT_TYPE_SIZE(long); + fmt.state = FORMAT_STATE_SIZE(long); } else if (qualifier == 'z') { - spec->type = FORMAT_TYPE_SIZE(size_t); + fmt.state = FORMAT_STATE_SIZE(size_t); } else if (qualifier == 't') { - spec->type = FORMAT_TYPE_SIZE(ptrdiff_t); + fmt.state = FORMAT_STATE_SIZE(ptrdiff_t); } else if (qualifier == 'H') { - spec->type = FORMAT_TYPE_SIZE(char); + fmt.state = FORMAT_STATE_SIZE(char); } else if (qualifier == 'h') { - spec->type = FORMAT_TYPE_SIZE(short); + fmt.state = FORMAT_STATE_SIZE(short); } else { - spec->type = FORMAT_TYPE_SIZE(int); + fmt.state = FORMAT_STATE_SIZE(int); } - return ++fmt; + fmt.str++; + return fmt; } static void @@ -2741,11 +2751,11 @@ set_precision(struct printf_spec *spec, int prec) * Turn a 1/2/4-byte value into a 64-bit one for printing: truncate * as necessary and deal with signedness. * - * The 'spec.type' is the size of the value in bytes. + * 'size' is the size of the value in bytes. */ -static unsigned long long convert_num_spec(unsigned int val, struct printf_spec spec) +static unsigned long long convert_num_spec(unsigned int val, int size, struct printf_spec spec) { - unsigned int shift = 32 - spec.type*8; + unsigned int shift = 32 - size*8; val <<= shift; if (!(spec.flags & SIGN)) @@ -2781,11 +2791,15 @@ static unsigned long long convert_num_spec(unsigned int val, struct printf_spec * * If you're not already dealing with a va_list consider using snprintf(). */ -int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +int vsnprintf(char *buf, size_t size, const char *fmt_str, va_list args) { unsigned long long num; char *str, *end; struct printf_spec spec = {0}; + struct fmt fmt = { + .str = fmt_str, + .state = FORMAT_STATE_NONE, + }; /* Reject out-of-range values early. Large positive sizes are used for unknown buffer sizes. */ @@ -2801,14 +2815,14 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) size = end - buf; } - while (*fmt) { - const char *old_fmt = fmt; + while (*fmt.str) { + const char *old_fmt = fmt.str; fmt = format_decode(fmt, &spec); - switch (spec.type) { - case FORMAT_TYPE_NONE: { - int read = fmt - old_fmt; + switch (fmt.state) { + case FORMAT_STATE_NONE: { + int read = fmt.str - old_fmt; if (str < end) { int copy = read; if (copy > end - str) @@ -2819,15 +2833,15 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) continue; } - case FORMAT_TYPE_WIDTH: + case FORMAT_STATE_WIDTH: set_field_width(&spec, va_arg(args, int)); continue; - case FORMAT_TYPE_PRECISION: + case FORMAT_STATE_PRECISION: set_precision(&spec, va_arg(args, int)); continue; - case FORMAT_TYPE_CHAR: { + case FORMAT_STATE_CHAR: { char c; if (!(spec.flags & LEFT)) { @@ -2850,24 +2864,24 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) continue; } - case FORMAT_TYPE_STR: + case FORMAT_STATE_STR: str = string(str, end, va_arg(args, char *), spec); continue; - case FORMAT_TYPE_PTR: - str = pointer(fmt, str, end, va_arg(args, void *), + case FORMAT_STATE_PTR: + str = pointer(fmt.str, str, end, va_arg(args, void *), spec); - while (isalnum(*fmt)) - fmt++; + while (isalnum(*fmt.str)) + fmt.str++; continue; - case FORMAT_TYPE_PERCENT_CHAR: + case FORMAT_STATE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; continue; - case FORMAT_TYPE_INVALID: + case FORMAT_STATE_INVALID: /* * Presumably the arguments passed gcc's type * checking, but there is no safe or sane way @@ -2878,12 +2892,12 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) */ goto out; - case FORMAT_TYPE_8BYTE: + case FORMAT_STATE_8BYTE: num = va_arg(args, long long); break; default: - num = convert_num_spec(va_arg(args, int), spec); + num = convert_num_spec(va_arg(args, int), fmt.state, spec); break; } @@ -3055,8 +3069,12 @@ EXPORT_SYMBOL(sprintf); * If the return value is greater than @size, the resulting bin_buf is NOT * valid for bstr_printf(). */ -int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) +int vbin_printf(u32 *bin_buf, size_t size, const char *fmt_str, va_list args) { + struct fmt fmt = { + .str = fmt_str, + .state = FORMAT_STATE_NONE, + }; struct printf_spec spec = {0}; char *str, *end; int width; @@ -3088,29 +3106,29 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) value; \ }) - while (*fmt) { + while (*fmt.str) { fmt = format_decode(fmt, &spec); - switch (spec.type) { - case FORMAT_TYPE_NONE: - case FORMAT_TYPE_PERCENT_CHAR: + switch (fmt.state) { + case FORMAT_STATE_NONE: + case FORMAT_STATE_PERCENT_CHAR: break; - case FORMAT_TYPE_INVALID: + case FORMAT_STATE_INVALID: goto out; - case FORMAT_TYPE_WIDTH: - case FORMAT_TYPE_PRECISION: + case FORMAT_STATE_WIDTH: + case FORMAT_STATE_PRECISION: width = (int)save_arg(int); /* Pointers may require the width */ - if (*fmt == 'p') + if (*fmt.str == 'p') set_field_width(&spec, width); break; - case FORMAT_TYPE_CHAR: + case FORMAT_STATE_CHAR: save_arg(char); break; - case FORMAT_TYPE_STR: { + case FORMAT_STATE_STR: { const char *save_str = va_arg(args, char *); const char *err_msg; size_t len; @@ -3126,9 +3144,9 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) break; } - case FORMAT_TYPE_PTR: + case FORMAT_STATE_PTR: /* Dereferenced pointers must be done now */ - switch (*fmt) { + switch (*fmt.str) { /* Dereference of functions is still OK */ case 'S': case 's': @@ -3138,11 +3156,11 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) save_arg(void *); break; default: - if (!isalnum(*fmt)) { + if (!isalnum(*fmt.str)) { save_arg(void *); break; } - str = pointer(fmt, str, end, va_arg(args, void *), + str = pointer(fmt.str, str, end, va_arg(args, void *), spec); if (str + 1 < end) *str++ = '\0'; @@ -3150,17 +3168,17 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args) end[-1] = '\0'; /* Must be nul terminated */ } /* skip all alphanumeric pointer suffixes */ - while (isalnum(*fmt)) - fmt++; + while (isalnum(*fmt.str)) + fmt.str++; break; - case FORMAT_TYPE_8BYTE: + case FORMAT_STATE_8BYTE: save_arg(long long); break; - case FORMAT_TYPE_1BYTE: + case FORMAT_STATE_1BYTE: save_arg(char); break; - case FORMAT_TYPE_2BYTE: + case FORMAT_STATE_2BYTE: save_arg(short); break; default: @@ -3196,8 +3214,12 @@ EXPORT_SYMBOL_GPL(vbin_printf); * return is greater than or equal to @size, the resulting * string is truncated. */ -int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) +int bstr_printf(char *buf, size_t size, const char *fmt_str, const u32 *bin_buf) { + struct fmt fmt = { + .str = fmt_str, + .state = FORMAT_STATE_NONE, + }; struct printf_spec spec = {0}; char *str, *end; const char *args = (const char *)bin_buf; @@ -3229,14 +3251,14 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) size = end - buf; } - while (*fmt) { - const char *old_fmt = fmt; + while (*fmt.str) { + const char *old_fmt = fmt.str; unsigned long long num; fmt = format_decode(fmt, &spec); - switch (spec.type) { - case FORMAT_TYPE_NONE: { - int read = fmt - old_fmt; + switch (fmt.state) { + case FORMAT_STATE_NONE: { + int read = fmt.str - old_fmt; if (str < end) { int copy = read; if (copy > end - str) @@ -3247,15 +3269,15 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) continue; } - case FORMAT_TYPE_WIDTH: + case FORMAT_STATE_WIDTH: set_field_width(&spec, get_arg(int)); continue; - case FORMAT_TYPE_PRECISION: + case FORMAT_STATE_PRECISION: set_precision(&spec, get_arg(int)); continue; - case FORMAT_TYPE_CHAR: { + case FORMAT_STATE_CHAR: { char c; if (!(spec.flags & LEFT)) { @@ -3277,18 +3299,18 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) continue; } - case FORMAT_TYPE_STR: { + case FORMAT_STATE_STR: { const char *str_arg = args; args += strlen(str_arg) + 1; str = string(str, end, (char *)str_arg, spec); continue; } - case FORMAT_TYPE_PTR: { + case FORMAT_STATE_PTR: { bool process = false; int copy, len; /* Non function dereferences were already done */ - switch (*fmt) { + switch (*fmt.str) { case 'S': case 's': case 'x': @@ -3297,7 +3319,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) process = true; break; default: - if (!isalnum(*fmt)) { + if (!isalnum(*fmt.str)) { process = true; break; } @@ -3312,38 +3334,38 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) } } if (process) - str = pointer(fmt, str, end, get_arg(void *), spec); + str = pointer(fmt.str, str, end, get_arg(void *), spec); - while (isalnum(*fmt)) - fmt++; + while (isalnum(*fmt.str)) + fmt.str++; continue; } - case FORMAT_TYPE_PERCENT_CHAR: + case FORMAT_STATE_PERCENT_CHAR: if (str < end) *str = '%'; ++str; continue; - case FORMAT_TYPE_INVALID: + case FORMAT_STATE_INVALID: goto out; - case FORMAT_TYPE_8BYTE: + case FORMAT_STATE_8BYTE: num = get_arg(long long); break; - case FORMAT_TYPE_2BYTE: - num = convert_num_spec(get_arg(short), spec); + case FORMAT_STATE_2BYTE: + num = convert_num_spec(get_arg(short), fmt.state, spec); break; - case FORMAT_TYPE_1BYTE: - num = convert_num_spec(get_arg(char), spec); + case FORMAT_STATE_1BYTE: + num = convert_num_spec(get_arg(char), fmt.state, spec); break; default: - num = convert_num_spec(get_arg(int), spec); + num = convert_num_spec(get_arg(int), fmt.state, spec); break; } str = number(str, end, num, spec); - } /* while(*fmt) */ + } /* while(*fmt.str) */ out: if (size > 0) { -- cgit v1.2.3 From 312f48b2e27f0f8ede4260e024352fdad225d1c5 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 Dec 2024 10:02:45 -0800 Subject: vsprintf: deal with format flags with a simple lookup table Rather than a case statement, just look up the printf format flags (justification, zero-padding etc) using a small table. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 41 +++++++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 20 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index e6bd223b0184a..123e58724262a 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2527,6 +2527,20 @@ struct fmt { enum format_state state; }; +#define SPEC_CHAR(x, flag) [(x)-32] = flag +static unsigned char spec_flag(unsigned char c) +{ + static const unsigned char spec_flag_array[] = { + SPEC_CHAR(' ', SPACE), + SPEC_CHAR('#', SPECIAL), + SPEC_CHAR('+', PLUS), + SPEC_CHAR('-', LEFT), + SPEC_CHAR('0', ZEROPAD), + }; + c -= 32; + return (c < sizeof(spec_flag_array)) ? spec_flag_array[c] : 0; +} + /* * Helper function to decode printf style format. * Each call decode a token from the format and return the @@ -2552,7 +2566,7 @@ static noinline_for_stack struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) { const char *start = fmt.str; - char qualifier; + char flag, qualifier; /* we finished early by reading the field width */ if (fmt.state == FORMAT_STATE_WIDTH) { @@ -2585,26 +2599,13 @@ struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) if (fmt.str != start || !*fmt.str) return fmt; - /* Process flags */ + /* Process flags. This also skips the first '%' */ spec->flags = 0; - - while (1) { /* this also skips first '%' */ - bool found = true; - - fmt.str++; - - switch (*fmt.str) { - case '-': spec->flags |= LEFT; break; - case '+': spec->flags |= PLUS; break; - case ' ': spec->flags |= SPACE; break; - case '#': spec->flags |= SPECIAL; break; - case '0': spec->flags |= ZEROPAD; break; - default: found = false; - } - - if (!found) - break; - } + do { + /* this also skips first '%' */ + flag = spec_flag(*++fmt.str); + spec->flags |= flag; + } while (flag); /* get field width */ spec->field_width = -1; -- cgit v1.2.3 From 614d13462daef9bf6ac735744b5835a18cbfd19c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 Dec 2024 10:55:56 -0800 Subject: vsprintf: deal with format specifiers with a lookup table We did the flags as an array earlier, they had simpler rules. The final format specifiers are a bit more complex since they have more fields to deal with, and we want to handle the length modifiers at the same time. But like the flags, we're better off just making it a data-driven table rather than some case statement. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 133 +++++++++++++++++++++++---------------------------------- 1 file changed, 54 insertions(+), 79 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 123e58724262a..9142c8c76f826 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2566,7 +2566,7 @@ static noinline_for_stack struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) { const char *start = fmt.str; - char flag, qualifier; + char flag; /* we finished early by reading the field width */ if (fmt.state == FORMAT_STATE_WIDTH) { @@ -2637,96 +2637,71 @@ precision: } qualifier: - /* get the conversion qualifier */ - qualifier = 0; - if (*fmt.str == 'h' || _tolower(*fmt.str) == 'l' || - *fmt.str == 'z' || *fmt.str == 't') { - qualifier = *fmt.str++; - if (unlikely(qualifier == *fmt.str)) { - if (qualifier == 'l') { - qualifier = 'L'; - fmt.str++; - } else if (qualifier == 'h') { - qualifier = 'H'; - fmt.str++; - } - } - } - - /* default base */ + /* Set up default numeric format */ spec->base = 10; - switch (*fmt.str) { - case 'c': - fmt.state = FORMAT_STATE_CHAR; - fmt.str++; - return fmt; - - case 's': - fmt.state = FORMAT_STATE_STR; - fmt.str++; - return fmt; - - case 'p': - fmt.state = FORMAT_STATE_PTR; - fmt.str++; - return fmt; - - case '%': - fmt.state = FORMAT_STATE_PERCENT_CHAR; - fmt.str++; - return fmt; - - /* integer number formats - set up the flags and "break" */ - case 'o': - spec->base = 8; - break; - - case 'x': - spec->flags |= SMALL; - fallthrough; - - case 'X': - spec->base = 16; - break; + fmt.state = FORMAT_STATE_SIZE(int); + static const struct format_state { + unsigned char state; + unsigned char flags_or_double_state; + unsigned char modifier; + unsigned char base; + } lookup_state[256] = { + // Qualifiers + ['l'] = { FORMAT_STATE_SIZE(long), FORMAT_STATE_SIZE(long long), 1 }, + ['L'] = { FORMAT_STATE_SIZE(long long), 0, 1 }, + ['h'] = { FORMAT_STATE_SIZE(short), FORMAT_STATE_SIZE(char), 1 }, + ['H'] = { FORMAT_STATE_SIZE(char), 0, 1 }, // Questionable, historic + ['z'] = { FORMAT_STATE_SIZE(size_t), 0, 1 }, + ['t'] = { FORMAT_STATE_SIZE(ptrdiff_t), 0, 1 }, + + // Non-numeric formats + ['c'] = { FORMAT_STATE_CHAR }, + ['s'] = { FORMAT_STATE_STR }, + ['p'] = { FORMAT_STATE_PTR }, + ['%'] = { FORMAT_STATE_PERCENT_CHAR }, + + // Numerics + ['o'] = { 0, 0, 0, 8 }, + ['x'] = { 0, SMALL, 0, 16 }, + ['X'] = { 0, 0, 0, 16 }, + ['d'] = { 0, SIGN, 0, 10 }, + ['i'] = { 0, SIGN, 0, 10 }, + ['u'] = { 0, 0, 0, 10, }, - case 'd': - case 'i': - spec->flags |= SIGN; - break; - case 'u': - break; - - case 'n': /* * Since %n poses a greater security risk than * utility, treat it as any other invalid or * unsupported format specifier. */ - fallthrough; + }; - default: - WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt.str); - fmt.state = FORMAT_STATE_INVALID; + const struct format_state *p = lookup_state + (u8)*fmt.str; + if (p->modifier) { + fmt.state = p->state; + if (p->flags_or_double_state && fmt.str[0] == fmt.str[1]) { + fmt.state = p->flags_or_double_state; + fmt.str++; + } + fmt.str++; + p = lookup_state + *fmt.str; + if (unlikely(p->modifier)) + goto invalid; + } + if (p->base) { + spec->base = p->base; + spec->flags |= p->flags_or_double_state; + fmt.str++; return fmt; } - - if (qualifier == 'L') - fmt.state = FORMAT_STATE_SIZE(long long); - else if (qualifier == 'l') { - fmt.state = FORMAT_STATE_SIZE(long); - } else if (qualifier == 'z') { - fmt.state = FORMAT_STATE_SIZE(size_t); - } else if (qualifier == 't') { - fmt.state = FORMAT_STATE_SIZE(ptrdiff_t); - } else if (qualifier == 'H') { - fmt.state = FORMAT_STATE_SIZE(char); - } else if (qualifier == 'h') { - fmt.state = FORMAT_STATE_SIZE(short); - } else { - fmt.state = FORMAT_STATE_SIZE(int); + if (p->state) { + fmt.state = p->state; + fmt.str++; + return fmt; } - fmt.str++; +invalid: + WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt.str); + fmt.state = FORMAT_STATE_INVALID; return fmt; } -- cgit v1.2.3 From f372b2256acbfbbf703cfdfae3d02c5a6c0e1679 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 Dec 2024 11:37:07 -0800 Subject: vsnprintf: inline skip_atoi() again At some point skip_atoi() had been marked 'noinline_for_stack', but it turns out that this is now a pessimization, and not inlining it actually results in a stack frame in format decoding due to the call and thus hurts stack usage rather than helping. With the simplistic atoi function inlined, the format decoding now needs no frame at all. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 9142c8c76f826..617b629c73737 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -160,8 +160,7 @@ long long simple_strtoll(const char *cp, char **endp, unsigned int base) } EXPORT_SYMBOL(simple_strtoll); -static noinline_for_stack -int skip_atoi(const char **s) +static inline int skip_atoi(const char **s) { int i = 0; -- cgit v1.2.3 From 2b76e39fca4739a75c9a4f96f3471af6b1c18d9e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 Dec 2024 11:42:15 -0800 Subject: vsnprintf: mark the indirect width and precision cases unlikely Make the format_decode() code generation easier to look at by getting the strange and unlikely cases out of line. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 617b629c73737..31dca7b8ad905 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2568,7 +2568,7 @@ struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) char flag; /* we finished early by reading the field width */ - if (fmt.state == FORMAT_STATE_WIDTH) { + if (unlikely(fmt.state == FORMAT_STATE_WIDTH)) { if (spec->field_width < 0) { spec->field_width = -spec->field_width; spec->flags |= LEFT; @@ -2578,7 +2578,7 @@ struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) } /* we finished early by reading the precision */ - if (fmt.state == FORMAT_STATE_PRECISION) { + if (unlikely(fmt.state == FORMAT_STATE_PRECISION)) { if (spec->precision < 0) spec->precision = 0; @@ -2611,7 +2611,7 @@ struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) if (isdigit(*fmt.str)) spec->field_width = skip_atoi(&fmt.str); - else if (*fmt.str == '*') { + else if (unlikely(*fmt.str == '*')) { /* it's the next argument */ fmt.state = FORMAT_STATE_WIDTH; fmt.str++; @@ -2621,7 +2621,7 @@ struct fmt format_decode(struct fmt fmt, struct printf_spec *spec) precision: /* get the precision */ spec->precision = -1; - if (*fmt.str == '.') { + if (unlikely(*fmt.str == '.')) { fmt.str++; if (isdigit(*fmt.str)) { spec->precision = skip_atoi(&fmt.str); -- cgit v1.2.3 From 8d4826cc8a8aca01a3b5e95438dfc0eb3bd589ab Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 19 Dec 2024 13:52:53 -0800 Subject: vsnprintf: collapse the number format state into one single state We'll squirrel away the size of the number in 'struct fmt' instead. We have two fairly separate state structures: the 'decode state' is in 'struct fmt', while the 'printout format' is in 'printf_spec'. Both structures are small enough to pass around in registers even across function boundaries (ie two words), even on 32-bit machines. The goal here is to avoid the case statements on the format states, which generate either deep conditionals or jump tables, while also keeping the state size manageable. Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 137 +++++++++++++++++++++++++++------------------------------ 1 file changed, 66 insertions(+), 71 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 31dca7b8ad905..7a40c8ae2d190 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -419,10 +419,7 @@ static_assert(SMALL == ('a' ^ 'A')); enum format_state { FORMAT_STATE_NONE, /* Just a string part */ - FORMAT_STATE_1BYTE = 1, /* char/short/int are their own sizes */ - FORMAT_STATE_2BYTE = 2, - FORMAT_STATE_8BYTE = 3, - FORMAT_STATE_4BYTE = 4, + FORMAT_STATE_NUM, FORMAT_STATE_WIDTH, FORMAT_STATE_PRECISION, FORMAT_STATE_CHAR, @@ -432,8 +429,6 @@ enum format_state { FORMAT_STATE_INVALID, }; -#define FORMAT_STATE_SIZE(type) (sizeof(type) <= 4 ? sizeof(type) : FORMAT_STATE_8BYTE) - struct printf_spec { unsigned char flags; /* flags to number() */ unsigned char base; /* number base, 8, 10 or 16 only */ @@ -2523,7 +2518,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, struct fmt { const char *str; - enum format_state state; + unsigned char state; // enum format_state + unsigned char size; // size of numbers }; #define SPEC_CHAR(x, flag) [(x)-32] = flag @@ -2638,20 +2634,21 @@ precision: qualifier: /* Set up default numeric format */ spec->base = 10; - fmt.state = FORMAT_STATE_SIZE(int); + fmt.state = FORMAT_STATE_NUM; + fmt.size = sizeof(int); static const struct format_state { unsigned char state; - unsigned char flags_or_double_state; - unsigned char modifier; + unsigned char size; + unsigned char flags_or_double_size; unsigned char base; } lookup_state[256] = { - // Qualifiers - ['l'] = { FORMAT_STATE_SIZE(long), FORMAT_STATE_SIZE(long long), 1 }, - ['L'] = { FORMAT_STATE_SIZE(long long), 0, 1 }, - ['h'] = { FORMAT_STATE_SIZE(short), FORMAT_STATE_SIZE(char), 1 }, - ['H'] = { FORMAT_STATE_SIZE(char), 0, 1 }, // Questionable, historic - ['z'] = { FORMAT_STATE_SIZE(size_t), 0, 1 }, - ['t'] = { FORMAT_STATE_SIZE(ptrdiff_t), 0, 1 }, + // Length + ['l'] = { 0, sizeof(long), sizeof(long long) }, + ['L'] = { 0, sizeof(long long) }, + ['h'] = { 0, sizeof(short), sizeof(char) }, + ['H'] = { 0, sizeof(char) }, // Questionable historical + ['z'] = { 0, sizeof(size_t) }, + ['t'] = { 0, sizeof(ptrdiff_t) }, // Non-numeric formats ['c'] = { FORMAT_STATE_CHAR }, @@ -2660,12 +2657,12 @@ qualifier: ['%'] = { FORMAT_STATE_PERCENT_CHAR }, // Numerics - ['o'] = { 0, 0, 0, 8 }, - ['x'] = { 0, SMALL, 0, 16 }, - ['X'] = { 0, 0, 0, 16 }, - ['d'] = { 0, SIGN, 0, 10 }, - ['i'] = { 0, SIGN, 0, 10 }, - ['u'] = { 0, 0, 0, 10, }, + ['o'] = { FORMAT_STATE_NUM, 0, 0, 8 }, + ['x'] = { FORMAT_STATE_NUM, 0, SMALL, 16 }, + ['X'] = { FORMAT_STATE_NUM, 0, 0, 16 }, + ['d'] = { FORMAT_STATE_NUM, 0, SIGN, 10 }, + ['i'] = { FORMAT_STATE_NUM, 0, SIGN, 10 }, + ['u'] = { FORMAT_STATE_NUM, 0, 0, 10, }, /* * Since %n poses a greater security risk than @@ -2675,30 +2672,23 @@ qualifier: }; const struct format_state *p = lookup_state + (u8)*fmt.str; - if (p->modifier) { - fmt.state = p->state; - if (p->flags_or_double_state && fmt.str[0] == fmt.str[1]) { - fmt.state = p->flags_or_double_state; + if (p->size) { + fmt.size = p->size; + if (p->flags_or_double_size && fmt.str[0] == fmt.str[1]) { + fmt.size = p->flags_or_double_size; fmt.str++; } fmt.str++; p = lookup_state + *fmt.str; - if (unlikely(p->modifier)) - goto invalid; - } - if (p->base) { - spec->base = p->base; - spec->flags |= p->flags_or_double_state; - fmt.str++; - return fmt; } if (p->state) { + spec->base = p->base; + spec->flags |= p->flags_or_double_size; fmt.state = p->state; fmt.str++; return fmt; } -invalid: WARN_ONCE(1, "Please remove unsupported %%%c in format string\n", *fmt.str); fmt.state = FORMAT_STATE_INVALID; return fmt; @@ -2768,7 +2758,6 @@ static unsigned long long convert_num_spec(unsigned int val, int size, struct pr */ int vsnprintf(char *buf, size_t size, const char *fmt_str, va_list args) { - unsigned long long num; char *str, *end; struct printf_spec spec = {0}; struct fmt fmt = { @@ -2808,6 +2797,16 @@ int vsnprintf(char *buf, size_t size, const char *fmt_str, va_list args) continue; } + case FORMAT_STATE_NUM: { + unsigned long long num; + if (fmt.size <= sizeof(int)) + num = convert_num_spec(va_arg(args, int), fmt.size, spec); + else + num = va_arg(args, long long); + str = number(str, end, num, spec); + continue; + } + case FORMAT_STATE_WIDTH: set_field_width(&spec, va_arg(args, int)); continue; @@ -2856,7 +2855,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt_str, va_list args) ++str; continue; - case FORMAT_STATE_INVALID: + default: /* * Presumably the arguments passed gcc's type * checking, but there is no safe or sane way @@ -2866,17 +2865,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt_str, va_list args) * sync. */ goto out; - - case FORMAT_STATE_8BYTE: - num = va_arg(args, long long); - break; - - default: - num = convert_num_spec(va_arg(args, int), fmt.state, spec); - break; } - - str = number(str, end, num, spec); } out: @@ -3147,17 +3136,20 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt_str, va_list args) fmt.str++; break; - case FORMAT_STATE_8BYTE: - save_arg(long long); - break; - case FORMAT_STATE_1BYTE: - save_arg(char); - break; - case FORMAT_STATE_2BYTE: - save_arg(short); - break; - default: - save_arg(int); + case FORMAT_STATE_NUM: + switch (fmt.size) { + case 8: + save_arg(long long); + break; + case 1: + save_arg(char); + break; + case 2: + save_arg(short); + break; + default: + save_arg(int); + } } } @@ -3325,18 +3317,21 @@ int bstr_printf(char *buf, size_t size, const char *fmt_str, const u32 *bin_buf) case FORMAT_STATE_INVALID: goto out; - case FORMAT_STATE_8BYTE: - num = get_arg(long long); - break; - case FORMAT_STATE_2BYTE: - num = convert_num_spec(get_arg(short), fmt.state, spec); - break; - case FORMAT_STATE_1BYTE: - num = convert_num_spec(get_arg(char), fmt.state, spec); - break; - default: - num = convert_num_spec(get_arg(int), fmt.state, spec); - break; + case FORMAT_STATE_NUM: + switch (fmt.size) { + case 8: + num = get_arg(long long); + break; + case 1: + num = convert_num_spec(get_arg(char), fmt.size, spec); + break; + case 2: + num = convert_num_spec(get_arg(short), fmt.size, spec); + break; + default: + num = convert_num_spec(get_arg(int), fmt.size, spec); + break; + } } str = number(str, end, num, spec); -- cgit v1.2.3 From 4c538044ee2d11299cc57ac1e92d343e1e83b847 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 23 Dec 2024 11:34:32 -0800 Subject: vsprintf: don't make the 'binary' version pack small integer arguments The strange vbin_printf / bstr_printf interface used to save one- and two-byte printf numerical arguments into their packed format. That's more than a bit strange since the argument buffer is supposed to be an array of 'u32' words, and it's also very different from how the source of the data (varargs) work - which always do the normal integer type conversions, so 'char' and 'short' are always passed as int-sized anyway. This odd packing causes extra code complexity, and it really isn't worth it, since the space savings are simply not there: it only happens for formats like '%hd' (short) and '%hhd' (char), which are very rare indeed. In fact, the only other user of this interface seems to be the bpf helper code (bpf_bprintf_prepare()), and Alexei points out that that case doesn't support those truncated integer formatting options at all in the first place. As a result, bpf_bprintf_prepare() doesn't need any changes for this, and TRACE_BPRINT uses 'vbin_printf()' -> 'bstr_printf()' for the formatting and hopefully doesn't expose the odd packing any other way (knock wood). Link: https://lore.kernel.org/all/CAADnVQJy65oOubjxM-378O3wDfhuwg8TGa9hc-cTv6NmmUSykQ@mail.gmail.com/ Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 29 ++++++----------------------- 1 file changed, 6 insertions(+), 23 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 7a40c8ae2d190..a49bb6eb24350 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -3137,17 +3137,9 @@ int vbin_printf(u32 *bin_buf, size_t size, const char *fmt_str, va_list args) break; case FORMAT_STATE_NUM: - switch (fmt.size) { - case 8: + if (fmt.size > sizeof(int)) { save_arg(long long); - break; - case 1: - save_arg(char); - break; - case 2: - save_arg(short); - break; - default: + } else { save_arg(int); } } @@ -3318,23 +3310,14 @@ int bstr_printf(char *buf, size_t size, const char *fmt_str, const u32 *bin_buf) goto out; case FORMAT_STATE_NUM: - switch (fmt.size) { - case 8: + if (fmt.size > sizeof(int)) { num = get_arg(long long); - break; - case 1: - num = convert_num_spec(get_arg(char), fmt.size, spec); - break; - case 2: - num = convert_num_spec(get_arg(short), fmt.size, spec); - break; - default: + } else { num = convert_num_spec(get_arg(int), fmt.size, spec); - break; } + str = number(str, end, num, spec); + continue; } - - str = number(str, end, num, spec); } /* while(*fmt.str) */ out: -- cgit v1.2.3 From fa47906ff358a5865b7be2356a5a1d1e58dd17d8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 6 Jan 2025 06:31:11 -0800 Subject: vsnprintf: fix up kerneldoc for argument name changes Stephen Rothwell reports that I missed fixing up the documentation when the argument names changed in commit 938df695e98d ("vsprintf: associate the format state with the format pointer"), resulting in htmldoc warnings like lib/vsprintf.c:2760: warning: Function parameter or struct member 'fmt_str' not described in 'vsnprintf' lib/vsprintf.c:2760: warning: Excess function parameter 'fmt' description in 'vsnprintf' ... which I didn't notice because the doc build takes longer than the whole "real" kernel build for me, so I never bother (and judging by the other warnings, pretty much nobody else does either). I guess the bigger issues won't be fixed until the doc build is much faster (narrator: "That isn's in the cards") but at least linux-next finds the new cases. Reported-by: Stephen Rothwell Fixes: 938df695e98d ("vsprintf: associate the format state with the format pointer") Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a49bb6eb24350..4432b69a78be8 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2732,7 +2732,7 @@ static unsigned long long convert_num_spec(unsigned int val, int size, struct pr * vsnprintf - Format a string and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space - * @fmt: The format string to use + * @fmt_str: The format string to use * @args: Arguments for the format string * * This function generally follows C99 vsnprintf, but has some @@ -3020,7 +3020,7 @@ EXPORT_SYMBOL(sprintf); * vbin_printf - Parse a format string and place args' binary value in a buffer * @bin_buf: The buffer to place args' binary value * @size: The size of the buffer(by words(32bits), not characters) - * @fmt: The format string to use + * @fmt_str: The format string to use * @args: Arguments for the format string * * The format follows C99 vsnprintf, except %n is ignored, and its argument @@ -3155,7 +3155,7 @@ EXPORT_SYMBOL_GPL(vbin_printf); * bstr_printf - Format a string from binary arguments and place it in a buffer * @buf: The buffer to place the result into * @size: The size of the buffer, including the trailing null space - * @fmt: The format string to use + * @fmt_str: The format string to use * @bin_buf: Binary arguments for the format string * * This function like C99 vsnprintf, but the difference is that vsnprintf gets -- cgit v1.2.3 From ecdc475e0707c98c2a89da8d0024b3ea2924ef9b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 13 Jan 2025 08:23:28 -0800 Subject: vsnprintf: fix the number base for non-numeric formats Commit 8d4826cc8a8a ("vsnprintf: collapse the number format state into one single state") changed the format specification decoding to be a bit more straightforward but in the process ended up also resetting the number base to zero for formats that aren't clearly numerical. Now, the number base obviously doesn't matter for something like '%s', so this wasn't all that obvious. But some of our specialized pointer extension formatting (ie, things like "print out IPv6 address") did up depending on the default base-10 setting, and when they then tried to print out numbers in "base zero", things didn't work out so well. Most pointer formatting (including things like the default raw hex value conversion) didn't have this issue, because they used helpers that explicitly set the base. Reported-and-tested-by: kernel test robot Closes: https://lore.kernel.org/oe-lkp/202501131352.e226f995-lkp@intel.com Fixes: 8d4826cc8a8a ("vsnprintf: collapse the number format state into one single state") Signed-off-by: Linus Torvalds --- lib/vsprintf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'lib/vsprintf.c') diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 4432b69a78be8..56fe963192926 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -2682,7 +2682,8 @@ qualifier: p = lookup_state + *fmt.str; } if (p->state) { - spec->base = p->base; + if (p->base) + spec->base = p->base; spec->flags |= p->flags_or_double_size; fmt.state = p->state; fmt.str++; -- cgit v1.2.3