diff options
author | Nicolas Pitre <npitre@baylibre.com> | 2025-04-17 14:45:11 -0400 |
---|---|---|
committer | Greg Kroah-Hartman <gregkh@linuxfoundation.org> | 2025-04-26 11:22:04 +0200 |
commit | b5c574995d842d241d810f3a6a3ebb03c52d57fa (patch) | |
tree | b751111fb22541156433f26a8708d5dcc0b12e5c | |
parent | 9bd73840935746dccef20dbcb509317c8e718f08 (diff) |
vt: support Unicode recomposition
Try replacing any decomposed Unicode sequence by the corresponding
recomposed code point. Code point to glyph correspondance works best
after recomposition, and this apply mostly to single-width code points
therefore we can't preserve them in their decomposed form anyway.
Signed-off-by: Nicolas Pitre <npitre@baylibre.com>
Reviewed-by: Jiri Slaby <jirislaby@kernel.org>
Link: https://lore.kernel.org/r/20250417184849.475581-10-nico@fluxnic.net
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-rw-r--r-- | drivers/tty/vt/ucs.c | 62 | ||||
-rw-r--r-- | drivers/tty/vt/vt.c | 14 | ||||
-rw-r--r-- | include/linux/consolemap.h | 6 |
3 files changed, 79 insertions, 3 deletions
diff --git a/drivers/tty/vt/ucs.c b/drivers/tty/vt/ucs.c index 5f9f25bd201b..bf25d63cea61 100644 --- a/drivers/tty/vt/ucs.c +++ b/drivers/tty/vt/ucs.c @@ -59,3 +59,65 @@ bool ucs_is_double_width(u32 cp) return cp_in_range(cp, ucs_double_width_ranges, ARRAY_SIZE(ucs_double_width_ranges)); } + +/* + * Structure for base with combining mark pairs and resulting recompositions. + * Using u16 to save space since all values are within BMP range. + */ +struct ucs_recomposition { + u16 base; /* base character */ + u16 mark; /* combining mark */ + u16 recomposed; /* corresponding recomposed character */ +}; + +#include "ucs_recompose_table.h" + +struct compare_key { + u16 base; + u16 mark; +}; + +static int recomposition_cmp(const void *key, const void *element) +{ + const struct compare_key *search_key = key; + const struct ucs_recomposition *entry = element; + + /* Compare base character first */ + if (search_key->base < entry->base) + return -1; + if (search_key->base > entry->base) + return 1; + + /* Base characters match, now compare combining character */ + if (search_key->mark < entry->mark) + return -1; + if (search_key->mark > entry->mark) + return 1; + + /* Both match */ + return 0; +} + +/** + * ucs_recompose() - Attempt to recompose two Unicode characters into a single character. + * @base: Base Unicode code point (UCS-4) + * @mark: Combining mark Unicode code point (UCS-4) + * + * Return: Recomposed Unicode code point, or 0 if no recomposition is possible + */ +u32 ucs_recompose(u32 base, u32 mark) +{ + /* Check if characters are within the range of our table */ + if (!in_range(base, UCS_RECOMPOSE_MIN_BASE, UCS_RECOMPOSE_MAX_BASE) || + !in_range(mark, UCS_RECOMPOSE_MIN_MARK, UCS_RECOMPOSE_MAX_MARK)) + return 0; + + struct compare_key key = { base, mark }; + struct ucs_recomposition *result = + __inline_bsearch(&key, ucs_recomposition_table, + ARRAY_SIZE(ucs_recomposition_table), + sizeof(*ucs_recomposition_table), + recomposition_cmp); + + return result ? result->recomposed : 0; +} diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index a989feffad5e..76554c2040bf 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -2925,9 +2925,9 @@ static void vc_con_rewind(struct vc_data *vc) #define UCS_VS16 0xfe0f /* Variation Selector 16 */ -static int vc_process_ucs(struct vc_data *vc, int c, int *tc) +static int vc_process_ucs(struct vc_data *vc, int *c, int *tc) { - u32 prev_c, curr_c = c; + u32 prev_c, curr_c = *c; if (ucs_is_double_width(curr_c)) return 2; @@ -2964,6 +2964,14 @@ static int vc_process_ucs(struct vc_data *vc, int c, int *tc) return 1; } + /* try recomposition */ + prev_c = ucs_recompose(prev_c, curr_c); + if (prev_c != 0) { + vc_con_rewind(vc); + *tc = *c = prev_c; + return 1; + } + /* Otherwise zero-width code points are ignored. */ return 0; } @@ -2978,7 +2986,7 @@ static int vc_con_write_normal(struct vc_data *vc, int tc, int c, bool inverse = false; if (vc->vc_utf && !vc->vc_disp_ctrl) { - width = vc_process_ucs(vc, c, &tc); + width = vc_process_ucs(vc, &c, &tc); if (!width) goto out; } diff --git a/include/linux/consolemap.h b/include/linux/consolemap.h index b3a911866662..8167494229db 100644 --- a/include/linux/consolemap.h +++ b/include/linux/consolemap.h @@ -30,6 +30,7 @@ int conv_uni_to_8bit(u32 uni); void console_map_init(void); bool ucs_is_double_width(uint32_t cp); bool ucs_is_zero_width(uint32_t cp); +u32 ucs_recompose(u32 base, u32 mark); #else static inline u16 inverse_translate(const struct vc_data *conp, u16 glyph, bool use_unicode) @@ -69,6 +70,11 @@ static inline bool ucs_is_zero_width(uint32_t cp) { return false; } + +static inline u32 ucs_recompose(u32 base, u32 mark) +{ + return 0; +} #endif /* CONFIG_CONSOLE_TRANSLATIONS */ #endif /* __LINUX_CONSOLEMAP_H__ */ |