1#pragma once
2
3/*
4 * UTF-8/UTF-32 functions
5 */
6
7#ifdef __cplusplus
8extern "C" {
9#endif
10
11char32_t _pw_decode_utf8_char(char8_t** str);
12/*
13 * Decode UTF-8 character, update `*str`.
14 *
15 * Return decoded character or 0xFFFFFFFF if UTF-8 sequence is invalid.
16 */
17
18char32_t _pw_decode_utf8_char_reverse(char8_t** ptr);
19/*
20 * Decode UTF-8 character from `*ptr` downwards.
21 *
22 * Result is decoded character or 0xFFFFFFFF if UTF-8 sequence is invalid.
23 */
24
25bool _pw_decode_utf8_buffer(char8_t** ptr, unsigned* bytes_remaining, char32_t* result);
26/*
27 * Decode UTF-8 character from buffer, update `*ptr`.
28 *
29 * Null characters are returned as zero codepoints.
30 *
31 * Return false if UTF-8 sequence is incomplete or `bytes_remaining` is zero.
32 * Otherwise return true.
33 * If character is invalid, write 0xFFFFFFFF to `result`.
34 */
35
36unsigned pw_strlen_in_utf8(PwValuePtr str);
37/*
38 * Return length of `str` as if it was encoded in UTF-8.
39 */
40
41void pw_string_to_utf8(PwValuePtr str, char* buffer);
42void pw_substr_to_utf8(PwValuePtr str, unsigned start_pos, unsigned end_pos, char* buffer);
43/*
44 * Copy string (or substring) to buffer, appending terminating 0.
45 * Use carefully. No buffer size is passed: the caller is responsible for allocating a large enough buffer.
46 * Encode multibyte chars to UTF-8.
47 */
48
49unsigned pw_char32_to_utf8(char32_t codepoint, char* buffer);
50/*
51 * Encode `codepoint` as UTF-8, writing up to 4 bytes to buffer.
52 * Return number of bytes written.
53 */
54
55void _pw_putchar32_utf8(FILE* fp, char32_t codepoint);
/*
 * NOTE(review): undocumented in this header. Presumably writes `codepoint`
 * to `fp` encoded as UTF-8 -- confirm against the implementation.
 */
56
57unsigned utf8_strlen(char8_t* str);
58/*
59 * Count codepoints in UTF8-encoded string.
60 */
61
62unsigned utf8_strlen2(char8_t* str, uint8_t* char_size);
63/*
64 * Count codepoints in UTF8-encoded string
65 * and find max char size.
66 */
67
68unsigned utf8_strlen3(char8_t* str, uint8_t* char_size, char8_t** end_ptr);
69/*
70 * Same as utf8_strlen2, plus writes the pointer to the terminating 0 character to `end_ptr`.
71 */
72
73unsigned utf8_strlen2_buf(char8_t* buffer, unsigned* size, uint8_t* char_size);
74/*
75 * Count codepoints in the buffer and find max char size.
76 *
77 * Null characters are allowed! They are counted as zero codepoints.
78 *
79 * Return the number of codepoints.
80 * Write the number of processed bytes back to `size`.
81 * This number can be less than the original `size` if the buffer ends with
82 * an incomplete sequence.
83 */
84
85char8_t* utf8_skip(char8_t* str, unsigned n);
86/*
87 * Skip `n` characters, return pointer to `n`th char.
88 * If `n` is greater than length of string, returned value points to terminating null character.
89 */
90
91unsigned utf32_strlen(char32_t* str);
92/*
93 * Find length of null-terminated `str`.
94 */
95
96unsigned utf32_strlen2(char32_t* str, uint8_t* char_size);
97/*
98 * Find both length of null-terminated `str` and max char size in one go.
99 */
100
101/*
102int utf32_strcmp (char32_t* a, char32_t* b);
103int utf32_strcmp_utf8(char32_t* a, char8_t* b);
104 *
105 * Compare null-terminated strings.
 * Note: the two declarations above are inside this comment, i.e. currently disabled.
106 */
107
108char32_t* utf32_strchr(char32_t* str, char32_t chr);
109/*
110 * Find the first occurrence of `chr` in the null-terminated `str`.
111 */
112
113uint8_t utf32_char_size(char32_t* str, unsigned max_len);
114/*
115 * Find the maximal size of character in `str`, up to `max_len` or null terminator.
116 */
117
118
119#ifdef __cplusplus
120}
121#endif