1#pragma once
  2
  3/*
  4 * UTF-8/UTF-32 functions
  5 */
  6
  7#ifdef __cplusplus
  8extern "C" {
  9#endif
 10
 11char32_t _pw_decode_utf8_char(char8_t** str);
 12/*
 13 * Decode a single UTF-8 character and update `*str`.
 14 *
 15 * Return the decoded character, or 0xFFFFFFFF if the UTF-8 sequence is invalid.
    *
    * NOTE(review): presumably `*str` is advanced past the bytes consumed;
    * how far it moves for an invalid sequence is not stated here -- confirm
    * against the implementation.
 16 */
 17
 18char32_t _pw_decode_utf8_char_reverse(char8_t** ptr);
 19/*
 20 * Decode a single UTF-8 character from `*ptr` downwards.
 21 *
 22 * Return the decoded character, or 0xFFFFFFFF if the UTF-8 sequence is invalid.
    *
    * NOTE(review): `ptr` is passed by reference, so `*ptr` is presumably moved
    * to the start of the decoded character -- this header does not say so;
    * confirm against the implementation.
 23 */
 24
 25bool _pw_decode_utf8_buffer(char8_t** ptr, unsigned* bytes_remaining, char32_t* result);
 26/*
 27 * Decode a single UTF-8 character from a buffer and update `*ptr`.
 28 *
 29 * Null characters are returned as zero codepoints.
 30 *
 31 * Return false if the UTF-8 sequence is incomplete or `bytes_remaining` is zero.
 32 * Otherwise return true.
 33 * If the character is invalid, write 0xFFFFFFFF to `result`.
    *
    * NOTE(review): `bytes_remaining` is passed by pointer, so it is presumably
    * decremented by the number of bytes consumed -- confirm against the
    * implementation.
 34 */
 35
 36unsigned pw_strlen_in_utf8(PwValuePtr str);
 37/*
 38 * Return the length of `str` as if it were encoded in UTF-8.
    *
    * NOTE(review): presumably the length is in bytes and excludes any
    * terminating 0 -- confirm against the implementation.
 39 */
 40
 41void pw_string_to_utf8(PwValuePtr str, char* buffer);
 42void pw_substr_to_utf8(PwValuePtr str, unsigned start_pos, unsigned end_pos, char* buffer);
 43/*
 44 * Copy the string (or the requested part of it) to `buffer`, appending a terminating 0.
 45 * Use carefully: the caller is responsible for allocating the buffer,
    * and no buffer size is passed to these functions.
 46 * Multibyte characters are encoded to UTF-8.
    *
    * NOTE(review): presumably `start_pos`/`end_pos` are character positions
    * with `end_pos` exclusive -- confirm against the implementation.
 47 */
 48
 49unsigned pw_char32_to_utf8(char32_t codepoint, char* buffer);
 50/*
 51 * Write up to 4 characters (`char` elements) to `buffer`.
 52 * Return the number of characters written.
    *
    * NOTE(review): presumably the output is the UTF-8 encoding of `codepoint`
    * and no terminating 0 is appended -- confirm against the implementation.
 53 */
 54
 55void _pw_putchar32_utf8(FILE* fp, char32_t codepoint);
   /*
    * NOTE(review): not documented in this header. Presumably writes
    * `codepoint` to `fp` encoded as UTF-8 -- confirm against the
    * implementation.
    */
 56
 57unsigned utf8_strlen(char8_t* str);
 58/*
 59 * Count the codepoints in the UTF-8 encoded string `str`.
    *
    * NOTE(review): presumably `str` must be null-terminated; handling of
    * invalid sequences is not stated here -- confirm.
 60 */
 61
 62unsigned utf8_strlen2(char8_t* str, uint8_t* char_size);
 63/*
 64 * Count the codepoints in the UTF-8 encoded string `str`
 65 * and find the max char size.
    *
    * NOTE(review): presumably the codepoint count is the return value and the
    * max char size is written to `*char_size` -- confirm, including the unit
    * of the size.
 66 */
 67
 68unsigned utf8_strlen3(char8_t* str, uint8_t* char_size, char8_t** end_ptr);
 69/*
 70 * Same as utf8_strlen2, but additionally writes the pointer to the
    * terminating 0 character of `str` to `end_ptr`.
 71 */
 72
 73unsigned utf8_strlen2_buf(char8_t* buffer, unsigned* size, uint8_t* char_size);
 74/*
 75 * Count the codepoints in `buffer` and find the max char size.
 76 *
 77 * Null characters are allowed! They are counted as zero codepoints.
 78 *
 79 * Return the number of codepoints.
 80 * `size` is in/out: the number of processed bytes is written back to it.
 81 * This number can be less than the original `size` if the buffer ends with
 82 * an incomplete sequence.
    *
    * NOTE(review): presumably the max char size is written to `*char_size`
    * -- confirm against the implementation.
 83 */
 84
 85char8_t* utf8_skip(char8_t* str, unsigned n);
 86/*
 87 * Skip `n` characters of `str` and return a pointer to the `n`th character.
 88 * If `n` is greater than the length of the string, the returned pointer
    * points to the terminating null character.
 89 */
 90
 91unsigned utf32_strlen(char32_t* str);
 92/*
 93 * Return the length of the null-terminated UTF-32 string `str`.
    *
    * NOTE(review): presumably counted in `char32_t` elements, excluding the
    * terminator -- confirm.
 94 */
 95
 96unsigned utf32_strlen2(char32_t* str, uint8_t* char_size);
 97/*
 98 * Find both the length of the null-terminated `str` and the max char size
    * in a single pass.
    *
    * NOTE(review): presumably the length is the return value and the max
    * char size is written to `*char_size` -- confirm.
 99 */
100
101/*
102int utf32_strcmp     (char32_t* a, char32_t* b);
103int utf32_strcmp_utf8(char32_t* a, char8_t*  b);
104 *
105 * Compare null-terminated strings (these declarations are currently commented out).
106 */
107
108char32_t* utf32_strchr(char32_t* str, char32_t chr);
109/*
110 * Find the first occurrence of `chr` in the null-terminated `str`.
    *
    * NOTE(review): the result when `chr` is not found is not stated here
    * (presumably a null pointer) -- confirm against the implementation.
111 */
112
113uint8_t utf32_char_size(char32_t* str, unsigned max_len);
114/*
115 * Return the maximal size of a character in `str`, scanning at most
    * `max_len` characters or up to the null terminator, whichever comes first.
    *
    * NOTE(review): the unit of the returned size is not stated here
    * (presumably bytes per character) -- confirm.
116 */
117
118
119#ifdef __cplusplus
120}
121#endif