1#include <errno.h>
  2#include <stdlib.h>
  3
  4#include "include/pw.h"
  5
  6
  7static inline bool end_of_line(PwValuePtr str, unsigned position)
  8/*
  9 * Return true if position is beyond end of line.
 10 */
 11{
 12    return !pw_string_index_valid(str, position);
 13}
 14
 15[[nodiscard]] bool _pw_parse_unsigned(PwValuePtr str, unsigned start_pos,
 16                                      unsigned* end_pos, unsigned radix, PwValuePtr result)
 17{
 18    pw_destroy(result);
 19    *result = PwUnsigned(0);
 20
 21    bool digit_seen = false;
 22    bool separator_seen = false;
 23    unsigned pos = start_pos;
 24    bool ret = false;
 25    for (;;) {
 26        char32_t chr = pw_char_at(str, pos);
 27
 28        // check separator
 29        if (chr == '\'' || chr == '_') {
 30            if (separator_seen) {
 31                // duplicate separator in the number
 32                pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
 33                break;
 34            }
 35            if (!digit_seen) {
 36                // eparator is not allowed in the beginning of number
 37                pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
 38                break;
 39            }
 40            separator_seen = true;
 41            pos++;
 42            if (end_of_line(str, pos)) {
 43                pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
 44                break;
 45            }
 46            continue;
 47        }
 48        separator_seen = false;
 49
 50        // check digit and convert to number
 51        if (radix == 16) {
 52            if (chr >= 'a' && chr <= 'f') {
 53                chr -= 'a' - 10;
 54            } else if (chr >= 'A' && chr <= 'F') {
 55                chr -= 'A' - 10;
 56            } else if (chr >= '0' && chr <= '9') {
 57                chr -= '0';
 58            } else if (!digit_seen) {
 59                pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
 60                break;
 61            } else {
 62                // not a digit, end of conversion
 63                ret = true;
 64                break;
 65            }
 66        } else if (chr >= '0' && chr < (char32_t) ('0' + radix)) {
 67            chr -= '0';
 68        } else if (!digit_seen) {
 69            pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
 70            break;
 71        } else {
 72            // not a digit, end of conversion
 73            ret = true;
 74            break;
 75        }
 76        if (result->unsigned_value > PW_UNSIGNED_MAX / radix) {
 77            // overflow
 78            pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
 79            break;
 80        }
 81        PwType_Unsigned new_value = result->unsigned_value * radix + chr;
 82        if (new_value < result->unsigned_value) {
 83            // overflow
 84            pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
 85            break;
 86        }
 87        result->unsigned_value = new_value;
 88
 89        pos++;
 90        if (end_of_line(str, pos)) {
 91            // end of line, end of conversion
 92            ret = true;
 93            break;
 94        }
 95        digit_seen = true;
 96    }
 97    if (end_pos) {
 98        *end_pos = pos;
 99    }
100    return ret;
101}
102
103static unsigned skip_digits(PwValuePtr str, unsigned pos)
104{
105    for (;;) {
106        if (end_of_line(str, pos)) {
107            break;
108        }
109        char32_t chr = pw_char_at(str, pos);
110        if (!('0' <= chr && chr <= '9')) {
111            break;
112        }
113        pos++;
114    }
115    return pos;
116}
117
118[[nodiscard]] bool _pw_parse_number(PwValuePtr str, unsigned start_pos,
119                                    int sign, unsigned* end_pos, char32_t* allowed_terminators,
120                                    PwValuePtr result)
121{
122    pw_destroy(result);
123    *result = PwSigned(0);
124
125    unsigned pos = start_pos;
126    unsigned radix = 10;
127    bool is_float = false;
128    PwValue base = PW_UNSIGNED(0);
129
130    char32_t chr = pw_char_at(str, pos);
131    if (chr == '0') {
132        // check radix specifier
133        if (end_of_line(str, pos)) {
134            goto done;
135        }
136        switch (pw_char_at(str, pos + 1)) {
137            case 'b':
138            case 'B':
139                radix = 2;
140                pos += 2;
141                break;
142            case 'o':
143            case 'O':
144                radix = 8;
145                pos += 2;
146                break;
147            case 'x':
148            case 'X':
149                radix = 16;
150                pos += 2;
151                break;
152            default:
153                break;
154        }
155        if (end_of_line(str, pos)) {
156            pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
157            goto error;
158        }
159    }
160
161    if (!_pw_parse_unsigned(str, pos, &pos, radix, &base)) {
162        goto error;
163    }
164    if (end_of_line(str, pos)) {
165        goto done;
166    }
167
168    // check for fraction
169    chr = pw_char_at(str, pos);
170    if (chr == '.') {
171        if (radix != 10) {
172decimal_float_only:
173            // only decimal representation is supported for floating point numbers
174            pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
175            goto error;
176        }
177        is_float = true;
178        pos = skip_digits(str, pos + 1);
179        if (end_of_line(str, pos)) {
180            goto done;
181        }
182        chr = pw_char_at(str, pos);
183    }
184    // check for exponent
185    if (chr == 'e' || chr == 'E') {
186        if (radix != 10) {
187            goto decimal_float_only;
188        }
189        is_float = true;
190        pos++;
191        if (end_of_line(str, pos)) {
192            goto done;
193        }
194        chr = pw_char_at(str, pos);
195        if (chr == '-' || chr == '+') {
196            pos++;
197        }
198        unsigned next_pos = skip_digits(str, pos);
199        if (next_pos == pos) {
200            // bad exponent
201            pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
202            goto error;
203        }
204        pos = next_pos;
205
206    } else if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
207        pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
208        goto error;
209    }
210
211done:
212    if (is_float) {
213        // parse float
214        unsigned len = pos - start_pos;
215        char number[len + 1];
216        pw_substr_to_utf8(str, start_pos, pos, number);
217        errno = 0;
218        double n = strtod(number, nullptr);
219        if (errno == ERANGE) {
220            pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
221            goto error;
222        } else if (errno) {
223            // floating point conversion error
224            pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
225            goto error;
226        }
227        if (sign < 0 && n != 0.0) {
228            n = -n;
229        }
230        *result = PwFloat(n);
231    } else {
232        // make integer
233        if (base.unsigned_value > PW_SIGNED_MAX) {
234            if (sign < 0) {
235                pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
236                goto error;
237            } else {
238                *result = PwUnsigned(base.unsigned_value);
239            }
240        } else {
241            if (sign < 0 && base.unsigned_value) {
242                *result = PwSigned(-base.unsigned_value);
243            } else {
244                *result = PwSigned(base.unsigned_value);
245            }
246        }
247    }
248    if (end_pos) {
249        *end_pos = pos;
250    }
251    return true;
252
253error:
254    if (end_pos) {
255        *end_pos = pos;
256    }
257    return false;
258}
259
260[[nodiscard]] bool pw_parse_number(PwValuePtr str, PwValuePtr result)
261{
262    int sign = 1;
263    unsigned start_pos = pw_string_skip_spaces(str, 0);
264    char32_t chr = pw_char_at(str, start_pos);
265    if (chr == '+') {
266        // no op
267        start_pos++;
268    } else if (chr == '-') {
269        sign = -1;
270        start_pos++;
271    }
272    return _pw_parse_number(str, start_pos, sign, nullptr, nullptr, result);
273}
274
275static bool parse_nanosecond_frac(PwValuePtr str, unsigned* pos, uint32_t* result)
276/*
277 * Parse fractional nanoseconds part in `str` starting from `pos`.
278 * Always update `pos` upon return.
279 * Return true on success and write parsed value to `result`.
280 * On error return false.
281 */
282{
283    unsigned p = *pos;
284    uint32_t nanoseconds = 0;
285    unsigned i = 0;
286    while (!end_of_line(str, p)) {
287        char32_t chr = pw_char_at(str, p);
288        if (!pw_is_ascii_digit(chr)) {
289            break;
290        }
291        if (i == 9) {
292            *pos = p;
293            return false;
294        }
295        nanoseconds *= 10;
296        nanoseconds += chr - '0';
297        i++;
298        p++;
299    }
300    if (i == 0) {
301    }
302    static unsigned order[] = {
303        1000'000'000,  // unused, i starts from 1 here
304        100'000'000,
305        10'000'000,
306        1000'000,
307        100'000,
308        10'000,
309        1000,
310        100,
311        10,
312        1
313    };
314    *result = nanoseconds * order[i];
315    *pos = p;
316    return true;
317}
318
319[[nodiscard]] bool _pw_parse_datetime(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
320                                      char32_t* allowed_terminators, PwValuePtr result)
321{
322    pw_destroy(result);
323    *result = PwDateTime(0, 0, 0, 0, 0, 0);
324
325    unsigned pos = start_pos;
326    char32_t chr;
327
328    // parse YYYY part
329    for (unsigned i = 0; i < 4; i++, pos++) {
330        chr = pw_char_at(str, pos);
331        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
332        result->year *= 10;
333        result->year += chr - '0';
334    }
335    // skip optional separator
336    if (pw_char_at(str, pos) == '-') {
337        pos++;
338    }
339    // parse MM part
340    for (unsigned i = 0; i < 2; i++, pos++) {
341        chr = pw_char_at(str, pos);
342        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
343        result->month *= 10;
344        result->month += chr - '0';
345    }
346    // skip optional separator
347    if (pw_char_at(str, pos) == '-') {
348        pos++;
349    }
350    // parse DD part
351    for (unsigned i = 0; i < 2; i++, pos++) {
352        chr = pw_char_at(str, pos);
353        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
354        result->day *= 10;
355        result->day += chr - '0';
356    }
357    // skip optional separator
358    chr = pw_char_at(str, pos);
359    if (chr == 'T') {
360        pos++;
361    } else {
362        pos = pw_string_skip_spaces(str, pos);
363        if (end_of_line(str, pos)) { goto out; }
364        chr = pw_char_at(str, pos);
365        if (allowed_terminators && utf32_strchr(allowed_terminators, chr)) { goto out; }
366    }
367    // parse HH part
368    for (unsigned i = 0; i < 2; i++, pos++) {
369        chr = pw_char_at(str, pos);
370        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
371        result->hour *= 10;
372        result->hour += chr - '0';
373    }
374    // skip optional separator
375    if (pw_char_at(str, pos) == ':') {
376        pos++;
377    }
378    // parse MM part
379    for (unsigned i = 0; i < 2; i++, pos++) {
380        chr = pw_char_at(str, pos);
381        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
382        result->minute *= 10;
383        result->minute += chr - '0';
384    }
385    // skip optional separator
386    if (pw_char_at(str, pos) == ':') {
387        pos++;
388    }
389    // parse SS part
390    for (unsigned i = 0; i < 2; i++, pos++) {
391        chr = pw_char_at(str, pos);
392        if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
393        result->second *= 10;
394        result->second += chr - '0';
395    }
396    // check optional parts
397    chr = pw_char_at(str, pos);
398    if (chr == 'Z') {
399        pos++;
400        goto end_of_datetime;
401    }
402    if ( chr == '.') {
403        // parse nanoseconds
404        pos++;
405        if (!parse_nanosecond_frac(str, &pos, &result->nanosecond)) {
406            goto bad_datetime;
407        }
408        chr = pw_char_at(str, pos);
409    }
410    if (chr == 'Z') {
411        pos++;
412
413    } else if (chr == '+' || chr == '-') {
414        // parse GMT offset
415        int sign = (chr == '-')? -1 : 1;
416        pos++;
417        // parse HH part
418        unsigned offset_hour = 0;
419        for (unsigned i = 0; i < 2; i++, pos++) {
420            chr = pw_char_at(str, pos);
421            if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
422            offset_hour *= 10;
423            offset_hour += chr - '0';
424        }
425        // skip optional separator
426        if (pw_char_at(str, pos) == ':') {
427            pos++;
428        }
429        // parse optional MM part
430        unsigned offset_minute = 0;
431        if (!end_of_line(str, pos)) {
432            chr = pw_char_at(str, pos);
433            if (pw_is_ascii_digit(chr)) {
434                for (unsigned i = 0; i < 2; i++, pos++) {
435                    chr = pw_char_at(str, pos);
436                    if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
437                    offset_minute *= 10;
438                    offset_minute += chr - '0';
439                }
440            }
441        }
442        result->gmt_offset = sign * offset_hour * 60 + offset_minute;
443    }
444
445end_of_datetime:
446    if (end_of_line(str, pos)) {
447        goto out;
448    }
449    chr = pw_char_at(str, pos);
450    if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
451        goto bad_datetime;
452    }
453
454out:
455    if (end_pos) {
456        *end_pos = pos;
457    }
458    return true;
459
460bad_datetime:
461    pw_set_status(PwStatus(PW_ERROR_BAD_DATETIME));
462    if (end_pos) {
463        *end_pos = pos;
464    }
465    return false;
466}
467
468
469[[nodiscard]] bool pw_parse_datetime(PwValuePtr str, PwValuePtr result)
470{
471    return _pw_parse_datetime(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
472}
473
474
475[[nodiscard]] bool _pw_parse_timestamp(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
476                                       char32_t* allowed_terminators, PwValuePtr result)
477{
478    pw_destroy(result);
479    *result = PwTimestamp(0, 0);
480
481    unsigned pos;
482    PwValue seconds = PW_NULL;
483    if (!_pw_parse_unsigned(str, start_pos, &pos, 10, &seconds)) {
484        return false;
485    }
486
487    result->ts_seconds = seconds.unsigned_value;
488
489    if (end_of_line(str, pos)) {
490        goto out;
491    }
492    char32_t chr = pw_char_at(str, pos);
493    if ( chr == '.') {
494        // parse nanoseconds
495        pos++;
496        if (!parse_nanosecond_frac(str, &pos, &result->ts_nanoseconds)) {
497            goto bad_timestamp;
498        }
499    }
500    if (end_of_line(str, pos)) {
501        goto out;
502    }
503    chr = pw_char_at(str, pos);
504    if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
505        goto bad_timestamp;
506    }
507
508out:
509    if (end_pos) {
510        *end_pos = pos;
511    }
512    return true;
513
514bad_timestamp:
515    pw_set_status(PwStatus(PW_ERROR_BAD_TIMESTAMP));
516    if (end_pos) {
517        *end_pos = pos;
518    }
519    return false;
520}
521
522
523[[nodiscard]] bool pw_parse_timestamp(PwValuePtr str, PwValuePtr result)
524{
525    return _pw_parse_timestamp(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
526}