1#include <errno.h>
2#include <stdlib.h>
3
4#include "include/pw.h"
5
6
7static inline bool end_of_line(PwValuePtr str, unsigned position)
8/*
9 * Return true if position is beyond end of line.
10 */
11{
12 return !pw_string_index_valid(str, position);
13}
14
15[[nodiscard]] bool _pw_parse_unsigned(PwValuePtr str, unsigned start_pos,
16 unsigned* end_pos, unsigned radix, PwValuePtr result)
17{
18 pw_destroy(result);
19 *result = PwUnsigned(0);
20
21 bool digit_seen = false;
22 bool separator_seen = false;
23 unsigned pos = start_pos;
24 bool ret = false;
25 for (;;) {
26 char32_t chr = pw_char_at(str, pos);
27
28 // check separator
29 if (chr == '\'' || chr == '_') {
30 if (separator_seen) {
31 // duplicate separator in the number
32 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
33 break;
34 }
35 if (!digit_seen) {
36 // eparator is not allowed in the beginning of number
37 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
38 break;
39 }
40 separator_seen = true;
41 pos++;
42 if (end_of_line(str, pos)) {
43 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
44 break;
45 }
46 continue;
47 }
48 separator_seen = false;
49
50 // check digit and convert to number
51 if (radix == 16) {
52 if (chr >= 'a' && chr <= 'f') {
53 chr -= 'a' - 10;
54 } else if (chr >= 'A' && chr <= 'F') {
55 chr -= 'A' - 10;
56 } else if (chr >= '0' && chr <= '9') {
57 chr -= '0';
58 } else if (!digit_seen) {
59 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
60 break;
61 } else {
62 // not a digit, end of conversion
63 ret = true;
64 break;
65 }
66 } else if (chr >= '0' && chr < (char32_t) ('0' + radix)) {
67 chr -= '0';
68 } else if (!digit_seen) {
69 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
70 break;
71 } else {
72 // not a digit, end of conversion
73 ret = true;
74 break;
75 }
76 if (result->unsigned_value > PW_UNSIGNED_MAX / radix) {
77 // overflow
78 pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
79 break;
80 }
81 PwType_Unsigned new_value = result->unsigned_value * radix + chr;
82 if (new_value < result->unsigned_value) {
83 // overflow
84 pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
85 break;
86 }
87 result->unsigned_value = new_value;
88
89 pos++;
90 if (end_of_line(str, pos)) {
91 // end of line, end of conversion
92 ret = true;
93 break;
94 }
95 digit_seen = true;
96 }
97 if (end_pos) {
98 *end_pos = pos;
99 }
100 return ret;
101}
102
103static unsigned skip_digits(PwValuePtr str, unsigned pos)
104{
105 for (;;) {
106 if (end_of_line(str, pos)) {
107 break;
108 }
109 char32_t chr = pw_char_at(str, pos);
110 if (!('0' <= chr && chr <= '9')) {
111 break;
112 }
113 pos++;
114 }
115 return pos;
116}
117
118[[nodiscard]] bool _pw_parse_number(PwValuePtr str, unsigned start_pos,
119 int sign, unsigned* end_pos, char32_t* allowed_terminators,
120 PwValuePtr result)
121{
122 pw_destroy(result);
123 *result = PwSigned(0);
124
125 unsigned pos = start_pos;
126 unsigned radix = 10;
127 bool is_float = false;
128 PwValue base = PW_UNSIGNED(0);
129
130 char32_t chr = pw_char_at(str, pos);
131 if (chr == '0') {
132 // check radix specifier
133 if (end_of_line(str, pos)) {
134 goto done;
135 }
136 switch (pw_char_at(str, pos + 1)) {
137 case 'b':
138 case 'B':
139 radix = 2;
140 pos += 2;
141 break;
142 case 'o':
143 case 'O':
144 radix = 8;
145 pos += 2;
146 break;
147 case 'x':
148 case 'X':
149 radix = 16;
150 pos += 2;
151 break;
152 default:
153 break;
154 }
155 if (end_of_line(str, pos)) {
156 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
157 goto error;
158 }
159 }
160
161 if (!_pw_parse_unsigned(str, pos, &pos, radix, &base)) {
162 goto error;
163 }
164 if (end_of_line(str, pos)) {
165 goto done;
166 }
167
168 // check for fraction
169 chr = pw_char_at(str, pos);
170 if (chr == '.') {
171 if (radix != 10) {
172decimal_float_only:
173 // only decimal representation is supported for floating point numbers
174 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
175 goto error;
176 }
177 is_float = true;
178 pos = skip_digits(str, pos + 1);
179 if (end_of_line(str, pos)) {
180 goto done;
181 }
182 chr = pw_char_at(str, pos);
183 }
184 // check for exponent
185 if (chr == 'e' || chr == 'E') {
186 if (radix != 10) {
187 goto decimal_float_only;
188 }
189 is_float = true;
190 pos++;
191 if (end_of_line(str, pos)) {
192 goto done;
193 }
194 chr = pw_char_at(str, pos);
195 if (chr == '-' || chr == '+') {
196 pos++;
197 }
198 unsigned next_pos = skip_digits(str, pos);
199 if (next_pos == pos) {
200 // bad exponent
201 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
202 goto error;
203 }
204 pos = next_pos;
205
206 } else if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
207 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
208 goto error;
209 }
210
211done:
212 if (is_float) {
213 // parse float
214 unsigned len = pos - start_pos;
215 char number[len + 1];
216 pw_substr_to_utf8(str, start_pos, pos, number);
217 errno = 0;
218 double n = strtod(number, nullptr);
219 if (errno == ERANGE) {
220 pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
221 goto error;
222 } else if (errno) {
223 // floating point conversion error
224 pw_set_status(PwStatus(PW_ERROR_BAD_NUMBER));
225 goto error;
226 }
227 if (sign < 0 && n != 0.0) {
228 n = -n;
229 }
230 *result = PwFloat(n);
231 } else {
232 // make integer
233 if (base.unsigned_value > PW_SIGNED_MAX) {
234 if (sign < 0) {
235 pw_set_status(PwStatus(PW_ERROR_NUMERIC_OVERFLOW));
236 goto error;
237 } else {
238 *result = PwUnsigned(base.unsigned_value);
239 }
240 } else {
241 if (sign < 0 && base.unsigned_value) {
242 *result = PwSigned(-base.unsigned_value);
243 } else {
244 *result = PwSigned(base.unsigned_value);
245 }
246 }
247 }
248 if (end_pos) {
249 *end_pos = pos;
250 }
251 return true;
252
253error:
254 if (end_pos) {
255 *end_pos = pos;
256 }
257 return false;
258}
259
260[[nodiscard]] bool pw_parse_number(PwValuePtr str, PwValuePtr result)
261{
262 int sign = 1;
263 unsigned start_pos = pw_string_skip_spaces(str, 0);
264 char32_t chr = pw_char_at(str, start_pos);
265 if (chr == '+') {
266 // no op
267 start_pos++;
268 } else if (chr == '-') {
269 sign = -1;
270 start_pos++;
271 }
272 return _pw_parse_number(str, start_pos, sign, nullptr, nullptr, result);
273}
274
275static bool parse_nanosecond_frac(PwValuePtr str, unsigned* pos, uint32_t* result)
276/*
277 * Parse fractional nanoseconds part in `str` starting from `pos`.
278 * Always update `pos` upon return.
279 * Return true on success and write parsed value to `result`.
280 * On error return false.
281 */
282{
283 unsigned p = *pos;
284 uint32_t nanoseconds = 0;
285 unsigned i = 0;
286 while (!end_of_line(str, p)) {
287 char32_t chr = pw_char_at(str, p);
288 if (!pw_is_ascii_digit(chr)) {
289 break;
290 }
291 if (i == 9) {
292 *pos = p;
293 return false;
294 }
295 nanoseconds *= 10;
296 nanoseconds += chr - '0';
297 i++;
298 p++;
299 }
300 if (i == 0) {
301 }
302 static unsigned order[] = {
303 1000'000'000, // unused, i starts from 1 here
304 100'000'000,
305 10'000'000,
306 1000'000,
307 100'000,
308 10'000,
309 1000,
310 100,
311 10,
312 1
313 };
314 *result = nanoseconds * order[i];
315 *pos = p;
316 return true;
317}
318
319[[nodiscard]] bool _pw_parse_datetime(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
320 char32_t* allowed_terminators, PwValuePtr result)
321{
322 pw_destroy(result);
323 *result = PwDateTime(0, 0, 0, 0, 0, 0);
324
325 unsigned pos = start_pos;
326 char32_t chr;
327
328 // parse YYYY part
329 for (unsigned i = 0; i < 4; i++, pos++) {
330 chr = pw_char_at(str, pos);
331 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
332 result->year *= 10;
333 result->year += chr - '0';
334 }
335 // skip optional separator
336 if (pw_char_at(str, pos) == '-') {
337 pos++;
338 }
339 // parse MM part
340 for (unsigned i = 0; i < 2; i++, pos++) {
341 chr = pw_char_at(str, pos);
342 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
343 result->month *= 10;
344 result->month += chr - '0';
345 }
346 // skip optional separator
347 if (pw_char_at(str, pos) == '-') {
348 pos++;
349 }
350 // parse DD part
351 for (unsigned i = 0; i < 2; i++, pos++) {
352 chr = pw_char_at(str, pos);
353 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
354 result->day *= 10;
355 result->day += chr - '0';
356 }
357 // skip optional separator
358 chr = pw_char_at(str, pos);
359 if (chr == 'T') {
360 pos++;
361 } else {
362 pos = pw_string_skip_spaces(str, pos);
363 if (end_of_line(str, pos)) { goto out; }
364 chr = pw_char_at(str, pos);
365 if (allowed_terminators && utf32_strchr(allowed_terminators, chr)) { goto out; }
366 }
367 // parse HH part
368 for (unsigned i = 0; i < 2; i++, pos++) {
369 chr = pw_char_at(str, pos);
370 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
371 result->hour *= 10;
372 result->hour += chr - '0';
373 }
374 // skip optional separator
375 if (pw_char_at(str, pos) == ':') {
376 pos++;
377 }
378 // parse MM part
379 for (unsigned i = 0; i < 2; i++, pos++) {
380 chr = pw_char_at(str, pos);
381 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
382 result->minute *= 10;
383 result->minute += chr - '0';
384 }
385 // skip optional separator
386 if (pw_char_at(str, pos) == ':') {
387 pos++;
388 }
389 // parse SS part
390 for (unsigned i = 0; i < 2; i++, pos++) {
391 chr = pw_char_at(str, pos);
392 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
393 result->second *= 10;
394 result->second += chr - '0';
395 }
396 // check optional parts
397 chr = pw_char_at(str, pos);
398 if (chr == 'Z') {
399 pos++;
400 goto end_of_datetime;
401 }
402 if ( chr == '.') {
403 // parse nanoseconds
404 pos++;
405 if (!parse_nanosecond_frac(str, &pos, &result->nanosecond)) {
406 goto bad_datetime;
407 }
408 chr = pw_char_at(str, pos);
409 }
410 if (chr == 'Z') {
411 pos++;
412
413 } else if (chr == '+' || chr == '-') {
414 // parse GMT offset
415 int sign = (chr == '-')? -1 : 1;
416 pos++;
417 // parse HH part
418 unsigned offset_hour = 0;
419 for (unsigned i = 0; i < 2; i++, pos++) {
420 chr = pw_char_at(str, pos);
421 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
422 offset_hour *= 10;
423 offset_hour += chr - '0';
424 }
425 // skip optional separator
426 if (pw_char_at(str, pos) == ':') {
427 pos++;
428 }
429 // parse optional MM part
430 unsigned offset_minute = 0;
431 if (!end_of_line(str, pos)) {
432 chr = pw_char_at(str, pos);
433 if (pw_is_ascii_digit(chr)) {
434 for (unsigned i = 0; i < 2; i++, pos++) {
435 chr = pw_char_at(str, pos);
436 if (!pw_is_ascii_digit(chr)) { goto bad_datetime; }
437 offset_minute *= 10;
438 offset_minute += chr - '0';
439 }
440 }
441 }
442 result->gmt_offset = sign * offset_hour * 60 + offset_minute;
443 }
444
445end_of_datetime:
446 if (end_of_line(str, pos)) {
447 goto out;
448 }
449 chr = pw_char_at(str, pos);
450 if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
451 goto bad_datetime;
452 }
453
454out:
455 if (end_pos) {
456 *end_pos = pos;
457 }
458 return true;
459
460bad_datetime:
461 pw_set_status(PwStatus(PW_ERROR_BAD_DATETIME));
462 if (end_pos) {
463 *end_pos = pos;
464 }
465 return false;
466}
467
468
469[[nodiscard]] bool pw_parse_datetime(PwValuePtr str, PwValuePtr result)
470{
471 return _pw_parse_datetime(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
472}
473
474
475[[nodiscard]] bool _pw_parse_timestamp(PwValuePtr str, unsigned start_pos, unsigned* end_pos,
476 char32_t* allowed_terminators, PwValuePtr result)
477{
478 pw_destroy(result);
479 *result = PwTimestamp(0, 0);
480
481 unsigned pos;
482 PwValue seconds = PW_NULL;
483 if (!_pw_parse_unsigned(str, start_pos, &pos, 10, &seconds)) {
484 return false;
485 }
486
487 result->ts_seconds = seconds.unsigned_value;
488
489 if (end_of_line(str, pos)) {
490 goto out;
491 }
492 char32_t chr = pw_char_at(str, pos);
493 if ( chr == '.') {
494 // parse nanoseconds
495 pos++;
496 if (!parse_nanosecond_frac(str, &pos, &result->ts_nanoseconds)) {
497 goto bad_timestamp;
498 }
499 }
500 if (end_of_line(str, pos)) {
501 goto out;
502 }
503 chr = pw_char_at(str, pos);
504 if ( ! (pw_isspace(chr) || (allowed_terminators && utf32_strchr(allowed_terminators, chr)))) {
505 goto bad_timestamp;
506 }
507
508out:
509 if (end_pos) {
510 *end_pos = pos;
511 }
512 return true;
513
514bad_timestamp:
515 pw_set_status(PwStatus(PW_ERROR_BAD_TIMESTAMP));
516 if (end_pos) {
517 *end_pos = pos;
518 }
519 return false;
520}
521
522
523[[nodiscard]] bool pw_parse_timestamp(PwValuePtr str, PwValuePtr result)
524{
525 return _pw_parse_timestamp(str, pw_string_skip_spaces(str, 0), nullptr, nullptr, result);
526}