1#include <myaw.h>
  2#include <pwlib/parsers.h>
  3
/*
 * Zero-terminated list of characters handed to _pw_parse_number() by
 * parse_number(): the comment character and the JSON structural
 * characters ':', ',', '}' and ']'.
 * NOTE(review): presumably these are the characters allowed to directly
 * follow a number -- confirm against _pw_parse_number().
 */
static char32_t number_terminators[] = { MW_COMMENT, ':', ',', '}', ']', 0 };
  5
  6
  7[[nodiscard]] static bool skip_spaces(MwParser* parser, unsigned* pos, unsigned source_line, char32_t* chr)
  8/*
  9 * Skip spaces and comments before structural element.
 10 *
 11 * On success write first non-space character to `chr`.
 12 */
 13{
 14    for (;;) {
 15        PwValuePtr current_line = &parser->current_line;
 16
 17        *pos = pw_string_skip_spaces(current_line, *pos);
 18
 19        // end of line?
 20        if (pw_string_index_valid(current_line, *pos)) {
 21            // no, return character if not a comment
 22            char32_t c = pw_char_at(current_line, *pos);
 23            if (c != '#') {
 24                *chr = c;
 25                return true;
 26            }
 27        }
 28        // read next line
 29        if (!_mw_read_block_line(parser)) {
 30            if (_mw_end_of_block()) {
 31                pw_set_status(mw_parser_error(parser, parser->current_indent, "Unexpected end of block"));
 32                _pw_set_status_location(&current_task->status, __FILE__, source_line);
 33            }
 34            return false;
 35        }
 36        *pos = parser->current_indent;
 37    }
 38}
 39
 40[[nodiscard]] static bool parse_number(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
 41/*
 42 * `start_pos` points to the sign or first digit
 43 */
 44{
 45    int sign = 1;
 46    char32_t chr = pw_char_at(&parser->current_line, start_pos);
 47    if (chr == '+') {
 48        // no op
 49        start_pos++;
 50    } else if (chr == '-') {
 51        sign = -1;
 52        start_pos++;
 53    }
 54    return _pw_parse_number(&parser->current_line, start_pos, sign, end_pos, number_terminators, result);
 55}
 56
 57[[nodiscard]] static bool parse_string(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
 58/*
 59 * `start_pos` points to the opening double quotation mark (")
 60 */
 61{
 62    unsigned closing_quote_pos;
 63    if (_mw_find_closing_quote(&parser->current_line, '"', start_pos + 1, &closing_quote_pos)) {
 64        *end_pos = closing_quote_pos + 1;
 65        return _mw_unescape_line(parser, &parser->current_line,
 66                                 parser->line_number, '"', start_pos + 1, closing_quote_pos, result);
 67    }
 68    pw_set_status(mw_parser_error(parser, parser->current_indent, "String has no closing quote"));
 69    return false;
 70}
 71
 72[[nodiscard]] static bool parse_array(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
 73/*
 74 * `start_pos` points to the next character after opening square bracket
 75 */
 76{
 77    parser->json_depth++;
 78
 79    if (!pw_create(PwTypeId_BasicArray, result)) {
 80        return false;
 81    }
 82    char32_t chr;
 83    if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
 84        return false;
 85    }
 86    if (chr == ']') {
 87        // empty array
 88        *end_pos = start_pos + 1;
 89        parser->json_depth--;
 90        return true;
 91    }
 92    // parse first item
 93    PwValue first_item = PW_NULL;
 94    if (!_mw_parse_json_value(parser, start_pos, &start_pos, &first_item)) {
 95        return false;
 96    }
 97    if (!pw_array_append(result, &first_item)) {
 98        return false;
 99    }
100    // parse subsequent items
101    for (;;) {{
102        if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
103            return false;
104        }
105        if (chr == ']') {
106            // done
107            *end_pos = start_pos + 1;
108            parser->json_depth--;
109            return true;
110        }
111        if (chr != ',') {
112            pw_set_status(mw_parser_error(parser, parser->current_indent, "Array items must be separated with comma"));
113            return false;
114        }
115        PwValue item = PW_NULL;
116        if (!_mw_parse_json_value(parser, start_pos + 1, &start_pos, &item)) {
117            return false;
118        }
119        if (!pw_array_append(result, &item)) {
120            return false;
121        }
122    }}
123}
124
125[[nodiscard]] static bool parse_object_member(MwParser* parser, unsigned* pos, PwValuePtr result)
126/*
127 * Parse key:value pair starting from `pos` and update `result`.
128 *
129 * Update `pos` on exit.
130 */
131{
132    PwValue key = PW_NULL;
133    if (!parse_string(parser, *pos, pos, &key)) {
134        return false;
135    }
136    char32_t chr;
137    if (!skip_spaces(parser, pos, __LINE__, &chr)) {
138        return false;
139    }
140    if (chr != ':') {
141        pw_set_status(mw_parser_error(parser, *pos, "Values must be separated from keys with colon"));
142        return false;
143    }
144
145    (*pos)++;
146
147    PwValue value = PW_NULL;
148    if (!_mw_parse_json_value(parser, *pos, pos, &value)) {
149        return false;
150    }
151    return pw_map_update(result, &key, &value);
152}
153
154[[nodiscard]] static bool parse_object(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
155/*
156 * `start_pos` points to the next character after opening curly bracket
157 */
158{
159    parser->json_depth++;
160
161    if (!pw_create(PwTypeId_BasicMap, result)) {
162        return false;
163    }
164
165    char32_t chr;
166    if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
167        return false;
168    }
169    if (chr == '}') {
170        // empty object
171        *end_pos = start_pos + 1;
172        parser->json_depth--;
173        return true;
174    }
175    if (chr != '"') {
176        pw_set_status(mw_parser_error(parser, parser->current_indent, "Keys must be strings"));
177        return false;
178    }
179    // parse first member
180    if (!parse_object_member(parser, &start_pos, result)) {
181        return false;
182    }
183    // parse subsequent members
184    for (;;) {{
185        if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
186            return false;
187        }
188        if (chr == '}') {
189            // done
190            *end_pos = start_pos + 1;
191            parser->json_depth--;
192            return true;
193        }
194        if (chr != ',') {
195            pw_set_status(mw_parser_error(parser, parser->current_indent, "Object members must be separated with comma"));
196            return false;
197        }
198        start_pos++;
199        if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
200            return false;
201        }
202        if (chr != '"') {
203            pw_set_status(mw_parser_error(parser, parser->current_indent, "Keys must be strings"));
204            return false;
205        }
206        if (!parse_object_member(parser, &start_pos, result)) {
207            return false;
208        }
209    }}
210}
211
212[[nodiscard]] bool _mw_parse_json_value(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
213{
214    if (parser->json_depth >= parser->max_json_depth) {
215        pw_set_status(mw_parser_error(parser, parser->current_indent, "Maximum recursion depth exceeded"));
216        return false;
217    }
218
219    char32_t first_char;
220    if (!skip_spaces(parser, &start_pos, __LINE__, &first_char)) {
221        return false;
222    }
223
224    if (first_char == '[') {
225        return parse_array(parser, start_pos + 1, end_pos, result);
226    }
227    if (first_char == '{') {
228        return parse_object(parser, start_pos + 1, end_pos, result);
229    }
230    if (first_char == '"') {
231        return parse_string(parser, start_pos, end_pos, result);
232    }
233    if (first_char == '+' || first_char == '-' || pw_is_ascii_digit(first_char)) {
234        return parse_number(parser, start_pos, end_pos, result);
235    }
236    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "null")) {
237        *end_pos = start_pos + 4;
238        pw_destroy(result);
239        return true;
240    }
241    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "true")) {
242        *end_pos = start_pos + 4;
243        pw_destroy(result);
244        *result = PwBool(true);
245        return true;
246    }
247    if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 5, "false")) {
248        *end_pos = start_pos + 5;
249        pw_destroy(result);
250        *result = PwBool(false);
251        return true;
252    }
253    pw_set_status(mw_parser_error(parser, start_pos, "Unexpected character"));
254    return false;
255}
256
257[[nodiscard]] bool _mw_json_parser_func(MwParser* parser, PwValuePtr result)
258{
259    unsigned end_pos;
260    if (!_mw_parse_json_value(parser, _mw_get_start_position(parser), &end_pos, result)) {
261        return false;
262    }
263
264    // check trailing characters
265
266    static char garbage[] = "Garbage after JSON value";
267
268    if (_mw_comment_or_end_of_line(parser, end_pos)) {
269
270        // make sure current block has no more data
271        if (_mw_read_block_line(parser)) {
272            pw_set_status(mw_parser_error(parser, parser->current_indent, garbage));
273            return false;
274        }
275        return _mw_end_of_block();  // true if end of block, false if other error (read error or OOM)
276    }
277    pw_set_status(mw_parser_error(parser, parser->current_indent, garbage));
278    return false;
279}
280
281[[nodiscard]] bool mw_parse_json(PwValuePtr markup, PwValuePtr result)
282{
283    [[ gnu::cleanup(mw_delete_parser) ]] MwParser* parser = mw_create_parser(markup);
284    if (!parser) {
285        pw_set_status(PwStatus(PW_ERROR_OOM));
286        return false;
287    }
288    // read first line to prepare for parsing and to detect EOF
289    if (!_mw_read_block_line(parser)) {
290        return false;
291    }
292
293    // parse root value
294    unsigned end_pos;
295    if (!_mw_parse_json_value(parser, 0, &end_pos, result)) {
296        return false;
297    }
298
299    // make sure markup has no more data
300
301    static char extra_data[] = "Extra data after parsed value";
302
303    if (!_mw_comment_or_end_of_line(parser, end_pos)) {
304        pw_set_status(mw_parser_error(parser, parser->current_indent, extra_data));
305        return false;
306    }
307    // make sure current block has no more data
308    bool ret = _mw_read_block_line(parser);
309    if (parser->eof) {
310        // all right
311        ret = true;
312    } else if (ret) {
313        pw_set_status(mw_parser_error(parser, parser->current_indent, extra_data));
314    }
315    return ret;
316}