1#include <myaw.h>
2#include <pwlib/parsers.h>
3
4static char32_t number_terminators[] = { MW_COMMENT, ':', ',', '}', ']', 0 };
5
6
7[[nodiscard]] static bool skip_spaces(MwParser* parser, unsigned* pos, unsigned source_line, char32_t* chr)
8/*
9 * Skip spaces and comments before structural element.
10 *
11 * On success write first non-space character to `chr`.
12 */
13{
14 for (;;) {
15 PwValuePtr current_line = &parser->current_line;
16
17 *pos = pw_string_skip_spaces(current_line, *pos);
18
19 // end of line?
20 if (pw_string_index_valid(current_line, *pos)) {
21 // no, return character if not a comment
22 char32_t c = pw_char_at(current_line, *pos);
23 if (c != '#') {
24 *chr = c;
25 return true;
26 }
27 }
28 // read next line
29 if (!_mw_read_block_line(parser)) {
30 if (_mw_end_of_block()) {
31 pw_set_status(mw_parser_error(parser, parser->current_indent, "Unexpected end of block"));
32 _pw_set_status_location(¤t_task->status, __FILE__, source_line);
33 }
34 return false;
35 }
36 *pos = parser->current_indent;
37 }
38}
39
40[[nodiscard]] static bool parse_number(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
41/*
42 * `start_pos` points to the sign or first digit
43 */
44{
45 int sign = 1;
46 char32_t chr = pw_char_at(&parser->current_line, start_pos);
47 if (chr == '+') {
48 // no op
49 start_pos++;
50 } else if (chr == '-') {
51 sign = -1;
52 start_pos++;
53 }
54 return _pw_parse_number(&parser->current_line, start_pos, sign, end_pos, number_terminators, result);
55}
56
57[[nodiscard]] static bool parse_string(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
58/*
59 * `start_pos` points to the opening double quotation mark (")
60 */
61{
62 unsigned closing_quote_pos;
63 if (_mw_find_closing_quote(&parser->current_line, '"', start_pos + 1, &closing_quote_pos)) {
64 *end_pos = closing_quote_pos + 1;
65 return _mw_unescape_line(parser, &parser->current_line,
66 parser->line_number, '"', start_pos + 1, closing_quote_pos, result);
67 }
68 pw_set_status(mw_parser_error(parser, parser->current_indent, "String has no closing quote"));
69 return false;
70}
71
72[[nodiscard]] static bool parse_array(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
73/*
74 * `start_pos` points to the next character after opening square bracket
75 */
76{
77 parser->json_depth++;
78
79 if (!pw_create(PwTypeId_BasicArray, result)) {
80 return false;
81 }
82 char32_t chr;
83 if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
84 return false;
85 }
86 if (chr == ']') {
87 // empty array
88 *end_pos = start_pos + 1;
89 parser->json_depth--;
90 return true;
91 }
92 // parse first item
93 PwValue first_item = PW_NULL;
94 if (!_mw_parse_json_value(parser, start_pos, &start_pos, &first_item)) {
95 return false;
96 }
97 if (!pw_array_append(result, &first_item)) {
98 return false;
99 }
100 // parse subsequent items
101 for (;;) {{
102 if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
103 return false;
104 }
105 if (chr == ']') {
106 // done
107 *end_pos = start_pos + 1;
108 parser->json_depth--;
109 return true;
110 }
111 if (chr != ',') {
112 pw_set_status(mw_parser_error(parser, parser->current_indent, "Array items must be separated with comma"));
113 return false;
114 }
115 PwValue item = PW_NULL;
116 if (!_mw_parse_json_value(parser, start_pos + 1, &start_pos, &item)) {
117 return false;
118 }
119 if (!pw_array_append(result, &item)) {
120 return false;
121 }
122 }}
123}
124
125[[nodiscard]] static bool parse_object_member(MwParser* parser, unsigned* pos, PwValuePtr result)
126/*
127 * Parse key:value pair starting from `pos` and update `result`.
128 *
129 * Update `pos` on exit.
130 */
131{
132 PwValue key = PW_NULL;
133 if (!parse_string(parser, *pos, pos, &key)) {
134 return false;
135 }
136 char32_t chr;
137 if (!skip_spaces(parser, pos, __LINE__, &chr)) {
138 return false;
139 }
140 if (chr != ':') {
141 pw_set_status(mw_parser_error(parser, *pos, "Values must be separated from keys with colon"));
142 return false;
143 }
144
145 (*pos)++;
146
147 PwValue value = PW_NULL;
148 if (!_mw_parse_json_value(parser, *pos, pos, &value)) {
149 return false;
150 }
151 return pw_map_update(result, &key, &value);
152}
153
154[[nodiscard]] static bool parse_object(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
155/*
156 * `start_pos` points to the next character after opening curly bracket
157 */
158{
159 parser->json_depth++;
160
161 if (!pw_create(PwTypeId_BasicMap, result)) {
162 return false;
163 }
164
165 char32_t chr;
166 if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
167 return false;
168 }
169 if (chr == '}') {
170 // empty object
171 *end_pos = start_pos + 1;
172 parser->json_depth--;
173 return true;
174 }
175 if (chr != '"') {
176 pw_set_status(mw_parser_error(parser, parser->current_indent, "Keys must be strings"));
177 return false;
178 }
179 // parse first member
180 if (!parse_object_member(parser, &start_pos, result)) {
181 return false;
182 }
183 // parse subsequent members
184 for (;;) {{
185 if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
186 return false;
187 }
188 if (chr == '}') {
189 // done
190 *end_pos = start_pos + 1;
191 parser->json_depth--;
192 return true;
193 }
194 if (chr != ',') {
195 pw_set_status(mw_parser_error(parser, parser->current_indent, "Object members must be separated with comma"));
196 return false;
197 }
198 start_pos++;
199 if (!skip_spaces(parser, &start_pos, __LINE__, &chr)) {
200 return false;
201 }
202 if (chr != '"') {
203 pw_set_status(mw_parser_error(parser, parser->current_indent, "Keys must be strings"));
204 return false;
205 }
206 if (!parse_object_member(parser, &start_pos, result)) {
207 return false;
208 }
209 }}
210}
211
212[[nodiscard]] bool _mw_parse_json_value(MwParser* parser, unsigned start_pos, unsigned* end_pos, PwValuePtr result)
213{
214 if (parser->json_depth >= parser->max_json_depth) {
215 pw_set_status(mw_parser_error(parser, parser->current_indent, "Maximum recursion depth exceeded"));
216 return false;
217 }
218
219 char32_t first_char;
220 if (!skip_spaces(parser, &start_pos, __LINE__, &first_char)) {
221 return false;
222 }
223
224 if (first_char == '[') {
225 return parse_array(parser, start_pos + 1, end_pos, result);
226 }
227 if (first_char == '{') {
228 return parse_object(parser, start_pos + 1, end_pos, result);
229 }
230 if (first_char == '"') {
231 return parse_string(parser, start_pos, end_pos, result);
232 }
233 if (first_char == '+' || first_char == '-' || pw_is_ascii_digit(first_char)) {
234 return parse_number(parser, start_pos, end_pos, result);
235 }
236 if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "null")) {
237 *end_pos = start_pos + 4;
238 pw_destroy(result);
239 return true;
240 }
241 if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 4, "true")) {
242 *end_pos = start_pos + 4;
243 pw_destroy(result);
244 *result = PwBool(true);
245 return true;
246 }
247 if (pw_substring_eq(&parser->current_line, start_pos, start_pos + 5, "false")) {
248 *end_pos = start_pos + 5;
249 pw_destroy(result);
250 *result = PwBool(false);
251 return true;
252 }
253 pw_set_status(mw_parser_error(parser, start_pos, "Unexpected character"));
254 return false;
255}
256
257[[nodiscard]] bool _mw_json_parser_func(MwParser* parser, PwValuePtr result)
258{
259 unsigned end_pos;
260 if (!_mw_parse_json_value(parser, _mw_get_start_position(parser), &end_pos, result)) {
261 return false;
262 }
263
264 // check trailing characters
265
266 static char garbage[] = "Garbage after JSON value";
267
268 if (_mw_comment_or_end_of_line(parser, end_pos)) {
269
270 // make sure current block has no more data
271 if (_mw_read_block_line(parser)) {
272 pw_set_status(mw_parser_error(parser, parser->current_indent, garbage));
273 return false;
274 }
275 return _mw_end_of_block(); // true if end of block, false if other error (read error or OOM)
276 }
277 pw_set_status(mw_parser_error(parser, parser->current_indent, garbage));
278 return false;
279}
280
281[[nodiscard]] bool mw_parse_json(PwValuePtr markup, PwValuePtr result)
282{
283 [[ gnu::cleanup(mw_delete_parser) ]] MwParser* parser = mw_create_parser(markup);
284 if (!parser) {
285 pw_set_status(PwStatus(PW_ERROR_OOM));
286 return false;
287 }
288 // read first line to prepare for parsing and to detect EOF
289 if (!_mw_read_block_line(parser)) {
290 return false;
291 }
292
293 // parse root value
294 unsigned end_pos;
295 if (!_mw_parse_json_value(parser, 0, &end_pos, result)) {
296 return false;
297 }
298
299 // make sure markup has no more data
300
301 static char extra_data[] = "Extra data after parsed value";
302
303 if (!_mw_comment_or_end_of_line(parser, end_pos)) {
304 pw_set_status(mw_parser_error(parser, parser->current_indent, extra_data));
305 return false;
306 }
307 // make sure current block has no more data
308 bool ret = _mw_read_block_line(parser);
309 if (parser->eof) {
310 // all right
311 ret = true;
312 } else if (ret) {
313 pw_set_status(mw_parser_error(parser, parser->current_indent, extra_data));
314 }
315 return ret;
316}