1#include <ctype.h>
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include <pw.h>
7
8#include "pw_curl.h"
9
static inline bool is_ctl(unsigned char c)
/*
 * https://datatracker.ietf.org/doc/html/rfc2616#section-2.2
 *
 * CTL = <any US-ASCII control character
 *       (octets 0 - 31) and DEL (127)>
 *
 * The argument is unsigned, so only the upper bound of the 0..31 range
 * has to be tested.
 */
{
    if (c == 127) {
        return true;
    }
    return c < 32;
}
20
static inline bool is_separator(unsigned char c)
/*
 * https://datatracker.ietf.org/doc/html/rfc2616#section-2.2
 *
 * separators = "(" | ")" | "<" | ">" | "@"
 *            | "," | ";" | ":" | "\" | <">
 *            | "/" | "[" | "]" | "?" | "="
 *            | "{" | "}" | SP | HT
 */
{
    static const char separators[] = "()<>@,;:\\\"/[]?={} \t";

    // Search without the terminating NUL so that c == 0 is not reported
    // as a separator.
    return memchr(separators, c, sizeof separators - 1) != NULL;
}
41
static inline void skip_lwsp(char** current_char)
/*
 * WSP  = SP / HTAB
 * LWSP = *(WSP / CRLF WSP)
 *
 * Simplified: any run of SP, HTAB, CR and LF is skipped, without checking
 * that CR/LF appear in the order the grammar requires.
 * On return *current_char points at the first character that is not one
 * of those four (possibly the terminating NUL).
 */
{
    *current_char += strspn(*current_char, " \t\r\n");
}
62
63[[nodiscard]] static bool parse_token(char** current_char, PwValuePtr result)
64/*
65 * https://datatracker.ietf.org/doc/html/rfc2616#section-2.2
66 *
67 * token = 1*<any CHAR except CTLs or separators>
68 *
69 * Return token.
70 */
71{
72 char* token_start = *current_char;
73 char* token_end = token_start;
74
75 while (!(is_separator(*token_end) || is_ctl(*token_end))) {
76 token_end++;
77 }
78 PwValue token = PW_STRING("");
79 if (!pw_string_append(&token, token_start, token_end)) {
80 return false;
81 }
82 *current_char = token_end;
83 pw_move(result, &token);
84 return true;
85}
86
87[[nodiscard]] static bool parse_quoted_string(char** current_char, PwValuePtr result)
88/*
89 * https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6
90 *
91 * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
92 * qdtext = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
93 * obs-text = %x80-FF
94 * quoted-pair = "\" ( HTAB / SP / VCHAR / obs-text )
95 *
96 * Return string or null value if not a quoted string.
97 */
98{
99 char* qstr_start = *current_char;
100
101 pw_destroy(result); // this makes result Null
102
103 if (*qstr_start != '"') {
104 return true;
105 }
106 qstr_start++;
107
108 *result = PwString("");
109
110 char* qstr_end = qstr_start;
111 for (;;) {
112 unsigned char c = *qstr_end;
113 if (is_ctl(c)) {
114 if (c != ' ' && c != '\t') {
115 break;
116 }
117 }
118 if (c == '"') {
119 break;
120 }
121 if (c != '\\') {
122 qstr_end++;
123 continue;
124 }
125 // append what we've got and skip quote char
126 if (!pw_string_append(result, qstr_start, qstr_end)) {
127 return false;
128 }
129 qstr_end++;
130 qstr_start = qstr_end;
131 }
132 if (*qstr_end != '"') {
133 // strict parsing, ignore malformed string
134 if (!pw_string_truncate(result, 0)) {
135 return false;
136 }
137 } else {
138 if (!pw_string_append(result, qstr_start, qstr_end)) {
139 return false;
140 }
141 qstr_end++; // skip closing quote
142 }
143 *current_char = qstr_end;
144 return true;
145}
146
static inline bool is_mime_charsetc(char c)
/*
 * mime-charsetc = ALPHA / DIGIT
 *               / "!" / "#" / "$" / "%" / "&"
 *               / "+" / "-" / "^" / "_" / "`"
 *               / "{" / "}" / "~"
 *               ; as <mime-charset> in Section 2.3 of [RFC2978]
 *               ; except that the single quote is not included
 *               ; SHOULD be registered in the IANA charset registry
 *
 * Return true if c belongs to the set above.
 */
{
    // <ctype.h> functions require a value representable as unsigned char
    // (or EOF); a plain char holding a byte >= 0x80 may be negative.
    if (isalnum((unsigned char) c)) {
        return true;
    }
    switch (c) {
        case '!': case '#': case '$': case '%': case '&':
        case '+': case '-': case '^': case '_': case '`':
        case '{': case '}': case '~':
            return true;
        default:
            return false;
    }
}
170
static inline char xdigit_to_num(char** current_char)
/*
 * Decode one hexadecimal digit at *current_char.
 *
 * If the character is a hex digit, step over it and return its value
 * (0..15). Otherwise return 0 and leave *current_char where it was.
 *
 * Because 0 is also the value of the digit '0', the return value alone
 * does not tell success from failure; callers have to check whether
 * *current_char moved.
 */
{
    // <ctype.h> functions must not be given a negative plain char
    unsigned char c = (unsigned char) **current_char;

    if (!isxdigit(c)) {
        return 0;
    }
    (*current_char)++;
    if (isdigit(c)) {
        return (char) (c - '0');
    }
    if (islower(c)) {
        return (char) (10 + c - 'a');
    }
    return (char) (10 + c - 'A');
}

static inline char32_t parse_value_char(char** current_char)
/*
 * value-chars   = *( pct-encoded / attr-char )
 *
 * pct-encoded   = "%" HEXDIG HEXDIG
 *               ; see [RFC3986], Section 2.1
 *
 * attr-char     = ALPHA / DIGIT
 *               / "!" / "#" / "$" / "&" / "+" / "-" / "."
 *               / "^" / "_" / "`" / "|" / "~"
 *               ; token except ( "*" / "'" / "%" )
 *
 * Consume one attr-char or one pct-encoded triplet at *current_char and
 * return the character, respectively the decoded octet.
 *
 * Return 0 when *current_char is at neither of them (including a "%" that
 * is not followed by two hex digits); *current_char is not moved then.
 * A "%00" triplet also yields 0, so callers that use 0 as the end marker
 * stop there.
 */
{
    char* const start = *current_char;
    unsigned char c = (unsigned char) *start;

    if (isalnum(c)) {
        (*current_char)++;
        return c;
    }
    switch (c) {
        case '!': case '#': case '$': case '&': case '+': case '-': case '.':
        case '^': case '_': case '`': case '|': case '~':
            (*current_char)++;
            return c;
        case '%':
            break;
        default:
            return 0;
    }

    // pct-encoded
    (*current_char)++;  // skip '%'

    char32_t octet = 0;
    for (int i = 0; i < 2; i++) {
        char* before = *current_char;
        char nibble = xdigit_to_num(current_char);
        if (*current_char == before) {
            // Not a hex digit. Detected by the cursor not moving, because
            // a nibble value of 0 is legitimate (e.g. "%20", "%0A").
            *current_char = start;
            return 0;
        }
        octet = (octet << 4) | (unsigned char) nibble;
    }
    return octet;
}
229
230[[nodiscard]] static bool parse_ext_value(char** current_char, PwValuePtr result)
231/*
232 * current_char must point to the first non-space character
233 *
234 * ext-value = charset "'" [ language ] "'" value-chars
235 *
236 * charset = "UTF-8" / "ISO-8859-1" / mime-charset
237 *
238 * mime-charset = 1*mime-charsetc
239 *
240 * language = <Language-Tag, defined in [RFC5646], Section 2.1>
241 *
242 * Return string or null value if not a quoted string.
243 */
244{
245 char* charset_ptr = *current_char;
246 char* language_ptr = *current_char;
247
248 for (;;) {
249 if (!is_mime_charsetc(*charset_ptr)) {
250 break;
251 }
252 charset_ptr++;
253 }
254
255 pw_destroy(result); // this makes result Null
256
257 *current_char = language_ptr;
258 if (*language_ptr != '\'') {
259 // malformed ext-value
260 return true;
261 };
262
263 *language_ptr++ = 0; // terminate charset part
264
265 char* value_ptr = language_ptr;
266
267 // get language tag by simply searching closing single quote
268 for (;;) {
269 char c = *value_ptr;
270 if (c == '\'' || c == 0) {
271 break;
272 }
273 value_ptr++;
274 }
275 *current_char = value_ptr;
276
277 if (*value_ptr != '\'') {
278 // malformed ext-value
279 return true;
280 }
281 *value_ptr++ = 0; // terminate language part
282 *current_char = value_ptr;
283
284 PwValue value = PW_NULL;
285 if (!pw_create_empty_string(strlen(value_ptr) + 1, 1, &value)) {
286 return false;
287 }
288 for (;;) {
289 char32_t c = parse_value_char(current_char);
290 if (c == 0) {
291 break;
292 }
293 if (!pw_string_append(&value, c)) {
294 return false;
295 }
296 }
297 PwValue charset = PW_NULL;
298 if (!pw_create_string(&charset, charset_ptr)) {
299 return false;
300 }
301 PwValue language = PW_NULL;
302 if (!pw_create_string(&language, language_ptr)) {
303 return false;
304 }
305 return pw_map_va(
306 result,
307 PwString("charset"), pw_clone(&charset),
308 PwString("language"), pw_clone(&language),
309 PwString("value"), pw_clone(&value)
310 );
311}
312
313[[nodiscard]] bool pw_parse_content_type(char* content_type,
314 PwValuePtr media_type, PwValuePtr media_subtype, PwValuePtr media_type_params)
315/*
316 * https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
317 *
318 * media-type = type "/" subtype *( OWS ";" OWS parameter )
319 * type = token
320 * subtype = token
321 *
322 * parameter = token "=" ( token / quoted-string )
323 *
324 * XXX: replaced OWS with LWSP
325 */
326{
327 char** current_char = &content_type;
328
329 // parse media type
330 if (!parse_token(current_char, media_type)) {
331 return false;
332 }
333 if (**current_char == 0) {
334 pw_set_status(PwStatus(PW_ERROR));
335 return false;
336 }
337 if (**current_char != '/') {
338 pw_set_status(PwStatus(PW_ERROR_EOF));
339 return false;
340 }
341 (*current_char)++;
342
343 // parse media subtype
344 if (!parse_token(current_char, media_subtype)) {
345 return false;
346 }
347
348 // parse params
349 if (!pw_create_map(media_type_params)) {
350 return false;
351 }
352 for (;;) {
353 skip_lwsp(current_char);
354 if (**current_char == 0) {
355 break;
356 }
357 if (**current_char != ';') {
358 // malformed header, but we've got as most as we could, haven't we?
359 break;
360 }
361 (*current_char)++;
362 skip_lwsp(current_char);
363 {
364 PwValue param_name = PW_NULL;
365 if (!parse_token(current_char, ¶m_name)) {
366 return false;
367 }
368 skip_lwsp(current_char);
369 if (**current_char != '=') {
370 break;
371 }
372 (*current_char)++;
373 skip_lwsp(current_char);
374
375 if (**current_char == 0) {
376 break;
377 }
378
379 PwValue param_value = PW_NULL;
380 if (**current_char == '"') {
381 if (!parse_quoted_string(current_char, ¶m_value)) {
382 return false;
383 }
384 } else {
385 if (!parse_token(current_char, ¶m_value)) {
386 return false;
387 }
388 }
389 if (!pw_is_string(¶m_value)) {
390 break;
391 }
392
393 if (!pw_string_lower(¶m_name)) {
394 return false;
395 }
396 if (!pw_map_update(media_type_params, ¶m_name, ¶m_value)) {
397 return false;
398 }
399 }
400 }
401 return true;
402}
403
404[[nodiscard]] bool pw_parse_content_disposition(char* content_disposition,
405 PwValuePtr disposition_type, PwValuePtr disposition_params)
406/*
407 * content-disposition = "Content-Disposition" ":"
408 * disposition-type *( ";" disposition-parm )
409 *
410 * disposition-type = "inline" | "attachment" | disp-ext-type
411 * ; case-insensitive
412 *
413 * disp-ext-type = token
414 *
415 * disposition-parm = filename-parm | disp-ext-parm
416 *
417 * filename-parm = "filename" "=" value
418 * | "filename*" "=" ext-value
419 *
420 * disp-ext-parm = token "=" value
421 * | ext-token "=" ext-value
422 *
423 * ext-token = <the characters in token, followed by "*">
424 */
425{
426 char** current_char = &content_disposition;
427
428 // parse disposition type
429 if (!parse_token(current_char, disposition_type)) {
430 return false;
431 }
432 if (!pw_string_lower(disposition_type)) {
433 return false;
434 }
435
436 // parse disposition params
437 if (!pw_create_map(disposition_params)) {
438 return false;
439 }
440 for (;;) {
441 skip_lwsp(current_char);
442 if (**current_char == 0) {
443 break;
444 }
445 if (**current_char != ';') {
446 // malformed header, but we've got as most as we could, haven't we?
447 break;
448 }
449 (*current_char)++;
450 skip_lwsp(current_char);
451 {
452 bool is_ext_value = false;
453
454 PwValue param_name = PW_NULL;
455 if (!parse_token(current_char, ¶m_name)) {
456 return false;
457 }
458 if (*(*current_char - 1) == '*') {
459 is_ext_value = true;
460 }
461 skip_lwsp(current_char);
462 if (**current_char != '=') {
463 break;
464 }
465 (*current_char)++;
466 skip_lwsp(current_char);
467
468 if (**current_char == 0) {
469 break;
470 }
471
472 PwValue param_value = PW_NULL;
473 if (is_ext_value) {
474 if (!parse_ext_value(current_char, ¶m_value)) {
475 return false;
476 }
477 } else if (**current_char == '"') {
478 if (!parse_quoted_string(current_char, ¶m_value)) {
479 return false;
480 }
481 } else {
482 if (!parse_token(current_char, ¶m_value)) {
483 return false;
484 }
485 }
486 if (!pw_is_string(¶m_value)) {
487 break;
488 }
489 if (!pw_string_lower(¶m_name)) {
490 return false;
491 }
492 if (!pw_map_update(disposition_params, ¶m_name, ¶m_value)) {
493 return false;
494 }
495 }
496 }
497 return true;
498}
499
500[[nodiscard]] bool pw_urljoin_cstr(char* base_url, char* other_url, PwValuePtr result)
501{
502 CURLU* handle = curl_url();
503 if (!handle) {
504 pw_set_status(PwStatus(PW_ERROR_OOM));
505 return false;
506 }
507
508 CURLUcode rc;
509
510 rc = curl_url_set(handle, CURLUPART_URL, base_url, 0);
511 if(rc) {
512 pw_set_status(PwStatus(PW_ERROR), "URL error: %s", curl_url_strerror(rc));
513
514 curl_url_cleanup(handle);
515 return false;
516 }
517 rc = curl_url_set(handle, CURLUPART_URL, other_url, 0);
518 if(rc) {
519 pw_set_status(PwStatus(PW_ERROR), "URL error: %s", curl_url_strerror(rc));
520
521 curl_url_cleanup(handle);
522 return false;
523 }
524 char* url;
525 rc = curl_url_get(handle, CURLUPART_URL, &url, 0);
526 if(rc) {
527 pw_set_status(PwStatus(PW_ERROR), "URL error: %s", curl_url_strerror(rc));
528
529 curl_url_cleanup(handle);
530 return false;
531 }
532 bool ret = pw_create_string(result, url);
533 curl_free(url);
534 curl_url_cleanup(handle);
535 return ret;
536}
537
538[[nodiscard]] bool pw_urljoin(PwValuePtr base_url, PwValuePtr other_url, PwValuePtr result)
539{
540 PW_CSTRING(cstr_base_url, base_url);
541 PW_CSTRING(cstr_other_url, other_url);
542 return pw_urljoin_cstr(cstr_base_url, cstr_other_url, result);
543}