1#include <ctype.h>
  2#include <stdio.h>
  3#include <stdlib.h>
  4#include <string.h>
  5
  6#include <pw.h>
  7
  8#include "pw_curl.h"
  9
 10static inline bool is_ctl(unsigned char c)
 11/*
 12 * https://datatracker.ietf.org/doc/html/rfc2616#section-2.2
 13 *
 14 * CTL = <any US-ASCII control character
 15 *       (octets 0 - 31) and DEL (127)>
 16 */
 17{
 18    return (0 <= c && c <= 31) || c == 127;
 19}
 20
 21static inline bool is_separator(unsigned char c)
 22/*
 23 * https://datatracker.ietf.org/doc/html/rfc2616#section-2.2
 24 *
 25 * separators = "(" | ")" | "<" | ">" | "@"
 26 *            | "," | ";" | ":" | "\" | <">
 27 *            | "/" | "[" | "]" | "?" | "="
 28 *            | "{" | "}" | SP  | HT
 29 */
 30{
 31    switch (c) {
 32        case '(':  case ')':  case '<':  case '>':  case '@':
 33        case ',':  case ';':  case ':':  case '\\': case '"':
 34        case '/':  case '[':  case ']':  case '?':  case '=':
 35        case '{':  case '}':  case ' ':  case '\t':
 36            return true;
 37        default:
 38            return false;
 39    }
 40}
 41
 42static inline void skip_lwsp(char** current_char)
 43/*
 44 * WSP = SP / HTAB
 45 * LWSP = *(WSP / CRLF WSP)
 46 */
 47{
 48    // simplified, not strictly follows the grammar
 49    char* ptr = *current_char;
 50    for (;;) {
 51        char c = *ptr;
 52        if (c == 0) {
 53            break;
 54        }
 55        if (c != ' ' && c != '\t' && c != '\r' && c != '\n') {
 56            break;
 57        }
 58        ptr++;
 59    }
 60    *current_char = ptr;
 61}
 62
 63[[nodiscard]] static bool parse_token(char** current_char, PwValuePtr result)
 64/*
 65 * https://datatracker.ietf.org/doc/html/rfc2616#section-2.2
 66 *
 67 * token = 1*<any CHAR except CTLs or separators>
 68 *
 69 * Return token.
 70 */
 71{
 72    char* token_start = *current_char;
 73    char* token_end = token_start;
 74
 75    while (!(is_separator(*token_end) || is_ctl(*token_end))) {
 76        token_end++;
 77    }
 78    PwValue token = PW_STRING("");
 79    if (!pw_string_append(&token, token_start, token_end)) {
 80        return false;
 81    }
 82    *current_char = token_end;
 83    pw_move(result, &token);
 84    return true;
 85}
 86
 87[[nodiscard]] static bool parse_quoted_string(char** current_char, PwValuePtr result)
 88/*
 89 * https://datatracker.ietf.org/doc/html/rfc7230#section-3.2.6
 90 *
 91 * quoted-string = DQUOTE *( qdtext / quoted-pair ) DQUOTE
 92 * qdtext        = HTAB / SP / %x21 / %x23-5B / %x5D-7E / obs-text
 93 * obs-text      = %x80-FF
 94 * quoted-pair   = "\" ( HTAB / SP / VCHAR / obs-text )
 95 *
 96 * Return string or null value if not a quoted string.
 97 */
 98{
 99    char* qstr_start = *current_char;
100
101    pw_destroy(result);  // this makes result Null
102
103    if (*qstr_start != '"') {
104        return true;
105    }
106    qstr_start++;
107
108    *result = PwString("");
109
110    char* qstr_end = qstr_start;
111    for (;;) {
112        unsigned char c = *qstr_end;
113        if (is_ctl(c)) {
114            if (c != ' ' && c != '\t') {
115                break;
116            }
117        }
118        if (c == '"') {
119            break;
120        }
121        if (c != '\\') {
122            qstr_end++;
123            continue;
124        }
125        // append what we've got and skip quote char
126        if (!pw_string_append(result, qstr_start, qstr_end)) {
127            return false;
128        }
129        qstr_end++;
130        qstr_start = qstr_end;
131    }
132    if (*qstr_end != '"') {
133        // strict parsing, ignore malformed string
134        if (!pw_string_truncate(result, 0)) {
135            return false;
136        }
137    } else {
138        if (!pw_string_append(result, qstr_start, qstr_end)) {
139            return false;
140        }
141        qstr_end++;  // skip closing quote
142    }
143    *current_char = qstr_end;
144    return true;
145}
146
static inline bool is_mime_charsetc(char c)
/*
 * Tell whether `c` may appear in a charset name of an ext-value.
 *
 * mime-charsetc = ALPHA / DIGIT
 *                 / "!" / "#" / "$" / "%" / "&"
 *                 / "+" / "-" / "^" / "_" / "`"
 *                 / "{" / "}" / "~"
 *                 ; as <mime-charset> in Section 2.3 of [RFC2978]
 *                 ; except that the single quote is not included
 *                 ; SHOULD be registered in the IANA charset registry
 */
{
    // <ctype.h> functions are only defined for EOF and values representable
    // as unsigned char; a plain char holding a byte >= 0x80 may be negative
    if (isalnum((unsigned char) c)) {
        return true;
    }
    switch (c) {
        case '!':  case '#':  case '$':  case '%':  case '&':
        case '+':  case '-':  case '^':  case '_':  case '`':
        case '{':  case '}':  case '~':
            return true;
        default:
            return false;
    }
}
170
static inline char xdigit_to_num(char** current_char)
/*
 * Convert the hexadecimal digit at *current_char to its numeric value
 * (0..15) and advance *current_char past it.
 *
 * If the character is not a hexadecimal digit, return 0 and leave
 * *current_char where it is. Because 0 is also the value of the digit '0',
 * a caller that needs to tell these cases apart must check the character
 * with isxdigit() before calling.
 */
{
    // <ctype.h> functions require a value representable as unsigned char
    unsigned char c = (unsigned char) **current_char;

    if (!isxdigit(c)) {
        return 0;
    }
    (*current_char)++;
    if (isdigit(c)) {
        return (char) (c - '0');
    } else if (islower(c)) {
        return (char) (10 + c - 'a');
    } else {
        return (char) (10 + c - 'A');
    }
}

static inline char32_t parse_value_char(char** current_char)
/*
 * Decode one element of value-chars at *current_char.
 *
 * value-chars = *( pct-encoded / attr-char )
 *
 * pct-encoded = "%" HEXDIG HEXDIG
 *               ; see [RFC3986], Section 2.1
 *
 * attr-char   = ALPHA / DIGIT
 *               / "!" / "#" / "$" / "&" / "+" / "-" / "."
 *               / "^" / "_" / "`" / "|" / "~"
 *               ; token except ( "*" / "'" / "%" )
 *
 * Return the attr-char itself or the octet encoded by pct-encoded, and
 * advance *current_char past what was consumed.
 *
 * Return 0 without moving *current_char when the input is neither:
 * end of string, a character outside attr-char, "%" not followed by two
 * hexadecimal digits, or "%00" (which cannot be told apart from the
 * 0 end marker).
 *
 * NOTE(review): every pct-encoded octet is returned as a separate value;
 * octets of a multi-byte sequence are not combined into a single code
 * point here. Confirm that callers account for the declared charset.
 */
{
    char* ptr = *current_char;
    unsigned char c = (unsigned char) *ptr;

    if (isalnum(c)) {
        (*current_char)++;
        return c;
    }
    switch (c) {
        case '!':  case '#':  case '$':  case '&':  case '+':  case '-':  case '.':
        case '^':  case '_':  case '`':  case '|':  case '~':
            (*current_char)++;
            return c;
        case '%':
            break;
        default:
            return 0;
    }
    // pct-encoded: validate both digits up front. The result of
    // xdigit_to_num() alone cannot be used for validation because a
    // zero nibble (as in %20 or %0A) is a perfectly valid value.
    // The && short-circuit keeps ptr[2] from being read past the terminator.
    if (!(isxdigit((unsigned char) ptr[1]) && isxdigit((unsigned char) ptr[2]))) {
        return 0;
    }
    char* digits = ptr + 1;
    char32_t high_nibble = (char32_t) xdigit_to_num(&digits);
    char32_t low_nibble  = (char32_t) xdigit_to_num(&digits);
    char32_t octet = (high_nibble << 4) | low_nibble;
    if (octet == 0) {
        return 0;
    }
    *current_char = digits;
    return octet;
}
229
230[[nodiscard]] static bool parse_ext_value(char** current_char, PwValuePtr result)
231/*
232 * current_char must point to the first non-space character
233 *
234 * ext-value           = charset  "'" [ language ] "'" value-chars
235 *
236 * charset             = "UTF-8" / "ISO-8859-1" / mime-charset
237 *
238 * mime-charset        = 1*mime-charsetc
239 *
240 * language            = <Language-Tag, defined in [RFC5646], Section 2.1>
241 *
242 * Return string or null value if not a quoted string.
243 */
244{
245    char* charset_ptr = *current_char;
246    char* language_ptr = *current_char;
247
248    for (;;) {
249        if (!is_mime_charsetc(*charset_ptr)) {
250            break;
251        }
252        charset_ptr++;
253    }
254
255    pw_destroy(result);  // this makes result Null
256
257    *current_char = language_ptr;
258    if (*language_ptr != '\'') {
259        // malformed ext-value
260        return true;
261    };
262
263    *language_ptr++ = 0;  // terminate charset part
264
265    char* value_ptr = language_ptr;
266
267    // get language tag by simply searching closing single quote
268    for (;;) {
269        char c = *value_ptr;
270        if (c == '\'' || c == 0) {
271            break;
272        }
273        value_ptr++;
274    }
275    *current_char = value_ptr;
276
277    if (*value_ptr != '\'') {
278        // malformed ext-value
279        return true;
280    }
281    *value_ptr++ = 0;  // terminate language part
282    *current_char = value_ptr;
283
284    PwValue value = PW_NULL;
285    if (!pw_create_empty_string(strlen(value_ptr) + 1, 1, &value)) {
286        return false;
287    }
288    for (;;) {
289        char32_t c = parse_value_char(current_char);
290        if (c == 0) {
291            break;
292        }
293        if (!pw_string_append(&value, c)) {
294            return false;
295        }
296    }
297    PwValue charset = PW_NULL;
298    if (!pw_create_string(&charset, charset_ptr)) {
299        return false;
300    }
301    PwValue language = PW_NULL;
302    if (!pw_create_string(&language, language_ptr)) {
303        return false;
304    }
305    return pw_map_va(
306        result,
307        PwString("charset"),  pw_clone(&charset),
308        PwString("language"), pw_clone(&language),
309        PwString("value"),    pw_clone(&value)
310    );
311}
312
313[[nodiscard]] bool pw_parse_content_type(char* content_type,
314                                         PwValuePtr media_type, PwValuePtr media_subtype, PwValuePtr media_type_params)
315/*
316 * https://datatracker.ietf.org/doc/html/rfc7231#section-3.1.1.1
317 *
318 * media-type = type "/" subtype *( OWS ";" OWS parameter )
319 * type       = token
320 * subtype    = token
321 *
322 * parameter  = token "=" ( token / quoted-string )
323 *
324 * XXX: replaced OWS with LWSP
325 */
326{
327    char** current_char = &content_type;
328
329    // parse media type
330    if (!parse_token(current_char, media_type)) {
331        return false;
332    }
333    if (**current_char == 0) {
334        pw_set_status(PwStatus(PW_ERROR));
335        return false;
336    }
337    if (**current_char != '/') {
338        pw_set_status(PwStatus(PW_ERROR_EOF));
339        return false;
340    }
341    (*current_char)++;
342
343    // parse media subtype
344    if (!parse_token(current_char, media_subtype)) {
345        return false;
346    }
347
348    // parse params
349    if (!pw_create_map(media_type_params)) {
350        return false;
351    }
352    for (;;) {
353        skip_lwsp(current_char);
354        if (**current_char == 0) {
355            break;
356        }
357        if (**current_char != ';') {
358            // malformed header, but we've got as most as we could, haven't we?
359            break;
360        }
361        (*current_char)++;
362        skip_lwsp(current_char);
363        {
364            PwValue param_name = PW_NULL;
365            if (!parse_token(current_char, &param_name)) {
366                return false;
367            }
368            skip_lwsp(current_char);
369            if (**current_char != '=') {
370                break;
371            }
372            (*current_char)++;
373            skip_lwsp(current_char);
374
375            if (**current_char == 0) {
376                break;
377            }
378
379            PwValue param_value = PW_NULL;
380            if (**current_char == '"') {
381                if (!parse_quoted_string(current_char, &param_value)) {
382                    return false;
383                }
384            } else {
385                if (!parse_token(current_char, &param_value)) {
386                    return false;
387                }
388            }
389            if (!pw_is_string(&param_value)) {
390                break;
391            }
392
393            if (!pw_string_lower(&param_name)) {
394                return false;
395            }
396            if (!pw_map_update(media_type_params, &param_name, &param_value)) {
397                return false;
398            }
399        }
400    }
401    return true;
402}
403
404[[nodiscard]] bool pw_parse_content_disposition(char* content_disposition,
405                                                PwValuePtr disposition_type, PwValuePtr disposition_params)
406/*
407 * content-disposition = "Content-Disposition" ":"
408 *                             disposition-type *( ";" disposition-parm )
409 *
410 * disposition-type    = "inline" | "attachment" | disp-ext-type
411 *                       ; case-insensitive
412 *
413 * disp-ext-type       = token
414 *
415 * disposition-parm    = filename-parm | disp-ext-parm
416 *
417 * filename-parm       = "filename" "=" value
418 *                     | "filename*" "=" ext-value
419 *
420 * disp-ext-parm       = token "=" value
421 *                     | ext-token "=" ext-value
422 *
423 * ext-token           = <the characters in token, followed by "*">
424 */
425{
426    char** current_char = &content_disposition;
427
428    // parse disposition type
429    if (!parse_token(current_char, disposition_type)) {
430        return false;
431    }
432    if (!pw_string_lower(disposition_type)) {
433        return false;
434    }
435
436    // parse disposition params
437    if (!pw_create_map(disposition_params)) {
438        return false;
439    }
440    for (;;) {
441        skip_lwsp(current_char);
442        if (**current_char == 0) {
443            break;
444        }
445        if (**current_char != ';') {
446            // malformed header, but we've got as most as we could, haven't we?
447            break;
448        }
449        (*current_char)++;
450        skip_lwsp(current_char);
451        {
452            bool is_ext_value = false;
453
454            PwValue param_name = PW_NULL;
455            if (!parse_token(current_char, &param_name)) {
456                return false;
457            }
458            if (*(*current_char - 1) == '*') {
459                is_ext_value = true;
460            }
461            skip_lwsp(current_char);
462            if (**current_char != '=') {
463                break;
464            }
465            (*current_char)++;
466            skip_lwsp(current_char);
467
468            if (**current_char == 0) {
469                break;
470            }
471
472            PwValue param_value = PW_NULL;
473            if (is_ext_value) {
474                if (!parse_ext_value(current_char, &param_value)) {
475                    return false;
476                }
477            } else if (**current_char == '"') {
478                if (!parse_quoted_string(current_char, &param_value)) {
479                    return false;
480                }
481            } else {
482                if (!parse_token(current_char, &param_value)) {
483                    return false;
484                }
485            }
486            if (!pw_is_string(&param_value)) {
487                break;
488            }
489            if (!pw_string_lower(&param_name)) {
490                return false;
491            }
492            if (!pw_map_update(disposition_params, &param_name, &param_value)) {
493                return false;
494            }
495        }
496    }
497    return true;
498}
499
500[[nodiscard]] bool pw_urljoin_cstr(char* base_url, char* other_url, PwValuePtr result)
501{
502    CURLU* handle = curl_url();
503    if (!handle) {
504        pw_set_status(PwStatus(PW_ERROR_OOM));
505        return false;
506    }
507
508    CURLUcode rc;
509
510    rc = curl_url_set(handle, CURLUPART_URL, base_url, 0);
511    if(rc) {
512        pw_set_status(PwStatus(PW_ERROR), "URL error: %s", curl_url_strerror(rc));
513
514        curl_url_cleanup(handle);
515        return false;
516    }
517    rc = curl_url_set(handle, CURLUPART_URL, other_url, 0);
518    if(rc) {
519        pw_set_status(PwStatus(PW_ERROR), "URL error: %s", curl_url_strerror(rc));
520
521        curl_url_cleanup(handle);
522        return false;
523    }
524    char* url;
525    rc = curl_url_get(handle, CURLUPART_URL, &url, 0);
526    if(rc) {
527        pw_set_status(PwStatus(PW_ERROR), "URL error: %s", curl_url_strerror(rc));
528
529        curl_url_cleanup(handle);
530        return false;
531    }
532    bool ret = pw_create_string(result, url);
533    curl_free(url);
534    curl_url_cleanup(handle);
535    return ret;
536}
537
538[[nodiscard]] bool pw_urljoin(PwValuePtr base_url, PwValuePtr other_url, PwValuePtr result)
539{
540    PW_CSTRING(cstr_base_url, base_url);
541    PW_CSTRING(cstr_other_url, other_url);
542    return pw_urljoin_cstr(cstr_base_url, cstr_other_url, result);
543}