#include <errno.h>
#include <signal.h>
#include <string.h>
#include <unistd.h>

#include <pwlib/file.h>
#include <pwlib/parsers.h>
#include "pw_curl.h"
  7
  8// constants
  9_PwValue accept_encoding = PW_STATIC_STRING("gzip, deflate, br, zstd");
 10_PwValue user_agent      = PW_STATIC_STRING("User-Agent: pw-curl (https://tilde.club/~petbrain/)");
 11_PwValue cainfo          = PW_STATIC_STRING("/etc/ssl/certs/ca-certificates.crt");
 12
 13// global parameters from argv
 14_PwValue proxy   = PW_NULL;
 15_PwValue verbose = PW_BOOL(false);
 16
 17// CURL session
 18_PwValue curl_session = PW_NULL;
 19
 20
 21// signal handling
 22
 23sig_atomic_t pending_sigint = 0;
 24
 25void sigint_handler(int sig)
 26{
 27    puts("\nInterrupted");
 28    pending_sigint = 1;
 29}
 30
 31/*
 32 * Extended CurlRequest to save content to a file
 33 */
 34
 35uint16_t PwTypeId_FileRequest = 0;
 36
 37typedef struct {
 38    // autocleaned PwValue is not suitable for manually managed data,
 39    // using plain structure that starts with underscore
 40    _PwValue file;
 41
 42    // values saved by on_content_start
 43    long status;
 44    curl_off_t length;
 45    _PwValue final_url;
 46
 47} FileRequestData;
 48
 49
 50/*
 51 * Basic interface
 52 */
 53
 54static bool file_request_destroy(PwMethod_Basic_destroy* mthis, PwValuePtr self, _PwCompoundChain* tail)
 55{
 56    FileRequestData* req = pw_this_data(self);
 57    pw_destroy(&req->final_url);
 58    pw_destroy(&req->file);
 59    return pw_super(mthis, self, tail);
 60}
 61
 62static PwInterface_Basic file_request_basic_interface = {
 63    .destroy = { .func = file_request_destroy }
 64};
 65
 66/*
 67 * CurlRequest interface
 68 */
 69
 70static bool file_request_on_content_start(PwMethod_CurlRequest_on_content_start* mthis, PwValuePtr self,
 71                                          long status, curl_off_t length, PwValuePtr final_url)
 72{
 73    FileRequestData* req = pw_this_data(self);
 74
 75    // save args
 76    req->status = status;
 77    req->length = length;
 78    pw_clone2(&req->final_url, final_url);
 79
 80    // get original URL
 81    PwValue url = PW_NULL;
 82    if (!pw_call(CurlRequest, get_url, self, &url)) {
 83        return false;
 84    }
 85    PW_CSTRING(url_cstr, &url);
 86
 87    if(status != 200) {
 88        printf("FAILED: %ld %s\n", status, url_cstr);
 89        // do not return error here, receive and swallow response
 90        return true;
 91    }
 92
 93    // create file
 94    PwValue filename = PW_NULL;
 95    if (!pw_call(CurlRequest, attachment_filename, self, &filename)) {
 96        return false;
 97    }
 98    if (pw_strlen(&filename) == 0) {
 99        if (!pw_string_append(&filename, "index.html", nullptr)) {
100            return false;
101        }
102    }
103    if (!pw_file_open(&filename, O_CREAT | O_RDWR | O_TRUNC, 0644, &req->file)) {
104        return false;
105    }
106    PW_CSTRING(filename_cstr, &filename);
107    printf("Downloading %s -> %s\n", url_cstr, filename_cstr);
108    return true;
109}
110
111static bool file_request_on_content(PwMethod_CurlRequest_on_content* mthis, PwValuePtr self, void* data, size_t size)
112{
113    FileRequestData* req = pw_this_data(self);
114
115    if(req->status != 200) {
116        // silently swallow content
117        return true;
118    }
119
120    // write data to file
121    unsigned bytes_written;
122    if (!pw_write(&req->file, data, size, &bytes_written)) {
123        return false;
124    }
125    return true;
126}
127
128static bool file_request_on_complete(PwMethod_CurlRequest_on_complete* mthis, PwValuePtr self, PwValuePtr error)
129{
130    FileRequestData* req = pw_this_data(self);
131
132    if (!pw_is_null(error)) {
133        pw_set_status(*error);
134        return false;
135    }
136    // close file
137    if (!pw_is_null(&req->file)) {
138        if (!pw_close(&req->file)) { /* ignore error */ }
139        pw_destroy(&req->file);
140    }
141    return true;
142}
143
144static PwInterface_CurlRequest file_request_interface = {
145    .on_content_start = { .func = file_request_on_content_start },
146    .on_content       = { .func = file_request_on_content },
147    .on_complete      = { .func = file_request_on_complete }
148};
149
150
151static bool make_request(PwValuePtr url)
152{
153    PW_CSTRING(url_cstr, url);
154    printf("Requesting %s\n", url_cstr);
155
156    PwValue request = PW_NULL;
157
158    if (!pw_create(PwTypeId_FileRequest, &request)) {
159        return false;
160    }
161    if (!pw_call(CurlRequest, set_url, &request, url)) {
162        return false;
163    }
164    if (pw_is_string(&proxy) && pw_strlen(&proxy)) {
165        if (!pw_call(CurlRequest, setopt, &request, CURLOPT_PROXY, &proxy)) {
166            return false;
167        }
168    }
169    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_VERBOSE, &verbose)) {
170        return false;
171    }
172    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_ACCEPT_ENCODING, &accept_encoding)) {
173        return false;
174    }
175    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_USERAGENT, &user_agent)) {
176        return false;
177    }
178    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_CAINFO, &cainfo)) {
179        return false;
180    }
181    PwValue n = PW_SIGNED(1200);
182    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_TIMEOUT, &n)) {
183        return false;
184    }
185    n.signed_value = 60;
186    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_CONNECTTIMEOUT, &n)) {
187        return false;
188    }
189    n.signed_value = 0;
190    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_EXPECT_100_TIMEOUT_MS, &n)) {
191        return false;
192    }
193    n.signed_value = 1;
194    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_FOLLOWLOCATION, &n)) {
195        return false;
196    }
197    n.signed_value = 10;
198    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_MAXREDIRS, &n)) {
199        return false;
200    }
201    PwValue protocols = PW_STATIC_STRING("http,https");
202    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_REDIR_PROTOCOLS_STR, &protocols)) {
203        return false;
204    }
205    n.signed_value = 1;
206    if (!pw_call(CurlRequest, setopt, &request, CURLOPT_AUTOREFERER, &n)) {
207        return false;
208    }
209
210    return pw_curl_add_request(&curl_session, &request);
211
212    // request is now managed by Curl session, we do not need it
213}
214
215static bool pw_main(int argc, char* argv[])
216{
217    // parse command line arguments
218    PwValue urls = PW_NULL;
219    if (!pw_create_array(&urls)) {
220        return false;
221    }
222    PwValue parallel = PW_UNSIGNED(1);
223    for (int i = 1; i < argc; i++) {
224
225        PwValue arg = PW_NULL;
226        if (!pw_create_string(&arg, argv[i])) {
227            return false;
228        }
229        if (pw_startswith(&arg, "http://") || pw_startswith(&arg, "https://")) {
230            if (!pw_array_append(&urls, &arg)) {
231                return false;
232            }
233        } else if (pw_startswith(&arg, "verbose=")) {
234            PwValue v = PW_NULL;
235            if (!pw_substr(&arg, strlen("verbose="), pw_strlen(&arg), &v)) {
236                return false;
237            }
238            verbose.bool_value = pw_equal(&v, "1");
239
240        } else if (pw_startswith(&arg, "proxy=")) {
241            if (!pw_substr(&arg, strlen("proxy="), pw_strlen(&arg), &proxy)) {
242                return false;
243            }
244        } else if (pw_startswith(&arg, "parallel=")) {
245            PwValue s = PW_NULL;
246            if (!pw_substr(&arg, strlen("parallel="), pw_strlen(&arg), &s)) {
247                return false;
248            }
249            PwValue n = PW_NULL;
250            if (pw_parse_number(&s, &n)) {
251                parallel = n;
252            }
253        }
254    }
255    if (pw_array_length(&urls) == 0) {
256        printf("Usage: fetch [verbose=1|0] [proxy=<proxy>] [parallel=<n>] url1 url2 ...\n");
257        return true;
258    }
259
260    // create Curl session
261
262    if (!pw_create(PwTypeId_CurlSession, &curl_session)) {
263        return false;
264    }
265
266    // fetch URLs
267    // prepare first request
268    for(unsigned i = 0; i < parallel.unsigned_value; i++) {
269        if (pw_array_length(&urls) == 0) {
270            break;
271        }
272        PwValue url = PW_NULL;
273        if (!pw_array_pop(&urls, &url)) {
274            return false;
275        }
276        if (!make_request(&url)) {
277            return false;
278        }
279    }
280
281    // perform fetching
282
283    while(!pending_sigint) {
284        PwValue timeout = PwUnsigned(1000);
285        int running_transfers;
286        if (!pw_curl_perform(&curl_session, &timeout, &running_transfers)) {
287            return false;
288        }
289        unsigned i = running_transfers;
290        // add more requests
291        for(; i < parallel.unsigned_value; i++) {
292            if (pw_array_length(&urls) == 0) {
293                break;
294            }
295            PwValue url = PW_NULL;
296            if (!pw_array_pop(&urls, &url)) {
297                return false;
298            }
299            if (!make_request(&url)) {
300                return false;
301            }
302        }
303        if (i == 0) {
304            // no running transfers and no more URLs were added
305            break;
306        }
307    }
308    return true;
309}
310
311int main(int argc, char* argv[])
312{
313    // global initialization
314
315    init_allocator(&pet_allocator);
316    curl_global_init(CURL_GLOBAL_DEFAULT);
317
318    // create FileRequest subtype
319
320    PwTypeId_FileRequest = pw_add_type2(
321        "FileRequest", FileRequestData,
322        PW_PARENTS,
323            PwTypeId_CurlRequest,
324        PW_INTERFACES,
325            PwInterfaceId_CurlRequest, &file_request_interface,
326            PwInterfaceId_Basic,       &file_request_basic_interface
327    );
328
329    // setup signal handling
330
331    signal(SIGINT, sigint_handler);
332
333    // main routine
334
335    if (!pw_main(argc, argv)) {
336        pw_print_status(stdout, &current_task->status);
337    }
338
339    // global finalization
340
341    pw_destroy(&curl_session);
342    pw_destroy(&proxy);
343
344    curl_global_cleanup();
345
346    return 0;
347}