#include <signal.h>
#include <string.h>
#include <unistd.h>

#include <pwlib/file.h>
#include <pwlib/parsers.h>
#include "pw_curl.h"
7
8// constants
9_PwValue accept_encoding = PW_STATIC_STRING("gzip, deflate, br, zstd");
10_PwValue user_agent = PW_STATIC_STRING("User-Agent: pw-curl (https://tilde.club/~petbrain/)");
11_PwValue cainfo = PW_STATIC_STRING("/etc/ssl/certs/ca-certificates.crt");
12
13// global parameters from argv
14_PwValue proxy = PW_NULL;
15_PwValue verbose = PW_BOOL(false);
16
17// CURL session
18_PwValue curl_session = PW_NULL;
19
20
21// signal handling
22
// Raised by sigint_handler and polled by the fetch loop in pw_main.
// volatile sig_atomic_t is the object type the C standard allows a signal
// handler to write while the interrupted code reads it.
volatile sig_atomic_t pending_sigint = 0;

/*
 * SIGINT handler: print a notice and raise pending_sigint.
 *
 * Only async-signal-safe functions may be called from a signal handler,
 * so the notice is emitted with write(2) rather than stdio.
 * The text matches what puts("\nInterrupted") would print.
 */
void sigint_handler(int sig)
{
    static const char notice[] = "\nInterrupted\n";

    (void) sig;

    // nothing sensible can be done about a failed write here,
    // the result is dropped on purpose
    ssize_t rc = write(STDOUT_FILENO, notice, sizeof(notice) - 1);
    (void) rc;

    pending_sigint = 1;
}
30
31/*
32 * Extended CurlRequest to save content to a file
33 */
34
35uint16_t PwTypeId_FileRequest = 0;
36
37typedef struct {
38 // autocleaned PwValue is not suitable for manually managed data,
39 // using plain structure that starts with underscore
40 _PwValue file;
41
42 // values saved by on_content_start
43 long status;
44 curl_off_t length;
45 _PwValue final_url;
46
47} FileRequestData;
48
49
50/*
51 * Basic interface
52 */
53
54static bool file_request_destroy(PwMethod_Basic_destroy* mthis, PwValuePtr self, _PwCompoundChain* tail)
55{
56 FileRequestData* req = pw_this_data(self);
57 pw_destroy(&req->final_url);
58 pw_destroy(&req->file);
59 return pw_super(mthis, self, tail);
60}
61
62static PwInterface_Basic file_request_basic_interface = {
63 .destroy = { .func = file_request_destroy }
64};
65
66/*
67 * CurlRequest interface
68 */
69
70static bool file_request_on_content_start(PwMethod_CurlRequest_on_content_start* mthis, PwValuePtr self,
71 long status, curl_off_t length, PwValuePtr final_url)
72{
73 FileRequestData* req = pw_this_data(self);
74
75 // save args
76 req->status = status;
77 req->length = length;
78 pw_clone2(&req->final_url, final_url);
79
80 // get original URL
81 PwValue url = PW_NULL;
82 if (!pw_call(CurlRequest, get_url, self, &url)) {
83 return false;
84 }
85 PW_CSTRING(url_cstr, &url);
86
87 if(status != 200) {
88 printf("FAILED: %ld %s\n", status, url_cstr);
89 // do not return error here, receive and swallow response
90 return true;
91 }
92
93 // create file
94 PwValue filename = PW_NULL;
95 if (!pw_call(CurlRequest, attachment_filename, self, &filename)) {
96 return false;
97 }
98 if (pw_strlen(&filename) == 0) {
99 if (!pw_string_append(&filename, "index.html", nullptr)) {
100 return false;
101 }
102 }
103 if (!pw_file_open(&filename, O_CREAT | O_RDWR | O_TRUNC, 0644, &req->file)) {
104 return false;
105 }
106 PW_CSTRING(filename_cstr, &filename);
107 printf("Downloading %s -> %s\n", url_cstr, filename_cstr);
108 return true;
109}
110
111static bool file_request_on_content(PwMethod_CurlRequest_on_content* mthis, PwValuePtr self, void* data, size_t size)
112{
113 FileRequestData* req = pw_this_data(self);
114
115 if(req->status != 200) {
116 // silently swallow content
117 return true;
118 }
119
120 // write data to file
121 unsigned bytes_written;
122 if (!pw_write(&req->file, data, size, &bytes_written)) {
123 return false;
124 }
125 return true;
126}
127
128static bool file_request_on_complete(PwMethod_CurlRequest_on_complete* mthis, PwValuePtr self, PwValuePtr error)
129{
130 FileRequestData* req = pw_this_data(self);
131
132 if (!pw_is_null(error)) {
133 pw_set_status(*error);
134 return false;
135 }
136 // close file
137 if (!pw_is_null(&req->file)) {
138 if (!pw_close(&req->file)) { /* ignore error */ }
139 pw_destroy(&req->file);
140 }
141 return true;
142}
143
144static PwInterface_CurlRequest file_request_interface = {
145 .on_content_start = { .func = file_request_on_content_start },
146 .on_content = { .func = file_request_on_content },
147 .on_complete = { .func = file_request_on_complete }
148};
149
150
151static bool make_request(PwValuePtr url)
152{
153 PW_CSTRING(url_cstr, url);
154 printf("Requesting %s\n", url_cstr);
155
156 PwValue request = PW_NULL;
157
158 if (!pw_create(PwTypeId_FileRequest, &request)) {
159 return false;
160 }
161 if (!pw_call(CurlRequest, set_url, &request, url)) {
162 return false;
163 }
164 if (pw_is_string(&proxy) && pw_strlen(&proxy)) {
165 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_PROXY, &proxy)) {
166 return false;
167 }
168 }
169 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_VERBOSE, &verbose)) {
170 return false;
171 }
172 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_ACCEPT_ENCODING, &accept_encoding)) {
173 return false;
174 }
175 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_USERAGENT, &user_agent)) {
176 return false;
177 }
178 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_CAINFO, &cainfo)) {
179 return false;
180 }
181 PwValue n = PW_SIGNED(1200);
182 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_TIMEOUT, &n)) {
183 return false;
184 }
185 n.signed_value = 60;
186 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_CONNECTTIMEOUT, &n)) {
187 return false;
188 }
189 n.signed_value = 0;
190 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_EXPECT_100_TIMEOUT_MS, &n)) {
191 return false;
192 }
193 n.signed_value = 1;
194 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_FOLLOWLOCATION, &n)) {
195 return false;
196 }
197 n.signed_value = 10;
198 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_MAXREDIRS, &n)) {
199 return false;
200 }
201 PwValue protocols = PW_STATIC_STRING("http,https");
202 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_REDIR_PROTOCOLS_STR, &protocols)) {
203 return false;
204 }
205 n.signed_value = 1;
206 if (!pw_call(CurlRequest, setopt, &request, CURLOPT_AUTOREFERER, &n)) {
207 return false;
208 }
209
210 return pw_curl_add_request(&curl_session, &request);
211
212 // request is now managed by Curl session, we do not need it
213}
214
215static bool pw_main(int argc, char* argv[])
216{
217 // parse command line arguments
218 PwValue urls = PW_NULL;
219 if (!pw_create_array(&urls)) {
220 return false;
221 }
222 PwValue parallel = PW_UNSIGNED(1);
223 for (int i = 1; i < argc; i++) {
224
225 PwValue arg = PW_NULL;
226 if (!pw_create_string(&arg, argv[i])) {
227 return false;
228 }
229 if (pw_startswith(&arg, "http://") || pw_startswith(&arg, "https://")) {
230 if (!pw_array_append(&urls, &arg)) {
231 return false;
232 }
233 } else if (pw_startswith(&arg, "verbose=")) {
234 PwValue v = PW_NULL;
235 if (!pw_substr(&arg, strlen("verbose="), pw_strlen(&arg), &v)) {
236 return false;
237 }
238 verbose.bool_value = pw_equal(&v, "1");
239
240 } else if (pw_startswith(&arg, "proxy=")) {
241 if (!pw_substr(&arg, strlen("proxy="), pw_strlen(&arg), &proxy)) {
242 return false;
243 }
244 } else if (pw_startswith(&arg, "parallel=")) {
245 PwValue s = PW_NULL;
246 if (!pw_substr(&arg, strlen("parallel="), pw_strlen(&arg), &s)) {
247 return false;
248 }
249 PwValue n = PW_NULL;
250 if (pw_parse_number(&s, &n)) {
251 parallel = n;
252 }
253 }
254 }
255 if (pw_array_length(&urls) == 0) {
256 printf("Usage: fetch [verbose=1|0] [proxy=<proxy>] [parallel=<n>] url1 url2 ...\n");
257 return true;
258 }
259
260 // create Curl session
261
262 if (!pw_create(PwTypeId_CurlSession, &curl_session)) {
263 return false;
264 }
265
266 // fetch URLs
267 // prepare first request
268 for(unsigned i = 0; i < parallel.unsigned_value; i++) {
269 if (pw_array_length(&urls) == 0) {
270 break;
271 }
272 PwValue url = PW_NULL;
273 if (!pw_array_pop(&urls, &url)) {
274 return false;
275 }
276 if (!make_request(&url)) {
277 return false;
278 }
279 }
280
281 // perform fetching
282
283 while(!pending_sigint) {
284 PwValue timeout = PwUnsigned(1000);
285 int running_transfers;
286 if (!pw_curl_perform(&curl_session, &timeout, &running_transfers)) {
287 return false;
288 }
289 unsigned i = running_transfers;
290 // add more requests
291 for(; i < parallel.unsigned_value; i++) {
292 if (pw_array_length(&urls) == 0) {
293 break;
294 }
295 PwValue url = PW_NULL;
296 if (!pw_array_pop(&urls, &url)) {
297 return false;
298 }
299 if (!make_request(&url)) {
300 return false;
301 }
302 }
303 if (i == 0) {
304 // no running transfers and no more URLs were added
305 break;
306 }
307 }
308 return true;
309}
310
311int main(int argc, char* argv[])
312{
313 // global initialization
314
315 init_allocator(&pet_allocator);
316 curl_global_init(CURL_GLOBAL_DEFAULT);
317
318 // create FileRequest subtype
319
320 PwTypeId_FileRequest = pw_add_type2(
321 "FileRequest", FileRequestData,
322 PW_PARENTS,
323 PwTypeId_CurlRequest,
324 PW_INTERFACES,
325 PwInterfaceId_CurlRequest, &file_request_interface,
326 PwInterfaceId_Basic, &file_request_basic_interface
327 );
328
329 // setup signal handling
330
331 signal(SIGINT, sigint_handler);
332
333 // main routine
334
335 if (!pw_main(argc, argv)) {
336 pw_print_status(stdout, ¤t_task->status);
337 }
338
339 // global finalization
340
341 pw_destroy(&curl_session);
342 pw_destroy(&proxy);
343
344 curl_global_cleanup();
345
346 return 0;
347}