From 75a42ec54dbf721caa659ddf02c1f46fc2cb4bef Mon Sep 17 00:00:00 2001
From: mlot <petri-rush-curvy@duck.com>
Date: Fri, 6 Jun 2025 13:40:57 -0400
Subject: initial commit for archiving

---
 Chapter11ProjectDownloadXkcd.py | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)
 create mode 100644 Chapter11ProjectDownloadXkcd.py

(limited to 'Chapter11ProjectDownloadXkcd.py')

diff --git a/Chapter11ProjectDownloadXkcd.py b/Chapter11ProjectDownloadXkcd.py
new file mode 100644
index 0000000..11f7f6b
--- /dev/null
+++ b/Chapter11ProjectDownloadXkcd.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+
+#Chapter 11 Project
+#XKCD Comic Download - Downloads every XKCD comic
+
+import requests
+import os
+import bs4
+
+url = 'http://xkcd.com' #Starting url; replaced each pass by the Prev link's target
+os.makedirs('xkcd', exist_ok=True)  #Store comics in ./xkcd
+
+while not url.endswith('#'):  #Stop once the Prev link's href leaves url ending in '#'
+    #Download the page
+    print('Downloading page %s...' % url)
+    res = requests.get(url)
+    res.raise_for_status()
+    #Name the parser explicitly so parsing does not depend on which parsers are installed.
+    soup = bs4.BeautifulSoup(res.text, 'html.parser')
+    comicElem = soup.select('#comic img')
+    if comicElem == []:
+        print('Could not find the comic image.')
+    else:
+        comicUrl = 'http:' + comicElem[0].get('src')
+        # Download the image; the with block closes the file even if a write fails.
+        print('Downloading image %s...' % (comicUrl))
+        res = requests.get(comicUrl)
+        res.raise_for_status()
+        with open(os.path.join('xkcd', os.path.basename(comicUrl)), 'wb') as imageFile:
+            for chunk in res.iter_content(100000):
+                imageFile.write(chunk)
+
+    #Get the Prev button's url.
+    #Runs whether or not an image was found, so the loop always moves to the previous page.
+    prevLink = soup.select('a[rel="prev"]')[0]
+    url = 'http://xkcd.com' + prevLink.get('href')
+
+print('Done.')
-- 
cgit