summaryrefslogtreecommitdiff
path: root/Chapter11ProjectDownloadXkcd.py
diff options
context:
space:
mode:
Diffstat (limited to 'Chapter11ProjectDownloadXkcd.py')
-rw-r--r--Chapter11ProjectDownloadXkcd.py38
1 files changed, 38 insertions, 0 deletions
diff --git a/Chapter11ProjectDownloadXkcd.py b/Chapter11ProjectDownloadXkcd.py
new file mode 100644
index 0000000..11f7f6b
--- /dev/null
+++ b/Chapter11ProjectDownloadXkcd.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python3
+
+#Chapter 11 Project
+#XKCD Comic Download - Downloads every XKCD comic
+
+import requests
+import os
+import bs4
+
+url = 'http://xkcd.com' #Starting url
+os.makedirs('xkcd', exist_ok=True) #Store comics in ./xkcd
+
+while not url.endswith('#'):
+ #Download the page
+ print('Downloading page %s...' % url)
+ res = requests.get(url)
+ res.raise_for_status()
+
+ soup = bs4.BeautifulSoup(res.text)
+ comicElem = soup.select('#comic img')
+ if comicElem == []:
+ print('Could not find the comic image.')
+ else:
+ comicUrl = 'http:' + comicElem[0].get('src')
+ # Download the image
+ print('Downloading image %s...' % (comicUrl))
+ res = requests.get(comicUrl)
+ res.raise_for_status()
+ imageFile = open(os.path.join('xkcd', os.path.basename(comicUrl)), 'wb')
+ for chunk in res.iter_content(100000):
+ imageFile.write(chunk)
+ imageFile.close()
+
+ #Get the Prev button's url.
+ prevLink = soup.select('a[rel="prev"]')[0]
+ url = 'http://xkcd.com' + prevLink.get('href')
+
+print('Done.')