diff options
Diffstat (limited to 'Chap15ProjMulitXKCDdownload.py')
-rw-r--r-- | Chap15ProjMulitXKCDdownload.py | 42 |
1 files changed, 42 insertions, 0 deletions
#! /usr/bin/env python3

# Chapter 15 Project Multithreaded XKCD Downloader

import threading
import os
from urllib.parse import urljoin

import bs4
import requests

# Every worker thread saves into ./xkcd, so create the folder once up front.
os.makedirs('xkcd', exist_ok=True)


def downloadXkcd(startComic, endComic):
    """Download a range of XKCD comics into the 'xkcd' folder.

    Fetches http://xkcd.com/<n> for every n in range(startComic, endComic),
    i.e. endComic itself is NOT downloaded. For each page, the first element
    matching '#comic img' is located and the image it points to is saved
    under its original file name.

    A comic whose page or image cannot be fetched is reported and skipped so
    that one bad comic does not abort the rest of the range.

    Args:
        startComic: Number of the first comic to download (inclusive).
        endComic: Number at which to stop (exclusive).
    """
    for urlNumber in range(startComic, endComic):
        try:
            print('Downloading page http://xkcd.com/%s...' % (urlNumber))
            res = requests.get('http://xkcd.com/%s' % (urlNumber))
            res.raise_for_status()

            # Name the parser explicitly so results do not depend on which
            # optional parsers happen to be installed.
            soup = bs4.BeautifulSoup(res.text, 'html.parser')

            comicElem = soup.select('#comic img')
            if not comicElem:
                print('Could not find comic image.')
                continue

            # The src attribute may be scheme-relative ('//host/path') or
            # relative to the page; resolve it against the page URL so that
            # requests always receives an absolute URL.
            comicUrl = urljoin(res.url, comicElem[0].get('src'))
            print('Downloading image %s...' % (comicUrl))
            res = requests.get(comicUrl)
            res.raise_for_status()

            # 'with' guarantees the file is closed even if a write fails.
            imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
            with open(imagePath, 'wb') as imageFile:
                for chunk in res.iter_content(100000):
                    imageFile.write(chunk)
        except requests.exceptions.RequestException as err:
            # Network/HTTP failure for this comic only: report and move on.
            print('Skipping comic %s: %s' % (urlNumber, err))


# Start one thread per block of 100 comics.
downloadThreads = []
for i in range(0, 1400, 100):
    # downloadXkcd() treats its end argument as exclusive, so pass i + 100 to
    # cover the whole block. Comic numbering starts at 1, so the first block
    # begins there instead of at 0.
    downloadThread = threading.Thread(target=downloadXkcd,
                                      args=(max(i, 1), i + 100))
    downloadThreads.append(downloadThread)
    downloadThread.start()

# Wait for every worker to finish before reporting completion.
for downloadThread in downloadThreads:
    downloadThread.join()
print('Done.')