#! /usr/bin/env python3

# Chapter 15 Project Multithreaded XKCD Downloader
#
# Recovered from the patch "initial commit for archiving" (commit 75a42ec,
# file Chap15ProjMulitXKCDdownload.py). The mail header and diff markers were
# removed so that the file is plain, runnable Python.

import os
import threading
from urllib.parse import urljoin

import bs4
import requests

# Seconds to wait on any single HTTP request before giving up, so that a
# stalled connection cannot hang a worker thread (and join()) forever.
REQUEST_TIMEOUT = 30

os.makedirs('xkcd', exist_ok=True)


def _downloadComic(urlNumber):
    """Fetch one comic page and save its image into the 'xkcd' directory.

    Raises requests.exceptions.RequestException if the page or the image
    cannot be retrieved.
    """
    pageUrl = 'http://xkcd.com/%s' % (urlNumber)
    print('Downloading page http://xkcd.com/%s...' % (urlNumber))
    res = requests.get(pageUrl, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed.
    soup = bs4.BeautifulSoup(res.text, 'html.parser')

    comicElem = soup.select('#comic img')
    if not comicElem:
        print('Could not find comic image.')
        return

    # The src attribute may be scheme-less ("//host/path") or relative;
    # resolve it against the page URL so requests gets an absolute URL.
    comicUrl = urljoin(pageUrl, comicElem[0].get('src'))
    print('Downloading image %s...' % (comicUrl))
    res = requests.get(comicUrl, timeout=REQUEST_TIMEOUT)
    res.raise_for_status()

    imagePath = os.path.join('xkcd', os.path.basename(comicUrl))
    # The context manager closes the file even if a write fails midway.
    with open(imagePath, 'wb') as imageFile:
        for chunk in res.iter_content(100000):
            imageFile.write(chunk)


def downloadXkcd(startComic, endComic):
    """Download comics numbered startComic up to, but not including, endComic.

    A comic whose page or image request fails is reported and skipped, so a
    single missing page does not abort the rest of the range.
    """
    for urlNumber in range(startComic, endComic):
        try:
            _downloadComic(urlNumber)
        except requests.exceptions.RequestException as err:
            print('Skipping comic %s: %s' % (urlNumber, err))


def main():
    """Download comics 1-1399 using one thread per block of 100 comics."""
    downloadThreads = []
    for i in range(0, 1400, 100):
        # Half-open ranges [i, i + 100) tile without gaps. Comic numbering
        # starts at 1, so the first block begins there instead of at 0.
        downloadThread = threading.Thread(
            target=downloadXkcd, args=(max(i, 1), i + 100))
        downloadThreads.append(downloadThread)
        downloadThread.start()

    # Wait for every worker to finish before reporting completion.
    for downloadThread in downloadThreads:
        downloadThread.join()
    print('Done.')


if __name__ == '__main__':
    main()