# Generic image-download pattern. The two angle-bracket strings look like
# placeholders: the first stands for a CSS selector (ID, class, tag, ...),
# the second for the attribute holding the image address (data-src or src).
for img_tag in soup.select("<ID、Class、Tag等等定位>"):
    img_url = img_tag.get("<data-src或者src>")
    if not img_url:
        # Tag.get() returns None when the attribute is absent; skip the tag
        # instead of handing None to requests.get().
        continue
    res = requests.get(img_url)

    # Current local time: year-month-day_hour-minute-second.
    now = time.strftime("%Y-%m-%d_%H-%M-%S_", time.localtime())
    # Fractional-second digits of the current timestamp, appended so files
    # saved within the same second get different names.
    ms = str(time.time()).split(".")[1]
    # Image naming rule: <file_path><timestamp><fraction>.jpg
    img_name = file_path + now + ms + ".jpg"

    with open(img_name, 'wb') as f:
        f.write(res.content)
def download(url):
    """Save every image found in the article body of the page at ``url``.

    The page is fetched with ``askURL`` and parsed with BeautifulSoup. For
    each ``<img>`` matched by the selector ``.rich_media_content img`` the
    address in its ``data-src`` attribute is requested and the response body
    is written to ``file_path + <timestamp> + ".jpg"``.

    Args:
        url: Address of the page to scrape.
    """
    html = askURL(url)
    soup = BeautifulSoup(html, "html.parser")

    # Core logic: walk the images inside the article container.
    for img_tag in soup.select(".rich_media_content img"):
        img_url = img_tag.get("data-src")
        if not img_url:
            # Tag.get() returns None when the attribute is absent; skip the
            # tag instead of handing None to requests.get().
            continue
        res = requests.get(img_url)

        # Current local time: year-month-day_hour-minute-second.
        now = time.strftime("%Y-%m-%d_%H-%M-%S_", time.localtime())
        # Fractional-second digits of the current timestamp, appended so
        # files saved within the same second get different names.
        ms = str(time.time()).split(".")[1]
        # Image naming rule: <file_path><timestamp><fraction>.jpg
        img_name = file_path + now + ms + ".jpg"

        with open(img_name, 'wb') as f:
            f.write(res.content)
def askURL(url):
    """Return the content of the page at ``url``, decoded as UTF-8.

    The request is sent with the module-level ``headers``. If
    ``urllib.error.URLError`` is raised, the error's ``code`` and ``reason``
    attributes are printed when present and an empty string is returned.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded page text, or ``""`` when the request failed.
    """
    request = urllib.request.Request(url, headers=headers)
    html = ""
    try:
        # The context manager closes the HTTP response even if read() or
        # decode() raises.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode("utf-8")
    except urllib.error.URLError as e:
        if hasattr(e, "code"):
            print(e.code)
        if hasattr(e, "reason"):
            print(e.reason)
    return html
if __name__ == '__main__':
    # Executed only when the file is run as a script: call the entry
    # function, then report completion.
    main()
    print("爬取完毕!")