用 Python 爬取美女图网图片

夜空中最亮的星 · 发表于 2021-11-22 10:44

[Python] 纯文本查看 复制代码

import requests
from bs4 import BeautifulSoup
import re
import os
 
# Default HTTP headers attached to every request made by getHtml().
# NOTE(review): the referer is presumably required for the site to serve
# pages/images to a non-browser client — confirm against the live site.
headers = {
    "referer": "https://www.mzitu.com/",
    # Desktop Edge/Chromium user-agent, assembled from its space-separated parts.
    "user-agent": " ".join((
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64)",
        "AppleWebKit/537.36 (KHTML, like Gecko)",
        "Chrome/95.0.4638.54",
        "Safari/537.36",
        "Edg/95.0.1020.40",
    )),
}
def getHtml(url, headers=headers, timeout=10):
    """Fetch *url* with a GET request and return the response.

    Args:
        url: Address to request.
        headers: HTTP headers to send; defaults to the module-level ``headers``.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests`` can block indefinitely on a stalled server.

    Returns:
        The ``requests.Response`` (with its encoding forced to UTF-8) on
        success, or ``None`` if the request failed or returned an HTTP error
        status. Callers must check for ``None`` before using the result.
    """
    try:
        r = requests.get(url, headers=headers, timeout=timeout)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are treated as "fetch failed";
        # KeyboardInterrupt and programming errors are allowed to propagate.
        print("获取失败")
        return None
    r.encoding = "utf-8"
    return r
 
def parserHtml(url):
    """Collect the galleries linked from the listing page at *url*.

    The listing page is expected to contain ``<ul id="pins">`` whose ``<li>``
    items each hold a link to a gallery. Every gallery page is fetched once to
    read the last page number from its pagination markup.

    Args:
        url: Address of the listing page.

    Returns:
        A list of ``(href, num)`` tuples, where ``href`` is the gallery URL and
        ``num`` is its page count. Galleries that cannot be fetched or whose
        page count cannot be found are skipped; the list is empty when the
        listing page itself cannot be fetched.
    """
    herflist = []
    r = getHtml(url)
    if r is None:
        # getHtml has already reported the failure; there is nothing to parse.
        return herflist

    # Compiled once instead of on every loop iteration. Captures the number
    # shown in the link that follows the "…" separator of the pager.
    page_count = re.compile(
        r"<span class='dots'>…</span><a href='.*?'><span>(?P<num>\d+)</span>")

    try:
        soup = BeautifulSoup(r.text, "html.parser")
        pins = soup.find("ul", id="pins")
        items = pins("li") if pins is not None else []
        for item in items:
            link = item.find("a")
            if link is None or "href" not in link.attrs:
                continue
            herf = link.attrs["href"]

            respon = getHtml(herf)
            if respon is None:
                continue
            try:
                found = page_count.search(respon.text)
            finally:
                respon.close()

            if found is None:
                # No pager matched on this page; report it rather than crash.
                print("未找到页数，跳过：" + herf)
                continue
            herflist.append((herf, int(found.group("num"))))
    finally:
        # Release the listing-page connection even if parsing raised.
        r.close()
    return herflist
 
def downLoad(herf, num, save_dir="图片"):
    """Download the image from each of the *num* pages of one gallery.

    Page ``i`` of the gallery lives at ``herf + "/" + str(i)``. The gallery
    title is taken from the first page's ``<title>`` (text before the first
    "-") and used as the name of the sub-directory the images are saved in.

    Args:
        herf: Base URL of the gallery.
        num: Number of pages in the gallery; pages are numbered from 1.
        save_dir: Directory under which the per-gallery folder is created.
            The same path is used for creating the folder and for writing the
            files, so the function no longer depends on a fixed drive/cwd.

    Returns:
        None. Pages or images that cannot be fetched are skipped; if the first
        page cannot be fetched the gallery is abandoned because no title (and
        therefore no target folder) is available.
    """
    title = None
    target_dir = None
    for i in range(1, num + 1):
        url = herf + "/" + str(i)
        r = getHtml(url)
        if r is None:
            if title is None:
                # First page failed: there is no title to name the folder.
                return
            continue
        try:
            soup = BeautifulSoup(r.text, "html.parser")
        finally:
            r.close()

        if title is None:
            title = soup.find("title").string.split(sep="-")[0].strip()
            target_dir = os.path.join(save_dir, title)
            # makedirs + exist_ok: creates missing parents and tolerates re-runs.
            os.makedirs(target_dir, exist_ok=True)

        tag = soup.find("img", "blur")
        if tag is None:
            continue
        img = tag.attrs["src"]
        name = img.split(sep="/")[-1]

        picture = getHtml(img)
        if picture is None:
            continue
        try:
            with open(os.path.join(target_dir, name), "wb") as f:
                f.write(picture.content)
        finally:
            picture.close()
        print(title+name+"下载完毕")
 
def main():
    """Crawl the site's front page and download every gallery found on it."""
    url = "https://www.mzitu.com"
    for herf, num in parserHtml(url):
        downLoad(herf, num)


# Run the crawl only when executed as a script, so importing this module
# does not start network activity as a side effect.
if __name__ == "__main__":
    main()

笑东风丶 · 发表于 2021-11-22 11:36

感谢大佬分享，大佬666666

li771098551 · 发表于 2021-11-22 15:00

感谢大佬分享，大佬666666

七日书 · 发表于 2021-11-22 19:26

谢谢分享

		自动登录	找回密码
密码			注册[Register]