|
禁止求评分、诱导评分、互刷评分、互刷悬赏值,违规者封号处理。
禁止发布推广、邀请码、邀请链接、二维码或者有利益相关的任何推广行为。
所有非原创软件请发布在【精品软件区】,发帖必须按照本版块版规格式发帖。
话不多说,直接上代码。输入判定比较简单:页数不要输入 0,也不要超过程序提示的最大页数。
- from lxml import etree
- import requests
- import os
-
# Parse-and-download helper for one page worth of resume detail links.
def cv_down(cv_href, headers):
    """Download every resume archive referenced by the detail-page URLs.

    For each detail page: fetch it, extract the resume title and the first
    mirror's download URL, then save the archive as '<title>.rar' under
    ./免费简历/.

    :param cv_href: iterable of detail-page URLs scraped from a list page
    :param headers: dict of HTTP headers (User-Agent) passed to requests
    """
    for href in cv_href:
        act_response = requests.get(url=href, headers=headers)
        # The server omits the charset, so requests guesses ISO-8859-1 and
        # .text comes out as mojibake. Declaring UTF-8 here replaces the
        # old .encode('ISO-8859-1').decode('utf-8') round-trip hack.
        act_response.encoding = 'utf-8'
        act_tree = etree.HTML(act_response.text)

        titles = act_tree.xpath('//div[@class="ppt_tit clearfix"]/h1/text()')
        dow_urls = act_tree.xpath('//div[@class="clearfix mt20 downlist"]/ul/li[1]/a/@href')
        if not titles or not dow_urls:
            # Layout mismatch (ad page, redesign, dead link) — skip instead
            # of crashing the whole crawl with an IndexError.
            continue

        cv_title = titles[0] + '.rar'
        doc = requests.get(url=dow_urls[0], headers=headers).content
        cv_path = os.path.join('./免费简历', cv_title)
        with open(cv_path, 'wb') as fp:
            fp.write(doc)
        print(cv_title, '下载完成!!!')
-
# Make sure the output folder exists before any download starts.
os.makedirs('./免费简历', exist_ok=True)
-
first_page_url = 'http://sc.chinaz.com/jianli/free.html'
headers = {
    "User-Agent":'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.92 Safari/537.36'
}

# Fetch the first list page once; it doubles as page 1 of the crawl and as
# the place to read the total page count from the pagination bar.
first_page_html = requests.get(url=first_page_url, headers=headers).text
tree = etree.HTML(first_page_html)
# Text of the last pagination entry, e.g. the maximum page number.
page_limit = tree.xpath('//div[@class="pagination fr clearfix clear"]/a[8]/b/text()')
-
print(page_limit[0] + " pages at most")
page = input("Please enter how many page you want: " )

if page.isdigit():
    # The original had a separate `int(page) == 1` branch that duplicated
    # the loop's i == 1 case line for line; a single loop covers both.
    for i in range(1, int(page) + 1):
        if i == 1:
            # Page 1 was already fetched above — reuse its parsed tree.
            page_tree = tree
        else:
            # Pages >= 2 live at a different URL pattern: free_<n>.html.
            other_page_url = 'http://sc.chinaz.com/jianli/free_' + str(i) + '.html'
            response = requests.get(url=other_page_url, headers=headers).text
            # Bind to a local name instead of clobbering the module-level
            # `tree` that still holds page 1.
            page_tree = etree.HTML(response)
        cv_href = page_tree.xpath('//div[@class="sc_warp mt20"]/div/div/div/a/@href')
        cv_down(cv_href, headers)

else:
    print("You can only enter numbers!!!")
复制代码
|
|