知轩藏书是比较不错的精校小说下载网站, 可惜某些时候经常被和谐, 所以做了个下载工具, 把已经有的都扒下来
脚本是python3.7.1写的, 懒得写界面了, 直接贴脚本
运行以后会根据C_IndexMin和C_IndexMax值下载对应的小说, 目前C_IndexMax设置3W够用了, 知轩现在最大编号才1W2
下载目录为脚本当前目录的\Download\知轩藏书\
如果运行一半关闭再启动会从头开始, 但是已经下载完毕的书会校验, 如果正常就不重复下载了(某些网络波动, 下载的时候正好无法访问导致下载失败)
我是用pycharm写的, 但是用python自带的idle也能直接运行, 就是下载进度信息不能覆盖挺难看的
如果用pycharm运行没问题
- import os
- import re
- import urllib.request
- import contextlib
- import json
- import sys
-
- #大牛破解论坛 - 黑暗煎饼果子
-
# Working directories. Both paths keep a trailing separator because the rest
# of the script builds file paths by plain string concatenation
# (e.g. lBooksPath + lFileName). Joining with an empty last component is what
# produces that trailing separator, using the platform's own separator
# instead of a hard-coded backslash.
lWorkPath = os.path.join(os.getcwd(), 'Download', '知轩藏书', '')
lBooksPath = os.path.join(lWorkPath, 'Books', '')
# Creating the Books directory also creates lWorkPath; exist_ok makes
# re-running the script harmless.
os.makedirs(lBooksPath, exist_ok=True)

# URL templates, filled in with the numeric book id.
C_URLInfo = r'http://www.zxcs.me/post/%d'
C_URLDownload = r'http://www.zxcs.me/download.php?id=%d'

# Inclusive range of book ids to try.
C_IndexMin = 1090  # smallest book id on the site is 1090
C_IndexMax = 30000
-
# Build one opener with browser-like default headers and install it globally,
# so that urllib.request.urlopen / urlretrieve use it as well.
lURLOpener = urllib.request.build_opener(urllib.request.HTTPHandler)
lURLOpener.addheaders = [
    # A Host header carries the bare host name, never the URL scheme.
    # NOTE(review): urllib normally fills in Host from the request URL itself,
    # so this entry is probably redundant - confirm before relying on it.
    ('Host', 'www.zxcs.me'),
    ('Connection', 'keep-alive'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'),
]
urllib.request.install_opener(lURLOpener)
-
def DownloadProgress(blocknum, bs, size):
    """Show download progress on one console line (urlretrieve reporthook).

    The line ends with a carriage return instead of a newline, so the next
    report overwrites it on consoles that honour '\\r'.

    Args:
        blocknum: number of data blocks transferred so far.
        bs: size of one data block in bytes.
        size: total size of the remote file in bytes. Zero or negative means
            the size is unknown; a byte count is shown instead of a
            percentage, which also avoids dividing by zero.
    """
    lDone = blocknum * bs
    if size > 0:
        # The final block is usually only partly filled, so cap at 100%.
        per = min(100.0, 100.0 * lDone / size)
        sys.stdout.write('正在下载: %.2f%%' % per)
    else:
        sys.stdout.write('正在下载: %d 字节' % lDone)
    sys.stdout.write('\r')
    sys.stdout.flush()
-
# Location of the persisted book index, kept next to the Books directory.
lJSONFile = lWorkPath + 'List.json'

# Start from an empty index and replace it with the saved one when a
# previous run left an index file behind.
lJBooks = {}
if os.path.exists(lJSONFile):
    with open(lJSONFile, 'r', encoding='utf-8') as lIndexFile:
        lJBooks = json.load(lIndexFile)
-
# Patterns are compiled once here instead of on every loop iteration.
C_ReContent = re.compile(r'(?<=<div id="content">)[^\f]+?(?=</div>)')
C_ReTitle = re.compile(r'(?<=<h1>).+?(?=</h1>)')
C_ReName = re.compile(r'(?<=《).+?(?=》)')
C_ReAuthor = re.compile(r'(?<=作者:).+')
# NOTE(review): '<a >' looks like a link whose href attribute was stripped
# when the script was pasted; verify this pattern against the live page.
C_ReSortLink = re.compile(r'<a >.+?</a>')
C_ReTagLink = re.compile(r'<a href="http://www.zxcs.me/tag/[^\s>]+?">.+?</a>')
C_ReLinkText = re.compile(r'(?<=>).+(?=<)')
C_ReDesc = re.compile(r'(?<=【内容简介】:)[^\f\v]+?(?=</p>)')
C_ReDownloadURL = re.compile(r'(?<=<span class="downfile"><a href=").+?(?=")')
# File extension = last '.xxx' at the very end of the URL.
C_ReFileExt = re.compile(r'\.[^\./]+$')


def _SaveIndex():
    """Write the in-memory book index (lJBooks) to lJSONFile."""
    with open(lJSONFile, 'w', encoding='utf-8') as lIndexFile:
        json.dump(lJBooks, lIndexFile, ensure_ascii=False)


def _RemoveIfExists(lPath):
    """Delete lPath when it exists, so a failed attempt leaves no partial file."""
    if os.path.exists(lPath):
        os.remove(lPath)


def _FetchExpectedSize(lDownloadURL):
    """Return the Content-Length reported for lDownloadURL, or 0 when unknown."""
    try:
        with contextlib.closing(urllib.request.urlopen(lDownloadURL, None)) as lResponse:
            return int(lResponse.info()['Content-Length'])
    except Exception as E:
        print('获取文件大小失败: %s' % E)
        return 0


def _DownloadFromMirror(lID, lDownloadURL):
    """Download one mirror URL into lBooksPath.

    Returns:
        (file name, size in bytes) of the stored file.

    Raises:
        Exception: when the transfer fails or the stored file does not pass
            the size check. The partial file is removed before re-raising.
    """
    lFileName = lID + C_ReFileExt.search(lDownloadURL).group(0)
    lFilePath = lBooksPath + lFileName
    try:
        lExpectedSize = _FetchExpectedSize(lDownloadURL)
        urllib.request.urlretrieve(lDownloadURL, lFilePath, DownloadProgress)
        lActualSize = os.path.getsize(lFilePath)
        if lExpectedSize > 0:
            if lActualSize != lExpectedSize:
                raise Exception('文件大小不一致')
        elif lActualSize == 0:
            # Size unknown: the only check possible is "not empty". Comparing
            # against 0 here would reject every successful download.
            raise Exception('下载到的文件为空')
    except Exception:
        _RemoveIfExists(lFilePath)
        raise
    return lFileName, lActualSize


for lIndex in range(C_IndexMin, C_IndexMax + 1):
    print()

    lID = str(lIndex)
    lJBook = lJBooks.get(lID, {})

    # Skip books that are already indexed and still present on disk; fill in
    # a missing 'Size' field from the file itself.
    lFileName = lJBook.get('File', '')
    if lFileName and os.path.exists(lBooksPath + lFileName):
        if 'Size' not in lJBook:
            lJBook['Size'] = os.path.getsize(lBooksPath + lFileName)
            lJBooks[lID] = lJBook
            _SaveIndex()
        print('%d 已存在' % lIndex)
        print()
        continue

    # Fetch the book's info page and extract its metadata.
    try:
        lHTML = lURLOpener.open(C_URLInfo % lIndex).read().decode('utf-8')
        # Info section: <div id="content">
        lHTMLClass = C_ReContent.search(lHTML)
        if lHTMLClass:
            lContent = lHTMLClass.group(0)
            lTitle = C_ReTitle.search(lContent).group(0)
            lJBook['Name'] = C_ReName.search(lTitle).group(0)
            lJBook['Author'] = C_ReAuthor.search(lTitle).group(0)
            # Category and tag: find the link first, then take its text.
            lTempStr = C_ReSortLink.search(lContent).group(0)
            lJBook['Sort'] = C_ReLinkText.search(lTempStr).group(0)
            lTempStr = C_ReTagLink.search(lContent).group(0)
            lJBook['Tag'] = C_ReLinkText.search(lTempStr).group(0)

        # Strip line-break tags and turn non-breaking spaces (as an HTML
        # entity or as the decoded character) into ordinary spaces.
        lJBook['Desc'] = (
            C_ReDesc.search(lHTML).group(0)
            .replace('<br />', '')
            .replace('&nbsp;', ' ')
            .replace('\xa0', ' ')
        )
    except Exception as E:
        print('[%d]获取信息失败: %s' % (lIndex, E))
        continue

    # Fetch the download page and collect the mirror URLs.
    try:
        lHTML = lURLOpener.open(C_URLDownload % lIndex).read().decode('utf-8')
        lDownloadURLList = C_ReDownloadURL.findall(lHTML)
        if not lDownloadURLList:
            raise Exception('无法获取下载地址')
    except Exception as E:
        print(E)
        continue

    # Try each mirror in turn until one delivers a file that passes the check.
    lErrors = []
    lDownloaded = False
    for lDownloadURL in lDownloadURLList:
        try:
            lJBook['File'], lJBook['Size'] = _DownloadFromMirror(lID, lDownloadURL)
        except Exception as E:
            lErrors.append(E)
        else:
            lDownloaded = True
            break

    if not lDownloaded:
        # Every mirror failed: drop any stale index entry for this id.
        lJBooks.pop(lID, None)
        print('[%d]下载失败: %s' % (lIndex, lErrors))
        continue

    lJBooks[lID] = lJBook
    try:
        _SaveIndex()
    except Exception as E:
        # The index could not be written, so the file would not be recognised
        # on the next run; remove it and report the book as failed.
        _RemoveIfExists(lBooksPath + lJBook['File'])
        print('[%d]下载失败: %s' % (lIndex, E))
    else:
        # Trailing spaces wipe what is left of the progress line.
        print('[%d]下载完成%s' % (lIndex, ' ' * 10))

input('按任意键结束')
复制代码
|