知轩藏书下载工具

机器猫 · 发表于 2020-7-27 20:20

知轩藏书是比较不错的精校小说下载网站, 可惜某些时候经常被和谐, 所以做了个下载工具, 把已经有的都扒下来
脚本是python3.7.1写的, 懒得写界面了, 直接贴脚本

运行以后会根据C_IndexMin和C_IndexMax值下载对应的小说, 目前C_IndexMax设置3W够用了, 知轩现在最大编号才1W2
下载目录为脚本当前目录的\Download\知轩藏书\
如果运行一半关闭再启动, 会从头开始遍历, 但是已经下载完毕的书会校验, 如果正常就不重复下载了(用来应对网络波动、下载时正好无法访问导致的下载失败)

我是用pycharm写的, 但是用python自带的idle也能直接运行, 就是下载进度信息不能覆盖挺难看的
如果用pycharm运行没问题

import os
import re
import urllib.request
import contextlib
import json
import sys
#大牛破解论坛 - 黑暗煎饼果子
# Output directories, rooted at the current working directory.
# os.path.join is used instead of hard-coded '\\' so the tree is created
# correctly on every platform; the trailing '' component keeps the trailing
# separator, because the rest of the script builds file names by plain string
# concatenation (e.g. lWorkPath + 'List.json').
lWorkPath = os.path.join(os.getcwd(), 'Download', '知轩藏书', '')
# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(lWorkPath, exist_ok=True)
lBooksPath = os.path.join(lWorkPath, 'Books', '')
os.makedirs(lBooksPath, exist_ok=True)

# URL templates, formatted with the numeric book id.
C_URLInfo = r'http://www.zxcs.me/post/%d'
C_URLDownload = r'http://www.zxcs.me/download.php?id=%d'
# Inclusive range of book ids to scan.
C_IndexMin = 1090  # per the author, the smallest book id on the site
C_IndexMax = 30000

# Shared opener, also installed globally so that urllib.request.urlopen and
# urllib.request.urlretrieve send the same default headers.
lURLOpener = urllib.request.build_opener(urllib.request.HTTPHandler)
urllib.request.install_opener(lURLOpener)
# No explicit Host header: a Host value must be a bare host name (the previous
# value 'http://www.zxcs.me' included the scheme), and urllib already fills in
# Host from each request URL.
lURLOpener.addheaders = [
    ('Connection', 'keep-alive'),
    ('User-Agent', 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3626.109 Safari/537.36'),
]
def DownloadProgress(blocknum, bs, size):
    """Report hook for urllib.request.urlretrieve: print download progress.

    The progress text is followed by a carriage return so that the next
    update overwrites it on consoles that honor carriage returns.

    Args:
        blocknum: number of data blocks transferred so far.
        bs: size of one data block, in bytes.
        size: total size of the remote file, in bytes. urlretrieve passes
            -1 when the server did not report a size; any non-positive
            value is treated as "unknown".
    """
    if size > 0:
        # The last block usually overshoots the real size, so cap at 100%.
        per = min(100.0, 100.0 * blocknum * bs / size)
        text = '正在下载: %.2f%%' % per
    else:
        # Unknown total: a percentage cannot be computed (and size == 0
        # would divide by zero), so report the transferred byte count.
        text = '正在下载: %d 字节' % (blocknum * bs)
    sys.stdout.write(text)
    sys.stdout.write('\r')
    sys.stdout.flush()
# Main script: walk every book id in [C_IndexMin, C_IndexMax], scrape the
# book's metadata, download its archive into lBooksPath and record the result
# in List.json so that later runs can skip books that are already on disk.

# Catalogue of downloaded books, keyed by the book id as a string.
lJSONFile = lWorkPath + 'List.json'
if os.path.exists(lJSONFile):
    with open(lJSONFile, 'r', encoding='utf-8') as f:
        lJBooks = json.load(f)
else:
    lJBooks = {}

for lIndex in range(C_IndexMin, C_IndexMax + 1):
    print()
    lID = str(lIndex)
    lJBook = lJBooks.get(lID, {})
    lFileName = lJBook.get('File', '')

    # Already downloaded: the catalogue names a file and it exists on disk.
    if (lFileName != '') and os.path.exists(lBooksPath + lFileName):
        # Back-fill the size for catalogue entries that lack it.
        # NOTE(review): the posted source lost its indentation; rewriting the
        # catalogue only when an entry was actually changed is assumed here.
        if 'Size' not in lJBook:
            lJBook['Size'] = os.path.getsize(lBooksPath + lFileName)
            lJBooks[lID] = lJBook
            with open(lJSONFile, 'w', encoding='utf-8') as f:
                json.dump(lJBooks, f, ensure_ascii=False)
        print('%d 已存在' % lIndex)
        print()
        continue

    # Fetch the book's metadata from its post page.
    try:
        # "with" closes the HTTP response instead of leaking the connection.
        with lURLOpener.open(C_URLInfo % lIndex) as lResponse:
            lHTML = lResponse.read().decode('utf-8')
        # Locate the info section: <div id="content"> ... </div>
        lHTMLClass = re.compile(r'(?<=<div id="content">)[^\f]+?(?=</div>)').search(lHTML)
        if lHTMLClass:
            lContent = lHTMLClass.group(0)
            # Title line; it carries both the book name and the author.
            lTitle = re.compile(r'(?<=<h1>).+?(?=</h1>)').search(lContent).group(0)
            # Book name: the text between 《 and 》.
            lJBook['Name'] = re.compile(r'(?<=《).+?(?=》)').search(lTitle).group(0)
            # Author: everything after "作者：".
            lJBook['Author'] = re.compile(r'(?<=作者：).+').search(lTitle).group(0)
            # Category: text of the first matching anchor.
            lTempStr = re.compile(r'<a >.+?</a>').search(lContent).group(0)
            lJBook['Sort'] = re.compile(r'(?<=>).+(?=<)').search(lTempStr).group(0)
            # Tag: text of the first anchor that links to a /tag/ page.
            lTempStr = re.compile(r'<a href="http://www.zxcs.me/tag/[^\s>]+?">.+?</a>').search(lContent).group(0)
            lJBook['Tag'] = re.compile(r'(?<=>).+(?=<)').search(lTempStr).group(0)
            # Description: searched in the whole page, not just the section.
            # NOTE(review): the posted source showed a space-for-space replace
            # here (a no-op), most likely an '&nbsp;' entity rendered by the
            # forum; both the entity and the no-break space are normalised.
            # NOTE(review): nesting under "if lHTMLClass" is assumed; the
            # original indentation was lost.
            lDesc = re.compile(r'(?<=【内容简介】：)[^\f\v]+?(?=</p>)').search(lHTML).group(0)
            lJBook['Desc'] = lDesc.replace('<br />', '').replace('&nbsp;', ' ').replace('\xa0', ' ')
    except Exception as e:
        # Any network or parsing failure: skip this id.
        print('[%d]获取信息失败: %s' % (lIndex, e))
        continue

    # Fetch the list of download links from the download page.
    try:
        with lURLOpener.open(C_URLDownload % lIndex) as lResponse:
            lHTML = lResponse.read().decode('utf-8')
        lDownloadURLList = re.compile(r'(?<=<span class="downfile"><a href=").+?(?=")').findall(lHTML)
        if not lDownloadURLList:
            raise Exception('无法获取下载地址')
    except Exception as E:
        print(E)
        continue

    # Try each download link in turn until one succeeds.
    try:
        lDownloaded = False
        lErrors = []
        for lDownloadURL in lDownloadURLList:
            try:
                # File extension = last ".xxx" at the end of the URL. The
                # posted literal was unterminated; the "$" anchor is restored
                # so the match is the extension rather than part of the host.
                lFileExt = re.compile(r'\.[^\./]+$').search(lDownloadURL).group(0)
                lFileName = lID + lFileExt
                lJBook['File'] = lFileName
                # Ask the server for the expected size first; 0 = unknown.
                lFileSize = 0
                try:
                    with contextlib.closing(urllib.request.urlopen(lDownloadURL, None)) as UR:
                        lHeaders = UR.info()
                        lFileSize = int(lHeaders['Content-Length'])
                except Exception as E:
                    lFileSize = 0
                    print('获取文件大小失败: %s' % E)
                # Download, reporting progress through DownloadProgress.
                urllib.request.urlretrieve(lDownloadURL, lBooksPath + lFileName, DownloadProgress)
                # Verify the size on disk, but only when the expected size is
                # known: comparing against the "unknown" value 0 would reject
                # every non-empty download.
                lRFileSize = int(os.path.getsize(lBooksPath + lFileName))
                if lFileSize > 0 and lRFileSize != lFileSize:
                    raise Exception('文件大小不一致')
                # Equals lFileSize whenever the expected size was known.
                lJBook['Size'] = lRFileSize
                lDownloaded = True
                break
            except Exception as E:
                lErrors.append(E)
        if lDownloaded:
            lJBooks[lID] = lJBook
            with open(lJSONFile, 'w', encoding='utf-8') as f:
                json.dump(lJBooks, f, ensure_ascii=False)
            # Trailing spaces wipe the leftovers of the progress line.
            print('[%d]下载完成%s' % (lIndex, ' ' * 10))
        else:
            # Every link failed: drop the book from the in-memory catalogue.
            if lID in lJBooks:
                del lJBooks[lID]
            raise Exception(lErrors)
    except Exception as E:
        # Remove a partial or invalid file so the next run retries this id.
        if (lFileName != '') and os.path.exists(lBooksPath + lFileName):
            os.remove(lBooksPath + lFileName)
        print('[%d]下载失败: %s' % (lIndex, E))

input('按任意键结束')

复制代码

q1289478903 · 发表于 2020-7-27 20:20

6666666666

faka0001 · 发表于 2020-7-27 20:20

感谢楼主分享

Y20190801 · 发表于 2020-7-27 20:33

谢谢分享

单杀任何人 · 发表于 2020-7-27 20:45

谢谢大牛

zwjnnnnnnb · 发表于 2020-7-27 20:51

好的，非常感谢

wasq0309 · 发表于 2020-7-27 20:58

6666

Luit · 发表于 2020-7-28 08:47

abc7419638462 · 发表于 2020-7-28 19:03

谢谢大佬

		自动登录	找回密码
密码			注册[Register]

[其他] 知轩藏书下载工具