version = 0.1

新增功能:
1. 更新菜单
2. 现在可免费下载,不需要会员
3. 重构代码,优化性能
This commit is contained in:
qianyi 2024-10-13 22:06:47 +08:00
parent 92a1eb558a
commit 726397428e
2 changed files with 255 additions and 80 deletions

332
main.py
View File

@@ -1,70 +1,28 @@
import json
# encoding: utf-8
# version: 0.0.1
# author: qianyi
# email: xxqianyi@163.com
# date: 2024-10-13
# description: 爬取番茄小说
import requests
import parsel
class Novel:
def __init__(self, nid):
self.novel_id = nid
self.book_name = None
self.book_tags = []
self.chapters = []
# 请求头
headers = {
'cookie': 's_v_web_id=verify_m24u3hpa_LjedVsj2_0wHE_4YHz_AQbA_XubUuy7zBEfB; novel_web_id=7424527740918957579; Hm_lvt_2667d29c8e792e6fa9182c20a3013175=1728657590,1728729482; HMACCOUNT=C11EDE0200C117A2; csrf_session_id=9d05fbb2d55d14bcef609e07b15ee962; serial_uuid=7424527740918957579; serial_webid=7424527740918957579; passport_csrf_token=4ca9597fb853eee76008f7aa64d8e2b2; passport_csrf_token_default=4ca9597fb853eee76008f7aa64d8e2b2; passport_mfa_token=Cjd8rmE00d6EfflBq%2FcSKtg4tdE1wjK3k4AlVK9xhhfSX9yES1syXBMr7T5SqdgNhHiMb3ZcENizGkoKPBwhI5O6cVkE8SiUJ4MM3ZWbezvJEknWED5U%2FreyeQvJnVRfBsQqHY2RtF6nrPYyGuweLAfpGWn5dA2N%2FxD4xt4NGPax0WwgAiIBA8AKpQg%3D; d_ticket=6537bdb09e3bc966a73555b625f4a20805d80; odin_tt=8b14d78bda8d96ee252a0b04bde6df67e417de819ae2aff5e84dde678f187f509b2372655b99325ad4ce341c6c80bd5d947a24cb10a10aaf4dec4d9b69267a08; n_mh=LcM1cOw8HFMjuUsAxgY98un18tJ2aS13XDoKUMJmgAw; passport_auth_status=d05d05a79a1b91086a256e14792c088f%2C; passport_auth_status_ss=d05d05a79a1b91086a256e14792c088f%2C; sid_guard=fc6b04b9b74cd000904ad05a7d97eecc%7C1728730882%7C5184000%7CWed%2C+11-Dec-2024+11%3A01%3A22+GMT; uid_tt=081a78b4f45cf6f7248d345f6f6b2a2f; uid_tt_ss=081a78b4f45cf6f7248d345f6f6b2a2f; sid_tt=fc6b04b9b74cd000904ad05a7d97eecc; sessionid=fc6b04b9b74cd000904ad05a7d97eecc; sessionid_ss=fc6b04b9b74cd000904ad05a7d97eecc; is_staff_user=false; sid_ucp_v1=1.0.0-KGMxMDEzMDA5MTNjN2MwNzI1M2NkZTg3NWE2MDJiODFlMzc3MzJiZmYKHwiY37C54MyAAhCCrqm4BhjHEyAMMLvkgaMGOAJA8QcaAmhsIiBmYzZiMDRiOWI3NGNkMDAwOTA0YWQwNWE3ZDk3ZWVjYw; ssid_ucp_v1=1.0.0-KGMxMDEzMDA5MTNjN2MwNzI1M2NkZTg3NWE2MDJiODFlMzc3MzJiZmYKHwiY37C54MyAAhCCrqm4BhjHEyAMMLvkgaMGOAJA8QcaAmhsIiBmYzZiMDRiOWI3NGNkMDAwOTA0YWQwNWE3ZDk3ZWVjYw; store-region=cn-gd; store-region-src=uid; Hm_lpvt_2667d29c8e792e6fa9182c20a3013175=1728733437; ttwid=1%7CpG5C6EqM_9UeWXYJGM3B4K4x0Nyc97iCcZj51uLAaqI%7C1728733438%7C9f427b4c00ce6215497cb23aee88d69836d2eecfc5fab5ef250da93cdaf6b745',
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'
}
def fetch_novel_content(self):
# 请求链接
url = 'https://fanqienovel.com/page/' + self.novel_id
web_page = requests.get(url, headers=self.headers).content.decode('utf-8')
web_selector = parsel.Selector(web_page)
# 书名
self.book_name = web_selector.css('.info-name h1::text').get()
# 小说标签
self.book_tags = web_selector.css('.info-label span::text').getall()
# 章节ID
chapter_urls = web_selector.css('.chapter-item .chapter-item-title::attr(href)').getall()
# 逐章获取章节内容
for index, id_link in enumerate(chapter_urls):
chapter_data = self.get_chapter_content(id_link)
self.chapters.append(chapter_data) # 保存章节数据
print(f"获取章节 {chapter_data['chapterName']} 成功")
# 每5章保存一次
if (index + 1) % 5 == 0:
self.save_content_to_file() # 每5章保存内容
# 如果还有未保存的章节,最后再保存一次
self.save_content_to_file()
def get_chapter_content(self, id_link):
chapter_url = 'https://fanqienovel.com' + id_link
chapter_data = requests.get(chapter_url, headers=self.headers).content.decode('utf-8')
chapter_selector = parsel.Selector(chapter_data)
# 章节名
chapter_name = chapter_selector.css('.muye-reader-title::text').get()
# 章节字数
chapter_words = chapter_selector.css('.desc-item:nth-child(1)::text').get()
# 章节更新时间
chapter_update_time = chapter_selector.css('.desc-item:nth-child(2)::text').get()
# 章节内容
chapter_contents = chapter_selector.css('.muye-reader-content-16 p::text').getall()
chapter_content = self.decrypt_chapter_content('\n\n'.join(chapter_contents))
return {
"chapterName": chapter_name,
"wordCount": chapter_words,
"updateTime": chapter_update_time,
"content": chapter_content,
import random
import time
import json
from pathlib import Path
from fake_useragent import UserAgent
class NovelSpider:
def __init__(self):
self.URL = 'https://fanqienovel.com'
self.cookie = ''
self.headers = {
'User-Agent': UserAgent().random,
'cookie': self.cookie
}
self.novel_dict = {}
    # NOTE(review): the region below is corrupted diff residue -- it contains
    # both the old instance-method signature and the new @staticmethod
    # signature, a stray diff hunk header, and the character-mapping loop
    # body is missing. Reconstruct this method from version control.
    def decrypt_chapter_content(self, content):
    @staticmethod
    def decrypt_chapter_content(content):
        # Load the woff2 font-code -> character mapping used to de-obfuscate text.
        with open('woff2.json', 'r', encoding='utf-8') as f:
            woff2_dict = json.load(f)
        converted_content = ""
@ -75,22 +33,238 @@ class Novel:
            # Orphaned loop tail: appends each mapped character to the result.
            converted_content += index
        return converted_content
def save_content_to_file(self):
novel_data = {
"novel": {
"title": self.book_name,
"tags": self.book_tags,
"chapters": self.chapters
}
}
with open(f'{novel_data["novel"]["title"]}.json', 'w', encoding='utf-8') as f:
json.dump(novel_data, f, ensure_ascii=False, indent=4)
def search_novel(self, key):
"""
搜索小说
:param key: 小说关键字
:return: 小说ID
"""
while True:
if key == '':
return 'b'
# 使用新的API进行搜索
url = f"https://api5-normal-lf.fqnovel.com/reading/bookapi/search/page/v/?query={key}&aid=1967&channel=0&os_version=0&device_type=0&device_platform=0&iid=466614321180296&passback={{(page-1)*10}}&version_code=999"
response = requests.get(url)
if response.status_code == 200:
data = response.json()
if data['code'] == 0:
books = data['data']
if not books:
print("没有找到相关书籍。")
break
for index, book in enumerate(books):
print(
f"{index + 1}. 名称:{book['book_data'][0]['book_name']} 作者:{book['book_data'][0]['author']} ID{book['book_data'][0]['book_id']} 字数:{book['book_data'][0]['word_number']}")
while True:
choice_ = input("请选择一个结果, 输入 r 以重新搜索:")
if choice_ == "r":
break
elif choice_.isdigit() and 1 <= int(choice_) <= len(books):
chosen_book = books[int(choice_) - 1]
self.novel_dict['book_id'] = chosen_book['book_data'][0]['book_id']
return chosen_book['book_data'][0]['book_id']
else:
print("输入无效,请重新输入。")
else:
print("搜索出错,错误码:", data['code'])
break
else:
print("请求失败,状态码:", response.status_code)
break
def get_novel_info(self, book_id):
"""
获取小说信息
:param book_id: 小说ID
:return: 小说信息字典
"""
# 构建小说信息字典
url = f"{self.URL}/page/{book_id}"
response = requests.get(url, headers=self.headers)
novel_selector = parsel.Selector(response.content.decode('utf-8'))
self.novel_dict['title'] = novel_selector.css('.info-name h1::text').get()
self.novel_dict['author'] = novel_selector.css('.author-name-text::text').get()
self.novel_dict['word_count'] = novel_selector.css('.detail::text').getall()
self.novel_dict['intro'] = novel_selector.css('.page-abstract-content p::text').get()
self.novel_dict['chapter_latest'] = ' '.join(title for title in novel_selector.css('.info-last-title:nth-child(1)::text').getall() if title != '最近更新:')
self.novel_dict['chapter_names'] = novel_selector.css('.chapter-item .chapter-item-title::text').getall()
self.novel_dict['chapter_links'] = novel_selector.css('.chapter-item .chapter-item-title::attr(href)').getall()
# return self.novel_dict
# 使用示例
def get_novel_content(self, chapter_link):
"""
获取小说章节内容
:param chapter_link: 小说章节链接
:return: 小说章节内容
"""
url = f"{self.URL}{chapter_link}"
self.headers['cookie'] = self.cookie
try:
response = requests.get(url, headers=self.headers)
chapter_selector = parsel.Selector(response.content.decode('utf-8'))
chapter_name = chapter_selector.css('.muye-reader-title::text').get()
chapter_contents = chapter_selector.css('.muye-reader-content-16 p::text').getall()
# 解密
chapter_content = self.decrypt_chapter_content('\n\n'.join(chapter_contents))
print(f"{chapter_name}章节内容长度:{len(chapter_content)}")
return chapter_content
except :
print("获取章节内容失败。")
def get_cookie(self, t):
"""
获取cookie
:param t: 0表示获取cookie
:return:
"""
bas = 1000000000000000000 # 定义基数
if t == '': # 如果未传入cookie
# 随机生成一个数字作为cookie
for num in range(random.randint(bas * 6, bas * 8), bas * 9):
time.sleep(random.randint(50, 150) / 1000) # 随机暂停
self.cookie = 'novel_web_id=' + str(num) # 生成cookie
self.headers['cookie'] = self.cookie
link = self.random_str('chapter_links')
print(f"尝试使用cookie {self.cookie} 获取 {link} 链接")
# 200ms间隔防止被封IP
time.sleep(0.2)
if len(self.get_novel_content(link)) > 200:
return 'ok'
else: # 如果传入了cookie
self.headers['cookie'] = t
link = self.random_str('chapter_links')
if len(self.get_novel_content(link)) > 200:
print(f"尝试使用cookie {self.cookie} 获取 {link} 链接")
return 'ok'
else:
return 'err'
def random_str(self, key):
"""
随机选取列表中的一个元素
:param key: 字典键
:return: 随机元素
"""
try:
values = self.novel_dict[key]
if values:
return random.choice(values[10:])
except KeyError:
print("返回值为空,请先获取小说信息。")
def down_text(self):
file = FileHandler()
for name,link in zip(self.novel_dict['chapter_names'], self.novel_dict['chapter_links']):
time.sleep(1)
text = self.get_novel_content(link)
file.book2down('text', self.novel_dict['title'], name, text)
class FileHandler:
def __init__(self):
self.config = {}
self.path = Path('Data')
self.config_file_path = self.path / 'fq2s.cookie'
self.download_dir_path = self.path / 'books'
# 如果 Data 目录不存在,则创建
if not self.path.exists():
self.path.mkdir(parents=True)
self.download_dir_path.mkdir(parents=True)
print(f"目录 {self.path} , {self.download_dir_path} 已创建。")
def read_config(self,):
"""
读取配置文件
:param : 配置文件路径
:return: 配置字典
"""
# 检查 fq2s.conf 文件是否存在
if self.config_file_path.exists():
# 如果文件存在,则读取内容
self.config['cookie'] = self.config_file_path.read_text(encoding='utf-8')
# print(f"配置文件 {self.config_file_path} 读取成功。")
else:
# 如果文件不存在,则创建文件并写入默认内容
default_content = ""
self.config_file_path.write_text(default_content, encoding='utf-8')
print(f"配置文件 {self.config_file_path} 不存在,已创建并写入默认内容。")
def write_config(self, con):
"""
写入配置文件
:param con: 配置内容
:return:
"""
cons = f'"{con}"'
with self.config_file_path.open('w', encoding='utf-8') as f:
f.write(cons)
print("配置文件写入成功。")
def book2down(self, down_type, b_name, c_name, text):
"""
下载小说
:param down_type: 下载类型
:param b_name: 小说名
:param c_name: 章节名
:param text: 章节内容
:return:
"""
match down_type:
case 'text':
# 下载txt格式
book_name_path = self.download_dir_path / f"{b_name}"
book_name_path.mkdir(parents=True, exist_ok=True)
file_path = book_name_path / f"{c_name}.txt"
file_path.write_text(text, encoding='utf-8')
print(f"已下载 {c_name} 章节。")
case 2:
# 下载json格式
pass
def run(mode=0):
    """
    Entry point: ensure a working cookie exists, then run the chosen UI.

    :param mode: 1 for GUI (not implemented), anything else for the console loop
    """
    print("欢迎使用番茄小说爬虫!\n初始化中................................................")
    novel_spider = NovelSpider()
    file_handler = FileHandler()
    # Pre-load a known novel so random_str('chapter_links') has links for
    # cookie validation. NOTE(review): hard-coded book id -- confirm it is
    # still online.
    novel_spider.get_novel_info(6982529841564224526)
    file_handler.read_config()
    cookie = file_handler.config.get('cookie', '')
    novel_spider.cookie = cookie
    if cookie == '':
        # No stored cookie: brute-force a new one and persist it on success.
        print("获取cookie------------")
        status = novel_spider.get_cookie('')
        if status == 'ok':
            print("获取cookie成功")
            file_handler.write_config(novel_spider.cookie)
    else:
        # Validate the stored cookie; regenerate and persist if it expired.
        status = novel_spider.get_cookie(cookie)
        if status == 'err':
            print("该cookie已失效重新获取。")
            novel_spider.get_cookie('')
            file_handler.write_config(novel_spider.cookie)
            print("获取cookie成功")
        elif status == 'ok':
            print("cookie可用")
    if mode == 1:
        # GUI mode -- not implemented yet.
        pass
    else:
        # Console (no-GUI) mode: simple menu loop.
        while True:
            m = input("请输入功能模块[1.下载小说|2.x|3.x|4.x|5.x|6.x|7.x|8.x|9.x||0.退出]")
            match m:
                case '1':
                    # Search, load the chosen novel's info, then download chapters.
                    # t = input("请输入下载类型[1.txt|2.json]")
                    bid = novel_spider.search_novel(input("请输入要搜索的小说名:"))
                    novel_spider.get_novel_info(bid)
                    novel_spider.down_text()
                case '2':
                    pass
                case '0':
                    break
# Main program entry point.
# NOTE(review): this block mixes two generations of the script (diff residue).
# The five statements using Novel belong to the removed implementation and
# will raise NameError in the current file; only the run(i) path below is the
# live entry point. Confirm against version control before cleaning up.
if __name__ == '__main__':
    novel_id = '7412526577163979800'
    novel = Novel(novel_id)
    novel.fetch_novel_content()  # fetch the whole novel
    novel.save_content_to_file()  # dump it to json
    print(novel.book_name)  # print the book title
    '''
    打开方式nogui(0)|gui(1)
    '''
    # i = int(input("请输入打开方式[nogui(0)|gui(1)]"))
    i = 0  # 0 = no-GUI console mode
    run(i)

View File

@@ -3,4 +3,5 @@ lxml~=5.3.0
EbookLib~=0.18
tqdm~=4.66.5
beautifulsoup4~=4.12.3
parsel~=1.9.1
parsel~=1.9.1
fake-useragent~=1.5.1