version = 0.1
新增功能: 1. 更新菜单 2. 现在可免费下载,不需要会员 3. 重构代码,优化性能
This commit is contained in:
parent
92a1eb558a
commit
726397428e
332
main.py
332
main.py
|
@ -1,70 +1,28 @@
|
|||
import json
|
||||
# encoding: utf-8
|
||||
# version: 0.0.1
|
||||
# author: qianyi
|
||||
# email: xxqianyi@163.com
|
||||
# date: 2024-10-13
|
||||
# description: 爬取番茄小说
|
||||
import requests
|
||||
import parsel
|
||||
|
||||
|
||||
class Novel:
|
||||
def __init__(self, nid):
|
||||
self.novel_id = nid
|
||||
self.book_name = None
|
||||
self.book_tags = []
|
||||
self.chapters = []
|
||||
|
||||
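# Request headers sent with every page fetch.
# NOTE(review): the cookie below is a hard-coded logged-in session (sessionid, sid_tt, passport tokens) committed to source control; it should be revoked and loaded from configuration instead.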
|
||||
headers = {
|
||||
'cookie': 's_v_web_id=verify_m24u3hpa_LjedVsj2_0wHE_4YHz_AQbA_XubUuy7zBEfB; novel_web_id=7424527740918957579; Hm_lvt_2667d29c8e792e6fa9182c20a3013175=1728657590,1728729482; HMACCOUNT=C11EDE0200C117A2; csrf_session_id=9d05fbb2d55d14bcef609e07b15ee962; serial_uuid=7424527740918957579; serial_webid=7424527740918957579; passport_csrf_token=4ca9597fb853eee76008f7aa64d8e2b2; passport_csrf_token_default=4ca9597fb853eee76008f7aa64d8e2b2; passport_mfa_token=Cjd8rmE00d6EfflBq%2FcSKtg4tdE1wjK3k4AlVK9xhhfSX9yES1syXBMr7T5SqdgNhHiMb3ZcENizGkoKPBwhI5O6cVkE8SiUJ4MM3ZWbezvJEknWED5U%2FreyeQvJnVRfBsQqHY2RtF6nrPYyGuweLAfpGWn5dA2N%2FxD4xt4NGPax0WwgAiIBA8AKpQg%3D; d_ticket=6537bdb09e3bc966a73555b625f4a20805d80; odin_tt=8b14d78bda8d96ee252a0b04bde6df67e417de819ae2aff5e84dde678f187f509b2372655b99325ad4ce341c6c80bd5d947a24cb10a10aaf4dec4d9b69267a08; n_mh=LcM1cOw8HFMjuUsAxgY98un18tJ2aS13XDoKUMJmgAw; passport_auth_status=d05d05a79a1b91086a256e14792c088f%2C; passport_auth_status_ss=d05d05a79a1b91086a256e14792c088f%2C; sid_guard=fc6b04b9b74cd000904ad05a7d97eecc%7C1728730882%7C5184000%7CWed%2C+11-Dec-2024+11%3A01%3A22+GMT; uid_tt=081a78b4f45cf6f7248d345f6f6b2a2f; uid_tt_ss=081a78b4f45cf6f7248d345f6f6b2a2f; sid_tt=fc6b04b9b74cd000904ad05a7d97eecc; sessionid=fc6b04b9b74cd000904ad05a7d97eecc; sessionid_ss=fc6b04b9b74cd000904ad05a7d97eecc; is_staff_user=false; sid_ucp_v1=1.0.0-KGMxMDEzMDA5MTNjN2MwNzI1M2NkZTg3NWE2MDJiODFlMzc3MzJiZmYKHwiY37C54MyAAhCCrqm4BhjHEyAMMLvkgaMGOAJA8QcaAmhsIiBmYzZiMDRiOWI3NGNkMDAwOTA0YWQwNWE3ZDk3ZWVjYw; ssid_ucp_v1=1.0.0-KGMxMDEzMDA5MTNjN2MwNzI1M2NkZTg3NWE2MDJiODFlMzc3MzJiZmYKHwiY37C54MyAAhCCrqm4BhjHEyAMMLvkgaMGOAJA8QcaAmhsIiBmYzZiMDRiOWI3NGNkMDAwOTA0YWQwNWE3ZDk3ZWVjYw; store-region=cn-gd; store-region-src=uid; Hm_lpvt_2667d29c8e792e6fa9182c20a3013175=1728733437; ttwid=1%7CpG5C6EqM_9UeWXYJGM3B4K4x0Nyc97iCcZj51uLAaqI%7C1728733438%7C9f427b4c00ce6215497cb23aee88d69836d2eecfc5fab5ef250da93cdaf6b745',
|
||||
'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36 Edg/129.0.0.0'
|
||||
}
|
||||
|
||||
def fetch_novel_content(self):
    """
    Download the novel's index page, then every chapter listed on it.

    Fills ``book_name``, ``book_tags`` and ``chapters`` and writes the
    collected data to disk after every fifth chapter and once more at
    the end.
    """
    response = requests.get('https://fanqienovel.com/page/' + self.novel_id, headers=self.headers)
    page = parsel.Selector(response.content.decode('utf-8'))

    # Title, tags and the relative links of all chapters.
    self.book_name = page.css('.info-name h1::text').get()
    self.book_tags = page.css('.info-label span::text').getall()
    hrefs = page.css('.chapter-item .chapter-item-title::attr(href)').getall()

    for count, href in enumerate(hrefs, start=1):
        chapter = self.get_chapter_content(href)
        self.chapters.append(chapter)
        print(f"获取章节 {chapter['chapterName']} 成功")

        # Checkpoint only on every fifth chapter.
        if count % 5:
            continue
        self.save_content_to_file()

    # Final save covers any chapters fetched since the last checkpoint.
    self.save_content_to_file()
|
||||
|
||||
def get_chapter_content(self, id_link):
|
||||
chapter_url = 'https://fanqienovel.com' + id_link
|
||||
chapter_data = requests.get(chapter_url, headers=self.headers).content.decode('utf-8')
|
||||
chapter_selector = parsel.Selector(chapter_data)
|
||||
|
||||
# 章节名
|
||||
chapter_name = chapter_selector.css('.muye-reader-title::text').get()
|
||||
# 章节字数
|
||||
chapter_words = chapter_selector.css('.desc-item:nth-child(1)::text').get()
|
||||
# 章节更新时间
|
||||
chapter_update_time = chapter_selector.css('.desc-item:nth-child(2)::text').get()
|
||||
# 章节内容
|
||||
chapter_contents = chapter_selector.css('.muye-reader-content-16 p::text').getall()
|
||||
chapter_content = self.decrypt_chapter_content('\n\n'.join(chapter_contents))
|
||||
|
||||
return {
|
||||
"chapterName": chapter_name,
|
||||
"wordCount": chapter_words,
|
||||
"updateTime": chapter_update_time,
|
||||
"content": chapter_content,
|
||||
import random
|
||||
import time
|
||||
import json
|
||||
from pathlib import Path
|
||||
from fake_useragent import UserAgent
|
||||
class NovelSpider:
|
||||
def __init__(self):
    """
    Set up the spider with an empty cookie, a randomised User-Agent
    header and an empty novel-info dictionary.
    """
    self.novel_dict = {}
    self.cookie = ''
    self.URL = 'https://fanqienovel.com'

    # Build the header mapping step by step; the key order is unchanged.
    request_headers = {'User-Agent': UserAgent().random}
    request_headers['cookie'] = self.cookie
    self.headers = request_headers
|
||||
|
||||
def decrypt_chapter_content(self, content):
|
||||
@staticmethod
|
||||
def decrypt_chapter_content(content):
|
||||
with open('woff2.json', 'r', encoding='utf-8') as f:
|
||||
woff2_dict = json.load(f)
|
||||
converted_content = ""
|
||||
|
@ -75,22 +33,238 @@ class Novel:
|
|||
converted_content += index
|
||||
return converted_content
|
||||
|
||||
def save_content_to_file(self):
    """
    Dump the title, tags and collected chapters to ``<title>.json`` in
    the current working directory, overwriting any previous file.
    """
    title = self.book_name
    payload = {
        "novel": dict(title=title, tags=self.book_tags, chapters=self.chapters),
    }
    target = f'{title}.json'
    with open(target, 'w', encoding='utf-8') as handle:
        # ensure_ascii=False keeps Chinese text readable in the output.
        json.dump(payload, handle, ensure_ascii=False, indent=4)
|
||||
def search_novel(self, key):
    """
    Search for a novel by keyword and let the user pick one result.

    :param key: search keyword; an empty string aborts the search
    :return: the chosen book ID, ``'b'`` when the keyword is empty, or
        ``None`` when the request fails or nothing matches
    """
    search_url = "https://api5-normal-lf.fqnovel.com/reading/bookapi/search/page/v/"
    while True:
        if key == '':
            return 'b'

        # Passing the query through ``params`` URL-encodes the keyword.
        # ``passback`` is the result offset; the old f-string sent the
        # literal text "{(page-1)*10}" here, so request the first page.
        params = {
            'query': key,
            'aid': 1967,
            'channel': 0,
            'os_version': 0,
            'device_type': 0,
            'device_platform': 0,
            'iid': 466614321180296,
            'passback': 0,
            'version_code': 999,
        }
        try:
            response = requests.get(search_url, params=params, timeout=10)
        except requests.RequestException as exc:
            print("请求失败:", exc)
            return None

        if response.status_code != 200:
            print("请求失败,状态码:", response.status_code)
            return None

        data = response.json()
        if data['code'] != 0:
            print("搜索出错,错误码:", data['code'])
            return None

        books = data['data']
        if not books:
            print("没有找到相关书籍。")
            return None

        for index, book in enumerate(books, start=1):
            info = book['book_data'][0]
            print(
                f"{index}. 名称:{info['book_name']} 作者:{info['author']} ID:{info['book_id']} 字数:{info['word_number']}")

        while True:
            choice_ = input("请选择一个结果, 输入 r 以重新搜索:")
            if choice_ == "r":
                # Ask for a new keyword; re-running the same query would
                # only show the same list again.
                key = input("请输入要搜索的小说名:")
                break
            if choice_.isdecimal() and 1 <= int(choice_) <= len(books):
                book_id = books[int(choice_) - 1]['book_data'][0]['book_id']
                self.novel_dict['book_id'] = book_id
                return book_id
            print("输入无效,请重新输入。")
|
||||
|
||||
def get_novel_info(self, book_id):
    """
    Scrape the novel's index page and store its metadata in
    ``self.novel_dict``.

    :param book_id: novel ID, appended to ``<URL>/page/``
    :return: ``self.novel_dict`` with title, author, word_count, intro,
        chapter_latest, chapter_names and chapter_links filled in
    """
    url = f"{self.URL}/page/{book_id}"
    response = requests.get(url, headers=self.headers)
    novel_selector = parsel.Selector(response.content.decode('utf-8'))

    self.novel_dict['title'] = novel_selector.css('.info-name h1::text').get()
    self.novel_dict['author'] = novel_selector.css('.author-name-text::text').get()
    # All text nodes under ".detail" are kept as a list.
    self.novel_dict['word_count'] = novel_selector.css('.detail::text').getall()
    self.novel_dict['intro'] = novel_selector.css('.page-abstract-content p::text').get()
    # Drop the "最近更新:" label so only the latest chapter title remains.
    self.novel_dict['chapter_latest'] = ' '.join(
        title
        for title in novel_selector.css('.info-last-title:nth-child(1)::text').getall()
        if title != '最近更新:'
    )
    self.novel_dict['chapter_names'] = novel_selector.css('.chapter-item .chapter-item-title::text').getall()
    self.novel_dict['chapter_links'] = novel_selector.css('.chapter-item .chapter-item-title::attr(href)').getall()
    # The docstring always promised the dict; existing callers ignore it.
    return self.novel_dict
|
||||
|
||||
# 使用示例
|
||||
def get_novel_content(self, chapter_link):
    """
    Download one chapter page and return its decoded text.

    :param chapter_link: site-relative chapter link, appended to ``self.URL``
    :return: chapter text after ``decrypt_chapter_content``, or ``''``
        when the chapter could not be fetched or decoded
    """
    url = f"{self.URL}{chapter_link}"
    # Always send the cookie currently stored on the spider.
    self.headers['cookie'] = self.cookie
    try:
        response = requests.get(url, headers=self.headers, timeout=10)
        chapter_selector = parsel.Selector(response.content.decode('utf-8'))
        chapter_name = chapter_selector.css('.muye-reader-title::text').get()
        chapter_contents = chapter_selector.css('.muye-reader-content-16 p::text').getall()
        # Map the obfuscated characters back to readable text.
        chapter_content = self.decrypt_chapter_content('\n\n'.join(chapter_contents))
    except Exception as exc:
        # Best-effort fetch: report the failure and return an empty string
        # so callers can still call len() on the result. Unlike a bare
        # ``except:``, this lets KeyboardInterrupt/SystemExit propagate.
        print("获取章节内容失败。", repr(exc))
        return ''

    print(f"{chapter_name}章节内容长度:{len(chapter_content)}")
    return chapter_content
|
||||
|
||||
def get_cookie(self, t):
    """
    Find or validate a cookie that lets chapter pages load.

    A cookie counts as working when a randomly chosen chapter comes back
    with more than 200 characters of text.

    :param t: cookie string to validate, or ``''`` to search for a new
        ``novel_web_id`` cookie
    :return: ``'ok'`` when a working cookie is stored in ``self.cookie``,
        ``'err'`` otherwise
    """
    bas = 1000000000000000000  # base for the 19-digit novel_web_id range

    if t == '':
        # Try consecutive IDs, starting from a random point in the range.
        for num in range(random.randint(bas * 6, bas * 8), bas * 9):
            time.sleep(random.randint(50, 150) / 1000)  # random pause
            self.cookie = 'novel_web_id=' + str(num)
            self.headers['cookie'] = self.cookie
            link = self.random_str('chapter_links')
            if link is None:
                # No chapter links are loaded, so nothing can be probed.
                return 'err'
            print(f"尝试使用cookie {self.cookie} 获取 {link} 链接")
            # 200 ms gap between requests to avoid an IP ban.
            time.sleep(0.2)
            content = self.get_novel_content(link)
            # A failed fetch yields None or '', never a valid cookie.
            if content and len(content) > 200:
                return 'ok'
        return 'err'

    # A cookie was supplied. Store it on the spider as well, because
    # get_novel_content() rebuilds the header from self.cookie.
    self.cookie = t
    self.headers['cookie'] = t
    link = self.random_str('chapter_links')
    if link is None:
        return 'err'
    content = self.get_novel_content(link)
    if content and len(content) > 200:
        print(f"尝试使用cookie {self.cookie} 获取 {link} 链接")
        return 'ok'
    return 'err'
|
||||
|
||||
def random_str(self, key):
    """
    Pick a random element from one of the lists in ``self.novel_dict``.

    :param key: dictionary key whose list is sampled
    :return: a random element, or ``None`` when the key is missing or
        the list is empty
    """
    try:
        values = self.novel_dict[key]
    except KeyError:
        print("返回值为空,请先获取小说信息。")
        return None

    if not values:
        return None

    # Skip the first ten entries when possible.
    # NOTE(review): presumably the opening chapters load without a valid
    # cookie and so cannot validate one; confirm.
    # Short lists fall back to the whole list instead of raising IndexError.
    candidates = values[10:] or values
    return random.choice(candidates)
|
||||
|
||||
def down_text(self):
    """
    Download every chapter listed in ``self.novel_dict`` as a text file.

    Chapters whose content could not be fetched are skipped so that one
    failure does not abort the whole download.
    """
    file = FileHandler()
    chapters = zip(self.novel_dict['chapter_names'], self.novel_dict['chapter_links'])
    for name, link in chapters:
        time.sleep(1)  # one-second pause between chapter requests
        text = self.get_novel_content(link)
        if not text:
            # get_novel_content yields nothing on failure; writing that
            # would raise a TypeError or leave an empty file.
            print(f"跳过 {name}:获取章节内容失败。")
            continue
        file.book2down('text', self.novel_dict['title'], name, text)
|
||||
|
||||
class FileHandler:
|
||||
def __init__(self):
|
||||
self.config = {}
|
||||
self.path = Path('Data')
|
||||
self.config_file_path = self.path / 'fq2s.cookie'
|
||||
self.download_dir_path = self.path / 'books'
|
||||
|
||||
# 如果 Data 目录不存在,则创建
|
||||
if not self.path.exists():
|
||||
self.path.mkdir(parents=True)
|
||||
self.download_dir_path.mkdir(parents=True)
|
||||
print(f"目录 {self.path} , {self.download_dir_path} 已创建。")
|
||||
|
||||
def read_config(self,):
|
||||
"""
|
||||
读取配置文件
|
||||
:param : 配置文件路径
|
||||
:return: 配置字典
|
||||
"""
|
||||
# 检查 fq2s.conf 文件是否存在
|
||||
if self.config_file_path.exists():
|
||||
# 如果文件存在,则读取内容
|
||||
self.config['cookie'] = self.config_file_path.read_text(encoding='utf-8')
|
||||
# print(f"配置文件 {self.config_file_path} 读取成功。")
|
||||
else:
|
||||
# 如果文件不存在,则创建文件并写入默认内容
|
||||
default_content = ""
|
||||
self.config_file_path.write_text(default_content, encoding='utf-8')
|
||||
print(f"配置文件 {self.config_file_path} 不存在,已创建并写入默认内容。")
|
||||
|
||||
def write_config(self, con):
|
||||
"""
|
||||
写入配置文件
|
||||
:param con: 配置内容
|
||||
:return:
|
||||
"""
|
||||
cons = f'"{con}"'
|
||||
with self.config_file_path.open('w', encoding='utf-8') as f:
|
||||
f.write(cons)
|
||||
print("配置文件写入成功。")
|
||||
|
||||
def book2down(self, down_type, b_name, c_name, text):
|
||||
"""
|
||||
下载小说
|
||||
:param down_type: 下载类型
|
||||
:param b_name: 小说名
|
||||
:param c_name: 章节名
|
||||
:param text: 章节内容
|
||||
:return:
|
||||
"""
|
||||
match down_type:
|
||||
case 'text':
|
||||
# 下载txt格式
|
||||
book_name_path = self.download_dir_path / f"{b_name}"
|
||||
book_name_path.mkdir(parents=True, exist_ok=True)
|
||||
file_path = book_name_path / f"{c_name}.txt"
|
||||
file_path.write_text(text, encoding='utf-8')
|
||||
print(f"已下载 {c_name} 章节。")
|
||||
case 2:
|
||||
# 下载json格式
|
||||
pass
|
||||
def run(mode=0):
|
||||
print("欢迎使用番茄小说爬虫!\n初始化中................................................")
|
||||
novel_spider = NovelSpider()
|
||||
file_handler = FileHandler()
|
||||
novel_spider.get_novel_info(6982529841564224526)
|
||||
file_handler.read_config()
|
||||
cookie = file_handler.config.get('cookie', '')
|
||||
novel_spider.cookie = cookie
|
||||
if cookie == '':
|
||||
print("获取cookie------------")
|
||||
status = novel_spider.get_cookie('')
|
||||
if status == 'ok':
|
||||
print("获取cookie成功!")
|
||||
file_handler.write_config(novel_spider.cookie)
|
||||
else:
|
||||
status = novel_spider.get_cookie(cookie)
|
||||
if status == 'err':
|
||||
print("该cookie已失效,重新获取。")
|
||||
novel_spider.get_cookie('')
|
||||
file_handler.write_config(novel_spider.cookie)
|
||||
print("获取cookie成功!")
|
||||
elif status == 'ok':
|
||||
print("cookie可用!")
|
||||
if mode == 1:
|
||||
pass
|
||||
else:
|
||||
# nogui模式
|
||||
while True:
|
||||
m = input("请输入功能模块[1.下载小说|2.x|3.x|4.x|5.x|6.x|7.x|8.x|9.x||0.退出]:")
|
||||
match m:
|
||||
case '1':
|
||||
# 下载小说
|
||||
# t = input("请输入下载类型[1.txt|2.json]")
|
||||
bid = novel_spider.search_novel(input("请输入要搜索的小说名:"))
|
||||
novel_spider.get_novel_info(bid)
|
||||
novel_spider.down_text()
|
||||
case '2':
|
||||
pass
|
||||
case '0':
|
||||
break
|
||||
# Main program entry point.
# NOTE(review): this region appears to interleave removed and added lines of
# the diff (the Novel-based calls and the run() call); confirm which
# statements belong to the current file.
if __name__ == '__main__':
    novel_id = '7412526577163979800'
    novel = Novel(novel_id)
    novel.fetch_novel_content()  # fetch the novel content
    novel.save_content_to_file()  # save the content to JSON
    print(novel.book_name)  # print the book title
    '''
    打开方式:nogui(0)|gui(1)
    '''
    # Launch mode is fixed to 0 (console); the interactive prompt is disabled.
    # i = int(input("请输入打开方式[nogui(0)|gui(1)]:"))
    i = 0
    run(i)
|
|
@ -3,4 +3,5 @@ lxml~=5.3.0
|
|||
EbookLib~=0.18
|
||||
tqdm~=4.66.5
|
||||
beautifulsoup4~=4.12.3
|
||||
parsel~=1.9.1
|
||||
parsel~=1.9.1
|
||||
fake-useragent~=1.5.1
|
Loading…
Reference in New Issue