笔趣阁下载(python学习分享:笔趣阁小说全本下载工具爬虫源码)
python学习分享:笔趣阁小说全本下载工具爬虫源码
#导入模块
import requests
import parsel
from lxml import etree
import re
import pandas as pd
import datetime
import time
from tqdm import tqdm
import prettytable as pt
# Request headers: send a desktop-browser User-Agent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

# Site root; the search URL and every chapter URL are built from it.
BASE_URL = 'https://www.biquge9.com'
# Seconds to wait for a single HTTP response before giving up.
REQUEST_TIMEOUT = 10
# Exclusive upper bound on probed chapter page numbers (pages 1..4999).
MAX_CHAPTER_PAGES = 5000
# Give up once this many consecutive pages yield no chapter text, instead of
# requesting every remaining page number up to the bound above.
MAX_CONSECUTIVE_MISSES = 5

_BOOK_ID_RE = re.compile(r'/book/([^/?#]+)')
_AUTHOR_SEP_RE = re.compile(r'[:：]')
_UNSAFE_FILENAME_RE = re.compile(r'[\\/:*?"<>|\r\n\t]')


def book_id_from_href(href):
    """Return the path segment that follows ``/book/`` in *href*.

    Works for relative and absolute links. Returns ``None`` when *href* is
    empty or contains no ``/book/<id>`` part.
    """
    match = _BOOK_ID_RE.search(href or '')
    return match.group(1) if match else None


def parse_author(raw: str) -> str:
    """Return the author name from a ``label:name`` text node.

    Both the ASCII and the full-width colon are accepted as separator. Text
    without a separator is returned whole (stripped) rather than raising.
    """
    return _AUTHOR_SEP_RE.split(raw, maxsplit=1)[-1].strip()


def safe_filename(name: str) -> str:
    """Replace characters that are illegal in common file systems with ``_``.

    Falls back to ``'novel'`` when nothing usable is left.
    """
    cleaned = _UNSAFE_FILENAME_RE.sub('_', name).strip()
    return cleaned or 'novel'


def parse_index(text: str, count: int):
    """Convert user input to a list index in ``range(count)``.

    Returns ``None`` for non-numeric or out-of-range input. Negative numbers
    are rejected so they cannot silently index from the end of the list.
    """
    try:
        index = int(text)
    except ValueError:
        return None
    return index if 0 <= index < count else None


def format_chapter(title, paragraphs) -> str:
    """Build the text written to the output file for one chapter.

    *title* may be ``None`` or empty (no heading found on the page); the
    heading line is then omitted. *paragraphs* are joined with newlines and
    the result always ends with a newline.
    """
    body = '\n'.join(paragraphs)
    if title:
        return f'{title}\n{body}\n'
    return f'{body}\n'


def search_books(keyword):
    """Query the site search and return the matching books.

    Returns a list of dicts with the keys ``'书名'``, ``'作者'`` and ``'ID'``.
    Raises ``requests.RequestException`` on network failure or an HTTP error
    status.
    """
    # Passing the keyword via ``params`` lets requests URL-encode it.
    response = requests.get(
        f'{BASE_URL}/s',
        params={'q': keyword},
        headers=headers,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    if not response.text.strip():
        return []
    tree = etree.HTML(response.text)
    if tree is None:
        return []

    # Read href and title from the same <a> element so they cannot get out
    # of step with each other.
    anchors = tree.xpath('//h4/a[contains(@href,"/book/")]')
    # NOTE(review): authors are still paired with titles by position only;
    # confirm against the page markup that both lists have the same order.
    authors = tree.xpath('//div[contains(@class,"author")]/text()')

    books = []
    for anchor, raw_author in zip(anchors, authors):
        book_id = book_id_from_href(anchor.get('href'))
        if book_id is None:
            continue
        books.append({
            '书名': ''.join(anchor.itertext()).strip(),
            '作者': parse_author(raw_author),
            'ID': book_id,
        })
    return books


def fetch_chapter(url):
    """Fetch one chapter page and return ``(title, paragraphs)``.

    Returns ``None`` when the request fails, the status is not OK, or the
    page contains no chapter text.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return None
    if not response.ok or not response.text.strip():
        return None
    selector = parsel.Selector(response.text)
    title = selector.css('.content h1::text').get()
    paragraphs = selector.css('#chaptercontent::text').getall()
    if not paragraphs:
        return None
    return title, paragraphs


def download_book(book):
    """Append every chapter of *book* to ``<book title>.txt``.

    *book* is one of the dicts produced by ``search_books``. Chapter pages
    are requested in ascending order; the download stops early after
    ``MAX_CONSECUTIVE_MISSES`` pages in a row without chapter text.
    """
    book_url = f"{BASE_URL}/book/{book['ID']}"
    # Name the file after the *selected* book, not after whichever search
    # result happened to be listed last.
    out_path = safe_filename(book['书名']) + '.txt'
    misses = 0
    # Open once in append mode instead of reopening the file per chapter.
    with open(out_path, 'a', encoding='utf-8') as out_file:
        for page in tqdm(range(1, MAX_CHAPTER_PAGES)):
            chapter = fetch_chapter(f'{book_url}/{page}.html')
            if chapter is None:
                misses += 1
                if misses >= MAX_CONSECUTIVE_MISSES:
                    break
                continue
            misses = 0
            title, paragraphs = chapter
            out_file.write(format_chapter(title, paragraphs))


def main():
    """Run the interactive search-and-download loop until the user enters 0."""
    curr_time = datetime.datetime.now()
    times = curr_time.strftime('%Y-%m-%d %H:%M:%S')
    print(f' 如今是:{times} by 琴棋字画')
    print()
    print('***********接待使用笔趣阁小说全本下载东西*************')
    print()
    while True:
        keyword = input('请输入你想要搜刮的小说名字(0退去):')
        if keyword == '0':
            break

        try:
            books = search_books(keyword)
        except requests.RequestException as exc:
            print(f'搜索请求失败:{exc}')
            continue
        if not books:
            print('请准确输入小说名字或作者名字,没有查到这本书的数据.....')
            continue

        tb = pt.PrettyTable()
        tb.field_names = ['序号', '书名', '作者', '小说ID']
        for index, book in enumerate(books):
            tb.add_row([index, book['书名'], book['作者'], book['ID']])
        print(tb)
        print(f'一共搜刮到{len(books)}条数据')
        print()
        print()

        choice = parse_index(input('请输入你要下载的小说序号:'), len(books))
        print()
        if choice is None:
            print('序号无效,请重新搜索并输入表格中的序号。')
            continue

        print('小说正本人载,已完成......')
        download_book(books[choice])


if __name__ == '__main__':
    main()
声明:本站所有文章资源内容,如无特殊说明或标注,均为采集网络资源。如若本站内容侵犯了原著者的合法权益,可联系本站删除。