网站
https://project-iprj6705f17ebcfad66461658c5c-8000.preview.node01.inscode.run/
第一道题:爬取 API 并保存结果
import requests,re
import json

# Task 1: fetch the task API and save the JSON response to 1.json.
url = "https://project-iprj6705f17ebcfad66461658c5c-8000.preview.node01.inscode.run/tasks/api/"
headers= {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
}
# timeout= keeps the script from hanging forever on an unresponsive host
res = requests.get(url, headers=headers, timeout=10).json()
# Explicit utf-8: with ensure_ascii=False the payload contains raw Chinese
# characters, and the platform default encoding (e.g. gbk on Windows) could
# corrupt or reject them.
with open('1.json', 'w', encoding='utf-8') as f:
    json.dump(res, f, ensure_ascii=False)
第二道题:爬取所有图片
from urllib.parse import urljoin
import requests,re
from urllib.parse import urlparse
import json

# Task 2: download every article image listed by the task API into the
# current directory, keeping each image's original file name.
url = "https://project-iprj6705f17ebcfad66461658c5c-8000.preview.node01.inscode.run/tasks/api/"
headers= {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
}
res = requests.get(url, headers=headers, timeout=10).json()
# Collect the (relative) image path of each article record.
image_paths = [article['image'] for article in res['articles']]
# Rebuild the scheme+host part of the API URL to resolve relative paths.
base_url = "https://" + urlparse(url).netloc
for image in image_paths:
    image_url = urljoin(base_url, image)
    # Send the same UA header as the API call so the server does not
    # reject the image request, and bound the wait with a timeout.
    img = requests.get(image_url, headers=headers, timeout=10).content
    # Last path segment is used as the local file name.
    img_name = image.split("/")[-1]
    with open(img_name, 'wb') as f:
        f.write(img)
第三道 爬取题目和摘要
import requests,csv
from lxml import etree

# Task 3: scrape each article's title and abstract into data.csv.
#
# Bug fixes vs. the original:
#  - data.csv was opened three times: a gbk header, then a second 'w' open
#    that truncated it and wrote a different header (still gbk), then rows
#    appended in utf-8 — yielding a mixed-encoding file. It is now opened
#    exactly once, entirely in utf-8.
#  - Header typo "再要" corrected to "摘要".
#  - Added request timeouts.
headers= {
    'user-agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36'
}
list_url = "https://project-iprj6705f17ebcfad66461658c5c-8000.preview.node01.inscode.run/tasks/article/list/"
res = requests.get(list_url, headers=headers, timeout=10)
html = etree.HTML(res.text)
# Relative hrefs of every article detail page in the listing block.
article_links = html.xpath('//div[@class="lab-block"]//a//@href')

base = "https://project-iprj6705f17ebcfad66461658c5c-8000.preview.node01.inscode.run/"
with open("data.csv", "w", newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    writer.writerow(["题目", "摘要"])
    for href in article_links:
        detail = requests.get(base + href, headers=headers, timeout=10)
        page = etree.HTML(detail.text)
        # First <h2> holds the title; all <p> text concatenated is the abstract.
        timu = page.xpath('//h2/text()')[0]
        zaiyao = "".join(page.xpath('//p//text()'))
        writer.writerow([timu, zaiyao])
本站资源均来自互联网,仅供研究学习,禁止违法使用和商用,产生法律纠纷本站概不负责!如果侵犯了您的权益请与我们联系!
转载请注明出处: 免费源码网-免费的源码资源网站 » python爬虫题目
发表评论 取消回复