0%

Pyppeteer

一个好用的爬虫库。

优点是在后台调用 Chromium,支持异步抓取,使用也很简单;对于那些异步加载数据的网页,用它来抓取非常方便。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
# 简单例子

# coding=utf-8
import time
import asyncio
from pyppeteer import launch
from pyquery import PyQuery as pq


# Listing-page URLs to scrape: one per value of the ``p`` query parameter, 1 through 5.
urls = [
    f"https://v.huya.com/u/146501201/video.html?sort=news&p={page_no}"
    for page_no in range(1, 6)
]

async def main():
    """Collect video links from every page in ``urls`` and append them to a file.

    Launches a browser through pyppeteer's ``launch()``, opens each URL from
    the module-level ``urls`` list in a single page, waits until the
    ``.content-list .statpid`` anchors exist, and appends one absolute link
    per line to ``down.txt``.

    The browser is closed in a ``finally`` clause so that a failed
    navigation, selector timeout, or file error does not leave the browser
    process running.
    """
    base_url = "https://v.huya.com"
    link_selector = ".content-list .statpid"

    browser = await launch()
    try:
        page = await browser.newPage()
        with open(
            "/Users/ming/projects/huyaPyDwon/down.txt", "a", encoding="utf-8"
        ) as f:
            for url in urls:
                await page.goto(url)
                # Only read the page content once the link anchors are
                # present in the DOM.
                await page.waitForSelector(link_selector)
                doc = pq(await page.content())
                for item in doc(link_selector).items():
                    href = item.attr("href")
                    # Anchors without an href cannot be turned into a link;
                    # skip them instead of failing on ``str + None``.
                    if href:
                        f.write(base_url + href + "\n")
    finally:
        await browser.close()

# Script entry point. ``asyncio.run`` creates a fresh event loop, runs
# ``main()`` to completion and closes the loop afterwards; calling
# ``asyncio.get_event_loop()`` without a running loop is deprecated.
if __name__ == "__main__":
    asyncio.run(main())