Commit fa22bfb

authored

Update caixukun.py

1 parent 636b592 commit fa22bfbCopy full SHA for fa22bfb

File tree

-4

lines changed

-4

lines changed

Lines changed: 4 additions & 4 deletions

Original file line number	Diff line number	Diff line change
`@@ -8,15 +8,15 @@ class CaixukunSpider(scrapy.Spider):`
`8`	`8`	`name = 'caixukun'`
`9`	`9`	`allowed_domains = ['m.weibo.cn']`
`10`	`10`
`11`		`- def start_requests(self):`
	`11`	`+ def start_requests(self):# 以start_requests代替start_urls启动爬虫`
`12`	`12`	`urls = ['https://m.weibo.cn/api/statuses/repostTimeline?'`
`13`		`- 'id=4347741368557605&page={}'.format(i) for i in range(15136)]`
`14`		`- random.shuffle(urls)`
	`13`	`+ 'id=4347741368557605&page={}'.format(i) for i in range(15136)]# 该链接通过浏览器抓包得来(微博移动端)`
	`14`	`+ random.shuffle(urls)# 这个api的数据是实时更新的,所以不需要按照顺序爬,shuffle一下可以增加爬虫效率`
`15`	`15`
`16`	`16`	`for url in urls:`
`17`	`17`	`yield scrapy.Request(url=url, callback=self.parse, dont_filter=True)`
`18`	`18`
`19`		`- def parse(self, response):`
	`19`	`+ def parse(self, response):# 解析函数`
`20`	`20`	`res = json.loads(response.text)`
`21`	`21`	`if res['ok'] == 1:`
`22`	`22`	`data = res['data']['data']`

Comments

(0)