Skip to content

Navigation Menu

Sign in
Appearance settings

Search code, repositories, users, issues, pull requests...

Provide feedback

We read every piece of feedback, and take your input very seriously.

Saved searches

Use saved searches to filter your results more quickly

Sign up
Appearance settings

Commit fa22bfb

Browse files
Update caixukun.py
1 parent 636b592 commit fa22bfb

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

‎CaiXuKun/CaiXuKun/spiders/caixukun.py‎

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,15 @@ class CaixukunSpider(scrapy.Spider):
88
name = 'caixukun'
99
allowed_domains = ['m.weibo.cn']
1010

11-
def start_requests(self):
11+
def start_requests(self):# 以start_requests代替start_urls启动爬虫
1212
urls = ['https://m.weibo.cn/api/statuses/repostTimeline?'
13-
'id=4347741368557605&page={}'.format(i) for i in range(15136)]
14-
random.shuffle(urls)
13+
'id=4347741368557605&page={}'.format(i) for i in range(15136)]# 该链接通过浏览器抓包得来(微博移动端)
14+
random.shuffle(urls)# 这个api的数据是实时更新的,所以不需要按照顺序爬,shuffle一下可以增加爬虫效率
1515

1616
for url in urls:
1717
yield scrapy.Request(url=url, callback=self.parse, dont_filter=True)
1818

19-
def parse(self, response):
19+
def parse(self, response):# 解析函数
2020
res = json.loads(response.text)
2121
if res['ok'] == 1:
2222
data = res['data']['data']

0 commit comments

Comments
(0)

AltStyle によって変換されたページ (->オリジナル) /