1
1
/**
 * @desc Scheduled job - crawls Douban movies, etc.
 * @author justJokee
 */
4
5
5
6
const schedule = require ( 'node-schedule' )
@@ -18,15 +19,13 @@ let cache = {
18
19
getMovieDo : [ ]
19
20
}
20
21
// Shared spider instance used by the crawl routines in this file; it is
// constructed once with a fixed uid option.
// NOTE(review): the uid presumably identifies the Douban account to crawl -
// confirm against DoubanSpider.
const douban = new DoubanSpider({ uid: '173712770' })
24
24
25
25
/**
 * Registers the recurring crawl job with node-schedule.
 *
 * The six-field cron expression '0 0 1 * * *' fires at 01:00:00 every day.
 * Each firing logs a marker line and then runs getMovies().
 *
 * @returns {void}
 */
function startSchedule() {
  // Crawl once a day at 1 a.m.
  schedule.scheduleJob('0 0 1 * * *', async () => {
    console.log('定时任务触发 -->>>>>')
    try {
      // Await the crawl so that a failure is caught here instead of becoming
      // an unhandled promise rejection, which would take down the process
      // that hosts the scheduler.
      await getMovies()
    } catch (err) {
      console.error('scheduled crawl failed -->>>>', err)
    }
  })
}
@@ -42,18 +41,17 @@ async function handleMovies(method) {
42
41
try {
43
42
const res = await douban [ method ] ( )
44
43
cache [ method ] . push ( res . data )
45
- console . log ( ' 第1页爬取成功==== >>>>>' )
44
+ console . log ( `[ ${ method } ]: 第1页爬取成功 -- >>>>` )
46
45
fs . writeFileSync ( `${ moviesPath [ method ] } /pageTotal.txt` , res . page . totalPage + '' , 'utf8' )
47
46
if ( res . page . totalPage > 1 ) {
48
47
// 保存总页码数
49
48
50
49
for ( let i = 2 ; i <= res . page . totalPage ; i ++ ) {
51
- // for (let i = 2; i <= 3; i++) {
52
- // 爬取速度1分钟1页,避免触发反爬
50
+ // 爬取速度 30s/1页,避免触发反爬机制
53
51
await sleep ( )
54
52
const res = await douban [ method ] ( i )
55
53
cache [ method ] . push ( res . data )
56
- console . log ( `第${ i } 页爬取成功====> >>>>` )
54
+ console . log ( `[ ${ method } ]: 第${ i } 页爬取成功 -- >>>>` )
57
55
}
58
56
}
59
57
// 写入json文件
@@ -64,7 +62,7 @@ async function handleMovies(method) {
64
62
// 释放空间
65
63
cache [ method ] = [ ]
66
64
} catch ( e ) {
67
- console . log ( '爬虫解析错误-- -->>>>' , e )
65
+ console . log ( '爬虫解析错误-->>>>' , e )
68
66
cache = {
69
67
getMovieCollect : [ ] ,
70
68
getMovieWish : [ ] ,
0 commit comments