|
| 1 | +/** |
| 2 | + * @desc 定时任务 - 爬取豆瓣电影 |
| 3 | + */ |
| 4 | + |
| 5 | +const schedule = require('node-schedule') |
| 6 | +const DoubanSpider = require('douban-spider-v') |
| 7 | +const fs = require('fs-extra') |
| 8 | +const path = require('path') |
// Output directory for each crawled list. The keys are the spider method
// names that handleMovies() invokes via `douban[method]`, so the same string
// selects both the crawl call and its target directory.
// NOTE(review): collect / wish / do presumably map to Douban's
// "watched" / "want to watch" / "watching" lists — confirm against the
// douban-spider-v documentation.
const moviesPath = {
  getMovieCollect: path.join(__dirname, '../files/movies/collect'),
  getMovieWish: path.join(__dirname, '../files/movies/wish'),
  getMovieDo: path.join(__dirname, '../files/movies/do')
}

// Per-method list of crawled page payloads (one array entry per page),
// maintained by handleMovies(). Uses the same keys as `moviesPath`.
let cache = {
  getMovieCollect: [],
  getMovieWish: [],
  getMovieDo: []
}
// Shared spider client used for every crawl in this module.
// NOTE(review): `uid` is presumably the Douban user whose lists are crawled —
// confirm against the douban-spider-v documentation.
const douban = new DoubanSpider({
  uid: 'tan-mu'
})
| 23 | + |
/**
 * Registers the recurring crawl job with node-schedule.
 *
 * The six-field cron expression `'0 0 1 * * *'` (second, minute, hour,
 * day-of-month, month, day-of-week) fires once a day at 01:00:00. Each firing
 * runs getMovies() to completion.
 *
 * @returns {*} The value returned by `schedule.scheduleJob` (the job handle),
 *   so callers can cancel or inspect the job. Existing callers that ignore
 *   the return value are unaffected.
 */
function startSchedule() {
  return schedule.scheduleJob('0 0 1 * * *', async () => {
    console.log('定时任务触发------>>>>>>>')
    try {
      // Await the crawl so the job's promise tracks the real work; a bare
      // call would leave a floating promise whose rejection nobody handles.
      await getMovies()
    } catch (e) {
      // A failed crawl must not surface as an unhandled rejection in the
      // host process; log it and wait for the next scheduled run.
      console.log('定时任务执行失败---->>>>', e)
    }
  })
}
/**
 * Crawls the three movie lists one after another.
 *
 * The lists are processed strictly in sequence (collect, wish, do), with one
 * sleep() pause (default delay) between consecutive lists.
 *
 * @returns {Promise<void>} Resolves once the last list has been handled.
 */
async function getMovies() {
  const listMethods = ['getMovieCollect', 'getMovieWish', 'getMovieDo']
  let isFirst = true
  for (const listMethod of listMethods) {
    // Pause only between lists, never before the first one.
    if (!isFirst) {
      await sleep()
    }
    isFirst = false
    await handleMovies(listMethod)
  }
}
| 39 | + |
/**
 * Crawls every page of one movie list and stores each page as a JSON file.
 *
 * Page 1 is fetched with `douban[method]()`; its `page.totalPage` decides how
 * many further pages are fetched with `douban[method](pageNumber)`. Nothing
 * is written to disk until every page has been fetched, so a failure part-way
 * through a crawl leaves the files of the previous successful crawl (and the
 * matching `pageTotal.txt`) untouched.
 *
 * Files written under `moviesPath[method]`:
 *   - `pageTotal.txt` — total page count, only when there is more than one page
 *   - `1.json`, `2.json`, … — the `data` payload of each page
 *
 * @param {string} method - Name of the spider method to call on `douban`;
 *   also the key into `moviesPath` and `cache`.
 * @returns {Promise<void>} Never rejects: any crawl or write error is logged
 *   and swallowed so that the remaining lists can still be processed.
 */
async function handleMovies(method) {
  // Collect into a fresh local list on every call. Appending to the shared
  // module-level cache made each scheduled run re-write the pages of all
  // earlier runs as additional, duplicated JSON files.
  const pages = []
  try {
    const firstPage = await douban[method]()
    pages.push(firstPage.data)
    console.log('第1页爬取成功====>>>>>')

    const totalPage = firstPage.page.totalPage
    for (let pageNo = 2; pageNo <= totalPage; pageNo++) {
      // Throttle between pages (sleep's default delay) to avoid tripping the
      // site's anti-crawler protection.
      await sleep()
      const nextPage = await douban[method](pageNo)
      pages.push(nextPage.data)
      console.log(`第${pageNo}页爬取成功====>>>>>`)
    }

    // Create the directory once, before the first write; writing
    // pageTotal.txt ahead of this threw ENOENT on a fresh checkout.
    const targetDir = moviesPath[method]
    fs.ensureDirSync(targetDir)
    if (totalPage > 1) {
      // Persist the total page count alongside the page files.
      fs.writeFileSync(`${targetDir}/pageTotal.txt`, String(totalPage), 'utf8')
    }
    pages.forEach((doc, index) => {
      fs.writeFileSync(`${targetDir}/${index + 1}.json`, JSON.stringify(doc), 'utf8')
    })

    // Publish the completed crawl; entries for other lists are left alone.
    cache[method] = pages
  } catch (e) {
    // Best effort: log and keep whatever the last successful crawl produced.
    console.log('爬虫解析错误---->>>>', e)
  }
}
/**
 * Pauses for the given number of milliseconds.
 *
 * @param {number} [ms=30000] - Delay in milliseconds; defaults to 30 seconds.
 * @returns {Promise<void>} Resolves with no value once the timer fires.
 */
function sleep(ms = 1000 * 30) {
  return new Promise((done) => {
    setTimeout(() => done(), ms)
  })
}
// Public API: the scheduler entry point is the only name this module exports.
exports.startSchedule = startSchedule
0 commit comments