Skip to main content
Code Review

Return to Question

Commonmark migration
Source Link

##Review Wanted##

Review Wanted

Application Summary

###Application Summary### The application is a simple screen scraper that notifies the user when new items are posted.

My Challenges

###My Challenges### First of all, I have never used any of the technologies I had to use in the app ( NodeJS, Puppeteer, and Express ). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.

###WorkFlow###

WorkFlow

[![WorkFlow][1]][1]

###Source Code### Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.

Source Code

Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.

Regards, John [1]: https://i.sstatic.net/DKXxz.png

##Review Wanted##

###Application Summary### The application is a simple screen scraper which is to notify the user when new items are posted.

###My Challenges### First of all, I have never used any of the technologies I had to use in the app ( NodeJS, Puppeteer, and Express ). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.

###WorkFlow###

[![WorkFlow][1]][1]

###Source Code### Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.

Regards, John [1]: https://i.sstatic.net/DKXxz.png

Review Wanted

Application Summary

The application is a simple screen scraper which is to notify the user when new items are posted.

My Challenges

First of all, I have never used any of the technologies I had to use in the app ( NodeJS, Puppeteer, and Express ). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.

WorkFlow

WorkFlow

Source Code

Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.

Regards, John

edited title
Link
Mast
  • 13.8k
  • 12
  • 57
  • 127

Scrape page and send user only new results

Source Link
John S.
  • 171
  • 6

Scrape Page and Send User Only New Results

##Review Wanted##

###Application Summary### The application is a simple screen scraper which is to notify the user when new items are posted.

The code is run as a CRON job every ten minutes. It scrapes the target page and returns an array of ALL items matching the user's ( hard-coded ) search criteria.

The results are compared to the results from the previous time the code was run. If there are any new items, the user is notified.

###My Challenges### First of all, I have never used any of the technologies I had to use in the app ( NodeJS, Puppeteer, and Express ). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.

Currently, I added a column in the database, "seen". When the user hits the front end, I will mark those results as "seen".

Also, the way I am saving the items in general may need refactoring. I will leave that up to your opinion.

###WorkFlow###

Here is a breakdown of how the app works:

[![WorkFlow][1]][1]

###Source Code### Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.

const puppeteer = require('puppeteer');
const _ = require("lodash");
var mysql = require('mysql');
// Single MySQL connection object; the query helpers in this file all call con.query on it.
// NOTE(review): the credentials are hard-coded (root user, empty password) — presumably a
// local development setup; confirm and move to configuration before deploying.
var con = mysql.createConnection({
 host: 'localhost',
 user: 'root',
 password: '',
 database: 'mydatabase'
});
// Items already scraped.
// NOTE(review): init() declares its own local `oldItems`, so nothing in the code
// shown here ever assigns this module-level binding.
var oldItems;

/**
 * Loads every row currently stored in the `jeeps` table.
 *
 * Declared with `const` instead of being assigned to an undeclared name, so it
 * no longer leaks an implicit global (which throws under strict mode / ESM).
 *
 * @returns {Promise<Array<Object>>} Resolves with the rows passed to the query callback.
 * @throws {Error} Rejects when the query fails; the driver's error is kept on `cause`.
 */
const getSavedJeeps = function () {
  return new Promise(function (resolve, reject) {
    con.query("SELECT * FROM jeeps", function (err, rows) {
      if (err) {
        // Keep the real failure reason instead of a generic, misleading message.
        const failure = new Error("Failed to load saved jeeps: " + err.message);
        failure.cause = err;
        reject(failure);
        return;
      }
      resolve(rows);
    });
  });
};
/**
 * Inserts each item into the `newjeeps` table, one at a time, and then runs
 * removeDupes() once all inserts have settled.
 *
 * Each insert is awaited, so the returned promise only settles after the
 * database work is done. Failures are logged rather than thrown so that one bad
 * row does not abort the rest of the batch.
 *
 * @param {Array<Object>} entity - Items to insert; each object is handed to the
 *   driver as the value for `SET ?`. A null/undefined value is treated as empty.
 * @returns {Promise<boolean>} `true` when every insert and the duplicate clean-up
 *   succeeded, `false` if any of them failed.
 */
const saveNewJeeps = async function (entity) {
  const rows = entity == null ? [] : Array.from(entity);
  let allSucceeded = true;

  for (const row of rows) {
    try {
      await new Promise((resolve, reject) => {
        con.query('INSERT INTO newjeeps SET ?', row, (err) => {
          if (err) {
            reject(err);
          } else {
            resolve();
          }
        });
      });
      console.log("Save function complete");
    } catch (err) {
      allSucceeded = false;
      console.error("Insert into newjeeps failed:", err);
    }
  }

  // Awaited so a failing clean-up is reported here instead of becoming an
  // unhandled promise rejection.
  try {
    await removeDupes();
  } catch (err) {
    allSucceeded = false;
    console.error("Removing duplicates failed:", err);
  }

  return allSucceeded;
};
/**
 * Inserts each item into the `jeeps` table, one at a time.
 *
 * Each insert is awaited, so the returned promise only settles once the
 * database has answered for every row. A failed insert is logged and the loop
 * continues with the next row.
 *
 * @param {Array<Object>} entity - Items to insert; each object is handed to the
 *   driver as the value for `SET ?`. A null/undefined value is treated as empty.
 * @returns {Promise<void>} Settles after every insert has completed or failed.
 */
const updateAllItems = async function (entity) {
  const rows = entity == null ? [] : Array.from(entity);

  for (const row of rows) {
    try {
      await new Promise((resolve, reject) => {
        con.query('INSERT INTO jeeps SET ?', row, (err) => {
          if (err) {
            reject(err);
          } else {
            resolve();
          }
        });
      });
      console.log("Save function complete");
    } catch (err) {
      console.error("Insert into jeeps failed:", err);
    }
  }
};
// Gets current items Search Results
/**
 * Launches a browser, opens the marketplace search page for `searchTerm`,
 * scrolls it via autoScroll(), and collects one object per listing link.
 *
 * Each listing's inner text is split on newlines; the first four lines are
 * stored as `price`, `title`, `location` and `miles` (in that order).
 *
 * @param {string} searchTerm - Free-text search query.
 * @returns {Promise<Array<{price: string, title: string, location: string,
 *   miles: string, imgUrl: (string|null), itemURL: string}>>} The scraped
 *   listings, or an empty array when the listing selector could not be
 *   found/evaluated.
 */
const getItems = async searchTerm => {
  const itemSelector = 'div > div > span > div > a[tabindex="0"]';

  // init() later reads `browser` as a global to open the mail page, so the
  // handle is published on the global object explicitly instead of through an
  // undeclared assignment (which throws in strict mode).
  global.browser = await puppeteer.launch({
    headless: true,
    timeout: 0,
    args: ["--no-sandbox"]
  });
  const page = await global.browser.newPage();

  // encodeURIComponent (not encodeURI) so characters such as `&`, `=` and `#`
  // inside the search term cannot break out of the `query` parameter.
  await page.goto(`https://facebook.com/marketplace/tampa/search/?query=${encodeURIComponent(searchTerm)}&sort=created_date_descending&exact=false`);
  await autoScroll(page);

  try {
    await page.waitForSelector(itemSelector);
    return await page.evaluate(selector => {
      const imgSelector = 'div > div > span > div > a > div > div > div > div > div > div > img';
      return Array.from(document.querySelectorAll(selector), item => {
        const [price, title, place, miles] = item.innerText.split(/\n/);
        // A listing without a matching <img> must not abort the whole scrape.
        const imgNode = item.querySelector(imgSelector);
        return {
          price,
          title,
          location: place,
          miles,
          imgUrl: imgNode ? imgNode.getAttribute('src') : null,
          itemURL: item.getAttribute('href')
        };
      });
    }, itemSelector);
  } catch (err) {
    console.log("Selector error.", err);
    return [];
  }
}
// This takes care of the auto scrolling problem
/**
 * Scrolls the page downwards in fixed 100px steps, one step every 100ms, until
 * the scrolled distance reaches the document height or the document height
 * exceeds 9000.
 *
 * @param {Object} page - Object exposing `evaluate(fn)`; the scrolling callback
 *   is handed to it.
 * @returns {Promise<void>} Settles once scrolling has stopped.
 */
async function autoScroll(page) {
  // The callback is passed to page.evaluate as a whole, so every value it needs
  // is declared inside it.
  const scrollInSteps = async () => {
    const stepPx = 100;
    const tickMs = 100;
    const heightCap = 9000;
    let scrolledPx = 0;

    await new Promise(done => {
      const ticker = setInterval(() => {
        const pageHeight = document.body.scrollHeight;
        window.scrollBy(0, stepPx);
        scrolledPx += stepPx;

        const reachedBottom = scrolledPx >= pageHeight;
        const pageTooTall = pageHeight > heightCap;
        if (!reachedBottom && !pageTooTall) {
          return;
        }
        clearInterval(ticker);
        done();
      }, tickMs);
    });
  };

  await page.evaluate(scrollInSteps);
}
/**
 * Runs the duplicate-removal DELETE against the `jeeps` table.
 *
 * NOTE(review): the join condition deletes a row when another row has the SAME
 * price but a GREATER title, which looks like it removes distinct listings
 * rather than true duplicates. The statement is left unchanged because the
 * table schema (e.g. whether a unique id column exists) is not visible here —
 * confirm and correct against the real schema.
 *
 * @returns {Promise<void>} Resolves when the statement has run.
 * @throws {Error} Rejects when the query fails; the driver's error is kept on `cause`.
 */
const removeDupes = async function () {
  // remove duplicates
  const sql = `DELETE
 t1
FROM
 jeeps t1
INNER JOIN jeeps t2 WHERE
 t1.title < t2.title AND t1.price = t2.price `;
  return new Promise(function (resolve, reject) {
    con.query(sql, function (err) {
      if (err) {
        // Keep the real failure reason instead of a generic, misleading message.
        const failure = new Error("Failed to remove duplicate jeeps: " + err.message);
        failure.cause = err;
        reject(failure);
        return;
      }
      resolve();
    });
  });
}
/**
 * Returns the items of `objNew` that are not already present in `objOld`.
 *
 * Items are matched on their `itemURL` property. Comparing the objects
 * themselves (as a plain reference-based difference does) can never match a
 * freshly scraped object with a row loaded from the database, so every item
 * would always be reported as new.
 * NOTE(review): assumes `itemURL` is stable between scrapes for the same
 * listing — confirm that the scraped href carries no per-visit parameters.
 *
 * @param {Array<Object>} objNew - Freshly scraped items.
 * @param {Array<Object>} objOld - Previously stored items.
 * @returns {Promise<Array<Object>>} Items from `objNew` whose `itemURL` does not
 *   occur in `objOld`; an empty array when `objNew` is not an array.
 */
const getDifferences = async function (objNew, objOld) {
  console.log("Inside Differences")
  const freshItems = Array.isArray(objNew) ? objNew : [];
  const storedItems = Array.isArray(objOld) ? objOld : [];
  const knownUrls = new Set(storedItems.map(item => item.itemURL));
  return freshItems.filter(item => !knownUrls.has(item.itemURL));
}
/**
 * Runs one scrape cycle: scrape the current listings, load the stored ones,
 * work out which listings are new, persist them, and request the mail page
 * when at least one new listing was found.
 *
 * The stored rows are awaited before the comparison (previously the comparison
 * ran before the query had answered, so it always saw `undefined`). If the
 * stored rows cannot be loaded the run stops, because without them every
 * listing would be reported as new.
 *
 * The browser and the database connection are released in `finally`, so the
 * process can exit after each run.
 *
 * @returns {Promise<void>}
 */
const init = async function () {
  try {
    const newItems = await getItems("Jeep Wrangler");

    let oldItems;
    try {
      oldItems = await getSavedJeeps();
    } catch (err) {
      console.log("Promise rejection error: " + err);
      return;
    }

    const finalArray = await getDifferences(newItems, oldItems);
    await saveNewJeeps(finalArray);
    await updateAllItems(finalArray);

    if (finalArray.length > 0) {
      // Fire Off Email
      const page2 = await browser.newPage();
      await page2.goto(`http://john.example.com/mail.php`);
    }
  } finally {
    // `browser` is the global set by getItems(); it does not exist if the
    // launch itself failed.
    if (typeof browser !== "undefined" && browser) {
      try {
        await browser.close();
      } catch (err) {
        console.error("Closing the browser failed:", err);
      }
    }
    // end() lets already-queued queries finish before closing the connection.
    con.end(function (err) {
      if (err) {
        console.error("Closing the database connection failed:", err);
      }
    });
  }
}
init().catch(function (err) {
  console.error("Scrape run failed:", err);
  process.exitCode = 1;
});

Thanks in advance for your suggestions or comments. I asked a similar question a while back, but the app wasn't ready for review. It is now, and I am ready for your review - whether good or bad.

Regards, John

[1]: https://i.sstatic.net/DKXxz.png

lang-sql

AltStyle によって変換されたページ (->オリジナル) /