##Review Wanted##
Review Wanted
Application Summary
###Application Summary### The application is a simple screen scraper that notifies the user when new items are posted.
My Challenges
###My Challenges### First of all, I have never used any of the technologies I had to use in the app (NodeJS, Puppeteer, and Express). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.
###WorkFlow###
WorkFlow
[![WorkFlow][1]][1]
###Source Code### Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.
Source Code
Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.
Regards, John [1]: https://i.sstatic.net/DKXxz.png
##Review Wanted##
###Application Summary### The application is a simple screen scraper which is to notify the user when new items are posted.
###My Challenges### First of all, I have never used any of the technologies I had to use in the app (NodeJS, Puppeteer, and Express). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.
###WorkFlow###
[![WorkFlow][1]][1]
###Source Code### Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.
Regards, John [1]: https://i.sstatic.net/DKXxz.png
Review Wanted
Application Summary
The application is a simple screen scraper which is to notify the user when new items are posted.
My Challenges
First of all, I have never used any of the technologies I had to use in the app (NodeJS, Puppeteer, and Express). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.
WorkFlow
Source Code
Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.
Regards, John
Scrape page and send user only new results
Scrape Page and Send User Only New Results
##Review Wanted##
###Application Summary### The application is a simple screen scraper which is to notify the user when new items are posted.
The code is run as a CRON job every ten minutes. It will scrape the target page and return an array of ALL items matching the search criteria ( hard-coded ) by the user.
The results are compared to the results from the previous time the code was run. If there are any new items, the user is notified.
###My Challenges### First of all, I have never used any of the technologies I had to use in the app (NodeJS, Puppeteer, and Express). More significant than that is how to track what constitutes a "new" item, an item that has been "seen", etc.
Currently, I added a column in the database, "seen". When the user hits the front end, I will mark those results as "seen".
Also, the way I am saving the items in general may need refactoring. I will leave that up to your opinion.
###WorkFlow###
Here is a breakdown of how the app works:
[![WorkFlow][1]][1]
###Source Code### Here is the complete source code of the app. It functions well, as far as I can tell. I am concerned about the Duplicate issue though.
const puppeteer = require('puppeteer');
const _ = require("lodash");
var mysql = require('mysql');
// Single MySQL connection shared by every query helper in this script.
// Each setting can be overridden through an environment variable so that
// credentials do not have to live in source control; the fallbacks are the
// original development values (local server, root user, empty password).
const con = mysql.createConnection({
  host: process.env.DB_HOST || 'localhost',
  user: process.env.DB_USER || 'root',
  password: process.env.DB_PASSWORD || '',
  database: process.env.DB_NAME || 'mydatabase',
});

// Items already scraped.
// NOTE(review): nothing in this file reads this module-level variable; init()
// declares its own local `oldItems`, which shadows it. Kept for compatibility.
var oldItems;
/**
 * Loads every row currently stored in the `jeeps` table.
 *
 * Declared with `const` so the name is a real binding rather than an
 * implicit global (which throws in strict mode / ES modules).
 *
 * @returns {Promise<Array<Object>>} Resolves with the rows handed back by the
 *   driver's query callback.
 * @throws Rejects with the driver's own error object when the query fails, so
 *   the caller sees the real cause.
 */
const getSavedJeeps = function () {
  return new Promise((resolve, reject) => {
    con.query('SELECT * FROM jeeps', (err, rows) => {
      if (err) {
        // Forward the original error; a generic replacement message would hide
        // what actually went wrong (bad credentials, missing table, ...).
        reject(err);
        return;
      }
      resolve(rows);
    });
  });
};
/**
 * Runs one INSERT per row and waits for each to finish before starting the
 * next, so the returned promise only settles once the database has answered
 * for every row.
 *
 * A failed insert is logged and skipped rather than aborting the batch.
 *
 * @param {string} insertSql - Statement with a single `?` placeholder that
 *   receives the row object.
 * @param {Array<Object>} [rows] - Rows to insert; null/undefined is treated
 *   as an empty list.
 * @returns {Promise<boolean>} `true` when every insert succeeded.
 */
const insertRows = async function (insertSql, rows) {
  let allSaved = true;
  for (const row of rows || []) {
    try {
      await new Promise((resolve, reject) => {
        con.query(insertSql, row, (err) => (err ? reject(err) : resolve()));
      });
      console.log("Save function complete");
    } catch (err) {
      allSaved = false;
      console.error(`Insert failed (${insertSql}): ${err.message}`);
    }
  }
  return allSaved;
};

/**
 * Stores the given listings in the `newjeeps` table, then runs removeDupes().
 *
 * The de-duplication step is awaited so it cannot run before the inserts have
 * completed and cannot leave an unhandled rejection behind.
 *
 * NOTE(review): the rows go into `newjeeps`, while the removeDupes() statement
 * in this file targets `jeeps` — confirm that is intended.
 *
 * @param {Array<Object>} entity - Listings to insert (one row per object).
 * @returns {Promise<boolean>} `true` when every insert and the cleanup
 *   succeeded, `false` otherwise. Never rejects.
 */
const saveNewJeeps = async function (entity) {
  const allSaved = await insertRows('INSERT INTO newjeeps SET ?', entity);
  try {
    await removeDupes();
  } catch (err) {
    console.error(`Duplicate cleanup failed: ${err.message}`);
    return false;
  }
  return allSaved;
};

/**
 * Appends the given listings to the `jeeps` table, which getSavedJeeps()
 * reads back on the next run.
 *
 * @param {Array<Object>} entity - Listings to insert (one row per object).
 * @returns {Promise<boolean>} `true` when every insert succeeded. Never
 *   rejects.
 */
const updateAllItems = async function (entity) {
  return insertRows('INSERT INTO jeeps SET ?', entity);
};
// Browser handle shared at module level: getItems() launches the browser and
// init() later opens another tab on it for the notification request, so the
// handle has to outlive getItems(). Declared explicitly instead of leaking an
// implicit global.
let browser;

/**
 * Scrapes the Facebook Marketplace (Tampa) search results for `searchTerm`.
 *
 * @param {string} searchTerm - Free-text query; it is escaped with
 *   encodeURIComponent so characters such as `&`, `=` or `#` cannot break the
 *   query string.
 * @returns {Promise<Array<Object>>} One object per result link with the keys
 *   `price`, `title`, `location`, `miles` (the first four lines of the link's
 *   text, in that order), `imgUrl` (null when the link has no matching image)
 *   and `itemURL` (the link's href). Resolves with an empty array when the
 *   result selector never appears or the extraction fails.
 * @throws Rejects if launching the browser, opening the page, navigating or
 *   scrolling fails.
 */
const getItems = async searchTerm => {
  browser = await puppeteer.launch({
    headless: true,
    timeout: 0, // presumably disables the launch timeout — confirm against the Puppeteer docs
    args: ["--no-sandbox"],
  });
  const page = await browser.newPage();

  const query = encodeURIComponent(searchTerm);
  await page.goto(`https://facebook.com/marketplace/tampa/search/?query=${query}&sort=created_date_descending&exact=false`);
  await autoScroll(page);

  const listingSelector = 'div > div > span > div > a[tabindex="0"]';
  const imageSelector = 'div > div > span > div > a > div > div > div > div > div > div > img';

  try {
    await page.waitForSelector(listingSelector);

    // The callback runs inside the page, so it can only use what is passed in
    // as arguments; nothing from this module's scope is visible there.
    return await page.evaluate((linkSel, imgSel) => {
      return Array.from(document.querySelectorAll(linkSel), link => {
        // The link text is one field per line: price, title, location, miles.
        const [price, title, location, miles] = link.innerText.split(/\n/);
        // A listing without a matching image must not abort the whole scrape.
        const image = link.querySelector(imgSel);
        return {
          price,
          title,
          location,
          miles,
          imgUrl: image ? image.getAttribute('src') : null,
          itemURL: link.getAttribute('href'),
        };
      });
    }, listingSelector, imageSelector);
  } catch (err) {
    // Log the cause as well, and hand back an empty list so callers always
    // receive an array.
    console.log("Selector error.", err);
    return [];
  }
};
/**
 * Scrolls the given page downwards in fixed steps so that content which is
 * only rendered on scroll has a chance to appear.
 *
 * Every 100 ms the page is moved down by 100 px. Scrolling stops once the
 * distance covered reaches the document height sampled on that tick, or once
 * that height exceeds 9000.
 *
 * @param {Object} page - Object whose `evaluate` runs the callback in the
 *   page context.
 * @returns {Promise<void>} Settles when scrolling has stopped.
 */
async function autoScroll(page) {
  await page.evaluate(() => {
    const step = 100;
    let scrolled = 0;

    return new Promise((done) => {
      const ticker = setInterval(() => {
        // Sample the height before scrolling; it may grow as content loads.
        const pageHeight = document.body.scrollHeight;
        window.scrollBy(0, step);
        scrolled += step;

        const finished = scrolled >= pageHeight || pageHeight > 9000;
        if (!finished) {
          return;
        }
        clearInterval(ticker);
        done();
      }, 100);
    });
  });
}
/**
 * Runs the clean-up DELETE against the `jeeps` table.
 *
 * NOTE(review): the statement removes a row whenever another row has the SAME
 * price and a lexicographically GREATER title. Two rows with identical titles
 * are therefore never matched (`t1.title < t2.title` is false for equal
 * values), while distinct listings that merely share a price are. Removing
 * true duplicates needs an equality match on the identifying columns plus a
 * tie-breaker on a unique column; the table schema is not visible here, so
 * the statement is left unchanged — confirm and fix against the real schema.
 *
 * @returns {Promise<void>} Resolves once the DELETE has completed.
 * @throws Rejects with the driver's own error object when the query fails.
 */
const removeDupes = async function () {
  // Local binding; an undeclared assignment would leak a global and throws in
  // strict mode.
  const sql = `DELETE
    t1
  FROM
    jeeps t1
  INNER JOIN jeeps t2 WHERE
    t1.title < t2.title AND t1.price = t2.price `;

  return new Promise((resolve, reject) => {
    con.query(sql, (err) => {
      if (err) {
        // Surface the real cause rather than a generic message.
        reject(err);
        return;
      }
      resolve();
    });
  });
};
/**
 * Builds the string used to decide whether two listing objects describe the
 * same listing.
 *
 * Uses `itemURL` with any query string or fragment removed; falls back to
 * title/price/location when no URL is present.
 * NOTE(review): assumes the path part of `itemURL` identifies a listing and
 * that the query string is not part of its identity — confirm against real
 * scraped data.
 *
 * @param {Object} item - Scraped listing or stored row.
 * @returns {string} Comparison key.
 */
const listingKey = function (item) {
  const source = item || {};
  if (typeof source.itemURL === 'string' && source.itemURL !== '') {
    return `url:${source.itemURL.split(/[?#]/)[0]}`;
  }
  return `fields:${JSON.stringify([source.title, source.price, source.location])}`;
};

/**
 * Returns the listings in `objNew` that are not already present in `objOld`.
 *
 * Listings are matched by listingKey() rather than by object identity: a
 * freshly scraped object and a database row are never the same object, so an
 * identity-based difference reports every listing as new on every run.
 * Repeated listings inside `objNew` are reported once (first occurrence).
 *
 * @param {Array<Object>} [objNew] - Freshly scraped listings; null/undefined
 *   is treated as empty.
 * @param {Array<Object>} [objOld] - Previously stored listings; null/undefined
 *   is treated as empty.
 * @returns {Promise<Array<Object>>} The unseen listings, in scrape order.
 */
const getDifferences = async function (objNew, objOld) {
  console.log("Inside Differences")
  const knownKeys = new Set((objOld || []).map(listingKey));
  const unseen = [];
  for (const item of objNew || []) {
    const key = listingKey(item);
    if (knownKeys.has(key)) {
      continue;
    }
    knownKeys.add(key);
    unseen.push(item);
  }
  return unseen;
};
/**
 * One complete run of the scraper: scrape the current listings, load the
 * stored ones, persist whatever is new and trigger the notification page when
 * there is at least one new listing.
 *
 * The stored listings are awaited before the comparison; otherwise the
 * comparison runs against `undefined` and every scraped listing looks new.
 * If the stored listings cannot be loaded the run stops without saving or
 * notifying, because "new" cannot be determined without them.
 *
 * The browser and the database connection are always released at the end so
 * that the process can exit when run from a scheduler.
 *
 * @returns {Promise<void>}
 */
const init = async function () {
  try {
    const newItems = await getItems("Jeep Wrangler");

    var oldItems;
    try {
      oldItems = await getSavedJeeps();
    } catch (err) {
      console.log("Promise rejection error: " + err);
      return;
    }

    const finalArray = await getDifferences(newItems, oldItems);
    await saveNewJeeps(finalArray);
    await updateAllItems(finalArray);

    if (finalArray.length > 0) {
      // Fire Off Email
      const page2 = await browser.newPage();
      await page2.goto(`http://john.example.com/mail.php`);
    }
  } finally {
    try {
      // `browser` is set by getItems(); the typeof guard keeps this safe when
      // the launch itself failed and the name was never created.
      if (typeof browser !== 'undefined' && browser) {
        await browser.close();
      }
    } finally {
      // Presumably flushes queued queries before closing — confirm against
      // the mysql driver documentation.
      con.end(function (err) {
        if (err) {
          console.log("Error closing database connection: " + err);
        }
      });
    }
  }
};

// Report a failed run instead of leaving an unhandled rejection.
init().catch(function (err) {
  console.error("Scrape run failed: " + err);
  process.exitCode = 1;
});
Thanks in advance for your suggestions or comments. I asked a similar question a while back, but the app wasn't ready for review. It is now, and I am ready for your review - whether good or bad.
Regards, John

[1]: https://i.sstatic.net/DKXxz.png