This Node app runs daily to update an overview of interest rates from multiple providers. The overview is displayed on a static website.
The app performs the following tasks:
- Scrapes providers' sites to collect interest rates
- Notifies via email if any rates have changed
- Saves the raw data to a database
- Processes the raw data into a format ready for publishing
- Tests the processed values to make sure they are valid
- Saves the processed values to a database
- Renders a static HTML file using a template plus the processed results
- Uploads the HTML file to a static site host
I am currently running the app as a daily cronjob on an Ubuntu machine on DigitalOcean. It works and is fully automated.
This is my first app. I find the asynchronous parts of Node particularly difficult to understand, with some modules using async/await, others Promises, and others callbacks. I would ideally like to clean that up and only use one of these methods, but I haven't been able to do that yet without breaking the app.
Any feedback on how to make the code more readable, organized, consistent, robust and/or modular would be appreciated.
Here is the full code for the app:
// Test-mode switch: set to 1 to skip database writes and uploads to the static host
const testVar = 0;
if (testVar === 1) {
  console.log('Test mode: not writing to DB, not uploading to static website host');
  console.log('__dirname: ', __dirname);
}
// Require Modules
const { MongoClient, ObjectID } = require('mongodb');
const Xray = require('x-ray');
const puppeteer = require('puppeteer');
const _ = require('lodash');
const nodemailer = require('nodemailer');
const fs = require('fs')
const express = require('express')
const hbs = require('hbs')
const app = express()
const js2xmlparser = require('js2xmlparser')
const scp = require('scp');
const moment = require('moment')
// Holds the interest data; processResults() first points it at the raw scrape
// results and later replaces it with the structure that gets published
let interest = {};

// Turn off TLS certificate verification for this process so that sites with
// invalid certificates can still be scraped
process.env.NODE_TLS_REJECT_UNAUTHORIZED = '0';

// Today's date in moment's 'LL' format (English locale), stored as _meta.updated
moment.locale('en');
const dateVar = moment().format('LL');
// Email Configuration
// SMTP settings used for the "rates have changed" notification (credentials redacted)
const smtpConfig = {
  host: '', // Redacted
  port: 465,
  secure: true,
  auth: {
    user: '', // Redacted
    pass: '', // Redacted
  },
  // NOTE(review): the mail server's certificate is not verified
  tls: { rejectUnauthorized: false },
};

// Shared mail transport, used by processResults() to send the notification
const transporter = nodemailer.createTransport(smtpConfig);
// Non-JS Scraper Configuration

/**
 * Returns the first substring of `value` that matches `pattern`.
 *
 * @param {*} value - Scraped value; anything that is not a string never matches.
 * @param {RegExp} pattern - Pattern to look for.
 * @returns {string|undefined} The first match, or undefined when there is none.
 */
function firstMatch(value, pattern) {
  if (typeof value !== 'string') {
    return undefined;
  }
  const found = value.match(pattern);
  return found === null ? undefined : found[0];
}

// x-ray instance with the custom filters referenced from the source selectors.
// Every filter passes a value it cannot handle (non-string, or no match) through
// unchanged instead of throwing, the same way `trim` already did.
const x = Xray({
  filters: {
    // Strip surrounding whitespace
    trim: function (value) {
      return typeof value === 'string' ? value.trim() : value;
    },
    // Keep the last five characters
    slice: function (value) {
      return typeof value === 'string' ? value.slice(-5) : value;
    },
    // First occurrence of "<char>,<char><char>%" in the text
    match: function (value) {
      const rate = firstMatch(value, /.,..%/);
      return rate === undefined ? value : rate;
    },
    // First occurrence of "<char>,<char><char>", with a percent sign appended
    matchNoPerc: function (value) {
      const rate = firstMatch(value, /.,../);
      return rate === undefined ? value : rate + '%';
    },
    // Keep the first five characters, trimmed
    sliceBeg: function (value) {
      return typeof value === 'string' ? value.slice(0, 5).trim() : value;
    },
  },
});
// Today and Yesterday strings

/** Formats a Date as zero-padded "DD-MM-YYYY" using its local-time fields. */
function toDayMonthYear(when) {
  const day = String(when.getDate()).padStart(2, '0');
  const month = String(when.getMonth() + 1).padStart(2, '0');
  return `${day}-${month}-${when.getFullYear()}`;
}

const d = new Date();
const datestring = toDayMonthYear(d);
// Step the same Date object back one day; from here on `d` is yesterday
d.setDate(d.getDate() - 1);
const yDatestring = toDayMonthYear(d);

// DB lookup by date
const date = new Date();
const options = { year: 'numeric', month: 'long', day: 'numeric' };
const iDag = date.toLocaleString('en-EN', options);
// Define scraping sources (non-JS)
// Each entry carries the name the result is stored under, the page URL, a scope
// selector and a map of result key -> selector; scrape() passes url, scope and
// sel to x-ray.

// Selector of the list holding this provider's rates; each rate sits in the h4
// of one list item
const rateList = 'div div div div.col-sm-10.col-sm-offset-1 div:nth-child(1) div:nth-child(2) ul';
const rateHeading = (position) => `${rateList} li:nth-child(${position}) h4`;

const src = [
  {
    name: '', // Redacted
    url: '', // Redacted
    scope: '', // Redacted
    sel: {
      oFa1: rateHeading(1),
      vFa1: rateHeading(2),
      vBr1: rateHeading(3),
    },
  },
  // ...et cetera (total of 14 sources)
];
//Execute scrape
// Result document for today's run: scrape() stores its results in doc.interest
// and processResults() saves the document to the database. Declared explicitly
// (it used to be an implicit global, which throws in strict mode).
const doc = {
  interest: {},
  date: datestring,
};

// Start both scrapers; they run concurrently
const s1 = scrape();
const s2 = scrapeJS();

Promise.all([s1, s2])
  .then(() => {
    console.log('Scraping complete, processing results');
    processResults();
  })
  .catch((err) => {
    // Without this handler a failed scraper ends in an unhandled rejection
    console.log('Run failed, results were not processed:', err);
    process.exitCode = 1;
  });
/* Functions below */
// Non-JS scraper
/**
 * Scrapes every static (non-JS) source and stores each result under the
 * source's name.
 *
 * The returned promise resolves only after every source has called back, so
 * callers can rely on the results being in place. A source that fails is logged
 * and skipped; it does not reject the promise or prevent the other sources from
 * being stored.
 *
 * @param {Array<{name: string, url: string, scope: string, sel: Object}>} [sources=src]
 *   Sources to scrape.
 * @param {Function} [scraper=x] - x-ray style function: `scraper(url, scope, sel)`
 *   returns a function that takes a Node-style `(err, res)` callback.
 * @param {Object} [results=doc.interest] - Object that receives one entry per
 *   successfully scraped source.
 * @returns {Promise<void>}
 */
async function scrape(sources = src, scraper = x, results = doc.interest) {
  const pending = sources.map(
    (item) =>
      new Promise((resolve) => {
        scraper(item.url, item.scope, item.sel)((err, res) => {
          if (err) {
            console.log(err);
          } else {
            results[item.name] = res;
          }
          // Resolve in both cases: a single failing source is best-effort
          resolve();
        });
      })
  );
  await Promise.all(pending);
  console.log('Non-JS scraping complete');
}
// JS scraper
/**
 * Scrapes the JavaScript-rendered source with a headless browser.
 *
 * The browser is always closed, and the close is awaited, even when navigation
 * or extraction throws, so a failed run does not leave a browser process behind.
 *
 * @param {{launch: Function}} [browserLauncher=puppeteer] - Object with a
 *   puppeteer-compatible `launch(options)` method.
 * @returns {Promise<void>} Rejects with the underlying error when a step fails.
 */
async function scrapeJS(browserLauncher = puppeteer) {
  const browser = await browserLauncher.launch({
    args: ['--no-sandbox', '--disable-setuid-sandbox'],
  });
  try {
    const page = await browser.newPage();
    await page.goto(''); // Redacted
    await page.waitForSelector('#interest1');
    // Read all four cells in a single round trip to the page
    const [int1, int2, int3, int4] = await page.evaluate(() =>
      ['#interest1', '#interest2', '#interest5', '#interest6'].map((selector) =>
        document.querySelector(selector).textContent.trim()
      )
    );
    //Put into pre-defined object to use later on
  } finally {
    await browser.close();
  }
  console.log('JS scraping complete');
}
// Process scrape results
function processResults() {
right = doc.interest;
interest = right;
//Database
MongoClient.connect('mongodb://...', function(err, database) { // URL redacted
if (err) {
return console.log('Unable to connect to MongoDB server');
}
console.log('Connected to MongoDB server');
const db = database.db('...')
// Retrieve yesterday's results
db.collection('interest').find({ date: yDatestring }).toArray()
.then(res => {
left = res[0].interest;
//Compare the two objects
if (_.isEqual(left, right)) {
console.log('No changes since yesterday');
} else {
console.log('Changes detected, sending email')
// Send email notification
var changes = JSON.stringify(changes(left, right), undefined, 2);
var html = `<p>Interest rates have changed. These are the changes:</p><p>${changes}</p>`;
var message = {
from: 'interest@...',
to: '...',
subject: 'Interests have changed',
text: 'interests have changed!',
html: html
};
transporter.sendMail(message);
}
})
.then(() => {
// Insert today's results
if (testVar != 1) {
db.collection('interest').insertOne(doc);
}
})
.then(() => {
function number(string) {
return parseFloat(string.replace(',', '.')) / 100
}
function percent(num) {
return (num * 100).toFixed(2).toString().replace('.', ',') + '%'
}
function percentWhole(num) {
return (num * 100).toFixed(0).toString().replace('.', ',') + '%'
}
function comma(a) {
return a.replace('.', ',')
}
function fix(a) {
return percent(number(a))
}
function weighting(v1, v2, h1, h2, weights) {
return percent(
(number(v1) * h1 * weights + number(v2) * (h1 + h2 - h1 * weights)) / (h1 + h2)
)
}
// Assumptions
var weights = 0.8
var ratios = {
...: { // Redacted
h1: 0.7,
h2: 0.1
},
... // Et cetera
}
interest = {
_meta: {
updated: dateVar
},
lender1: {
party: {
name: '',
img: '',
url: ''
},
maximum: {
val: `${interest['lender1'].int1}`,
desc: '' // Some description
},
// Many more properties, removed for brevity
}
}
// Insert today's results
if (testVar != 1) {
db.collection('table').insertOne(interest);
}
})
.then(() => {
delete interest['_id']
// Test that the interest rates are of the correct form for the HTML table
var testing = []
_.forEach(interest, function(value, key) {
testing.push(_.get(interest, [key, 'vBr', 'val'], '-'))
})
_.forEach(interest, function(value, key) {
testing.push(_.get(interest, [key, 'vFa', 'val'], '-'))
})
_.forEach(interest, function(value, key) {
testing.push(_.get(interest, [key, 'oBr', 'val'], '-'))
})
_.forEach(interest, function(value, key) {
testing.push(_.get(interest, [key, 'oFa', 'val'], '-'))
})
for (var i = 0; i < testing.length; i++) {
if (/\d,\d\d%/.test(testing[i]) || testing[i] == '-') {} else {
console.log('Error: interest rates are not of the correct form')
break
}
}
// Update XML feed
fs.writeFile(__dirname + "/table.xml", js2xmlparser.parse('rows', interest), function(err) {
if (err) {
return console.log(err);
}
console.log('XML saved to table.xml')
var optionsXml = {
file: __dirname + '/table.xml',
user: '...', // Redacted
host: '', // Redacted
port: '', // Redacted
path: '' // Redacted
}
if (testVar != 1) {
scp.send(optionsXml, function(err) {
if (err) console.log(err);
else console.log('XML uploaded to static website host');
});
}
})
// Generate HTML using HBS template
app.set('view engine', 'hbs')
app.render(require.resolve('./views/index'), interest, function(err, html) {
if (err) {
console.log(err);
} else {
fs.writeFile(__dirname + "/index.html", html, function(err) {
console.log("HTML file saved");
var optionsHtml = {
file: __dirname + '/index.html',
user: '...', // Redacted
host: '', // Redacted
port: '', // Redacted
path: '' // Redacted
}
if (testVar != 1) {
scp.send(optionsHtml, function(err) {
if (err) console.log(err);
else console.log('HTML uploaded to static website host');
});
}
});
}
})
})
.then(() => {
database.close()
console.log('Database connection closed')
})
})
}
/**
 * Recursively collects the parts of `b` that differ from `a`.
 *
 * @param {*} a - Previous value.
 * @param {*} b - Current value.
 * @returns {*} undefined when both are deeply equal; for two arrays, an array
 *   holding only the changed indices; for two objects, an object holding only
 *   the changed keys of `b`; otherwise `b` itself.
 */
function changes(a, b) {
  if (_.isEqual(a, b)) {
    return undefined;
  }

  const bothArrays = _.isArray(a) && _.isArray(b);
  const bothObjects = _.isObject(a) && _.isObject(b);
  if (!bothArrays && !bothObjects) {
    return b;
  }

  // Walk `b` and keep only the entries whose nested diff is not empty
  return _.reduce(b, (collected, current, position) => {
    const delta = changes(a[position], current);
    if (!_.isUndefined(delta)) {
      collected[position] = delta;
    }
    return collected;
  }, bothArrays ? [] : {});
}
1 Answer 1
I've always seen required modules declared at the very top of the file. You might also want to alphabetize them and group them by category, which is a common best practice. Can I ask what the file structure of the app looks like, or is this code all in one file? I would definitely have a scraper controller and a model and/or schema file for your data.
const _ = require('lodash');
const express = require('express');
const fs = require('fs');
const hbs = require('hbs');
const js2xmlparser = require('js2xmlparser');
const moment = require('moment');
const nodemailer = require('nodemailer');
const puppeteer = require('puppeteer');
const scp = require('scp');
const Xray = require('x-ray');
const app = express(); //should go in an app.js or server.js file
const { MongoClient, ObjectID } = require('mongodb'); //should go in a db file
// Set the locale in its own statement: moment.locale() returns the locale key,
// so chaining it onto require() would bind `moment` to that string
moment.locale('en');
Have you considered using mongoose? For me, mongoose schemas are really easy to work with and are a huge help.
You might also want to use arrow functions for the `_.reduce()` callbacks in your `changes` function. And maybe use `let` and `const` where possible instead of `var`, just to stay consistent.
I'll take another look tomorrow when I get a chance, cool app!
-
Thank you for the reply. Everything is currently in one js file, which is the code above. I would ideally split the code up in some logical way, but have had a difficult time getting it to work because of the asynchronous tasks. From what I understand, I would need to wrap some of the parts into promise functions, but haven't been able to figure that out. Also, I don't have experience with splitting code up into different parts by nature, so not quite sure what the best way for modularization would be. Will likely need to learn more about module/view/controller types of segmentation. – nemesis, Jan 31, 2018 at 16:09
-
Definitely look into the 'Model-View-Controller' pattern, it will make things a lot easier for you and anyone else who reads your code. The 'module' pattern will allow you to export and import functions, data, objects, etc. into and out of different files in your application. I particularly like the 'revealing module' pattern. – Aaron Goldsmith, Jan 31, 2018 at 18:42