Making multiple calls to API and processing data in Node.JS

Question 1

I have written a chunk of Node that is acting as a web hook for a GitHub repository I have. Basically when a commit is made to that repo, a request gets made to my function which then looks for potentially sensitive data that has been checked in. I'm making multiple calls to the GitHub API to pull down data. I tried to handle all of these calls by using the async package's waterfall and each functions.

There are two sections to the waterfall call: getting the commit data and then getting the file data. Within the "file data" section I have the async each that is getting the actual file contents for each file.

I'm wondering if anyone can see any obvious best practices that I am not following for JavaScript programming. Am I handling the asynchronous calls the correct way? I created two helper functions to try and keep things clean/readable - should I break any other pieces out into their own functions?

var https = require('https')
var async = require('async')
module.exports = function (ctx,cb) { 
 console.log("**********************************************")
 var danger = ['password','pass','pw','pwd','passwd','key','user','username','secret']
 var re = new RegExp(danger.join('|'))
 async.waterfall([
 function(callback){
 var options = {
 host: 'api.github.com',
 path: getApiPath(ctx.data.head_commit.url),
 headers: {
 'User-Agent': 'Broham'
 }
 }
 getData(options, function(commitData){
 // console.log("commit data", commitData)
 callback(null,commitData.files) 
 })
 },
 //for each file, get the file content and look for potentially sensitive information that has been commited
 function(files, callback){
 // console.log("files", files)
 var review = []
 async.each(files, function(file, fileCallback){
 var fileOptions = {
 host: 'api.github.com',
 path: file.contents_url.replace('https://api.github.com', ''),
 headers: {
 'User-Agent': 'Broham'
 }
 }
 // get the JSON that will have the URL for the raw file content
 getData(fileOptions, function(fileData){
 // get the raw file content
 https.get(fileData.download_url, function(contentData){
 contentData.setEncoding('utf-8')
 var contents = ''
 contentData.on('data', function (data) {
 contents += data
 })
 contentData.on('end', function(){
 console.log('contents', contents)
 // check to see if there are any matches for the terms we want to review.
 matches = contents.match(re)
 if(matches.length > 0){
 // console.log("matches", matches)
 review = review.concat(contents.match(re))
 }
 fileCallback()
 })
 })
 })
 }, function(err){
 callback(null, review)
 })
 }
 ], function(err, results){
 if(results.length > 0){
 console.log("For review:", results)
 }
 cb(null, 'GitEye complete!')
 console.log("**********************************************")
 })
}
function getData(options, callback){
 https.get(options, function(res){
 res.setEncoding('utf-8')
 var body = ''
 res.on('data', function (data) {
 body += data
 })
 res.on('end', function(){
 // console.log('body', body)
 var data = JSON.parse(body);
 callback(data)
 })
 })
}
function getApiPath(url){
 var pieces = url.split('/')
 var user = pieces[3]
 var repo = pieces[4]
 var commit = pieces[6]
 // return "https://api.github.com/repos/" + user + "/" + repo + "/commits/" + commit
 return "/repos/" + user + "/" + repo + "/commits/" + commit
}

Question 2

Looking for sensitive terms

I see that you build a regex from a list of terms:

 var danger = ['password','pass','pw','pwd','passwd','key','user','username','secret']
 var re = new RegExp(danger.join('|'))

With this approach, some of the items in danger are redundant, because "password" and "passwd" will be already matched by "pass", "pwd" will be already matched by "pw", and "username" will be already matched by "user".

Overall this approach looks rather primitive and I suspect it will yield too many false positives to be useful.

Duplicated calls

In this code contents.match(re) is executed twice:

 matches = contents.match(re)
 if(matches.length > 0){
 // console.log("matches", matches)
 review = review.concat(contents.match(re))
 }

That's a waste, it would be better to use matches in place of the second call.

Truthy values in JavaScript

Non-empty arrays are truthy, so instead of if(matches.length > 0) you could write simply if (matches.length), and even just if (matches). The same goes for if(results.length > 0) later in the code.

janos janos 113k15 gold badges154 silver badges396 bronze badges · Answer 1 · 2017-09-24 13:12:23Z

Looking for sensitive terms

I see that you build a regex from a list of terms:

 var danger = ['password','pass','pw','pwd','passwd','key','user','username','secret']
 var re = new RegExp(danger.join('|'))

With this approach, some of the items in danger are redundant, because "password" and "passwd" will be already matched by "pass", "pwd" will be already matched by "pw", and "username" will be already matched by "user".

Overall this approach looks rather primitive and I suspect it will yield too many false positives to be useful.

Duplicated calls

In this code contents.match(re) is executed twice:

 matches = contents.match(re)
 if(matches.length > 0){
 // console.log("matches", matches)
 review = review.concat(contents.match(re))
 }

That's a waste, it would be better to use matches in place of the second call.

Truthy values in JavaScript

Non-empty arrays are truthy, so instead of if(matches.length > 0) you could write simply if (matches.length), and even just if (matches). The same goes for if(results.length > 0) later in the code.

Stack Exchange Network

Making multiple calls to API and processing data in Node.JS

1 Answer 1

Looking for sensitive terms

Duplicated calls

Truthy values in JavaScript

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Hot Network Questions

Making multiple calls to API and processing data in Node.JS

1 Answer 1

Looking for sensitive terms

Duplicated calls

Truthy values in JavaScript

Your Answer

Sign up or log in

Post as a guest

Post as a guest

Related

Hot Network Questions