I'm currently learning Node.js, JavaScript and so on. I come from C++.
I need to parse a table (received as a single string) such as:
======================================================================================================
No. Name Cask Current Latest Auto-Update State
======================================================================================================
1/38 5KPlayer 5kplayer latest latest
2/38 Adobe Photoshop CC adobe-photoshop-cc 16 16
3/38 Alfred alfred 3.3.1_806 3.3.2_818 Y ignored
4/38 AppCleaner appcleaner 3.4 3.4 Y ignored
5/38 Github Atom atom 1.15.0 1.15.0 Y ignored
6/38 BetterZipQL betterzipql latest latest
7/38 Boom boom 1.6,1490693621 1.6,1490693621
8/38 CheatSheet cheatsheet 1.2.7 1.2.7
9/38 Cyberduck cyberduck 5.4.0.23761 5.4.0.23761
10/38 Dropbox dropbox 21.4.25 latest Y ignored
This is a list of the apps installed on a Mac, one line per app.
If the app is outdated ('Current' != 'Latest'), I keep the line and make an object out of it for later treatment.
I came up with a dirty, yet working, solution:
/**
 * Extracts the outdated rows from the table text.
 *
 * The first three lines (separator, header, separator) are skipped. Each
 * remaining line is split into columns on runs of two or more spaces, and a
 * line is kept when its fourth column (Current) differs from its fifth
 * column (Latest). Every kept row is also logged to the console.
 *
 * @param {string} array - The whole table as one newline-separated string.
 * @returns {string[][]} One array of column values per outdated row.
 */
function parseBrewCUArray(array) {
  const toUpdate = [];
  // remove useless lines
  const lines = array.split('\n').slice(3);
  for (const line of lines) {
    // Declared locally: assigning to an undeclared name creates a global in
    // sloppy mode and throws a ReferenceError in strict mode / ES modules.
    const splittedLine = line.split(/[ ]{2,}/);
    if (splittedLine[3] !== splittedLine[4]) {
      toUpdate.push(splittedLine);
      console.log(splittedLine);
    }
  }
  // Hand the collected rows back so the caller can process them further.
  return toUpdate;
}
But there must be a much better solution out there! Can someone optimize this a bit and make this piece of code more beautiful?
1 Answer 1
Your code can be simplified to the following:
/**
 * Returns the table text reduced to its three header lines plus every data
 * row whose Current column differs from its Latest column.
 *
 * Data rows are split into columns on runs of two or more spaces; the
 * Current and Latest values are the fourth and fifth columns.
 *
 * @param {string} str - The whole table as one newline-separated string.
 * @returns {string} The surviving lines, joined with newlines again.
 */
function parseBrewCUArray(str) {
  const allLines = str.split('\n');
  const keptLines = [];
  for (let position = 0; position < allLines.length; position += 1) {
    const line = allLines[position];
    if (position >= 3) {
      // Past the header: drop the row when Current and Latest are the same.
      const fields = line.split(/ {2,}/);
      if (fields[3] === fields[4]) {
        continue;
      }
    }
    keptLines.push(line);
  }
  return keptLines.join("\n");
}
// Sample input: three header lines (separator, column titles, separator)
// followed by one line per app.
// NOTE(review): as shown here the columns are separated by single spaces, while
// parseBrewCUArray splits on runs of two or more spaces — confirm that the
// literal keeps the original column alignment.
var s = `======================================================================================================
No. Name Cask Current Latest Auto-Update State
======================================================================================================
1/38 5KPlayer 5kplayer latest latest
2/38 Adobe Photoshop CC adobe-photoshop-cc 16 16
3/38 Alfred alfred 3.3.1_806 3.3.2_818 Y ignored
4/38 AppCleaner appcleaner 3.4 3.4 Y ignored
5/38 Github Atom atom 1.15.0 1.15.0 Y ignored
6/38 BetterZipQL betterzipql latest latest
7/38 Boom boom 1.6,1490693621 1.6,1490693621
8/38 CheatSheet cheatsheet 1.2.7 1.2.7
9/38 Cyberduck cyberduck 5.4.0.23761 5.4.0.23761
10/38 Dropbox dropbox 21.4.25 latest Y ignored`;
// Print the header plus the rows whose Current and Latest values differ.
console.log(parseBrewCUArray(s));
But usually I'd prefer to parse the string into some usable data structure first and then proceed with that:
// first the utilities:
// Converts a value to a string, but turns null and undefined into the empty
// string instead of the texts "null" / "undefined".
var string = function (value) {
  if (value === null || value === undefined) {
    return "";
  }
  return String(value);
};
// Builds a reusable replacer: the returned function converts its argument with
// string() and then applies String#replace with the given pattern and
// replacement (the replacement defaults to the empty string).
var replace = function (pattern, replacement = "") {
  return function (value) {
    const text = string(value);
    return text.replace(pattern, replacement);
  };
};
// Puts a backslash in front of every character that has a special meaning in
// regular expressions, so the result can be used as a literal pattern.
var escapeForRegex = replace(/[\-\[\]\/\{\}\(\)\*\+\?\.\\\^\$\|]/g, "\\$&");
/**
 * Collects every match of `needle` in `haystack`, hiding the boilerplate (and
 * the infinite-loop risk) of calling RegExp#exec() by hand.
 *
 * @param {*} haystack - Value to search; converted with string(), so null and
 *   undefined are searched as "".
 * @param {RegExp|*} needle - A regular expression, or any other value, which
 *   is escaped and searched for literally (globally).
 * @returns {Array} The exec() match arrays in order of appearance. A regex
 *   that is neither global nor sticky yields at most one match.
 */
function matchAll(haystack, needle) {
  const str = string(haystack);
  // Work on a private copy: exec() on a global/sticky regex starts at its
  // lastIndex, so a regex the caller has used before would otherwise skip
  // matches, and the caller's regex state would be modified as a side effect.
  const regex = needle instanceof RegExp
    ? new RegExp(needle.source, needle.flags)
    : new RegExp(escapeForRegex(needle), "g");
  const results = [];

  // Without the g or y flag exec() never advances, so there is only one match.
  if (!regex.global && !regex.sticky) {
    const onlyMatch = regex.exec(str);
    if (onlyMatch !== null) {
      results.push(onlyMatch);
    }
    return results;
  }

  let match;
  while ((match = regex.exec(str)) !== null) {
    results.push(match);
    if (match[0] === "") {
      // A zero-length match leaves lastIndex where it was; step forward by
      // hand (a whole code point in unicode mode) so the scan continues
      // instead of repeating the same position forever.
      const isAstral = (regex.unicode || regex.unicodeSets) &&
        str.codePointAt(regex.lastIndex) > 0xFFFF;
      regex.lastIndex += isAstral ? 2 : 1;
    }
  }
  return results;
}
/**
 * Parses a table whose columns are defined by their alignment rather than by
 * a separator character.
 *
 * - Every line that does not contain at least one letter or digit is dropped.
 * - The first remaining line is the header: it supplies the column names
 *   (trailing dots, colons and whitespace removed) and the column offsets.
 * - Each further line becomes an object with the column names as properties;
 *   a value is the trimmed text between a column's offset and the next
 *   column's offset (the last column runs to the end of the line).
 * - Tabs are not handled, because there is no single standard for their width.
 *
 * @param {*} str - The table text; converted with string().
 * @param {boolean} [allowSingleSpacesInTitle=false] - When true, words in the
 *   header separated by exactly one space form a single column title.
 * @returns {Object[]} One object per data row; an empty array when the input
 *   contains no line with a letter or digit.
 */
function parseTableByAlignment(str, allowSingleSpacesInTitle = false) {
  // String#match returns null when nothing matches; treat that as "no rows"
  // instead of failing on rows.shift().
  const rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g) || [];
  if (rows.length === 0) {
    return [];
  }

  const removeTrailingDots = replace(/[\.:\s]+$/, "");
  const titlePattern = allowSingleSpacesInTitle ? /\S+(?: \S+)*/g : /\S+/g;
  const titles = matchAll(rows.shift(), titlePattern);

  // One `"name": row.substring(start, end).trim(),` line per column; the last
  // column gets `undefined` as its end, i.e. the rest of the line.
  const properties = titles
    .map((match, position) => {
      const name = JSON.stringify(removeTrailingDots(match[0]));
      const end = position + 1 < titles.length ? titles[position + 1].index : undefined;
      return ` ${name}: row.substring(${match.index}, ${end}).trim(),\n`;
    })
    .join("");

  // NOTE(review): new Function compiles generated source code. The only text
  // taken from the input is the column name, and it is embedded via
  // JSON.stringify as a string literal; still, prefer a plain-data
  // implementation if the header can come from an untrusted source.
  const parseRow = new Function("row", `return {\n${properties}}`);
  return rows.map(parseRow);
}
// Sample input for parseTableByAlignment(). The blank first and last lines and
// the "=====" separators are harmless: the parser only keeps lines that
// contain at least one letter or digit.
// NOTE(review): parseTableByAlignment() cuts values by header column offsets;
// as shown here the columns are separated by single spaces — confirm that the
// literal keeps the original column alignment.
var s = `
======================================================================================================
No. Name Cask Current Latest Auto-Update State
======================================================================================================
1/38 5KPlayer 5kplayer latest latest
2/38 Adobe Photoshop CC adobe-photoshop-cc 16 16
3/38 Alfred alfred 3.3.1_806 3.3.2_818 Y ignored
4/38 AppCleaner appcleaner 3.4 3.4 Y ignored
5/38 Github Atom atom 1.15.0 1.15.0 Y ignored
6/38 BetterZipQL betterzipql latest latest
7/38 Boom boom 1.6,1490693621 1.6,1490693621
8/38 CheatSheet cheatsheet 1.2.7 1.2.7
9/38 Cyberduck cyberduck 5.4.0.23761 5.4.0.23761
10/38 Dropbox dropbox 21.4.25 latest Y ignored
`;
// Parse the table into one object per row ...
var data = parseTableByAlignment(s);
// ... and print only the rows whose Current and Latest values differ.
console.log(data.filter(item => item.Current !== item.Latest));
Using the Function constructor and evaluating a string as code ... well, that mostly draws negative responses. So here is a second implementation of parseTableByAlignment() that does without the Function constructor. The result is the same:
/**
 * Parses a table whose columns are defined by their alignment rather than by
 * a separator character.
 *
 * - Every line that does not contain at least one letter or digit is dropped.
 * - The first remaining line is the header: it supplies the column names
 *   (trailing dots, colons and whitespace removed) and the column offsets.
 * - Each further line becomes an object with the column names as properties;
 *   a value is the trimmed text between a column's offset and the next
 *   column's offset (the last column runs to the end of the line).
 *
 * @param {*} str - The table text; converted with string().
 * @param {boolean} [allowSingleSpacesInTitle=false] - When true, words in the
 *   header separated by exactly one space form a single column title.
 * @returns {Object[]} One object per data row; an empty array when the input
 *   contains no line with a letter or digit.
 */
function parseTableByAlignment(str, allowSingleSpacesInTitle = false) {
  // String#match returns null when nothing matches; treat that as "no rows"
  // instead of failing on rows.shift().
  const rows = string(str).match(/[^\r\n]*[a-zA-Z0-9][^\r\n]*/g) || [];
  if (rows.length === 0) {
    return [];
  }

  const removeTrailingDots = replace(/[\.:\s]+$/, "");
  const titlePattern = allowSingleSpacesInTitle ? /\S+(?: \S+)*/g : /\S+/g;
  const titles = matchAll(rows.shift(), titlePattern);

  // A column ends where the next title starts; the last column has no end
  // (undefined), so substring() runs to the end of the line.
  const columns = titles.map((match, position) => ({
    name: removeTrailingDots(match[0]),
    start: match.index,
    end: position + 1 < titles.length ? titles[position + 1].index : undefined,
  }));

  return rows.map((row) => {
    const obj = {};
    for (const column of columns) {
      obj[column.name] = row.substring(column.start, column.end).trim();
    }
    return obj;
  });
}