I have a data set returned from an SQL database contained below, as you can see all data remains the same apart from one property, "name_alt".
[{
"language_id": "tly",
"language_name": "Talysh",
"speakers_native": 800000,
"total_speakers": 912000,
"name_alt": "Taleshi",
"country_main": "az"
}, {
"language_id": "tly",
"language_name": "Talysh",
"speakers_native": 800000,
"total_speakers": 912000,
"name_alt": "Talish",
"country_main": "az"
}, {
"language_id": "tly",
"language_name": "Talysh",
"speakers_native": 800000,
"total_speakers": 912000,
"name_alt": "Talishi",
"country_main": "az"
}, {
"language_id": "tly",
"language_name": "Talysh",
"speakers_native": 800000,
"total_speakers": 912000,
"name_alt": "Talysh",
"country_main": "az"
}, {
"language_id": "tly",
"language_name": "Talysh",
"speakers_native": 800000,
"total_speakers": 912000,
"name_alt": "Talyshi",
"country_main": "az"
}, {
"language_id": "lez",
"language_name": "Lezghian",
"speakers_native": 171400,
"total_speakers": 428400,
"name_alt": "Kiurinsty",
"country_main": "ru"
}, {
"language_id": "lez",
"language_name": "Lezghian",
"speakers_native": 171400,
"total_speakers": 428400,
"name_alt": "Kiurinty",
"country_main": "ru"
}, {
"language_id": "lez",
"language_name": "Lezghian",
"speakers_native": 171400,
"total_speakers": 428400,
"name_alt": "Lezghi",
"country_main": "ru"
}]
I want to combine all the name_alts into an array, so my data looks like the below:
[{
"language_id": "tly",
"language_name": "Talysh",
"speakers_native": 800000,
"total_speakers": 912000,
"name_alt": ["Talesh", "Taleshi", "Talish", "Talishi", "Talyshi"],
"country_main": "az"
}, {
"language_id": "lez",
"language_name": "Lezghian",
"speakers_native": 171400,
"total_speakers": 428400,
"name_alt": ["Kiurinsty", "Kiurinty", "Lezghi", "Lezgi", "Lezgian", "Lezgin"],
"country_main": "ru"
}]
I have written this function which takes the array, maps and filters it to return the data set I require. It works, but I am concerned it's not very performant. It takes the array that I want to compress (array
), the property that I want to group by (mappedProperty
), the property that should remain unique for every row (uniqueProperty
) and a property that should be excluded from the set of grouped properties (mainProperty
)
Could you review?
function reduceRows(array, mappedProperty, uniqueProperty, mainProperty) {
var currentUniqueProp;
var objToReturn;
var mappedObj = _.map(array, function(obj) {
// we're grouping by the unique property (i.e. an ID), so if we have a new
// unique property, we clone the object, and start adding the
// mapped property (i.e. alternative names for a language) to the array
//usage: u.reduceRows(data.data.languages, "name_alt", "language_id", "language_name")
if (obj[uniqueProperty] != currentUniqueProp) {
objToReturn = _.clone(obj)
objToReturn[mappedProperty] = [obj[mappedProperty]]
currentUniqueProp = obj[uniqueProperty]
} else {
// sometimes the grouped property has a value which is the same as the main property
// i.e. the language_name is also included in the list of name_alts
// We want to avoid this duplication
if (obj[mappedProperty] != obj[mainProperty]) {
objToReturn[mappedProperty].push(obj[mappedProperty])
}
}
return objToReturn;
})
var filteredObj = _.filter(mappedObj, function(obj) {
//this function ensures that only one entry for each unique property
//is returned
if (obj[uniqueProperty] == currentUniqueProp) {
return false
} else {
currentUniqueProp = obj[uniqueProperty]
return true
}
})
return filteredObj
}
2 Answers 2
Several things:
You don't need underscore. Node supports ECMAScript 5 features such as
Array.prototype.map
andArray.prototype.filter
.Replace
_.map(array, function)
witharray.map(function)
, etc.Name your inline functions. It's better for readability, and if the code errors out, you won't get
(anonymous function)
without knowing which anonymous function.map
is used to map one array into another array, if you just want to iterate an array and run a function, you wantArray.prototype.forEach
.
I'd adopt a different method for doing this.
- Figure out the unique entries (aside from name_alt)
- Iterate over each "group" of unique entries, and
reduce
each to a single object - push each newly created object into a new array.
Here's my take:
// ## Get an array of language_ids to be used as a unique key.
// ## I'm assuming here that if language_id is equal, the rest (aside from name_alt) is also equal.
var uniqueKeys = data
.map(function pluckLanguageId(obj) { return obj.language_id; })
//Now your array looks like ["a", "a", "a", "b", "b" ,"b"]
.filter(function makeUnique(language, index, array) { return array.indexOf(language) === index; });
//Now your array looks like ["a", "b"]
// ## Map each of the keys to a formatted object:
var result = uniqueKeys.map(function mapKeysToFormattedObjects(key) {
//Take the entire data array
return data
//Filter it by language_id
.filter(function getOnlyDataWithKey(item) { return item.language_id === key; })
//Then combine it into one object
.reduce(function combineToOneObject(previous, current) {
if (typeof(previous.name_alt) === "string") { //In this first object, the name_alt would be a string.
previous.name_alt = [previous.name_alt]; //So make it into an array!
}
previous.name_alt.push(current.name_alt); //Push the current name_alt into the previous one.
return previous;
}); //Result is an object with all of the name_alts in an array
})
This is assuming that your original data is in that data
variable. Please tell me if there's something you don't understand, and I'll try my best to explain.
Notice how I'm chaining my calls to map, filter and reduce. Both map
and filter
return an array, so the chained call is performed on that newly processed array.
A fancier way of writing this would be factoring everything out into functions, and then having a oneliner:
function pluckLanguageId(obj) { return obj.language_id; }
function makeUnique(language, index, array) { return array.indexOf(language) === index; }
function mapKeysToFormattedObjects(key) {
return data
.filter(function getOnlyDataWithKey(item) { return item.language_id === key; })
.reduce(function combineToOneObject(previous, current) {
if (typeof(previous.name_alt) === "string") { //In this first object, the name_alt would be a string.
previous.name_alt = [previous.name_alt]; //So make it into an array!
}
previous.name_alt.push(current.name_alt); //Push the current name_alt into the previous one.
return previous;
}); //Result is an object with all of the name_alts in an array
}
var result = data.map(pluckLanguageId).filter(makeUnique).map(mapKeysToFormattedObjects);
//One liner!
This code can be improved even further with functions that return functions and whatnot, but take the first example as reference to learn from.
As javascript
has a else if
statement you should use this to remove one level of indention.
if (obj[uniqueProperty] != currentUniqueProp) {
objToReturn = _.clone(obj)
objToReturn[mappedProperty] = [obj[mappedProperty]]
currentUniqueProp = obj[uniqueProperty]
} else if (obj[mappedProperty] != obj[mainProperty]) {
// sometimes the grouped property has a value which is the same as the main property
// i.e. the language_name is also included in the list of name_alts
// We want to avoid this duplication
objToReturn[mappedProperty].push(obj[mappedProperty])
}
The underscore
filter method can be simplified to
var filteredObj = _.filter(mappedObj, function(obj) {
//this function ensures that only one entry for each unique property
//is returned
if (obj[uniqueProperty] == currentUniqueProp) {
return false
}
currentUniqueProp = obj[uniqueProperty]
return true
})
If the condition evaluates to true
the method returns, so no else
is needed.
You should always declare your variables where they are needed. So var objToReturn
should be declared here
var mappedObj = _.map(array, function(obj) {
var objToReturn;
// we're grouping by the unique property
else if
but if yes you should use it at the firstif..else
. \$\endgroup\$else if
\$\endgroup\$objToReturn
is a confusing name. It should describe what it represents and not what you plan on doing with it later. \$\endgroup\$