How to handle extremely large data in node.js array - javascript

I am working on a Node.js server that responds with JSON. I have a legacy server that contains the data. The Node.js server acts as a middle-man connecting the user to the data. The legacy server returns data as a CSV, with columns being keys and rows being objects. I want to turn this into an array of JSON objects. I am using this tool to do just that: https://github.com/Keyang/node-csvtojson. I can either have the tool construct the output for me, or I can have a function called with each row passed in and construct it myself. I am currently doing the latter.
The users of my application can pass in a search value which means only rows that contain this value should be returned. The user can also pass in filter keys and values which means only rows where key contains value for every key and value given should be returned. They can of course give both search and filter values or neither. They also give a sort key which is the key in the objects to sort the array by. They also give me a sort order: ascending or descending. They also give me a page size and offset to return, which is for pagination.
My question is, what is the best way to handle this with data sets that could contain millions of rows? I can't modify the legacy server. Here is what I have. It works but I would like to improve performance as much as possible. Is there a more efficient way to do the sorting, searching, filtering, pagination, etc? Would it be better to add to the array and then sort instead of using a binary search tree and sorting during add? Is there a way I could use streams to improve performance? I expect the server to be limited by CPU performance, not RAM. I am looking for any recommendations for better performance. Thanks! :)
EDIT: Also, what part of the below code is blocking?
/**
 * Reports whether any own property of `row` contains `search`, ignoring case.
 *
 * @param {Object} row - Record whose own enumerable properties are scanned.
 * @param {*} [search] - Text to look for; when falsy, every row matches.
 * @returns {boolean} true when `search` is falsy or at least one cell contains it.
 */
function searchRow(row, search) {
    if (!search) {
        return true;
    }
    // Lower-case the needle once instead of once per column.
    var needle = String(search).toLowerCase();
    for (var key in row) {
        if (!Object.prototype.hasOwnProperty.call(row, key)) {
            continue;
        }
        var cell = row[key];
        // Empty cells have no toString(); they simply cannot match.
        if (cell === null || cell === undefined) {
            continue;
        }
        if (String(cell).toLowerCase().indexOf(needle) > -1) {
            return true;
        }
    }
    return false;
}
/**
 * Predicate that compares `obj.field` with the primitive value of `this`.
 * It must be invoked with an explicit `this` (call/apply/bind, or the
 * thisArg of an array iteration method).
 *
 * @param {Object} obj - Object whose `field` property is tested.
 * @returns {boolean} true when `obj.field` strictly equals `this.valueOf()`.
 */
function filterByField(obj) {
    var actual = obj.field;
    var wanted = this.valueOf();
    return actual === wanted;
}
/**
 * Reports whether `row` satisfies every criterion in `filter`.
 * A criterion matches when the row's cell for that key contains the
 * criterion value as a case-insensitive substring.
 *
 * @param {Object} row - Record to test.
 * @param {string|Object} [filter] - URI-encoded JSON object of key/value
 *     criteria, or the already-parsed object. Falsy means "no filter".
 * @returns {boolean} true when all criteria match (or there are none).
 * @throws {URIError|SyntaxError} when a string filter cannot be decoded or parsed.
 */
function filterRow(row, filter) {
    if (!filter) {
        return true;
    }
    // Accepting a parsed object lets callers decode once instead of per row.
    var criteria = typeof filter === 'string'
        ? JSON.parse(decodeURIComponent(filter))
        : filter;
    for (var key in criteria) {
        if (!Object.prototype.hasOwnProperty.call(criteria, key)) {
            continue;
        }
        var wanted = criteria[key];
        // A null criterion carries no text to look for; treat it as no constraint.
        if (wanted === null || wanted === undefined) {
            continue;
        }
        var cell = row[key];
        // A row without the column (or with an empty cell) cannot contain the text.
        if (cell === null || cell === undefined) {
            return false;
        }
        if (String(cell).toLowerCase().indexOf(String(wanted).toLowerCase()) === -1) {
            return false;
        }
    }
    return true;
}
/**
 * Builds (or directly applies) a comparator that orders records by the
 * lower-cased string form of `record[key]`, breaking ties with `hash(record)`.
 *
 * Called with two arguments it returns the comparator, suitable for
 * Array.prototype.sort. Called with `a` and `b` as well it returns the
 * comparison result itself, which is what callers passing four arguments
 * (such as a binary search) need.
 *
 * @param {string} key - Property to order by; falsy values sort as ''.
 * @param {boolean} reverse - true for descending order.
 * @param {Object} [a] - Left record, when an immediate result is wanted.
 * @param {Object} [b] - Right record, when an immediate result is wanted.
 * @returns {Function|number} comparator, or -1 / 0 / 1 when `a` and `b` are given.
 */
function orderByKey(key, reverse, a, b) {
    var direction = reverse ? -1 : 1;

    function compare(left, right) {
        var x = (left[key] || '').toString().toLowerCase();
        var y = (right[key] || '').toString().toLowerCase();
        if (x > y) {
            return direction;
        }
        if (y > x) {
            return -direction;
        }
        // Equal keys: fall back to the record hash so the order is stable
        // across runs. Assumes hash() is deterministic, so each record is
        // hashed only once.
        var leftHash = hash(left);
        var rightHash = hash(right);
        if (leftHash > rightHash) {
            return direction;
        }
        if (rightHash > leftHash) {
            return -direction;
        }
        return 0;
    }

    // The original ignored a/b and always returned a function, so callers
    // that passed both records received a function where they expected a number.
    if (arguments.length > 2) {
        return compare(a, b);
    }
    return compare;
}
/**
 * Inserts `element` into `array` (in place) just after the index reported
 * by locationOf for the given sort key and direction.
 *
 * @param {Object} element - Record to insert.
 * @param {Array} array - Array that is mutated.
 * @param {string} key - Sort key forwarded to locationOf.
 * @param {boolean} reverse - Sort direction forwarded to locationOf.
 * @returns {Array} the same `array` instance.
 */
function sortedInsert(element, array, key, reverse) {
    var insertAt = locationOf(element, array, key, reverse) + 1;
    array.splice(insertAt, 0, element);
    return array;
}
/**
 * Binary-searches the sorted `array` for the position after which `element`
 * should be inserted to keep the order defined by orderByKey(key, reverse).
 *
 * @param {Object} element - Record being placed.
 * @param {Array} array - Records already sorted by the same key/direction.
 * @param {string} key - Property the array is ordered by.
 * @param {boolean} reverse - true when the array is in descending order.
 * @param {number} [start] - Lower bound of the search window (default 0).
 * @param {number} [end] - Exclusive upper bound (default array.length).
 * @returns {number} index of the element to insert after; -1 means "insert
 *     at the front" and is also returned for an empty array.
 */
function locationOf(element, array, key, reverse, start, end) {
    if (array.length === 0) {
        return -1;
    }
    // orderByKey(key, reverse) yields the comparator function. The original
    // used that function itself as if it were the comparison result, so no
    // branch matched and the search returned undefined.
    var compare = orderByKey(key, reverse);
    var low = start || 0;
    var high = end || array.length;
    // Iterative form of the original recursion: narrow [low, high) until it
    // holds a single candidate.
    for (;;) {
        var pivot = Math.floor(low + (high - low) / 2);
        var c = compare(element, array[pivot]);
        if (high - low <= 1) {
            return c < 0 ? pivot - 1 : pivot;
        }
        if (c === 0) {
            return pivot;
        }
        if (c < 0) {
            high = pivot;
        } else {
            low = pivot;
        }
    }
}
/**
 * Downloads a delimited table from `config.url + path`, streams it through
 * the CSV converter, keeps the rows accepted by searchRow/filterRow, sorts
 * them and resolves with one page of results.
 *
 * @param {string} path - Path appended to `config.url`.
 * @param {string[]} columns - Column names handed to the converter as headers.
 * @param {string} [delimiter] - Field delimiter; defaults to a tab.
 * @param {Object} [query] - Optional `search`, `filter`, `sort`, `order`
 *     ('desc' for descending), `offset` and `limit`.
 * @returns {Promise<{total: number, rows: Object[]}>} `total` counts every
 *     matching row; `rows` holds only the requested page.
 */
function getTable(path, columns, delimiter, query) {
    var deferred = q.defer();
    var data = [];
    query = query || {};
    delimiter = delimiter ? delimiter : '\t';

    // The original derived the default sort key from a local `headers` array
    // that was never filled, so headers[0].split threw whenever query.sort was
    // absent. The first configured column is used instead.
    // NOTE(review): assumes `columns` is what `headers` was meant to hold - confirm.
    var defaultSortKey = columns && columns.length
        ? String(columns[0]).split("!").pop()
        : undefined;
    var sortKey = query.sort || defaultSortKey;
    var descending = query.order === 'desc';

    var converter = new Converter({
        delimiter: delimiter,
        noheader: true,
        headers: columns,
        workerNum: os.cpus().length,
        constructResult: false
    });
    converter.on("error", function (errMsg) {
        deferred.reject(errMsg);
    });
    converter.on("record_parsed", function (row) {
        // Only collect here. Splicing each row into a sorted array shifts
        // elements on every insert, which is quadratic for large inputs.
        if (searchRow(row, query.search) && filterRow(row, query.filter)) {
            data.push(row);
        }
    });
    converter.on("end_parsed", function () {
        // One sort over the matches, using the two-argument comparator form.
        data.sort(orderByKey(sortKey, descending));

        var total = data.length;
        var offset = parseInt(query.offset, 10);
        if (isNaN(offset) || offset < 0) {
            offset = 0;
        }
        var limit = parseInt(query.limit, 10);
        if (isNaN(limit) || limit < 0) {
            limit = total;
        }
        deferred.resolve({
            "total": total,
            "rows": data.slice(offset, offset + limit)
        });
    });

    var options = {
        url: config.url + path,
        gzip: true,
        method: 'GET'
    };
    request(options, function (error, response) {
        if (error) {
            deferred.reject(error);
        } else if (response.statusCode !== 200) {
            // `error` is null here; reject with something callers can inspect.
            deferred.reject(new Error('Unexpected response status ' + response.statusCode + ' for ' + options.url));
        }
    }).pipe(converter);
    return deferred.promise;
}

Related

Sorting different arrays in typescript

I am trying to sort some parameter in my typescript model. My model is as follows.
// Record that is sorted by the comparators below: an ID, three single
// points and three arrays of AnotherPoint.
export class DataModel {
ID: String
point1: Point
point2 : Point
point3: Point
AnotherPoint1: AnotherPoint[]
AnotherPoint2: AnotherPoint[]
AnotherPoint3: AnotherPoint[]
}
// A named point; Timestamp is kept as a string and turned into a Date by
// the sorting code when it is compared.
export class Point {
Name: String
Timestamp: String
}
// Declares the same two fields as Point.
export class AnotherPoint {
Name: String
Timestamp: String
}
I have sorting logic in my component which take this above Data Model and sorts point as follows:
/**
 * Sorts the models in place by point1: case-insensitive Name first, then
 * Timestamp (compared as Dates via Service.compareDate) when names are equal.
 * Returns the same array instance.
 */
private sortByNameAndID(dataModel: DataModel[]): DataModel[] {
    return dataModel.sort((a, b) => {
        const nameA = a.point1.Name.toLowerCase();
        const nameB = b.point1.Name.toLowerCase();
        if (nameA < nameB) {
            return -1;
        }
        if (nameA > nameB) {
            return 1;
        }
        // Names are equal: let the timestamps decide.
        const whenA = new Date(a.point1.Timestamp);
        const whenB = new Date(b.point1.Timestamp);
        return Service.compareDate(whenA, whenB);
    });
}
}
Above sorting logic is working fine for Points but now with this I also need to sort AnotherPoint as well. Means I have to sort all Points and AnotherPoints together as above. How can I do that?
I would start creating two helper functions, to compare two Points and two arrays of AnotherPoints respectively. I'm assuming that the arrays are to be compared element by element, stopping as soon as a pair of elements does not compare to 0.
/**
 * Compares two points by case-insensitive Name, falling back to their
 * Timestamps (as Dates, via Service.compareDate) when the names are equal.
 */
function comparePoints(pA: Point | AnotherPoint, pB: Point | AnotherPoint)
{
    const nameA = pA.Name.toLowerCase();
    const nameB = pB.Name.toLowerCase();
    if (nameA !== nameB) {
        return nameA < nameB ? -1 : 1;
    }
    return Service.compareDate(new Date(pA.Timestamp), new Date(pB.Timestamp));
}
/**
 * Compares two arrays pairwise with comparePoints and returns the first
 * non-zero result. Only the overlapping prefix is examined, so arrays that
 * agree on that prefix compare as 0 regardless of their lengths.
 */
function comparePointArrays(arrayA: AnotherPoint[], arrayB: AnotherPoint[])
{
    const shared = Math.min(arrayA.length, arrayB.length);
    let index = 0;
    while (index < shared) {
        const verdict = comparePoints(arrayA[index], arrayB[index]);
        if (verdict !== 0) {
            return verdict;
        }
        index += 1;
    }
    return 0;
}
Then the sorting function can be rewritten using the new helper comparators:
/**
 * Sorts the models in place, comparing point1, point2, point3 and then the
 * three AnotherPoint arrays in that order; the first comparison that yields
 * a truthy (non-zero) result decides. Returns the same array instance.
 */
private sortByNameAndID(dataModel: DataModel[]): DataModel[] {
    return dataModel.sort((a, b) => {
        const byPoint1 = comparePoints(a.point1, b.point1);
        if (byPoint1) {
            return byPoint1;
        }
        const byPoint2 = comparePoints(a.point2, b.point2);
        if (byPoint2) {
            return byPoint2;
        }
        const byPoint3 = comparePoints(a.point3, b.point3);
        if (byPoint3) {
            return byPoint3;
        }
        const byArray1 = comparePointArrays(a.AnotherPoint1, b.AnotherPoint1);
        if (byArray1) {
            return byArray1;
        }
        const byArray2 = comparePointArrays(a.AnotherPoint2, b.AnotherPoint2);
        if (byArray2) {
            return byArray2;
        }
        return comparePointArrays(a.AnotherPoint3, b.AnotherPoint3);
    });
}
Note that the || operator only performs the operation on the right if the result of the operation on the left is falsy (0), meaning that we will stop comparing points as soon as a comparison reports a nonzero result.

Why is JS Map Function Returning Undefined? [duplicate]

This question already has answers here:
Why does JavaScript map function return undefined?
(13 answers)
Closed 1 year ago.
I have an array that is about 1000 in length. Why is map function returning undefined in certain indexes? Is there a way to only return array that meets this condition? I'd like to return an array with values > 0.
// Snippet from the question, kept as asked: map() always produces one output
// slot per input element, so whenever the `if` below is false the callback
// returns nothing and that slot holds undefined.
var total_percents = cars.map(function(element) {
var savings_percent = Number(element[0].getAttribute("percent-savings"));
// No else branch: values that are not > 0 fall through without a return.
if (savings_percent > 0)
return savings_percent;
});
You need to filter values after mapping, which you can do with filter array method with predicate like car => car > 0
// Convert every element's "percent-savings" attribute to a number, then keep
// only the values greater than 0.
var total_percents = cars
    .map(function (element) {
        return Number(element[0].getAttribute("percent-savings"));
    })
    .filter(function (percent) {
        return percent > 0;
    });
You could also use reduce method for combining both operations at once:
// Single pass over cars: collect each "percent-savings" value that is > 0.
var total_percents = cars.reduce(function (kept, element) {
    const attribute = element[0].getAttribute("percent-savings");
    const percentSavings = Number(attribute);
    if (!(percentSavings > 0)) {
        return kept;
    }
    kept.push(percentSavings);
    return kept;
}, []);
Unsure what you're attempting to accomplish, but:
Try something like this to return a sum of all pctSavings:
// Sum of all "percent-savings" values that are greater than 0.
const totalPercents = cars.reduce(function (runningTotal, el) {
    const pctSavings = Number(el[0].getAttribute("percent-savings"));
    return pctSavings > 0 ? runningTotal + pctSavings : runningTotal;
}, 0);
To return an array of pctSavings, simply do this:
// Array of all "percent-savings" values that are greater than 0.
const totalPercents = cars.reduce(function (collected, el) {
    const pctSavings = Number(el[0].getAttribute("percent-savings"));
    if (pctSavings > 0) {
        collected.push(pctSavings);
    }
    return collected;
}, []);
To get the max pctSavings do this:
// Largest "percent-savings" value found; stays 0 when none is greater than 0.
let maxPctSavings = 0;
cars.forEach(function (el) {
    const pctSavings = Number(el[0].getAttribute("percent-savings"));
    maxPctSavings = pctSavings > maxPctSavings ? pctSavings : maxPctSavings;
});
console.log(maxPctSavings) // this is your answer
Using reduce to both filter and map the value (should be faster and use less memory than separate filter and map)
// Filter and map in one pass: the accumulator only receives values > 0.
var total_percents = cars.reduce(function (positives, element) {
    var percent = Number(element[0].getAttribute("percent-savings"));
    if (!(percent > 0)) {
        // Not a positive number: leave the accumulator untouched.
        return positives;
    }
    positives.push(percent);
    return positives;
}, []);
total_percents will be an array, with values that is greater than 0.
Reading from a comment on another answer, what you wanted was the max savings.
Then I would do it like this:
// Largest "percent-savings" value across cars, or 0 when none is positive.
var max_savings = cars.reduce(
    function (value, element) {
        var savings_percent = Number(element[0].getAttribute("percent-savings"));
        // Math.max(NaN, x) is NaN, so a single non-numeric attribute would
        // turn the whole result into NaN. A plain comparison skips NaN
        // because NaN > value is always false.
        return savings_percent > value ? savings_percent : value;
    },
    0 // start with no savings
);
"Number" returns "NaN" (Not a Number) if the argument cannot be converted to a number. Any numeric comparison with NaN, including NaN > 0, evaluates to false, so the callback falls through without executing a return statement and map stores "undefined" for that element (the same happens for every value that is not greater than 0). You could make the check explicit with a double comparison:
if (typeof savings_percent == 'number' && savings_percent > 0)

How to return a variable with number of tags using jQuery?

I have this function to filter my data retrieved from ajax
// Keep only the entries whose TAG and Chave are both in the allowed lists.
var csvf = data.filter(function (el) {
    if (['TRUCK_CPX'].indexOf(el.TAG) < 0) {
        return false;
    }
    return ['CA5533'].indexOf(el.Chave) >= 0;
});
All I have to do is create a dynamic function to do something like that:
// Placeholder from the question: the body is intentionally empty and the
// function currently returns undefined.
function datafilter(x, y) {
// I don't know what to do here //
}
For:
var csvf = data.filter(function (el) {
//Using one tag://
function datafilter('TAG:"TRUCK_CPX"'); });
//return ['TRUCK_CPX'].indexOf(el.TAG) >= 0//
//Using two tags://
function datafilter('TAG:"TRUCK_CPX"', 'Chave:"CA5533"'); });
//return ['TRUCK_CPX'].indexOf(el.TAG) >= 0 && ['CA5533'].indexOf(el.Chave) >= 0//
Anyone here know how can I do that?
Define a function that takes two arguments: an object, and the element.
Map each key in the object to the boolean result of comparing that key's value with the element's value for the same key.
Then, reduce the resulting array of booleans.
/**
 * Reports whether `el` carries, for every own enumerable key of `item`,
 * a strictly equal value. An `item` without keys matches everything.
 *
 * @param {Object} item - Key/value pairs that must all be present in `el`.
 * @param {Object} el - Candidate element.
 * @returns {boolean}
 */
function filter(item, el) {
    return Object.keys(item).every(function (key) {
        return item[key] === el[key];
    });
}
// Demonstration: key order does not matter, so this logs true.
const matchInverse = filter(
    {glue: 'gabby', alpha: 'allison'},
    {alpha: 'allison', glue: 'gabby'}
);
console.log(matchInverse);
// Array.prototype.filter needs a callback. The original passed the boolean
// result of filter(item, el), with `item` and `el` undefined at this point.
// Wrap the call so every element of `data` is tested against the criteria.
const item = {TAG: 'TRUCK_CPX', Chave: 'CA5533'};
var csvf = data.filter(function (el) {
    return filter(item, el);
});

excluding "missing" values in reductio.avg()

I was hoping to use reductio to compute averages within my crossfilter groups. My dataset includes missing values (represented by null) that I'd like to exclude when calculating the average. However, I don't see a way to tell reductio to exclude certain values, and it treats the null values as 0.
I wrote a custom reduce function to accomplish this without using reductio:
/**
 * Builds init/add/remove reducers that maintain the count, sum and average
 * of `record[attr]`, ignoring records where that value is missing.
 *
 * @param {string} attr - Name of the numeric property to average.
 * @returns {{init: Function, add: Function, remove: Function}} reducers;
 *     `add` and `remove` mutate and return the reduction they are given.
 */
function reduceAvg(attr) {
    // null and undefined both count as missing; letting undefined through
    // would turn the running sum into NaN.
    function isMissing(record) {
        return record[attr] === null || record[attr] === undefined;
    }

    // Applies one record with the given sign (+1 add, -1 remove) and
    // refreshes the average, which is 0 while the group is empty.
    function apply(reduction, record, sign) {
        if (isMissing(record)) {
            return reduction;
        }
        reduction.count += sign;
        reduction.sum += sign * record[attr];
        reduction.avg = reduction.count > 0
            ? reduction.sum / reduction.count
            : 0;
        return reduction;
    }

    return {
        init: function() {
            return {
                count: 0,
                sum: 0,
                avg: 0
            };
        },
        add: function(reduction, record) {
            return apply(reduction, record, 1);
        },
        remove: function(reduction, record) {
            return apply(reduction, record, -1);
        }
    };
}
Is there a way to do this using reductio? Maybe using exception aggregation? I haven't fully wrapped my head around how exceptions work in reductio.
I think you should be able to average over 'myAttr' excluding null and undefined by doing:
// Average d[myAttr], skipping records where it is null or undefined.
reductio()
    .filter(function (d) {
        var value = d[myAttr];
        return value !== null && value !== undefined;
    })
    .avg(function (d) {
        return d[myAttr];
    });
If that doesn't work as expected, please file an issue as it is a bug.

Using Crossfilter, is it possible to track max/min when grouping?

When using Crossfilter (https://github.com/square/crossfilter), I specify functions to use when adding and removing data from a group. It's fairly trivial to keep track of a running average (using CoffeeScript):
# Reducers that keep a running count and sum of v.digit per group.
# NOTE(review): the snippet's indentation was lost; CoffeeScript needs the
# function bodies below indented before this will compile.

# Add: count one more record and add its digit to the sum.
reduceAdd = (p, v) ->
++p.count;
p.sum += v.digit;
p
# Remove: exact inverse of reduceAdd.
reduceRemove = (p, v) ->
--p.count;
p.sum -= v.digit;
p
# Initial value: empty totals plus an average() helper that yields 0 for an
# empty group instead of dividing by zero.
reduceInitial = ->
{
count: 0
sum: 0
average: ->
return 0 if this.count == 0
return this.sum / this.count
}
Is it possible to keep track of the max and min of each group? I can't figure out a way short of keeping all elements in a huge array and doing a d3.min / d3.max. It seems that adding/removing data would be extremely inefficient.
I also looked for a way to tell Crossfilter to completely rebuild the group from scratch, rather than removing items from an existing group. If a filter is applied, the group is reset and rebuilt. Nothing obvious.
You can use dimension.top(1) and dimension.bottom(1) to retrieve the current min and max. These methods respect any filters that may be active on the crossfilter.
The best solution I came up with, was to keep track of all values in an ordered list and to add elements with a simple quicksort-style insertion function (cp. how to insert a number into a sorted array) and to remove them using indexOf.
Common functions:
/**
 * Inserts `element` into the sorted `array` (in place) right after the
 * index reported by locationOfElement.
 *
 * @param {*} element - Value to insert.
 * @param {Array} array - Sorted array that is mutated.
 * @returns {Array} the same `array` instance.
 */
function insertElement(element, array) {
    var insertAt = locationOfElement(element, array) + 1;
    array.splice(insertAt, 0, element);
    return array;
}
/**
 * Removes the first occurrence of `element` from `array` (in place).
 * The array is left untouched when the element is not present.
 *
 * @param {*} element - Value to remove (matched with indexOf).
 * @param {Array} array - Array that is mutated.
 * @returns {Array} the same `array` instance.
 */
function removeElement(element, array) {
    var position = array.indexOf(element);
    if (position < 0) {
        return array;
    }
    array.splice(position, 1);
    return array;
}
/**
 * Binary-searches the ascending `array` for `element`.
 *
 * @param {*} element - Value to locate.
 * @param {Array} array - Array sorted in ascending order.
 * @param {number} [start] - Lower bound of the search window (default 0).
 * @param {number} [end] - Exclusive upper bound (default array.length).
 * @returns {number} index of a strictly equal element if the search hits
 *     one; otherwise the index after which `element` belongs (-1 means
 *     before the first element).
 */
function locationOfElement(element, array, start, end) {
    var low = start || 0;
    var high = end || array.length;
    for (;;) {
        var middle = Math.floor(low + (high - low) / 2);
        var probe = array[middle];
        if (probe === element) {
            return middle;
        }
        if (high - low <= 1) {
            return probe > element ? middle - 1 : middle;
        }
        if (probe < element) {
            low = middle;
        } else {
            high = middle;
        }
    }
}
/**
 * Returns the last element of `array`, or null when the array is empty.
 * For an array kept in ascending order that is its maximum.
 *
 * @param {Array} array
 * @returns {*} last element, or null.
 */
function maxElement(array) {
    if (array.length > 0) {
        return array[array.length - 1];
    }
    return null;
}
/**
 * Returns the first element of `array`, or null when the array is empty.
 * For an array kept in ascending order that is its minimum.
 *
 * @param {Array} array
 * @returns {*} first element, or null.
 */
function minElement(array) {
    if (array.length > 0) {
        return array[0];
    }
    return null;
}
Functions to use when adding and removing data from a group to track min / max:
// Dimension keyed on d.key; its groups track every value so that min and
// max stay available as records are added and removed.
var minMaxDimension = cf.dimension(function (d) {
    return d.key;
});
// Add: place the record's value into the group's sorted list.
var reduceAdd = function(p, v) {
    insertElement(v.value, p.elements);
    return p;
};
// Remove: take one occurrence of the record's value out of the list.
var reduceRemove = function(p, v) {
    removeElement(v.value, p.elements);
    return p;
};
// Initial group value: an empty sorted list plus min/max accessors.
var reduceInitial = function() {
    return {
        elements: [],
        // `elements` is a property of this object, not a variable in scope;
        // the original referenced it bare, which throws a ReferenceError.
        max: function() { return maxElement(this.elements); },
        min: function() { return minElement(this.elements); }
    };
};
var minMaxGroup = minMaxDimension
    .group()
    .reduce(reduceAdd, reduceRemove, reduceInitial)
    .orderNatural()
    .top(Infinity);
After playing around with this for a bit, you can rebuild the group by just calling the group method again.

Categories

Resources