Ordering a bar chart labels in dc.js (again) - javascript

I know there are a lot of questions on that specific subject on SO but none of the solutions seem to work in my case.
This is my data :
var theData = [{
"value": "190.79",
"age_days": "22",
"criteria": "FX"
}, {
"value": "18.43",
"age_days": "22",
"criteria": "FX"
}, {...}]
I put the data into buckets as such :
var getAge = (d) => {
if ((d.age_days) <= 7) {
return (["1w", "2w", "1m", "3m", "6m", "1y", "All"]);
} else if ((d.age_days) <= 14) {
return (["2w", "1m", "3m", "6m", "1y", "All"]);
} else if ((d.age_days) <= 30) {
return (["1m", "3m", "6m", "1y", "All"]);
} else if ((d.age_days) <= 90) {
return (["3m", "6m", "1y", "All"]);
} else if ((d.age_days) <= 180) {
return (["6m", "1y", "All"]);
} else if ((d.age_days) <= 360) {
return (["1y", "All"]);
} else {
return (["All"]);
}
};
var ndx = crossfilter(theData);
var dims = {};
var groups = {};
dims.age = ndx.dimension(getAge,true);
groups.age = {};
groups.age.valueSum = dims.age.group().reduceSum((d) => d.value);
I then try to order the group using the fake group approach :
var sort_group = (source_group, order) => {
return {
all: () => {
let g = source_group.all();
let map = {};
g.forEach(function (kv) {
map[kv.key] = kv.value;
});
return order
.filter((k) => map.hasOwnProperty(k))
.map((k) => {
return {key: k, value: map[k]}
});
}
};
};
var the_order = ["1w", "2w", "1m", "3m", "6m", "1y", "All"];
var the_sorted_age_group = sort_group(groups.age.valueSum, the_order);
then I create the barChart using
theAgeChart
.height(200)
.width(400)
.dimension(dims.age)
.group(the_sorted_age_group)
.valueAccessor((d) => d.value)
.x(d3.scaleBand())
.xUnits(dc.units.ordinal);
But it still comes out using the default sort :
I've created a jsFiddle here which contains everything.
How can I get my bars sorted as I want them to be sorted ?

When elasticX is true or the x scale domain is not set, the coordinate grid mixin will generate the X domain
if (_chart.elasticX() || _x.domain().length === 0) {
_x.domain(_chart._ordinalXDomain());
}
(source)
That totally makes sense, but it always sorts the domain when it generates it:
_chart._ordinalXDomain = function () {
var groups = _chart._computeOrderedGroups(_chart.data());
return groups.map(_chart.keyAccessor());
};
(source)
I guess we could consider not sorting the domain when ordering is null.
Anyway, one workaround is to set the domain yourself:
.x(d3.scaleBand().domain(the_order))
You don't need to sort the group for a bar chart. (For a line chart, the group order must agree with the scale domain, but it doesn't matter for the bar chart.)
With the domain set, this also works:
.group(groups.age.valueSum)
Fork of your fiddle.
I guess the moral of the story is that it's complicated to generate charts automatically. Most of the time one does want the X domain sorted, but what's the best way to allow the user to provide their own sort?
I would not say this is the best way, but there is a way to make it work.

Related

d3 break line graph if no data

I am using dc to create a line graph where capacity is on the y-axis and week is on the x-axis. For weeks, the range is 1-52, but there is no data from weeks 2-40. I only have data for week 1 and 41-52, but my line graph is still creating a line when there is no data:
How do I get it so the line graph will break if there are no values? So it wouldn't be one connected line. Here is my code for reference
let chart = dc.lineChart("#chart");
let ndx = crossfilter(results);
let weekDimension = ndx.dimension(function (d) {
return d.week = +d.week;
});
function reduceAdd(p, v) {
++p.count;
p.total += v.capacity;
p.average = p.total / p.count;
return p;
}
function reduceRemove(p, v) {
--p.count;
p.total -= v.capacity;
p.average = p.count ? p.total / p.count : 0;
return p;
}
function reduceInitial() {
return { count: 0, total: 0, average: 0 };
}
let capacityGroup = weekDimension.group().reduce(reduceAdd, reduceRemove, reduceInitial);
chart.width(360)
.height(200)
.margins({ top: 20, right: 20, bottom: 50, left: 30 })
.mouseZoomable(false)
.x(d3.scale.linear().domain([1, 52]))
.renderHorizontalGridLines(true)
.brushOn(false)
.dimension(weekDimension)
.valueAccessor(function (d) {
return d.value.average;
})
.group(capacityGroup);
dc.renderAll('chart');
This is how results would look like
{month : "1", capacity: "48"}
{month : "1", capacity: "60"}
{month : "42", capacity: "67"}
{month : "42", capacity: "60"}
{month : "43", capacity: "66"}
{month : "44", capacity: "52"}
{month : "45", capacity: "63"}
{month : "46", capacity: "67"}
{month : "47", capacity: "80"}
{month : "48", capacity: "61"}
{month : "48", capacity: "66"}
{month : "49", capacity: "54"}
{month : "50", capacity: "69"}
I have tried to add .defined(d => { return d.y != null; }); and .defined(d => !isNaN(d.value)); but that didn't do anything... Any help will be greatly appreciated
As we discussed in the comments, the important problem is that dc.js will only draw the data it receives. It doesn't know if data is missing, so we will need to fill in the nulls in order to draw gaps in the line.
I linked to a previous question, where the data is timestamps. The answer there uses a d3 time interval to generate the missing timestamps.
However, your data uses integers for keys (even though it represents weeks), so we will need to change the function a little bit:
function fill_ints(group, fillval, stride = 1) { // 1
return {
all: function() {
var orig = group.all();
var target = d3.range(orig[0].key, orig[orig.length-1].key, stride); // 2
var result = [];
for(var oi = 0, ti = 0; oi < orig.length && ti < target.length;) {
if(orig[oi].key <= target[ti]) {
result.push(orig[oi]);
if(orig[oi++].key === target[ti])
++ti;
} else {
result.push({key: target[ti], value: fillval});
++ti;
}
} // 3
if(oi<orig.length) // 4
Array.prototype.push.apply(result, orig.slice(oi));
if(ti<target.length) // 5
result = [...result, ...target.slice(ti).map(t => ({key: t, value: fillval}))];
return result;
}
};
}
This function takes a group, the value to fill, and a stride, i.e. the desired gap between entries.
It reads the current data, and generates the desired keys using d3.range.
It walks both arrays, adding any missing entries to a copy of the group data.
If there are any leftover entries from the original group data, it appends it.
If there are any remaining targets, it generates those.
Now we wrap our original group using this function, creating a "fake group":
const filledGroup = fill_ints(capacityGroup, {average: null});
and pass it to the chart instead:
.group(filledGroup);
One weakness of using LineChart.defined(), and the underlying d3.line.defined, is that it takes two points to make a line. If you have isolated points, as week 1 is isolated in your original data, then it won't be shown at all.
In this demo fiddle, I have avoided the problem by adding data for week 2.
But what about isolated dots?
I was curious how to solve the "isolated dots problem" so I tried showing the built-in dots that are usually used for a mouseover effect:
chart.on('pretransition', chart => {
const all = chart.group().all();
isolated = all.reduce((p, kv, i) => {
return (kv.value.average !== null &&
(i==0 || all[i-1].value.average == null) &&
((i==all.length-1 || all[i+1].value.average == null))) ?
{...p, [kv.key]: true} : p;
}, {});
chart.g().selectAll('circle.dot')
.filter(d => isolated[d.data.key])
.style('fill-opacity', 0.75)
.on('mousemove mouseout', null)
})
This works but it currently relies on disabling the interactivity of those dots so they don't disappear.

How to create a stacked bar chart using dc.js?

I'd like to create a stacked bar chart using DC.JS.
I've tried to utilize the documentation from DC.JS (graph,source code) to no avail - Below is my attempt (here is my attempt in jsfiddle) and my most recent attempt in CodePen.
I'd like the 'Name' as the X axis and 'Type' as the stacks.
HTML
<script src="https://cdnjs.cloudflare.com/ajax/libs/d3/3.5.17/d3.js"></script>
<script src="https://rawgithub.com/NickQiZhu/dc.js/master/web/js/crossfilter.js"></script>
<script src="https://cdnjs.site44.com/dc2.js"></script>
<div id="chart"></div>
Javascript
var data = [ {"Name":"Abby","Type":"Apple"}, {"Name":"Abby","Type":"Banana"}, {"Name":"Bob","Type":"Apple"} ]
data.forEach(function(x) {
x.Speed = +x.Type;
});
var ndx = crossfilter(data)
var xdim = ndx.dimension(function (d) {return d.Name;});
function root_function(dim,stack_name) {
return dim.group().reduce(
function(p, v) {
p[v[stack_name]] = (p[v[stack_name]] || 0) + v.Speed;
return p;},
function(p, v) {
p[v[stack_name]] = (p[v[stack_name]] || 0) - v.Speed;
return p;},
function() {
return {};
});}
var ydim = root_function(xdim,'Type')
function sel_stack(i) {
return function(d) {
return d.value[i];
};}
var chart = dc.barChart("#chart");
chart
.x(d3.scale.ordinal().domain(xdim))
.dimension(xdim)
.group(ydim, "1", sel_stack('1'))
.xUnits(dc.units.ordinal);
for(var i = 2; i<6; ++i)
chart.stack(ydim, ''+i, sel_stack(i));
chart.render();
I've been fiddling with this for some time and I have some additional findings:
When I replace the data array with the following it works
data = [ {"Name":"Abby","Type":"1"}, {"Name":"Abby","Type":"2"}, {"Name":"Bob","Type":"1"} ]
But it only works when I swapped out dc 1.7.5 (https://cdnjs.cloudflare.com/ajax/libs/dc/1.7.5/dc.min.js) with dc 2.1.0-dev (https://github.com/dc-js/dc.js/blob/develop/web/js/dc.js)
However when I replace the data array with the following it doesn't work:
data = [ {"Name":"Abby","Type":"3"}, {"Name":"Abby","Type":"4"}, {"Name":"Bob","Type":"2"} ]
I believe the root issue lies in the root_function.
v.Speed is always NaN in your current example. Because +x.Type attempts to convert a string like "Apple" into a number and fails. If you just want to count, then add or subtract 1 in your reducer, rather than v.Speed. Then you need to update your sel_stack code and chart code to handle this change, of course.
Here's a working example for the 2 types in your data. You'll have to update it to handle arbitrary numbers of types, probably by building an array of all your types up front and then looping through it to add stacks to the chart: http://codepen.io/anon/pen/GjjyOv?editors=1010
var data = [ {"Name":"Abby","Type":"Apple"}, {"Name":"Abby","Type":"Banana"}, {"Name":"Bob","Type":"Apple"} ]
var ndx = crossfilter(data)
var xdim = ndx.dimension(function (d) {return d.Name;});
In the reducer, just add and subtract 1 to count:
var ydim = xdim.group().reduce(
function(p, v) {
p[v.Type] = (p[v.Type] || 0) + 1;
return p;},
function(p, v) {
p[v.Type] = (p[v.Type] || 0) - 1;
return p;},
function() {
return {};
});
sel_stack no longer takes a number, but a key:
function sel_stack(valueKey) {
return function(d) {
return d.value[valueKey];
};}
var chart = dc.barChart("#chart");
Here we hard-code the stack key, for the purpose of the example:
chart
.x(d3.scale.ordinal().domain(xdim))
.dimension(xdim)
.group(ydim, "Apple", sel_stack('Apple'))
.xUnits(dc.units.ordinal);
Again, the other hard-coded stack key. You'll need to recreate the loop after creating some sort of data structure that holds all of your stack values.
//for(var i = 2; i<6; ++i)
chart.stack(ydim, 'Banana', sel_stack('Banana'));
chart.render();

How to handle extremely large data in node.js array

I am working on a Node.js server that responds with JSON. I have a legacy server that contains the data. The Node.js server acts as a middle-man connecting the user to the data. The legacy server returns data as a csv with columns being keys and rows being objects. I want to turn this into an array of json objects. I am using this tool to do just that: https://github.com/Keyang/node-csvtojson. I can either have the tool construct the output for me or I can have a function called with each row passed in and construct it myself. I am currently doing the later.
The users of my application can pass in a search value which means only rows that contain this value should be returned. The user can also pass in filter keys and values which means only rows where key contains value for every key and value given should be returned. They can of course give both search and filter values or neither. They also give a sort key which is the key in the objects to sort the array by. They also give me a sort order: ascending or descending. They also give me a page size and offset to return, which is for pagination.
My question is, what is the best way to handle this with data sets that could contain millions of rows? I can't modify the legacy server. Here is what I have. It works but I would like to improve performance as much as possible. Is there a more efficient way to do the sorting, searching, filtering, pagination, etc? Would it be better to add to the array and then sort instead of using a binary search tree and sorting during add? Is there a way I could use streams to improve performance? I expect the server to be limited by CPU performance not RAM. I am looking for any recomendations for better performance. Thanks! :)
EDIT: Also, what part of the below code is blocking?
function searchRow(row, search) {
if (search) {
for (var key in row) {
if (row.hasOwnProperty(key) && row[key].toString().toLowerCase().indexOf(search.toLowerCase()) > -1) {
return true;
}
}
return false;
} else {
return true;
}
}
function filterByField(obj) {
return obj.field === this.valueOf();
}
function filterRow(row, filter) {
if (filter) {
filter = JSON.parse(decodeURIComponent(filter));
var passed = true;
for (var key in filter) {
if (filter.hasOwnProperty(key)) {
var index = row[key].toString().toLowerCase().indexOf(filter[key].toString().toLowerCase());
passed = passed && (index > -1);
}
if (!passed) {
break;
}
}
return passed;
} else {
return true;
}
}
function orderByKey(key, reverse, a, b) {
return function (a, b) {
var x = (a[key] || '').toString().toLowerCase();
var y = (b[key] || '').toString().toLowerCase();
if (x > y) {
return reverse ? -1 : 1;
} else if (y > x) {
return reverse ? 1 : -1;
} else if (hash(a) > hash(b)) {
return reverse ? -1 : 1;
} else if (hash(b) > hash(a)) {
return reverse ? 1 : -1;
} else {
return 0;
}
};
}
function sortedInsert(element, array, key, reverse) {
array.splice(locationOf(element, array, key, reverse) + 1, 0, element);
return array;
}
function locationOf(element, array, key, reverse, start, end) {
if (array.length === 0) {
return -1;
}
start = start || 0;
end = end || array.length;
var pivot = parseInt(start + (end - start) / 2, 10);
var c = orderByKey(key, reverse, element, array[pivot]);
if (end - start <= 1) {
return c == -1 ? pivot - 1 : pivot;
}
switch (c) {
case -1: return locationOf(element, array, key, reverse, start, pivot);
case 0: return pivot;
case 1: return locationOf(element, array, key, reverse, pivot, end);
}
}
function getTable(path, columns, delimiter, query) {
var deferred = q.defer();
var headers = [];
var data = [];
delimiter = delimiter ? delimiter : '\t';
var converter = new Converter({
delimiter: delimiter,
noheader: true,
headers: columns,
workerNum: os.cpus().length,
constructResult: false
});
converter.on("error", function(errMsg, errData) {
deferred.reject(errMsg);
});
converter.on("record_parsed", function(row) {
if (searchRow(row, query.search) && filterRow(row, query.filter)) {
sortedInsert(row, data, query.sort || headers[0].split("!").pop(), query.order === 'desc');
}
});
converter.on("end_parsed", function() {
var offset = parseInt(query.offset || 0);
var limit = parseInt(query.limit || data.length);
var total = data.length;
data = data.slice(offset, offset + limit);
deferred.resolve({
"total": total,
"rows": data
});
});
var options = {
url: config.url + path,
gzip: true,
method: 'GET'
};
request(options, function (error, response, body) {
if (error || response.statusCode != 200) {
deferred.reject(error);
}
}).pipe(converter);
return deferred.promise;
}

excluding "missing" values in reductio.avg()

I was hoping to use reductio to compute averages within my crossfilter groups. My dataset includes missing values (represented by null) that I'd like to exclude when calculating the average. However, I don't see a way to tell reductio to exclude certain values, and it treats the null values as 0.
I wrote a custom reduce function to accomplish this without using reductio:
function reduceAvg(attr) {
return {
init: function() {
return {
count: 0,
sum: 0,
avg: 0
};
},
add: function(reduction, record) {
if (record[attr] !== null) {
reduction.count += 1;
reduction.sum += record[attr];
if (reduction.count > 0) {
reduction.avg = reduction.sum / reduction.count;
}
else {
reduction.avg = 0;
}
}
return reduction;
},
remove: function(reduction, record) {
if (record[attr] !== null) {
reduction.count -= 1;
reduction.sum -= record[attr];
if (reduction.count > 0) {
reduction.avg = reduction.sum / reduction.count;
}
else {
reduction.avg = 0;
}
}
return reduction;
}
};
}
Is there a way to do this using reductio? Maybe using exception aggregation? I haven't fully wrapped my head around how exceptions work in reductio.
I think you should be able to average over 'myAttr' excluding null and undefined by doing:
reductio()
.filter(function(d) { return d[myAttr] !== null && d[myAttr] !== undefined; })
.avg(function(d) { return d[myAttr]; });
If that doesn't work as expected, please file an issue as it is a bug.

D3 Force Graph Neighboring Without Index (Including Self)

I am basing my D3 Force Model after this example: H is for Highlights. My JSON data is formatted differently since it uses the id as target/source instead of the default index given as a node property.
Sample of my JSON:
{
"nodes": [
{"fixed":true,"data": {"id": "foo","idType":"USERNAME","visible":true },"grabbable": true,"grabbed":false},
{"fixed":true,"data": {"id": "bar","idType":"USERNAME","visible":true },"grabbable": true}
],
"links": [
{classes":null,"data":{"color":"blue"","source":"foo","target":"bar","visible":true},"grabbable":false},
{classes":null,"data":{"color":"blue"","source":"bar","target":"foo","visible":true},"grabbable":false}
]
}
So as you can see I have both the target/source within the data and it uses the id as the type. Now here I am having trouble getting the highlight to work. I have it where it highlights its neighboring nodes and links BUT for some reason it doesn't keep the node itself highlighted. I have added arrows below to what I think needs modification but I am not sure what the issue is at all.
// sets the source and target to use id instead of index
var edges = [];
root.links.forEach(function(e) {
var sourceNode = root.nodes.filter(function(n) {
return n.data.id === e.data.source;
})[0],
targetNode = root.nodes.filter(function(n) {
return n.data.id === e.data.target;
})[0];
edges.push({
source: sourceNode,
target: targetNode
});
});
// Create an array logging what is connected to what
var linkedByIndex = { };
// array algorithm for what is connected to what
for (var i = 0; i < root.nodes.length; i++) {
linkedByIndex[i + "," + i] = 1;
};
root.links.forEach(function (d) {
linkedByIndex[d.data.source + "," + d.data.target] = 1;
});
// This function looks up whether a pair are neighbors
function neighboring(a, b) {
return linkedByIndex[a.data.id + "," + b.data.id]; <<<<<<<<<<<<<
}
function connectedNodes() {
//Reduce the opacity of all but the neighboring nodes
d = d3.select(this).node().__data__;
node.style("opacity", function (o) {
return neighboring(d, o) | neighboring(o, d) ? 1 : 0.1;
});
link.style("opacity", function (o) {
return d.index==o.source.index| d.index==o.target.index ? 1 : 0.1;
});
}
I have played around with it and I am not sure what is wrong with my algorithm.
Figured it out. Add the following line which modifies the node selected to the end of the connected node function
Code added
return d3.select(this).style("opacity",1);
So now it looks like...
function connectedNodes() {
// Reduce the opacity of all but the neighbouring nodes
// Ternary operator. condition | condition ? value if true : value if false
// the | means or
d = d3.select(this).node().__data__;
node.style("opacity", function (o) {
return neighboring(d, o) | neighboring(o, d) ? 1 : 0.1;
});
link.style("opacity", function (o) {
return d.index==o.source.index | d.index==o.target.index ? 1 : 0.1;
});
// Maintains opacity of selected node.
return d3.select(this).style("opacity",1);
}

Categories

Resources