Drawing parallel coordinates for random selection of data - javascript
I have 500 samples (rows) in my data, which is stored as a CSV file. Five of the rows are shown below:
path,Ktype,label,CGX,CGY,C_1,C_2,C_3,C_4,total1,total2,totalI3,total4,feature1,feature2,feature3,feature4,feature5,feature6,feature7,feature8,feature9,feature10,feature11,feature12,A,B,C,D,feature13,feature14,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
.\Mydata\Case1,k1,1,42,33,0,57.44534,0,52597,71,16,10,276,4038,3789.631,0.6173469,0.6499337,2.103316,0.6661285,1.065539,248.3694,0.630161,0.000192848,0.9999996,0.000642777,1,0,0,1,9.60E-05,3136.698,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
.\Mydata\Case2,k1,2,163,29,0,43.28862,0,49050,71,16,10,248,2944,2587.956,0.5726808,0.5681185,2.130387,0.601512,1.137578,356.0444,0.6335613,0.000327267,1.000029,0.001271235,1,0,0,1,0.00010854,2676.418,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
.\Mydata\Case3,k1,3,774,19,0,45.26291,0,53455,71,16,10,212,2366,1982.547,0.408179,0.4579566,1.994296,0.6615351,1.193415,383.4534,0.7153812,0.000264522,1.000031,0.001210507,1,1,0,0,9.54E-05,3221.289,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
.\Mydata\Case4,k1,4,1116,25,0,80.76469,0,57542,71,16,10,284,3908,3453.988,0.3549117,0.4811547,1.982244,0.6088744,1.131446,454.0122,0.6166388,0.000314288,0.9999836,0.00129846,0,1,1,0,0.000140592,2143.42,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
.\Mydata\Case5,k1,5,1364,59,1,52.96776,0,49670,71,16,10,228,2725,2642.675,0.4328255,0.475517,1.859871,0.6587288,1.031152,82.32471,0.5775694,0.000466264,0.9999803,0.001765345,0,1,1,0,0.00012014,2439.636,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
I am drawing parallel coordinates for my data. This is the part of the code that reads in the CSV file and filters it:
// Load Mydata.csv, coerce its numeric columns to numbers, and build one linear
// y-scale per numeric dimension for the parallel-coordinates plot.
// NOTE(review): `data`, `dimensions`, `xscale`, `yscale` and `h` are not declared
// in this snippet; presumably they are defined by the surrounding script — confirm.
d3.csv("Mydata.csv", function(raw_data) {
  // Convert quantitative columns to floats. A column is treated as quantitative
  // when its value in the FIRST row coerces to a number (`value - 0` is not NaN);
  // note that an empty string coerces to 0 and therefore also counts as numeric.
  data = raw_data.map(function(d) {
    for (var k in d) {
      // NOTE(review): the excluded keys are 'cgX'/'cgY', but the sample CSV header
      // spells them 'CGX'/'CGY' — verify whether the exclusion is meant to match.
      if (!_.isNaN(raw_data[0][k] - 0) && k != 'id' && k != 'cgX' && k != 'cgY') {
        // Values that fail to parse (or parse to 0) end up as 0.
        d[k] = parseFloat(d[k]) || 0;
      }
    }
    return d;
  });
  // Extract the list of numerical dimensions and create a scale for each.
  // The filter callback both selects the key and, as a side effect, stores the
  // key's scale in `yscale`; the scale's domain is the [min, max] of that column
  // and its range runs from h down to 0.
  xscale.domain(dimensions = d3.keys(data[0]).filter(function(k) {
    return (_.isNumber(data[0][k])) && (yscale[k] = d3.scale.linear()
      .domain(d3.extent(data, function(d) { return +d[k]; }))
      .range([h, 0]));
  }));
  // And the rest of the code for drawing parallel coordinates.
  // It is similar to the code in this link:
  // http://bl.ocks.org/syntagmatic/3150059
});
Now, I want to change it so that instead of drawing all 500 samples (500 polylines in the parallel coordinates), it randomly selects 100 of them and draws only those. How should I do that?
Do it in 2 passes.
First read in all the data...
d3.csv("Mydata.csv", function(raw_data) {...}
Then pick 100 rows at random (for example, with d3.shuffle(raw_data).slice(0, 100)) and feed them to the rendering function.
The bonus is that you don't have to parse or transform the coordinates of the rows you don't want to render.
Also, if you hit 100 selected rows before all 500 have been read, you can simply exit early and jump to the rendering algorithm.
Related
Why does crossfilter not sort or filter my data correctly?
I'm learning crossfilter and want to filter some data. So I have this big json file (It's actually csv) with almost 4 million lines of data. The file looks like this: timestamp,speed,power,distance,temperature,heart_rate,cadence,altitude,lat,long 1514806362,6569,172,6.63,14,90,87,2548,500870453,33664825 And all I'm trying to do is filter the distance. d3.csv('data2.json').then(function(data) { data.forEach(function(d, i) { d.date = parseDate(d.timestamp); }); // Create instance of crossfilter with dataset var cf = crossfilter(data); // Ask crossfilter how many rows it has / Size of data dataSize = cf.size(); console.log("Data size: " + dataSize); function parseDate(d) { return new Date(d*1000); } var distanceDimension = cf.dimension(function(d) { return d.distance; }); console.log("Creating delay dimension"); // List top 3 distance distanceDimensionTop3 = distanceDimension.top(3); console.log("Top 3 distance"); console.table(distanceDimensionTop3); // List bottom 3 distance distanceDimensionBottom3 = distanceDimension.bottom(3); console.log("Bottom 3 distance"); console.table(distanceDimensionBottom3); // Apply filter to get only distance above 5000 meters distanceDimension.filterFunction(function(d) {return d > 5000;}); console.log("Appliyng distance filter for only distance above 5000 meters"); // List bottom 3 distance console.log("Bottom 3 distance with filter applied"); console.table(distanceDimension.bottom(3)); }); But somehow my code fails right at the beginning listing the top 3 distance. I get a value of 99999.88 but in my data file, I have bigger values. Also when I apply the filter to my dimension it doesn't filter right. Thanks in advance.
Choropleth Alternative Method and more
In my previous post1 and post2, i managed to fix the choropleth map/legend issue + draw circles problems when drawing a map. When i follow this must-do tutorial about choropleth and when i search on internet i always find the same logic d3.csv("my.csv", function(data) { d3.json(myjson, function(json) { for (var i = 0; i < data.length ; i++) { //Grab state name var dataState = data[i].nom; //Grab data value, and convert from string to float var dataValue = data[i].population; //Find the corresponding state inside the GeoJSON for (var j = 0; j < json.features.length; j++) { var jsonState = json.features[j].properties.nom; if (dataState == jsonState) { //Copy the data value into the JSON json.features[j].properties.CA = dataValue; //Stop looking through the JSON break; } } } So in my case, i have a map with 75 path (1 path=region) and my csv file have 75 rows (1 row = 1 path) Now i'm trying to do things a little differently My new csv has N rows (N > 75, let's say 200) and for each row, a store (properties+lat+lon) is affected to a path ==> i can have 5 stores/path e.g Here are my questions : 1) How do i write my choropleth code differently ==> I'd like to scan the csv file and return for each distinct path the sum of specific properties (here "income") in order to write it on my json file ??? 2) When i click on a specific region/path, i'd like to display on a new div (in my case #output) the json file corresponding to my region (basicaly i have 75 json files "region1.json", "region2.json" and so on...") with circles inside (one circle = one store, in my csv file "name" column") ==> How do i retrieve this "on click value" and call the correct/corresponding json file ???? 3) Finally, if i click on a displayed specific circle of the #output div, i'd like to have on a third div a chart ==> How do i writte correctly my 3rd div so it is correctly displayed (css, other ?? ==> it can be applied to #output too) ?? 
Thank you so much for reading this request and for your availability and help. Here's the Plunker file (do not mind the sales.csv file; I just used it to try displaying something when I click on a path). Thanks again.
d3 nest roll up is the solution d3.csv("source-data.csv", function(error, csv_data) { var data = d3.nest() .key(function(d) { return d.date;}) .rollup(function(d) { return d3.sum(d, function(g) {return g.value; }); }).entries(csv_data); }); More info here
Rescale axis ticks and text in d3.js multi y-axis plot
I have a parallel coordinates plot that is based off this code: http://bl.ocks.org/syntagmatic/2409451 I am trying to get the tick marks and the numbers on the y axes to scale from the min to the max of the data rather than autoscaling to the conveniently linear numbers like it currently done. I have not been able to find any example of using d3 or js where a plot of any sort does this unless the data happens to land on those values. I have been able to just show the min and max value, but cannot get ticks between these by replacing the 3rd line of //Add an axis and title with: .each(function(d) {d3.select(this).call(d3.svg.axis().scale(y[d]).tickValues(y[d].domain()).orient("left")); }) For reference, the data file is read in as a .csv and ends up looking like this with alphabet representing the headings in the .csv file: var example_data = [ {"a":5,"b":480,"c":250,"d":100,"e":220}, {"a":1,"b":90,"c":50,"d":33,"e":88} ]; EDIT: The main issue is iterating over the array that has the domains for each column to create a new array with the tick values. Tick values can be set using: d3.svg.axis().scale(y[d]).tickValues(value 1[d],value 2[d], etc) y[d] is set by: // Extract the list of dimensions and create a scale for each. x.domain(dimensions = d3.keys(cars[0]).filter(function(d) { return d != "name" && (y[d] = d3.scale.linear() .domain(d3.extent(cars, function(p) { return +p[d]; })) .range([h, 0])); }));
Since you have the min and the max, you can map them any way you want onto any range [y0,yn] you want. For example, with y0 = 100 and yn = 500 (because HTML pixel coordinates count from the top down). Here I use a linear scale: d3.scale.linear() .domain([yourMin,yourMax]) .range([y0,yn]); Does this help?
Wrong result in dimplejs scatterplot
I'm trying to understand how to use with dimplejs but the result is not what i ment. JSFiddleCode <script src="http://d3js.org/d3.v3.min.js"></script> <script src="http://dimplejs.org/dist/dimple.v2.0.0.min.js"></script> <script type="text/javascript"> var svg = dimple.newSvg("#chartContainer", 590, 400); d3.csv("carsData.csv", function (data) { // change string (from CSV) into number format data.forEach(function(d) { if(d["Sports Car"]==1) d.Category = "Sports Car"; else if(d["SUV"]==1) d.Category = "SUV"; else d.Category = "Other"; d.HP = +d.HP; d["Engine Size (l)"] = +d["Engine Size (l)"]; }); // Latest period only //dimple.filterData(data, "Date", "01/12/2012"); // Create the chart var myChart = new dimple.chart(svg, data); myChart.setBounds(60, 30, 420, 330) // Create a standard bubble of SKUs by Price and Sales Value // We are coloring by Owner as that will be the key in the legend myChart.addMeasureAxis("x", "HP"); myChart.addMeasureAxis("y", "Engine Size (l)"); myChart.addSeries("Category", dimple.plot.bubble); var myLegend = myChart.addLegend(530, 100, 60, 300, "Right"); myChart.draw(); // This is a critical step. By doing this we orphan the legend. This // means it will not respond to graph updates. Without this the legend // will redraw when the chart refreshes removing the unchecked item and // also dropping the events we define below. myChart.legends = []; // This block simply adds the legend title. I put it into a d3 data // object to split it onto 2 lines. This technique works with any // number of lines, it isn't dimple specific. 
svg.selectAll("title_text") .data(["Click legend to","show/hide owners:"]) .enter() .append("text") .attr("x", 499) .attr("y", function (d, i) { return 90 + i * 14; }) .style("font-family", "sans-serif") .style("font-size", "10px") .style("color", "Black") .text(function (d) { return d; }); // Get a unique list of Owner values to use when filtering var filterValues = dimple.getUniqueValues(data, "Category"); // Get all the rectangles from our now orphaned legend myLegend.shapes.selectAll("rect") // Add a click event to each rectangle .on("click", function (e) { // This indicates whether the item is already visible or not var hide = false; var newFilters = []; // If the filters contain the clicked shape hide it filterValues.forEach(function (f) { if (f === e.aggField.slice(-1)[0]) { hide = true; } else { newFilters.push(f); } }); // Hide the shape or show it if (hide) { d3.select(this).style("opacity", 0.2); } else { newFilters.push(e.aggField.slice(-1)[0]); d3.select(this).style("opacity", 0.8); } // Update the filters filterValues = newFilters; // Filter the data myChart.data = dimple.filterData(data, "Category", filterValues); // Passing a duration parameter makes the chart animate. Without // it there is no transition myChart.draw(800); }); }); the scatterplot result is only 3 and i dont know why. the x is the HP and the y is horse power. more questions: 1. how can i change the axis unit. 2. how can i control the size of each bubble. 3. how to fix the wrong results. heres the result picture: The csv file has 480 rows. maybe the addseries is wrong (i dont know what it is)?
Dimple aggregates the data for you based on the first parameter of the addSeries method. You have passed "Category" which has 3 values and therefore creates 3 bubbles with summed values. If instead you want a bubble per vehicle coloured by category you could try: myChart.addSeries(["Vehicle Name", "Category"], dimple.plot.bubble); To change the axis unit you can use axis.tickFormat though the change above will reduce scale so you might find you don't need to. To control bubble size based on values in your data you need to add a "z" axis. See this example. If you want to just set a different marker size for your scatter plot you can do so after the draw method has been called with the following: var mySeries = myChart.addSeries("Category", dimple.plot.bubble); var myLegend = myChart.addLegend(530, 100, 60, 300, "Right"); myChart.draw(); // Set the bubble to 3 pixel radius mySeries.shapes.selectAll("circle").attr("r", 3); NB. A built in property for this is going to be included in the next release.
Memory gradually increased using d3 for real time chart?
I tried to simulate a real time chart with dynamic data using d3.js. I am running this using IE-10 browser. My Source Code I come across to a problem where the memory of my IE browser will be gradually increased if left the web application running for a period of time. I Google searched the possible reason that caused this problem. Two things come into my mind for discussion: The timer prevents the garbage collection of IE The d3 chart does not release memory after data.shift() My question: How could I diagnose if my problem actually originated from discussion 1 or 2 or neither? How could I solve the memory problem? You might need to download the code and run it with some time and monitor the iexplorer.exe using resource monitor in order to identify the problem. Thank you. Source Code: <html> <head> <title>Animated Sparkline using SVG Path and d3.js</title> <script src="http://mbostock.github.com/d3/d3.v2.js"></script> <style> /* tell the SVG path to be a thin blue line without any area fill */ path { stroke: steelblue; stroke-width: 1; fill: none; } </style> </head> <body> <span> <b>Size:</b> 300x30 <b>Interpolation:</b> basis <b>Animation:</b> true <b>Transition:</b> 1000ms <b>Update Frequency:</b> 1000ms <div id="graph1" class="aGraph" style="width:300px; height:30px;"></div> </span> <script> var myTimer; function FeedDataToChart(id, width, height, interpolation, animate, updateDelay, transitionDelay, data, startIndex) { // create an SVG element inside the #graph div that fills 100% of the div var graph = d3.select(id).append("svg:svg").attr("width", "80%").attr("height", "80%"); // X scale will fit values from 0-10 within pixels 0-100 var x = d3.scale.linear().domain([0, 48]).range([10, width-10]); // starting point is -5 so the first value doesn't show and slides off the edge as part of the transition // Y scale will fit values from 0-10 within pixels 0-100 var y = d3.scale.linear().domain([0, 20]).range([height-10, 10]); // create a line object that 
represents the SVN line we're creating var line = d3.svg.line() // assign the X function to plot our line as we wish .x(function(d,i) { // verbose logging to show what's actually being done //console.log('Plotting X value for data point: ' + d + ' using index: ' + i + ' to be at: ' + x(i) + ' using our xScale.'); // return the X coordinate where we want to plot this datapoint return x(i); }) .y(function(d) { // verbose logging to show what's actually being done //console.log('Plotting Y value for data point: ' + d + ' to be at: ' + y(d) + " using our yScale."); // return the Y coordinate where we want to plot this datapoint return y(d); }) .interpolate(interpolation) var counter = startIndex; //var myData = data.slice(); // display the line by appending an svg:path element with the data line we created above graph.append("svg:path").attr("d", line(data)); // or it can be done like this function redrawWithAnimation() { // update with animation graph.selectAll("path") .data([data]) // set the new data .attr("transform", "translate(" + x(1) + ")") // set the transform to the right by x(1) pixels (6 for the scale we've set) to hide the new value .attr("d", line) // apply the new data values ... 
but the new value is hidden at this point off the right of the canvas .transition() // start a transition to bring the new value into view .ease("linear") .duration(transitionDelay) // for this demo we want a continual slide so set this to the same as the setInterval amount below .attr("transform", "translate(" + x(0) + ")"); // animate a slide to the left back to x(0) pixels to reveal the new value } function redrawWithoutAnimation() { // static update without animation graph.selectAll("path") .data([data]) // set the new data .attr("d", line); // apply the new data values } function stopTimer() { clearInterval(myTimer); myTimer = null; graph.selectAll("path").data([data]).remove().append("svg:path").attr("d", line); buffer = null; signalGenerator(); } function startTimer() { if (myTimer == null) { myTimer = setInterval(function() { if (counter < data.length - 1) { var v = data.shift(); // remove the first element of the array data.push(v); // add a new element to the array (we're just taking the number we just shifted off the front and appending to the end) if(animate) { redrawWithAnimation(); } else { redrawWithoutAnimation(); } counter++; } else { //alert("no more data in buffer"); stopTimer(); counter = startIndex; } }, updateDelay); } } startTimer(); } var buffer; function signalGenerator() { if (buffer == null) { buffer = new Array(100); var i; for (i = 0; i < buffer.length; i++) { buffer[i] = Math.random() * 10; } FeedDataToChart("#graph1", 300, 300, "basis", true, 100, 100, buffer, 0); } } function startGenerator() { signalGenerator(); } startGenerator(); </script> </body> </html>
I ran it as you described for 2 hours: memory usage was 56 MB initially and around 56.8 MB at the end. That is a difference of only 0.8 MB, with some exceptional cases. But I can help you find the exact point where the memory load occurs. Just follow these steps one by one. Open the IE "Developer Tools" by pressing F12. Go to Memory (the camera symbol, or CTRL+7). Click Start Profiling Session (the green play button at the top). Take a heap snapshot to create a baseline. Then take another heap snapshot every 10 or 15 minutes. Do this for as many hours as you require (in your case, 2 hours). Once profiling has run for the desired time, stop it and analyze from the beginning by comparing the heap snapshots. If the memory difference between the beginning and the end is large, find where the increase started by analyzing the memory difference between snapshots. There you can check the difference in memory used by the process in bytes or KB, and check which function, variable, or operation is creating the memory issue. Most probably it is some calculations that are carried out repeatedly, so that the variables used in them are no longer released after a certain point in time. I saw some "Ba, p", "n, p", "Wa, n, p", etc. when I analyzed the memory flow. I believe the functions that use these variables are creating the problem for you. Note: if you use UI Responsiveness (CTRL+5), you can easily see that garbage collection is carried out by IE automatically.