Average every 'x' values in an array in node.js - javascript

I have a processing sketch that takes a sound level reading every second. It outputs it to a file, the end result looking like this:
17.51.11, 0.05900923, 0.059008658
17.51.12, 0.4037964, 0.40379566
17.51.13, 0.10111399, 0.101112984
17.51.14, 0.5614912, 0.56148833
17.51.15, 0.45223546, 0.45223528
17.51.16, 0.1956841, 0.19568439
17.51.17, 0.17803124, 0.17803174
(the two values are left and right audio in, I would also like to average these if possible!)
Node imports and splits this data, and outputs it to a chart (using chart.js) - however obviously for any real length of time the chart would be either unreadable, or stupidly long. So I want to get an average of every minute's worth of data. I'm not sure how to do this - and I'm worried about screwing up the timestamp.
var http = require('http');
var fs = require('fs');

var results = [];
var left = [];
var right = [];
var timestamp = [];
var rightfin = [];
var timefin = [];
var leftfin = [];

fs.readFile('20.8.2015_17.51.9.txt', "utf-8", function (err, data) {
    if (err) throw err;
    results = data.split("\n");
});

function callback() {
    var lines = results;
    for (var line in lines) {
        var x = [];
        x = lines[line].split(", ");
        timestamp.push("'" + x[0] + "'"); //doesn't seem to work without adding quotes.
        left.push(x[1] * 1000);
        right.push(x[2] * 1000);
    }
}

http.createServer(function (request, response) {
    callback();
    var leftfin = left.toString('utf8');
    var rightfin = right.toString('utf8');
    var timefin = timestamp.toString('utf8');
    fs.readFile('chart.js', function (err, data) {
        if (err) { throw err; }
        response.writeHead(200, { 'Content-Type': 'text/html' });
        var chartjs = data.toString('utf-8');
        response.write("<html>\n<head><meta charset='utf-8'><script src='http://ajax.googleapis.com/ajax/libs/jquery/1.9.1/jquery.min.js'></script>\n<title>Sound Levels</title>\n</head><body onLoad='init()'><script type = 'text/javascript'>\n" + chartjs);
        response.write("window.onLoad = function() \n{init();};\n function init() {");
        response.write("\nvar ctx = $('#line').get(0).getContext('2d'); \nvar data = { \nlabels: [");
        response.write(timefin);
        response.write("], \ndatasets: [ \n{ \nlabel: 'My First dataset', \nfillColor: 'rgba(220,220,220,0.5)', \nstrokeColor: 'rgba(220,220,220,0.8)', \nhighlightFill: 'rgba(220,220,220,0.75)',\nhighlightStroke: 'rgba(220,220,220,1)', \ndata: [");
        response.write(leftfin);
        response.write("]}, \n{ \nlabel: 'My Second dataset', \nfillColor: 'rgba(151,187,205,0.5)', \nstrokeColor: 'rgba(151,187,205,0.8)', \nhighlightFill: 'rgba(151,187,205,0.75)', \nhighlightStroke: 'rgba(151,187,205,1)', \ndata: [");
        response.write(rightfin);
        response.write("] \n} ]\n}; \nvar myBarChart = new Chart(ctx).Bar(data);}</script>");
        response.write(" <center> <canvas id='line' width='7500' height='400'> </canvas></center> </body></html>");
        response.end();
    });
}).listen(8124);
console.log('Server running at http://127.0.0.1:8124/');

Here is a helper you can call from your callback to average the left and right values over a set of lines:
var parseLines = function (lines) {
    // make sure we have lines, or return a default
    if (lines == null || !lines.length) { return [0.0, 0.0]; }
    var left_sum = 0.0,
        right_sum = 0.0,
        count = 0,
        line, i, left, right;
    // use for(;;) instead of for..in, to exclude properties that are not values
    for (i = 0; i < lines.length; i += 1) {
        line = lines[i].split(',');
        // trim the whitespace around each number and convert it to a float
        left = parseFloat(line[1].trim());
        right = parseFloat(line[2].trim());
        // skip malformed lines, and don't count them in the average
        if (isNaN(left) || isNaN(right)) { continue; }
        left_sum += left;
        right_sum += right;
        count += 1;
    }
    // return an array of [ left_average, right_average ]
    return count ? [left_sum / count, right_sum / count] : [0.0, 0.0];
};

var averages = parseLines(results);
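To go from one overall average to one data point per minute, you could chunk the parsed readings into groups of 60 (one reading per second) and average each group, labelling it with the first timestamp in the chunk. A minimal sketch, assuming the readings are rows of [timestamp, left, right] as in the sample file above (the chunkSize of 60 and the mono field are my additions, not part of the original code):

// readings: array of [timestamp, left, right] rows parsed from the file
function averagePerMinute(readings, chunkSize) {
    chunkSize = chunkSize || 60; // one reading per second -> 60 per minute
    var points = [];
    for (var i = 0; i < readings.length; i += chunkSize) {
        var chunk = readings.slice(i, i + chunkSize);
        var leftSum = 0, rightSum = 0;
        for (var j = 0; j < chunk.length; j += 1) {
            leftSum += parseFloat(chunk[j][1]);
            rightSum += parseFloat(chunk[j][2]);
        }
        points.push({
            time: chunk[0][0], // label the minute with its first timestamp
            left: leftSum / chunk.length,
            right: rightSum / chunk.length,
            mono: (leftSum + rightSum) / (2 * chunk.length) // combined left/right average
        });
    }
    return points;
}

The timestamps stay intact because each chunk keeps its first one as the label, and the mono field covers the question's side note about averaging the left and right channels together.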

Related

Write final JSON to a file from repeated requests to a REST API

I am trying to build a file of JSON data from repeated calls to a REST API. The final file to be written is the sum of the data received from all the calls. At present the file is written with the contents of the first call, then overwritten by the contents of the first + second call (see console output below the code).
As I have to make many calls, once the code is working I would like to write the file only after all the requests have finished and the JSON string has been built. Does anyone know how I would go about doing this? Maybe with a callback(?), which I still don't have the hang of.
"use strict";
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../path tocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
url: '',
method: 'GET',
accept: "application/json",
json: true,
};
var csvData = [];
var jsonData = "[";
fs.createReadStream(path)
.pipe(parse({delimiter: ','}))
.on('data', function(data) {
csvData.push(data[1]);
})
.on('end',function() {
var start = Date.now();
var records = csvData.length //2212 objects
console.log(records);
var dataLength = 2 //set low at moment
for (var i = 0; i < dataLength; i += 1) {
var url = shapes + csvData[i];
options.url = url; //set url query
request(options, function(error, response, body) {
var time = Date.now() - start;
var s = JSON.stringify(body.response);
console.log( '\n' + (Buffer.byteLength(s)/1000).toFixed(2)+
" kilobytes downloaded in: " + (time/1000) + " sec");
console.log(i)
buildJSON(s);
});
}
function buildJSON(s) {
var newStr = s.substring(1, s .length-1);
jsonData += newStr + ',';
writeFile(jsonData);
}
function writeFile(jsonData) {
fs.writeFile(pathJSON, jsonData, function(err) {
if (err) {
return console.log(err);
} else {
console.log("file complete")
}
});
}
});
128.13 kilobytes downloaded in: 2.796 sec
2
file complete
256.21 kilobytes downloaded in: 3.167 sec
2
file complete
Perhaps writing to the file after all requests are complete will help. In the current code, the writeFile function is called each time a request completes, which overwrites the file each time.
A quick way to fix this is to count requests (and failures) and write to the file only after all the requests are complete.
"use strict";
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../path tocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
url: '',
method: 'GET',
accept: "application/json",
json: true,
};
var csvData = [];
var jsonData = "[";
fs.createReadStream(path)
.pipe(parse({
delimiter: ','
}))
.on('data', function (data) {
csvData.push(data[1]);
})
.on('end', function () {
var start = Date.now();
var records = csvData.length //2212 objects
console.log(records);
var dataLength = 2 //set low at moment
var jsonsDownloaded = 0; // Counter to track complete JSON requests
var jsonsFailed = 0; // Counter to handle failed JSON requests
for (var i = 0; i < dataLength; i += 1) {
var url = shapes + csvData[i];
options.url = url; //set url query
request(options, function (error, response, body) {
if(error){
jsonsFailed++;
writeFile(jsonData);
return;
}
jsonsDownloaded++;
var time = Date.now() - start;
var s = JSON.stringify(body.response);
console.log('\n' + (Buffer.byteLength(s) / 1000).toFixed(2) +
" kilobytes downloaded in: " + (time / 1000) + " sec");
console.log(i)
buildJSON(s);
});
}
function buildJSON(s) {
var newStr = s.substring(1, s.length - 1);
jsonData += newStr + ',';
writeFile(jsonData);
}
function writeFile(jsonData) {
if(dataLength - (jsonsDownloaded + jsonsFailed) > 0){
return;
}
fs.writeFile(pathJSON, jsonData, function (err) {
if (err) {
return console.log(err);
} else {
console.log("file complete")
}
});
}
});
Note:
Requests fired in quick succession (e.g. 2000 requests in a for loop) do not work well in my experience. Try batching them. Also, doing it this way does not guarantee order (if that is important in your use case).
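For illustration, here is a hedged sketch of batching with Promise.all; the fetchOne helper and batchSize are assumptions for the example, not part of the original code, and it reuses the request library from above.

function fetchOne(url) {
    // wrap the callback-style request() call in a Promise
    return new Promise(function (resolve, reject) {
        request({ url: url, method: 'GET', json: true }, function (error, response, body) {
            if (error) { return reject(error); }
            resolve(body);
        });
    });
}

async function fetchInBatches(urls, batchSize) {
    var results = [];
    for (var i = 0; i < urls.length; i += batchSize) {
        // wait for the current batch to settle before firing the next one
        var batch = urls.slice(i, i + batchSize).map(fetchOne);
        results = results.concat(await Promise.all(batch));
    }
    return results;
}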
An alternative would be to open your file in append mode. You can do this by passing an extra options object, with the flag set to 'a', to your fs.writeFile call.
fs.writeFile(pathJSON, jsonData, {
    flag: 'a'
}, function (err) {
    if (err) {
        return console.log(err);
    }
});
References:
fs.writeFile Docs
File system flags

Searching two tables in one function in DynamoDB

I am trying to link two tables in DynamoDB for an Amazon Alexa skill. I am using two tables: one is named 'yesno' and the other 'fixtures'. The fixtures table has a list of 22 names in each record, and these names are in the 'yesno' table along with the column 'goals'. Here you can see the tables in more detail (screenshots omitted). Name Table:
Fixtures Table:
As you can see, there are names that link the two tables together. I use the team1 column to search the fixtures table and the name column to search the name table. Here is my code for searching:
function readDynamoItem(params2, callback) {
    var AWS = require('aws-sdk');
    AWS.config.update({region: AWSregion});
    var dynamodb = new AWS.DynamoDB();
    const names = new Array();
    console.log('reading item from DynamoDB table');
    dynamodb.scan(params2, function (err, data) {
        if (err) console.log(err, err.stack); // an error occurred
        else {
            console.log(data); // successful response
            //tried to put an automatic loop for the long bit of code after this but it didn't work, so anyone with insight on this would be helpful too
            /*
            for (var i = 1; i <= 11; i++) {
                var str = "T1S";
                var pos = i.toString();
                pos = str.concat(pos);
                names[i] = jsonToString(data.Items[0].pos);
            }
            for (var j = 1; j <= 11; j++) {
                str = "T2S";
                pos = j.toString();
                pos = str.concat(pos);
                names[(j + 11)] = jsonToString(data.Items[0].pos);
            }
            */
            names[1] = jsonToString(data.Items[0].T1S1);
            names[2] = jsonToString(data.Items[0].T1S2);
            names[3] = jsonToString(data.Items[0].T1S3);
            names[4] = jsonToString(data.Items[0].T1S4);
            names[5] = jsonToString(data.Items[0].T1S5);
            names[6] = jsonToString(data.Items[0].T1S6);
            names[7] = jsonToString(data.Items[0].T1S7);
            names[8] = jsonToString(data.Items[0].T1S8);
            names[9] = jsonToString(data.Items[0].T1S9);
            names[10] = jsonToString(data.Items[0].T1S10);
            names[11] = jsonToString(data.Items[0].T1S11);
            names[12] = jsonToString(data.Items[0].T2S1);
            names[13] = jsonToString(data.Items[0].T2S2);
            names[14] = jsonToString(data.Items[0].T2S3);
            names[15] = jsonToString(data.Items[0].T2S4);
            names[16] = jsonToString(data.Items[0].T2S5);
            names[17] = jsonToString(data.Items[0].T2S6);
            names[18] = jsonToString(data.Items[0].T2S7);
            names[19] = jsonToString(data.Items[0].T2S8);
            names[20] = jsonToString(data.Items[0].T2S9);
            names[21] = jsonToString(data.Items[0].T2S10);
            names[22] = jsonToString(data.Items[0].T2S11);
        }
    });
    var goals = new Array();
    //for loop to be used later when expanding
    //for(var i = 1; i <= 22; i++){
    var params = {
        TableName: 'yesno',
        FilterExpression: 'name = :value',
        ExpressionAttributeValues: {':value': {"S": names[2]}}
    };
    dynamodb.scan(params, function (err, data) {
        if (err) console.log(err, err.stack); // an error occurred
        else {
            console.log(data); // successful response
            var temp = jsonToString(data.Items[0].goals);
            goals[1] = temp;
        }
        callback(goals[1]);
    });
    //}
}

function jsonToString(str) {
    str = JSON.stringify(str);
    str = str.replace('{\"S\":\"', '');
    str = str.replace('\"}', '');
    return str;
}
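As an aside, the jsonToString workaround above is only needed because the low-level client returns typed AttributeValues like {"S": "..."}. A sketch of the same scan using the SDK's DocumentClient, which unmarshalls those values for you (same table and column names assumed; note that name is on DynamoDB's reserved-word list, hence the ExpressionAttributeNames entry):

var AWS = require('aws-sdk');
var docClient = new AWS.DynamoDB.DocumentClient({ region: AWSregion });

var params = {
    TableName: 'yesno',
    FilterExpression: '#n = :value',
    ExpressionAttributeNames: { '#n': 'name' },
    ExpressionAttributeValues: { ':value': 'some player name' } // plain string, no {"S": ...}
};

docClient.scan(params, function (err, data) {
    if (err) return console.log(err, err.stack);
    console.log(data.Items[0].goals); // already a plain value
});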
I am trying to use the goals array to print each person's goals, but right now it won't even print one person's; instead it prints an undefined object of some sort. I'm guessing it just can't search the names table using the names array. The main bit of code I am having a problem with is the search of the yesno table, as you can see in this code:
var goals = new Array();
//for loop to be used later when expanding
//for(var i = 1; i <= 22; i++){
var params = {
    TableName: 'yesno',
    FilterExpression: 'name = :value',
    ExpressionAttributeValues: {':value': {"S": names[2]}}
};
dynamodb.scan(params, function (err, data) {
    if (err) console.log(err, err.stack); // an error occurred
    else {
        console.log(data); // successful response
        var temp = jsonToString(data.Items[0].goals);
        goals[1] = temp;
    }
    callback(goals[1]);
});
//}
I know for sure there is nothing wrong with the implementation but here it is just in case it is helpful:
const handlers = {
    'LaunchRequest': function () {
        this.response.speak('welcome to magic answers. ask me a yes or no question.').listen('try again');
        this.emit(':responseReady');
    },
    'MyIntent': function () {
        var MyQuestion = this.event.request.intent.slots.MyQuestion.value;
        console.log('MyQuestion : ' + MyQuestion);
        const params2 = {
            TableName: 'Fixtures',
            FilterExpression: 'team1 = :value',
            ExpressionAttributeValues: {':value': {"S": MyQuestion.toLowerCase()}}
        };
        //const params3 = {
        //    TableName: 'Fixtures',
        //    FilterExpression: 'team2 = :value',
        //    ExpressionAttributeValues: {':value': {"S": MyQuestion.toLowerCase()}}
        //};
        readDynamoItem(params2, myResult => {
            var say = MyQuestion;
            say = myResult;
            say = 'The top scorer for ' + MyQuestion + ' is ' + myResult;
            this.response.speak(say).listen('try again');
            this.emit(':responseReady');
        });
    },
    'AMAZON.HelpIntent': function () {
        this.response.speak('ask me a yes or no question.').listen('try again');
        this.emit(':responseReady');
    },
    'AMAZON.CancelIntent': function () {
        this.response.speak('Goodbye!');
        this.emit(':responseReady');
    },
    'AMAZON.StopIntent': function () {
        this.response.speak('Goodbye!');
        this.emit(':responseReady');
    }
};
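For what it's worth, dynamodb.scan is asynchronous, so in readDynamoItem above the second scan runs before the first one has filled the names array; names[2] is still undefined when the params for the yesno scan are built, which matches the undefined result described. A minimal sketch of a fix, assuming the same params2 and jsonToString from the question, is to issue the second scan from inside the first scan's callback:

function readDynamoItem(params2, callback) {
    var AWS = require('aws-sdk');
    AWS.config.update({ region: AWSregion });
    var dynamodb = new AWS.DynamoDB();
    dynamodb.scan(params2, function (err, data) {
        if (err) return console.log(err, err.stack);
        var name = jsonToString(data.Items[0].T1S2); // the same field that fed names[2]
        var params = {
            TableName: 'yesno',
            FilterExpression: '#n = :value', // see the reserved-word note above
            ExpressionAttributeNames: { '#n': 'name' },
            ExpressionAttributeValues: { ':value': { "S": name } }
        };
        // only now does the second scan have a real name to filter on
        dynamodb.scan(params, function (err2, data2) {
            if (err2) return console.log(err2, err2.stack);
            callback(jsonToString(data2.Items[0].goals));
        });
    });
}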

How to empty an Array in a Script

I have a script that uses AJAX/PHP/SQL to query data and push it into an array with a bunch of IF statements. The changeData function is called every 6 seconds. On the first query I get back 6 entries. The second time I send a request, the array I push into (isVacant1) doubles to 12. After a while, I have over 500 entries going into my .each statement.
How do I 'clear' this every time I make a request so that I am not accumulating entries? Any help is most appreciated.
function changeData() {
    isPaused = true;
    var mydata0 = null;
    $.post('php/ProductionChange.php', {
        'WC': cc
    }, function(data) { // This is where I use an AJAX call into a php file.
        mydata0 = data; // This takes the array from the call and puts it into a variable
        var pa = JSON.parse(mydata0); // This parses the data into arrays and elements
        var temp = {};
        var bayData = '';
        if (pa != null) {
            for (var i = 0; i <= pa.length - 1; i++) {
                var job = pa[i][0];
                var shipdate = pa[i][1];
                var status = pa[i][2];
                var name = pa[i][3];
                var EnclLoc = pa[i][13];
                var Enclsize = pa[i][14];
                var backpan = pa[i][15];
                var percentCom = pa[i][16];
                var IsVisible = pa[i][17];
                var png = pa[i][18];
                var WorkC = pa[i][20];
                baydata = 'bayData' + i + '';
                temp = {
                    job, shipdate, name, EnclLoc, Enclsize, backpan, percentCom, IsVisible, png, WorkC, status
                };
                isVacant1.push({
                    baydata: temp
                });
            }
        } else {
            ii = 1;
            //alert("There are no more job numbers in this bay location. Thank you. ");
        }
        $.each(isVacant1, function(key, value) {
            var job = value.baydata.job;
            var ship = value.baydata.shipdate;
            var name = value.baydata.name;
            var encl = value.baydata.EnclLoc;
            var EnclSize = value.baydata.EnclLoc;
            var percentCom = value.baydata.percentCom;
            var backpan = value.baydata.backpan;
            var PngLogo = value.baydata.png;
            var IsVisible = value.baydata.IsVisible;
            var WorkC = value.baydata.WorkC;
            var status = value.baydata.status;
            var p = WorkC;
            WorkC = (WorkC < 10) ? ("0" + WorkC) : WorkC;
            //// remember if the encl location matches the workcell cell then do stuff based on that....... hint encl image not hiding because of duplicate 17s
            if (((encl == p) || (backpan == p)) && job != 123) {
                $('#WC' + p).show();
                document.getElementById("bayData" + p).innerHTML = name + ' ' + ship; // Work Cell Name and Ship Date
                document.getElementById("bayData" + p + "a").innerHTML = job; // Work cell Job Number
                document.getElementById("percentCom" + p).innerHTML = percentCom + '%'; // Work Cell Percent Complete
            } else {
                $('#WC' + p).hide();
From your question it looks like you want to clear the isVacant1 array.
In your ajax callback, just put isVacant1 = []; as the first line. Like this:
function(data) { // This is where I use an AJAX call into a php file.
    isVacant1 = [];
    mydata0 = data; // This takes the array from the call and puts it into a variable
    var pa = JSON.parse(mydata0); // This parses the data into arrays and elements
    var temp = {};
    var bayData = '';
    ..................
From your code it's not clear how you are declaring/initializing isVacant1, so I have suggested isVacant1 = []; otherwise you can also use isVacant1.length = 0. The difference is illustrated in the sketch below.
You can also take a look here: How do I empty an array in JavaScript?
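The distinction matters if anything else still holds a reference to the array; a small illustration (the variable names are just for the example):

var a = [1, 2, 3];
var b = a;      // b points at the same array
a = [];         // rebinds a only; b still sees the old array
console.log(b); // [1, 2, 3]

var c = [1, 2, 3];
var d = c;
c.length = 0;   // empties the array in place; every reference sees it
console.log(d); // []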

Wait for Javascript Web Scraping Function to finish before running for next page?

I am attempting to create a web scraper (in node.js) that will pull down information from a site, and write it to a file. I have it built to correctly work for one page, but when I try to use the function in a for loop, to iterate through multiple games, I get bad data in all of the games.
I understand that this is related to Javascript's asynchronous nature, and I have read about callback functions, but I'm not sure I understand how to apply it to my code. Any help would be GREATLY appreciated:
for (x = 4648; x < 4650; x++) { //iterate over a few gameIDs, used in URL for request
    scrapeGame(x);
}

function scrapeGame(gameId) {
    //request from URL, scrape HTML to arrays as necessary
    //write final array to file
}
Essentially, what I am looking to do, is within the for loop, tell it to WAIT to finish the scrapeGame(x) function before incrementing x and running it for the next game -- otherwise, the arrays start to overwrite each other and the data becomes a huge mess.
EDIT: I've now included the full code which I am attempting to run! I'm getting errors when looking at the files after they are written. For example, the first file is 8 KB, the second is ~16, the third is ~32, etc. It seems things aren't getting cleared before the next game runs.
Idea of the program is to pull Jeopardy questions/answers from the archive site in order to eventually build a quiz app for myself.
//Iterate over arbitrary number of games, scrape each
for (x = 4648; x < 4650; x++) {
    scrapeGame(x, function(scrapeResult) {
        if (scrapeResult) {
            console.log('Scrape Successful');
        } else {
            console.log('Scrape ERROR');
        }
    });
}

function scrapeGame(gameId, callback) {
    var request = require('request');
    cheerio = require('cheerio');
    fs = require('fs');
    categories = [];
    categorylist = [];
    ids = [];
    clues = [];
    values = ['0', '$200', '$400', '$600', '$800', '$1000', '$400', '$800', '$1200', '$1600', '$2000'];
    valuelist = [];
    answers = [];
    array = [];
    file = [];
    status = false;
    var showGameURL = 'http://www.j-archive.com/showgame.php?game_id=' + gameId;
    var showAnswerURL = 'http://www.j-archive.com/showgameresponses.php?game_id=' + gameId;
    request(showGameURL, function(err, resp, body) {
        if (!err && resp.statusCode === 200) {
            var $ = cheerio.load(body);
            //add a row to categories to avoid starting at 0
            categories.push('Category List');
            //pull all categories to use for later
            $('td.category_name').each(function() {
                var category = $(this).text();
                categories.push(category);
            });
            //pull all clue IDs (coordinates), store to 1d array
            //pull any id that has "stuck" in the string, to prevent duplicates
            $("[id*='stuck']").each(function() {
                var id = $(this).attr('id');
                id = id.toString();
                id = id.substring(0, id.length - 6);
                ids.push(id);
                //if single J, pick category 1-6
                if (id.indexOf("_J_") !== -1) {
                    var catid = id.charAt(7);
                    categorylist.push(categories[catid]);
                    var valId = id.charAt(9);
                    valuelist.push(values[valId]);
                }
                //if double J, pick category 7-12
                else if (id.indexOf("_DJ_") !== -1) {
                    var catid = parseInt(id.charAt(8)) + 6;
                    categorylist.push(categories[catid]);
                    var valId = parseInt(id.charAt(10)) + 5;
                    valuelist.push(values[valId]);
                }
                //if final J, pick category 13
                else {
                    categorylist.push(categories[13]);
                }
            });
            //pull all clue texts, store to 1d array
            $('td.clue_text').each(function() {
                var clue = $(this).text();
                clues.push(clue);
            });
            //push pulled values to big array
            array.push(ids);
            array.push(categorylist);
            array.push(valuelist);
            array.push(clues);
            //new request to different URL to pull responses
            request(showAnswerURL, function(err, resp, body) {
                if (!err && resp.statusCode === 200) {
                    var $ = cheerio.load(body);
                    $('.correct_response').each(function() {
                        var answer = $(this).text();
                        answers.push(answer);
                    });
                    //push answers to big array
                    array.push(answers);
                    //combine arrays into 1-d array to prep for writing to file
                    for (var i = 0; i < array[0].length; i++) {
                        var print = array[0][i] + "|" + array[1][i] + "|" + array[2][i] + "|" + array[3][i] + "|" + array[4][i];
                        var stringPrint = print.toString();
                        file.push(stringPrint);
                    }
                    //update string, add newlines, etc.
                    var stringFile = JSON.stringify(file);
                    stringFile = stringFile.split('\\').join('');
                    stringFile = stringFile.split('","').join('\n');
                    //write to file, eventually will append to end of one big file
                    fs.writeFile('J_GAME_' + gameId + '.txt', stringFile, function(err) {
                        if (err) {
                            console.log(err);
                        } else {
                            console.log("Game #" + gameId + " has been scraped.");
                            status = true;
                        }
                    });
                }
            });
        }
    });
    //clear arrays used
    valuelist = [];
    answers = [];
    categories = [];
    categorylist = [];
    ids = [];
    clues = [];
    array = [];
    file = [];
    //feed callback status
    callback(status);
}
// Iterate over a few gameIDs, used in URL for request.
for (x = 4648; x < 4650; x++) {
    // Pass in the callback as an anonymous function.
    // So below I am passing in the id and the function I want to execute.
    // AND, defining the results I am expecting as passed in arguments.
    scrapeGame(x, function(scrapeResult, err) {
        // This will *NOT* execute *UNTIL* scrapeGame invokes it below.
        // (Note: the for loop itself is not paused - the requests are
        // still started in parallel; the callback only tells you when
        // each one has finished.)
        // This function receives the status that is passed in,
        // in this case, a boolean true/false and an error if any.
        if (scrapeResult) {
            // Scrape was true, nothing to do.
            console.log('Scrape Successful');
        } else {
            // Scrape was false, output the error to console.log.
            // (You cannot `break` out of the loop from inside a
            // callback - it is a separate function - so just return;
            // the loop moves on to the next game ID regardless.)
            console.log('Scrape ERROR :: ' + err);
            return;
        }
    });
}
// This function now accepts two arguments.
function scrapeGame(gameId, callback) {
    // ************************************************
    // ** Do Your Work Here **
    // Request from URL, scrape HTML to arrays as necessary.
    // Write final array to file.
    // After file creation, execute the callback and pass bool
    // status (true/false).
    // ************************************************
    var request = require('request'),
        cheerio = require('cheerio'),
        fs = require('fs'),
        categories = [],
        categorylist = [],
        ids = [],
        clues = [],
        values = [
            '0',
            '$200',
            '$400',
            '$600',
            '$800',
            '$1000',
            '$400',
            '$800',
            '$1200',
            '$1600',
            '$2000'
        ],
        valuelist = [],
        answers = [],
        array = [],
        file = [],
        showGameURL = 'http://www.j-archive.com/showgame.php?game_id=' + gameId,
        showAnswerURL = 'http://www.j-archive.com/showgameresponses.php?game_id=' + gameId;

    request(showGameURL, function(err, resp, body) {
        if (!err && resp.statusCode === 200) {
            var $ = cheerio.load(body);
            //add a row to categories to avoid starting at 0
            categories.push('Category List');
            //pull all categories to use for later
            $('td.category_name').each(function() {
                var category = $(this).text();
                categories.push(category);
            });
            //pull all clue IDs (coordinates), store to 1d array
            //pull any id that has "stuck" in the string, to prevent duplicates
            $("[id*='stuck']").each(function() {
                var id = $(this).attr('id');
                id = id.toString();
                id = id.substring(0, id.length - 6);
                ids.push(id);
                //if single J, pick category 1-6
                if (id.indexOf("_J_") !== -1) {
                    var catid = id.charAt(7);
                    categorylist.push(categories[catid]);
                    var valId = id.charAt(9);
                    valuelist.push(values[valId]);
                }
                //if double J, pick category 7-12
                else if (id.indexOf("_DJ_") !== -1) {
                    var catid = parseInt(id.charAt(8)) + 6;
                    categorylist.push(categories[catid]);
                    var valId = parseInt(id.charAt(10)) + 5;
                    valuelist.push(values[valId]);
                }
                //if final J, pick category 13
                else {
                    categorylist.push(categories[13]);
                }
            });
            //pull all clue texts, store to 1d array
            $('td.clue_text').each(function() {
                var clue = $(this).text();
                clues.push(clue);
            });
            //push pulled values to big array
            array.push(ids);
            array.push(categorylist);
            array.push(valuelist);
            array.push(clues);
            //new request to different URL to pull responses
            request(showAnswerURL, function(err, resp, body) {
                if (!err && resp.statusCode === 200) {
                    var $ = cheerio.load(body);
                    $('.correct_response').each(function() {
                        var answer = $(this).text();
                        answers.push(answer);
                    });
                    //push answers to big array
                    array.push(answers);
                    //combine arrays into 1-d array to prep for writing to file
                    for (var i = 0; i < array[0].length; i++) {
                        var print = array[0][i] + "|" + array[1][i] + "|" + array[2][i] + "|" + array[3][i] + "|" + array[4][i];
                        var stringPrint = print.toString();
                        file.push(stringPrint);
                    }
                    //update string, add newlines, etc.
                    var stringFile = JSON.stringify(file);
                    stringFile = stringFile.split('\\').join('');
                    stringFile = stringFile.split('","').join('\n');
                    //write to file, eventually will append to end of one big file
                    fs.writeFile('J_GAME_' + gameId + '.txt', stringFile, function(err) {
                        //clear arrays used
                        valuelist = [];
                        answers = [];
                        categories = [];
                        categorylist = [];
                        ids = [];
                        clues = [];
                        array = [];
                        file = [];
                        if (err) {
                            // ******************************************
                            // Callback false with error.
                            callback(false, err);
                            // ******************************************
                        } else {
                            console.log("Game #" + gameId + " has been scraped.");
                            // ******************************************
                            // Callback true with no error.
                            callback(true);
                            // ******************************************
                        }
                    });
                }
            });
        }
    });
}
My assumption is that you want them to be scraped one after another, not in parallel. So a for loop won't help. The following approach should do the trick:

var x = 4648;

function next() {
    if (x >= 4650) { return; }
    // scrape the current game; only start the next one
    // once the callback says this one is finished
    scrapeGame(x, function() {
        x += 1;
        next();
    });
}
next();

function scrapeGame(gameId, cb) {
    //request from URL, scrape HTML to arrays as necessary
    //write final array to file, then call cb()
}
For nested async functions that you want executed in a serial manner, you should just forget about the for loop.
An example of correct request handling with the http client:

var http = require('http');

function scrapeGame(gameId, cb) {
    //your code and set options
    var req = http.request(options, function(response) {
        var result = "";
        response.on('data', function(chunk) {
            result += chunk;
        });
        response.on('end', function() {
            //write data here;
            //do the callback
            cb();
        });
    });
    req.end(); // the request is not actually sent until end() is called
}
I solved the ROOT cause of the issue that I was seeing, though I do believe without the callback assistance from red above, I would have been just as lost.
It turns out the data was being processed correctly, but the file writes were getting scrambled. There is a different method to call instead of writeFile or appendFile:
fs.appendFileSync();
Calling the synchronous version processed the writes to the file IN THE ORDER they were issued, instead of just going for it. This, in addition to the callback help above, solved the issue.
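For reference, a minimal usage sketch (assuming the stringFile built in the code above; the filename is just a placeholder):

var fs = require('fs');
// blocks until the write completes, so successive calls land
// in the file in the order they are made
fs.appendFileSync('J_GAMES.txt', stringFile + '\n');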
Thanks to everyone for the assistance!

node.js: read a text file into an array. (Each line an item in the array.)

I would like to read a very, very large file into a JavaScript array in node.js.
So, if the file is like this:
first line
two
three
...
...
I would have the array:
['first line','two','three', ... , ... ]
The function would look like this:
var array = load(filename);
Since the file is very large, the idea of loading it all in as one string and then splitting it is not acceptable.
Synchronous:
var fs = require('fs');
var array = fs.readFileSync('file.txt').toString().split("\n");
for (i in array) {
    console.log(array[i]);
}
Asynchronous:
var fs = require('fs');
fs.readFile('file.txt', function(err, data) {
    if (err) throw err;
    var array = data.toString().split("\n");
    for (i in array) {
        console.log(array[i]);
    }
});
If you can fit the final data into an array then wouldn't you also be able to fit it in a string and split it, as has been suggested?
In any case if you would like to process the file one line at a time you can also try something like this:
var fs = require('fs');

function readLines(input, func) {
    var remaining = '';
    input.on('data', function(data) {
        remaining += data;
        var index = remaining.indexOf('\n');
        while (index > -1) {
            var line = remaining.substring(0, index);
            remaining = remaining.substring(index + 1);
            func(line);
            index = remaining.indexOf('\n');
        }
    });
    input.on('end', function() {
        if (remaining.length > 0) {
            func(remaining);
        }
    });
}

function func(data) {
    console.log('Line: ' + data);
}

var input = fs.createReadStream('lines.txt');
readLines(input, func);
EDIT: (in response to comment by phopkins) I think (at least in newer versions) substring does not copy data but creates a special SlicedString object (from a quick glance at the v8 source code). In any case here is a modification that avoids the mentioned substring (tested on a file several megabytes worth of "All work and no play makes Jack a dull boy"):
function readLines(input, func) {
    var remaining = '';
    input.on('data', function(data) {
        remaining += data;
        var index = remaining.indexOf('\n');
        var last = 0;
        while (index > -1) {
            var line = remaining.substring(last, index);
            last = index + 1;
            func(line);
            index = remaining.indexOf('\n', last);
        }
        remaining = remaining.substring(last);
    });
    input.on('end', function() {
        if (remaining.length > 0) {
            func(remaining);
        }
    });
}
Using the Node.js readline module.
var fs = require('fs');
var readline = require('readline');

var filename = process.argv[2];
readline.createInterface({
    input: fs.createReadStream(filename),
    terminal: false
}).on('line', function(line) {
    console.log('Line: ' + line);
});
js:
var fs = require('fs');
var array = fs.readFileSync('file.txt', 'utf8').split('\n');
ts:
import * as fs from 'fs';
var array = fs.readFileSync('file.txt', 'utf8').toString().split('\n');
Essentially this will do the job: .replace(/\r\n/g,'\n').split('\n').
This works on Mac, Linux & Windows.
Code Snippets
Synchronous:
const { readFileSync } = require('fs');
const array = readFileSync('file.txt').toString().replace(/\r\n/g, '\n').split('\n');
for (let i of array) {
    console.log(i);
}
Asynchronous:
With the fs.promises API that provides an alternative set of asynchronous file system methods that return Promise objects rather than using callbacks. (No need to promisify; you can use async-await with this too, available on and after Node.js version 10.0.0.)
const { readFile } = require('fs').promises;
readFile('file.txt')
    .then(data => {
        const arr = data.toString().replace(/\r\n/g, '\n').split('\n');
        for (let i of arr) {
            console.log(i);
        }
    })
    .catch(err => { throw err; });
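Since async-await is mentioned above, here is the same read written that way (just a stylistic variant of the same fs.promises call):

const { readFile } = require('fs').promises;

async function load(filename) {
    // readFile with an encoding resolves to a string directly
    const data = await readFile(filename, 'utf8');
    return data.replace(/\r\n/g, '\n').split('\n');
}

load('file.txt').then(array => console.log(array));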
More about \r & \n here: \r\n, \r and \n what is the difference between them?
Use readline (documentation). Here's an example reading a CSS file, parsing it for icons and writing them to JSON:
var fs = require('fs');
var results = [];
var rl = require('readline').createInterface({
    input: fs.createReadStream('./assets/stylesheets/_icons.scss')
});

// for every new line, if it matches the regex, add it to an array
// this is ugly regex :)
rl.on('line', function(line) {
    var re = /\.icon-icon.*:/;
    var match;
    if ((match = re.exec(line)) !== null) {
        results.push(match[0].replace(".", '').replace(":", ''));
    }
});

// readline emits a close event when the file is read.
rl.on('close', function() {
    var outputFilename = './icons.json';
    fs.writeFile(outputFilename, JSON.stringify(results, null, 2), function(err) {
        if (err) {
            console.log(err);
        } else {
            console.log("JSON saved to " + outputFilename);
        }
    });
});
file.lines with my JFile package
Pseudo:
var JFile = require('jfile');
var myF = new JFile("./data.txt");
myF.lines // ["first line","second line"] ....
Don't forget to install it first:
npm install jfile --save
With a BufferedReader (from the third-party buffered-reader package), but the function should be asynchronous:

var load = function (file, cb) {
    var lines = [];
    new BufferedReader(file, { encoding: "utf8" })
        .on("error", function (error) {
            cb(error, null);
        })
        .on("line", function (line) {
            lines.push(line);
        })
        .on("end", function () {
            cb(null, lines);
        })
        .read();
};

load("file", function (error, lines) {
    if (error) return console.log(error);
    console.log(lines);
});
To read a big file into an array you can read line by line or chunk by chunk.
Line by line - refer to my answer here:
var fs = require('fs'),
    es = require('event-stream');

var lines = [];
var s = fs.createReadStream('filepath')
    .pipe(es.split())
    .pipe(es.mapSync(function(line) {
        //pause the readstream
        s.pause();
        lines.push(line);
        s.resume();
    })
    .on('error', function(err) {
        console.log('Error:', err);
    })
    .on('end', function() {
        console.log('Finish reading.');
        console.log(lines);
    })
    );
Chunk by chunk - refer to this article:
var fs = require('fs');

var offset = 0;
var chunkSize = 2048;
var chunkBuffer = Buffer.alloc(chunkSize); // new Buffer(size) is deprecated
var fp = fs.openSync('filepath', 'r');
var bytesRead = 0;
var lines = [];
while ((bytesRead = fs.readSync(fp, chunkBuffer, 0, chunkSize, offset))) {
    offset += bytesRead;
    var str = chunkBuffer.slice(0, bytesRead).toString();
    var arr = str.split('\n');
    if (bytesRead == chunkSize) {
        // the last item of the arr may not be a full line, leave it to the next chunk
        offset -= arr.pop().length;
    }
    lines.push(arr);
}
console.log(lines);
This is a variation on the answer above by @mtomis.
It creates a stream of lines. It emits 'data' and 'end' events, allowing you to handle the end of the stream.

var events = require('events');

var LineStream = function (input) {
    var remaining = '';
    input.on('data', function (data) {
        remaining += data;
        var index = remaining.indexOf('\n');
        var last = 0;
        while (index > -1) {
            var line = remaining.substring(last, index);
            last = index + 1;
            this.emit('data', line);
            index = remaining.indexOf('\n', last);
        }
        remaining = remaining.substring(last);
    }.bind(this));
    input.on('end', function () {
        if (remaining.length > 0) {
            this.emit('data', remaining);
        }
        this.emit('end');
    }.bind(this));
}

LineStream.prototype = new events.EventEmitter;

Use it as a wrapper:

var lineInput = new LineStream(input);
lineInput.on('data', function (line) {
    // handle line
});
lineInput.on('end', function() {
    // wrap it up
});
I just want to add to @finbarr's great answer a little fix in the asynchronous example:

Asynchronous:
var fs = require('fs');
fs.readFile('file.txt', function(err, data) {
    if (err) throw err;
    var array = data.toString().split("\n");
    for (i in array) {
        console.log(array[i]);
    }
    done();
});

@MadPhysicist, done() is what releases the async call.
Node.js v8 and later have a new feature that converts a normal callback-style function into a promise-returning one:
util.promisify
It's an awesome feature. Here's an example of parsing 10000 numbers from a txt file into an array, counting inversions using merge sort on the numbers.
// read from txt file
const util = require('util');
const fs = require('fs');
fs.readFileAsync = util.promisify(fs.readFile);

let result = [];

const parseTxt = async (csvFile) => {
    let fields, obj;
    const data = await fs.readFileAsync(csvFile);
    const str = data.toString();
    const lines = str.split('\r\n');
    // const lines = str
    console.log("lines", lines);
    // console.log("str", str)
    lines.map(line => {
        if (!line) { return null; }
        result.push(Number(line));
    });
    console.log("result", result);
    return result;
};

parseTxt('./count-inversion.txt').then(() => {
    console.log(mergeSort({ arr: result, count: 0 }));
});
I had the same problem, and I solved it with the module line-by-line
https://www.npmjs.com/package/line-by-line
At least for me it works like a charm, in both synchronous and asynchronous mode.
Also, the problem with lines terminating (or not terminating) with \n can be solved with the option:
{ encoding: 'utf8', skipEmptyLines: false }
Synchronous processing of lines:

var LineByLineReader = require('line-by-line'),
    lr = new LineByLineReader('big_file.txt');

lr.on('error', function (err) {
    // 'err' contains error object
});

lr.on('line', function (line) {
    // 'line' contains the current line without the trailing newline character.
});

lr.on('end', function () {
    // All lines are read, file is closed now.
});
Another answer using an npm package. The nexline package allows one to asynchronously read a file line-by-line:
"use strict";
import fs from 'fs';
import nexline from 'nexline';
const lines = [];
const reader = nexline({
input: fs.createReadStream(`path/to/file.ext`)
});
while(true) {
const line = await reader.next();
if(line === null) break; // line is null if we reach the end
if(line.length === 0) continue; // Ignore empty lines
// Process the line here - below is just an example
lines.push(line);
}
This approach will work even if your text file is larger than the maximum allowed string length, thereby avoiding the Error: Cannot create a string longer than 0x1fffffe8 characters error.
To put each line as an item inside an array, a new function was added in Node.js v18.11.0 to read files line by line:
filehandle.readLines([options])
This is how you use it with a text file to read the file and put each line in an array:

import { open } from 'node:fs/promises';

const arr = [];
myFileReader();

async function myFileReader() {
    const file = await open('./TextFileName.txt');
    for await (const line of file.readLines()) {
        arr.push(line);
    }
    console.log(arr);
}

To understand more, read the Node.js documentation; here is the link for the file system readLines():
https://nodejs.org/api/fs.html#filehandlereadlinesoptions
