Related
Really struggling here and just lost at this point. Promises SEEM simple until I try to use them. I am providing my simplified code that includes no attempt at a promise to keep things simple. I have come back to this after a few months of just giving up. This little script reads the files in a directory line by line and adds all of those lines to an array. Once that is over, I want to write all of those files to a new file.
The problem, of course, is that writeAllLinesToFile is called before the allTheLines array has been filled with the lines read from the directory's files. The array is completely empty, so nothing from the files that were read is written to the new file.
I've given a lot of tries with this and just made a mess of my real code in that attempt. Can someone push me over this hump?
var allTheLines = [];
/**
 * Starts streaming `inputFile` line by line, pushing every line onto the
 * shared `allTheLines` array.
 *
 * The function returns as soon as the listeners are registered; the lines
 * themselves are appended later, as the reader emits them.
 *
 * @param {string} inputFile - Path of the file to read.
 */
function processFile(inputFile) {
  const { createReadStream } = require('fs');
  const { createInterface } = require('readline');
  const Stream = require('stream');

  const reader = createInterface(createReadStream(inputFile), new Stream());

  // Collect each line as it arrives.
  reader.on('line', (text) => {
    allTheLines.push(text);
  });

  // Emitted once the reader has no more lines to deliver.
  reader.on('close', () => {
    console.log('...end of a file');
  });
}
/**
 * Logs every entry of the shared `allTheLines` array and queues one
 * `appendFile` call per entry against ./allTheLines.html.
 *
 * "Done writing." is logged right after the calls have been queued.
 */
function writeAllLinesToFile() {
  const { appendFile } = require('fs');

  // Shared completion handler: surface any write error.
  const rethrow = (err) => {
    if (err) throw err;
  };

  for (const text of allTheLines) {
    console.log("line: : ", text);
    appendFile('./allTheLines.html', text + '\n', rethrow);
  }

  console.log("Done writing.");
}
// ***************************************
// Execution Starts Here
// ***************************************
var fs = require('fs');
// List ./filesToRead, start reading every entry, then call the writer.
fs.readdir('./filesToRead', (err, files) => {
  files.forEach((name, index) => {
    processFile('./filesToRead/' + name);
    console.log('file#: ', index);
  });
  console.log('Done processing files.');
  writeAllLinesToFile();
});
Here's my quick solution to this and you can take from here and improve it as you see it fit.
I believe the most important thing you forgot is that when we use event emitters, we should know that they're asynchronous code and not synchronous, that's why writeAllLinesToFile had empty array because it was called before rl.on('line', ....) did its job and it's tricky to catch up ;)
Hope this will help you understand how Promises also work and always think of asynchronous code carefully.
var fs = require('fs');
var readline = require('readline');
var path = require('path');
var stream = require('stream');
/**
 * Reads `inputFile` line by line into a per-file array and hands that array
 * to `writeAllLinesToFile` once the reader reports 'close'.
 *
 * @param {string} inputFile - Path of the file to read.
 */
function processFile(inputFile) {
  const collected = [];
  const reader = readline.createInterface(fs.createReadStream(inputFile), new stream());

  // Gather each line as it arrives.
  reader.on('line', (text) => {
    collected.push(text);
  });

  // Only write once the whole file has been consumed.
  reader.on('close', () => {
    writeAllLinesToFile(collected);
  });
}
/**
 * Appends `lines` to allTheLines.html (next to this script), one line of
 * text per array entry.
 *
 * All lines are written with a single append so their order in the file
 * matches their order in the array; issuing one asynchronous append per line
 * gives no ordering guarantee. "Done writing." is logged only after the
 * write has completed.
 *
 * @param {string[]} lines - Lines to append, without trailing newlines.
 */
function writeAllLinesToFile(lines) {
  // Nothing to append: do not touch the file at all.
  if (lines.length === 0) {
    console.log("Done writing.");
    return;
  }

  var target = path.join(__dirname, 'allTheLines.html');
  var text = lines.map(function (line) {
    return line + '\n';
  }).join('');

  fs.appendFile(target, text, function (err) {
    if (err) throw err;
    console.log("Done writing.");
  });
}
// ***************************************
// Execution Starts Here
// ***************************************
// List the filesToRead directory next to this script and start reading each entry.
fs.readdir(path.join(__dirname, 'filesToRead'), (err, files) => {
  if (err) throw err;
  for (const name of files) {
    processFile(path.join(__dirname, 'filesToRead', name));
  }
  console.log('Done processing files.');
});
I'm having problems trying to write a chunk buffer into a file. My problem is basically that the file only contains the last bytes of the output buffer. The chunk buffer is very large, and my file.out is truncated, the last items are correct.
Here is my code
// Request the chart and, for every chunk received, log it and write it to rich.txt.
var reqChart = http.request(chart, (res1) => {
  res1.setEncoding( 'utf8' );

  // Runs once per chunk; each run starts from an empty string.
  res1.on('data', (chunk) => {
    const fs = require('fs');

    let c = "";
    for (const piece of chunk) {
      c += piece;
    }
    console.log(c);

    fs.writeFile("rich.txt", c, "utf-8", (err) => {
      console.log(err ? err : "The file was saved!");
    });
  });
});
And if you are interested here is the BEGINNING the input I get in my file vs the BEGINNING of the "console.log" of my variable... I already tried to write the "console.log" to a file but same results.
File
0,0.120179572670496],[1498028100000,0.105581318680705],[1498028400000,0.167319933562371],[1498028700000,0.147574197583768],[1498029000000,0.114172853959319],[1498029300000,0.241186960587686],[1498029600000,1.45701290672775],[1498029900000,0.154756395075166],[1498030200000,0.0836631006369253],[1498030500000,0.0800486694467188],[1498030800000,0.0861569133026863],[1498031100000,0.092360872083502],[1498031400000,0.0790707908354345],[1498031700000,0.129877438815933],[1498032000000,0.118812121796025],[1498032300000,0.0989746376933164],[1498032600000,2.30001837257628],[1498032900000,0.313639093279438],[1498033200000,0.227936449562983],
Buffer/Variable/log
{"requestData":{"options":{"width":950,"gui_component_mode":1,"exporttocsv":1,"only_stacks_in_legend":false,"reverse_axis":false,"height":446,"datasetTitle":"","legend_average":false,"legend_maximum":false,"legend_minimum":false},"model_group":101,"name":"autil_17","model":"nmis_graph","dataset_id":0,"owner_cid":"c3","data_source_type":"chart","model_view":"graph","parameters":{"graph_type":"interface","resource_index":"17","nmis_data_type":null,"value_column":"value","node":"RT01459","axis":0,"end_date_raw":1498095300,"substitutions":{"time.start":1497922702,"time.end":1498095502},"time_column":"time","translation":"","field":"","lineType":"line","period":"2d","index_graph_type":"autil","resource":"interface","start_date_raw":1497922500,"resolution":300,"class":null},"data_source":"local_nmis","translation":null},"replyData":{"options":{"subtitleText":"ENTERPRISE_RT01","titleText":"Interface gigabitethernet0-2-3913","legend_raw":" Avg In Max In Avg 12.76 % Max 98.99 % \\n Avg Out Max Out Avg 4.98 % Max 52.49 % \\n","yAxis0TitleText":"% Avg Util"},"stacking":"normal","meta_data":{"time_start":1497922500,"start_date_input":"2017-06-19 20:35:00","end_date_input":"2017-06-21 20:35:00"},"data":[{"yAxis":0,"reverse_axis":0,"valueDecimals":2,"value_min":-98.9864025844157,"color":"#00BFFF","suffix":"","dataset_multiplier":1,"sum":-7373.17229868631,"connectNulls":0,"stack":1,"value_max":-0.0591203443255937,"name":"Avg In","data":[[1497922500000,-0.7137202476565],[1497922800000,-1.43305756579003],[1497923100000,-0.150464409649807],[1497923400000,-0.150475661479925],[1497923700000,-0.100369773564214],[1497924000000,-0.0893947123021048]
I thought the problem might be a timeout or something similar, but the log IS WRITING THE INFO TO THE TERMINAL, so maybe I'm missing something.
You realize that you reset c on every chunk, because it's locally scoped? Declare it outside the handler:
// Declared once, outside the handler, so it is not reset for every chunk.
var c = "";
res1.on('data', function (chunk) {
  c += chunk;
});
And you need to await the stream to finish:
// Once the response has ended, log the accumulated text and save it to rich.txt.
res1.on("end", () => {
  console.log(c);
  const fs = require('fs');
  const reportResult = (err) => {
    console.log(err ? err : "The file was saved!");
  };
  fs.writeFile("rich.txt", c, "utf-8", reportResult);
});
The problem you're having is that you're writing the file inside stream.on('data', ...), so every chunk overwrites what the previous one wrote. You need to accumulate all of the data in a variable and write the file from the stream.on('end', ...) handler. I hope this example helps:
var fs = require('fs');
// Example: read file.txt as text and write it to rich.txt once all of it has arrived.
// An explicit encoding makes the stream emit decoded strings, so a multi-byte
// character that straddles two chunks is not corrupted by converting each
// chunk to a string on its own.
var readableStream = fs.createReadStream('file.txt', { encoding: 'utf8' });
var data = '';

// Accumulate every chunk; nothing is written yet.
readableStream.on('data', function (chunk) {
  data += chunk;
});

// Report read failures (e.g. a missing file) instead of leaving them unhandled.
readableStream.on('error', function (err) {
  console.log(err);
});

// All chunks have been delivered: write the complete text in one go.
readableStream.on('end', function () {
  console.log(data);
  fs.writeFile("rich.txt", data, "utf-8", function (err) {
    if (err) {
      console.log(err);
    } else {
      console.log("The file was saved!");
    }
  });
});
Now copying your code and modifying it:
// Request the chart, accumulate the whole response body, and save it once the response ends.
var reqChart = http.request(chart, (res1) => {
  res1.setEncoding('utf8');
  const fs = require('fs');
  let c = "";

  // Append each chunk to the running text and log what has been received so far.
  res1.on('data', (chunk) => {
    for (const piece of chunk) {
      c += piece;
    }
    console.log(c);
  });

  // The body is complete: write it out in a single call.
  res1.on('end', () => {
    fs.writeFile("rich.txt", c, "utf-8", (err) => {
      console.log(err ? err : "The file was saved!");
    });
  });
});
The pipe function for streams would make your life a lot easier.
const fs = require('fs');
const http = require('http');
// Stream the HTTP response body straight into the output file.
const url = 'http://example.com'; // change to chart URL
const output = fs.createWriteStream('output.html'); // change to rich.txt
http.get(url, (response) => response.pipe(output));
I have a tab-separated file containing thousands of records. How can I use Node.js to read the file line by line, parse each line into an object, and insert the objects into a MongoDB database?
I am just learning node and mongo. I come from different background. So how can this be done.
Finally the Mongo DB has to be populated with proper data.
I searched in net but I could not find the complete solution.
Thanks.
I had an issue with the answer by Juvenik. My problem was that the database would not be populated by the time readline had completed. The lines were being read synchronously, but the DB insertion was asynchronous.
Instead, I found a simpler solution with the line-reader package. It reads the lines and waits for a callback before continuing.
// Imports a tab-separated file into a MongoDB collection one line at a time.
// The first line supplies the column names; every later line is inserted as
// one document whose keys are those names.
var MongoClient = require('mongodb').MongoClient
var dbName = 'yourDbName'
var url = 'mongodb://localhost:27017/' + dbName
var collectionName = 'yourCollectionName'
var filename = 'yourFileName.txt'
// A progress message is logged whenever the line number is a multiple of this.
var printLine = 1000
MongoClient.connect(url, function(err, db) {
if (err) {
console.error('Problem connecting to database')
} else {
console.log('Connected correctly to server.')
var lineReader = require('line-reader')
var collection = db.collection(collectionName)
// Incremented before use, so the first line of the file is line 0.
var lineNum = -1
var headers = []
lineReader.eachLine(filename, function(line, last, cb) {
lineNum++
try {
var split = line.split('\t')
var object = {}
if (lineNum > 0) {
// Data line: pair each field with the header at the same position.
for (var i = 0; i < split.length; i += 1) {
object[headers[i]] = split[i]
}
collection.insert(object, function (insertErr, insertObj) {
if (insertErr) console.error(insertErr)
if (lineNum % printLine === 0) console.log('Line ' + lineNum)
if (last) {
console.log('Done with ' + filename + ' (' + lineNum + ' records)')
process.exit(0)
} else {
// Request the next line only after this insert's callback has run.
cb()
}
})
} else {
// Header line: remember the column names and move on.
// NOTE(review): if the header is also the last line, this path never reaches process.exit — confirm intended.
headers = line.split('\t')
cb()
}
} catch (lineError) {
// NOTE(review): cb() is not called on this path, so presumably no further lines are delivered — confirm against line-reader.
console.error(lineError)
}
})
}
})
I came across similar problem. This approach worked for me.
Have a look, it might be helpful.
// Imports a tab-separated file into a MongoDB collection using readline.
// Lines are read faster than inserts complete, so every insert is tracked and
// the connection is closed only after all of them have settled.
var mongoDb = require('mongodb');
var mongoClient = mongoDb.MongoClient;
var dbname = 'YOUR_DB_NAME';
var collectionName = 'YOUR_COLLECTION_NAME';
var url = 'mongodb://localhost:27017/'+dbname;
var filename = 'FIle_Name.txt';
console.log('***************Process started');
mongoClient.connect(url,function(err,db){
  if(err){
    console.log('error on connection '+err);
  }
  else{
    console.log('***************Successfully connected to mongodb');
    var collection = db.collection(collectionName);
    var fs = require('fs');
    var readline = require('readline');
    var stream = require('stream');
    var instream = fs.createReadStream(filename);
    var outstream = new stream;
    var rl = readline.createInterface(instream,outstream);
    // One entry per insert that has been started; awaited before closing.
    var pendingInserts = [];
    console.log('***************Parsing, please wait ...');
    rl.on('line',function(line){
      try{
        var arr = line.split('\t');
        var object = {};
        //Parse them here
        //Example
        object['name'] = arr[0]; //Just an example
        // Promise.resolve() accepts both a promise and a plain return value.
        // NOTE(review): assumes insert() without a callback returns a promise — confirm for the driver version in use.
        var inserted = Promise.resolve(collection.insert(object)).catch(function (insertErr){
          console.log(insertErr);
        });
        pendingInserts.push(inserted);
      }
      catch (err){
        console.log(err);
      }
    });
    rl.on('close',function(){
      // Closing immediately would cut off inserts that are still in flight.
      Promise.all(pendingInserts).then(function (){
        db.close();
        console.log('***************completed');
      });
    });
  }
});
I am a learner too. If someone can make it better, it will be good.
Here is a more performant (inserting batches of objects) and updated version (using async and latest mongo driver) of frank-0's answer
const lineReader = require('line-reader');
/**
 * Reads `file` line by line and inserts one object per line into `collection`
 * in batches, so the whole file never has to be held in memory.
 *
 * Every line's object is added to the current batch; the batch is flushed
 * with `insertMany` when it reaches `batchSize` or when the last line has
 * been read. The next line is requested only after a flush has completed.
 *
 * @param {string} file - Path of the file to read.
 * @param {number} [batchSize=10000] - Maximum number of objects per insertMany call.
 * @returns {Promise<*>} Resolves with the result of the final insertMany call;
 *   rejects with the first insert error.
 */
async function readFileAndInsertInMongo(file, batchSize = 10000) {
  return new Promise((resolve, reject) => {
    let buffer = [];
    lineReader.eachLine(file, (line, last, cb) => {
      // prepare your object based on the line content
      let insertObject = {'some_content': 'some_value'};
      buffer.push(insertObject);

      // Keep filling the batch until it is full or the file is exhausted.
      if (buffer.length < batchSize && !last) {
        return cb();
      }

      // Swap in a fresh buffer before the asynchronous insert starts.
      const batch = buffer;
      buffer = [];
      collection.insertMany(batch, function(err, res){
        if (err) {
          reject(err);
          // Stop reading: continuing after a failed batch would hide the error.
          if (!last) {
            cb(false);
          }
          return;
        }
        if (last) {
          return resolve(res);
        }
        return cb();
      });
    });
  });
}
This really is the best solution I have found to parse huge files and insert them in the database without exploding Node's memory. Hope this can help ;)
I'm trying to import a file using javascript, and every time I run, it flashes the same error. I already tried redownloading the file system, and I just downloaded requirejs.
// Question code, kept exactly as posted. It tries to read articles.txt and
// build Article objects from groups of four lines (title, tags, content, date).
var importFile = function() {
console.log("started import");
// NOTE(review): require is called with an array here; the answers in this thread identify this as the cause of the error.
var fs = require(['fs']);
console.log("required!");
fs.exists('articles.txt', function(exists) {
if(exists) console.log("found file");
});
fs.readFile('articles.txt', function(err, data) {
if(err) {
throw err;
// Unreachable: this statement follows the throw.
console.log("error thrown");
}
// Declared with var inside this callback, so it is local to the callback.
var rawFileData = data.toString().split("\n");
for(i in rawFileData) {
// NOTE(review): `articles` is not declared anywhere in this block — presumably defined elsewhere; confirm.
console.log(articles[i]);
}
});
// NOTE(review): rawFileData is not in scope here (it belongs to the callback above), and this loop is
// reached as soon as readFile has been called, not after its callback has run.
for(var i = 0; i< rawFileData.length; i+=4) {
// NOTE(review): `rawDataFiles` is a different identifier from `rawFileData` and is not declared in this block.
var title = rawDataFiles[i];
var tags = rawDataFiles[i+1].split(",");
var content = rawDataFiles[i+2];
var date = rawDataFiles[i+3];
articles.append(new Article(title, tags, content, date));
}
}
I solved this using jQuery:
/**
 * Fetches the text file with jQuery, splits it into lines and passes each
 * line, together with its zero-based position, to `parseLine`.
 */
var importFile = function() {
  const file = "/path/to/file.txt";
  console.log("started import");
  console.log(file);

  // Runs when the file's text has been delivered.
  const onLoaded = (data) => {
    data.split("\n").forEach((elem, id) => {
      parseLine(elem, id);
    });
    console.log("done parsing.");
  };

  $.get(file, onLoaded, "text");
  console.log("done getting");
};
though it never prints "done getting". I don't know why.
If fs hasn't loaded before and require is really requirejs, you have to have it in a callback.
require(['fs'], function(fs) { ... });
I suspect you are using node and its node's require. You should just drop the square brackets.
var fs = require('fs');
I would like to read a very, very large file into a JavaScript array in node.js.
So, if the file is like this:
first line
two
three
...
...
I would have the array:
['first line','two','three', ... , ... ]
The function would look like this:
var array = load(filename);
Therefore the idea of loading it all as a string and then splitting it is not acceptable.
Synchronous:
var fs = require('fs');
// Read the whole file synchronously and split it into an array of lines.
var array = fs.readFileSync('file.txt').toString().split("\n");
// Iterate over the values directly: for...in walks property keys and, with an
// undeclared loop variable, creates an implicit global (an error in strict mode).
array.forEach(function (line) {
  console.log(line);
});
Asynchronous:
var fs = require('fs');
// Read the whole file asynchronously and split it into an array of lines.
fs.readFile('file.txt', function(err, data) {
  if(err) throw err;
  var array = data.toString().split("\n");
  // Iterate over the values directly: for...in walks property keys and, with an
  // undeclared loop variable, creates an implicit global (an error in strict mode).
  array.forEach(function (line) {
    console.log(line);
  });
});
If you can fit the final data into an array then wouldn't you also be able to fit it in a string and split it, as has been suggested?
In any case if you would like to process the file one line at a time you can also try something like this:
var fs = require('fs');
/**
 * Splits the text emitted by `input` into lines and calls `func` once per line.
 *
 * Text after the last newline is held back until more data arrives; whatever
 * is still held when `input` ends is passed to `func` if it is non-empty.
 *
 * @param {{on: Function}} input - Emitter of 'data' and 'end' events.
 * @param {function(string): void} func - Called with each line, without its newline.
 */
function readLines(input, func) {
  let pending = '';

  // Emit every complete line currently sitting at the front of `pending`.
  const drainCompleteLines = () => {
    for (let cut = pending.indexOf('\n'); cut > -1; cut = pending.indexOf('\n')) {
      const line = pending.substring(0, cut);
      pending = pending.substring(cut + 1);
      func(line);
    }
  };

  input.on('data', (chunk) => {
    pending += chunk;
    drainCompleteLines();
  });

  input.on('end', () => {
    if (pending.length > 0) {
      func(pending);
    }
  });
}
/** Logs one line of input, prefixed with "Line: ". */
function func(data) {
  const message = 'Line: ' + data;
  console.log(message);
}
var input = fs.createReadStream('lines.txt');
readLines(input, func);
EDIT: (in response to comment by phopkins) I think (at least in newer versions) substring does not copy data but creates a special SlicedString object (from a quick glance at the v8 source code). In any case here is a modification that avoids the mentioned substring (tested on a file several megabytes worth of "All work and no play makes Jack a dull boy"):
/**
 * Splits the text emitted by `input` into lines and calls `func` once per line.
 *
 * Each chunk is scanned with a moving start position, and the consumed part is
 * cut off once per chunk rather than once per line. Whatever is still held
 * when `input` ends is passed to `func` if it is non-empty.
 *
 * @param {{on: Function}} input - Emitter of 'data' and 'end' events.
 * @param {function(string): void} func - Called with each line, without its newline.
 */
function readLines(input, func) {
  let remainder = '';

  input.on('data', (chunk) => {
    remainder += chunk;
    let start = 0;
    for (;;) {
      const newlineAt = remainder.indexOf('\n', start);
      if (newlineAt === -1) {
        break;
      }
      const line = remainder.substring(start, newlineAt);
      start = newlineAt + 1;
      func(line);
    }
    // Keep only the unfinished tail for the next chunk.
    remainder = remainder.substring(start);
  });

  input.on('end', () => {
    if (remainder.length > 0) {
      func(remainder);
    }
  });
}
Using the Node.js readline module.
var fs = require('fs');
var readline = require('readline');
// Print every line of the file named by the first command-line argument.
var filename = process.argv[2];
readline
  .createInterface({ input: fs.createReadStream(filename), terminal: false })
  .on('line', (line) => {
    console.log('Line: ' + line);
  });
js:
var array = fs.readFileSync('file.txt', 'utf8').split('\n');
ts:
var array = fs.readFileSync('file.txt', 'utf8').toString().split('\n');
Essentially this will do the job: .replace(/\r\n/g,'\n').split('\n').
This works on Mac, Linux & Windows.
Code Snippets
Synchronous:
const { readFileSync } = require('fs');
// Read the file, normalise CRLF line endings to LF, and print each line.
const array = readFileSync('file.txt').toString().replace(/\r\n/g,'\n').split('\n');
array.forEach((line) => {
  console.log(line);
});
Asynchronous:
With the fs.promises API that provides an alternative set of asynchronous file system methods that return Promise objects rather than using callbacks. (No need to promisify, you can use async-await with this too, available on and after Node.js version 10.0.0)
const { readFile } = require('fs').promises;
// fs.promises.readFile returns a Promise and does not take a callback, so the
// result is consumed with then/catch (or await) rather than a callback.
readFile('file.txt')
  .then((data) => {
    const arr = data.toString().replace(/\r\n/g,'\n').split('\n');
    for(let i of arr) {
      console.log(i);
    }
  })
  .catch((err) => {
    // Report read failures instead of leaving the rejection unhandled.
    console.error(err);
  });
More about \r & \n here: \r\n, \r and \n what is the difference between them?
use readline (documentation). here's an example reading a css file, parsing for icons and writing them to json
// Scans a stylesheet line by line for icon class names and writes them to a JSON file.
// `fs` is required once up front: it is needed both for reading and for the final write.
var fs = require('fs');
var results = [];
var rl = require('readline').createInterface({
  input: fs.createReadStream('./assets/stylesheets/_icons.scss')
});
// for every new line, if it matches the regex, add it to an array
// this is ugly regex :)
rl.on('line', function (line) {
  var re = /\.icon-icon.*:/;
  var match;
  if ((match = re.exec(line)) !== null) {
    // Strip the first "." and the first ":" from the matched text.
    results.push(match[0].replace(".",'').replace(":",''));
  }
});
// readline emits a close event when the file is read.
rl.on('close', function(){
  var outputFilename = './icons.json';
  fs.writeFile(outputFilename, JSON.stringify(results, null, 2), function(err) {
    if(err) {
      console.log(err);
    } else {
      console.log("JSON saved to " + outputFilename);
    }
  });
});
file.lines with my JFile package
Pseudo
var JFile=require('jfile');
var myF=new JFile("./data.txt");
myF.lines // ["first line","second line"] ....
Don't forget before :
npm install jfile --save
With a BufferedReader, but the function should be asynchronous:
/**
 * Reads `file` with a BufferedReader and collects every line it emits.
 *
 * @param {string} file - Path handed to BufferedReader.
 * @param {function(*, ?Array): void} cb - Called as cb(error, null) on an
 *   'error' event, or as cb(null, lines) on the 'end' event.
 */
var load = function (file, cb){
  const collected = [];
  const onError = (error) => {
    cb (error, null);
  };
  const onLine = (line) => {
    collected.push (line);
  };
  const onEnd = () => {
    cb (null, collected);
  };
  new BufferedReader (file, { encoding: "utf8" })
    .on ("error", onError)
    .on ("line", onLine)
    .on ("end", onEnd)
    .read ();
};
load ("file", function (error, lines){
if (error) return console.log (error);
console.log (lines);
});
To read a big file into array you can read line by line or chunk by chunk.
line by line refer to my answer here
// Reads a file line by line with event-stream and collects the lines in `lines`.
var fs = require('fs');
var es = require('event-stream');
var lines = [];
// `s` receives the return value of the final pipe() call.
var s = fs.createReadStream('filepath')
  .pipe(es.split())
  .pipe(es.mapSync(function(line) {
    // Pause while the line is handled, then resume. The work here is
    // synchronous, so the stream is resumed immediately.
    s.pause();
    lines.push(line);
    s.resume();
  })
  .on('error', function(err) {
    console.log('Error:', err);
  })
  .on('end', function() {
    console.log('Finish reading.');
    console.log(lines);
  })
);
chunk by chunk refer to this article
// Reads the file synchronously in fixed-size chunks and appends each complete
// line to `lines`. Bytes after the last newline of a chunk are carried over
// and prepended to the next chunk, so a line (or a multi-byte character) that
// straddles a chunk boundary is never split.
var offset = 0;
var chunkSize = 2048;
var chunkBuffer = Buffer.alloc(chunkSize);
var fp = fs.openSync('filepath', 'r');
var bytesRead = 0;
// Unfinished tail of the previous chunk(s); empty when the chunk ended on a newline.
var leftover = Buffer.alloc(0);
try {
  while ((bytesRead = fs.readSync(fp, chunkBuffer, 0, chunkSize, offset)) > 0) {
    offset += bytesRead;
    // Buffer.concat copies, so `data` stays valid when chunkBuffer is reused.
    var data = Buffer.concat([leftover, chunkBuffer.subarray(0, bytesRead)]);
    // Search on bytes: 0x0a never occurs inside a multi-byte UTF-8 sequence.
    var lastNewline = data.lastIndexOf(0x0a);
    if (lastNewline === -1) {
      // No complete line yet; keep everything for the next chunk.
      leftover = data;
      continue;
    }
    var complete = data.subarray(0, lastNewline).toString().split('\n');
    for (var k = 0; k < complete.length; k++) {
      lines.push(complete[k]);
    }
    leftover = data.subarray(lastNewline + 1);
  }
  // A final line without a trailing newline.
  if (leftover.length > 0) {
    lines.push(leftover.toString());
  }
} finally {
  // Release the descriptor even if reading fails.
  fs.closeSync(fp);
}
console.log(lines);
This is a variation on the answer above by #mtomis.
It creates a stream of lines. It emits 'data' and 'end' events, allowing you to handle the end of the stream.
var events = require('events');
/**
 * Wraps a stream of text chunks and re-emits it as a stream of lines.
 *
 * Emits 'data' once per line (without the newline) and 'end' after the
 * wrapped input has ended; a non-empty unterminated final line is emitted
 * before 'end'.
 *
 * @param {{on: Function}} input - Emitter of 'data' and 'end' events.
 */
var LineStream = function (input) {
  // Initialise per-instance emitter state. Without this, every LineStream
  // would share the listener table stored on the prototype object.
  events.EventEmitter.call(this);
  var remaining = '';
  input.on('data', function (data) {
    remaining += data;
    var index = remaining.indexOf('\n');
    var last = 0;
    while (index > -1) {
      var line = remaining.substring(last, index);
      last = index + 1;
      this.emit('data', line);
      index = remaining.indexOf('\n', last);
    }
    // Keep only the unfinished tail for the next chunk.
    remaining = remaining.substring(last);
  }.bind(this));
  input.on('end', function() {
    if (remaining.length > 0) {
      this.emit('data', remaining);
    }
    this.emit('end');
  }.bind(this));
}
// Inherit the emitter methods without creating a shared EventEmitter instance.
LineStream.prototype = Object.create(events.EventEmitter.prototype, {
  constructor: { value: LineStream, writable: true, configurable: true }
});
Use it as a wrapper:
var lineInput = new LineStream(input);
lineInput.on('data', function (line) {
// handle line
});
lineInput.on('end', function() {
// wrap it up
});
I just want to add a small fix to the asynchronous example in @finbarr's great answer:
Asynchronous:
var fs = require('fs');
// Read the whole file asynchronously, print each line, then signal completion.
fs.readFile('file.txt', function(err, data) {
  if(err) throw err;
  var array = data.toString().split("\n");
  // Iterate over the values directly: for...in walks property keys and, with an
  // undeclared loop variable, creates an implicit global (an error in strict mode).
  array.forEach(function (line) {
    console.log(line);
  });
  // NOTE(review): `done` is not defined in this snippet; presumably a completion
  // callback supplied by the surrounding code (e.g. a test framework) — confirm.
  done();
});
#MadPhysicist, done() is what releases the async. call.
Node.js v8 and later provide a feature that converts a normal callback-style function into one that returns a promise, so it can be used with async/await:
util.promisify
It's an awesome feature. Here's the example of parsing 10000 numbers from the txt file into an array, counting inversions using merge sort on the numbers.
// read from txt file
const util = require('util');
const fs = require('fs')
fs.readFileAsync = util.promisify(fs.readFile);
let result = []
/**
 * Reads `csvFile`, converts every non-empty line to a number and appends the
 * numbers to the shared `result` array.
 *
 * Lines are split on both LF and CRLF endings, so the file parses the same
 * way regardless of the platform it was written on.
 *
 * @param {string} csvFile - Path of the text file, one number per line.
 * @returns {Promise<number[]>} The shared `result` array.
 */
const parseTxt = async (csvFile) => {
  const data = await fs.readFileAsync(csvFile)
  const lines = data.toString().split(/\r?\n/)
  console.log("lines", lines)
  for (const line of lines) {
    // Skip empty lines, such as the one left after a trailing newline.
    if (!line) { continue }
    result.push(Number(line))
  }
  console.log("result",result)
  return result
}
parseTxt('./count-inversion.txt').then(() => {
console.log(mergeSort({arr: result, count: 0}))
})
I had the same problem, and I have solved it with the module line-by-line
https://www.npmjs.com/package/line-by-line
At least for me works like a charm, both in synchronous and asynchronous mode.
Also, the problem with lines that do not end with \n can be solved with the option:
{ encoding: 'utf8', skipEmptyLines: false }
Synchronous processing of lines:
var LineByLineReader = require('line-by-line'),
lr = new LineByLineReader('big_file.txt');
lr.on('error', function (err) {
// 'err' contains error object
});
lr.on('line', function (line) {
// 'line' contains the current line without the trailing newline character.
});
lr.on('end', function () {
// All lines are read, file is closed now.
});
Another answer using an npm package. The nexline package allows one to asynchronously read a file line-by-line:
"use strict";
import fs from 'fs';
import nexline from 'nexline';
// Collect every non-empty line of the file, pulling one line at a time from the reader.
const lines = [];
const reader = nexline({ input: fs.createReadStream(`path/to/file.ext`) });

// reader.next() yields null once the end of the input is reached.
for (let line = await reader.next(); line !== null; line = await reader.next()) {
  if (line.length === 0) continue; // Ignore empty lines
  // Process the line here - below is just an example
  lines.push(line);
}
This approach will work even if your text file is larger than the maximum allowed string length, thereby avoiding the Error: Cannot create a string longer than 0x1fffffe8 characters error.
To put each line as an item inside an array, a new function was added in Node.js v18.11.0 to read files line by line
filehandle.readLines([options])
This is how you use it to read a text file and put each line into an array:
import { open } from 'node:fs/promises';
const arr = [];

// The call must match the declared name exactly; identifiers are case-sensitive.
myFileReader().catch((err) => {
  // Report failures (e.g. a missing file) instead of leaving the rejection unhandled.
  console.error(err);
});

/**
 * Opens ./TextFileName.txt, pushes each of its lines onto `arr`, and logs the
 * array once every line has been read.
 */
async function myFileReader() {
  const file = await open('./TextFileName.txt');
  for await (const line of file.readLines()) {
    arr.push(line);
  }
  console.log(arr)
}
To understand more read Node.js documentation here is the link for file system readlines():
https://nodejs.org/api/fs.html#filehandlereadlinesoptions