NodeJS - output JSON array as multiple JSON files to disk - javascript

I am trying to use Node.js to read a JSON array from a JSON file and then write each JSON object out as its own JSON file on disk.
However, I get the error EMFILE: too many open files.
The array has 20,000 objects.
The code:
function main(){
    var fs = require('fs');
    var clusters_statistics = require("./cluster_whole_1.json");
    for(var i = 0; i < clusters_statistics.length; i++){
        var outputFilename = 'cut_json/' + i + '.json';
        fs.writeFile(outputFilename, JSON.stringify(clusters_statistics[i], null, 4), function(err) {
            if(err) {
                console.log(err);
            } else {
                console.log("saved");
            }
        });
    }
}
Update:
1. I tried to use the close() function as suggested by Gustavo; unfortunately, it still says "there are too many files open" ("open" this time).
2. Then I tried recursion inside the close() callback, and it works now: each file is only opened after the previous one has been closed, so only one file is open at a time.
The code:
function main(){
    clusters_statistics = require("./cluster_whole_1.json"); // global on purpose, used by call_close()
    call_close(clusters_statistics.length - 1);
}
function call_close(i){
    var fs = require("fs");
    var path = 'cut_json/' + i + '.json';
    fs.open(path, "w+", function(error, fd) {
        if (error) {
            console.error("open error: " + error.message);
        } else {
            fs.writeFile(path, JSON.stringify(clusters_statistics[i], null, 4), function(err) {
                if(err) {
                    console.log(err);
                }
            });
            fs.close(fd, function(error) {
                if (error) {
                    console.log(error);
                } else {
                    if(i <= 0){
                        return;
                    } else {
                        if(i % 100 == 0){
                            console.log(i);
                        }
                        call_close(i - 1);
                    }
                }
            });
        }
    });
}

Close the file after you finish writing the JSON into it.
Right now you are keeping all the files open and filling up memory; if you don't close them manually, they will only be closed when your program finishes.
var fs = require('fs');
function main(){
    var clusters_statistics = require("./cluster_whole_1.json");
    for(var i = 0; i < clusters_statistics.length; i++){
        var outputFilename = 'cut_json/' + i + '.json';
        // open, write and close explicitly so each descriptor is released
        var my_file = fs.openSync(outputFilename, 'w+');
        var buffer = Buffer.from(JSON.stringify(clusters_statistics[i], null, 4));
        fs.writeSync(my_file, buffer, 0, buffer.length, null);
        fs.closeSync(my_file);
    }
}

Opening and closing the file descriptor is not necessary when using fs.writeFile, but you'll probably want to write the files sequentially so you don't open too many files at once.
function write_cluster_statistics(clusters_statistics, callback, index) {
    index = index || 0;
    if (index >= clusters_statistics.length) {
        return callback();
    }
    var fs = require('fs');
    var path = 'cut_json/' + index + '.json';
    var content = JSON.stringify(clusters_statistics[index], null, 4);
    fs.writeFile(path, content, function (err) {
        if (err) {
            callback(err);
        } else {
            write_cluster_statistics(clusters_statistics, callback, index + 1);
        }
    });
}
function main() {
    var clusters_statistics = require("./cluster_whole_1.json");
    write_cluster_statistics(clusters_statistics, function (err) {
        if (err) {
            console.error(err);
        } else {
            console.log('done');
        }
    });
}
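For what it's worth, on newer Node versions (14+) the same sequential pattern reads more naturally with fs/promises and async/await. A minimal sketch, assuming the same cut_json/ output directory already exists:
const fs = require('fs/promises');

async function main() {
    const clusters_statistics = require('./cluster_whole_1.json');
    // awaiting each write keeps at most one file open at a time
    for (let i = 0; i < clusters_statistics.length; i++) {
        await fs.writeFile('cut_json/' + i + '.json',
            JSON.stringify(clusters_statistics[i], null, 4));
    }
    console.log('done');
}

main().catch(console.error);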

Related

Node.js writing data to file results in additional written characters

As the title says, when I try to save data to a file using the filesystem function fs.writeFile(), sometimes the file has extra data in it.
My code:
fs.writeFile('path', JSON.stringify(data), function (err) {});
Maybe it's because of the JSON.stringify(), or it's a problem with fs.writeFile.
If you need additional information, I'm willing to give it!
More code:
function CheckLeaderBoards(player, tag, points) {
    fs.readFile(datapath + '/data/topplayers.json', function(err, data) {
        var lb = JSON.parse(data);
        var isin = false;
        for (let i = 0; i < lb.length; i++) {
            if (lb[i].tag == tag) {
                isin = true;
                lb[i].points = points;
                break;
            }
        }
        if (!isin)
            lb.push({"player": player.toString(), "tag": tag.toString(), "points": parseInt(points)});
        // selection sort: move the highest-scoring remaining entry to position i
        for (let i = 0; i < lb.length; i++) {
            var bestpoints = -100;
            var bestindex = 0;
            for (let j = i; j < lb.length; j++) {
                if (lb[j].points > bestpoints) {
                    bestpoints = lb[j].points;
                    bestindex = j;
                }
            }
            lb = ChangeArrayIndex(lb, bestindex, i);
        }
        fs.writeFile(datapath + '/data/topplayers.json', JSON.stringify(lb), function (err) {});
    })
}
function ChangeArrayIndex(array, fromIndex, toIndex) {
    var arr = [];
    for (let i = 0; i < array.length; i++) {
        if (i == toIndex) arr.push(array[fromIndex]);
        if (i == fromIndex) continue;
        arr.push(array[i]);
    }
    return arr;
}
Basically I want to write a leaderboard; I have an array of JSON objects, e.g. {"player":"Bob","tag":"a10b","points": 10},...
To write to a file, you first open it; in the callback you get a file descriptor, and that descriptor is then used to write to the file. Please see this example:
fs.open(datapath + '/data/topplayers.json', 'wx', function(error, fileDescriptor){
    if(!error && fileDescriptor){
        var stringData = JSON.stringify(data);
        fs.writeFile(fileDescriptor, stringData, function(error){
            if(!error){
                fs.close(fileDescriptor, function(error){
                    if(!error){
                        callback(false);
                    }else{
                        callback('Error closing file');
                    }
                });
            }else{
                callback('Error writing file.');
            }
        });
    }else{
        // 'wx' fails if the file already exists
        callback('Could not open file, it may already exist');
    }
});
OK, if you want to update the file, please check this code:
const myUpdaterFcn = (dir, file, data, callback) => {
    // dir looks like this: '/your/existing/path/file.json'
    // Open the file for writing (using the flag r+)
    fs.open(dir, 'r+', (err, fileDescriptor) => {
        if(!err && fileDescriptor){
            // Convert data to string
            const stringData = JSON.stringify(data)
            // Truncate the file (ftruncate is the variant that takes a file descriptor)
            fs.ftruncate(fileDescriptor, err => {
                if(!err){
                    // Write to file and close it
                    fs.writeFile(fileDescriptor, stringData, err => {
                        if(!err){
                            fs.close(fileDescriptor, err => {
                                if(!err){
                                    callback(false)
                                } else {
                                    callback('Error closing existing file')
                                }
                            })
                        } else {
                            callback('Error writing to existing file')
                        }
                    })
                } else {
                    callback('Error truncating file')
                }
            })
        } else {
            callback('Could not open file for updating, it may not exist yet')
        }
    })
}
Good Luck.
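As a side note, the whole open/truncate/write/close sequence is mainly useful when you want a distinct error message for each step; plain fs.writeFile on a path already opens, truncates, writes and closes in one call. A minimal sketch of the same update, using a hypothetical myUpdaterFcnShort:
const fs = require('fs');

// fs.writeFile on a path replaces the previous contents by default
const myUpdaterFcnShort = (dir, data, callback) => {
    fs.writeFile(dir, JSON.stringify(data), err => {
        callback(err ? 'Error updating file' : false);
    });
};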

Write http stream in a file

I'm having problems trying to write a chunk buffer into a file. My problem is basically that the file only contains the last bytes of the output buffer. The chunk buffer is very large, and my file.out is truncated: the last items are correct, but everything before them is missing.
Here is my code
var reqChart = http.request(chart, function(res1) {
    res1.setEncoding('utf8');
    res1.on('data', function (chunk) {
        var fs = require('fs');
        //var b = new Buffer(chunk.length);
        var c = "";
        for (var i = 0; i < chunk.length; i++) {
            // b[i] = chunk[i];
            c = c + chunk[i]
        }
        console.log(c);
        fs.writeFile("rich.txt", c, "utf-8", function(err) {
            if(err) {
                console.log(err);
            } else {
                console.log("The file was saved!");
            }
        });
    });
});
And if you are interested, here is the BEGINNING of the input I get in my file vs the BEGINNING of the console.log of my variable... I already tried writing the console.log output to a file, with the same results.
File
0,0.120179572670496],[1498028100000,0.105581318680705],[1498028400000,0.167319933562371],[1498028700000,0.147574197583768],[1498029000000,0.114172853959319],[1498029300000,0.241186960587686],[1498029600000,1.45701290672775],[1498029900000,0.154756395075166],[1498030200000,0.0836631006369253],[1498030500000,0.0800486694467188],[1498030800000,0.0861569133026863],[1498031100000,0.092360872083502],[1498031400000,0.0790707908354345],[1498031700000,0.129877438815933],[1498032000000,0.118812121796025],[1498032300000,0.0989746376933164],[1498032600000,2.30001837257628],[1498032900000,0.313639093279438],[1498033200000,0.227936449562983],
Buffer/Variable/log
{"requestData":{"options":{"width":950,"gui_component_mode":1,"exporttocsv":1,"only_stacks_in_legend":false,"reverse_axis":false,"height":446,"datasetTitle":"","legend_average":false,"legend_maximum":false,"legend_minimum":false},"model_group":101,"name":"autil_17","model":"nmis_graph","dataset_id":0,"owner_cid":"c3","data_source_type":"chart","model_view":"graph","parameters":{"graph_type":"interface","resource_index":"17","nmis_data_type":null,"value_column":"value","node":"RT01459","axis":0,"end_date_raw":1498095300,"substitutions":{"time.start":1497922702,"time.end":1498095502},"time_column":"time","translation":"","field":"","lineType":"line","period":"2d","index_graph_type":"autil","resource":"interface","start_date_raw":1497922500,"resolution":300,"class":null},"data_source":"local_nmis","translation":null},"replyData":{"options":{"subtitleText":"ENTERPRISE_RT01","titleText":"Interface gigabitethernet0-2-3913","legend_raw":" Avg In Max In Avg 12.76 % Max 98.99 % \\n Avg Out Max Out Avg 4.98 % Max 52.49 % \\n","yAxis0TitleText":"% Avg Util"},"stacking":"normal","meta_data":{"time_start":1497922500,"start_date_input":"2017-06-19 20:35:00","end_date_input":"2017-06-21 20:35:00"},"data":[{"yAxis":0,"reverse_axis":0,"valueDecimals":2,"value_min":-98.9864025844157,"color":"#00BFFF","suffix":"","dataset_multiplier":1,"sum":-7373.17229868631,"connectNulls":0,"stack":1,"value_max":-0.0591203443255937,"name":"Avg In","data":[[1497922500000,-0.7137202476565],[1497922800000,-1.43305756579003],[1497923100000,-0.150464409649807],[1497923400000,-0.150475661479925],[1497923700000,-0.100369773564214],[1497924000000,-0.0893947123021048]
I thought maybe a timeout function or something was needed, but the log does write the full info to the terminal, so maybe I'm missing something.
You realize that you reset c at every chunk, as it's locally scoped? Declare it once outside the handler:
var c = ""; // declared once, not reset per chunk
res1.on('data', function (chunk) {
    c += chunk;
});
And you need to wait for the stream to finish:
res1.on("end", function(){
    console.log(c);
    var fs = require('fs');
    fs.writeFile("rich.txt", c, "utf-8", function(err) {
        if(err) {
            console.log(err);
        } else {
            console.log("The file was saved!");
        }
    });
});
The problem you're having is that you write the file on every stream.on('data', ...) event, overwriting it each time. You need to accumulate all the data, and use the stream.on('end', ...) event to write the file once. I hope this example helps
var fs = require('fs');
// Some example of getting original data
var readableStream = fs.createReadStream('file.txt');
var data = '';
readableStream.on('data', function(chunk) {
    data += chunk;
});
readableStream.on('end', function() {
    console.log(data);
    fs.writeFile("rich.txt", data, "utf-8", function(err) {
        if(err) {
            console.log(err);
        } else {
            console.log("The file was saved!");
        }
    });
});
Now copying your code and modifying it:
var reqChart = http.request(chart, (res1) => {
    res1.setEncoding('utf8');
    var fs = require('fs');
    var c = "";
    res1.on('data', function (chunk) {
        //var b = new Buffer(chunk.length);
        for (var i = 0; i < chunk.length; i++) {
            // b[i] = chunk[i];
            c = c + chunk[i]
        }
        console.log(c);
    });
    res1.on('end', function () {
        fs.writeFile("rich.txt", c, "utf-8", function (err) {
            if (err) {
                console.log(err);
            } else {
                console.log("The file was saved!");
            }
        });
    })
});
The pipe function for streams would make your life a lot easier.
const fs = require('fs');
const http = require('http');
const output = fs.createWriteStream('output.html'); // change to rich.txt
const url = 'http://example.com'; // change to chart URL
http.get(url, response => {
    response.pipe(output);
});
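As a side note, on Node 10+ stream.pipeline gives you the same one-liner plus proper error handling; a minimal sketch under the same assumptions about the URL and output file:
const fs = require('fs');
const http = require('http');
const { pipeline } = require('stream');

http.get('http://example.com', response => {
    // pipeline forwards errors from either stream to the callback
    pipeline(response, fs.createWriteStream('output.html'), err => {
        if (err) {
            console.error(err);
        } else {
            console.log('The file was saved!');
        }
    });
});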

Node.js print directory files

Currently working on a small project and having a few issues with getting files to print onto the page.
Currently, all files within my /views directory will print; however, I want to expand this so it can also print files inside subfolders, for example /views/test/prototype.html.
module.exports = function (router) {
    var fs = require('fs');
    router.get('/file-list', function (req, res) {
        var markup = function (files, callback) {
            var items = [];
            for (var i = 0; i < files.length; i++) {
                var q = files[i];
                q = q.slice(0, -5); // strip the ".html" extension
                var markup = '<li>' + q + '</li>';
                items.push(markup);
            }
            callback(items);
        };
        var getFiles = function (callback) {
            fs.readdir(__dirname + '/views', function (err, files) { // '/' denotes the root folder
                if (err) throw err;
                markup(files, callback)
            });
        };
        getFiles(function (items) {
            // render markup for items
            res.render('file-list', { 'files': items });
        });
    });
}
Inside your getFiles method, you can add some extra logic to check whether the file you are currently parsing is a directory, in which case you can loop over the files in that directory and push them to a result set array (filesArray). My example below only handles one level of nesting, such as the /views/test/prototype.html scenario that you provided:
module.exports = function (router) {
    var fs = require('fs');
    router.get('/file-list', function (req, res) {
        var markup = function (files, callback) {
            var items = [];
            for (var i = 0; i < files.length; i++) {
                var q = files[i];
                q = q.slice(0, -5);
                var markup = '<li>' + q + '</li>';
                items.push(markup);
            }
            callback(items);
        };
        var getFiles = function (callback) {
            var rootDir = __dirname + '/views';
            fs.readdir(rootDir, function (err, files) {
                if (err) throw err;
                // Subdirectory parsing logic START
                // (forEach gives each async callback its own `file` binding)
                var filesArray = [];
                files.forEach(function (file) {
                    var subdirPath = rootDir + '/' + file;
                    fs.stat(subdirPath, function (err, stats) {
                        if (err) {
                            console.log(err);
                            throw err;
                        }
                        if (stats.isDirectory()) {
                            fs.readdir(subdirPath, function (err, subdirFiles) {
                                if (err) throw err;
                                subdirFiles.forEach(function (subdirFile) {
                                    filesArray.push(subdirFile);
                                });
                            });
                        } else {
                            filesArray.push(file);
                        }
                    });
                });
                // Subdirectory parsing logic END
                markup(filesArray, callback)
            });
        };
        getFiles(function (items) {
            // render markup for items
            res.render('file-list', {
                'files': items
            });
        });
    });
}
The above example is a crude one: markup() is called before the asynchronous fs.stat and fs.readdir callbacks have finished, so it would be better to use a mechanism such as Promises to control the asynchronous looping and make sure all the individual files have been parsed before calling markup().
UPDATE #1
I tried various approaches for crawling all directories and subdirectories using promises, with no success, but I did come across a working solution for your scenario in this Stack Overflow accepted answer - to be more precise, the parallel search mechanism provided in that answer.
Below is an adaptation of the parallel search mechanism blended into your existing code base - I've tested it and it works as intended:
var fs = require('fs');
var path = require('path');
module.exports = function (router) {
    router.get('/file-list', function (req, res) {
        var getFiles = function (callback) {
            var rootDir = path.join(__dirname, 'views');
            walk(rootDir, function (err, results) {
                if (err) {
                    console.log(err);
                    return;
                }
                markup(results, callback);
            });
        };
        // walk() collects every file under dir, recursing into subdirectories
        // and using a pending counter to know when all async calls are done
        var walk = function (dir, done) {
            var results = [];
            fs.readdir(dir, function (err, list) {
                if (err) {
                    return done(err);
                }
                var pending = list.length;
                if (!pending) {
                    return done(null, results);
                }
                list.forEach(function (file) {
                    file = path.resolve(dir, file);
                    fs.stat(file, function (err, stat) {
                        if (stat && stat.isDirectory()) {
                            walk(file, function (err, res) {
                                results = results.concat(res);
                                if (!--pending) {
                                    done(null, results);
                                }
                            });
                        } else {
                            results.push(file);
                            if (!--pending) {
                                done(null, results);
                            }
                        }
                    });
                });
            });
        };
        var markup = function (files, callback) {
            var items = [];
            for (var i = 0; i < files.length; i++) {
                var q = files[i];
                q = q.slice(0, -5);
                var markup = '<li>' + q + '</li>';
                items.push(markup);
            }
            callback(items);
        };
        getFiles(function (items) {
            // render markup for items
            res.render('file-list', {
                'files': items
            });
        });
    });
};
You need to examine the files array with fs.stat and recursively call getFiles on directories.
This answer solves a similar problem.
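If you would rather avoid the manual pending counter, a promise-based walk is compact; a minimal sketch, assuming a reasonably recent Node (12+) with fs.promises:
const fs = require('fs').promises;
const path = require('path');

// recursively list all files under dir, returning absolute paths
async function walk(dir) {
    const entries = await fs.readdir(dir, { withFileTypes: true });
    const results = await Promise.all(entries.map(entry => {
        const full = path.resolve(dir, entry.name);
        return entry.isDirectory() ? walk(full) : full;
    }));
    return results.flat();
}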

Javascript for loop wait for callback

I have this function:
function tryStartLocalTrendsFetch(woeid) {
    var userIds = Object.keys(twitClientsMap);
    var isStarted = false;
    for (var i = 0; i < userIds.length; i++) {
        var userId = userIds[i];
        var twitClientData = twitClientsMap[userId];
        var isWoeidMatch = (woeid === twitClientData.woeid);
        if (isWoeidMatch) {
            startLocalTrendsFetch(woeid, twitClientData, function (err, data) {
                if (err) {
                    // Couldn't start local trends fetch for this userId and woeid
                    isStarted = false;
                } else {
                    isStarted = true;
                }
            });
            // This obviously will not work, because startLocalTrendsFetch is async
            // and its callback has not run yet when the flag is checked
            if (isStarted) {
                break;
            }
        }
    }
    console.log("No users are fetching woeid: " + woeid);
}
The gist of this method is that I want the line if (isStarted) { break; } to work. The reason is that once a fetch has started, the loop should not continue and try to start another one.
I'm doing this in NodeJS.
Try using a recursive definition instead:
function tryStartLocalTrendsFetch(woeid) {
    var userIds = Object.keys(twitClientsMap);
    recursiveDefinition(userIds, woeid);
}
function recursiveDefinition(userIds, woeid, userIndex) {
    userIndex = userIndex || 0;
    if (userIndex >= userIds.length) {
        console.log("No users are fetching woeid: " + woeid);
        return;
    }
    var userId = userIds[userIndex];
    var twitClientData = twitClientsMap[userId];
    var isWoeidMatch = (woeid === twitClientData.woeid);
    if (isWoeidMatch) {
        startLocalTrendsFetch(woeid, twitClientData, function (err, data) {
            if (err) {
                // this user failed to start; try the next one
                recursiveDefinition(userIds, woeid, userIndex + 1);
            }
            // on success the fetch has started, so stop here
        });
    } else {
        recursiveDefinition(userIds, woeid, userIndex + 1);
    }
}
You may also use async (npm install async):
var async = require('async');
async.forEach(userIds, function(userId, callback){
    // Do your magic here
    callback(); // signals that this iteration is done
}, function(err){
    if (err) throw err;
});
More material to help you out: Node.js - Using the async lib - async.foreach with object
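Since the goal here is to break out of the loop as soon as one fetch has started, a sketch using async.eachSeries with a sentinel value to stop early (assuming the same twitClientsMap and startLocalTrendsFetch from the question):
var async = require('async');

function tryStartLocalTrendsFetch(woeid) {
    var userIds = Object.keys(twitClientsMap);
    async.eachSeries(userIds, function (userId, callback) {
        var twitClientData = twitClientsMap[userId];
        if (woeid !== twitClientData.woeid) {
            return callback(); // no match, try the next user
        }
        startLocalTrendsFetch(woeid, twitClientData, function (err) {
            // any truthy value stops the series, so signal success with a sentinel
            callback(err ? null : 'started');
        });
    }, function (result) {
        if (result !== 'started') {
            console.log("No users are fetching woeid: " + woeid);
        }
    });
}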

node.js async function in loop?

I am having some problems with Node.js. What I'm trying to do is get an array of the directories in "./" + req.user.email and loop through them, finding out their size and adding a table row to output, as you can see in the code. At the end I want to send all the table rows using res.send().
However, the only output I am getting is:
<tr></tr>
for each file in the array. It seems that the forEach function is not waiting for readSizeRecursive at all. The readSizeRecursive function is asynchronous, and I believe that is what's causing the problem, but I don't know how I can fix this.
Any help would be greatly appreciated, I have included the readSizeRecursive function too. Thank you!
var output = "";
fs.readdir("./" + req.user.email, function (err, files) {
files.forEach(function(file){
output += "<tr>";
readSizeRecursive("./"+req.user.email+"/"+file, function (err, total){
output += '<td>' + file + '</td><td>' + total + '</td>';
});
output += "</tr>"
});
res.send(output)
});
readSizeRecursive():
// Function to find the size of a directory
function readSizeRecursive(item, cb) {
    fs.lstat(item, function(err, stats) {
        var total = stats.size;
        if (!err && stats.isDirectory()) {
            fs.readdir(item, function(err, list) {
                async.forEach(
                    list,
                    function(diritem, callback) {
                        readSizeRecursive(path.join(item, diritem), function(err, size) {
                            total += size;
                            callback(err);
                        });
                    },
                    function(err) {
                        cb(err, total);
                    }
                );
            });
        }
        else {
            cb(err, total);
        }
    });
}
Please use the async module for this kind of pattern. Using async.each will allow you to compute the size for each folder asynchronously, and then return the sizes once you're done computing everything individually.
var output = [];
fs.readdir('./' + req.user.email, function (err, files) {
    async.each(files, compute, report);
});
function compute (file, done) {
    // calculate size, then callback to signal completion
    // produce a result like below, then invoke done()
    var obj = { files: [
        { name: file, size: size },
        { name: file, size: size },
        { name: file, size: size }
    ]};
    output.push(obj);
    done();
}
// doesn't need to be this awful
function format (list) {
    var result = [];
    list.forEach(function (item) {
        var description = item.files.map(function (file) {
            return util.format('<td>%s</td><td>%s</td>', file.name, file.size);
        });
        result.push(description);
    });
    result.unshift('<tr>');
    result.push('</tr>');
    return result.join('</tr><tr>');
}
function report (err) {
    if (err) { return next(err); }
    var result = format(output);
    res.send(result);
}
This way you can easily swap out the different pieces of functionality, changing the formatting without altering the computing of the file size tree, for example.
Your main issue was control flow. You call res.send while the asynchronous loop that figures out the sizes is still running.
var fs = require ("fs");
var createTableContent = function (p, cb){
var read = function (p, cb){
//Prevent recursion if error
if (err) return cb ();
fs.stat (p, function (error, stats){
if (error){
err = error;
return cb ();
}
if (stats.isDirectory ()){
var dirSize = 0;
fs.readdir (p, function (error, entries){
if (error){
err = error;
return cb ();
}
var pending = entries.length;
//Empty dir
if (!pending) return cb (0);
entries.forEach (function (entry){
read (p + "/" + entry, function (entrySize){
dirSize += entrySize;
if (!--pending) return cb (dirSize);
});
});
});
}else{
cb (stats.size);
}
});
};
//A lot of errors can be produced, return only the first one
var err = null;
//Suppose p is a dir
fs.readdir (p, function (error, entries){
if (error) return cb (error);
var content = "";
var pending = entries.length;
if (!pending) return cb (null, content);
entries.forEach (function (entry){
read (p + "/" + entry, function (totalSize){
if (err) return cb (err);
content += "<tr><td>" + entry + "</td><td>" + totalSize + "</td></tr>";
if (!--pending){
//End
cb (null, content);
}
});
});
});
};
//Here goes the "email" path
createTableContent (".", function (error, content){
if (error) return console.error (error);
console.log (content);
});
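For comparison, a hedged modern sketch of the same table building with fs.promises and async/await (the dirSize helper is an assumption, mirroring the read() function above):
const fs = require('fs').promises;
const path = require('path');

// total size in bytes of a file, or of a directory's contents
async function dirSize(p) {
    const stats = await fs.lstat(p);
    if (!stats.isDirectory()) return stats.size;
    const entries = await fs.readdir(p);
    const sizes = await Promise.all(entries.map(e => dirSize(path.join(p, e))));
    return sizes.reduce((sum, s) => sum + s, 0);
}

async function createTableContent(p) {
    const entries = await fs.readdir(p);
    let content = '';
    for (const entry of entries) {
        const totalSize = await dirSize(path.join(p, entry));
        content += '<tr><td>' + entry + '</td><td>' + totalSize + '</td></tr>';
    }
    return content;
}

createTableContent('.').then(console.log).catch(console.error);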
