I have a file with around 1000 lines of data, and I am appending 'FFFF' to a particular line. The append works fine and the file is changed on disk. But when I re-read the file immediately after appending, the changes are not there - it still shows the old data instead of the new changes.
Below is the code:
const fs = require('fs');

const parseTxt = async (txtFile) => {
    // fs.readFileAsync assumes fs has been promisified (e.g. with Bluebird's promisifyAll)
    const data = await fs.readFileAsync(txtFile);
    const str = data.toString('utf8');
    const lines = str.split('\r\n');
    var ff_string = 'FFFF';
    // linedata and line_len come from other functions (see the note below the code)
    var append_FF = linedata.substring(0, line_len - 2) + ff_string + linedata.substring(line_len - 2);
    replace_line(linedata, append_FF, txtFile);
    /* Re-Read the File with Changed/Appended data FF */
    var re_data = re_read_file(txtFile);
    const re_str = re_data.toString('utf8');
    const re_lines = re_str.split('\r\n');
    console.log('Re Lines Data:=========', re_str);
}

parseTxt('file.txt').then(() => {
    console.log('parseTxt===');
})
function replace_line(linedata, append_FF, txtFile) {
    fs.readFile(txtFile, 'utf8', function (err, data) {
        var formatted = data.replace(linedata, append_FF);
        fs.writeFile(txtFile, formatted, 'utf8', function (err) {
            if (err) return console.log(err);
        });
    });
    return;
}
function re_read_file(txtFile) {
    try {
        const data = fs.readFileSync(txtFile);
        console.log('Re-readed File data', data);
    } catch (err) {
        console.error(err);
    }
}
The variables 'linedata' and 'line_len' come from other functions that I have not included here, because the full function is very large.
The reason for your issue is that writing to the file is asynchronous. Here's what happens:
// you do some magic and create a string that you'd like to write to a file
replace_line(linedata, append_FF, txtFile); // you call "write this to a file" (1)
/* Re-Read the File with Changed/Appended data FF */
// you've called `replace_line`, and you get here
// but since reading/writing the file is asynchronous, it's still working somewhere
// while your code continues to execute
// you read the file again, which happens immediately! check (2)
var re_data = re_read_file(txtFile);
const re_str = re_data.toString('utf8');
const re_lines = re_str.split('\r\n');
// you print the result
console.log('Re Lines Data:=========',re_str);
// you execute this method from (1)
function replace_line(linedata, append_FF, txtFile) {
    // you start reading the file, but it doesn't happen immediately
    // you provide a callback to execute when the read is done
    fs.readFile(txtFile, 'utf8', function (err, data) {
        var formatted = data.replace(linedata, append_FF);
        // same goes here - you start writing the file, but it's asynchronous
        // you pass a callback to execute when it's done
        fs.writeFile(txtFile, formatted, 'utf8', function (err) {
            if (err) return console.log(err);
        });
    });
    return;
}
function re_read_file(txtFile) { // (2)
    try {
        // here you use readFileSync, so it happens immediately!
        const data = fs.readFileSync(txtFile);
        console.log('Re-readed File data', data);
    } catch (err) {
        console.error(err);
    }
}
So the overall flow is like this:
You do some string manipulation
You try to write that to a file
which makes an asynchronous read of the file
and then an asynchronous write of the new data
You read the file synchronously (it happens immediately)
You log the result of that read (still the old data)
-> only at some point later does the asynchronous reading/writing of the file finish
You should either use synchronous read/write throughout, or somehow wait for the update of the file to finish and only then re-read it (Promises, async/await, callbacks - whatever you like).
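For example, here is a minimal sketch of how the flow could be rewritten on top of fs.promises so the re-read only happens after the write has completed (this assumes Node 10+ and treats the question's linedata/append_FF values as placeholders):
const fs = require('fs').promises;

// minimal sketch: replace a line, wait for the write to finish, then re-read
async function replaceAndReRead(txtFile, linedata, append_FF) {
    const original = await fs.readFile(txtFile, 'utf8');
    const formatted = original.replace(linedata, append_FF);
    await fs.writeFile(txtFile, formatted, 'utf8');    // the write completes before we continue
    const reRead = await fs.readFile(txtFile, 'utf8'); // so this read sees the new data
    console.log('Re Lines Data:=========', reRead);
    return reRead.split('\r\n');
}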
Related
I'm reading about requiring a JSON object from a file. I can read the JSON correctly, but after I add data to it and save it to the file, re-requiring doesn't get the updated JSON; it retrieves the old file instead. Can someone explain why this is so? How can I fix it?
var file = process.env[(process.platform == 'win32') ? 'USERPROFILE' : 'HOME'] + "\\AppData\\Roaming\\BetterDiscord\\plugins\\Test.json";
var json = require(file);
console.log(JSON.stringify(json.img));
json.img.push({name:"Test"});
console.log(JSON.stringify(json.img));
save(JSON.stringify(json), file);
json = require(file);
console.log(JSON.stringify(json.img));
This is the save method:
var save = function(value, file){
    var fs = require('fs');
    fs.writeFile(file, value, function(err) {
        if (err) {
            console.log(err);
        }
    });
};
This is the Output:
Output 1: [{"name":"A"},{"name":"B"}]
Output 2: [{"name":"A"},{"name":"B"}, {"name":"Test"}]
Output 3: [{"name":"A"},{"name":"B"}]
There are two major problems with your code. The first is that save is an asynchronous function. That means any code you write directly after calling it will run before it completes. It works just like setTimeout in that regard.
console.log(1);
setTimeout(function() {
    console.log(2);
}, 100);
console.log(3);
Notice how it outputs 3 before 2. That's because setTimeout, like fs.writeFile, is asynchronous. To make sure you run code after it's done, you can pass a callback or use Promises (outside of the scope of this question but very useful). So your code could look something like this.
const save = (path, cb) => {
    setTimeout(() => {
        cb();
    }, 100);
};

let path = 'path/to/file';
save(path, () => {
    console.log(`Saved ${path}`);
});
console.log(`Saving ${path}...`);
This whole thing could also be avoided by using the sync version of writeFile.
fs.writeFileSync(file, value);
The next problem stems from how require caches its results. It does this because it's intended for loading modules, which will not usually change while the program is running. Instead, load the file from the file system directly. This can be done asynchronously or synchronously. Here's how I would rewrite your code using the synchronous versions (although asynchronous is generally preferable).
var fs = require('fs');
var save = function(value, file){
    return fs.writeFileSync(file, value);
};
var file = process.env[(process.platform == 'win32') ? 'USERPROFILE' : 'HOME'] + "\\AppData\\Roaming\\BetterDiscord\\plugins\\Test.json";
var obj = JSON.parse(fs.readFileSync(file, 'utf8'));
obj.img.push({name:"Test"});
console.log(JSON.stringify(obj.img));
save(JSON.stringify(obj), file);
obj = JSON.parse(fs.readFileSync(file, 'utf8'));
console.log(JSON.stringify(obj.img));
require will cache the file. Instead of requiring, use the fs.readFileSync method to read the file again.
json = JSON.parse(fs.readFileSync(file, 'utf8'));
Also, as #MikeC points out, you're writing to the file asynchronously, so either switch that out for the synchronous version writeFileSync, or rewrite your code with readFile and writeFile and use callbacks.
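For completeness, here is a rough sketch of what a fully callback-based version might look like (an illustration only; each step runs inside the previous step's callback, so nothing touches the file before the earlier operation has finished):
var fs = require('fs');
var file = process.env[(process.platform == 'win32') ? 'USERPROFILE' : 'HOME'] +
    "\\AppData\\Roaming\\BetterDiscord\\plugins\\Test.json";

// read, modify, write, then re-read - each step waits for the previous one
fs.readFile(file, 'utf8', function (err, data) {
    if (err) return console.log(err);
    var json = JSON.parse(data);
    json.img.push({name: "Test"});
    fs.writeFile(file, JSON.stringify(json), function (err) {
        if (err) return console.log(err);
        // only now is the file guaranteed to be updated on disk
        fs.readFile(file, 'utf8', function (err, data) {
            if (err) return console.log(err);
            console.log(JSON.stringify(JSON.parse(data).img));
        });
    });
});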
I am trying to write a function that:
Takes an array of URLs
Gets files from URLs in parallel (order's irrelevant)
Processes each file
Returns an object with the processed files
Furthermore, I don't need errors in #2 or #3 to affect the rest of the execution of my application in any way - the app should be able to continue even if all the requests or all the processing fail.
I know how to fire all the requests in a loop, then once I have all the data, fire the callback to process the files, by using this insertCollection pattern.
However, this is not efficient, as I shouldn't need to wait for ALL files to download before attempting to process them - I would like to process them as each download finishes.
So far I have this code:
const request = require('request');
const urlArray = [urlA, urlB, urlC];
const results = {};
let count = 0;
let processedResult;

const makeRequests = function (urls, callback) {
    for (let url of urls) {
        request(url, function(error, response, body) {
            if (error) {
                callback(error);
                return;
            }
            processedResult = callback(null, body)
            if (processedResult) {
                console.log(processedResult); // prints correctly!
                return processedResult;
            }
        })
    }
};
const processResult = function(error, file) {
    if (error) {
        console.log(error);
        results.errors.push(error);
    }
    const processedFile = file + `<!-- Hello, Dolly! ${count}-->`;
    results.processedFiles.push(processedFile);
    if (++count === urlArray.length) {
        return results;
    }
};
const finalResult = makeRequests(urlArray, processResult);
console.log(finalResult); // undefined;
In the last call to processResult I manage to return a value, and makeRequests captures it, but I'm failing to "rein it in" in finalResult after that.
My questions are:
Why is this not working? I can print a well-formed processedResult on the last iteration of makeRequests, but somehow I cannot return it back to the caller (finalResult).
How can this be solved, ideally "by hand", without promises or the help of libraries like async?
The makeRequests function returns undefined to finalResult because it is a synchronous function. Nothing stops the code from executing, so it reaches the end of the function and, because there is no return statement of its own, it returns undefined by default. The return inside the request callback goes back to whoever invokes that callback (the request library), not to makeRequests.
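Since the question asks for a solution "by hand", without promises, one common pattern is to pass a completion callback into makeRequests and invoke it once a counter shows that every request has settled. A rough sketch along those lines (reusing the question's request library and urlArray, and keeping error handling minimal):
const request = require('request');

// sketch: collect results as each download finishes, fire `done` after the last one
const makeRequests = function (urls, done) {
    const results = { processedFiles: [], errors: [] };
    let remaining = urls.length;

    const finishOne = function () {
        if (--remaining === 0) {
            done(results); // every request has settled, successfully or not
        }
    };

    for (let url of urls) {
        request(url, function (error, response, body) {
            if (error) {
                results.errors.push(error);
            } else {
                // process each file as soon as its own download ends
                results.processedFiles.push(body + `<!-- Hello, Dolly! -->`);
            }
            finishOne();
        });
    }
};

makeRequests(urlArray, function (finalResult) {
    console.log(finalResult); // defined: runs after the last request settles
});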
I have about a million JSON files saved across many sub-directories of the directory "D:/njs/nodetest1/imports/source1/" and I want to import them into the collection "users" in my mongoDB database.
The following code correctly traverses the file system. As you can see, it reads each item in the directory, and if that item is a directory it reads each item inside it. For each item that is not a directory, it performs some operations on it before passing a variable holding an object to a function.
function traverseFS(path) {
    var files = fs.readdirSync(path);
    for (var i in files) {
        var currentFile = path + '/' + files[i];
        var stats = fs.statSync(currentFile);
        if (stats.isFile())
            runOnFile(currentFile);
        else
            traverseFS(currentFile);
    }
}

traverseFS("D:/njs/nodetest1/imports/source1/")
Next, I run a few operations on each file (see below). This reads the file, parses it into a JSON object, reads two attributes of that object into variables, creates an object in the variable "entry", and passes that variable to another function.
function runOnFile(currentFile) {
    var fileText = fs.readFileSync(currentFile, 'utf8');
    var generatedJSON = JSON.parse(fileText);
    var recordID = generatedJSON.recordID;
    var recordText = generatedJSON.recordTexts;
    var entry = {recordID: recordID, recordText: recordText};
    insertRecord(entry);
}
The final function should then insert the data into MongoDB. I think this is where things go wrong.
function insertRecord(entry) {
    var MongoClient = mongodb.MongoClient;
    var MongoURL = 'mongodb://localhost:27017/my_database_name';
    MongoClient.connect(MongoURL, function (err, db) {
        var collection = db.collection('users');
        collection.insert([entry], function (err, result) {
            db.close();
        });
    });
}
I expected this to run through the file structure, reading the JSON files into objects and then inserting those objects into my mongoDB. Instead it reads the first file into the database and then stops/hangs.
Notes:
I don't want to use mongoimport because I don't want to insert all the data from these files into my MongoDB database. I however am not tied to any aspect of this approach. If some other solution exists I am open to it.
This connects to the database just fine. For each item in the directory it successfully creates an "entry" object and passes it to the insertRecord function. In other words, the problem must be occurring in the insertRecord section, though it could of course be caused by something earlier in the process.
If I add error handling, no errors are produced. I have left the error handling out of this post because it clutters the readability of the code snippets.
As per the mongodb 2.2 (current latest) driver documentation, insert is deprecated:
DEPRECATED
Use insertOne, insertMany or bulkWrite
So the short answer is probably to change collection.insert([entry], ...) to collection.insertOne(entry, ...) and you're done.
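Applied to the question's insertRecord, that minimal change would look roughly like this (everything else unchanged):
collection.insertOne(entry, function (err, result) {
    db.close();
});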
Then for the long answer: you say "about a million JSON files", which typically deserves a fully async approach with the least amount of overhead.
There are two (potential) bottlenecks in the sample code:
fs.readFileSync, this is a blocking operation
the connecting, inserting a record and closing the database connection
Both are executed about a million times. Granted, an import is not usually done over and over again, and (hopefully) not on a machine that needs its performance for other important tasks. Still, the sample code can easily be made more robust.
Consider using the glob module to obtain the list of JSON files.
glob('imports/**/*.json', function(error, files) {...})
This provides you with the full list of files easily in an async fashion.
Then consider connecting to the database just once, insert everything and close once.
Maintaining more or less the same steps you have in the sample, I'd suggest something like:
var glob = require('glob'),
    mongodb = require('mongodb'),
    fs = require('fs'),
    MongoClient = mongodb.MongoClient,
    mongoDSN = 'mongodb://localhost:27017/my_database_name',
    collection; // moved this to the "global" scope so we can do it only once

function insertRecord(json, done) {
    var recordID = json.recordID || null,
        recordText = json.recordText || null;
    // the question implies some kind of validation/sanitation/preparation..
    if (recordID && recordText) {
        // NOTE: insert was changed to insertOne
        return collection.insertOne({recordID: recordID, recordText: recordText}, done);
    }
    done('No recordID and/or recordText');
}

function runOnFile(file, done) {
    // moved to be async
    fs.readFile(file, function(error, data) {
        if (error) {
            return done(error);
        }
        var json;
        try {
            json = JSON.parse(data);
        } catch (parseError) {
            // JSON.parse throws on invalid input rather than returning a falsy value
            return done('Unable to parse JSON: ' + file);
        }
        insertRecord(json, done);
    });
}

function processFiles(files, done) {
    var next = files.length ? files.shift() : null;
    if (next) {
        return runOnFile(next, function(error) {
            if (error) {
                console.error(error);
                // you may or may not want to stop here by throwing an Error
            }
            processFiles(files, done);
        });
    }
    done();
}

MongoClient.connect(mongoDSN, function(error, db) {
    if (error) {
        throw new Error(error);
    }
    collection = db.collection('users');
    glob('imports/**/*.json', function(error, files) {
        if (error) {
            throw new Error(error);
        }
        processFiles(files, function() {
            console.log('all done');
            db.close();
        });
    });
});
NOTE: You can collect multiple "entry"-records to leverage the performance gain of multiple inserts using insertMany, though I have the feeling the inserted records are more complicated than described and it might give some memory issues if not handled correctly.
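As a rough sketch of what that batching could look like (the batch size of 1000 is an arbitrary assumption, the collection handle is the same module-level variable used above, and error handling is kept minimal):
// sketch: buffer parsed entries and flush them with insertMany in batches
var batch = [];
var BATCH_SIZE = 1000; // arbitrary; tune to your document size and available memory

function queueEntry(entry, done) {
    batch.push(entry);
    if (batch.length < BATCH_SIZE) {
        return done();
    }
    var toInsert = batch;
    batch = [];
    collection.insertMany(toInsert, done); // one round trip for the whole batch
}

// call this once after the last file has been processed
function flushEntries(done) {
    if (!batch.length) {
        return done();
    }
    var toInsert = batch;
    batch = [];
    collection.insertMany(toInsert, done);
}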
Just structure your data into one big array of objects, then run db.collection.insertMany.
I suggest doing this using Promises:
const Bluebird = require('bluebird');
const glob = Bluebird.promisify(require('glob'));
const mongodb = require('mongodb');
const fs = Bluebird.promisifyAll(require('fs'));
const Path = require('path');
const MongoClient = mongodb.MongoClient;
const insertMillionsFromPath = Bluebird.coroutine(function *(path, mongoConnString) {
    const db = yield MongoClient.connect(mongoConnString);
    try {
        const collection = db.collection('users');
        const files = yield glob(Path.join(path, "*.json"));
        yield Bluebird.map(
            files,
            Bluebird.coroutine(function *(filename) {
                console.log("reading", filename);
                const fileContent = yield fs.readFileAsync(filename);
                const obj = JSON.parse(fileContent);
                console.log("inserting", filename);
                yield collection.insertOne(obj);
            }),
            {concurrency: 10} // You can increase concurrency here
        );
    } finally {
        yield db.close();
    }
});

insertMillionsFromPath("./myFiles", "mongodb://localhost:27017/database")
    .then(() => console.log("OK"))
    .catch((err) => console.log("ERROR", err));
In order to work, you will need to install the following packages:
npm install --save mongodb bluebird glob
and you will need to use Node.js version 6 or greater; otherwise you will need to transpile your JavaScript (due to the use of function * generators).
I have some code that looks like this:
async function promptHandler(source) {
    source.subscribe(function(line) {
        console.log(`line == ${line}`);
    });

    let matchingTests = await getMatchingTests('ROGL');
}
This prints out the contents of the source Observable, which is listening to a ReadStream of a txt file. When the function is called as written above, I see the contents of the file. However, if I call subscribe() after getMatchingTests() is called, like this:
async function promptHandler(source) {
    let matchingTests = await getMatchingTests('ROGL');

    source.subscribe(function(line) {
        console.log(`line == ${line}`);
    });
}
I don't see the contents of the txt file. I know that the matchingTests variable contains the successful results of getMatchingTests, so I don't think it's preventing Node from executing that line.
I'm guessing that something about the getMatchingTests async function call is messing with the source Observable, but I'm not seeing how.
Here's my source code:
let fileStream = createReadStream(file)
    .pipe(split());

let source = new Observable(o => {
    fileStream.on('data', line => { console.log('data'); o.next(line); });
    fileStream.on('error', err => o.error(err));
    fileStream.on('end', () => { console.log('end'); o.complete(); });
});
My intuition here is that the source observable is a hot source, and that by the time await has returned with the matching tests, your text file has already been read. So when you subscribe at that point, there are no lines left to emit; they were all read before you subscribed to the source.
UPDATE:
Given your code, if the ordering is a problem for your use case, you can consider moving the file stream creation into the observable factory, i.e.
let source = new Observable(o => {
    let fileStream = createReadStream(file)
        .pipe(split());

    fileStream.on('data', line => { console.log('data'); o.next(line); });
    fileStream.on('error', err => o.error(err));
    fileStream.on('end', () => { console.log('end'); o.complete(); });
});
That way, the stream will be created and started only when you subscribe.
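With that change, the second ordering from the question should now receive the lines, because nothing is read from the file until subscribe() is called and each subscription opens its own stream. A sketch reusing the question's names:
async function promptHandler(source) {
    let matchingTests = await getMatchingTests('ROGL');

    // the file is only opened here, when we subscribe, so no lines are missed
    source.subscribe(function(line) {
        console.log(`line == ${line}`);
    });
}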
I want to use API Blueprint and automate this with Grunt. I want to read an apiary (tool for API Blueprint) file, parse it (with Protagonist, which is the API Blueprint parser for Node.js), stringify the result to JSON and write it into another file. It is a simple task but I don't know how to do it; I always get an undefined result. Here is what I have so far:
grunt.registerTask('apiary2js', 'Generate js version of apiary file.', function () {
    var parser = require('protagonist');
    var content = grunt.file.read('apiary.apib');
    var blueprint = parser.parse(content, function (error, result) {
        if (error) {
            console.log(error);
            return;
        }
        return result.ast; // <-- (how to return this value?)
    });
    var json = JSON.stringify(blueprint);
    grunt.file.write('test/frontend/apiary.js', "var apiary = " + json);
});
And result in apiary.js is this:
var apiary = undefined
The problem you're running into is that the parser.parse() method accepts a callback which executes asynchronously. You can't return a value from a callback as you would in a synchronous method because you don't know when it will be executed. The solution is to place the 'return' logic in the callback.
grunt.registerTask('apiary2js', 'Generate js version of apiary file.', function () {
    var parser = require('protagonist');
    var content = grunt.file.read('apiary.apib');

    // Parse the contents of the file & execute the callback when done parsing.
    parser.parse(content, function (error, result) {
        if (error) {
            console.log(error);
            return;
        }
        // Now we can use the result as desired.
        var json = JSON.stringify(result.ast);
        grunt.file.write('test/frontend/apiary.js', "var apiary = " + json);
    });
});
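One caveat worth adding: because the parsing and writing now happen inside an asynchronous callback, Grunt may consider the task finished before the file is written. Grunt's convention for this is this.async(); here is a sketch of the same task using it (names unchanged from the answer above):
grunt.registerTask('apiary2js', 'Generate js version of apiary file.', function () {
    var done = this.async(); // tell Grunt to wait until we call done()
    var parser = require('protagonist');
    var content = grunt.file.read('apiary.apib');

    parser.parse(content, function (error, result) {
        if (error) {
            console.log(error);
            return done(false); // mark the task as failed
        }
        var json = JSON.stringify(result.ast);
        grunt.file.write('test/frontend/apiary.js', "var apiary = " + json);
        done(); // signal completion so Grunt can move on
    });
});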
});