js share array between threads - javascript

I am trying to use threads in JS: the first thread reads a file and pushes each word into an array, and the second thread reads the array and logs each word with console.log.
I didn't find any solutions except using worker_threads.
This is my main file:
const { Worker } = require('worker_threads');

var data = [];
var i = 0;
var finish = false;

function runWorker() {
    const worker = new Worker('./worker.js', { workerData: { i, data } });
    const worker2 = new Worker('./worker2.js', { workerData: { i, data, finish } });
    worker.on('message', function(x) {
        data.push(x.data);
        if (x.finish != undefined) {
            finish = true;
        }
    });
    worker.on('error', function(error) {
        console.log(error);
    });
    worker2.on('message', function(x) {
        console.log(x);
    });
    worker2.on('error', function(error) {
        console.log(error);
    });
    worker.on('exit', code => {
        if (code !== 0) console.log(new Error(`Worker stopped with exit code ${code}`));
    });
}
runWorker();
This is my worker.js file:
const { workerData, parentPort } = require('worker_threads');
var fs = require('fs');
const chalk = require('chalk');

fs.readFile('./input.txt', async function read(err, data) {
    if (err) {
        console.log(' |-> ' + chalk.bold.red("unable to read the file"));
    } else {
        console.log(' |-> ' + chalk.blue("starting to read the file"));
        data = data.toString().split(" ");
        for (var i = 0; i < data.length; i++) {
            parentPort.postMessage({ data: data[i] });
        }
        parentPort.postMessage({ finish: true });
    }
});
Here I read a file and send each word to the main program to put it into the array.
This is my worker2.js file:
const { workerData, parentPort } = require('worker_threads');

let finish = false;
while (!finish) {
    let data = workerData.data;
    let i = workerData.i;
    finish = workerData.finish;
    console.log(data[i]);
}
Here I have an infinite loop: if something is in the array, it prints it.
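For context, workerData is copied with the structured clone algorithm when a Worker starts, so the array worker2.js sees is a snapshot: pushes made later in the main thread never show up there, and the busy loop above spins forever. A minimal sketch of one way around it, assuming a hypothetical consumer.js that just logs what it receives, is to let the main thread relay each word with postMessage instead of trying to share the array:
const { Worker } = require('worker_threads');

// worker.js is the file-reading worker from above; consumer.js is hypothetical.
const producer = new Worker('./worker.js');
const consumer = new Worker('./consumer.js');

producer.on('message', msg => {
    // Relay every { data: word } message (and the final { finish: true }) instead of sharing an array.
    consumer.postMessage(msg);
});

// consumer.js (hypothetical):
// const { parentPort } = require('worker_threads');
// parentPort.on('message', msg => {
//     if (msg.finish) return process.exit(0);
//     console.log(msg.data);
// });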

Related

How to read and write multiple files using Node.js?

I have an array of filenames. I want to read the first file, perform some operation, and store the result in a separate file; then read the second file, perform the operation again, and save the result in a new second file, and so on for all files. Below is the code I have written to read and write the files.
TextReader.js
var fs = require('fs');
const readline= require('readline');
var headerIndex = [];
var isFirstLine = true;
var finalList = [];
module.exports={
readTextFile: (filename)=>{
console.log('inside textreader')
readline.createInterface({
input: fs.createReadStream(`./s3/${filename}`)
}).on('line', function(line) {
console.log(line);
console.log("-----------------------------");
if (isFirstLine) {
headerIndex = line.split('|');
}
else if (!isFirstLine){
let rowValues = line.split('|');
let valueIndex = 0;
var singlePerson = {};
headerIndex.forEach(currentval => {
singlePerson[currentval] = rowValues[valueIndex];
valueIndex++;
});
finalList.push(singlePerson);
}
isFirstLine = false;
}).on('close',function(){
//console.log(finalList);
var data='';
var header= "Employee ID"+'\t'+headerIndex[0]+'\t'+headerIndex[2]+'\t'+headerIndex[1]+'\t'+headerIndex[4]
+'\t'+headerIndex[3]+'\t'+headerIndex[5]+'\n';
for (var i = 0; i < finalList.length; i++) {
function split(name){
var conv=name.split(' ');
var result=[conv.slice(0, -1).join(' '),conv.slice(-1)[0]].join(conv.length < 2 ? '' : ',');
return result;
}
split(finalList[i].UserName);
data=data+finalList[i].LoginID+'\t'+split(finalList[i].UserName)+'\t'+finalList[i].Email+'\t'
+finalList[i].LoginID+'\t'+'A&G Professional'+'\t'+finalList[i].Title+'\t'+finalList[i].State+'\n';
}
var newFilename= filename.substr(0, filename.lastIndexOf("."))
var alldata= header + data;
//console.log(alldata)
fs.appendFile(`./s3/${filename}.xlsx`,alldata, (err) => {
if (err) throw err;
console.log('File created');
});
});
}
}
I am calling readTextFile(); from another file.
demo.js
const { readTextFile } = require("./textReader");
var array=['UserRoleDetails_12102021063206.txt',
'UserRoleDetails_12102021064706 (1).txt',
'UserRoleDetails_12102021064706.txt',
'UserRoleDetails_12102021070206.txt']
array.forEach(function(currentItem){
readTextFile(currentItem);
})
The problem I am facing is that all the files are processed at the same time and the data from all of them ends up stored together.
First, Node.js does not run this sequentially the way you expect, and second, array.forEach will not help you run the operations one after another. You need to use something like this:
const { readTextFile } = require("./textReader");

var array = ['UserRoleDetails_12102021063206.txt',
             'UserRoleDetails_12102021064706 (1).txt',
             'UserRoleDetails_12102021064706.txt',
             'UserRoleDetails_12102021070206.txt'];

(async () => {
    for (const element of array) {
        await readTextFile(element); // readTextFile must return a promise for this to actually wait
    }
})();
Note: your readTextFile function is not async, so you will need to make it return a promise for the await above to actually wait. If anything is unclear, just ask.
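A sketch of what that could look like, keeping the parsing logic of TextReader.js above but making the function awaitable; moving headerIndex, isFirstLine and finalList inside the function also stops data from different files being mixed together, since they are no longer shared module-level state:
// Hedged sketch only: the '...' parts are the unchanged logic from TextReader.js.
var fs = require('fs');
const readline = require('readline');

module.exports = {
    readTextFile: (filename) => new Promise((resolve, reject) => {
        var headerIndex = [];
        var isFirstLine = true;
        var finalList = []; // per-call state, so files no longer share results
        readline.createInterface({
            input: fs.createReadStream(`./s3/${filename}`)
        }).on('line', function (line) {
            // ... same line parsing as in TextReader.js ...
        }).on('close', function () {
            var alldata = ''; // ... same header + data formatting as in TextReader.js ...
            fs.appendFile(`./s3/${filename}.xlsx`, alldata, (err) => {
                if (err) return reject(err);
                console.log('File created');
                resolve(); // the caller's await now waits for the write to finish
            });
        });
    })
};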

Why is my code not waiting for the completion of the function?

I am trying to read some data from a file and store it in a database.
This is part of a larger transaction and I need the returned ids for further steps.
async parseHeaders(mysqlCon, ghID, csv) {
var self = this;
var hIDs = [];
var skip = true;
var idx = 0;
console.log("Parsing headers");
return new Promise(async function(resolve, reject) {
try {
var lineReader = require('readline').createInterface({
input: require('fs').createReadStream(csv)
});
await lineReader.on('close', async function () {
console.log("done: ", JSON.stringify(hIDs));
resolve(hIDs);
});
await lineReader.on('line', async function (line) {
line = line.replace(/\"/g, '');
if (line.startsWith("Variable")) { //Variable,Statistics,Category,Control
console.log("found variables");
skip = false; //Ignore all data and skip to the parameter description.
return; //Skip also the header line.
}
if (!skip) {
var data = line.split(",");
if (data.length < 2) { //Variable section done return results.
console.log("Found sub?",line);
return lineReader.close();
}
var v = data[0];
var bidx = data[0].indexOf(" [");
if (bidx > 0)
v = data[0].substring(0, bidx); //[] are disturbing mysql (E.g.; Air temperature [°C])
var c = data[2];
hIDs[idx++] = await self.getParamID(mysqlCon, ghID, v, c, data);//, function(hID,sidx) { //add data in case the parameter is not in DB, yet.
}
});
} catch(e) {
console.log(JSON.stringify(e));
reject("some error occured: " + e);
}
});
}
async getParamID(mysqlCon,ghID,variable,category,data) {
return new Promise(function(resolve, reject) {
var sql = "SELECT ID FROM Parameter WHERE GreenHouseID="+ghID+" AND Variable = '" + variable + "' AND Category='" + category + "'";
mysqlCon.query(sql, function (err, result, fields) {
if(result.length === 0 || err) { //apparently not in DB, yet ... add it (Acronym and Machine need to be set manually).
sql = "INSERT INTO Parameter (GreenHouseID,Variable,Category,Control) VALUES ("+ghID+",'"+variable+"','"+category+"','"+data[3]+"')";
mysqlCon.query(sql, function (err, result) {
if(err) {
console.log(result,err,this.sql);
reject(err);
} else {
console.log("Inserting ",variable," into DB: ",JSON.stringify(result));
resolve(result.insertId); //added, return generated ID.
}
});
} else {
resolve(result[0].ID); //found in DB .. return ID.
}
});
});
}
The functions above are in the base class and called by the following code:
let headerIDs = await self.parseHeaders(mysqlCon, ghID, filePath);
console.log("headers:",JSON.stringify(headerIDs));
The sequence of events is that everything in parseHeaders completes except for the calls to self.getParamID, and control returns to the calling function, which prints an empty array for headerIDs.
The console.log statements in self.getParamID are then printed afterward.
What am I missing?
Thank you
As you want to execute an asynchronous action for every line, we can define a helper that does just that:
const once = (target, evt) => new Promise(res => target.on(evt, res));

function mapLines(reader, action) {
    const results = [];
    let index = 0;
    reader.on("line", line => results.push(action(line, index++)));
    return once(reader, "close").then(() => Promise.all(results));
}
So now you can solve that easily:
let skip = true;
const hIDs = [];

await mapLines(lineReader, async function (line, idx) {
    line = line.replace(/\"/g, '');
    if (line.startsWith("Variable")) { //Variable,Statistics,Category,Control
        console.log("found variables");
        skip = false; //Ignore all data up to here and skip to the parameter description.
        return; //Skip also the header line.
    }
    if (!skip) {
        var data = line.split(",");
        if (data.length < 2) { //Variable section done, return results.
            console.log("Found sub?", line);
            return lineReader.close();
        }
        var v = data[0];
        var bidx = data[0].indexOf(" [");
        if (bidx > 0)
            v = data[0].substring(0, bidx); //[] are disturbing mysql (E.g.; Air temperature [°C])
        var c = data[2];
        hIDs[idx] = await self.getParamID(mysqlCon, ghID, v, c, data);
    }
});
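One small follow-up: hIDs is indexed by the line number here, so skipped lines leave undefined holes in it. If the caller only wants the actual IDs, a hedged option is to compact the array after mapLines resolves:
// After `await mapLines(...)` has finished, drop the holes left by skipped lines.
const headerIDs = hIDs.filter(id => id !== undefined);
console.log("headers:", JSON.stringify(headerIDs));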

Write an HTTP stream to a file

I'm having problems trying to write a chunked buffer into a file. The problem is basically that the file only contains the last bytes of the output buffer. The buffer is very large, and my file.out is truncated: only the last items are there.
Here is my code
var reqChart = http.request(chart, function(res1) {
res1.setEncoding( 'utf8' );
res1.on('data', function (chunk) {
var fs = require('fs');
//var b = new Buffer(chunk.length);
var c = "";
for (var i = 0;i < chunk.length;i++) {
// b[i] = chunk[i];
c = c + chunk[i]
}
console.log(c);
fs.writeFile("rich.txt", c, "utf-8",function(err) {
if(err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
});
});
And if you are interested, here is the beginning of what I get in my file vs the beginning of the console.log of my variable. I already tried writing the console.log output to a file, with the same results.
File
0,0.120179572670496],[1498028100000,0.105581318680705],[1498028400000,0.167319933562371],[1498028700000,0.147574197583768],[1498029000000,0.114172853959319],[1498029300000,0.241186960587686],[1498029600000,1.45701290672775],[1498029900000,0.154756395075166],[1498030200000,0.0836631006369253],[1498030500000,0.0800486694467188],[1498030800000,0.0861569133026863],[1498031100000,0.092360872083502],[1498031400000,0.0790707908354345],[1498031700000,0.129877438815933],[1498032000000,0.118812121796025],[1498032300000,0.0989746376933164],[1498032600000,2.30001837257628],[1498032900000,0.313639093279438],[1498033200000,0.227936449562983],
Buffer/Variable/log
{"requestData":{"options":{"width":950,"gui_component_mode":1,"exporttocsv":1,"only_stacks_in_legend":false,"reverse_axis":false,"height":446,"datasetTitle":"","legend_average":false,"legend_maximum":false,"legend_minimum":false},"model_group":101,"name":"autil_17","model":"nmis_graph","dataset_id":0,"owner_cid":"c3","data_source_type":"chart","model_view":"graph","parameters":{"graph_type":"interface","resource_index":"17","nmis_data_type":null,"value_column":"value","node":"RT01459","axis":0,"end_date_raw":1498095300,"substitutions":{"time.start":1497922702,"time.end":1498095502},"time_column":"time","translation":"","field":"","lineType":"line","period":"2d","index_graph_type":"autil","resource":"interface","start_date_raw":1497922500,"resolution":300,"class":null},"data_source":"local_nmis","translation":null},"replyData":{"options":{"subtitleText":"ENTERPRISE_RT01","titleText":"Interface gigabitethernet0-2-3913","legend_raw":" Avg In Max In Avg 12.76 % Max 98.99 % \\n Avg Out Max Out Avg 4.98 % Max 52.49 % \\n","yAxis0TitleText":"% Avg Util"},"stacking":"normal","meta_data":{"time_start":1497922500,"start_date_input":"2017-06-19 20:35:00","end_date_input":"2017-06-21 20:35:00"},"data":[{"yAxis":0,"reverse_axis":0,"valueDecimals":2,"value_min":-98.9864025844157,"color":"#00BFFF","suffix":"","dataset_multiplier":1,"sum":-7373.17229868631,"connectNulls":0,"stack":1,"value_max":-0.0591203443255937,"name":"Avg In","data":[[1497922500000,-0.7137202476565],[1497922800000,-1.43305756579003],[1497923100000,-0.150464409649807],[1497923400000,-0.150475661479925],[1497923700000,-0.100369773564214],[1497924000000,-0.0893947123021048]
I thought maybe it needed a timeout function or something, but the log is writing the info to the terminal, so maybe I'm missing something.
You realize that you reset c on every chunk, since it is locally scoped? Declare it outside the data handler instead:
var c="";//not resetted
res1.on('data', function (chunk) {
c+=chunk;
}
And you need to wait for the stream to finish:
res1.on("end",function(){
console.log(c);
var fs = require('fs');
fs.writeFile("rich.txt", c, "utf-8",function(err) {
if(err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
});
The problem you're having is that you're writing the file inside stream.on('data', ...). You need to accumulate all the data first and use the stream.on('end', ...) event to write it to a file. I hope this example helps:
var fs = require('fs');

// Some example of getting original data
var readableStream = fs.createReadStream('file.txt');
var data = '';

readableStream.on('data', function(chunk) {
    data += chunk;
});

readableStream.on('end', function() {
    console.log(data);
    fs.writeFile("rich.txt", data, "utf-8", function(err) {
        if (err) {
            console.log(err);
        } else {
            console.log("The file was saved!");
        }
    });
});
Now copying your code and modifying it:
var reqChart = http.request(chart, (res1) => {
    res1.setEncoding('utf8');
    var fs = require('fs');
    var c = "";
    res1.on('data', function (chunk) {
        //var b = new Buffer(chunk.length);
        for (var i = 0; i < chunk.length; i++) {
            // b[i] = chunk[i];
            c = c + chunk[i];
        }
        console.log(c);
    });
    res1.on('end', function () {
        fs.writeFile("rich.txt", c, "utf-8", function (err) {
            if (err) {
                console.log(err);
            } else {
                console.log("The file was saved!");
            }
        });
    });
});
The pipe function for streams would make your life a lot easier.
const fs = require('fs');
const http = require('http');
const output = fs.createWriteStream('output.html'); // change to rich.txt
const url = 'http://example.com'; // change to chart URL
http.get(url, response => {
response.pipe(output);
});
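If you are on Node 10 or newer, a hedged variant of the same idea is stream.pipeline, which also forwards stream errors that a bare .pipe() call would not report for you; the URL and file name below are placeholders:
const { pipeline } = require('stream');
const fs = require('fs');
const http = require('http');

http.get('http://example.com', response => { // swap in the chart request/URL
    pipeline(response, fs.createWriteStream('rich.txt'), err => {
        if (err) console.error('Download failed:', err);
        else console.log('The file was saved!');
    });
});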

Reading a file line by line, parsing it and inserting it into Mongo in Node.js

I have a tab-separated file with thousands of rows. How can I use Node.js to read the file line by line, parse each line, create an object, and insert it into a Mongo DB?
I am just learning Node and Mongo and I come from a different background, so how can this be done?
In the end, the Mongo DB has to be populated with the proper data.
I searched in net but I could not find the complete solution.
Thanks.
I had an issue with the answer by Juvenik. My problem was that the database would not be populated by the time readline had completed. The lines were being read synchronously, but the DB insertion was asynchronous.
Instead, I found a simpler solution with the line-reader package. It reads the lines and waits for a callback before continuing.
var MongoClient = require('mongodb').MongoClient
var dbName = 'yourDbName'
var url = 'mongodb://localhost:27017/' + dbName
var collectionName = 'yourCollectionName'
var filename = 'yourFileName.txt'
var printLine = 1000
MongoClient.connect(url, function(err, db) {
if (err) {
console.error('Problem connecting to database')
} else {
console.log('Connected correctly to server.')
var lineReader = require('line-reader')
var collection = db.collection(collectionName)
var lineNum = -1
var headers = []
lineReader.eachLine(filename, function(line, last, cb) {
lineNum++
try {
var split = line.split('\t')
var object = {}
if (lineNum > 0) {
for (var i = 0; i < split.length; i += 1) {
object[headers[i]] = split[i]
}
collection.insert(object, function (insertErr, insertObj) {
if (insertErr) console.error(insertErr)
if (lineNum % printLine === 0) console.log('Line ' + lineNum)
if (last) {
console.log('Done with ' + filename + ' (' + lineNum + ' records)')
process.exit(0)
} else {
cb()
}
})
} else {
headers = line.split('\t')
cb()
}
} catch (lineError) {
console.error(lineError)
}
})
}
})
I came across a similar problem and this approach worked for me.
Have a look, it might be helpful.
var mongoDb = require('mongodb');
var mongoClient = mongoDb.MongoClient;
var dbname = 'YOUR_DB_NAME';
var collectionName = 'YOUR_COLLECTION_NAME';
var url = 'mongodb://localhost:27017/'+dbname;
var filename = 'FIle_Name.txt';
console.log('***************Process started');
mongoClient.connect(url,function(err,db){
if(err){
console.log('error on connection '+err);
}
else{
console.log('***************Successfully connected to mongodb');
var collection = db.collection(collectionName);
var fs = require('fs');
var readline = require('readline');
var stream = require('stream');
var instream = fs.createReadStream(filename);
var outstream = new stream;
var rl = readline.createInterface(instream,outstream);
console.log('***************Parsing, please wait ...');
rl.on('line',function(line){
try{
var arr = line.split('\t');
var object = {};
//Parse them here
//Example
object['name'] = arr[0]; //Just an example
var res = collection.insert(object);
}
catch (err){
console.log(err);
}
});
rl.on('close',function(){
db.close();
console.log('***************completed');
});
}
});
I am a learner too. If someone can make it better, that would be good.
Here is a more performant version of frank-0's answer (inserting batches of objects), updated to use async and the latest Mongo driver:
const lineReader = require('line-reader');

// `collection` is assumed to be an already connected MongoDB collection,
// obtained as in the answers above.
async function readFileAndInsertInMongo(file) {
    let total = 0;
    return new Promise((resolve, reject) => {
        let buffer = [];
        lineReader.eachLine(file, (line, last, cb) => {
            // prepare your object based on the line content
            let insertObject = {'some_content': 'some_value'};
            buffer.push(insertObject);
            total++;
            if (total % 10000 === 0 || last) {
                collection.insertMany(buffer, function(err, res) {
                    if (err) {
                        return reject(err);
                    }
                    if (last) {
                        resolve(res);
                    } else {
                        buffer = [];
                        return cb();
                    }
                });
            } else {
                return cb();
            }
        });
    });
}
This really is the best solution I have found to parse huge files and insert them in the database without exploding Node's memory. Hope this can help ;)
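For completeness, on newer Node versions (and the current MongoDB driver) the same job can be sketched with readline's async iterator and awaited insertMany batches; the connection string, names, and the tab-to-document mapping below are placeholders, not part of the original answers:
// Hedged sketch: async iteration over lines, inserting in batches of 1000.
const fs = require('fs');
const readline = require('readline');
const { MongoClient } = require('mongodb');

async function importFile(filename) {
    const client = await MongoClient.connect('mongodb://localhost:27017');
    const collection = client.db('yourDbName').collection('yourCollectionName');

    const rl = readline.createInterface({ input: fs.createReadStream(filename) });
    let headers = null;
    let batch = [];

    for await (const line of rl) {
        const values = line.split('\t');
        if (!headers) { headers = values; continue; } // first line holds the column names
        const doc = {};
        headers.forEach((h, i) => { doc[h] = values[i]; });
        batch.push(doc);
        if (batch.length === 1000) { // insert in batches to keep memory flat
            await collection.insertMany(batch);
            batch = [];
        }
    }
    if (batch.length) await collection.insertMany(batch);
    await client.close();
}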

JavaScript for loop that waits for a callback

I have this function:
function tryStartLocalTrendsFetch(woeid) {
var userIds = Object.keys(twitClientsMap);
var isStarted = false;
for (var i = 0; i < userIds.length; i++) {
var userId = userIds[i];
var twitClientData = twitClientsMap[userId];
var isWoeidMatch = (woeid === twitClientData.woeid);
if (isWoeidMatch) {
startLocalTrendsFetch(woeid, twitClientData, function (err, data) {
if (err) {
// Couldn't start local trends fetch for userId: and woeid:
isStarted = false;
} else {
isStarted = true;
}
});
// This will not obviously work because startLocalTrendsFetch method is async and will execute immediately
if (isStarted) {
break;
}
}
}
console.log("No users are fetching woeid: " + woeid);
}
The gist of this method is that I want the line if (isStarted) { break; } to work: once a fetch has started, the loop should not continue and try to start another one.
I'm doing this in NodeJS.
Try to use a recursive definition instead:
function tryStartLocalTrendsFetch(woeid) {
    var userIds = Object.keys(twitClientsMap);
    recursiveDefinition(userIds, woeid);
}

function recursiveDefinition(userIds, woeid, userIndex) {
    userIndex = userIndex || 0;
    if (userIndex >= userIds.length) {
        console.log("No users are fetching woeid: " + woeid);
        return;
    }
    var userId = userIds[userIndex];
    var twitClientData = twitClientsMap[userId];
    var isWoeidMatch = (woeid === twitClientData.woeid);
    if (isWoeidMatch) {
        startLocalTrendsFetch(woeid, twitClientData, function (err, data) {
            if (err) {
                // Couldn't start for this user, try the next one.
                recursiveDefinition(userIds, woeid, userIndex + 1);
            }
            // On success, stop here: the fetch has started.
        });
    } else {
        // This user doesn't match the woeid, try the next one.
        recursiveDefinition(userIds, woeid, userIndex + 1);
    }
}
You may also use async (npm install async):
var async = require('async');

async.forEach(row, function(col, callback) {
    // Do your magic here
    callback(); // signals that this iteration is finished
}, function(err) {
    if (err) throw err;
});
More material to help you out: Node.js - Using the async lib - async.foreach with object
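A hedged modern alternative, not taken from the answers above: if startLocalTrendsFetch is promisified, a plain for...of loop can await each attempt and stop as soon as one starts.
const { promisify } = require('util');

// Assumes startLocalTrendsFetch(woeid, twitClientData, callback) as in the question.
const startLocalTrendsFetchAsync = promisify(startLocalTrendsFetch);

async function tryStartLocalTrendsFetch(woeid) {
    for (const userId of Object.keys(twitClientsMap)) {
        const twitClientData = twitClientsMap[userId];
        if (woeid !== twitClientData.woeid) continue;
        try {
            await startLocalTrendsFetchAsync(woeid, twitClientData);
            return; // a fetch has started, stop looking
        } catch (err) {
            // this user couldn't start the fetch, try the next one
        }
    }
    console.log("No users are fetching woeid: " + woeid);
}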
