node.js Wait for task to be finished - javascript

So I am writing this Node.js program to import XML files into arrays of JSON objects. I have two files to import, teachers.xml and students.xml.
The teachers and students files each contain several thousand records. I have that part pretty well covered with my code.
This is my JavaScript file for parsing the files:
var fs = require('fs');
var xmldom = require('xmldom');
var async = require('async');

// Parse `catalog` xml file and call `callback(catalog domxml root)`
function parse_catalog(catalog, callback) {
    // Read xml file content
    fs.readFile(catalog, function (err, data) {
        if (err) {
            throw 'Error reading XML catalog';
        } else {
            // Parse xml content and return dom root
            var domRoot = new xmldom.DOMParser().parseFromString(data.toString());
            // Call callback passing dom root
            callback(domRoot);
        }
    });
}
I have two functions like this to convert the XML to JSON, and they work perfectly (one for teachers and one for students):
// Convert teacher XML into JSON format in an array
function convert_teachers(domCatalog) {
    var teachers = domCatalog.getElementsByTagName('teacher');
    var teachers_arr = [];
    for (var i = 0; i < teachers.length; i++) {
        var teacher = teachers[i];
        ...
        // Reading the xml
        teachers_arr.push({
            ...
            // Create the json
        });
    }
    console.log("Teachers are now in JSON format");
}
So in the end, all I have to do is this:
parse_catalog('teachers.xml', convert_teachers);
When I do this:
parse_catalog('teachers.xml', convert_teachers);
parse_catalog('students.xml', convert_students);
one or the other will finish first depending on the number of elements to import, which is normal I think.
What I want is to wait for both files to be imported and then do some JavaScript manipulations on the results, and this is where I am stuck.
I tried to do this with async, but it does not wait until the two files have finished importing.
async.parallel([
    function(callback) {
        parse_catalog('teachers.xml', convert_teachers);
        callback();
    },
    function(callback) {
        parse_catalog('students.xml', convert_students);
        callback();
    }
], function(err) {
    if (err) return next(err);
    console.log("Finished");
    // Eventually some JavaScript manipulations on the two arrays
});
Actually it outputs:
Finished
Teachers are now in JSON format
Students are now in JSON format
or, depending on the file sizes:
Finished
Students are now in JSON format
Teachers are now in JSON format
What I would like is more like:
Teachers are now in JSON format (or students)
Students are now in JSON format (or teachers)
Finished
I plan to load 2 more files, and the order in which they are loaded doesn't matter to me.
Any leads? Thanks!

You're executing callback() in your async.parallel() functions too soon, because at that point fs.readFile() hasn't finished yet (its callback hasn't fired). Try something like this instead:
function parse_catalog(catalog, callback) {
    // Read xml file content
    fs.readFile(catalog, function(err, data) {
        if (err)
            return callback(err);
        // Parse xml content and return dom root
        var domRoot = new xmldom.DOMParser().parseFromString(data.toString());
        // Call callback passing dom root
        callback(null, domRoot);
    });
}
// Convert teacher XML into JSON format in an array
function convert_teachers(domCatalog) {
    var teachers = domCatalog.getElementsByTagName('teacher');
    var teachers_arr = [];
    for (var i = 0; i < teachers.length; i++) {
        var teacher = teachers[i];
        ...
        // Reading the xml
        teachers_arr.push({
            ...
            // Create the json
        });
    }
    console.log('Teachers are now in JSON format');
    return teachers_arr;
}
// and similarly for `convert_students`
async.parallel({
    teachers: function(callback) {
        parse_catalog('teachers.xml', function(err, domCatalog) {
            if (err)
                return callback(err);
            var teachers = convert_teachers(domCatalog);
            callback(null, teachers);
        });
    },
    students: function(callback) {
        parse_catalog('students.xml', function(err, domCatalog) {
            if (err)
                return callback(err);
            var students = convert_students(domCatalog);
            callback(null, students);
        });
    }
}, function(err, results) {
    if (err) return next(err);
    console.log('Finished');
    // here you have `results.teachers` and `results.students`
    console.dir(results);
});

Related

node.js insert json array object into mysql table

I have a CSV that I want to save into my MySQL table. My parser works well and it also saves the created JSON array to my table. My problem is that it inserts row by row in the background and doesn't send a response.
My code looks like this:
var file = './mytable.csv';
connection.connect(function (err) {});
var csv = require('csvtojson');

csv({ delimiter: "," })
    .fromFile(file)
    .on('end_parsed', function (jsonArray) {
        for (var i = 0; i < jsonArray.length; i++) {
            var post = jsonArray[i];
            conn.query('INSERT INTO mytable SET ?', post, function (err, results) {
                if (err) throw err;
                console.log(results.insertId);
            });
        }
        res.end("done");
    })
    .on('done', function (error) {
        console.log('end');
    });
My goal is that my API sends "done" (with res.json("done")) when the complete query is finished. What should I change?
Greetings
Edit: my CSV is really large, with almost 500k rows!
EDIT:
I inserted async into my parser like this:
csv({ delimiter:";"})
.fromFile(file)
.on('end_parsed', function(jsonArray) {
async.forEach(jsonArray, function (jsonArrays, callback) {
conn.query('INSERT INTO mytable SET ?', jsonArrays, callback);
}, function (err) {
if (err) return next(err);
res.json("done");
console.log("done")
});
});
But it doesn't respond with "done" (in the terminal it prints it, but Postman only gives me "Could not get any response").
Your call to res.end() / res.json() doesn't wait for all inserts to be finished.
And if you start your inserts within a for loop, you start them all more or less in parallel. You should take a look at something like the async library (http://caolan.github.io/async). There you'll find an eachLimit() function that lets you run async operations on a collection/array while limiting how many operations run in parallel. You also get a callback that is called when an error happens or when all async calls have finished. Within this callback you can call res.json(...) to send your response.
Sample:
var async = require('async');
//...
function save_row_to_db(post, callback) {
    conn.query('INSERT INTO mytable SET ?', post, callback);
}

function finished(err) {
    if (err) throw err;
    res.end("done");
}

async.eachLimit(csvRows, 20, save_row_to_db, finished);
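Wired into the parser from the question, that might look roughly like this (a sketch: it assumes save_row_to_db and finished are defined as above and that res is in scope of the route handler):
csv({ delimiter: ";" })
    .fromFile(file)
    .on('end_parsed', function (jsonArray) {
        // Insert at most 20 rows at a time, then respond once every insert has finished
        async.eachLimit(jsonArray, 20, save_row_to_db, finished);
    });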

Scraping Cheerio - Getting only element[k]

Problem: with this code I get a list of every occurrence of a given element (h1 here).
I would like to get only occurrence number k of this element (k can be a variable).
Ultimately, I would like to get, say, element1[i] and element2[j], where element1 can be a class or an id, and the same for element2,
and then concatenate element1, 2, ..., n into a JSON file.
I think this is a very basic question, but I am quite new to jQuery...
'use strict';

var url = "http://www.website.html",
    rech = "h1";

// Import the dependencies
var cheerio = require("cheerio"),
    req = require("tinyreq");

// Define the scrape function
function scrape(url, data, cb) {
    // 1. Create the request
    req(url, (err, body) => {
        if (err) { return cb(err); }
        // 2. Parse the HTML
        let $ = cheerio.load(body)
          , pageData = {}
          ;
        // 3. Extract the data
        Object.keys(data).forEach(k => {
            pageData[k] = $(data[k]).text();
        });
        // Send the data in the callback
        cb(null, pageData);
    });
}

// Extract some data from my website
scrape(url, {
    rech
}, (err, data) => {
    console.log(err || data);
});
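For reference, cheerio supports jQuery-style traversal, so one way to keep only occurrence k of each selector is .eq(). A minimal sketch (assuming k is the zero-based index you want; the iteration variable is renamed to key to avoid shadowing) that would replace the extraction step above:
// 3. Extract only the k-th match of each selector instead of all of them
Object.keys(data).forEach(key => {
    pageData[key] = $(data[key]).eq(k).text();
});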

Make multiple requests in Mongoose

I'm trying to run several queries that depend on the result of another query against a MongoDB database with Mongoose, and return the combined result to an Ember.js front end.
If the text above isn't clear, look at the schema of the database:
// I already get the exam content given an id
Exam: {
    ...
    collections: [{
        question: Object(Id)
    }]
    ...
}
and in the question schema it's:
// I want to get the content of this given an id
question: {
    ...
    questions: String, // I want to get this given an exam id
    value: Number      // and this
    ...
}
I tried to do this by getting the ObjectIds from collections and then using a for loop to fetch each question, saving the returned values into a JSON variable like this:
Test.findById(id, 'collections', function(err, collections) {
    if (err) {
        res.send(err);
    }
    var result = {}; // json variable for the response
    // the for loop runs over the number of ObjectIds that were found
    for (var i = 0; i < collections.collections.length; i++) {
        // Send the requests to the database
        QuestionGroup.findById(collections.collections[i].id, 'questions').exec(function(err, questiongroup) {
            if (err) {
                res.send(err);
            }
            // save the results in the json
            result.questiongroup = questiongroup;
            // prints questions
            console.log(result);
        });
        // it returns {}
        console.log(result);
    }
    // also returns {}
    console.log(result);
    res.json({ result: result });
});
Is there a way to save the results of these requests into a variable and return it as JSON to the front end?
Since the queries within the loop execute asynchronously, you'll have to send the response once everything has finished executing.
For example:
Test.findById(id, 'collections', function(err, collections) {
    if (err) {
        res.send(err);
    }
    var result = []; // json variable for the response

    function done(result) {
        res.json({
            result
        });
    }

    for (var i = 0, l = collections.collections.length; i < l; i++) {
        // i needs to be in private scope
        (function(i) {
            QuestionGroup.findById(collections.collections[i].id, 'questions').exec(function(err, questiongroup) {
                if (err) {
                    res.send(err);
                }
                // save the results in the json
                result.push(questiongroup);
                if (l === i + 1) {
                    done(result);
                }
            });
        })(i);
    }
});
NOTE: untested, and you might have to handle errors appropriately
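An alternative sketch using the async library (already used elsewhere on this page): async.map collects the results in order and only responds once every query has finished, so it also avoids relying on the last-indexed query finishing last. It assumes the same Test and QuestionGroup models and that res is the Express response:
var async = require('async');

Test.findById(id, 'collections', function(err, test) {
    if (err) return res.send(err);
    // Run one findById per collection entry and gather the question groups in order
    async.map(test.collections, function(entry, cb) {
        QuestionGroup.findById(entry.id, 'questions').exec(cb);
    }, function(err, questiongroups) {
        if (err) return res.send(err);
        res.json({ result: questiongroups });
    });
});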

How do I get a value nested inside two assign functions and a forEach loop?

I'm writing a NodeJS module that copies a bunch of folders over from Dropbox, and creates a directory based on the folder structure. The part that is giving me a headache is that I need to get the names of all the folders in the main directory, then the names of all the files within a folder before moving on to the next function.
Here is my process right now:
1. Get the list of folders in the main directory using dropboxClient.readdir()
2. Iterate through the folders and get the names of the sub-folders (again with dropboxClient.readdir())
3. Iterate through these sub-folders and get the names of the files
4. If a file is a markdown file, add its name to a list
5. Return the list of all markdown files in the sub-directories
and some pseudocode:
function getListOfFiles() {
    var subfolders = [];
    var fileNames = [];
    dbClient.readdir('', function(error, folders) {
        folders.forEach(function(folder, index) {
            subfolders.push(folder);
            dbClient.readdir('/'+folder, function(error, subfolders) {
                subfolders.forEach(function(subfolder, index) {
                    dbClient.readdir('/'+folder+'/'+subfolder, function(error, files) {
                        files.forEach(function(file, index) {
                            if (isMarkdownFile) {
                                fileNames.push(file)
                            }
                        });
                    });
                });
            });
        });
    });
    return fileNames;
}
I've looked into a handful of packages that seem like they are supposed to solve this scenario, as well as JS generators, but I'm not sure what the simplest solution should be. My code runs fine on Node 0.11.3, so generators are an option, but that's a new concept for me, and I can't seem to find examples that match up to mine.
Utilize the async package. Specifically, use each, eachSeries, or eachLimit for the loops, as well as waterfall and series for control flow.
I'd recommend reading up on... each... of the each functions to figure out which is efficient and consistent/reliable for your situation.
function getListOfFiles(callback) {
    async.waterfall([
        // get a list of the top level folders
        function (cb) {
            dbClient.readdir('', function (error, topLevelFolders) {
                if (error) return cb(error);
                cb(null, topLevelFolders); // pass the folders to the next function (this is the "waterfall")
            });
        },
        // get an array of all topLevel/subFolder combos
        function (topLevelFolders, cb) {
            var everySubFolder = [];
            async.each(topLevelFolders, function (folder, subFolderCallback) {
                dbClient.readdir(folder, function (error, subFolders) {
                    if (error) return subFolderCallback(error);
                    everySubFolder = everySubFolder.concat(subFolders);
                    subFolderCallback();
                });
            }, function (error) {
                if (error) return cb(error);
                cb(null, everySubFolder); // pass all the folder/subfolder combos to the next function
            });
        },
        // get an array of all the files in each folder/subfolder
        function (everySubFolder, cb) {
            var fileNames = [];
            async.each(everySubFolder, function (folder, fileNameCallback) {
                dbClient.readdir(folder, function (error, files) {
                    if (error) return fileNameCallback(error);
                    fileNames = fileNames.concat(files);
                    fileNameCallback();
                });
            }, function (error) {
                if (error) return cb(error);
                cb(null, fileNames); // pass every file combo to the waterfall callback function
            });
        }
    ], function (error, fileNames) {
        if (error) return callback(error);
        callback(null, fileNames); // all done! Every file combo goes to the function's callback!
    });
}
When you use it, you'll do:
getListOfFiles(function (err, files) {
    // Voila! here are all your files
});
DEFINITELY add the .each error handling. If it bumps into an error during the loops, it will otherwise keep looping without reporting it, which, depending on the number of files, could take a little while.

mongodb .save() from CSV stream doesn't get called in forEach loop

I'm trying to import a CSV file into MongoDB. Before importing, some alterations have to be made to the data in JS.
In the following script, however, the save function doesn't seem to be called.
console.log(newCampaign) shows the correct object.
console.log('saving') is not displayed.
var input = fs.createReadStream('DATA.csv');
var csv = "";

input.on('data', function(chunk) {
    csv += chunk;
});

input.on('end', function() {
    csvParse(csv, function(err, output) {
        if (err) console.log(err);
        output.forEach(function(csvCampaign) {
            // CONVERT csvCampaign TO newCampaign
            console.log(newCampaign);
            var campaignObj = new Campaign(newCampaign);
            campaignObj.save(function(err) {
                console.log('saving');
                if (err) {
                    console.log(err);
                }
                console.log('document saved');
            });
        });
    });
});
So I found the answer. The forEach loop returns before the save callbacks can execute. As a solution I used the async.eachSeries function: https://github.com/caolan/async#eachSeries
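A sketch of what that eachSeries-based solution might look like, assuming the same Campaign model and csvParse output as above; the conversion step that produces newCampaign is elided in the original snippet and stays elided here:
var async = require('async');

input.on('end', function() {
    csvParse(csv, function(err, output) {
        if (err) return console.log(err);
        // Save one document at a time; `done` is called after each save finishes
        async.eachSeries(output, function(csvCampaign, done) {
            // CONVERT csvCampaign TO newCampaign (as in the original snippet)
            var campaignObj = new Campaign(newCampaign);
            campaignObj.save(done);
        }, function(err) {
            if (err) return console.log(err);
            console.log('all documents saved');
        });
    });
});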
