Getting around Node's Asynchronous nature - javascript

I am writing a content scraper that scrapes information about shirts on a particular website. I have everything set up with npm packages in Node to scrape and create a CSV file. The problem I am running into is that, as many know, Node is asynchronous in nature. The CSV file I am trying to write is written before the JSON object I create is finished being built (I iterate with an each loop to build it), so json2csv (npm package) receives my 'fields' parameter but an empty data object. Can anyone tell me how to make Node wait until my JSON object is built before using fs.writeFile to create the CSV file? Thank you
'use strict';
//require NPM packages
var request = require('request');
var cheerio = require('cheerio');
var fs = require('fs');
var json2csv = require('json2csv');

//Array of shirt objects for json2csv to write.
var ShirtProps = [];
var homeURL = "http://www.shirts4mike.com/";

//start the scraper
scraper();

//Initial scrape of the shirts link from the home page
function scraper() {
    //use the DataFolderExists function to check if data is a directory
    if (!DataFolderExists('data')) {
        fs.mkdir('data');
    }
    //initial request of the home url + the shirts.php link
    request(homeURL + "shirts.php", function (error, response, html) {
        if (!error && response.statusCode == 200) {
            var $ = cheerio.load(html);
            //scrape each of the links for its html data
            $('ul.products li').each(function (i, element) {
                var ShirtURL = $(this).find('a').attr('href');
                console.log(ShirtURL);
                //pass each ShirtURL in to be scraped and added to an object
                ShirtHTMLScraper(ShirtURL);
            });
            FileWrite();
            // end first request
        } else {
            console.error(error);
        }
    });
}

//create function to write the CSV file.
function FileWrite() {
    var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Time'];
    var csv = json2csv({data: ShirtProps, fields: fields});
    console.log(csv);
    var d = new Date();
    var month = d.getMonth() + 1;
    var day = d.getDate();
    var output = d.getFullYear() + '-' +
        (('' + month).length < 2 ? '0' : '') + month + '-' +
        (('' + day).length < 2 ? '0' : '') + day;
    fs.writeFile('./data/' + output + '.csv', csv, function (error) {
        if (error) throw error;
    });
}

//function to scrape each of the shirt links and create a ShirtData object for each.
function ShirtHTMLScraper(ShirtURL) {
    request(homeURL + ShirtURL, function (error, response, html) {
        if (!error && response.statusCode == 200) {
            var $ = cheerio.load(html);
            var time = new Date().toJSON().substring(0, 19).replace('T', ' ');
            //object for json2csv
            var ShirtData = {
                title: $('title').html(),
                price: $(".price").html(),
                imgURL: $('img').attr('src'),
                url: homeURL + ShirtURL,
                time: time.toString()
            };
            //push the scraped shirt data into the ShirtProps array
            ShirtProps.push(ShirtData);
            console.log(ShirtProps);
            // //set the fields in order for the CSV file
            // var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Time'];
            // //use json2csv to write the file -
            // var csv = json2csv({data: ShirtProps, fields: fields});
            // console.log(csv);
            // //date for the filesystem to save the scrape with today's date.
            // var d = new Date();
            // var month = d.getMonth() + 1;
            // var day = d.getDate();
            // var output = d.getFullYear() + '-' +
            //     (('' + month).length < 2 ? '0' : '') + month + '-' +
            //     (('' + day).length < 2 ? '0' : '') + day;
            // //use filesystem to write the file, or overwrite if it exists.
            // fs.writeFile('./data/' + output + '.csv', csv, function (error) {
            //     if (error) throw error;
            // }); //end writeFile
        } else {
            console.error(error);
        }
    });
}

//Check if data folder exists, source: http://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
function DataFolderExists(folder) {
    try {
        // Query the entry
        var DataFolder = fs.lstatSync(folder);
        // Is it a directory?
        if (DataFolder.isDirectory()) {
            return true;
        } else {
            return false;
        }
    } //end try
    catch (error) {
        console.error(error);
    }
}

It's not so much about node being asynchronous in nature as it is about certain functions being asynchronous. In this case, it's the calls using request that are asynchronous. You're calling FileWrite directly after the second request call (the one inside ShirtHTMLScraper) begins. Place the call to FileWrite in the callback of ShirtHTMLScraper, after populating ShirtProps.
edit: After looking closer, that won't work either. The problem is that you are calling an asynchronous function inside a synchronous loop. You can get that to work by creating a counter that increments on each asynchronous callback and checks to see if you've hit the length of the item you're iterating over. If you're on the last iteration, run FileWrite.
A better way to go might be to check out the Async library. You can use .each() to supply two callbacks, one to run on each iteration, and one to run when they've all finished.
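For example, here is a minimal sketch of the async.each() approach, dropped into the request callback of the question's scraper(). It assumes ShirtHTMLScraper is reworked to accept a done callback that it invokes when its own request finishes (or fails); everything else matches the question's code.

var async = require('async');

// inside the request callback, after cheerio has loaded the html:
var shirtURLs = [];
$('ul.products li').each(function (i, element) {
    shirtURLs.push($(this).find('a').attr('href'));
});

async.each(shirtURLs, function (shirtURL, done) {
    // assumption: ShirtHTMLScraper now takes and calls done() / done(err)
    ShirtHTMLScraper(shirtURL, done);
}, function (err) {
    if (err) return console.error(err);
    FileWrite(); // runs only once every shirt has been scraped
});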

Related

Is there a JavaScript InDesign function to get ID value

I used the command to export the hard drive ID to drive C:
var command="wmic diskdrive get SerialNumber > C:/idhdd.txt";
app.system("cmd.exe /c\""+command+"" );
I get the text file
SerialNumber
2012062914345300
Is there a JavaScript statement to remove the SerialNumber header line? I just want the ID itself in the text file saved to drive C.
Here's a ready-to-use getDriveIDs() function that should work in any Adobe app and will return an array of HDD ID strings. I hope this can be easily generalized to other scenarios that mix Windows scripting with Adobe scripting ;-)
//----------------------------------------------------------------------//
// Detects IDs (serial numbers) of connected drives and returns them as an array of strings.
var getDriveIDs = function () {
    var idFile = File(Folder.temp + '/saved_hdd_serials.txt');
    var scriptFile = File(Folder.temp + '/dump_hdd_serials.bat');
    var scriptContent = 'wmic diskdrive get SerialNumber > ' + idFile.fsName + '\n';
    var ids = [];
    withTempFile(scriptFile, scriptContent, function () {
        scriptFile.execute();
        $.writeln(idFile.length == 0); // wait for asynchronous script execution to finish
        $.sleep(1);
        withTempFile(idFile, undefined, function (file, lines) {
            ids = lines.slice(1);
        });
    });
    return ids;
};

//----------------------------------------------------------------------//
// utilities
var withTempFile = function (file, content, callback) {
    if (undefined == content) { // read temp file
        file.open('r');
        content = [];
        while (!file.eof)
            content.push(file.readln());
    } else { // write temp file
        file.open('w');
        file.write(content);
        content = undefined;
    }
    file.close();
    callback(file, content);
    file.remove();
}
//----------------------------------------------------------------------//
// main: demo
var ids = getDriveIDs();
alert('Drive IDs:\n\t' + ids.join('\n\t'));

Update DOM with responses from several XMLHttpRequest

I am building a simple open source Chromium extension that retrieves some data from several urls and then updates the DOM. I couldn't find another way to do this than by adding the line that updates the DOM inside the callback http1.onreadystatechange.
My XMLHttpRequest requests were often stuck on http1.readyState = 3 so I have added a 3rd parameter to http1.open("GET"); to make the request synchronous like this:
http1.open("GET", url, false);
But I am still getting these errors:
results[1].join is not a function at XMLHttpRequest.http.onreadystatechange
Cannot read property 'join' of undefined at XMLHttpRequest.http.onreadystatechange
Even though they don't prevent the script from running, I think this isn't the right way to do what I want. So here is my question: how do I update the DOM with the responses from several XMLHttpRequest requests? Let's say I need to retrieve and compare all the data before updating the DOM. Is there a way to process all the data at once after all of it has been retrieved (cf. my comment on the last line)?
Here is the relevant part of my script, the full script is available here:
var urls = [
    ["https://www.cnrtl.fr/morphologie/" + keyword, "vtoolbar", "morf_sound"], //best for plural
    ["https://www.cnrtl.fr/synonymie/" + keyword, "syno_format"],
]
// for testing, set keyword to any of these words: hibou, tribal, aller, lancer
var resultdiv = document.getElementById("result")
resultdiv.innerText = "requete en cours";
var results = [];
var errors = [];
urls.forEach((item, index) => {
    var http = new XMLHttpRequest();
    http.onreadystatechange = function () {
        if (http.readyState == 4 && http.status == 200) {
            parser = new DOMParser();
            var ulr1response = parser.parseFromString(http.responseText, "text/html");
            if (index == 0) {
                //retrieve the data needed, save it in a list and push this list to the main list results
            } else if (index == 1) {
                //retrieve the data needed, save it in a list and push this list to the main list results
            }
            // update the DOM
            if (results[1] == "") {
                resultdiv.innerHTML = results[0].join(", ") + "</br></br>Pas de synonymes trouvés"
            } else {
                resultdiv.innerHTML = "<b>" + results[0].join(", ") + "</br></br>Synonymes:</b></br>● " + results[1].join('</br>● ')
            }
        } else {
            errors.push(index);
            resultdiv.innerText = "Erreur: " + index + " " + http.readyState + " " + http.status;
        }
    }
    http.open("GET", item[0], false);
    http.send(null); // null = no parameters
});
// it would be simpler if I could update the DOM here and not in http.onreadystatechange
If you want to execute some code once all requests have succeeded, you can try using Promise.all together with Fetch.
let keyword = "beaucoup";
let parser = new DOMParser();
let urls = [
    ["https://www.cnrtl.fr/morphologie/" + keyword, "vtoolbar", "morf_sound"], //best for plural
    ["https://www.cnrtl.fr/synonymie/" + keyword, "syno_format"]
];
let fetchPromises = urls.map(
    item => fetch(item[0])
        .then(response => response.text()) // response.text() itself returns a Promise
        .then(text => parser.parseFromString(text, "text/html"))
);
Promise.all(fetchPromises).then(
    results => {
        // code in here executes once all fetchPromises have succeeded
        // "results" will be an array of parsed documents
        console.log(results);
    }
).catch(console.error);
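Since that final then is the only point where every parsed document is guaranteed to be available, the DOM update belongs there. A hypothetical version of that handler, with placeholder extraction logic (substitute your own selectors for the cnrtl.fr pages):

Promise.all(fetchPromises).then(results => {
    // results[0] and results[1] are parsed Documents
    let morphology = []; // placeholder: extract from results[0]
    let synonyms = [];   // placeholder: extract from results[1]
    let resultdiv = document.getElementById("result");
    if (synonyms.length === 0) {
        resultdiv.innerHTML = morphology.join(", ") + "<br><br>Pas de synonymes trouvés";
    } else {
        resultdiv.innerHTML = "<b>" + morphology.join(", ") + "<br><br>Synonymes:</b><br>● " + synonyms.join("<br>● ");
    }
}).catch(console.error);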

nodejs: compare function generated JSON data to JSON file

I have a function that scans a directory and creates a JSON file with the audio files' metadata. I want it to check if the file already exists and only overwrite it if there is any difference between the file created the last time the script was run and the data from the current run.
This is my code:
var fs = require('fs');
var nodeID3 = require('node-id3');
var path = require('path');

var tracksPath = './public/tracks/';
var dataPath = './public/data/';

fs.readdir(tracksPath, function (err, files) {
    if (err) {
        throw err;
    }
    //Read the tracks metadata
    var tracksMetadata = [];
    files.forEach(function (trackName) {
        var trackFile = nodeID3.read(tracksPath + trackName);
        //If the track returns metadata push it to the array
        if (trackFile.title && trackFile.artist) {
            var metadata = {
                "filename": trackName,
                "title": trackFile.title,
                "artist": trackFile.artist
            };
            tracksMetadata.push(metadata);
        }
        //If no metadata is found ignore and log it to the console
        else if (trackName.charAt(0) != ".") {
            var filename = {
                "filename": trackName
            };
            tracksMetadata.push(filename);
            console.log(trackName + " doesn't have metadata. Ignoring.");
        }
        if (fs.existsSync(dataPath + "metadata.json")) {
            fs.readFile(dataPath + "metadata.json", 'utf8', function (err, data) {
                if (err) throw err;
                console.log(JSON.parse(JSON.stringify(data)));
                console.log(JSON.parse(JSON.stringify(tracksMetadata)));
                console.log(Boolean(JSON.parse(JSON.stringify(data)) == JSON.parse(JSON.stringify(tracksMetadata))));
            });
        }
    });
    fs.writeFile(path.join(dataPath, 'metadata.json'),
        JSON.stringify(tracksMetadata), 'utf8', function (err) {
            if (err) {
                throw err;
            }
            console.log("Tracks Metadata JSON created successfully");
        });
});
Right now I'm only writing to the console a Boolean value that checks whether the data from the file and the data generated by the function are equal, and so far I get false.
What should I do?
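For what it's worth, a likely reason the check prints false (my reading, not from the original thread): JSON.parse(JSON.stringify(data)) on the string read from disk just returns the same string, and comparing a string to an array of objects with == is always false. A minimal sketch of the comparison, parsing the file first and then comparing canonical JSON strings (this would replace the existsSync block inside the readdir callback):

fs.readFile(dataPath + "metadata.json", 'utf8', function (err, data) {
    if (err) throw err;
    var saved = JSON.parse(data); // parse the file into real objects
    // comparing serialized forms works here because both arrays are
    // built the same way, so key order matches
    var unchanged = JSON.stringify(saved) === JSON.stringify(tracksMetadata);
    if (!unchanged) {
        fs.writeFile(path.join(dataPath, 'metadata.json'),
            JSON.stringify(tracksMetadata), 'utf8', function (err) {
                if (err) throw err;
                console.log("Tracks Metadata JSON updated");
            });
    }
});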

Is Promise.all not working on the second time through? Why not?

I'm just finishing off this basic webscraper project for a tshirt website.
It enters through one hardcoded url, the home page. It will search for any product pages and add them to a url set. If it finds another link (remainder), it will scrape that again and find any more product pages. It adds the product pages to urlSet, then scrapes those again, grabs the tshirt data (price, img, title), converts it, and writes it to a CSV file.
For some reason, this is not working on the second run through of the scrape with 'remainder'.
If I remove the second scrape of url, everything works out fine and the file gets written correctly. But if I want to get the other product pages, it seems to be failing somewhere.
Here is my code. I apologise for posting so much of it, but I don't know how it will be understood properly without the right context; hopefully it's been commented okay:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';

//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');

//hardcoded url
var url = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = new Set();
var remainder;
var tshirtArray = [];

const requestPromise = function (url) {
    return new Promise(function (resolve, reject) {
        request(url, function (error, response, html) {
            if (error) return reject(error);
            if (!error && response.statusCode == 200) {
                return resolve(html);
            }
        });
    });
}

// Go into webpage via url, load html and grab shirt links in url
function scrape(url) {
    console.log("Currently scraping " + url)
    return requestPromise(url)
        .then(function (html) {
            var $ = cheerio.load(html);
            var links = [];
            //get all the links
            $('a[href*=shirt]').each(function () {
                var a = $(this).attr('href');
                //add into link array
                links.push(url + a);
            });
            // return array of links
            return links;
        });
}

function nextStep(arrayOfLinks) {
    var promiseArray = [];
    console.log(arrayOfLinks);
    for (var i = 0; i < arrayOfLinks.length; i++) {
        promiseArray.push(requestPromise(arrayOfLinks[i]));
    }
    //return both the html of pages and their urls
    return Promise.all(promiseArray)
        .then(function (arrayOfHtml) {
            return {arrayOfHtml: arrayOfHtml, arrayOfUrls: arrayOfLinks};
        });
}

//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep(obj) {
    for (var i = 0; i < obj.arrayOfHtml.length; i++) {
        var $ = cheerio.load(obj.arrayOfHtml[i]);
        //if page has a submit it must be a product page
        if ($('[type=submit]').length !== 0) {
            //add page to set
            urlSet.add(obj.arrayOfUrls[i]);
            console.log(obj.arrayOfUrls[i]);
        } else if (remainder == undefined) {
            //if not a product page, add it to remainder so another scrape can be performed.
            remainder = obj.arrayOfUrls[i];
            console.log("The remainder is " + remainder)
        }
    }
    //return remainder for second run-through of scrape
    return remainder;
}

//iterate through urlSet (product pages) and grab html
function lastScraperPt1() {
    //call lastScraper so we can grab data from the set (product pages)
    //scrape set, product pages
    var promiseArray = [];
    for (var item of urlSet) {
        var url = item;
        promiseArray.push(requestPromise(url));
    }
    return Promise.all(promiseArray)
        .then(function (arrayOfHtml) {
            return arrayOfHtml;
        });
}

//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html) {
    for (var i = 0; i < html.length; i++) {
        var $ = cheerio.load(html[i]);
        //grab data and store as variables
        var price = $('.price').text();
        var imgURL = $('.shirt-picture').find('img').attr('src');
        var title = $('body').find('.shirt-details > h1').text().slice(4);

        var tshirtObject = {};
        //add values into tshirt object
        tshirtObject.Title = title;
        tshirtObject.Price = price;
        tshirtObject.ImageURL = imgURL;
        tshirtObject.URL = url;
        tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
        //add the object into the array of tshirts
        tshirtArray.push(tshirtObject);
    }
    convertJson2Csv();
}

//convert tshirt objects and save as CSV file
function convertJson2Csv() {
    //The scraper should generate a folder called `data` if it doesn't exist.
    var dir = './data';
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir);
    }
    var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
    //convert tshirt data into CSV and pass in fields
    var csv = json2csv({data: tshirtArray, fields: fields});
    //Name of file will be the date
    var fileDate = moment().format('MM-DD-YY');
    var fileName = dir + '/' + fileDate + '.csv';
    //Write file
    fs.writeFile(fileName, csv, {overwrite: true}, function (err) {
        console.log('file saved');
        if (err) throw err;
    });
}

scrape(url) //scrape from original entry point
    .then(nextStep)
    .then(lastStep)
    .then(scrape) //scrape again but with remainder url
    .then(nextStep)
    .then(lastStep)
    .then(lastScraperPt1)
    .then(lastScraperPt2)
    .catch(function (err) {
        // handle any error from any request here
        console.log(err);
    });
I'm console logging arrayOfLinks in nextStep so I can see that the links are being grabbed properly; I just cannot work out why they aren't being passed through to lastStep properly.
Currently scraping http://shirts4mike.com/
[ 'http://shirts4mike.com/shirts.php',
  'http://shirts4mike.com/shirts.php',
  'http://shirts4mike.com/shirt.php?id=108',
  'http://shirts4mike.com/shirt.php?id=107',
  'http://shirts4mike.com/shirt.php?id=106',
  'http://shirts4mike.com/shirt.php?id=105' ]
The remainder is http://shirts4mike.com/shirts.php
http://shirts4mike.com/shirt.php?id=108
http://shirts4mike.com/shirt.php?id=107
http://shirts4mike.com/shirt.php?id=106
http://shirts4mike.com/shirt.php?id=105
Currently scraping http://shirts4mike.com/shirts.php
[ 'http://shirts4mike.com/shirts.phpshirts.php',
  'http://shirts4mike.com/shirts.phpshirt.php?id=101',
  'http://shirts4mike.com/shirts.phpshirt.php?id=102',
  'http://shirts4mike.com/shirts.phpshirt.php?id=103',
  'http://shirts4mike.com/shirts.phpshirt.php?id=104',
  'http://shirts4mike.com/shirts.phpshirt.php?id=105',
  'http://shirts4mike.com/shirts.phpshirt.php?id=106',
  'http://shirts4mike.com/shirts.phpshirt.php?id=107',
  'http://shirts4mike.com/shirts.phpshirt.php?id=108' ]
BUT if I choose to only call the first scrape and don't call the second, like this:
scrape(url) //scrape from original entry point
    .then(nextStep)
    .then(lastStep)
    .then(lastScraperPt1)
    .then(lastScraperPt2)
    .catch(function (err) {
        // handle any error from any request here
        console.log(err);
    });
... Then everything works. I just don't get to all the urls.
What is happening here and how can I fix it? Thank you guys
The issue is tshirtArray is not defined in convertJson2Csv(). In lastScraperPt2, pass tshirtArray to convertJson2Csv():
convertJson2Csv(tshirtArray)
and in convertJson2Csv:
function convertJson2Csv(tshirtArray) {
    // do stuff
}
One problem seems to be in your lastStep. It looks like you mean for remainder to be another array of urls. Correct me if I'm wrong there. However, what's happening is that the first time the if($('[type=submit]').length !== 0) condition fails, you'll automatically go down to the next block, because remainder starts undefined. Whatever the current url is, you assign that one to remainder. For the rest of the iterations of your for-loop, you will never again hit the condition where remainder == undefined. So you will only ever end up with one url assigned to remainder, while any more that you were hoping to get will simply be passed over.
You might want to define remainder as remainder = [];. And then instead of saying else if (remainder == undefined), you would just say
} else {
    remainder.push(obj.arrayOfUrls[i]);
}
However, then you're passing an array of urls to scrape, which is only expecting a single url. If this is what you want, and I am right in assuming that you mean for remainder to be an array of urls, you could define a new function as follows:
function scrapeRemainders(remainders) {
    var promises = [];
    remainders.forEach(function (url) {
        promises.push(requestPromise(url));
    });
    return Promise.all(promises).then(function (results) {
        return _.flattenDeep(results);
    });
}
Then instead of the second scrape in your promise chain, you would replace it with scrapeRemainders. Also, for the _ in the previous function, you would need to npm install lodash and then var _ = require('lodash'). On a side note, lodash has nothing to do with promises, but it is a great tool for data manipulation. You should look into it when you have the chance.
Also, in lastScraperPt1, you can change
return Promise.all(promiseArray)
    .then(function (arrayOfHtml) {
        return arrayOfHtml;
    });
to
return Promise.all(promiseArray);
It does the same thing.
Hope this helps. If this does not answer your question, comment at me and I can change my answer accordingly.
All fixed, it was grabbing the wrong urls in scrape(). Though I only knew this after I logged the statusCodes to the console:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';

//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');

//hardcoded url
var urlHome = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = [];
var tshirtArray = [];

const requestPromise = function (url) {
    return new Promise(function (resolve, reject) {
        request(url, function (error, response, html) {
            if (error) {
                errorHandler(error);
                return reject(error);
            }
            if (!error && response.statusCode == 200) {
                return resolve(html);
            }
            if (response.statusCode !== 200) {
                console.log("response code is " + response.statusCode);
            }
            return resolve("");
        });
    });
}

// Go into webpage via url, load html and grab shirt links in url
function scrape(url) {
    console.log("Currently scraping " + url)
    return requestPromise(url)
        .then(function (html) {
            var $ = cheerio.load(html);
            var links = [];
            var URL = 'http://shirts4mike.com/';
            //get all the links
            $('a[href*=shirt]').each(function () {
                var a = $(this).attr('href');
                //add into link array
                links.push(URL + a);
            });
            // return array of links
            return links;
        });
}

function nextStep(arrayOfLinks) {
    var promiseArray = [];
    console.log(arrayOfLinks);
    for (var i = 0; i < arrayOfLinks.length; i++) {
        promiseArray.push(requestPromise(arrayOfLinks[i]));
    }
    //return both the html of pages and their urls
    return Promise.all(promiseArray)
        .then(function (arrayOfHtml) {
            return {arrayOfHtml: arrayOfHtml, arrayOfUrls: arrayOfLinks};
        });
}

//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep(obj) {
    for (var i = 0; i < obj.arrayOfHtml.length; i++) {
        var $ = cheerio.load(obj.arrayOfHtml[i]);
        //if page has a submit it must be a product page
        if ($('[type=submit]').length !== 0) {
            //add page to set
            urlSet.push(obj.arrayOfUrls[i]);
            console.log(obj.arrayOfUrls[i]);
        } else if (remainder == undefined) {
            //if not a product page, add it to remainder so another scrape can be performed.
            var remainder = obj.arrayOfUrls[i];
            console.log("The remainder is " + remainder)
        }
    }
    //return remainder for second run-through of scrape
    return remainder;
}

//iterate through urlSet (product pages) and grab html
function lastScraperPt1() {
    //call lastScraper so we can grab data from the set (product pages)
    //scrape set, product pages
    var promiseArray = [];
    for (var item of urlSet) {
        var url = item;
        promiseArray.push(requestPromise(url));
    }
    return Promise.all(promiseArray)
        .then(function (arrayOfHtml) {
            return arrayOfHtml;
        });
}

//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html) {
    for (var i = 0; i < html.length; i++) {
        var $ = cheerio.load(html[i]);
        //grab data and store as variables
        var price = $('.price').text();
        var imgURL = $('.shirt-picture').find('img').attr('src');
        var title = $('body').find('.shirt-details > h1').text().slice(4);

        var tshirtObject = {};
        //add values into tshirt object
        tshirtObject.Title = title;
        tshirtObject.Price = price;
        tshirtObject.ImageURL = urlHome + imgURL;
        tshirtObject.URL = urlSet[i];
        tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
        //add the object into the array of tshirts
        tshirtArray.push(tshirtObject);
    }
    return tshirtArray;
}

//convert tshirt objects and save as CSV file
function convertJson2Csv(tshirtArray) {
    //The scraper should generate a folder called `data` if it doesn't exist.
    var dir = './data';
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir);
    }
    var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
    //convert tshirt data into CSV and pass in fields
    var csv = json2csv({data: tshirtArray, fields: fields});
    //Name of file will be the date
    var fileDate = moment().format('MM-DD-YY');
    var fileName = dir + '/' + fileDate + '.csv';
    //Write file
    fs.writeFile(fileName, csv, {overwrite: true}, function (err) {
        console.log('file saved');
        if (err) errorHandler(err);
    });
}

scrape(urlHome) //scrape from original entry point
    .then(nextStep)
    .then(lastStep)
    .then(scrape)
    .then(nextStep)
    .then(lastStep)
    .then(lastScraperPt1)
    .then(lastScraperPt2)
    .then(convertJson2Csv)
    .catch(function (err) {
        // handle any error from any request here
        console.log(err);
    });

//If the site is down, an error message describing the issue should appear in the console.
//This is to be tested by disabling wifi on your device.
//When an error occurs log it to a file scraper-error.log. It should append to the bottom of the file with a time stamp and error
var errorHandler = function (error) {
    console.log(error.message);
    console.log('The scraper could not scrape data from ' + urlHome + ' there is either a problem with your internet connection or the site may be down');
    /**
     * create new date for log file
     */
    var loggerDate = new Date();
    /**
     * create message as a variable
     */
    var errLog = '[' + loggerDate + '] ' + error.message + '\n';
    /**
     * when the error occurs, log that to the error logger file
     */
    fs.appendFile('scraper-error.log', errLog, function (err) {
        if (err) throw err;
        console.log('There was an error. The error was logged to scraper-error.log');
    });
};

how to pass data from module.export function to an object

I have a simple Node/Express app and am trying to pass data from a JavaScript function to a template (powered by Jade).
The javascript function looks like this:
module.exports = {
    getFeatures: function () {
        var request = require("request")

        // ID of the Google Spreadsheet + Base URL
        var spreadsheetID = "abcdefg-123456";
        var sheetID = "od6";
        var url = "https://spreadsheets.google.com/feeds/list/" + spreadsheetID + "/" + sheetID + "/public/values?alt=json";

        //empty array for features
        var features = [];

        //get the features
        request({
            url: url,
            json: true
        }, function (error, response, body) {
            if (!error && response.statusCode === 200) {
                var data = body.feed.entry;
                data.forEach(function (item) {
                    var obj = {
                        pub: item.gsx$publication.$t,
                        date: item.gsx$date.$t,
                        title: item.gsx$title.$t,
                        url: item.gsx$url.$t,
                    }
                    features.push(obj);
                });
                console.log("features", features); //prints array containing all objects to server console
                return features;
            }
        });
    }
};
And the main app looks like this:
'use strict';

var express = require('express');
var jade = require('jade');
var gsheets = require("./gsheets.js"); //pulls in module.exports from above
var featuresOld = require('../private/features.json'); //original json pull (a single array of objects)

var port = process.env.PORT || 3000;
var app = express();

// defining middleware
app.use('/static', express.static(__dirname + '../../public'));
app.set('view engine', 'jade');
app.set('views', __dirname + '/templates');

...

// features route
app.get('/features', function (req, res) {
    var path = req.path;
    res.locals.path = path;
    var features = gsheets.getFeatures(); //attempting to call js function above
    res.render('features', {features: features}); //trying to pass data into a template
});
The first function successfully prints an array of objects to the server console, so I think the error lies in how I'm calling it in the main app.js. (Please note, it only prints when I have it entered as gsheets.getFeatures();, not var features = gsheets.getFeatures();.)
Please also note that the featuresOld variable is an array of objects that has been successfully passed through to a Jade template, so the error is not in the res.render('features', {features: features}); line.
I'm sure this is pretty straightforward, but I can't seem to figure it out. Any help is greatly appreciated, thank you.
I'd recommend you to look into Promises (either Native or using a library like Bluebird).
But without using Promises or generators and keeping things simple, you can pass a callback function that will be called only when the values are retrieved. Within this function you can render the template.
(Note that your function currently does not return anything)
module.exports = {
    getFeatures: function (callback) {
        var request = require("request")

        // ID of the Google Spreadsheet + Base URL
        var spreadsheetID = "abcdefg-123456";
        var sheetID = "od6";
        var url = "https://spreadsheets.google.com/feeds/list/" + spreadsheetID + "/" + sheetID + "/public/values?alt=json";

        //empty array for features
        var features = [];

        //get the features
        request({
            url: url,
            json: true
        }, function (error, response, body) {
            if (!error && response.statusCode === 200) {
                var data = body.feed.entry;
                data.forEach(function (item) {
                    var obj = {
                        pub: item.gsx$publication.$t,
                        date: item.gsx$date.$t,
                        title: item.gsx$title.$t,
                        url: item.gsx$url.$t,
                    }
                    features.push(obj);
                });
                console.log("features", features); //prints array containing all objects to server console
                callback(features); // call the rendering function once the values are available
            }
        });
    }
};
Now in your main app, you just pass a callback to the function
app.get('/features', function (req, res) {
    var path = req.path;
    res.locals.path = path;
    gsheets.getFeatures(function (features) {
        res.render('features', {features: features}); //trying to pass data into a template
    });
});
Basically, your request function is asynchronous: the request runs in the background and the callback function is called with the value once it has been retrieved. In the meantime, the rest of the code keeps running (in your case, you'd try to use the value even though it hasn't been retrieved yet).
If you need to do something that depends on that value, you have to put that code in a callback function which is called when the value is available (as shown above).
Promises provide a nice API for doing that. There are also new ES6 features that help you better organise asynchronous code.
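For comparison, here is a sketch of the same module written with a Promise instead of a callback (not from the original answer; the spreadsheet parsing reuses the code above):

module.exports = {
    getFeatures: function () {
        var request = require("request");
        var url = "https://spreadsheets.google.com/feeds/list/abcdefg-123456/od6/public/values?alt=json";
        return new Promise(function (resolve, reject) {
            request({url: url, json: true}, function (error, response, body) {
                if (error || response.statusCode !== 200) {
                    return reject(error || new Error("HTTP " + response.statusCode));
                }
                // resolve with the mapped features array
                resolve(body.feed.entry.map(function (item) {
                    return {
                        pub: item.gsx$publication.$t,
                        date: item.gsx$date.$t,
                        title: item.gsx$title.$t,
                        url: item.gsx$url.$t
                    };
                }));
            });
        });
    }
};

The route handler then chains on the returned promise:

app.get('/features', function (req, res) {
    gsheets.getFeatures()
        .then(function (features) {
            res.render('features', {features: features});
        })
        .catch(function (err) {
            res.status(500).send(err.message);
        });
});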
