Scope of variable in anonymous function - javascript

I've got this function
function parseLink(link) {
    var product;
    request(link, function (error, response, body) {
        if (!error && response.statusCode == 200) {
            var $ = cheerio.load(body);
            // title
            var title = $('h1').text();
            if (!title)
                var title = $('title').text();
            var description = $('meta[name="description"]').attr('content');
            product = new Product(link.trim(), title.trim(), description.trim());
        }
    });
    console.log(product);
    return product;
}
And I don't understand why, when I do console.log(product) outside of the request call, I get undefined, but inside it I can see my product.
I have learned a lot about scopes in JavaScript and I still don't understand, because I defined product at the top of the function.
I need to return this variable to get it in another function. If I do the return inside request I of course get undefined, so I need to do it outside...
Thank you

JavaScript does not run code like C or PHP, where you can be sure that the next line only runs once the previous one has finished. In your case request is an asynchronous function, so the two lines
console.log(product);
return product;
are almost always run before your request callback has finished. In that case you cannot just return a value from your parseLink function. You have two possibilities here:
use promises:
https://developer.mozilla.org/en/docs/Web/JavaScript/Reference/Global_Objects/Promise
use a callback, like this:
function parseLink(link, callback) {
    var product;
    request(link, function (error, response, body) {
        if (!error && response.statusCode == 200) {
            var $ = cheerio.load(body);
            // title
            var title = $('h1').text();
            if (!title)
                var title = $('title').text();
            var description = $('meta[name="description"]').attr('content');
            product = new Product(link.trim(), title.trim(), description.trim());
            callback(product);
        }
    });
}
and you call it like this:
parseLink('http://...', function(product) { /* do something with the product */ });
PS: the use of callbacks is a lot easier IMO, but in some cases you may want to separate the scope, for example if you run it in a for loop.

request is an asynchronous call, so its callback gets pushed onto the event queue and runs only once the current call stack has finished. console.log prints undefined because that is the default value of an unassigned variable.
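To make the ordering concrete, here is a minimal illustration (hypothetical values, assuming the same request module as above; this is not the fix itself):

var product;
request('http://example.com', function (error, response, body) {
    console.log('2: the callback runs later, once the response has arrived');
    product = 'something';
});
console.log('1: this line runs first, while product is still undefined');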
You must use callbacks or promises if you need to return the value from the asynchronous call. Here's an example using a Promise:
function parseLink(link) {
    return new Promise((resolve, reject) => {
        request(link, function (error, response, body) {
            if (error) return reject(error);
            if (response.statusCode !== 200) {
                return reject(new Error('Not OK'));
            }
            var $ = cheerio.load(body);
            var title = $('h1').text() || $('title').text();
            var description = $('meta[name="description"]').attr('content');
            var product = new Product(link.trim(), title.trim(), description.trim());
            resolve(product);
        });
    });
}
parseLink('http://example.com')
    .then(product => {
        console.log(product);
    })
    .catch(error => {
        console.error(error);
    });
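For what it's worth, the same promise can also be consumed with async/await (a minimal sketch, assuming Node.js 8 or later):

async function logProduct(link) {
    try {
        const product = await parseLink(link);
        console.log(product);
    } catch (error) {
        console.error(error);
    }
}

logProduct('http://example.com');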

Related

How does one access, process and pass along response data outside of the request?

In the snippet below, I want to be able to access locationArray outside of the request function. I understand why my code below will not work; however, I have tried many different methods to access the array, including promises and callback functions, and none of them seem to be working.
Any other ideas on how to do this? I'm even open to approaches I've already tried, as at this point everything is worth a try.
request(process.env.RESOURCE_SHEET, (error, response, html) => {
    var locationArray = new Array
    if (!error && response.statusCode == 200) {
        const $ = cheerio.load(html);
        $("h3").each((i, lle) => {
            const location = $(lle).text();
            if (location.includes("Kansas")) return;
            if (location.includes("In Stock")) {
                var level = location + " ✅";
            } else {
                var level = location + " ❌";
            }
            locationArray.push(level);
        });
    }
    console.log(locationArray) // Output 1: [level1,level2,level3,leveletc]
});
console.log(locationArray) // Output 2: []
One might consider not only a Promise-based approach, as already suggested, but also a refactoring that separates the different concerns into tasks and implements each task as a function which can be passed to the promise chain. As an advantage, the refactoring pays off with human-readable code that is also easier to maintain.
function createRequest(src) {
    return new Promise((resolve, reject) => {
        request(src, (error, response, html) => {
            if (!error && response.statusCode === 200) {
                resolve(html);
            } else {
                reject({ error, response });
            }
        });
    });
}
function handleFailedRequest(reason) {
    const { error, response } = reason;
    // proceed with failure handling based on
    // either request data `error` and/or `response`.
}
function createLocationArray(html) {
    const locationArray = [];
    const $ = cheerio.load(html);
    $('h3').each((i, lle) => {
        const location = $(lle).text();
        if (!location.includes('Kansas')) {
            const isInStock = location.includes('In Stock');
            locationArray.push(
                `${ location } ${ isInStock && '✅' || '❌' }`
            );
        }
    });
    return locationArray;
}
function processLocationArray(array) {
    console.log('locationArray ... ', array);
}

const promisedResponse = createRequest(process.env.RESOURCE_SHEET);

promisedResponse
    .then(createLocationArray)
    .then(processLocationArray)
    .catch(handleFailedRequest);
#StackSlave was right, it just needed a promise. I believe I messed up the syntax when I first tried resolving it with a promise, but this seemed to work.
const promise = new Promise((resolve, reject) => {
    request(process.env.RESOURCE_SHEET, (error, response, html) => {
        var locationArray = new Array
        if (!error && response.statusCode == 200) {
            const $ = cheerio.load(html);
            $("h3").each((i, lle) => {
                const location = $(lle).text();
                if (location.includes("Kansas")) return;
                if (location.includes("In Stock")) {
                    var level = location + " ✅";
                } else {
                    var level = location + " ❌";
                }
                locationArray.push(level);
                resolve(locationArray);
            });
        }
        console.log(locationArray) // Output 1: [level1,level2,level3,leveletc]
    });
});

promise.then(array => {
    console.log(array);
});
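One detail worth noting about the snippet above: resolve is called on every iteration of .each, and the array only arrives complete because cheerio's .each is synchronous, so the loop has finished before the .then callback runs. Resolving once, after the loop, makes that intent explicit (a sketch of just that part):

$("h3").each((i, lle) => {
    // ... build level and push it into locationArray, as above ...
});
resolve(locationArray); // resolve once, with the complete array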

How do I make sure a function is completely executed before its value is passed to res.render?

I want to parse the address query and return the addresses and their titles such that
http://localhost:3000/I/want/title/?address=google.com&address=youtube.com shall return:
google.com - 'google', youtube.com - 'youtube'
I'm using cheerio.js to extract the title from the URLs, but it takes time, and the res.render line is executed before the titles variable is filled with the URL titles. How do I make sure that my title-retrieving code is executed before the res.render?
As of now, I'm not getting any errors, but the titles[] array is sent to my .ejs file without data. I've tried solving this with callbacks, step.js, and async.js, but nothing seems to work. I've also tried solving it with rsvp.js (promises), as shown below (from app.js), but it doesn't work either and titles[] remains empty:
app.get("/I/want/title/", function(req,res){
if (typeof req.query.address === "string"){
query = [req.query.address];
}
else {
query = req.query.address;
}
var titles=[];
var promise = new RSVP.Promise(function(resolve, reject) {
for (i=0;i<(query.length);i++){
if (!((query[i]).startsWith("https://www."))){
var url = "https://www." + query[i];
}else{
url=query[i];
}
request(url, function (err, resp, body) {
if (err) {
var title = "NO RESPONSE"
} else {
var $ = cheerio.load(body);
var title = $("title").text();
}
titles.push(title);
});
}
resolve(titles);
reject();
});
promise.then(function(titles) {
res.render("title", {url: query, siteName: titles});
}).catch(function() {
console.log("oh no");
});
});
Is there something wrong with my syntax or logic? How should I execute this with either callbacks or promises?
You have to put the resolve and reject of the promise inside the request callback; then your code should work fine (as shown below).
app.get('/I/want/title/', function (req, res) {
    if (typeof req.query.address === 'string') {
        query = [req.query.address]
    } else {
        query = req.query.address
    }
    var titles = []
    var promise = new RSVP.Promise(function (resolve, reject) {
        for (i = 0; i < query.length; i++) {
            if (!query[i].startsWith('https://www.')) {
                var url = 'https://www.' + query[i]
            } else {
                url = query[i]
            }
            request(url, function (err, resp, body) {
                if (err) {
                    var title = 'NO RESPONSE'
                    reject()
                } else {
                    var $ = cheerio.load(body)
                    var title = $('title').text()
                }
                titles.push(title)
                resolve(titles)
            })
        }
    })
    promise
        .then(function (titles) {
            res.render('title', {url: query, siteName: titles})
        })
        .catch(function () {
            console.log('oh no')
        })
})
Does this answer your question?
Your resolve should be inside the else part of the request callback, and the reject should be inside the if (err).
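Note that a single promise resolved from inside the loop still settles after the first response only. A sketch of one way to wait for every URL, creating one promise per request and collecting them with native Promise.all (this is an illustrative sketch, not the answer's original code):

app.get('/I/want/title/', function (req, res) {
    var query = typeof req.query.address === 'string' ? [req.query.address] : req.query.address;
    var promises = query.map(function (address) {
        var url = address.startsWith('https://www.') ? address : 'https://www.' + address;
        return new Promise(function (resolve) {
            request(url, function (err, resp, body) {
                if (err) return resolve('NO RESPONSE'); // fall back instead of rejecting, so one failure does not abort the rest
                resolve(cheerio.load(body)('title').text());
            });
        });
    });
    Promise.all(promises).then(function (titles) {
        res.render('title', { url: query, siteName: titles });
    });
});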

Terminate mysql connection after multiple queries have executed

I have some Node.js code which fetches data from an API in a loop and runs multiple MySQL queries to update some rows.
The issue I have is that the script keeps running until I terminate the MySQL connection with connection.end(). I am a newbie with asynchronous code. Where do I call the termination function so that it executes once all the queries have finished? What's the right design pattern for this? Would waterfall be any good?
This is a snippet from the code I have at the moment (error handling removed for simplicity):
var connection = mysql.createConnection({ host, user, etc... });

for (var i = 0; i < 10; i++) {
    var url = "http://api.com?i=" + i;
    request(url, function (error, response, body) {
        var data = JSON.parse(body);
        for (el in data) {
            connection.query(
                "UPDATE table SET col = ? WHERE symbol = ?",
                [
                    data[el].col,
                    el
                ]
            );
        }
    });
}

// this will run before all queries have executed
// resulting in an error
connection.end();
So, the problem here is that you are cycling through the data synchronously here:
var data = JSON.parse(body);
for (el in data) {
    connection.query(
        "UPDATE table SET col = ? WHERE symbol = ?",
        [
            data[el].col,
            el
        ]
    );
}
while the mysql module handles the query in a callback style:
connection.query(query, function (error, rows, fields) {
    if (error) {
        return callback(error);
    } else {
        return callback(null, rows);
    }
});
where callback has the signature callback(error, rows), so that you can handle the results like this, supposing you have a reusable function:
var executeQuery = function (query, callback) {
    var self = this;
    this.connection.query(query, function (error, rows, fields) {
        if (error) {
            return callback(error);
        } else {
            return callback(null, rows);
        }
    });
}
and you can call it in your code like
executeQuery(statement, function (error, rows) {
    //...
})
That said, you should consider that you are running multiple queries against your database, and it is not recommended to do this in a for loop. You should consider a better solution, which could be a waterfall as you say, or a Promise.all using the Promise paradigm.
Suppose you have this helper function:
var promiseAllP = function (items, block) {
    var promises = [];
    items.forEach(function (item, index) {
        promises.push(function (item, i) {
            return new Promise(function (resolve, reject) {
                return block.apply(this, [item, index, resolve, reject]);
            });
        }(item, index))
    });
    return Promise.all(promises);
}
It takes as input an array of items and an execution function of the form function(item, index, resolve, reject), which receives the resolve and reject functions of a Promise. So let's turn your executeQuery function into a Promise as well:
var executeQueryP = function (query) {
    var self = this;
    return new Promise(function (resolve, reject) {
        self.connection.query(query, function (error, rows, fields) {
            if (error) {
                return reject(error);
            } else {
                return resolve(rows);
            }
        });
    });
}
Now you can process your data in a completely asynchronous, promisified way:
promiseAllP(data, (item, index, resolve, reject) => {
    var query = "UPDATE table SET col = %s WHERE symbol = %s";
    // example: prepare the query from item in the data
    query = replaceInString(query, item.col, item);
    executeQueryP(query)
        .then(result => resolve(result))
        .catch(error => reject(error))
})
.then(results => { // all execution completed
    console.log(results)
})
.catch(error => { // some error occurred while executing
    console.error(error)
})
where replaceInString helps you prepare the statement:
var replaceInString = function () {
    var args = Array.prototype.slice.call(arguments);
    var rep = args.slice(1, args.length);
    var i = 0;
    var output = args[0].replace(/%s|%d|%f|%#/g, function (match, idx) {
        var subst = rep.slice(i, ++i);
        return (subst);
    });
    return (output);
};
This is what we have done here:
Used native Promises only
Turned your MySQL query into a promise
Called the statements against your data in a completely asynchronous way
Used the Promise and Promise.all paradigm, which lets you collect the results of the promises and return to the caller when all the functions are completed
Caught errors in all the statement executions
Added a simple way to fill statements with parameters
Also notice the arrow function syntax (param1, param2) =>, which simplifies writing functions and helps a lot with the Promise paradigm.
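To tie this back to the original question about connection.end(): once the chain above has settled, it is safe to close the connection. A sketch (reusing the connection from your snippet, with a trailing then acting as a finally):

promiseAllP(data, (item, index, resolve, reject) => {
    // ... run each query as shown above ...
})
.then(results => console.log(results))
.catch(error => console.error(error))
.then(() => {
    // every query has settled (success or failure), so the connection can be closed
    connection.end();
});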
For anyone interested, I ended up solving it with a mixture of promises and counting the queries, something along the lines of this (not sure if this exact code works, but the idea is there):
function fetchFromAPI() {
    return new Promise((resolve, reject) => {
        var urls = [];
        for (var i = 0; i < 10; i++) {
            urls.push("http://api.com?i=" + i);
        }
        var data = [];
        var requestedUrls = 0;
        urls.forEach(url => {
            request(url, (err, response, body) => {
                if (err) reject(err);
                data.push(JSON.parse(body));
                requestedUrls++;
                if (requestedUrls == urls.length) resolve(data);
            });
        });
    });
}

fetchFromAPI().then(data => {
    var connection = mysql.createConnection({ user, hostname, etc... });
    var processedKeys = 0;
    data.forEach(el => {
        connection.query("UPDATE table SET name = ? WHERE id = ?", [el.name, el.id], (err, rows, fields) => {
            processedKeys++;
            if (processedKeys == data.length) {
                connection.end();
            }
        });
    });
}).catch(err => {
    console.error(err);
});

Know when my function is over

This is my function:
function parseLinks(links, callback) {
    var products = [];
    for (var i = 0; i < links.length; i++) {
        request(links[i], function (error, response, body) {
            var product;
            if (!error && response.statusCode == 200) {
                var $ = cheerio.load(body);
                // title
                var title = $('h1').text();
                if (!title)
                    var title = $('title').text();
                var description = $('meta[name="description"]').attr('content');
                product = new Product(links[0].trim(), title.trim(), description.trim());
                products.push(product);
            }
        });
    }
    callback(products) // the callback only do a console.log(products)
}
After that, I want to do a console.log(products) that displays all the products.
So I set up a callback attached to parseLinks and call it after the for loop. The problem is that in my for loop I call the asynchronous function request each time, so my callback is called before all the request calls have finished, and my console.log(products) prints an empty array.
Do you know how to fix that?
Thanks
You have to check if all the asynchronous calls have finished. Create an inner function that calls callback when all asynchronous work is done:
function parseLinks(links, callback) {
    var products = [],
        numberOfItems = links.length; // number of links to be parsed

    function checkIfDone() { // this function will be called each time a link is parsed
        numberOfItems--; // decrement numberOfItems (how many links are left)
        if (numberOfItems === 0) // if there are none left (all links are parsed), call callback with the resultant array
            callback(products);
    }

    for (var i = 0; i < links.length; i++) {
        request(links[i], function (error, response, body) {
            // ...
            checkIfDone(); // every time a link is parsed, call checkIfDone
        });
    }
}
You can embed the logic of checkIfDone directly inside the request callback; I used a separate function for clarity.
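For illustration, a sketch of the inlined version (same counting idea, with a placeholder comment where the product is built):

for (var i = 0; i < links.length; i++) {
    request(links[i], function (error, response, body) {
        // ... build the product and push it into products, as before ...
        if (--numberOfItems === 0) { // the last request has finished
            callback(products);
        }
    });
}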
The best way is to use async.
var async = require("async");
function parseLinks(links, callback) {
    var products = [];
    async.forEach(links, function (link, done) {
        request(link, function (error, response, body) {
            var product;
            if (!error && response.statusCode == 200) {
                var $ = cheerio.load(body);
                // title
                var title = $('h1').text();
                if (!title)
                    var title = $('title').text();
                var description = $('meta[name="description"]').attr('content');
                product = new Product(link.trim(), title.trim(), description.trim());
                products.push(product);
            }
            done();
        });
    }, function () {
        callback(products);
    });
}
You can use async.each from the async module.
Simplified code:
function parseLinks(links, callback) {
    var products = [];
    async.each(links, function (link, requestCallback) {
        request(link, function (error, response, body) {
            // ... rest of your code
            requestCallback(); // request has ended
        });
    }, function (err) {
        // All requests ended!
        callback(products);
    });
}

Node.js web scraping with loop on array of URLs

I'm trying to build a little script to scrape some data. I have some basic knowledge of JavaScript; however, I'm kind of lost with all the async callback and promises stuff. Here is what I have now:
url = "http://Blablablabla.com";
var shares = function(req, res) {
request(url, function (error, response, body) {
if (!error) {
var $ = cheerio.load(body),
share = $(".theitemIwant").html();
return res.send(url + ":" + share);
} else {
console.log("We've encountered an error: " + error);
}
})
}
So everything is fine with this piece of code. What I would like to do is:
Use an array of URLs: var urls = [url1, url2, url3, etc...]
Store my scraped data in another array, something like this: data = [{url: url1, shares: share}, {url: url2, shares: share}, etc...]
I know I need to use something like data.push({ urls: url, shares: share }),
and I understand that I need to loop over my first urls array to push data into my second data array.
However, I'm kind of lost with the request method and the way I should deal with the async issue in my situation.
Thanks!
Edit #1:
I tried this to use promises:
var url = "www.blablabla.com"
var geturl = request(url, function (error, response, body) {
if (!error) { return $ = cheerio.load(body) } else
{ console.log("We've encountered an error: " + error); }
});
var shares = geturl.then( function() {
return $(".nb-shares").html();
})
but I got the following error: geturl.then is not a function
I think you should use async:
var async = require('async');
var urls = ["http://example.com", "http://example.com", "http://example.com"];
var data = [];

var calls = urls.map((url) => (cb) => {
    request(url, (error, response, body) => {
        if (error) {
            console.error("We've encountered an error:", error);
            return cb();
        }
        var $ = cheerio.load(body),
            share = $(".theitemIwant").html();
        data.push({ url, share })
        cb(); // signal that this call has finished
    })
})

async.parallel(calls, () => { /* YOUR CODE HERE */ })
You could do the same with promises, but I don't see why.
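For comparison, here is a sketch of the same idea with native promises (same urls array and cheerio assumed; a failed URL simply resolves to null here):

var promiseCalls = urls.map((url) =>
    new Promise((resolve) => {
        request(url, (error, response, body) => {
            if (error) {
                console.error("We've encountered an error:", error);
                return resolve(null); // keep the other requests going
            }
            var $ = cheerio.load(body);
            resolve({ url, share: $(".theitemIwant").html() });
        });
    })
);

Promise.all(promiseCalls).then((results) => { /* YOUR CODE HERE */ });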
I took a stab at it. You need to install the q library and require it:
var Q = require('q');
//... wherever your function is

//start with an array of string urls
var urls = ["http://Blablablabla.com", '...', '...'];

//store results in this array in the form:
// {
//   url: url,
//   promise: <will be resolved when it's done>,
//   share: 'code that you wanted'
// }
var results = [];

//loop over each url and perform the request
urls.forEach(processUrl);

function processUrl(url) {
    //we use a deferred object so we can know when the request is done
    var deferred = Q.defer();

    //create a new result object and add it to results
    var result = {
        url: url,
        promise: deferred.promise
    };
    results.push(result);

    //perform the request
    request(url, function (error, response, body) {
        if (!error) {
            var $ = cheerio.load(body),
                share = $(".theitemIwant").html();
            //resolve the promise so we know this request is done.
            // no one is using the resolved value, but if they were they would get share
            deferred.resolve(share);
            //set the value we extracted on the result object
            result.share = share;
        } else {
            //request failed, reject the promise to abort the chain and fall into the "catch" block
            deferred.reject(error)
            console.log("We've encountered an error: " + error);
        }
    });
}

//results.map converts the results array to just their promises
//Q.all takes in an array of promises
//when they are all done it will call your then/catch block
Q.all(results.map(function (i) { return i.promise; }))
    .then(sendResponse) //when all promises are done it calls this
    .catch(sendError); //if any promise fails it calls this

function sendError(error) {
    res.status(500).json({failed: error});
}

function sendResponse(data) { //data = response from every resolve call
    //process results and convert to your response
    return res.send(results);
}
Here is another solution I like a lot:
const requestPromise = require('request-promise');
const Promise = require('bluebird');
const cheerio = require('cheerio');

const urls = ['http://google.be', 'http://biiinge.konbini.com/series/au-dela-des-murs-serie-herve-hadmar-marc-herpoux-critique/?src=konbini_home']

Promise.map(urls, requestPromise)
    .map((htmlOnePage, index) => {
        const $ = cheerio.load(htmlOnePage);
        const share = $('.nb-shares').html();
        let shareTuple = {};
        shareTuple[urls[index]] = share;
        return shareTuple;
    })
    .then(console.log)
    .catch((e) => console.log('We encountered an error' + e));
