Run function in series with dynamic arguments in nodeJs - javascript

Goal: (solved: updated working code below)
Create an array of urls
Create a function that accepts a single url, makes a request to that url, and writes some data to file
Run above function in series multiple times for each url in the array.
Node Dependencies: Request, Cheerio, async, fs
Here is what I have so far:
// Load dependencies: `request` fetches the pages, `cheerio` parses the HTML,
// `async` drives the series and `fs` provides the output stream.
var request = require("request"),
cheerio = require("cheerio"),
fs = require('fs'),
async = require('async'),
// One write stream, opened once here and written to by getUrlData for every URL.
wstream = fs.createWriteStream('./model/data.txt');
// Build the list of ten URLs to visit: http://www.test.com/0 ... http://www.test.com/9
var arr = Array.from({ length: 10 }, function (_unused, index) {
  return 'http://www.test.com/' + index;
});
/**
 * Requests a single URL and appends the inner HTML of its first <h1> to the
 * shared write stream.
 *
 * The callback is invoked exactly once on every path, so a series runner
 * waiting on it can never stall.
 *
 * @param {string} url - address to fetch
 * @param {function(Error=)} callback - called with the request error, or with
 *   no argument when the URL was processed or skipped
 */
function getUrlData(url, callback){
  request(url, function (error, response, body) {
    if (error) {
      // Report the failure instead of leaving the caller waiting forever.
      return callback(error);
    }
    if (response.statusCode !== 200) {
      // Nothing to scrape on a non-200 reply: skip this URL and move on.
      return callback();
    }
    var $ = cheerio.load(body);
    var data = $('h1').html();
    // .html() yields null when the page has no <h1>; writing null would throw.
    if (data != null) {
      wstream.write(data);
    }
    callback();
  });
}
/**
 * Final step of the series: closes the shared write stream.
 *
 * @param {Error} [err] - error forwarded by the series runner, if any; it is
 *   reported rather than dropped, and the stream is closed either way
 */
function endStream(err){
  if (err) {
    console.error('Stopped before all URLs were processed:', err);
  }
  wstream.end();
}
// Updated working version: run getUrlData for each URL in arr, in series,
// then hand over to endStream.
async.eachSeries(arr,getUrlData,endStream);

Since you want getUrlData to be executed sequentially, it needs to accept a callback and invoke it exactly once when its request has finished:
/**
 * Requests a single URL and appends the inner HTML of its first <h1> to the
 * shared write stream, then signals completion.
 *
 * @param {string} url - address to fetch
 * @param {function(Error=)} callback - invoked exactly once: with the request
 *   error if there was one, otherwise with no argument
 */
function getUrlData(url, callback){
  request(url, function (error, response, body) {
    if (!error && response.statusCode === 200) {
      var $ = cheerio.load(body);
      var data = $('h1').html();
      // .html() yields null when the page has no <h1>; writing null would throw.
      if (data != null) {
        wstream.write(data);
      }
      // Return here so the callback below is not fired a second time.
      return callback();
    }
    // Request failed (error set) or non-200 reply (error is null, URL skipped).
    callback(error);
  });
}
Then use eachSeries:
async.eachSeries(arr, getUrlData, endStream);

Related

web scraping question: trying to store a specific value of my scrape into a var

const cheerio = require("cheerio");
const request = require("request");
// Download the champion page and print the markup of the element located two
// siblings before `.footer-attr`.
request("https://champion.gg/champion/Nautilus/Support?league=", (error, response, html) => {
  // Bail out on a failed request or any non-200 reply.
  if (error || response.statusCode != 200) {
    return;
  }
  const $ = cheerio.load(html);
  const scriptContainerHtml = $('.footer-attr').prev().prev().html();
  console.log(scriptContainerHtml);
});
This is what I'm currently using and it returns all this
https://i.imgur.com/l1pOQ6V.png
I'd like to save the value of "physicalDmg" "magicDmg" and "trueDmg" into 3 variables but have no idea how to go about this.
If someone could help me with this that'd be great!

Using request.on() for GET requests

When dealing with GET requests, should I be using request.on()?
For instance, in my main.js,
var http = require('http');
var dispatcher = require('./public/javascript/dispatcher.js');
var serverPort = 8124;

// Forward every incoming request/response pair to the dispatcher.
function onRequest(request, response) {
  dispatcher.deal(request, response);
}

http.createServer(onRequest).listen(serverPort);
and in dispatcher.js, I have
module.exports = {
deal: function(request, response) {
if(request.method === "GET") { // WITHIN THIS BLOCK
var get = require('./get.js');
// Pass data to the appropriate function
get.do(request, response);
} else if(request.method === "POST") {
var qs = require('querystring');
var requestBody = '';
request.on('data', function(data) {
requestBody += data;
});
request.on('end', function() {
var formData = qs.parse(requestBody);
var post = require('./post.js');
// Pass data to the appropriate function
post.doing(request, response, formData);
});
}
} // End deal
} // End exports
So, to expand on my initial question and the code I've posted: the program works fine without request.on() in the GET branch. My guess is that since there's no body data to wait for in a GET request, there's no need for a request.on('end', ...) handler. Any clarifications, recommendations, or suggestions are welcome!

Variable inside request

I want to parse the body outside of the request callback, but I cannot figure out how to get it out of the request function. Can you please show me how, or at least give me an example? I do not understand it.
"use strict"
var robotsParser = require('robots-parser');
var request = require('request');
var fs = require('fs')
let url = 'http://google.de/robots.txt'
// Fetch robots.txt, parse it and print whether the reserve API path is
// disallowed for any user agent ('*').
request(url, function(error, response, body) {
  if (error) {
    // Without a body there is nothing to parse; report and stop.
    console.error(error);
    return;
  }
  // Declared locally: under "use strict", assigning to an undeclared `robots`
  // throws a ReferenceError.
  var robots = robotsParser(url, body);
  var reserveisDisallowed = robots.isDisallowed('http://google.de/maps/reserve/api/', '*');
  console.log(reserveisDisallowed);
});
Use a jQuery Deferred if you want to run this code only after the success callback has fired, like this:
// State shared between the request callback and the code that consumes the body.
var $deferred = $.Deferred();
var robots;
var globalBody;

// Runs once the deferred has been resolved with the response body.
$deferred.done((body) => {
  robots = robotsParser(url, body);
  globalBody = body; // From here on, globalBody holds the body. Do whatever you need with it.
  var reserveisDisallowed = robots.isDisallowed('http://google.de/maps/reserve/api/', '*');
  console.log(reserveisDisallowed);
});

// Resolve the deferred with the body as soon as the response arrives.
request(url, (error, response, body) => {
  $deferred.resolve(body);
});
Or, if you just want the body to be available outside the callback, simply assign it to the 'globalBody' variable, like this:
// Same request as before, but the body is also copied into `globalBody` so
// that it can be read outside this callback after the response has arrived.
// NOTE(review): assumes `robots` and `globalBody` are declared in an outer
// scope; they are not declared in this snippet. Confirm.
request(url, function(error, response, body) {
//console.log(body)
robots = robotsParser(url, body);
globalBody = body;
var reserveisDisallowed = robots.isDisallowed('http://google.de/maps/reserve/api/', '*')
console.log(reserveisDisallowed)
})

Extract/Return var from Request - Javascript

I'm new to Node.js. I'm trying to use a variable that is filled in inside a request callback, but I need to use it outside the request. Is this possible?
Sample code:
//stackExample
// Each name gets its own declaration: without separating commas, `cheerio`
// and `jsonArr` would be created as implicit globals.
const request = require('request');
const cheerio = require('cheerio');
const jsonArr = [];

// Fetch the page and collect the link text of every `.yt-lockup-title` element.
request({
  url: 'http://youtube.com',
  encoding: 'utf8'
},
function (err, resp, body) {
  if (!err && resp.statusCode === 200) {
    const $ = cheerio.load(body);
    $('.yt-lockup-title').each(function () {
      const title = $(this).find('a').text();
      jsonArr.push({
        titulo: title,
      });
    });
  }
  console.log(jsonArr); // Works here: runs after the response has been handled.
});

console.log(jsonArr); // Does not work here: runs before the callback above, so the array is still empty.
Your jsonArr variable is only populated inside the request callback, because that callback waits for the URL's response, while the console.log outside it is executed immediately, before the response arrives. You can create a function that is called from inside your anonymous callback and use the data there, outside the request call itself.
// Each name gets its own declaration: without separating commas, `cheerio`
// and `jsonArr` would be created as implicit globals.
const request = require('request');
const cheerio = require('cheerio');
const jsonArr = [];

// Fetch the page, collect the titles, then hand over to updated().
request({
  url: 'http://youtube.com',
  encoding: 'utf8'
},
function (err, resp, body) {
  if (!err && resp.statusCode === 200) {
    const $ = cheerio.load(body);
    $('.yt-lockup-title').each(function () {
      const title = $(this).find('a').text();
      jsonArr.push({
        titulo: title,
      });
    });
  }
  // Called once, after the loop, so updated() sees the complete array
  // instead of being fired for every title on a partial one.
  updated();
});

/**
 * Code that needs the collected titles goes here; it is invoked from the
 * request callback once jsonArr has been filled.
 */
function updated(){
  console.log(jsonArr);
}
The issue with the code above is that values are pushed into jsonArr only when the HTTP request returns, which happens after the rest of the script has already run.
You need to access jsonArr after that call has returned, which you can do by using promises.
Or, as a hack, use setTimeout:
// Hack: read jsonArr after a fixed delay. Set the delay to roughly the time
// the request takes to complete.
setTimeout(() => {
  console.log(jsonArr); // use the value once the interval has elapsed
}, 1000);
Or use a deferred/futures library (see the linked reference), for example:
// Chain asynchronous steps: each step receives `next` as its first argument
// and passes it on as the callback of the operation it starts (http.get or
// res.on), so the following step runs when that operation calls back.
// NOTE(review): `Futures` and `http` are not defined in this snippet and the
// last step is an elided placeholder; presumably both are loaded elsewhere.
var sequence = Futures.sequence();
sequence
.then(function(next) {
http.get({}, next);
})
.then(function(next, res) {
res.on("data", next);
})
.then(function(next, d) {
http.get({}, next);
})
.then(function(next, res) {
...
})
You need to wrap the console.log in a callback as follows.
The following code will log [5]
// Collects every number that has been handed to test().
const jsonArry = [];

/**
 * Appends num to jsonArry, then reports the whole array through callback.
 */
function test(num, callback) {
  jsonArry.push(num);
  callback(jsonArry);
}

// The logging happens inside the callback, i.e. after the push.
test(5, (data) => console.log(data));

Cheerio selector returns blank array

I'm trying to get the names of the matches in the "Starting soon" section of this website: https://favbet.com/en/bets/
This is my code:
// `matches` is part of the declaration list: after a terminating semicolon
// it would be created as an implicit global.
var request = require('request'),
    cheerio = require('cheerio'),
    matches = [];

// Fetch the page and collect the text of every `li.col20` inside `#startsoon`.
request('https://favbet.com/en/bets/', function (err, resp, body) {
  if (!err && resp.statusCode === 200) {
    var $ = cheerio.load(body);
    $('#startsoon').find('li.col20').each(function () {
      // Inside .each, `this` is a raw node; wrap it with $() to get .text().
      var match = $(this).text();
      matches.push(match);
    });
    // NOTE(review): if this still prints an empty array, the selector found
    // nothing in the downloaded HTML; possibly the list is filled in by
    // client-side script. Confirm against the raw response body.
    console.log(matches);
  }
});
console.log prints an empty array. I don't quite understand how the deep nesting works in this case.

Categories

Resources