Why my offset variable's value remains zero - javascript

Trying to make a simple Tumblr scraper using node.js
// Tumblr scraper: issues five requests for text posts and appends each post title to content.txt.
var request = require('request');
var fs = require('fs');
var apiKey = 'my-key-here';
// Running count of posts written; also used as the `offset` query parameter.
var offset = 0;
for (var i=0; i<5; i++) {
console.log('request #' + i + '...');
// The URL is built synchronously here, from whatever value `offset` holds at this moment.
var requestURL = 'http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key='
+ apiKey
+ '&offset='
+ offset;
console.log(requestURL);
request(requestURL, function(error, response, body) {
// Only error-free HTTP 200 responses are processed; anything else is dropped without logging.
if (!error && response.statusCode == 200) {
var resultAsJSON = JSON.parse(body);
resultAsJSON.response.posts.forEach(function(obj) {
// Each title is written with the current counter value as a prefix.
fs.appendFile('content.txt', offset + ' ' + obj.title + '\n', function (err) {
if (err) return console.log(err);
});
// This is the only place `offset` changes: inside the response callback,
// not in the loop body that builds the URL.
offset++;
});
}
});
}
By default, the API only returns a maximum of 20 latest posts. I want to grab all the posts instead. As a test, I want to get the latest 100 first, hence the i<5 in the loop declaration.
The trick to do it is to use the offset parameter. Given an offset value of 20, for example, the API will not return the latest 20, but instead returns posts starting from the 21st from the top.
As I can't be sure that the API will always return 20 posts, I am using offset++ to get the correct offset number.
The code above works, but console.log(requestURL) returns http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key=my-key-here&offset=0
five times.
So my question is: why does the offset value in my requestURL remain 0, even though I have added offset++?

You should increment the offset in the loop, not in callbacks. Callbacks fire only after the request has been completed, which means you make five requests with offset = 0 and it's incremented after you get a response.
// Build the request URL inside the loop body. The post-increment uses the current
// offset in the URL and then advances it by one for the next iteration.
var requestURL = 'http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key='
+ apiKey
+ '&offset='
+ (offset++); // increment here, before passing URL to request();
Edit:
To offset by 20 in each iteration, and use the offset in callback:
/**
 * Requests one page of posts and appends every title to content.txt.
 * `off` is this call's own counter: it starts at the page offset and is
 * incremented per post, so concurrent pages do not share a counter.
 */
function appendPage(pageURL, off) {
  request(pageURL, function (error, response, body) {
    // Skip anything that is not an error-free HTTP 200 response.
    if (error || response.statusCode != 200) {
      return;
    }
    var posts = JSON.parse(body).response.posts;
    posts.forEach(function (post) {
      fs.appendFile('content.txt', off + ' ' + post.title + '\n', function (err) {
        if (err) return console.log(err);
      });
      off++;
    });
  });
}

// Five pages, 20 posts apart: offsets 0, 20, 40, 60, 80.
for (var i = 0; i < 5; i++) {
  var offset = i * 20;
  var requestURL = 'http://api.tumblr.com/v2/blog/blog.tumblr.com/posts/text?api_key='
    + apiKey
    + '&offset='
    + offset;
  appendPage(requestURL, offset); // hand the loop's offset to the helper's own scope
}

Related

Wait for fetch response to continue in for loop. Javascript Nodejs

I have a function that connects to a SOAP web service. Unfortunately, the web service only supports a very limited number of connections. I have an array of items to look up in the web service. If I use a for or forEach loop, about 70% of the requests complete without error, but for the other 30% the web service responds with an error. This occurs when the maximum number of connections is exceeded. It happens because the loop does not wait for the web service's response and keeps going, creating a lot of connections.
Here's my code:
// Builds one SOAP request per entry of `result` and collects a promise for each in promiseArray.
// The loop itself never waits: every iteration starts its request and moves on, so all
// requests are in flight together before Promise.all is awaited at the end.
var promiseArray = [];
for (var i = 0; i < result.length; i++) {
// `m` is the id of the current item; it is embedded in the <doc> element of the envelope.
let m = result[i].id
let xml = '<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:tem="http://tempuri.org/">' +
'<soapenv:Header/>' +
'<soapenv:Body>' +
'<tem:EjecutarConsultaXML>' +
'<!--Optional:-->' +
'<tem:pvstrxmlParametros>' +
'<![CDATA[' +
'<Consulta><NombreConexion>USERNAME</NombreConexion>' +
'<IdConsulta>QUERY</IdConsulta>' +
'<Parametros>' +
'<doc>' + m + '</doc>' +
'</Parametros>' +
'</Consulta>' +
']]>' +
'</tem:pvstrxmlParametros>' +
'</tem:EjecutarConsultaXML>' +
'</soapenv:Body>' +
'</soapenv:Envelope>';
// Options handed to xml2js.parseString below.
const options = {
explicitArray: true
};
// The wrapping promise settles only through the resolve() call inside the parse callback.
promiseArray.push(new Promise(async(resolve, reject) => {
await axios.post(url, xml, {
headers: {
'Content-Type': 'text/xml;charset=UTF-8'
}
})
.then((data) => {
// `err` from the parser is not inspected; `result` here shadows the outer `result` array.
xml2js.parseString(data.data, options, (err, result) => {
var temp = (result['soap:Envelope']['soap:Body'][0]['EjecutarConsultaXMLResponse'][0]['EjecutarConsultaXMLResult'][0]['diffgr:diffgram'][0]['NewDataSet'][0]['Resultado'])
resolve({
doc: m,
state: temp[0].f430_ind_estado[0]
})
});
})
.catch((err) => {
// A failure is only logged: neither resolve nor reject is called on this path.
console.log(err)
});
}))
}
// Responds with the array of { doc, state } objects once every promise has resolved.
res.send(await Promise.all(promiseArray))
There are several issues with your code within the call to promiseArray.push().
There is no need to create a new Promise() since axios already provides one
This is actually an antipattern
There is no need for async/await in that call for the same reason.
Mixing Promises and functions that use callbacks usually doesn't turn out too well
You have no error checking in your code if the XML parser fails
The option object is not required as explicitArray: true is the default
Changes:
Removed all the extra/unneeded Promise code
Replaced xml2js.parseString with xml2js.parseStringPromise
Changed resolve to return
Since you're simply passing the error to console.log(), removed unnecessary boilerplate
Everything else is OK as written. Please let me know if I've missed something.
// Queue the SOAP request for the current item. axios.post already returns a
// promise, so the chain is pushed directly instead of being wrapped in new Promise().
// Each entry resolves to { doc, state }; if any step fails, the error is logged
// by the final .catch and the entry resolves to undefined.
promiseArray.push(
  axios.post(url, xml, {
    headers: {
      'Content-Type': 'text/xml;charset=UTF-8'
    }
  })
    // The response body holds the raw XML text.
    .then(data => data.data)
    .then(body => xml2js.parseStringPromise(body))
    .then(result => {
      var temp = result['soap:Envelope']['soap:Body'][0]['EjecutarConsultaXMLResponse'][0]['EjecutarConsultaXMLResult'][0]['diffgr:diffgram'][0]['NewDataSet'][0]['Resultado'];
      return {
        doc: m,
        state: temp[0].f430_ind_estado[0]
      };
    }) // no semicolon here: the chain continues with .catch
    .catch(console.log)
);
Just do the requests one by one, using async/await. This means you have to use parseStringPromise instead of parseString.
// Look up the items strictly one at a time: each iteration awaits its own
// request and XML parse before the next request is started, so only one
// connection to the web service is open at any moment.
var response = [];
for (var i = 0; i < result.length; i++) {
  let m = result[i].id;
  let xml = '<soapenv:Envelope xmlns:soapenv="http://schemas.xmlsoap.org/soap/envelope/" xmlns:tem="http://tempuri.org/">' +
    '<soapenv:Header/>' +
    '<soapenv:Body>' +
    '<tem:EjecutarConsultaXML>' +
    '<!--Optional:-->' +
    '<tem:pvstrxmlParametros>' +
    '<![CDATA[' +
    '<Consulta><NombreConexion>USERNAME</NombreConexion>' +
    '<IdConsulta>QUERY</IdConsulta>' +
    '<Parametros>' +
    '<doc>' + m + '</doc>' +
    '</Parametros>' +
    '</Consulta>' +
    ']]>' +
    '</tem:pvstrxmlParametros>' +
    '</tem:EjecutarConsultaXML>' +
    '</soapenv:Body>' +
    '</soapenv:Envelope>';
  try {
    // `data` is the response body, i.e. the raw XML text.
    const { data } = await axios.post(url, xml, {
      headers: {
        'Content-Type': 'text/xml;charset=UTF-8'
      }
    });
    const xmlObject = await xml2js.parseStringPromise(data);
    const temp = xmlObject['soap:Envelope']['soap:Body'][0]['EjecutarConsultaXMLResponse'][0]['EjecutarConsultaXMLResult'][0]['diffgr:diffgram'][0]['NewDataSet'][0]['Resultado'];
    response.push({
      doc: m,
      state: temp[0].f430_ind_estado[0]
    });
  } catch (error) {
    // A failed item is logged and skipped; the loop continues with the next one.
    console.log(error);
  }
}
// Send the collected { doc, state } entries, not the input `result` array.
res.send(response);

synchronous method in javascript and $.get method in jQuery

I have a strange issue with my method :
// Keyup handler for #search: requests matching cities and renders a suggestion list.
// The statements after the $.get(...) call sit outside its callback, so they are
// not part of what runs when the response is delivered.
$('#search').on('keyup', function () {
var valNow = $('#search').val();
// Only act when the value changed since the last handled keyup and is not empty.
if (last !== valNow && valNow !== '') {
console.log(valNow + ' / ' + i);
//interrogate a server from a cities
$.get(path + '/' + strategy() + '/' + valNow,
function (data, status) {
//here
console.log(status);
if (status === 'success') {
cities = [];
cities = data;
}
},
'json');
// make new last
last = valNow;
//list result
var items = [];
// `cities` is read here, outside the $.get callback that assigns it.
console.log(cities[0]);
console.log(' / ' + i);
$(cities).each(function (index, value) {
console.log(value);
// The typed prefix is shown in bold; the remainder of the city name follows it.
var notStrong = valNow.length;
var strong = value.length;
items.push('<li><strong>'+ valNow +'</strong>'+value.substr(notStrong)+'</li>');
});
$('.result').append(items).show();
i++;
console.log('finished');
}
}
);
The problem is simply that when I use (bind) this function, I get the 'finished' message before console.log(status) (marked with the //here comment). The $.get function takes a long time to query the web service. I don't know why I have this issue with $.get: is it running on a separate thread or something like that? What I want is for the statements to run in order (console.log(status), then console.log('finished')).
Try appending your options inside the function block which gives you the data
// Sketch of the keyup handler with the rendering moved into the $.get callback.
// The list-building code is left as a placeholder comment inside the success branch.
$('#search').on('keyup', function () {
var valNow = $('#search').val();
// Only act when the value changed and is not empty.
if (last !== valNow && valNow !== '') {
console.log(valNow + ' / ' + i);
//interrogate a server from a cities
$.get(path + '/' + strategy() + '/' + valNow,
function (data, status) {
if (status === 'success') {
cities = data;
// append all the options here
}
},'json');
}
}
);
Using AJAX to get data from a remote location always runs asynchronously, meaning that, when calling $.get, the call to the server will be made and the JS code returns immediately. Then, after the code in between, console.log('finished') will be called, and some time later, when the $.get call receives the response from the server, the code inside the $.get anonymous function will be called, which then runs console.log(status).
That is the intended design for grabbing data from remote locations. If you want to run the other code strictly after that, you have to run it inside the callback function of $.get, like this:
// Keyup handler for #search: requests matching cities and renders the
// suggestion list inside the $.get callback, so rendering always happens
// after the response has arrived.
$('#search').on('keyup', function() {
  var valNow = $('#search').val();
  if (last === valNow || valNow === '') {
    return;
  }
  // Record the value immediately. If this were done in the callback, every
  // keyup that arrives before the response (arrow keys, shift, ...) would
  // still see the old `last` and fire a duplicate request.
  last = valNow;
  console.log(valNow + ' / ' + i);
  //interrogate a server from a cities
  $.get(path + '/' + strategy() + '/' + valNow,
    function(data, status) {
      //here
      console.log(status);
      if (status === 'success') {
        cities = data;
      }
      //list result
      var items = [];
      console.log(cities[0]);
      console.log(' / ' + i);
      $(cities).each(function(index, value) {
        console.log(value);
        // The typed prefix is shown in bold; the rest of the city name follows.
        var notStrong = valNow.length;
        // Build the <li> from text nodes so neither the typed text nor the
        // server value is interpreted as HTML.
        items.push(
          $('<li>')
            .append($('<strong>').text(valNow))
            .append(document.createTextNode(value.substr(notStrong)))
        );
      });
      $('.result').append(items).show();
      i++;
      console.log('finished');
    },
    'json');
});
There are other ways to make the code cleaner, for example using Promises.

How can I get my URL to update to different API id's once I've looped through a JSON array?

When I'm looping through my JSON file I'm taking out all the Ids with this forEach loop
// Fetches the schedule for the given date and logs the id of every game in it.
// Note that `gameId` is assigned the return value of request(...) itself;
// the ids are only available as `v.id` inside the callback.
var gameId = request("https://api.sportradar.us/ncaamb-t3/games/" + yyyy + "/" + mm + "/" + dd + "/schedule.json?api_key=************", function(error, response, body){
// Only error-free HTTP 200 responses are processed.
if(!error && response.statusCode == 200){
var data = JSON.parse(body);
data.games.forEach((v) => {
console.log(v.id);
});
}
});
It gives me the ID's of the information that I want like this
dcb9a3df-a350-4fa3-ba8d-ddb413a5c5df
c4aa937a-0676-4395-856d-5b7e2216027c
91b8cea3-d41d-4269-aac7-b94e4b06d27c
6043b675-64b7-4575-bc0f-1af1502eb366
56083249-a870-40f7-9d86-f221c588d31c
6f0e4992-d5cc-4e40-ba7f-08cf8f801295
a4ca2161-bbcd-4423-8a51-e5886873e78a
1b9cd7bc-8970-4c62-ae72-c03f93cb2c05
21f9601e-db34-4c30-a568-b40f40627e66
0ccb6184-7abc-4035-ad14-6b2c405ee002
7618e36d-4191-4be1-8cdd-61087df2b07b
793e0d40-4f34-462e-ae0c-fde5c0e0aec3
a7da9ccc-80a1-44f8-b8eb-93369f8e8aae
eb02bcae-83f2-4d00-98be-3add44be7c51
a8fd9234-1b37-4b18-abea-f33afcda0ae0
ad536204-057a-4fe6-8f3d-c151018867af
b4c96280-9799-4566-a36e-9c4ac2fb942b
ed0bdb48-3a3b-47f4-b84c-3cdea0122c9e
0a1276d6-0c5d-4c77-9a3c-de6f04a2f342
b9e451c9-e978-40af-b6cf-bcb21f33aec6
c16b8327-5994-45a7-b380-03e1712e6a0c
6a02a4f5-983a-494b-82a8-bd8155256fd8
c0c3061e-814c-4869-ac66-5bbeb8a59830
9a778f37-1010-4625-b715-a7a0222df673
ddcd9846-c8b7-46d5-87c9-526d0431dc13
c75b0bd5-db16-4f8a-81d1-b8e82e7f2dc9
426f3858-a2b0-462c-a72e-57bc5db32a2f
d2cfb022-064b-4fac-ad99-67666ef25a2d
36ced72a-9d6a-47fe-af0b-613f30ac69c0
But then I would like to insert each ID that is returned into a new URL like this:
// Fetches a boxscore and logs which team has more points.
// The URL is built by concatenating whatever `gameId` currently holds.
request("https://api.sportradar.us/ncaamb-t3/"+ gameId +"/boxscore.json?api_key=***************", function(error, response, body) {
// Errors and non-200 responses fall through without any output.
if(!error && response.statusCode == 200){
var data = JSON.parse(body);
var homeName = (data.home.name);
var awayName = (data.away.name);
var homeScore = (data.home.points);
var awayScore = (data.away.points);
// Equal scores take the else branch, so a tie is reported as an away win.
if(homeScore > awayScore){
console.log(homeName.toUpperCase() + " WIN!");
}else {
console.log(awayName.toUpperCase() + " WIN!");
}
}
});
I'm using the variable gameId inside of my url like this "https://api.sportradar.us/ncaamb-t3/"+ gameId +"/boxscore.json?api_key=***************"
But it's only giving me the IDs and not what I want, which is which team won. Do you have any idea what I could be doing wrong? I need it to visit two different JSON pages.
EDIT******
Tried this out but it didn't seem to work, though it also didn't give me any errors.
// Fetches the day's schedule, then requests the boxscore of every game in it
// and logs the team with more points.
request("https://api.sportradar.us/ncaamb-t3/games/" + yyyy + "/" + mm + "/" + dd + "/schedule.json?api_key=j6mu95u99hsaayj5etfuzh6w", function(error, response, body){
if(!error && response.statusCode == 200){
var data = JSON.parse(body);
data.games.forEach((v) => {
// NOTE(review): the schedule URL above has a `games/` path segment but this one
// does not — confirm the boxscore endpoint path against the API documentation.
// The callback parameters and the inner `data` shadow the outer ones of the same name.
request("https://api.sportradar.us/ncaamb-t3/"+ v.id +"/boxscore.json?api_key=j6mu95u99hsaayj5etfuzh6w", function(error, response, body) {
// Errors and non-200 responses produce no output at all, so a failing
// request is indistinguishable from "nothing happened".
if(!error && response.statusCode == 200){
var data = JSON.parse(body);
var homeName = (data.home.name);
var awayName = (data.away.name);
var homeScore = (data.home.points);
var awayScore = (data.away.points);
// Equal scores take the else branch, so a tie is reported as an away win.
if(homeScore > awayScore){
console.log(homeName.toUpperCase() + " WIN!");
}else {
console.log(awayName.toUpperCase() + " WIN!");
}
}
});
});
}
});
When I console.logged it it seems to be working, so I'm not sure why it's not console.logging properly......
792e9eff-cece-49bc-a696-a0632702e96e
https://api.sportradar.us/ncaamb-t3/games/792e9eff-cece-49bc-a696-a0632702e96e/boxscore.json?api_key=*********** function (error, response, body) {
if(!error && response.statusCode == 200){
var homeName = (data.home.name);
var awayName = (data.away.name);
var homeScore = (data.home.points);
var awayScore = (data.away.points);
if(homeScore > awayScore){
console.log(homeName.toUpperCase() + " WIN!");
}else {
console.log(awayName.toUpperCase() + " WIN!");
}
}
}
For the code in your edit--
v.id is the piece of information you want by the looks of it, so in your second API request, you should be concatenating v.id, not gameId.
request("https://api.sportradar.us/ncaamb-t3/"+ v.id +"/boxscore.json?api_key=j6mu95u99hsaayj5etfuzh6w", function(error, response, body) {

Node.js with Restler to return a value?

This is very early in my Node and JavaScript learning. Ideally, what I am attempting to do is create a small module that queries a specific type of REST endpoint and returns a specific feature based on an attribute query. The module is correctly logging the result, but I am struggling to get the .findById function to return this result. Although I am aware it has something to do with how the callbacks work, I am not experienced enough to sort it out yet. Any help, advice and direction towards explaining the solution is greatly appreciated.
// import modules
var restler = require('restler');
/**
 * Left-pads a value with "0" characters so the queries work.
 * @param {number|string} number - value to pad; converted to a string first
 * @param {number} size - minimum length of the result
 * @returns {string} the padded string, unchanged if already long enough
 */
function padZeros(number, size) {
  var text = number + "";
  var missing = size - text.length;
  for (var k = 0; k < missing; k++) {
    text = "0" + text;
  }
  return text;
}
// create feature service object
// Constructor: stores the endpoint URL and the field list, and attaches findById.
var FeatureService = function (url, fields) {
// save the parameters
this.restEndpoint = url;
this.fields = fields;
// Alias used by the event handler below, where `this` is not the service instance.
var self = this;
// Queries the endpoint for the feature whose idField equals the zero-padded value.
this.findById = function (idField, value, padZeroLength) {
var options = {
query: {
// Builds a clause of the form  idField='000value'
where: idField + '=\'' + padZeros(value, padZeroLength) + '\'',
outFields: this.fields,
f: "pjson"
},
parsers: 'parsers.json'
};
var url = this.restEndpoint + '/query';
restler.get(url, options).on('complete', function(result){
if (result instanceof Error){
console.log('Error:', result.message);
} else {
console.log(result); // this log result works
// self.feature is assigned only here, inside the 'complete' handler.
self.feature = JSON.parse(result);
}
});
// This return is reached right after the request has been started, so it yields
// whatever self.feature held before the handler above has assigned it.
return self.feature;
};
};
// Example usage: build a service for one endpoint and look up a single feature by id.
var restEndpoint = 'http://services.arcgis.com/SgB3dZDkkUxpEHxu/ArcGIS/rest/services/aw_accesses_20140712b/FeatureServer/1';
var fields = 'nameRiver,nameSection,nameSectionCommon,difficulty,diffMax';
var putins = new FeatureService(restEndpoint, fields);
// `feature` is the synchronous return value of findById.
var feature = putins.findById('awid_string', 1143, 8);
console.log(feature); // this log result does not
//console.log('River: ' + feature.attributes.nameRiver);
//console.log('Section: ' + feature.attributes.nameSection + ' (' + feature.attributes.nameSectionCommon + ')');
//console.log('Difficulty: ' + feature.attributes.difficulty);
So, I sorted out how to insert a callback from a previous thread. It appears it is just passed in as a variable and called with expected parameters. However, I now wonder if there is a better way to accept parameters, possibly in the form of options. Any advice in this regard?
// import modules
var restler = require('restler');
/**
 * Left-pads a value with "0" characters so the queries work.
 * Uses the built-in String.prototype.padStart instead of a hand-rolled loop.
 * @param {number|string} number - value to pad; converted to a string first
 * @param {number} size - minimum length of the result
 * @returns {string} the padded string, unchanged if already at least `size` long
 */
function padZeros(number, size) {
  return String(number).padStart(size, "0");
}
/**
 * Feature service object: wraps one REST endpoint and a fixed list of output fields.
 * @param {string} url - endpoint base URL; "/query" is appended for lookups
 * @param {string} fields - comma-separated field names sent as `outFields`
 */
var FeatureService = function (url, fields) {
  // save the parameters
  this.restEndpoint = url;
  this.fields = fields;
  // Stable reference to the instance, so findById works even when it is
  // detached from the object or called from inside an event handler.
  var self = this;

  /**
   * Find a single feature by a unique value and hand it to `callback`.
   * The result is delivered only through the callback; nothing is returned.
   * @param {string} idField - name of the attribute to match
   * @param {number|string} value - id value; zero-padded to `padZeroLength`
   * @param {number} padZeroLength - length the id is padded to
   * @param {function(Object)} callback - receives the first matching feature
   *   (undefined when the response contains no features)
   */
  this.findById = function (idField, value, padZeroLength, callback) {
    // query options for the request
    var options = {
      query: {
        // Builds a clause of the form  idField='000value'
        where: idField + '=\'' + padZeros(value, padZeroLength) + '\'',
        outFields: self.fields,
        f: "pjson"
      },
      parsers: 'parsers.json'
    };
    var url = self.restEndpoint + '/query';
    restler.get(url, options)
      .on('success', function (data, response) {
        // Accept a payload that is already decoded as well as raw JSON text.
        // NOTE(review): whether restler pre-parses depends on its parser
        // settings and the response content type — confirm.
        var parsed = (data && data.features) ? data : JSON.parse(data);
        var dataObj = parsed.features[0];
        console.log(dataObj);
        callback(dataObj);
      })
      .on('fail', function (data, response) {
        console.log('Error:', data.message);
      });
  };
};
// Example usage: look up one put-in by id and print a short summary once the
// feature has been delivered to the callback.
var restEndpoint = 'http://services.arcgis.com/SgB3dZDkkUxpEHxu/ArcGIS/rest/services/aw_accesses_20140712b/FeatureServer/1';
var fields = 'nameRiver,nameSection,nameSectionCommon,difficulty,diffMax';
var putins = new FeatureService(restEndpoint, fields);

putins.findById('awid_string', 1143, 8, function (dataObject) {
  var attrs = dataObject.attributes;
  var summary = [
    'River: ' + attrs.nameRiver,
    'Section: ' + attrs.nameSection + ' (' + attrs.nameSectionCommon + ')',
    'Difficulty: ' + attrs.difficulty
  ];
  summary.forEach(function (line) {
    console.log(line);
  });
});

Node.js - Logging response statusCode to console (a la Django development server)

I'm trying to build a Node.js server with console logging similar to that of Django's development server. E.g.
[27/Jun/2011 15:26:50] "GET /?test=5 HTTP/1.1" 200 545
The following server.js (based on the Node Beginner Book tutorial) gets me the time and request information:
var http = require("http");
var url = require ("url");
var port = 1234;
/**
 * Starts the HTTP server on `port` and prints a startup banner.
 * Every incoming request is routed first and logged afterwards.
 * @param {function} route - called as route(handle, pathname, query, response)
 * @param {*} handle - passed through to `route` unchanged
 */
function start(route, handle) {
  var server = http.createServer(function onRequest(request, response) {
    // Parse the URL once and take both parts from the same result.
    var parsed = url.parse(request.url);
    route(handle, parsed.pathname, parsed.query, response);
    logRequest(request);
  });
  server.listen(port);

  console.log("\nServer running at http://192.168.1.5:" + port + "/");
  console.log("Press CONTROL-C to quit.\n");
}
/**
 * Writes one access-log line for a request to the console, in the form
 *   [YYYY/M/D HH:MM:SS] "METHOD /path?query HTTP/x.y"
 * @param {Object} request - incoming request; `url`, `method` and
 *   `httpVersion` are read from it
 */
function logRequest(request) {
  var parsed = url.parse(request.url);
  var pathname = parsed.pathname;
  // `search` includes the leading "?", which `query` does not; using it keeps
  // the separator between path and query string in the logged URL.
  var search = parsed.search || "";

  // Two-digit formatting, applied to hours as well as minutes and seconds.
  var pad2 = function (value) {
    return value < 10 ? "0" + value : String(value);
  };

  var currentDate = new Date();
  var day = currentDate.getDate();
  var month = currentDate.getMonth() + 1; // getMonth() is zero-based
  var year = currentDate.getFullYear();
  var hours = pad2(currentDate.getHours());
  var minutes = pad2(currentDate.getMinutes());
  var seconds = pad2(currentDate.getSeconds());

  console.log("[" + year + "/" + month + "/" + day +
    " " + hours + ":" + minutes + ":" + seconds + '] "' +
    request.method + " " + pathname + search +
    " HTTP/" + request.httpVersion + '"');
}
exports.start = start;
Question: how would I update this code to get the response.statusCode and whatever the "545" number is into the log output?
When I tried adding the response object to the logRequest function, the response statusCode was always "200", even when I knew (via debug logging) that my router.js was generating 404 errors.
If you are using the route method found in the tutorial you linked, then the reason why using the response.statusCode doesn't work is because they set the 404 status code using this line
response.writeHead(404, {"Content-Type": "text/html"});
If you take a look at the documentation right there Node.JS Docs - writeHead it states that .writeHead will not set the .statusCode value. If you want to use the .statusCode value, you should change the line in the route method to read like this :
response.statusCode = 404;
response.setHeader("Content-Type", "text/html");
Directly assigning a value to statusCode and using "implicit" header declarations (setHeader instead of writeHead) will produce the same result for the user, but on your side you will have access to the statusCode later on in your log method.

Categories

Resources