Google Custom Search API for NodeJS - javascript

I'm trying to get the first 5 pages of search results with google custom search API ...
So far I've tried to achieve the result using nested function but with no luck.
I know that I'm getting the callbacks wrong, but so far I haven't figured out the correct way (without using a promises library) to solve my problem.
Could some of you point me out in the right direction?
Thanks.
app.get('/assesment', function(req, res){
console.log('route: /assesment');
var api_key = '';
var customsearch = google.customsearch('v1');
var response = "";
var number_of_pages = 5;
var next_page = 1;
var exit = 0
const CX = 'XXXXX';
const API_KEY = 'XXXXX';
const SEARCH = 'Test Query';
console.log('start');
// console.log('QUERY PAGE: '+pages);
doSearch(CX, SEARCH, API_KEY, next_page, function(resp){
res.send(resp);
});
//
// Functions
//
/**
 * Fetches up to five consecutive result pages from the Custom Search API and
 * hands the concatenated markup to `callback` exactly once, after the last
 * page (or the first failure) has been received.
 *
 * @param {string} _cx - Custom search engine id.
 * @param {string} _search - Query text.
 * @param {string} _api_key - API key, sent as `auth`.
 * @param {number} _start - 1-based index of the first result to request.
 * @param {function(string): void} [callback] - Receives the accumulated
 *   "title<br>link<br><hr>" markup; a request error is appended as JSON text.
 */
function doSearch(_cx, _search, _api_key, _start, callback) {
  const MAX_PAGES = 5;
  const done = typeof callback === 'function' ? callback : function () {};

  // Requests one page, appends its items and then either moves on to the next
  // page or reports the final result. The callback must only fire from here,
  // because every page arrives asynchronously.
  function fetchPage(start, pagesLeft, collected) {
    customsearch.cse.list({ cx: _cx, q: _search, auth: _api_key, start: start }, function (err, resp) {
      if (err) {
        // Keep what was gathered so far and report the error in-band.
        done(collected + JSON.stringify(err));
        return;
      }

      console.log('Result: ' + resp.searchInformation.formattedTotalResults);

      let markup = collected;
      const items = resp.items || [];
      if (items.length > 0) {
        console.log('First result of ' + items.length + ' is ' + items[0].title);
        items.forEach(function (item) {
          markup += item.title + "<br>";
          markup += item.link + "<br><hr>";
        });
      }

      // The API describes the following page in queries.nextPage[0]; when it
      // is absent there are no more results to ask for.
      const next = resp.queries && resp.queries.nextPage && resp.queries.nextPage[0];
      if (pagesLeft > 1 && next && next.startIndex) {
        fetchPage(next.startIndex, pagesLeft - 1, markup);
      } else {
        done(markup);
      }
    });
  }

  fetchPage(_start, MAX_PAGES, '');
}
});

You can use a third-party service like SerpApi to scrape Google and get back structured JSON.
Example using the Node.js library to get the 4th page of results (10 results per page, starting at offset 30):
// Runs one search through the GoogleSearchResults client created with the
// "demo" key and prints whatever the client passes to the callback.
var gsr = require('GoogleSearchResults');
let serp = new gsr.GoogleSearchResults("demo");

const query = {
  q: "Coffee",
  num: 10,
  start: 30,
  location: "Portland"
};

function printResult(result) {
  console.log(result);
}

serp.json(query, printResult);

Related

Write final json to a file from repeated requests to rest API

I am trying to build a file of json data from repeated calls to a restAPI. The final file to be written is the sum of the data received from all the calls. At present the file is being written with contents of the first call then overwritten by the contents of the first + second call (see console output below code).
As I have to make many calls, once the code is working, I would like to write the file only once, after all the requests have finished and the JSON string has been built. Does anyone know how I would go about doing this? Maybe with a callback (which I still don't have the hang of) that runs once the requests have finished or the JSON string has been completely built.
"use strict";
// Reads column 1 of every CSV row, requests shapes + <value> for the first
// `dataLength` rows and appends each response to one growing string that is
// written to pathJSON.
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../path tocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
url: '',
method: 'GET',
accept: "application/json",
json: true,
};
var csvData = [];
// Accumulator for the output; it starts with "[" and nothing in this script
// ever appends a closing "]".
var jsonData = "[";
fs.createReadStream(path)
.pipe(parse({delimiter: ','}))
.on('data', function(data) {
csvData.push(data[1]);
})
.on('end',function() {
var start = Date.now();
var records = csvData.length //2212 objects
console.log(records);
var dataLength = 2 //set low at moment
for (var i = 0; i < dataLength; i += 1) {
var url = shapes + csvData[i];
options.url = url; //set url query
request(options, function(error, response, body) {
// `error` is not checked here; body.response is used unconditionally.
var time = Date.now() - start;
var s = JSON.stringify(body.response);
console.log( '\n' + (Buffer.byteLength(s)/1000).toFixed(2)+
" kilobytes downloaded in: " + (time/1000) + " sec");
// `i` is a function-scoped `var`, so by the time this callback runs it holds
// the loop's final value (the console output shows 2 for every response).
console.log(i)
buildJSON(s);
});
}
// Drops the first and last character of `s`, appends the remainder plus a
// comma to jsonData and writes the whole accumulated string.
function buildJSON(s) {
var newStr = s.substring(1, s .length-1);
jsonData += newStr + ',';
writeFile(jsonData);
}
// Writes the given string to pathJSON. It is called once per completed
// request, so every call replaces the file written by the previous one.
function writeFile(jsonData) {
fs.writeFile(pathJSON, jsonData, function(err) {
if (err) {
return console.log(err);
} else {
console.log("file complete")
}
});
}
});
128.13 kilobytes downloaded in: 2.796 sec
2
file complete
256.21 kilobytes downloaded in: 3.167 sec
2
file complete
Perhaps writing to the file after all requests are complete will help. In the current code, the writeFile function is called each time a request is completed (which overwrites the file each time)
A quick way to fix this is to count requests (and failures) and write to file only after all the requests are complete.
"use strict";
// Reads column 1 of every CSV row, requests shapes + <value> for the first
// `dataLength` rows and writes ONE well-formed JSON array to pathJSON after
// every request has either succeeded or failed.
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../path tocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
  url: '',
  method: 'GET',
  accept: "application/json",
  json: true,
};
var csvData = [];

fs.createReadStream(path)
  .pipe(parse({
    delimiter: ','
  }))
  .on('data', function (data) {
    csvData.push(data[1]);
  })
  .on('end', function () {
    var start = Date.now();
    var records = csvData.length; //2212 objects
    console.log(records);
    // Never ask for more rows than the CSV actually contained.
    var dataLength = Math.min(2, records); //set low at moment
    // One slot per request so the output keeps the CSV order no matter in
    // which order the responses arrive.
    var fragments = new Array(dataLength).fill('');
    var pending = dataLength; // requests that have not settled yet

    if (pending === 0) {
      writeFile(buildJSON());
      return;
    }

    for (let i = 0; i < dataLength; i += 1) {
      // Per-request copy: the shared `options` object is left untouched.
      const requestOptions = Object.assign({}, options, { url: shapes + csvData[i] });
      request(requestOptions, function (error, response, body) {
        if (error) {
          console.error('request ' + i + ' failed:', error);
          settle();
          return;
        }
        var s = JSON.stringify(body && body.response);
        if (typeof s !== 'string') {
          // JSON.stringify(undefined) yields undefined: nothing to store.
          console.error('request ' + i + ' returned no "response" payload');
          settle();
          return;
        }
        var time = Date.now() - start;
        console.log('\n' + (Buffer.byteLength(s) / 1000).toFixed(2) +
          " kilobytes downloaded in: " + (time / 1000) + " sec");
        console.log(i);
        // Strip the payload's own outer delimiters so its elements can be
        // merged into the single array written at the end.
        // NOTE(review): assumes body.response serialises to a JSON array.
        fragments[i] = s.substring(1, s.length - 1);
        settle();
      });
    }

    // Counts one finished request (success or failure) and writes the file
    // when none are left.
    function settle() {
      pending -= 1;
      if (pending === 0) {
        writeFile(buildJSON());
      }
    }

    // Joins the non-empty fragments into one bracketed array without a
    // trailing comma.
    function buildJSON() {
      var parts = fragments.filter(function (fragment) {
        return fragment.length > 0;
      });
      return '[' + parts.join(',') + ']';
    }

    // Writes the finished JSON text to pathJSON.
    function writeFile(jsonData) {
      fs.writeFile(pathJSON, jsonData, function (err) {
        if (err) {
          return console.log(err);
        }
        console.log("file complete");
      });
    }
  });
Note:
Requests being fired in quick succession like (2000 requests in a for loop) in my experience does not work well.. Try batching them. Also, doing it this way does not guarantee order (if that is important in your usecase)
An alternative would be to open your file in append mode. You can do this by passing an extra options object with flag set to your fs.writeFile call.
// The options object with flag 'a' opens pathJSON in append mode, so this call
// adds jsonData to the end of the file instead of replacing its contents.
fs.writeFile(pathJSON, jsonData, {
flag: 'a'
}, function (err) {
if (err) {
return console.log(err);
}
});
References:
fs.writeFile Docs
File system flags

NodeJS Loop issue due to async/synchronicity issues

I am porting an old ruby script over to use javascript setting the function as a cron instance so it will run on schedule. The function queries our mysql database and retrieves inventory information for our products and then sends requests to a trading partners api to update our inventory on their site.
Due to Node's asynchronicity I am running into issues. We need to chunk requests into 1000 items per request, and we are sending 10k products. The issue is that each request just sends the last 1000 items every time. The for loop that is inside the while loop seems to move forward before it finishes crafting the JSON request body. I tried creating anonymous setTimeout functions in the while loop to handle it, as well as creating an object with the request function and the variables to be passed and pushing it into an array to iterate over once the while loop completes, but I am getting the same result. I'm not sure what the best way to handle it is so that each request gets the correct batch of items. I also need to wait 3 minutes between each request of 1000 items so as not to hit the request cap.
// Runs when the query has ended: releases the connection, prepares a CSV of
// all items, then builds one POST payload per batch and schedules a timer
// per batch.
query.on('end',()=>{
connection.release();
writeArray = itemArray.slice(0),
alteredArray = [];
var csv = json2csv({data: writeArray,fields:fields}),
timestamp = new Date(Date.now());
timestamp = timestamp.getFullYear() + '-' +(timestamp.getMonth() + 1) + '-' + timestamp.getDate()+ ' '+timestamp.getHours() +':'+timestamp.getMinutes()+':'+timestamp.getSeconds();
let fpath = './public/assets/archives/opalEdiInventory-'+timestamp+'.csv';
while(itemArray.length > 0){
// splice(0,999) removes at most 999 items from the front of itemArray.
alteredArray = itemArray.splice(0,999);
for(let i = 0; i < alteredArray.length; i++){
jsonObjectArray.push({
sku: alteredArray[i]['sku'],
quantity: alteredArray[i]["quantity"],
overstockquantity: alteredArray[i]["osInv"],
warehouse: warehouse,
isdiscontinued: alteredArray[i]["disc"],
backorderdate: alteredArray[i]["etd"],
backorderavailability: alteredArray[i]["boq"]
});
}
var jsonObject = {
login: user,
password: password,
items: jsonObjectArray
};
// postOptions is one object reused on every pass: its body is overwritten
// here, and each funcArray entry stores a reference to that same object in
// `vars`, so after the loop every entry sees the body of the last batch.
postOptions.url = endpoint;
postOptions.body = JSON.stringify(jsonObject);
funcArray.push({func:function(postOptions){request(postOptions,(err,res,body)=>{if(err){console.error(err);throw err;}console.log(body);})},vars:postOptions});
// Empties the shared array in place for the next batch; the body above was
// already serialised, so it is not affected by this.
jsonObjectArray.length = 0;
}
var mili = 180000;
// Schedules the i-th entry mili * i milliseconds from now. The timer only
// logs the stored body; the actual request call is commented out.
for(let i = 0;i < funcArray.length; i++){
setTimeout(()=>{
var d = JSON.parse(funcArray[i]['vars'].body);
console.log(d);
console.log('request '+ i);
//funcArray[i]['func'](funcArray[i]['vars']);
}, mili * i);
}
});
});
You would need async/await or Promise to handle async actions in node js.
I am not sure if you have node version which supports Async/await so i have tried a promise based solution.
query.on('end', () => {
connection.release();
writeArray = itemArray.slice(0),
alteredArray = [];
var csv = json2csv({ data: writeArray, fields: fields }),
timestamp = new Date(Date.now());
timestamp = timestamp.getFullYear() + '-' + (timestamp.getMonth() + 1) + '-' + timestamp.getDate() + ' ' + timestamp.getHours() + ':' + timestamp.getMinutes() + ':' + timestamp.getSeconds();
let fpath = './public/assets/archives/opalEdiInventory-' + timestamp + '.csv';
var calls = chunk(itemArray, 1000)
.map(function(chunk) {
var renameditemsArray = chunk.map((item) => new renamedItem(item, warehouse));
var postOptions = {};
postOptions.url = endpoint;
postOptions.body = JSON.stringify({
login: user,
password: password,
items: renameditemsArray
});
return postOptions;
});
sequenceBatch(calls, makeRequest)
.then(function() {
console.log('done');
})
.catch(function(err) {
console.log('failed', err)
});
/**
 * Runs `cb` once per entry of `calls`, strictly one after another, waiting
 * `delayMs` before each invocation. When `cb` returns a promise the next
 * entry is not started until that promise has settled.
 *
 * @param {Array} calls - Option objects, one per invocation of `cb`.
 * @param {function(*): (*|Promise)} cb - Performs a single call.
 * @param {number} [delayMs=180000] - Pause before every call, in milliseconds.
 * @returns {Promise<string|undefined>} Resolves with the completion message
 *   of the last call (undefined for an empty list). Rejects with an Error
 *   named after the failing call; the original failure is kept in `cause`.
 */
function sequenceBatch (calls, cb, delayMs = 180000) {
  let sequence = Promise.resolve();
  calls.forEach(function (callOptions, index) {
    // Fixed per iteration so each message names its own call, not whatever
    // a shared counter holds once the loop has finished.
    const callNumber = index + 1;
    sequence = sequence.then(() => {
      return new Promise(function (resolve, reject) {
        setTimeout(function () {
          // Wrapping in a promise chain catches synchronous throws as well
          // as rejections coming from an asynchronous cb.
          Promise.resolve()
            .then(() => cb(callOptions))
            .then(
              () => resolve(`callsequence${callNumber} done`),
              (err) => {
                const failure = new Error(`callsequence ${callNumber} failed`);
                failure.cause = err;
                reject(failure);
              }
            );
        }, delayMs);
      });
    });
  });
  return sequence;
}
/**
 * Sends one request and reports its outcome through a promise.
 *
 * Throwing from inside the request callback cannot be caught by the caller,
 * so the failure is delivered as a rejection instead.
 *
 * @param {Object} postOptions - Options handed to `request` unchanged.
 * @returns {Promise<*>} Resolves with the response body, rejects with the
 *   error reported by `request`.
 */
function makeRequest(postOptions) {
  return new Promise(function (resolve, reject) {
    request(postOptions, (err, res, body) => {
      if (err) {
        console.error(err);
        reject(err);
        return;
      }
      console.log(body);
      resolve(body);
    });
  });
}
/**
 * Splits `arr` into consecutive slices of at most `len` elements.
 *
 * @param {Array} arr - Source array; it is not modified.
 * @param {number} len - Maximum slice size, a positive integer.
 * @returns {Array<Array>} The slices in order; empty when `arr` is empty.
 * @throws {RangeError} When `len` is not a positive integer (a zero or
 *   negative size would otherwise never advance through the array).
 */
function chunk(arr, len) {
  if (!Number.isInteger(len) || len < 1) {
    throw new RangeError('chunk length must be a positive integer');
  }
  const chunks = [];
  for (let i = 0; i < arr.length; i += len) {
    chunks.push(arr.slice(i, i + len));
  }
  return chunks;
}
/**
 * Constructor that copies the fields of one inventory row onto the new
 * object under the property names used in the request payload.
 *
 * @param {Object} item - Row providing sku, quantity, osInv, disc, etd, boq.
 * @param {*} warehouse - Stored unchanged as `warehouse`.
 */
function renamedItem(item, warehouse) {
  Object.assign(this, {
    sku: item.sku,
    quantity: item.quantity,
    overstockquantity: item.osInv,
    warehouse: warehouse,
    isdiscontinued: item.disc,
    backorderdate: item.etd,
    backorderavailability: item.boq
  });
}
});
Could you please try this snippet and let me know if it works? I couldn't test it since I made it up on the fly. The core logic is in the sequenceBatch function. The answer is based on another question, which explains how timeouts and promises work together.
Turns out this wasn't a closure or async issues at all, the request object I was building was using references to objects instead of shallow copies resulting in the data all being linked to the same object ref in the ending array.

Another Parse 'success/error not called' error

I recently posted about an issue I had with another Parse CloudCode method, where the error "Error: success/error was not called" was thrown. I am having that issue again, but with a different method/scenario.
// Cloud function "background": queries every Group and, for each group that
// has an event whose "date" equals `now`, sends two pushes per matching event
// and destroys that event.
Parse.Cloud.define("background", function(request, response) {
var moments = require("cloud/moments.js");
var now = moments.moment();
var query = new Parse.Query("Group");
// Only a success callback is supplied to find(); no error callback is passed.
query.find({
success: function(results) {
for (var i = 0; i < results.length; i++) {
var object = results[i];
var events = object.get("Events");
var getUsers = false;
// First pass: is there at least one event dated `now`?
for (var q = 0; q < events.length; q++) {
var e = events[q];
// NOTE(review): loose == between the stored date and the object returned by
// moments.moment(); whether this can ever be true depends on their types - confirm.
if (e.get("date") == now) {
getUsers = true;
break;
}
}
if (getUsers == true) {
// Second pass: notify for every event dated `now`.
for (var q = 0; q < events.length; q++) {
var e = events[q];
if (e.get("date") == now) {
var relation = object.relation("created");
var partOne = e.get("name");
var outString1 = partOne.concat(" is now");
// generate a query based on that relation
var query = relation.query();
Parse.Push.send({
where: query, // Set our Installation query
data: {
alert: outString1
}
}, {
success: function() {
// Push was successful
},
error: function(error) {
// Handle error
}
});
var relation2 = object.relation("joined");
var partOnee = e.get("name");
var outString = partOnee.concat(" is now");
// generate a query based on that relation
// NOTE(review): query2 is built from `relation` ("created"), not from
// `relation2` ("joined"), so relation2 is never used - looks like a copy/paste slip.
var query2 = relation.query();
Parse.Push.send({
where: query2, // Set our Installation query
data: {
alert: outString
}
}, {
success: function() {
// Push was successful
},
error: function(error) {
// Handle error
}
});
e.destroy();
}
}
}
}
}
});
// Called right after find() has been started, outside of its callbacks, so it
// does not wait for the query, the pushes or destroy() to finish.
response.success();
});
Since this method involves more than just a simple query and return (as it has the for loop among other things) I am a bit confused on how to implement the Parse Promise stuff. If anyone could assist me in how I should go about implementing the promise stuff it would be much appreciated.
Parse documentation is very clear on how to use Promises and how to rewrite your pyramid code with .then() blocks instead.

possible nested asynchronous function

I have read a lot of the different answers about asynchronous functions on here, but I think I am overthinking my problem, or I have been staring at it for too long, and I can't figure it out. Your help is greatly appreciated.
So I am parsing a csv file and then trying to get the lat/long through another api. but I cant access the lat/lng outside of the function. below is my code I have commented it to the best of my ability please let me know if there are any questions or a much better way to do this.
Thanks,
// Collects whatever getLoc reports for each CSV row.
var location = []
// Downloads the CSV at `url`, and for every row after the first turns column 6
// into a query string and asks getLoc for its location.
function run() {
http.get(url, function(res) {
if(res.statusCode === 200) {
res.pipe(parse(function(err, data) {
// `err` is not checked. `i` and `passLoc` are assigned without a declaration.
for(i = 1; i < data.length; i++) {
var info = data[i];
passLoc = info[6].replace('block ', '')
passLoc = passLoc.replace(/ /g, "+")
// getLoc reports through its callback, so nothing has been pushed yet when
// the loop below finishes.
getLoc(passLoc, function(loc) {
location.push(loc);
//If I console.log(location) here I get all the info I want but.....it is printed 100 times becuase it is printed for each i in data.length
})
}
console.log(location) // loging this here gives me an empty array
}))
}else {
console.error('The address is unavailable. (%d)', res.statusCode);
}
})
}
/**
 * Looks up one street address with the geocoding service and reports the
 * outcome through `callback`, which is invoked exactly once.
 *
 * @param {string} x - Street address, inserted into the query string as is
 *   (callers already replace spaces with "+").
 * @param {function((Array<number>|string)): void} callback - Receives
 *   `[location.x, location.y]` of the first candidate, the input `x` when the
 *   service has no usable candidate, or the string "error!" when the request
 *   fails or the reply is not valid JSON.
 */
function getLoc(x, callback) {
  var url = "http://geodata.alleghenycounty.us/arcgis/rest/services/Geocoders/EAMS_Composite_Loc/GeocodeServer/findAddressCandidates?Street=" + x + "&City=Pittsburgh&State=PA&ZIP=&SingleLine=&outFields=&outSR=4326&searchExtent=&f=pjson";
  let settled = false;
  // Several failure paths can fire for one request; only the first counts.
  const finish = (value) => {
    if (settled) {
      return;
    }
    settled = true;
    callback(value);
  };

  http.get(url, (res) => {
    let data = '';
    res.on('data', (chunk) => {
      data += chunk;
    });
    res.on('end', () => {
      let candidates;
      try {
        candidates = JSON.parse(data).candidates;
      } catch (err) {
        // Truncated or non-JSON reply (for example an HTML error page).
        finish("error!");
        return;
      }
      const first = Array.isArray(candidates) ? candidates[0] : undefined;
      if (first && first.location) {
        // NOTE(review): the original stored x as "lat" and y as "lng"; with
        // outSR=4326 x is presumably the longitude - confirm. The element
        // order [x, y] is kept so existing callers see the same array.
        finish([first.location.x, first.location.y]);
      } else {
        finish(x);
      }
    });
    res.on('error', () => {
      finish("error!");
    });
  }).on('error', () => {
    // Connection-level failures are emitted on the request, not the response.
    finish("error!");
  });
}
Your code tries to synchronously consume asynchronous data -- you're synchronously trying to access the results (location) before any of the asynchronous operations have finished.
As you have multiple async operations running in parallel, you can make use of async.parallel to aid in controlling the asynchronous flow:
var async = require('async');
/**
 * Downloads the CSV at `url`, geocodes column 6 of every row after the header
 * with getLoc (all lookups run in parallel) and logs the collected locations
 * once every lookup has reported back.
 */
function run() {
  http.get(url, function(res) {
    if(res.statusCode !== 200) {
      console.error('The address is unavailable. (%d)', res.statusCode);
      return;
    }
    res.pipe(parse(function(err, data) {
      if (err) {
        // Without parsed rows there is nothing to geocode.
        console.error(err);
        return;
      }
      // array of async tasks to execute
      var tasks = [];
      data.slice(1).forEach(function(info) {
        var passLoc = info[6].replace('block ', '').replace(/ /g, '+');
        // push an async operation to the `tasks` array
        tasks.push(function(cb) {
          getLoc(passLoc, function(loc) {
            cb(null, loc);
          });
        });
      });
      // run all async tasks in parallel
      async.parallel(tasks, function(err, locations) {
        // consume data when all async tasks are finished
        console.log(locations);
      });
    }));
  }).on('error', function(err) {
    // Connection-level failures are emitted on the request object.
    console.error(err);
  });
}
Also, for loops don't create a scope, so I've swapped it by a forEach in order to scope the info and passLoc variables inside each iteration.
Here's a slightly more condensed version using ES5's Array#map:
var async = require('async');
/**
 * Condensed variant: downloads the CSV at `url`, maps every row after the
 * header to a getLoc task, runs the tasks in parallel and logs the collected
 * locations when all of them have reported back.
 */
function run() {
  http.get(url, function(res) {
    if(res.statusCode !== 200) {
      console.error('The address is unavailable. (%d)', res.statusCode);
      return;
    }
    res.pipe(parse(function(err, data) {
      if (err) {
        // Without parsed rows there is nothing to geocode.
        console.error(err);
        return;
      }
      async.parallel(
        // map data items to async tasks
        data.slice(1).map(function(info) {
          return function(cb) {
            var passLoc = info[6].replace('block ', '').replace(/ /g, '+');
            getLoc(passLoc, function(loc) {
              cb(null, loc);
            });
          };
        }),
        function(err, locations) {
          // consume data when all async tasks are finished
          console.log(locations);
        }
      );
    }));
  }).on('error', function(err) {
    // Connection-level failures are emitted on the request object.
    console.error(err);
  });
}

How to correct structure an asynchronous program to ensure correct results?

I have a nodejs program that requests a series of XML files, parses them and then puts the output in an array which is written to disk as a CSV file.
The program mostly works, however occasionally the files end up in the wrong order in the array.
I want the order of the results to be in the same as the order as the URLs. The URLs are stored in an array, so when I get the XML file I check what the index of the URL was in the source array and insert the results at the same index in the destination URL.
can anyone see the flaw that is allowing the results to end up in the wrong order?
// Stores one parsed result in `data` at the position `url` has in
// config.sources, and builds the output once as many results have arrived as
// there are sources.
// NOTE(review): indexOf returns -1 when `url` is not found verbatim in
// config.sources; the caller passes response.request.uri.href, so confirm
// that string always equals the configured one.
addResult = function (url, value, timestamp) {
data[config.sources.indexOf(url)] = {
value : value,
timestamp : timestamp,
url : url
};
numResults++;
if (numResults === config.sources.length) { //once all results are in build the output file
createOutputData();
}
}
// Loads config.json, percent-encodes the spaces in every source URL and
// requests each source; every response is parsed as XML and handed to
// addResult.
fs.readFile("config.json", function (fileError, data) {
var eachSource, processResponse = function (responseError, response, body) {
if (responseError) {
// A failed request is only logged; addResult is not called for it.
console.log(responseError);
} else {
parseXML(body, {
explicitArray : false
}, function (xmlError, result) {
if (xmlError) {
console.log(xmlError);
}
// Runs even after an xmlError has been logged, because the branch above does not return.
addResult(response.request.uri.href, result.Hilltop.Measurement.Data.E.I1, moment(result.Hilltop.Measurement.Data.E.T));
});
}
};
if (fileError) {
console.log(fileError);
} else {
config = JSON.parse(data); //read in config file
for (eachSource = 0; eachSource < config.sources.length; eachSource++) {
config.sources[eachSource] = config.sources[eachSource].replace(/ /g, "%20"); //replace every space with "%20"
request(config.sources[eachSource], processResponse); //request each source
}
}
});
var writeOutputData, createOutputData, numResults = 0, data = [], eachDataPoint, multipliedFlow = 0;
// Writes `output` as a single header-less CSV row to config.outputFile.
// A write error is logged and, while `attempts` is below 2, the write is
// tried again 500 ms later with the attempt counter increased by one.
writeOutputData = function (output, attempts) {
  var writer = csv.writeToPath(config.outputFile, [ output ], { headers : false });

  var reportSuccess = function () {
    console.log("successfully wrote data to: ", config.outputFile);
  };

  var retryAfterFailure = function (err) {
    console.log(err);
    if (!(attempts < 2)) {
      return; // retries used up
    }
    setTimeout(function () {
      writeOutputData(output, attempts + 1);
    }, 500);
  };

  writer.on("finish", reportSuccess).on("error", retryAfterFailure);
};
// Builds the row that is written to the CSV file: destination variable,
// number of values + 1, an "HHmm" timestamp, then one scaled and clamped
// value per entry of `data`, in the order the entries have in `data`.
createOutputData = function () {
  var csvTimestamp, oldestFirst, multiplier, flow, i, output = [];
  if (config.hasOwnProperty("timestampFromSource")) {
    csvTimestamp = data.filter(function (a) {
      return a.url === config.sources[config.timestampFromSource];
    })[0].timestamp.format("HHmm");
    console.log("timestamp from source [" + config.timestampFromSource + "]:", csvTimestamp);
  } else {
    // Sort a COPY: Array.prototype.sort works in place, and sorting `data`
    // itself would put the values below into timestamp order instead of
    // source order.
    oldestFirst = data.slice().sort(function (a, b) { //sort results from oldest to newest
      return a.timestamp.unix() - b.timestamp.unix();
    });
    csvTimestamp = oldestFirst[0].timestamp.format("HHmm");//use the oldest date for the timestamp
    console.log("timestamp from oldest source:", csvTimestamp);
  }
  //build array to represent data to be written
  output.push(config.plDestVar); //pl var head address first
  output.push(config.sources.length + 1); //number if vars to import
  output.push(csvTimestamp); //the date of the data
  // Falls back to 10 when no multiplier is configured.
  multiplier = config.flowMultiplier ? config.flowMultiplier : 10;
  for (i = 0; i < data.length; i++) { //add each data point
    flow = Math.round(data[i].value * multiplier);
    // Clamp to the range 0..32766.
    if (flow > 32766) {
      flow = 32766;
    } else if (flow < 0) {
      flow = 0;
    }
    output.push(flow);
  }
  console.log(output);
  writeOutputData(output, 0); //write the results, 0 is signalling first attempt
};
I think that the url to index code needs debugging.
Here is an example that uses an object that is pre-populated with keys in the for loop.
`
var http = require('http');
var fs = require("fs");
// Called once with the map of host -> response text (or "ERROR") after every
// request has finished.
var allRequestsComplete = function(results){
  console.log("All Requests Complete");
  console.log(results);
};

// Loads the source list from urls.json, requests "/" from every host in
// parallel and reports all responses together, keyed by host.
fs.readFile("urls.json", function (fileError, data) {
  if (fileError) {
    console.log(fileError);
    return;
  }

  var config;
  try {
    config = JSON.parse(data); //read in config file
  } catch (parseError) {
    console.log(parseError);
    return;
  }

  // http.get is given a host name, so the scheme is stripped from each source.
  var hosts = config.sources.map(function (source) {
    return source.replace(/ /g, "%20").replace("http://", "");
  });
  var allResponses = {};
  var responseCount = 0;

  var requestComplete = function(url, fileData){
    responseCount++;
    allResponses[url] = fileData;
    if(responseCount === hosts.length){
      allRequestsComplete(allResponses);
    }
  };

  if (hosts.length === 0) {
    // Nothing to wait for; report the empty result straight away.
    allRequestsComplete(allResponses);
    return;
  }

  hosts.forEach(function (url) {
    var finished = false;
    // A request may signal failure on more than one emitter; count it once.
    var finish = function (fileData) {
      if (finished) {
        return;
      }
      finished = true;
      requestComplete(url, fileData);
    };

    allResponses[url] = "Waiting";
    http.get({host: url, path: "/"}, function(response) {
      var str = '';
      response.on('error', function () {
        finish("ERROR");
      });
      response.on('data', function (chunk) {
        str += chunk;
      });
      response.on('end', function () {
        finish(str);
      });
    }).on('error', function () {
      // Connection-level failures are emitted on the request object; without
      // this handler the batch would never be reported as complete.
      finish("ERROR");
    });
  });
});
`
I agree with @Kevin B: you cannot assume that async callbacks will return in the same order in which you sent them. However, you can ensure the order by passing an index along with each request instead of looking it up in processResponse.
say you add the following to addResult
// Stores one parsed result at the given position of `data` and builds the
// output as soon as the number of stored results equals the number of
// configured sources.
addResult = function (index, url, value, timestamp) {
  var entry = { value : value, timestamp : timestamp, url : url };
  data[index] = entry;
  numResults++;
  var allResultsIn = numResults === config.sources.length;
  if (allResultsIn) {
    createOutputData(); //once all results are in build the output file
  }
}
and use an extra function to call your request
/**
 * Requests one source, parses the XML reply and stores the measurement under
 * the given index so the results keep the order of the source list.
 *
 * A failed request or failed XML parse is logged and produces no result.
 *
 * @param {number} index - Position of `url` in the source list.
 * @param {string} url - Source to request.
 */
function doRequest(index, url) {
  request(url, function(responseError, response, body) {
    if (responseError) {
      console.log(responseError);
      return;
    }
    parseXML(body, {
      explicitArray : false
    }, function (xmlError, result) {
      if (xmlError) {
        // `result` cannot be relied on after a parse error; stop here instead
        // of dereferencing it.
        console.log(xmlError);
        return;
      }
      var sample = result.Hilltop.Measurement.Data.E;
      addResult(index, response.request.uri.href, sample.I1, moment(sample.T));
    });
  });
}
then you can also change your loop to:
// Percent-encodes the spaces in every configured source and starts its
// request together with its position in config.sources.
for (eachSource = 0; eachSource < config.sources.length; eachSource++) {
config.sources[eachSource] = config.sources[eachSource].replace(/ /g, "%20"); //replace every space with "%20"
doRequest(eachSource, config.sources[eachSource]); //request each source
}

Categories

Resources