so i have create a function where i call an api and then fetch from that response but some time the api which i'm using right know it's return success so, i wan't to check for successful response from webscraping.com and retry 5 times if the response is failing, because the actual API request might fail and we need to track that
const wrapper = require('./wrapper');
const {save } = require('./api.service');
function hasHex (url){
var start = new Date();
var googleRemarkeging;
var facbookPixel;
var googleCheck = 'https://www.googletagmanager.com/gtm.js?id=';
var facbookCheck = 'https://connect.facebook.net/en_US/fbevents.js';
/////////////////////////// MAIN FUNCTION //////////////////////////
const options = {
"method": "GET",
"hostname": "api.webscrapingapi.com",
"port": null,
"path": `/v1?api_key=${wrapper.api_key}&url=${encodeURIComponent(wrapper.target_url(url))}&render_js=1&timeout=60000`,
};
const req = wrapper.http.request(options, function(res) {
const chunks = [];
res.on("data", function(chunk) {
chunks.push(chunk);
});
res.on("end", function() {
const body = Buffer.concat(chunks);
const response = body.toString();
const $ = wrapper.cheerio.load(response);
// console.log($.html())
if (res.statusCode == 200) {
$("script").each(function(index,text){
if($(text).text().includes(googleCheck)== true){
googleRemarkeging = "Yes";
return false;
}
})
$("script").each(function(index,text){
if($(text).text().includes(facbookCheck)== true){
facbookPixel = "Yes";
return false;
}
})
const data ={
"status":"success",
"result":{
googleRemarkeging:googleRemarkeging,
facbookPixel:facbookPixel
}
}
var result = JSON.stringify(data.result);
var duration = (new Date() - start)/1000;
var api_endpoint = '/api/hasHex'
save(api_endpoint,url,data,duration,result);
console.log(data);
}
});
});
req.end();
}
module.exports = hasHex;
Related
i want to check specific endpoints which are running on specific events.
I have a function which is working fine with xhr type of request but i want to get the response with fetch type of requests and not finding a way to do that.
(function () {
var proxied = window.XMLHttpRequest.prototype.open;
window.XMLHttpRequest.prototype.open = function (requestType, endPoint) {
var pointer = this;
var intervalId = window.setInterval(() => {
if (pointer.readyState != 4) return;
const end_point_list = [
'/cart/add.js',
'/cart/update.js',
'/cart/change.js',
'/cart/clear.js',
'/cart/add',
'/cart/update',
'/cart/change',
'/cart/clear',
];
const method = pointer._method;
const url = pointer._url;
console.log(url)
if (end_point_list.includes(url) && method.toUpperCase() === 'POST') {
console.log(url)
const responseBody = pointer.response
? JSON.parse(pointer.response)
: '';
const shop_name = window.Shopify.shop;
console.log({ url, responseBody, shop_name });
}
clearInterval(intervalId);
}, 1);
proxied.apply(this, arguments);
};
})();
How can I handle request fails in this example of axios.all requests. I.e. if all servers are responde with JSON all is okay and I have JSON file at end of a cycle. But if one of this servers not responde with JSON or not responde at all I do have nothing in "/data.json" file, even all other servers are working perfectly. How can I catch a server fail and skip it?
var fs = require("fs");
var axios = require('axios');
var util = require('util');
var round = 0;
var tmp = {};
var streem = fs.createWriteStream(__dirname + '/data.json', {flags : 'w'});
toFile = function(d) { //
streem.write(util.format(d));
};
start();
setInterval(start, 27000);
function start(){
streem = fs.createWriteStream(__dirname + '/data.json', {flags : 'w'});
monitor();
}
function monitor(){
axios.all([
axios.get('server1:api'),
axios.get('server2:api'),
axios.get('server3:api'),
axios.get('server4:api'),
]).then(axios.spread((response1, response2, response3, response4) => {
tmp.servers = {};
tmp.servers.server1 = {};
tmp.servers.server1 = response1.data;
tmp.servers.server2 = {};
tmp.servers.server2 = response2.data;
tmp.servers.server3 = {};
tmp.servers.server3 = response3.data;
tmp.servers.server4 = {};
tmp.servers.server4 = response4.data;
toFile(JSON.stringify(tmp));
round++;
streem.end();
streem.on('finish', () => {
console.error('Round: ' + round);
});
})).catch(error => {
console.log(error);
});
}
The most standard way to approach this would be a recursive function like below.
let promises = [
axios.get('server1:api'),
axios.get('server2:api'),
axios.get('server3:api'),
axios.get('server4:api'),
];
async function monitor() {
const responses = (arguments.length === 1 ? [arguments[0]] : Array.apply(null, arguments))[0];
const nextPromise = promises.shift();
if (nextPromise) {
try {
const response = await getSentenceFragment(offset);
responses.push(response);
}
catch (error) {
responses.push({});
}
return responses.concat(await monitor(responses));
} else {
return responses;
}
}
monitor([]).then(([response1, response2, response3, response4]) => {
tmp.servers = {};
tmp.servers.server1 = {};
tmp.servers.server1 = response1.data;
tmp.servers.server2 = {};
tmp.servers.server2 = response2.data;
tmp.servers.server3 = {};
tmp.servers.server3 = response3.data;
tmp.servers.server4 = {};
tmp.servers.server4 = response4.data;
toFile(JSON.stringify(tmp));
round++;
streem.end();
streem.on('finish', () => {
console.error('Round: ' + round);
});
});
I'm trying to get the first 5 pages of search results with google custom search API ...
So far I've tried to achieve the result using nested function but with no luck.
I know that I'm messing with callback but, so far I've not figure out the correct way (without using promises library) to solve my problem.
Could some of you point me out in the right direction?
Thanks.
app.get('/assesment', function(req, res){
console.log('route: /assesment');
var api_key = '';
var customsearch = google.customsearch('v1');
var response = "";
var number_of_pages = 5;
var next_page = 1;
var exit = 0
const CX = 'XXXXX';
const API_KEY = 'XXXXX';
const SEARCH = 'Test Query';
console.log('start');
// console.log('QUERY PAGE: '+pages);
doSearch(CX, SEARCH, API_KEY, next_page, function(resp){
res.send(resp);
});
//
// Functions
//
function doSearch(_cx, _search, _api_key, _start, callback ){
var response = '';
customsearch.cse.list({ cx: _cx, q: _search, auth: _api_key, start: _start }, function (err, resp) {
if (err) {
response = JSON.stringify(err);
} else {
// Got the response from custom search
console.log('Result: ' + resp.searchInformation.formattedTotalResults);
if (resp.items && resp.items.length > 0) {
console.log('First result of '+resp.items.length+' is ' + resp.items[0].title);
for (var i = 0; i < resp.items.length; i++) {
response += resp.items[i].title+"<br>";
response += resp.items[i].link +"<br><hr>";
}
}
res = {
response: response,
next_page: resp.queries.nextPage
}
// res =
}
_start += 1;
if (_start < 6 ) {
doSearch(_cx, _search, _api_key, _start, _start*10+1,
function(resp){
response += resp;
});
}
if (callback && typeof callback === "function") callback(response);
});
};
});
You can use a third-party service like SerpApi to scrape Google and get back structured JSON.
Example using the Node.js library to get 4 page of results:
var gsr = require('GoogleSearchResults')
let serp = new gsr.GoogleSearchResults("demo")
serp.json({
q: "Coffee",
num: 10,
start: 30,
location: "Portland"
}, (result) => {
console.log(result)
})
var url = require('url');
var http = require('http');
var downloader = require("./downloader");
http.createServer(onRequest).listen(8080);
function onRequest(request,response)
{
if(request.method=='POST')
handlePost(request,response);
else
handleGet(request,response);
}
function handlePost(request,response)
{
var data = '';
request.on('data',function(chunk){
data+=chunk.toString();
});
request.on('end',downloadTrainStatus);
function downloadTrainStatus()
{
var downloadPromise = downloader.download(data);
downloadPromise.then(function (responses) {
var total = responses.length;
var result = [];
for( var i = 0 ; i < total ; i++)
result.push(JSON.parse(responses[i][1]));
response.writeHead(200);
response.write(JSON.stringify(result));
response.end();
}, function (err) {
console.log(err);
var result = { status : "error" };
response.writeHead(200);
response.write(JSON.stringify(result));
response.end();
})
}
}
function handleGet(request,response)
{
console.log("GET request");
response.writeHead(200);
response.write("Get request works !!");
response.end();
}
The above file server.js simply starts the web server. It uses a file downloader.js included below. The downloader.js just parallely makes 10 or 20 parallel web requests to a url which returns a JSON response. On running this process for half a day, the memory usage of the process shoots to 1.5GB. Is there any memory leak in this code ?
var Promise = require('bluebird')
var request = Promise.promisify(require('request'))
function download(json)
{
var requests = []
var data = JSON.parse(json);
for(var i = 0; i< data.total ; i++)
{
var stationCode = stations[i].StationCode;
var journeyDay = stations[i].JourneyDayCode;
requests.push(downloadStatus());
}
return Promise.all(requests);
}
function downloadStatus()
{
var url = "http://google.com";
var headers = {'User-Agent' : 'Apache-HttpClient/UNAVAILABLE (java 1.4)'};
var options = {
url: url ,
headers: headers,
timeout: 15 * 1000
}
return request(options);
}
module.exports.download = download;
I'm trying to send HTTP requests via StreamSocket, but response is truncated with
"failedWinRTError: The object has been closed."
Here is my code:
var count, hostName, raw_request, raw_response, reader, socketProtection, startReader, streamSocket, writer;
streamSocket = new Windows.Networking.Sockets.StreamSocket();
hostName = new Windows.Networking.HostName("www.reddit.com", "80");
raw_response = "";
count = 0;
startReader = function() {
return reader.loadAsync(8 * 1000).done(function(bytesRead) {
raw_response += reader.readString(reader.unconsumedBufferLength);
if (raw_response.indexOf("</html>") > 0) {
return;
} else {
startReader();
}
}, function(error) {
raw_response += reader.readString(reader.unconsumedBufferLength);
window.raw_response = raw_response;
return;
});
};
streamSocket.connectAsync(hostName, "80", 0).done(function(response) {
var string;
reader = new Windows.Storage.Streams.DataReader(streamSocket.inputStream);
reader.inputStreamOptions = 1;
writer = new Windows.Storage.Streams.DataWriter(streamSocket.outputStream);
string = "Hello world";
writer.writeString(raw_request);
return writer.storeAsync().done(function() {
writer.flushAsync();
writer.detachStream();
return startReader();
});
});
I noticed that the beginning of the response is truncated as well.
This is what I get at the beginning of HTTP responses.
/1.1 200 OK
Also strangely... HTTPS requests work perfectly.
Any idea what I'm doing wrong? Thanks :)
Remove http:// from the host name and the second parameter is not needed:
var hostName = new Windows.Networking.HostName("www.reddit.com");
Use this object in ConnectAsync, just hostname and service name parameters are needed:
streamSocket.connectAsync(hostName, "80").done(function (response) {
// ....
}, function (error) {
console.log(error);
});
UPDATE: Ok, if the connection is being closed, probably the server closes it. Are you sending a well formed request? Here is an example:
var raw_request, raw_response, reader, writer;
var streamSocket = new Windows.Networking.Sockets.StreamSocket();
function doRequest() {
var hostName = new Windows.Networking.HostName("www.reddit.com");
streamSocket.connectAsync(hostName, "808").then(function () {
reader = new Windows.Storage.Streams.DataReader(streamSocket.inputStream);
reader.inputStreamOptions = Windows.Storage.Streams.InputStreamOptions.partial;
writer = new Windows.Storage.Streams.DataWriter(streamSocket.outputStream);
raw_request = "GET / HTTP/1.1\r\nHost: www.reddit.com/\r\nConnection: close\r\n\r\n";
writer.writeString(raw_request);
return writer.storeAsync();
}).then(function () {
raw_response = "";
return startReader();
}, function (error) {
console.log(error);
});
}
function startReader() {
return reader.loadAsync(99999999).then(function (bytesRead) {
raw_response += reader.readString(reader.unconsumedBufferLength);
if (bytesRead === 0) {
window.raw_response.value = raw_response;
return;
}
return startReader();
});
};