How to get a specific GET Request with a headless browser? - javascript

How can I get a specific GET Request and the information in this request like https://example.de/ca.aspx?campaign=123456&pitype=Content via PhantomJS or another headless browser?

var Url = "http://www.google.de";
var params = new Array();
var webPage = require('webpage');
var page = webPage.create();
var targetJSON = {};
page.open(Url);
// the request url you want to catch
page.onResourceRequested = function(requestData, networkRequest) {
var match = requestData.url.match(/google.com\/test.aspx/g);
if (match != null) {
var targetString = decodeURI(JSON.stringify(requestData.url));
var klammerauf = targetString.indexOf("{");
var jsonobjekt = targetString.substr(klammerauf, (targetString.indexOf("}") - klammerauf) + 1);
// request contains some JSON stuff in my case, so I decode it as a JSON object
targetJSON = (decodeURIComponent(jsonobjekt));
console.log(targetJSON);
phantom.exit();
}
};

Related

Adding a Parameter to Url with javascript

I am trying to add the parameter "referer=" to my url corresponding to the trafic referer of a new session.
I used some of the code from this topic... but it keeps reloading the page in a loop... then the url is like :
https://example.com?refere=facebookreferer=facebookreferer=facebook
Note:
I have been using this solution 1 :
function addOrUpdateUrlParam(name, value)
{
var ref = document.referrer;
var refsplit = ref.split(".")[1];
var href = window.location.href;
var regex = new RegExp("[&\\?]" + name + "=");
if(regex.test(href))
{
regex = new RegExp("([&\\?])" + name + "=\\d+");
{
else
{
if(href.indexOf("?") > -1)
window.location.href = href + "&" + name + "=" + value;
else
window.location.href = href + "?" + name + "=" + value;
}
if (refsplit != "example") {
return addOrUpdateUrlParam("referer", refsplit);
}
}
And this solution 2:
function () {
var ref = document.referrer;
var refsplit = ref.split(".")[1];
if (refsplit != "example") {
return location.search += "referer=" + refsplit;
}
}
Edit 1:
Thanks to Prasanth I improved the code to :
function () {
var ref = document.referrer;
var refsplit = ref.split(".")[1];
var currentUrl = location.href;
var url1 = currentUrl += "?referer="+refsplit;
var url2 = currentUrl += "&referer="+refsplit;
if(currentUrl.indexOf("?") < 0) {
return window.location = url1;
} else {
return window.location = url2;
}
}
However, it is returning both conditions :
https://example.com/?referer=facebook&referer=facebook
Edit 2:
So after many attempts, I achieved it by working with the parameters of the url (location.search) instead of the full url (location.href) :
function addRefererParam () {
var ref = document.referrer; //Get Referrer
var refDomain = ref.match(/[^(?:http:\/\/|www\.|https:\/\/)]([^\/]+)/i)[0]; //Extract Referrer Domain name for better readability
var params = location.search; //Get Url parameters
if (refDomain.match(/mydomain|null|undefined/i)) { //check if domain not null or own domain.
return params ;
} else {
return params += "utm_source=" + refDomain; //create new query string with referrer domain
}
}
However, it is no making a persistent query string through browsing... how can I make the new parameters persistent ?
Obtain the url of the current window and after the domain name just concat your url with &referer=value.
var currentUrl = location.href;
var paramsInUrl = currentUrl.split('&');
var flag = true;
for(var i in paramsInUrl)
{
if(!paramsInUrl[i].includes('referer=')
{
continue;
}
else
{
flag = false;
break;
}
}
if(flag)
{
currentUrl += '&referer='+value;
window.location = currentUrl;
}
For what it's worth (because the more generic question of just how to do this generally is what lead me to this post), I've made a 178 byte helper function that takes in an object of the query parameters you want to add to a url for a GET request (in similar format for how you might add headers to a request) and made an npm package for it here: https://www.npmjs.com/package/add-query-params-to-url
Hopefully this is helpful to some.

How to create a valid signature for the mws amazon (javascript)?

var protocol = "https";
var method = "POST";
var host = "mws.amazonservices.com";
var uri = "/Products/2011-10-01";
var marketPlaceId = "ATVPDKIKX0DER";
function generateRequest(asin, action){
var today = new Date();
time = today.toISOString();
var parameters = {
// "ASINList.ASIN.1":asin,
"Query":asin,
"AWSAccessKeyId":AWSAccessKeyId,
"Action": action,
"MarketplaceId":marketPlaceId,
"SellerId": SellerId,
"SignatureMethod":"HmacSHA256",
"SignatureVersion":"2",
"Timestamp":time,
"Version":"2011-10-01"
};
parameters = $.param( parameters );
var messageToEncrypt = method+"\n"+host+"\n"+uri+"\n"+parameters;
var sig = CryptoJS.HmacSHA256(messageToEncrypt, SecretKey);
sig = sig.toString(CryptoJS.enc.Base64);
sig = encodeURIComponent(sig);
parameters = parameters+"&Signature="+sig;
var mwsRequest = protocol+"://"+host+uri+"?"+parameters;
return mwsRequest;
}
// var asaUrl = generateRequest('B01I94N9TC','GetMatchingProduct');
var asaUrl = generateRequest('B01I94N9TC','ListMatchingProducts');
$.ajax({
url:asaUrl,
method: "POST",
success: function(data){
console.log(data)
}
});
It gives an error
"Check your AWS Secret Access Key and signing method. Consult the service documentation for details"
but if you send to Get Matching Product is operating normally

CasperJS - Scraper not navigating to the next page

The following code is a simple scraper written in CasperJS.
var casper = require('casper').create();
var url = casper.cli.get(0);
var page1 = casper.cli.get(1);
var page2 = casper.cli.get(2);
//console.log(page2);
var proxy = casper.cli.get(3);
//alert(page1);
var exp = /[-a-zA-Z0-9#:%_\+.~#?&//=]{2,256}\.[a-z]{2,4}\b(\/[-a-zA-Z0-9#:%_\+.~#?&//=]*)?/gi;
var regex = new RegExp(exp);
var baseUrl = url;
//console.log(baseUrl);
var nextBtn = "a.navigation-button.next";
var allLinks = [];
casper.start(baseUrl);
casper.waitForSelector(nextBtn, processPage);
casper.run();
function processPage() {
for (var i = page1; i < page2; i = i + 1) {
console.log(i);
var pageData = this.evaluate(getPageData);
allLinks = allLinks.concat(pageData);
if (!this.exists(nextBtn)) {
return;
};
this.thenClick(nextBtn).then(function() {
//this.echo(i);
this.echo(this.getCurrentUrl());
//this.wait(1000);
});
};
}
function getPageData(){
//return document.title;
var links = document.getElementsByClassName('pro-title');
links = Array.prototype.map.call(links,function(link){
return link.getAttribute('href');
});
return links;
};
casper.then(function(){
//require('utils').dump(allLinks);
this.each(allLinks,function(self,link){
if (link.match(regex)) {
self.thenOpen(link,function(a){
jsonObj = {};
jsonObj.title = this.fetchText('a.profile-full-name');
jsonObj.services = this.getHTML('div.info-list-text span:nth-child(2) span');
jsonObj.services = jsonObj.services.replace(/&/g,"and");
jsonObj.location = this.getHTML('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
//jsonObj.contact = this.fetchText('span.pro-contact-text');
jsonObj.description = this.getHTML('div.profile-about div:nth-child(1)');
//jsonObj.description.replace(/\s/g, '');
//require('utils').dump(jsonObj);
//jsonObj.description = jsonObj.description.replace(/[\t\n]/g,"");
//jsonObj = JSON.stringify(jsonObj, null, '\t');
//console.log(i);
require('utils').dump(jsonObj);
});
};
});
});
I am executing this script as follows,
casperjs scraping.js http://www.houzz.com/professionals/c/Chicago--IL/p/15 1 3
The first CLI argument is the starting URL. The second and third arguments are the starting and ending page numbers of the scrape.
I am able to extract data from the first page, but I don't understand why I am not able to extract data from any of the consequent pages.
You cannot mix synchronous and asynchronous code like this in processPage. The loop is immediately executed, but the click and the loading of the next page happens asynchronously. The evaluation of the page has to be done asynchronously:
function processPage() {
for (var i = page1; i < page2; i = i + 1) {
this.then(function(){
console.log(i);
var pageData = this.evaluate(getPageData);
allLinks = allLinks.concat(pageData);
if (!this.exists(nextBtn)) {
return;
}
this.thenClick(nextBtn).then(function() {
this.echo(this.getCurrentUrl());
});
});
};
}

Get signed_request in Node.js (Express) Facebook canvas app

is there any way how to get and parse signed_request in Node.js Facebook page tab app? I need to know page id and if user liked the page...
I did this a little while ago, and ended up writing a small library to do it. The original CoffeeScript can be found at https://gist.github.com/fbef51815ab6f062b51a#file_signed_request.coffee, here is a JavaScript translation:
var crypto = require('crypto');
SignedRequest = (function() {
function SignedRequest(secret, request) {
this.secret = secret;
this.request = request;
this.verify = this.verify.bind(this);
var parts = this.request.split('.');
this.encodedSignature = parts[0];
this.encoded = parts[1];
this.signature = this.base64decode(this.encodedSignature);
this.decoded = this.base64decode(this.encoded);
this.data = JSON.parse(this.decoded);
}
SignedRequest.prototype.verify = function() {
if (this.data.algorithm !== 'HMAC-SHA256') {
return false;
}
var hmac = crypto.createHmac('SHA256', this.secret);
hmac.update(this.encoded);
var result = hmac.digest('base64').replace(/\//g, '_').replace(/\+/g, '-').replace(/\=/g, '');
return result === this.encodedSignature;
};
SignedRequest.prototype.base64encode = function(data) {
return new Buffer(data, 'utf8').toString('base64').replace(/\//g, '_').replace(/\+/g, '-').replace(/\=/g, '');
};
SignedRequest.prototype.base64decode = function(data) {
while (data.length % 4 !== 0) {
data += '=';
}
data = data.replace(/-/g, '+').replace(/_/g, '/');
return new Buffer(data, 'base64').toString('utf-8');
};
return SignedRequest;
})();
module.exports = SignedRequest;
Which you can use like this:
var verifier = new SignedRequest(clientSecret, signedRequest);
verifier.verify() // whether or not the signed request verifies
verifier.data // the data from the signed request

How do get param from a url

As seen below I'm trying to get #currentpage to pass client params
Can someone help out thanks.
$(document).ready(function() {
window.addEventListener("load", windowLoaded, false);
function windowLoaded() {
chrome.tabs.getSelected(null, function(tab) {
document.getElementById('currentpage').innerHTML = tab.url;
});
}
var url = $("currentpage");
// yes I relize this is the part not working.
var client = jQuery.param("currentpage");
var page = jQuery.param("currentpage");
var devurl = "http://#/?clientsNumber=" + client + "&pageName=" + page ;
});
This is a method to extract the params from a url
function getUrlParams(url) {
var paramMap = {};
var questionMark = url.indexOf('?');
if (questionMark == -1) {
return paramMap;
}
var parts = url.substring(questionMark + 1).split("&");
for (var i = 0; i < parts.length; i ++) {
var component = parts[i].split("=");
paramMap [decodeURIComponent(component[0])] = decodeURIComponent(component[1]);
}
return paramMap;
}
Here's how to use it in your code
var url = "?c=231171&p=home";
var params = getUrlParams(url);
var devurl = "http://site.com/?c=" + encodeURIComponent(params.c) + "&p=" + encodeURIComponent(params.p) + "&genphase2=true";
// devurl == "http://site.com/?c=231171&p=home&genphase2=true"
See it in action http://jsfiddle.net/mendesjuan/TCpsD/
Here's the code you posted with minimal changes to get it working, it also uses $.param as it's intended, that is to create a query string from a JS object, this works well since my suggested function returns an object from the url
$(document).ready(function() {
// This does not handle arrays because it's not part of the official specs
// PHP and some other server side languages support it but there's no official
// consensus
function getUrlParams(url) {
var paramMap = {};
var questionMark = url.indexOf('?');
if (questionMark == -1) {
return paramMap;
}
var parts = url.substring(questionMark + 1).split("&");
for (var i = 0; i < parts.length; i ++) {
var component = parts[i].split("=");
paramMap [decodeURIComponent(component[0])] = decodeURIComponent(component[1]);
}
return paramMap;
}
// no need for the extra load listener here, jquery.ready already puts
// your code in the onload
chrome.tabs.getSelected(null, function(tab) {
document.getElementById('currentpage').innerHTML = tab.url;
});
var url = $("currentpage");
var paramMap = getUrlParams(url);
// Add the genphase parameter to the param map
paramMap.genphase2 = true;
// Use jQuery.param to create the url to click on
var devurl = "http://site.com/?"+ jQuery.param(paramMap);
$('#mydev').click( function(){
window.open(devurl);
});
});

Categories

Resources