Related
I'm fetching data from different sources and the output string looks like the following:
"addressId":"132234","businessEntryCount":2026},{"district":"Nordend-West","districtSlug":"frankfurt-am-main-nordend-west","addressId":"132232","businessEntryCount":1925}],"generated":"2022-01-23 19:35:43.469","grisuLocation":null,"district":null,"geo":null};
kt.Data.SearchResult.distanceLocation = "Frankfurt am Main";
kt.Data.SearchResult.distanceStreetnumber = "";
kt.Service.citySlug = 'frankfurt';
kt.Data.what = 'Handwerker';
kt.Data.where = 'Frankfurt am Main';
kt.Data.trade = 'Maler';
{"#context":"http:\/\/schema.org","#type":"SearchResultsPage","mainEntity":{"#type":"ItemList","itemListElement":[{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Dachdecker Olaf Pocklitz","url":"http:\/\/www.test.de","email":"test#t-online.de","address":{"#type":"PostalAddress","postalCode":"65931","addressLocality":"Frankfurt","addressRegion":"Hessen",
The above string content is my data result. I want to have only all the email addresses left to save them in a file. In the above example it would be test#t-online.de and if we have more than one email address, then I want the second email address in a new line. Im struggling how to perfectly filter them out and afterwards save them one by line. I already made it work to save it but I don't know how to only get the email address out of it:
console.log('received data: ' + data)
fs.writeFileSync('./results/test.json', data)
EDIT:
This is my code so far:
var matches = data.match(/\"mainEntity":{"(.*?)\"}/);
var preResult = [matches]
//.itemListElement[0].item.email
console.log('received data: ' + preResult)
fs.writeFileSync('./results/test.json', preResult)
and my result but im not able to access the email:
"mainEntity":{"#type":"ItemList","itemListElement":[{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Dachdecker Olaf test","url":"http:\/\/www.test.de","email":"test#t-online.de","address":{....
With this selector: element.mainEntity.itemListElement[0].item.email
const element = {
"#context":"http:\/\/schema.org",
"#type":"SearchResultsPage",
"mainEntity":{
"#type":"ItemList",
"itemListElement":[
{
"#type":"ListItem",
"item":{
"#type":"LocalBusiness",
"name":"Dachdecker Olaf Pocklitz",
"url":"http:\/\/www.test.de",
"email":"test#t-online.de",
"address": ""
}
}
]
}}
console.log(element.mainEntity.itemListElement[0].item.email)
Update
d = {"#context":"http:\/\/schema.org","#type":"SearchResultsPage","mainEntity":{"#type":"ItemList","itemListElement":[{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Dachdecker Olaf Pocklitz","url":"http:\/\/www.pocklitz.de","email":"opocklitz#t-online.de","address":{"#type":"PostalAddress","postalCode":"65931","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Erfurter Weg 21"},"telephone":"(069) 765820","aggregateRating":{"#type":"AggregateRating","worstRating":1,"bestRating":5,"ratingValue":1,"reviewCount":3,"itemReviewed":{"#type":"Organization","name":"Dachdecker Olaf Pocklitz"}}}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Dachdeckerei Havan","url":"http:\/\/www.dachdeckereihavan.de","email":"info#dachdeckereihavan.de","address":{"#type":"PostalAddress","postalCode":"60599","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Offenbacher Landstr. 364"},"telephone":"(069) 651540"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Dachdeckerfachbetrieb Thomas Piller","email":"piller-bedachungen#gmx.de","address":{"#type":"PostalAddress","postalCode":"60439","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Eduard-Bernstein-Weg 3"},"aggregateRating":{"#type":"AggregateRating","worstRating":1,"bestRating":5,"ratingValue":5,"reviewCount":1,"itemReviewed":{"#type":"Organization","name":"Dachdeckerfachbetrieb Thomas Piller"}}}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Dachdeckermeisterbetrieb Tuvana","url":"http:\/\/www.tuvana-dach.de","email":"info#tuvana-dach.de","address":{"#type":"PostalAddress","postalCode":"60596","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Tiroler Str. 28c"},"telephone":"(069) 78809681"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Decos GmbH","url":"http:\/\/www.decos-gmbh.de","email":"decosgmbh#web.de","address":{"#type":"PostalAddress","postalCode":"60388","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Flinschstr. 21"},"telephone":"(069) 42603940"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Dejan Miloradovic","email":"dmilo#web.de","address":{"#type":"PostalAddress","postalCode":"60487","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Sophienstr. 25"},"telephone":"(069) 703177"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Denstedt Bedachungs-GmbH","email":"denstedt.bedachungsgmbh#t-online.de","address":{"#type":"PostalAddress","postalCode":"60388","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Riedstr. 43"},"telephone":"(06109) 31967"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Der Zollstock - Ulrich Heuser & Hans-J\u00fcrgen Kurth","url":"https:\/\/www.der-zollstock.de","email":"kundenbetreuung#der-zollstock.de","address":{"#type":"PostalAddress","postalCode":"60388","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"R\u00f6ntgenstr. 8"},"telephone":"(06109) 378400"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Die Stadtzimmerei Stefan Scherer","url":"http:\/\/www.die-stadtzimmerei.de","email":"info#die-stadtzimmerei.de","address":{"#type":"PostalAddress","postalCode":"60385","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Wittelsbacherallee 102"},"telephone":"(069) 43059517"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"einfeldt Baudekoration GmbH","url":"https:\/\/www.einfeldt-baudeko.de","email":"info#einfeldt-baudeko.de","address":{"#type":"PostalAddress","postalCode":"60386","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Orber Str. 42"},"telephone":"(06039) 3878"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Emil Scholz GmbH & Co. KG","url":"https:\/\/www.malerteam-scholz.de","email":"info#malerteam-scholz.de","address":{"#type":"PostalAddress","postalCode":"60385","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Ostparkstr. 55"},"telephone":"(069) 943411-0","aggregateRating":{"#type":"AggregateRating","worstRating":1,"bestRating":5,"ratingValue":1,"reviewCount":1,"itemReviewed":{"#type":"Organization","name":"Emil Scholz GmbH & Co. KG"}}}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Ewald Thamer","email":"ewald.thamer#t-online.de","address":{"#type":"PostalAddress","postalCode":"65931","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Hesselbergweg 30"},"telephone":"(069) 36402623"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"Falke Designs","email":"info#falke-designs.de","address":{"#type":"PostalAddress","postalCode":"60486","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Kaufunger Str. 16"},"telephone":"(0173) 8605872"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"FarbHey GmbH & Co.KG Malerfachbetrieb","url":"https:\/\/www.farbhey.de","email":"kontakt#farbhey.de","address":{"#type":"PostalAddress","postalCode":"60433","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Am Gr\u00fcnen Graben 20"},"telephone":"(069) 516309"}},{"#type":"ListItem","item":{"#type":"LocalBusiness","name":"FBM Boras Facility & Bau Management GmbH","url":"https:\/\/www.fbmgmbh.de","email":"info#fbmgmbh.de","address":{"#type":"PostalAddress","postalCode":"60594","addressLocality":"Frankfurt","addressRegion":"Hessen","streetAddress":"Darmst\u00e4dter Landstr. 4"},"telephone":"(069) 63159155"}}],"name":"Alle Ergebnisse f\u00fcr Handwerker in Frankfurt online vergleichen"}}
console.log( d.mainEntity.itemListElement[0].item.email)
first time using jsPDF.
I'm trying to export some data via PDF, however when I try to export the array it gives me an error. I have ASSUMED you export arrays using doc.table because I can't get to find any documentation, there's a tag for it in stackoverflow for questions but didn't find anyone with the same question neither.
This is what I have so far
const generatePDF = () => {
var doc = new jsPDF('landscape', 'px', 'a4', 'false');
doc.addImage(AIBLogo, 'PNG', 250,10,100,100)
doc.setFont('Helvertica', "bold")
doc.text(60,150, "Informacion del Pedido");
doc.setFont('Helvertica', "Normal")
doc.text(60,170, "Estado del Pago: "+estadoDePago)
doc.text(60,190, "Estado de Disponibilidad: "+estadoDeDisponibilidad)
doc.text(60,210, "Total del Pedido: "+total)
doc.text(60,230, "Total Pagado: "+totalPagado)
doc.setFont('Helvertica', "bold")
doc.text(360,150, "Informacion del Estudiante");
doc.setFont('Helvertica', "Normal")
doc.text(360,170, "Nombre del Estudiante: "+nombre)
doc.text(360,190, "Escuela: "+escuela)
doc.text(360,210, "Estado del Pago: "+grado)
doc.text(360,240, "Direccion de Entrega: Retirar en institución")
doc.table(100,100, librosData)
doc.save(id+".pdf")
}
Excluding the table bit it prints out like this:
I would like to add the DataTable after all that info and make new pages if is out of bounds cause table can be up to 20 items.
UPDATE
I have try the following but is not working neither I got it from here: StackOverflow Answer
var col = ["Descripcion", "Tipo", "Editorial","Precio"]
row = []
for (let i = 0; i < librosData.length; i++){
var temp = [librosData[i].descripcion, librosData[i].tipo, librosData[i].editorial, librosData[i].precio];
row.push(temp)
}
doc.autoTable(col,row, {startY:380});
doc.save(id+".pdf")
I know all the data is coming correctly this is how the row is printing:
Any help or documentation is appreciate it.
Final Update
To fix the issue that says autotable is not a function just
Solution: StackOverflow
In the end to make the table I recommend this:
To do the table: How to export a table with jsPDF
To fix the compatibility: autoTable is not a function
To fix "deprecated autoTable initiation":
Initialize your autoTable the following way
import autoTable from 'jspdf-autotable'
autoTable(doc, {
head: [col],
body: row
})
Alright , so I'm making a bot in Discord for my server, and something I wanted to implement is a youtube command.
I've been searching all over and looking in the Youtube API and all I can find is their search for what seems like a browser
I'm using nodejs to run it off of my laptop, and my bot runs off of discord.jsI have a similar command that does a MAL and a Urban Dictionary search, but I've found nothing and have no idea how to do the same with youtube
I used to have a command for a python bot that was able to due this, and I've seen other Discord bots able to do it as well, so I know it's obviously possible
Basically what I'm saying is I need to be able to search for and return a youtube video URL (the first search result) from a string of search terms so the usage would look like
>>youtube Tunak Tunak Tun
Would return
https://www.youtube.com/watch?v=vTIIMJ9tUc8 , which is the first search result for that keyword(s)
EDIT:I've found the python command that would do this, but have nigh the skills nor the confidence to try to translate this to JavaScript
elif prefix and cmd=="youtube" and len(args) > 0:
try:
yword=args.replace(" ","_")
ydata= urlreq.urlopen("http://gdata.youtube.com/feeds/api/videos?vq="+yword+"&racy=include&orderby=relevance&max-results=1")
yread= str(ydata.read())
if "<openSearch:totalResults>0</openSearch:totalResults>" in yread:
room.message("I got nothin' for ya by the name of "+args)
else:
trash , yclean=yread.split("<media:player url='http://www.youtube.com/watch?v=",1)
yclean , trash=yclean.split("&",1)
room.message("http://http://www.youtube.com/watch?v="+yclean,True)
except:
room.message("Somethin ain't right")
EDIT2 (Apologies for length) : Alright! I've found something that's gotten me a lot closer!
https://www.npmjs.com/package/youtube-search
I've got a command in my bot now that goes something like this:
if (commandIs("yt" , message)){
search(args.join(' ').substring(4), opts, function(err, results) {
if(err) return console.log(err);
message.channel.sendMessage(results);
console.log(results);
});
}
So now when I enter >>yt Tunak Tunak Tun I get
[ { id: 'vTIIMJ9tUc8',
link: 'https://www.youtube.com/watch?v=vTIIMJ9tUc8',
kind: 'youtube#video',
publishedAt: '2014-03-21T07:00:01.000Z',
channelId: 'UC3MLnJtqc_phABBriLRhtgQ',
channelTitle: 'SonyMusicIndiaVEVO',
title: 'Daler Mehndi - Tunak Tunak Tun Video',
description: 'Presenting \'Tunak Tunak Tun\' music video sung by the talented Daler Mehndi Song Name - Tunak Tunak Tun Album - Tunak Tunak Tun Singer - Daler Mehndi ...',
thumbnails: { default: [Object], medium: [Object], high: [Object] } } ]
in the console and [object Object] in the discord channel.
http://i.imgur.com/Vorpn0f.png
So the problem now is I have the link in my reach, but I can not get it to return JUST the link, and I've no idea how to pull it out of that mess.
It sounds like your results object is a JSON string. This essentially means that it is a string representation of a javascript object. You can parse this into an object by using JSON.parse().
var objResults = JSON.parse(results);
console.log(objResults);
console.log(objResults.link);
EDIT
Failed to notice that your result is actually an array. You just need to access it like this: console.log(results[0].link). Shouldn't need to JSON.parse()
Okay, here's another approach that is working for me, using the google javascript API. Once again, the SO snippet doesn't run it, so I'll link you to the fiddle.
This method requires you to setup a google API key, then enable youtube API access.
I've removed my google API key from the fiddle, so you'll need to set that up. I can PM you mine if you want to test first though.
var apiKey = null //put your API key here
function search() {
var searchTerm = $('#txtSearch').val()
gapi.client.init({
'apiKey': apiKey,
'discoveryDocs': ['https://www.googleapis.com/discovery/v1/apis/youtube/v3/rest']
}).then(function() {
return gapi.client.youtube.search.list({
q: searchTerm,
part: 'snippet'
});
}).then(function(response) {
var searchResult = response.result;
$('#search-results').append(JSON.stringify(searchResult, null, 4))
console.log(searchResult.items[0])
var firstVideo = searchResult.items[0]
firstVideo.url = `https://youtube.com/watch?v=${firstVideo.id.videoId}`
$('#first-video').text(firstVideo.url).attr('href', firstVideo.url)
$('#first-video-title').text(firstVideo.snippet.title)
$('#first-video-description').text(firstVideo.snippet.description)
});
}
$('#btnSearch').on('click', function() {
$('#first-video-title').text("")
if (!apiKey) {
$('#first-video-title').text("You need to set an apiKey!")
return;
}
gapi.load('client', search)
});
#search-results { white-space: pre; font-family: monospace; }
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script src='https://apis.google.com/js/api.js'></script>
<div id="container">
<input id="txtSearch" type="text" />
<button id="btnSearch">
Search!
</button>
<br />
<p id='first-video-title'> </p>
<p id='first-video-description'></p>
<a target="_blank" id="first-video"></a>
<div id='search-results'>
</div>
</div>
I'm scraping the Madrid Assembly's website, built in aspx, and I have no idea how to simulate clicks on the links where I need to get the corresponding politicians from. I tried this:
import scrapy
class AsambleaMadrid(scrapy.Spider):
name = "Asamblea_Madrid"
start_urls = ['http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx']
def parse(self, response):
for id in response.css('div#moduloBusqueda div.sangria div.sangria ul li a::attr(id)'):
target = id.extract()
url = "http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx"
formdata= {'__EVENTTARGET': target,
'__VIEWSTATE': '/wEPDwUBMA9kFgJmD2QWAgIBD2QWBAIBD2QWAgIGD2QWAmYPZBYCAgMPZBYCAgMPFgIeE1ByZXZpb3VzQ29udHJvbE1vZGULKYgBTWljcm9zb2Z0LlNoYXJlUG9pbnQuV2ViQ29udHJvbHMuU1BDb250cm9sTW9kZSwgTWljcm9zb2Z0LlNoYXJlUG9pbnQsIFZlcnNpb249MTQuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49NzFlOWJjZTExMWU5NDI5YwFkAgMPZBYMAgMPZBYGBSZnXzM2ZWEwMzEwXzg5M2RfNGExOV85ZWQxXzg4YTEzM2QwNjQyMw9kFgJmD2QWAgIBDxYCHgtfIUl0ZW1Db3VudAIEFghmD2QWAgIBDw8WBB4PQ29tbWFuZEFyZ3VtZW50BTRHcnVwbyBQYXJsYW1lbnRhcmlvIFBvcHVsYXIgZGUgbGEgQXNhbWJsZWEgZGUgTWFkcmlkHgRUZXh0BTRHcnVwbyBQYXJsYW1lbnRhcmlvIFBvcHVsYXIgZGUgbGEgQXNhbWJsZWEgZGUgTWFkcmlkZGQCAQ9kFgICAQ8PFgQfAgUeR3J1cG8gUGFybGFtZW50YXJpbyBTb2NpYWxpc3RhHwMFHkdydXBvIFBhcmxhbWVudGFyaW8gU29jaWFsaXN0YWRkAgIPZBYCAgEPDxYEHwIFL0dydXBvIFBhcmxhbWVudGFyaW8gUG9kZW1vcyBDb211bmlkYWQgZGUgTWFkcmlkHwMFL0dydXBvIFBhcmxhbWVudGFyaW8gUG9kZW1vcyBDb211bmlkYWQgZGUgTWFkcmlkZGQCAw9kFgICAQ8PFgQfAgUhR3J1cG8gUGFybGFtZW50YXJpbyBkZSBDaXVkYWRhbm9zHwMFIUdydXBvIFBhcmxhbWVudGFyaW8gZGUgQ2l1ZGFkYW5vc2RkBSZnX2MxNTFkMGIxXzY2YWZfNDhjY185MWM3X2JlOGUxMTZkN2Q1Mg9kFgRmDxYCHgdWaXNpYmxlaGQCAQ8WAh8EaGQFJmdfZTBmYWViMTVfOGI3Nl80MjgyX2ExYjFfNTI3ZDIwNjk1ODY2D2QWBGYPFgIfBGhkAgEPFgIfBGhkAhEPZBYCAgEPZBYEZg9kFgICAQ8WAh8EaBYCZg9kFgQCAg9kFgQCAQ8WAh8EaGQCAw8WCB4TQ2xpZW50T25DbGlja1NjcmlwdAW7AWphdmFTY3JpcHQ6Q29yZUludm9rZSgnVGFrZU9mZmxpbmVUb0NsaWVudFJlYWwnLDEsIDEsICdodHRwOlx1MDAyZlx1MDAyZnd3dy5hc2FtYmxlYW1hZHJpZC5lc1x1MDAyZkVTXHUwMDJmUXVlRXNMYUFzYW1ibGVhXHUwMDJmQ29tcG9zaWNpb25kZWxhQXNhbWJsZWFcdTAwMmZMb3NEaXB1dGFkb3MnLCAtMSwgLTEsICcnLCAnJykeGENsaWVudE9uQ2xpY2tOYXZpZ2F0ZVVybGQeKENsaWVudE9uQ2xpY2tTY3JpcHRDb250YWluaW5nUHJlZml4ZWRVcmxkHgxIaWRkZW5TY3JpcHQFIVRha2VPZmZsaW5lRGlzYWJsZWQoMSwgMSwgLTEsIC0xKWQCAw8PFgoeCUFjY2Vzc0tleQUBLx4PQXJyb3dJbWFnZVdpZHRoAgUeEEFycm93SW1hZ2VIZWlnaHQCAx4RQXJyb3dJbWFnZU9mZnNldFhmHhFBcnJvd0ltYWdlT2Zmc2V0WQLrA2RkAgEPZBYCAgUPZBYCAgEPEBYCHwRoZBQrAQBkAhcPZBYIZg8PFgQfAwUPRW5nbGlzaCBWZXJzaW9uHgtOYXZpZ2F0ZVVybAVfL0VOL1F1ZUVzTGFBc2FtYmxlYS9Db21wb3NpY2lvbmRlbGFBc2FtYmxlYS9Mb3NEaXB1dGFkb3MvUGFnZXMvUmVsYWNpb25BbGZhYmV0aWNhRGlwdXRhZG9zLmFzcHhkZAICDw8WBB8DBQZQcmVuc2EfDgUyL0VTL0JpZW52ZW5pZGFQcmVuc2EvUGFnaW5hcy9CaWVudmVuaWRhUHJlbnNhLmFzcHhkZAIEDw8WBB8DBRpJZGVudGlmaWNhY2nDs24gZGUgVXN1YXJpbx8OBTQvRVMvQXJlYVVzdWFyaW9zL1BhZ2luYXMvSWRlbnRpZmljYWNpb25Vc3Vhcmlvcy5hc3B4ZGQCBg8PFgQfAwUGQ29ycmVvHw4FKGh0dHA6Ly9vdXRsb29rLmNvbS9vd2EvYXNhbWJsZWFtYWRyaWQuZXNkZAIlD2QWAgIDD2QWAgIBDxYCHwALKwQBZAI1D2QWAgIHD2QWAgIBDw8WAh8EaGQWAgIDD2QWAmYPZBYCAgMPZBYCAgUPDxYEHgZIZWlnaHQbAAAAAAAAeUABAAAAHgRfIVNCAoABZBYCAgEPPCsACQEADxYEHg1QYXRoU2VwYXJhdG9yBAgeDU5ldmVyRXhwYW5kZWRnZGQCSQ9kFgICAg9kFgICAQ9kFgICAw8WAh8ACysEAWQYAgVBY3RsMDAkUGxhY2VIb2xkZXJMZWZ0TmF2QmFyJFVJVmVyc2lvbmVkQ29udGVudDMkVjRRdWlja0xhdW5jaE1lbnUPD2QFKUNvbXBvc2ljacOzbiBkZSBsYSBBc2FtYmxlYVxMb3MgRGlwdXRhZG9zZAVHY3RsMDAkUGxhY2VIb2xkZXJUb3BOYXZCYXIkUGxhY2VIb2xkZXJIb3Jpem9udGFsTmF2JFRvcE5hdmlnYXRpb25NZW51VjQPD2QFGkluaWNpb1xRdcOpIGVzIGxhIEFzYW1ibGVhZJ',
'__EVENTVALIDATION': '/wEWCALIhqvYAwKh2YVvAuDF1KUDAqCK1bUOAqCKybkPAqCKnbQCAqCKsZEJAvejv84Dtkx5dCFr3QGqQD2wsFQh8nP3iq8',
'__VIEWSTATEGENERATOR': 'BAB98CB3',
'__REQUESTDIGEST': '0x476239970DCFDABDBBDF638A1F9B026BD43022A10D1D757B05F1071FF3104459B4666F96A47B4845D625BCB2BE0D88C6E150945E8F5D82C189B56A0DA4BC859D'}
yield scrapy.FormRequest(url=url, formdata= formdata, callback=self.takeEachParty)
def takeEachParty(self, response):
print response.css('ul.listadoVert02 ul li::text').extract()
Going into the source code of the website, I can see how links look like, and how they send the JavaScript query. This is one of the links I need to access:
<a id="ctl00_m_g_36ea0310_893d_4a19_9ed1_88a133d06423_ctl00_Repeater1_ctl00_lnk_Grupo" href="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$m$g_36ea0310_893d_4a19_9ed1_88a133d06423$ctl00$Repeater1$ctl00$lnk_Grupo", "", true, "", "", false, true))">Grupo Parlamentario Popular de la Asamblea de Madrid</a>
I have been reading so many articles about, but probably the problem is my ignorance in respect.
Thanks in advance.
EDITED:
SOLUTION: I finally did it! Translating the very helpul code from Padraic Cunningham into Scrapy way. As I specified the issue for Scrapy, I want to post the result just in case someone has the same problem as I had.
So here it goes:
import scrapy
import js2xml
class AsambleaMadrid(scrapy.Spider):
name = "AsambleaMadrid"
start_urls = ['http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx']
def parse(self, response):
source = response
hrefs = response.xpath("//*[#id='moduloBusqueda']//div[#class='sangria']/ul/li/a/#href").extract()
form_data = self.validate(source)
for ref in hrefs:
# js2xml allows us to parse the JS function and params, and so to grab the __EVENTTARGET
js_xml = js2xml.parse(ref)
_id = js_xml.xpath(
"//identifier[#name='WebForm_PostBackOptions']/following-sibling::arguments/string[starts-with(.,'ctl')]")[0]
form_data["__EVENTTARGET"] = _id.text
url_diputado = 'http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx'
# The proper way to send a POST in scrapy is by using the FormRequest
yield scrapy.FormRequest(url=url_diputado, formdata=form_data, callback=self.extract_parties, method='POST')
def validate(self, source):
# these fields are the minimum required as cannot be hardcoded
data = {"__VIEWSTATEGENERATOR": source.xpath("//*[#id='__VIEWSTATEGENERATOR']/#value")[0].extract(),
"__EVENTVALIDATION": source.xpath("//*[#id='__EVENTVALIDATION']/#value")[0].extract(),
"__VIEWSTATE": source.xpath("//*[#id='__VIEWSTATE']/#value")[0].extract(),
" __REQUESTDIGEST": source.xpath("//*[#id='__REQUESTDIGEST']/#value")[0].extract()}
return data
def extract_parties(self, response):
source = response
name = source.xpath("//ul[#class='listadoVert02']/ul/li/a/text()").extract()
print name
I hope is clear. Thanks everybody, again!
If you look at the data posted to the form in chrome or firebug you can see there are many fields passed in the post request, there are a few that are essential and must be parsed from the original page, parsing the ids from the div.sangria ul li a tags is not sufficient as the actual data posted is slightly different, what is posted is in the Javascript function, WebForm_DoPostBackWithOptions which is in the href not the id attribute:
href='javascript:WebForm_DoPostBackWithOptions(new
WebForm_PostBackOptions("ctl00$m$g_36ea0310_893d_4a19_9ed1_88a133d06423$ctl00$Repeater1$ctl03$lnk_Grupo", "", true, "", "", false, true))'>
Sometimes all the underscores are replaced with dollar signs so it is easy to do a str.replace to get them in the correct order but not really in this case, we could use a regex to parse but I like the js2xml lib which can parse a javascript function and its args into an xml tree.
The following code using requests shows you how can get the data from the initial request and get to all the pages you want:
import requests
from lxml import html
import js2xml
post = "http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx"
def validate(xml):
# these fields are the minimum required as cannot be hardcoded
data = {"__VIEWSTATEGENERATOR": xml.xpath("//*[#id='__VIEWSTATEGENERATOR']/#value")[0],
"__EVENTVALIDATION": xml.xpath("//*[#id='__EVENTVALIDATION']/#value")[0],
"__VIEWSTATE": xml.xpath("//*[#id='__VIEWSTATE']/#value")[0],
" __REQUESTDIGEST": xml.xpath("//*[#id='__REQUESTDIGEST']/#value")[0]}
return data
with requests.Session() as s:
# make initial requests to get the links/hrefs and the from fields
r = s.get(
"http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx")
xml = html.fromstring(r.content)
hrefs = xml.xpath("//*[#id='moduloBusqueda']//div[#class='sangria']/ul/li/a/#href")
form_data = validate(xml)
for h in hrefs:
js_xml = js2xml.parse(h)
_id = js_xml.xpath(
"//identifier[#name='WebForm_PostBackOptions']/following-sibling::arguments/string[starts-with(.,'ctl')]")[
0]
form_data["__EVENTTARGET"] = _id.text
r = s.post(post, data=form_data)
xml = html.fromstring(r.content)
print(xml.xpath("//ul[#class='listadoVert02']/ul/li/a/text()"))
If we run the code above we see the different text output from all teh anchor tags:
In [2]: with requests.Session() as s:
...: r = s.get(
...: "http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx")
...: xml = html.fromstring(r.content)
...: hrefs = xml.xpath("//*[#id='moduloBusqueda']//div[#class='sangria']/ul/li/a/#href")
...: form_data = validate(xml)
...: for h in hrefs:
...: js_xml = js2xml.parse(h)
...: _id = js_xml.xpath(
...: "//identifier[#name='WebForm_PostBackOptions']/following-sibling::arguments/string[starts-with(.,'ctl')]")[
...: 0]
...: form_data["__EVENTTARGET"] = _id.text
...: r = s.post(post, data=form_data)
...: xml = html.fromstring(r.content)
...: print(xml.xpath("//ul[#class='listadoVert02']/ul/li/a/text()"))
...:
[u'Abo\xedn Abo\xedn, Sonsoles Trinidad', u'Adrados Gautier, M\xaa Paloma', u'Aguado Del Olmo, M\xaa Josefa', u'\xc1lvarez Padilla, M\xaa Nadia', u'Arribas Del Barrio, Jos\xe9 M\xaa', u'Ballar\xedn Valc\xe1rcel, \xc1lvaro C\xe9sar', u'Berrio Fern\xe1ndez-Caballero, M\xaa In\xe9s', u'Berzal Andrade, Jos\xe9 Manuel', u'Cam\xedns Mart\xednez, Ana', u'Carballedo Berlanga, M\xaa Eugenia', 'Cifuentes Cuencas, Cristina', u'D\xedaz Ayuso, Isabel Natividad', u'Escudero D\xedaz-Tejeiro, Marta', u'Fermosel D\xedaz, Jes\xfas', u'Fern\xe1ndez-Quejo Del Pozo, Jos\xe9 Luis', u'Garc\xeda De Vinuesa Gardoqui, Ignacio', u'Garc\xeda Mart\xedn, Mar\xeda Bego\xf1a', u'Garrido Garc\xeda, \xc1ngel', u'G\xf3mez Ruiz, Jes\xfas', u'G\xf3mez-Angulo Rodr\xedguez, Juan Antonio', u'Gonz\xe1lez Gonz\xe1lez, Isabel Gema', u'Gonz\xe1lez Jim\xe9nez, Bartolom\xe9', u'Gonz\xe1lez Taboada, Jaime', u'Gonz\xe1lez-Mo\xf1ux V\xe1zquez, Elena', u'Gonzalo L\xf3pez, Rosal\xeda', 'Izquierdo Torres, Carlos', u'Li\xe9bana Montijano, Pilar', u'Mari\xf1o Ortega, Ana Isabel', u'Moraga Valiente, \xc1lvaro', u'Mu\xf1oz Abrines, Pedro', u'N\xfa\xf1ez Guijarro, Jos\xe9 Enrique', u'Olmo Fl\xf3rez, Luis Del', u'Ongil Cores, M\xaa Gador', 'Ortiz Espejo, Daniel', u'Ossorio Crespo, Enrique Mat\xedas', 'Peral Guerra, Luis', u'P\xe9rez Baos, Ana Isabel', u'P\xe9rez Garc\xeda, David', u'Pla\xf1iol De Lacalle, Regina M\xaa', u'Redondo Alcaide, M\xaa Isabel', u'Roll\xe1n Ojeda, Pedro', u'S\xe1nchez Fern\xe1ndez, Alejandro', 'Sanjuanbenito Bonal, Diego', u'Serrano Guio, Jos\xe9 Tom\xe1s', u'Serrano S\xe1nchez-Capuchino, Alfonso Carlos', 'Soler-Espiauba Gallo, Juan', 'Toledo Moreno, Lucila', 'Van-Halen Acedo, Juan']
[u'Andaluz Andaluz, M\xaa Isabel', u'Ardid Jim\xe9nez, M\xaa Isabel', u'Carazo G\xf3mez, M\xf3nica', u'Casares D\xedaz, M\xaa Luc\xeda Inmaculada', u'Cepeda Garc\xeda De Le\xf3n, Jos\xe9 Carmelo', 'Cruz Torrijos, Diego', u'Delgado G\xf3mez, Carla', u'Franco Pardo, Jos\xe9 Manuel', u'Freire Campo, Jos\xe9 Manuel', u'Gabilondo Pujol, \xc1ngel', 'Gallizo Llamas, Mercedes', u"Garc\xeda D'Atri, Ana", u'Garc\xeda-Rojo Garrido, Pedro Pablo', u'G\xf3mez Montoya, Rafael', u'G\xf3mez-Chamorro Torres, Jos\xe9 \xc1ngel', u'Gonz\xe1lez Gonz\xe1lez, M\xf3nica Silvana', u'Leal Fern\xe1ndez, M\xaa Isaura', u'Llop Cuenca, M\xaa Pilar', 'Lobato Gandarias, Juan', u'L\xf3pez Ruiz, M\xaa Carmen', u'Manguan Valderrama, Eva M\xaa', u'Maroto Illera, M\xaa Reyes', u'Mart\xednez Ten, Carmen', u'Mena Romero, M\xaa Carmen', u'Moreno Navarro, Juan Jos\xe9', u'Moya Nieto, Encarnaci\xf3n', 'Navarro Lanchas, Josefa', 'Nolla Estrada, Modesto', 'Pardo Ortiz, Josefa Dolores', u'Quintana Viar, Jos\xe9', u'Rico Garc\xeda-Hierro, Enrique', u'Rodr\xedguez Garc\xeda, Nicol\xe1s', u'S\xe1nchez Acera, Pilar', u'Sant\xedn Fern\xe1ndez, Pedro', 'Segovia Noriega, Juan', 'Vicente Viondi, Daniel', u'Vinagre Alc\xe1zar, Agust\xedn']
['Abasolo Pozas, Olga', 'Ardanuy Pizarro, Miguel', u'Beirak Ulanosky, Jazm\xedn', u'Camargo Fern\xe1ndez, Ra\xfal', 'Candela Pokorna, Marco', 'Delgado Orgaz, Emilio', u'D\xedaz Rom\xe1n, Laura', u'Espinar Merino, Ram\xf3n', u'Espinosa De La Llave, Mar\xeda', u'Fern\xe1ndez Rubi\xf1o, Eduardo', u'Garc\xeda G\xf3mez, M\xf3nica', 'Gimeno Reinoso, Beatriz', u'Guti\xe9rrez Benito, Eduardo', 'Huerta Bravo, Raquel', u'L\xf3pez Hern\xe1ndez, Isidro', u'L\xf3pez Rodrigo, Jos\xe9 Manuel', u'Mart\xednez Abarca, Hugo', u'Morano Gonz\xe1lez, Jacinto', u'Ongil L\xf3pez, Miguel', 'Padilla Estrada, Pablo', u'Ruiz-Huerta Garc\xeda De Viedma, Lorena', 'Salazar-Alonso Revuelta, Cecilia', u'San Jos\xe9 P\xe9rez, Carmen', u'S\xe1nchez P\xe9rez, Alejandro', u'Serra S\xe1nchez, Isabel', u'Serra S\xe1nchez, Clara', 'Sevillano De Las Heras, Elena']
[u'Aguado Crespo, Ignacio Jes\xfas', u'\xc1lvarez Cabo, Daniel', u'Gonz\xe1lez Pastor, Dolores', u'Iglesia Vicente, M\xaa Teresa De La', 'Lara Casanova, Francisco', u'Marb\xe1n De Frutos, Marta', u'Marcos Arias, Tom\xe1s', u'Meg\xedas Morales, Jes\xfas Ricardo', u'N\xfa\xf1ez S\xe1nchez, Roberto', 'Reyero Zubiri, Alberto', u'Rodr\xedguez Dur\xe1n, Ana', u'Rubio Ruiz, Juan Ram\xf3n', u'Ruiz Fern\xe1ndez, Esther', u'Sol\xeds P\xe9rez, Susana', 'Trinidad Martos, Juan', 'Veloso Lozano, Enrique', u'Zafra Hern\xe1ndez, C\xe9sar']
You can add the exact same logic to your spider, I just used requests to show you a working example. You should also be aware that not every asp.net site behaves the same, you may have to re-validate for every post as in this related answer.
I think that scrapy's from_response could help you a lot (maybe this isn't the best re but for it, but you'll get the idea), try something like this:
import scrapy
import urllib
from scrapy.http.request.form import FormRequest
class AsambleaMadrid(scrapy.Spider):
name = "Asamblea_Madrid"
start_urls = ['http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx']
def parse(self, response):
ids_re = r'WebForm_PostBackOptions\(([^,]*)'
for id in response.css('#moduloBusqueda li a').re(ids_re):
target = urllib.unquote(id).strip('"')
formdata = {'__EVENTTARGET': target}
request = FormRequest.from_response(response=response,
formdata=formdata,
callback=self.takeEachParty,
dont_click=True)
yield request
def takeEachParty(self, response):
print response.css('.listadoVert02 li a::text').extract()
Do agree with ELRuLL - Firebug is your best friend while scraping.
If you want to avoid JS simulation then you need reproduce carefully all the params/headers that are being sent.
For example from what I see
for __EVENTTARGET you're sending just
id="ctl00_m_g_36ea0310_893d_4a19_9ed1_88a133d06423_ctl00_Repeater2_ctl01_lnk_Diputado")
and via Firebug we see that:
__EVENTTARGET=ctl00$m$g_36ea0310_893d_4a19_9ed1_88a133d06423$ctl00$Repeater2$ctl01$lnk_Diputado
It maybe the reason and maybe not, just repeat and test.
Firebug link just in case.
I'm trying to use Django Forms with Ajax Calls.
Previously I just used a html form that I could get all the information through request.POST['item']. But I've been thinking about validators, and I would benefit if I switched normal html forms into Django forms.
In my HTML code (the page where the user clicks, and a AJAX calls another view with javascript):
if not request.user.is_authenticated():
#Tells the user to login if not authenticated
return redirect('/webapp/login.html')
else:
#Get Logger
logger = logging.getLogger('views.logger.chartConfigure')
logger_uuid = uuid.uuid4()
logger_time = datetime.datetime.now()
#Log the User
logger.info("Request in editChart, User:" + str(request.user.username) + ", UUID:" + str(logger_uuid) + ", Time:" + str(logger_time))
#Forms to use
chartName = changeChartNameForm(auto_id=False)
#Put Forms into a context
context = {'chartNameForm': chartName}
#Return the context
return render(request, 'webapp/editChart.html', context)
The Forms that are used is a changeChartNameForm:
#Form for editing chart names
class changeChartNameForm(forms.Form):
#Only one variable which is called chartName, with label set to ""
#Since I don't want any labels. I have my own in HTML.
chartName = forms.CharField(max_length=100, label="")
#form-control is an extra class that is required by bootstrap 3, and the html id
#of the form is called chartName
chartName.widget.attrs['class'] = 'form-control'
chartName.widget.attrs['id'] = 'chartName'
HTML Code:
<div class="input-group">
<span class="input-group-btn">
<button class="btn btn-default" type="button" id="newChartName" >New Chart Name</button>
</span>
{{ chartNameForm }}
</div>
The Javascript code:
$.ajax(
{
type:"POST",
url:"ajax_postColumnAction/",
datatype: 'json',
data:
{
'csrfmiddlewaretoken':csrftoken,
'currentTabSelected':currentTabSelected,
'currentColumnSelected':currentColumnSelected,
'action':'changeName',
'changeNameForm':$('#chartName').serialize()
},
success: function(response)
{
...Some logic happens here
}
}
basically the javascript code will call this view, called ajax_postColumnAction:
#Get the name form, and get the newName
changeNameForm = changeChartNameForm(request.POST['changeNameForm'])
newName = ""
if(changeNameForm.is_valid()):
newName = changeNameForm.cleaned_data['chartName']
The return is always:
'unicode' object does not have the attribute 'get' at the following line: if(changeNameForm.is_valid())
I have tried the following:
using data=request.POST
using data=request.POST['changeNameForm']
Full Traceback:
Traceback (most recent call last):
File "C:\Users\Desktop\Dropbox (Personal)\Django\Dashboard_Web\WebApp\views.py", line 738, in ajax_postColumnAction if(changeNameForm.is_valid()):
File "C:\Python27\lib\site-packages\django\forms\forms.py", line 129, in is_valid return self.is_bound and not bool(self.errors)
File "C:\Python27\lib\site-packages\django\forms\forms.py", line 121, in errors self.full_clean()
File "C:\Python27\lib\site-packages\django\forms\forms.py", line 273, in full_clean self._clean_fields()
File "C:\Python27\lib\site-packages\django\forms\forms.py", line 282, in _clean_fields value = field.widget.value_from_datadict(self.data, self.files, self.add_prefix(name))
File "C:\Python27\lib\site-packages\django\forms\widgets.py", line 207, in value_from_datadict return data.get(name, None) AttributeError: 'unicode' object has no attribute 'get'
Edit:
When I Do:
print request.POST['changeNameForm']
I get chartName = "some text I typed in the browser"
This part of the error says that data is an unicode string:
return data.get(name, None) AttributeError: 'unicode' object has no attribute 'get'
data needs to be an object. Instead, it is a string, and strings don't have a get() method, and don't have name attributes as the error trace back says.
Try going off of the Django Docs to properly call the AJAX:
https://docs.djangoproject.com/en/1.6/topics/class-based-views/generic-editing/#ajax-example
It seems that a workaround is to construct the form in the view.
I've looked at tenths and hundreds of StackOverFlow posts and Google websites, and non seem to have my problem.
The method is to recreate the form when you get the POST data, since a form uses a dictionary as a constructor.
changeNameForm = changeChartNameForm({request.POST['changeNameForm'].split("=")[0]}):request.POST['changeNameForm'].split("=")[1]})
I know that request.POST['changeNameForm'] returns a string "chartName=someName". I split the string with "=", and I would get someName, and chartName. Hence I would put someName into a dictionary, with the key called chartName.
{'chartName':'someName'}
Hence the form is recreated with the post data and finally passes is_valid.