Scrapy crawling not working on ASPX website

Scrapy crawling not working on ASPX website - javascript

I'm scraping the Madrid Assembly's website, built in aspx, and I have no idea how to simulate clicks on the links where I need to get the corresponding politicians from. I tried this:
import scrapy
class AsambleaMadrid(scrapy.Spider):
name = "Asamblea_Madrid"
start_urls = ['http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx']
def parse(self, response):
for id in response.css('div#moduloBusqueda div.sangria div.sangria ul li a::attr(id)'):
target = id.extract()
url = "http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx"
formdata= {'__EVENTTARGET': target,
'__VIEWSTATE': '/wEPDwUBMA9kFgJmD2QWAgIBD2QWBAIBD2QWAgIGD2QWAmYPZBYCAgMPZBYCAgMPFgIeE1ByZXZpb3VzQ29udHJvbE1vZGULKYgBTWljcm9zb2Z0LlNoYXJlUG9pbnQuV2ViQ29udHJvbHMuU1BDb250cm9sTW9kZSwgTWljcm9zb2Z0LlNoYXJlUG9pbnQsIFZlcnNpb249MTQuMC4wLjAsIEN1bHR1cmU9bmV1dHJhbCwgUHVibGljS2V5VG9rZW49NzFlOWJjZTExMWU5NDI5YwFkAgMPZBYMAgMPZBYGBSZnXzM2ZWEwMzEwXzg5M2RfNGExOV85ZWQxXzg4YTEzM2QwNjQyMw9kFgJmD2QWAgIBDxYCHgtfIUl0ZW1Db3VudAIEFghmD2QWAgIBDw8WBB4PQ29tbWFuZEFyZ3VtZW50BTRHcnVwbyBQYXJsYW1lbnRhcmlvIFBvcHVsYXIgZGUgbGEgQXNhbWJsZWEgZGUgTWFkcmlkHgRUZXh0BTRHcnVwbyBQYXJsYW1lbnRhcmlvIFBvcHVsYXIgZGUgbGEgQXNhbWJsZWEgZGUgTWFkcmlkZGQCAQ9kFgICAQ8PFgQfAgUeR3J1cG8gUGFybGFtZW50YXJpbyBTb2NpYWxpc3RhHwMFHkdydXBvIFBhcmxhbWVudGFyaW8gU29jaWFsaXN0YWRkAgIPZBYCAgEPDxYEHwIFL0dydXBvIFBhcmxhbWVudGFyaW8gUG9kZW1vcyBDb211bmlkYWQgZGUgTWFkcmlkHwMFL0dydXBvIFBhcmxhbWVudGFyaW8gUG9kZW1vcyBDb211bmlkYWQgZGUgTWFkcmlkZGQCAw9kFgICAQ8PFgQfAgUhR3J1cG8gUGFybGFtZW50YXJpbyBkZSBDaXVkYWRhbm9zHwMFIUdydXBvIFBhcmxhbWVudGFyaW8gZGUgQ2l1ZGFkYW5vc2RkBSZnX2MxNTFkMGIxXzY2YWZfNDhjY185MWM3X2JlOGUxMTZkN2Q1Mg9kFgRmDxYCHgdWaXNpYmxlaGQCAQ8WAh8EaGQFJmdfZTBmYWViMTVfOGI3Nl80MjgyX2ExYjFfNTI3ZDIwNjk1ODY2D2QWBGYPFgIfBGhkAgEPFgIfBGhkAhEPZBYCAgEPZBYEZg9kFgICAQ8WAh8EaBYCZg9kFgQCAg9kFgQCAQ8WAh8EaGQCAw8WCB4TQ2xpZW50T25DbGlja1NjcmlwdAW7AWphdmFTY3JpcHQ6Q29yZUludm9rZSgnVGFrZU9mZmxpbmVUb0NsaWVudFJlYWwnLDEsIDEsICdodHRwOlx1MDAyZlx1MDAyZnd3dy5hc2FtYmxlYW1hZHJpZC5lc1x1MDAyZkVTXHUwMDJmUXVlRXNMYUFzYW1ibGVhXHUwMDJmQ29tcG9zaWNpb25kZWxhQXNhbWJsZWFcdTAwMmZMb3NEaXB1dGFkb3MnLCAtMSwgLTEsICcnLCAnJykeGENsaWVudE9uQ2xpY2tOYXZpZ2F0ZVVybGQeKENsaWVudE9uQ2xpY2tTY3JpcHRDb250YWluaW5nUHJlZml4ZWRVcmxkHgxIaWRkZW5TY3JpcHQFIVRha2VPZmZsaW5lRGlzYWJsZWQoMSwgMSwgLTEsIC0xKWQCAw8PFgoeCUFjY2Vzc0tleQUBLx4PQXJyb3dJbWFnZVdpZHRoAgUeEEFycm93SW1hZ2VIZWlnaHQCAx4RQXJyb3dJbWFnZU9mZnNldFhmHhFBcnJvd0ltYWdlT2Zmc2V0WQLrA2RkAgEPZBYCAgUPZBYCAgEPEBYCHwRoZBQrAQBkAhcPZBYIZg8PFgQfAwUPRW5nbGlzaCBWZXJzaW9uHgtOYXZpZ2F0ZVVybAVfL0VOL1F1ZUVzTGFBc2FtYmxlYS9Db21wb3NpY2lvbmRlbGFBc2FtYmxlYS9Mb3NEaXB1dGFkb3MvUGFnZXMvUmVsYWNpb25BbGZh
YmV0aWNhRGlwdXRhZG9zLmFzcHhkZAICDw8WBB8DBQZQcmVuc2EfDgUyL0VTL0JpZW52ZW5pZGFQcmVuc2EvUGFnaW5hcy9CaWVudmVuaWRhUHJlbnNhLmFzcHhkZAIEDw8WBB8DBRpJZGVudGlmaWNhY2nDs24gZGUgVXN1YXJpbx8OBTQvRVMvQXJlYVVzdWFyaW9zL1BhZ2luYXMvSWRlbnRpZmljYWNpb25Vc3Vhcmlvcy5hc3B4ZGQCBg8PFgQfAwUGQ29ycmVvHw4FKGh0dHA6Ly9vdXRsb29rLmNvbS9vd2EvYXNhbWJsZWFtYWRyaWQuZXNkZAIlD2QWAgIDD2QWAgIBDxYCHwALKwQBZAI1D2QWAgIHD2QWAgIBDw8WAh8EaGQWAgIDD2QWAmYPZBYCAgMPZBYCAgUPDxYEHgZIZWlnaHQbAAAAAAAAeUABAAAAHgRfIVNCAoABZBYCAgEPPCsACQEADxYEHg1QYXRoU2VwYXJhdG9yBAgeDU5ldmVyRXhwYW5kZWRnZGQCSQ9kFgICAg9kFgICAQ9kFgICAw8WAh8ACysEAWQYAgVBY3RsMDAkUGxhY2VIb2xkZXJMZWZ0TmF2QmFyJFVJVmVyc2lvbmVkQ29udGVudDMkVjRRdWlja0xhdW5jaE1lbnUPD2QFKUNvbXBvc2ljacOzbiBkZSBsYSBBc2FtYmxlYVxMb3MgRGlwdXRhZG9zZAVHY3RsMDAkUGxhY2VIb2xkZXJUb3BOYXZCYXIkUGxhY2VIb2xkZXJIb3Jpem9udGFsTmF2JFRvcE5hdmlnYXRpb25NZW51VjQPD2QFGkluaWNpb1xRdcOpIGVzIGxhIEFzYW1ibGVhZJ',
'__EVENTVALIDATION': '/wEWCALIhqvYAwKh2YVvAuDF1KUDAqCK1bUOAqCKybkPAqCKnbQCAqCKsZEJAvejv84Dtkx5dCFr3QGqQD2wsFQh8nP3iq8',
'__VIEWSTATEGENERATOR': 'BAB98CB3',
'__REQUESTDIGEST': '0x476239970DCFDABDBBDF638A1F9B026BD43022A10D1D757B05F1071FF3104459B4666F96A47B4845D625BCB2BE0D88C6E150945E8F5D82C189B56A0DA4BC859D'}
yield scrapy.FormRequest(url=url, formdata= formdata, callback=self.takeEachParty)
def takeEachParty(self, response):
print response.css('ul.listadoVert02 ul li::text').extract()
Going into the source code of the website, I can see how links look like, and how they send the JavaScript query. This is one of the links I need to access:
<a id="ctl00_m_g_36ea0310_893d_4a19_9ed1_88a133d06423_ctl00_Repeater1_ctl00_lnk_Grupo" href="javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions("ctl00$m$g_36ea0310_893d_4a19_9ed1_88a133d06423$ctl00$Repeater1$ctl00$lnk_Grupo", "", true, "", "", false, true))">Grupo Parlamentario Popular de la Asamblea de Madrid</a>
I have been reading many articles about this, but the problem is probably my own ignorance of the subject.
Thanks in advance.
EDITED:
SOLUTION: I finally did it, by translating the very helpful code from Padraic Cunningham into the Scrapy way. As I asked the question specifically about Scrapy, I want to post the result in case someone has the same problem I had.
So here it goes:
import scrapy
import js2xml
class AsambleaMadrid(scrapy.Spider):
    """Print the deputies listed under each parliamentary group of the Madrid Assembly.

    The group links on the start page are ASP.NET postback links, so each one
    is followed by re-posting the page's hidden form fields with
    ``__EVENTTARGET`` set to the control name found in the link's
    ``javascript:`` href.
    """

    name = "AsambleaMadrid"
    start_urls = ['http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx']

    def parse(self, response):
        """Yield one POST request per group link found on the start page.

        Raises IndexError if a link's href contains no string argument
        starting with ``ctl``, or if a hidden form field is missing.
        """
        hrefs = response.xpath(
            "//*[@id='moduloBusqueda']//div[@class='sangria']/ul/li/a/@href").extract()
        base_form = self.validate(response)
        # The postback goes to the same page the links were read from.
        url_diputado = self.start_urls[0]
        for ref in hrefs:
            # js2xml allows us to parse the JS function and params, and so to grab the __EVENTTARGET
            js_xml = js2xml.parse(ref)
            _id = js_xml.xpath(
                "//identifier[@name='WebForm_PostBackOptions']/following-sibling::arguments/string[starts-with(.,'ctl')]")[0]
            # Build a fresh dict per request instead of mutating the shared one.
            form_data = dict(base_form, __EVENTTARGET=_id.text)
            # The proper way to send a POST in scrapy is by using the FormRequest
            yield scrapy.FormRequest(url=url_diputado, formdata=form_data,
                                     callback=self.extract_parties, method='POST')

    def validate(self, source):
        """Return the hidden form fields of ``source`` as a name -> value dict.

        ``source`` is the response of the start page. Raises IndexError if
        any of the fields is absent from the page.
        """
        # these fields are the minimum required as cannot be hardcoded
        field_names = ("__VIEWSTATEGENERATOR", "__EVENTVALIDATION",
                       "__VIEWSTATE", "__REQUESTDIGEST")
        return {
            field: source.xpath("//*[@id='%s']/@value" % field)[0].extract()
            for field in field_names
        }

    def extract_parties(self, response):
        """Print the link texts found under ``ul.listadoVert02`` in a postback response."""
        name = response.xpath("//ul[@class='listadoVert02']/ul/li/a/text()").extract()
        # Single-argument call form prints the same on Python 2 and Python 3.
        print(name)
I hope it is clear. Thanks everybody, again!

If you look at the data posted to the form in chrome or firebug you can see there are many fields passed in the post request, there are a few that are essential and must be parsed from the original page, parsing the ids from the div.sangria ul li a tags is not sufficient as the actual data posted is slightly different, what is posted is in the Javascript function, WebForm_DoPostBackWithOptions which is in the href not the id attribute:
href='javascript:WebForm_DoPostBackWithOptions(new
WebForm_PostBackOptions("ctl00$m$g_36ea0310_893d_4a19_9ed1_88a133d06423$ctl00$Repeater1$ctl03$lnk_Grupo", "", true, "", "", false, true))'>
Sometimes all the underscores are replaced with dollar signs so it is easy to do a str.replace to get them in the correct order but not really in this case, we could use a regex to parse but I like the js2xml lib which can parse a javascript function and its args into an xml tree.
The following code using requests shows you how can get the data from the initial request and get to all the pages you want:
import requests
from lxml import html
import js2xml
post = "http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx"
def validate(xml):
    """Return the hidden ASP.NET form fields of a parsed page as a dict.

    ``xml`` is any object with an ``xpath(expr)`` method returning a list
    (here, the lxml tree of the start page). The first match of each field's
    ``value`` attribute is used.

    Raises IndexError if one of the fields is not present in the page.
    """
    # these fields are the minimum required as cannot be hardcoded
    field_names = ("__VIEWSTATEGENERATOR", "__EVENTVALIDATION",
                   "__VIEWSTATE", "__REQUESTDIGEST")
    return {
        field: xml.xpath("//*[@id='%s']/@value" % field)[0]
        for field in field_names
    }
# Walk every parliamentary-group link on the start page by replaying its
# ASP.NET postback, and print the link texts listed on each resulting page.
with requests.Session() as s:
    # make the initial request to get the links/hrefs and the form fields
    r = s.get(post)
    xml = html.fromstring(r.content)
    hrefs = xml.xpath("//*[@id='moduloBusqueda']//div[@class='sangria']/ul/li/a/@href")
    form_data = validate(xml)
    for h in hrefs:
        # The postback target is the string argument starting with "ctl"
        # inside the href's WebForm_PostBackOptions(...) call.
        js_xml = js2xml.parse(h)
        _id = js_xml.xpath(
            "//identifier[@name='WebForm_PostBackOptions']/following-sibling::arguments/string[starts-with(.,'ctl')]")[0]
        form_data["__EVENTTARGET"] = _id.text
        r = s.post(post, data=form_data)
        page = html.fromstring(r.content)
        print(page.xpath("//ul[@class='listadoVert02']/ul/li/a/text()"))
If we run the code above, we see the different text output from all the anchor tags:
In [2]: with requests.Session() as s:
...: r = s.get(
...: "http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx")
...: xml = html.fromstring(r.content)
...: hrefs = xml.xpath("//*[@id='moduloBusqueda']//div[@class='sangria']/ul/li/a/@href")
...: form_data = validate(xml)
...: for h in hrefs:
...: js_xml = js2xml.parse(h)
...: _id = js_xml.xpath(
...: "//identifier[@name='WebForm_PostBackOptions']/following-sibling::arguments/string[starts-with(.,'ctl')]")[
...: 0]
...: form_data["__EVENTTARGET"] = _id.text
...: r = s.post(post, data=form_data)
...: xml = html.fromstring(r.content)
...: print(xml.xpath("//ul[@class='listadoVert02']/ul/li/a/text()"))
...:
[u'Abo\xedn Abo\xedn, Sonsoles Trinidad', u'Adrados Gautier, M\xaa Paloma', u'Aguado Del Olmo, M\xaa Josefa', u'\xc1lvarez Padilla, M\xaa Nadia', u'Arribas Del Barrio, Jos\xe9 M\xaa', u'Ballar\xedn Valc\xe1rcel, \xc1lvaro C\xe9sar', u'Berrio Fern\xe1ndez-Caballero, M\xaa In\xe9s', u'Berzal Andrade, Jos\xe9 Manuel', u'Cam\xedns Mart\xednez, Ana', u'Carballedo Berlanga, M\xaa Eugenia', 'Cifuentes Cuencas, Cristina', u'D\xedaz Ayuso, Isabel Natividad', u'Escudero D\xedaz-Tejeiro, Marta', u'Fermosel D\xedaz, Jes\xfas', u'Fern\xe1ndez-Quejo Del Pozo, Jos\xe9 Luis', u'Garc\xeda De Vinuesa Gardoqui, Ignacio', u'Garc\xeda Mart\xedn, Mar\xeda Bego\xf1a', u'Garrido Garc\xeda, \xc1ngel', u'G\xf3mez Ruiz, Jes\xfas', u'G\xf3mez-Angulo Rodr\xedguez, Juan Antonio', u'Gonz\xe1lez Gonz\xe1lez, Isabel Gema', u'Gonz\xe1lez Jim\xe9nez, Bartolom\xe9', u'Gonz\xe1lez Taboada, Jaime', u'Gonz\xe1lez-Mo\xf1ux V\xe1zquez, Elena', u'Gonzalo L\xf3pez, Rosal\xeda', 'Izquierdo Torres, Carlos', u'Li\xe9bana Montijano, Pilar', u'Mari\xf1o Ortega, Ana Isabel', u'Moraga Valiente, \xc1lvaro', u'Mu\xf1oz Abrines, Pedro', u'N\xfa\xf1ez Guijarro, Jos\xe9 Enrique', u'Olmo Fl\xf3rez, Luis Del', u'Ongil Cores, M\xaa Gador', 'Ortiz Espejo, Daniel', u'Ossorio Crespo, Enrique Mat\xedas', 'Peral Guerra, Luis', u'P\xe9rez Baos, Ana Isabel', u'P\xe9rez Garc\xeda, David', u'Pla\xf1iol De Lacalle, Regina M\xaa', u'Redondo Alcaide, M\xaa Isabel', u'Roll\xe1n Ojeda, Pedro', u'S\xe1nchez Fern\xe1ndez, Alejandro', 'Sanjuanbenito Bonal, Diego', u'Serrano Guio, Jos\xe9 Tom\xe1s', u'Serrano S\xe1nchez-Capuchino, Alfonso Carlos', 'Soler-Espiauba Gallo, Juan', 'Toledo Moreno, Lucila', 'Van-Halen Acedo, Juan']
[u'Andaluz Andaluz, M\xaa Isabel', u'Ardid Jim\xe9nez, M\xaa Isabel', u'Carazo G\xf3mez, M\xf3nica', u'Casares D\xedaz, M\xaa Luc\xeda Inmaculada', u'Cepeda Garc\xeda De Le\xf3n, Jos\xe9 Carmelo', 'Cruz Torrijos, Diego', u'Delgado G\xf3mez, Carla', u'Franco Pardo, Jos\xe9 Manuel', u'Freire Campo, Jos\xe9 Manuel', u'Gabilondo Pujol, \xc1ngel', 'Gallizo Llamas, Mercedes', u"Garc\xeda D'Atri, Ana", u'Garc\xeda-Rojo Garrido, Pedro Pablo', u'G\xf3mez Montoya, Rafael', u'G\xf3mez-Chamorro Torres, Jos\xe9 \xc1ngel', u'Gonz\xe1lez Gonz\xe1lez, M\xf3nica Silvana', u'Leal Fern\xe1ndez, M\xaa Isaura', u'Llop Cuenca, M\xaa Pilar', 'Lobato Gandarias, Juan', u'L\xf3pez Ruiz, M\xaa Carmen', u'Manguan Valderrama, Eva M\xaa', u'Maroto Illera, M\xaa Reyes', u'Mart\xednez Ten, Carmen', u'Mena Romero, M\xaa Carmen', u'Moreno Navarro, Juan Jos\xe9', u'Moya Nieto, Encarnaci\xf3n', 'Navarro Lanchas, Josefa', 'Nolla Estrada, Modesto', 'Pardo Ortiz, Josefa Dolores', u'Quintana Viar, Jos\xe9', u'Rico Garc\xeda-Hierro, Enrique', u'Rodr\xedguez Garc\xeda, Nicol\xe1s', u'S\xe1nchez Acera, Pilar', u'Sant\xedn Fern\xe1ndez, Pedro', 'Segovia Noriega, Juan', 'Vicente Viondi, Daniel', u'Vinagre Alc\xe1zar, Agust\xedn']
['Abasolo Pozas, Olga', 'Ardanuy Pizarro, Miguel', u'Beirak Ulanosky, Jazm\xedn', u'Camargo Fern\xe1ndez, Ra\xfal', 'Candela Pokorna, Marco', 'Delgado Orgaz, Emilio', u'D\xedaz Rom\xe1n, Laura', u'Espinar Merino, Ram\xf3n', u'Espinosa De La Llave, Mar\xeda', u'Fern\xe1ndez Rubi\xf1o, Eduardo', u'Garc\xeda G\xf3mez, M\xf3nica', 'Gimeno Reinoso, Beatriz', u'Guti\xe9rrez Benito, Eduardo', 'Huerta Bravo, Raquel', u'L\xf3pez Hern\xe1ndez, Isidro', u'L\xf3pez Rodrigo, Jos\xe9 Manuel', u'Mart\xednez Abarca, Hugo', u'Morano Gonz\xe1lez, Jacinto', u'Ongil L\xf3pez, Miguel', 'Padilla Estrada, Pablo', u'Ruiz-Huerta Garc\xeda De Viedma, Lorena', 'Salazar-Alonso Revuelta, Cecilia', u'San Jos\xe9 P\xe9rez, Carmen', u'S\xe1nchez P\xe9rez, Alejandro', u'Serra S\xe1nchez, Isabel', u'Serra S\xe1nchez, Clara', 'Sevillano De Las Heras, Elena']
[u'Aguado Crespo, Ignacio Jes\xfas', u'\xc1lvarez Cabo, Daniel', u'Gonz\xe1lez Pastor, Dolores', u'Iglesia Vicente, M\xaa Teresa De La', 'Lara Casanova, Francisco', u'Marb\xe1n De Frutos, Marta', u'Marcos Arias, Tom\xe1s', u'Meg\xedas Morales, Jes\xfas Ricardo', u'N\xfa\xf1ez S\xe1nchez, Roberto', 'Reyero Zubiri, Alberto', u'Rodr\xedguez Dur\xe1n, Ana', u'Rubio Ruiz, Juan Ram\xf3n', u'Ruiz Fern\xe1ndez, Esther', u'Sol\xeds P\xe9rez, Susana', 'Trinidad Martos, Juan', 'Veloso Lozano, Enrique', u'Zafra Hern\xe1ndez, C\xe9sar']
You can add the exact same logic to your spider, I just used requests to show you a working example. You should also be aware that not every asp.net site behaves the same, you may have to re-validate for every post as in this related answer.

I think that Scrapy's FormRequest.from_response could help you a lot (maybe this isn't the best regex for it, but you'll get the idea). Try something like this:
import scrapy
import urllib
from scrapy.http.request.form import FormRequest
class AsambleaMadrid(scrapy.Spider):
    """Follow each postback link under ``#moduloBusqueda`` and print the names it lists.

    The hidden form fields are taken from the page's own form via
    ``FormRequest.from_response``; only ``__EVENTTARGET`` is supplied here.
    """

    name = "Asamblea_Madrid"
    start_urls = ['http://www.asambleamadrid.es/ES/QueEsLaAsamblea/ComposiciondelaAsamblea/LosDiputados/Paginas/RelacionAlfabeticaDiputados.aspx']

    def parse(self, response):
        """Yield one form postback per link matched under ``#moduloBusqueda``."""
        # unquote moved to urllib.parse in Python 3; support both locations.
        try:
            from urllib.parse import unquote
        except ImportError:
            from urllib import unquote

        # First argument of WebForm_PostBackOptions(...), up to the first comma.
        ids_re = r'WebForm_PostBackOptions\(([^,]*)'
        for raw_target in response.css('#moduloBusqueda li a').re(ids_re):
            # Percent-decode the captured argument and drop its surrounding quotes.
            target = unquote(raw_target).strip('"')
            yield FormRequest.from_response(
                response=response,
                formdata={'__EVENTTARGET': target},
                callback=self.takeEachParty,
                dont_click=True,
            )

    def takeEachParty(self, response):
        """Print the link texts found under ``.listadoVert02`` in the postback response."""
        # Single-argument call form prints the same on Python 2 and Python 3.
        print(response.css('.listadoVert02 li a::text').extract())

Do agree with ELRuLL - Firebug is your best friend while scraping.
If you want to avoid JS simulation, then you need to carefully reproduce all the params/headers that are being sent.
For example from what I see
for __EVENTTARGET you're sending just
id="ctl00_m_g_36ea0310_893d_4a19_9ed1_88a133d06423_ctl00_Repeater2_ctl01_lnk_Diputado")
and via Firebug we see that:
__EVENTTARGET=ctl00$m$g_36ea0310_893d_4a19_9ed1_88a133d06423$ctl00$Repeater2$ctl01$lnk_Diputado
That may or may not be the reason; just reproduce the request exactly and test.
Firebug link just in case.

Related

How can table_name be a property in my post query?

I want this query to be generic on the table_name meaning that there is in my JSON file a new property named "device" which indicates in which table the data will be inserted.
the problem is that in my SQL request I can't specify it. Here is what I tried:
INSERT INTO ${device} (adc_v, adc_i, acc_axe_x, acc_axe_y, acc_axe_z, temperature, spo2, pa_diastolique, pa_systolique, indice_confiance, received_on, bpm)' +
'values(${adc_v}, ${adc_i}, ${acc_axe_x}, ${acc_axe_y}, ${acc_axe_z}, ${temperature}, ${spo2}, ${pa_diastolique}, ${pa_systolique}, ${indice_confiance}, ${received_on}, ${bpm})'
here is my JSON on postman:
{
"device": "tag_7z8eq73",
"adc_v": 130,
"adc_i": {{RandomCourant}},
"acc_axe_x": {{RandomAccX}},
"acc_axe_y": {{RandomAccY}},
"acc_axe_z": {{RandomAccZ}},
"temperature": {{RandomTemp}},
"spo2": {{RandomSpo2}},
"pa_diastolique": {{RandomDias}},
"pa_systolique": {{RandomSys}},
"indice_confiance": {{RandomIndiceConf}},
"received_on": "{{$isoTimestamp}}",
"bpm": {{RandomBpm}}}
The table name is : tag_7z8eq73
here is the error that is returned to me:
error: erreur de syntaxe sur ou près de « 'tag_7z8eq73' »
It looks like I am close to the solution, but there is a syntax problem. Is it the quotes? Is my approach the right one?

const device = req.body.device;
console.log(device)
return db.none('INSERT INTO '+device+' (adc_v, adc_i, acc_axe_x, acc_axe_y, acc_axe_z, temperature, spo2, pa_diastolique, pa_systolique, indice_confiance, received_on, bpm)' +
'values(${adc_v}, ${adc_i}, ${acc_axe_x}, ${acc_axe_y}, ${acc_axe_z}, ${temperature}, ${spo2}, ${pa_diastolique}, ${pa_systolique}, ${indice_confiance}, ${received_on}, ${bpm})',
req.body)
try this

jQUERY, FLASK: Bad request (400) while trying to send a Javascript variable to a Python script

Hello and Welcome!
INTRODUCTION
Thank you for viewing my question; each and every single one of your answer matters a ton to my journey towards the mastery of web development! I certainly very much appreciate all the support that the StackOverFlow community is providing to all junior developers who have just gone deep into the world of programming.
WHAT THE PROGRAM DOES
#-----------------------------------------------------------------------------
# Name: Lottery Simulator 2022
# Purpose: To encourage young people not to gamble on lotteries, as the probability of correctly guessing the number is infinitesimal!
#
# Author: John Seong
# Created: 25-Feb-2022
# Updated: 01-Mar-2022
#-----------------------------------------------------------------------------
# I think this project deserves a level 4+ because...
#
# Features Added:
# Game being entirely web-based using the Flask micro web framework
# Utilization of both functional programming and object-oriented programming
# Calculate the chances of winning for the sake of learning why gambling is risky
# After a set of number is entered by the user, the combinations will reset and the program will give completely an arbitrary number set differing from the previous one
# The user can change the difficulty setting, which will determine the constraint of the possible number set
# Not only does it allow user to guess one number at a time, but multiple numbers stored in a dictionary
# In-game currency system that synchronizes with the SQLAlchemy database, which also generates the player leaderboard
# Game hosted on a cloud platform Heroku
# Used AJAX for communication between JAVASCRIPT and PYTHON files (via JSON)
#-----------------------------------------------------------------------------
PROBLEM I AM DEALING WITH
I got this error message on the console when I clicked the 'next' button three times which led to the ajax method that aids the communication between the JAVASCRIPT and PYTHON files:
127.0.0.1 - - [04/Mar/2022 13:18:28] "POST /game HTTP/1.1" 400 -
HTML AND JAVASCRIPT
game.html
{% extends "index.html" %} {% block content %}
<head>
<script>
// TO DO: MAKE THE BAREBONE APP FIRST AND THEN PRETTIFY IT!
// Number of non-empty submissions so far; incremented inside onClickEvent().
var counter = 0; // Button counter
// Maps each setup question to the index onClickEvent() uses to read it
// back from its `answers` array.
var dict = {
'name': 0,
'range': 1,
'draws': 2
};
/**
 * Click handler for the "Next" button.
 *
 * Reads the single text input of form `frm`, shows the warning paragraph
 * when it is empty, otherwise records the value and advances the global
 * `counter`. Depending on `counter` it then swaps the guide texts and the
 * placeholder (steps 1 and 2) or POSTs the collected answers with $.ajax
 * (step 3).
 */
function onClickEvent() {
// Define all the elements by their ID...
const input = document.getElementById("name");
const warning = document.getElementById("warning");
const guide_text = document.getElementById("guide-text");
const guide_text_2 = document.getElementById("guide-text-2");
const array_renderer = document.getElementById("array-renderer");
const numbers_list = document.getElementById("numbers-list");
const name_of_input = document.getElementById("name").innerHTML;
// NOTE(review): `answers` is created anew on every click, so values pushed
// during earlier clicks are not available here. By the third step only the
// value pushed in this call exists, and the other indices read below are
// undefined.
const answers = []; // List for storing all the answers
// When no value is entered in the input, throw an error message...
if (document.forms['frm'].name.value === "") {
warning.style.display = 'block';
} else {
warning.style.display = 'none';
answers.push(document.forms['frm'].name.value);
counter++;
document.forms['frm'].name.value = "";
}
// Scene transition when the submit button is pressed once... twice... three times... etc.
if (counter == 1) {
guide_text.innerHTML = "SET THE<br>RANGE OF<br>POSSIBLE<br>NUMBERS";
guide_text_2.innerHTML = "DON'T GO TOO CRAZY!";
input.placeholder = "Enter min. and max. values seperated by a space...";
} else if (counter == 2) {
guide_text.innerHTML = "HOW MANY<br>DRAWS?";
guide_text_2.innerHTML = "IS MURPHY'S LAW REAL?";
input.placeholder = "Enter the number of draws...";
// NOTE(review): the input was already reset to "" above when a value was
// accepted, so this pushes an empty string in that case.
answers.push(document.forms['frm'].name.value);
} else if (counter == 3) {
// Send the three answers to the game view as form fields.
$.ajax({
url: '{{ url_for("main.game") }}',
type: 'POST',
data: {
nickname: answers[dict['name']],
range: answers[dict['range']],
draws: answers[dict['draws']]
},
success: function(response) {
console.log("Successful attempt at retrieving the data!");
warning.style.display = 'none';
},
error: function(response) {
warning.style.display = 'block';
warning.innerHTML = "ERROR WHILE RETRIEVING THE LIST!"
}
});
// The statements below are not inside the success callback, so they do not
// use the `response` passed to it.
guide_text.innerHTML = "GUESS THE<br>NUMBERS IN A<br>" + answers[dict['draws']] + " * 1 ARRAY!";
array_renderer.style.display = 'block';
// Parse the JSON file handed over by views.py (set that contains random numbers)
// NOTE(review): no variable named `data` is declared in the visible script
// (`data:` above is only an option key) — confirm where it should come from.
numbers_list.innerHTML = JSON.parse(data.random_set_json);
input.placeholder = "Enter the values seperated by a space...";
}
}
// Execute a function when the user releases a key on the keyboard => NEEDS FIX! DOESN'T WORK!
// NOTE(review): `input` is only declared as a local constant inside
// onClickEvent(); nothing in the visible script defines it at this scope.
// This statement also runs inside <head>, before the form markup further
// down the template — confirm the element exists when it executes.
input.addEventListener("keyup", function(event) {
// Number 13 is the "Enter" key on the keyboard
if (event.keyCode === 13) {
// Cancel the default action, if needed
event.preventDefault();
// Trigger the button element with a click
document.getElementById("button").click();
}
});
</script>
<link rel="stylesheet" href="../static/bootstrap/css/style.css">
</head>
<header class="masthead" style="background: lightgray">
<div class="container h-100">
<div class="row h-100">
<div class="col-lg-7 my-auto" style="border-style: none;">
<div class="mx-auto header-content" style="background: rgba(47,37,48, 1);padding: 30px;border-radius: 34px; ;border-color: white;margin: 6px; color: white">
<h1 class="mb-5" id="guide-text" style="font-family: 'Roboto Mono', monospace;color: white;text-align: center;">WHAT IS<br>YOUR NAME?</h1>
<h3 id="guide-text-2" style="font-family: 'Roboto Mono', monospace; text-transform: uppercase; color: white">DON'T HESITATE! GO ON!</h3><br>
<form action="" name="frm" method="post">
<input class="form-control" type="text" id="name" name="name" placeholder="Enter your nickname...">
<br>
<div id="array-renderer">
<h3 id="numbers-list" style="font-family: 'Roboto Mono', monospace; text-transform: uppercase; color: white">NULL</h3><br>
</div>
<br><a class="btn btn-outline-warning btn-xl" role="button" id="button" onclick="onClickEvent()" href="#download" style="color: white;font-family: 'Roboto Mono', monospace;font-size: 20px; justify-self: center; align-self: center">Next</a>
<br><br>
<p id="warning" style="display: none; font-family: 'Roboto Mono', monospace; text-transform: uppercase; color: lightcoral">The value you entered is invalid. Please try it again.</p>
</form>
</div>
</div>
</div>
</div>
</header>
{% endblock content %}
PYTHON SCRIPT
views.py
from email.policy import default
from random import Random
from flask import Blueprint, render_template, request, session, jsonify
# from flask import current_app as app
import configparser
import json
from website.extensions import db
from .models import PlayerCurrency, RandomSet
# Import the config.cfg file and read the default value (starting point) of the game currency
config = configparser.ConfigParser()
# Get the absolute path of the CFG file by doing os.getcwd() and joining it to config.cfg
cfg_path = 'website/config.cfg'
bp = Blueprint('main', __name__)
# Whether user cookies will be collected or not
cookiesAvail = True
# Read the CFG file
config.read(cfg_path)
# Fix the 'failed to load' error!
# Read the two integer defaults from the [default] section of the CFG file.
try:
    default_count = config.getint("default", "NUM_OF_NUMS")
    default_coins = config.getint("default", "MONEY")
except (configparser.Error, ValueError):
    # configparser.Error covers a missing section or option; ValueError covers
    # a value that is not an integer. Anything else is a genuine bug and is
    # allowed to propagate instead of being hidden by a bare except.
    print('CFG file failed to load twice!')
@bp.route('/', methods=['GET', 'POST'])
def index():
    """Render the home page; on POST, record the visitor's cookie choice.

    The form fields ``allow-cookies`` and ``decline-cookies`` are read with
    ``.get`` so a missing field yields ``None``. The session is cleared on
    every POST.
    """
    # Without this declaration the assignments below would only create a
    # local variable and the module-level flag would never change.
    global cookiesAvail

    if request.method == "POST":
        allow_cookies = request.form.get("allow-cookies")
        decline_cookies = request.form.get("decline-cookies")
        session.clear()
        if allow_cookies == 'allow' and decline_cookies != 'decline':
            cookiesAvail = True
        if decline_cookies == 'decline' and allow_cookies != 'allow':
            cookiesAvail = False
    return render_template("home.html")
@bp.route('/about')
def about():
    """Clear the session and render the about page."""
    session.clear()
    return render_template("about.html")
@bp.route('/game', methods=['GET', 'POST'])
def game():
    """Render the game page; on POST, build a random set from the submitted answers.

    The POST branch expects the form fields ``nickname``, ``range`` (two
    values separated by a space) and ``draws``, each holding JSON-encoded
    text.
    """
    if request.method == 'POST':
        # Clear the session
        session.clear()
        # Get all the user input values from game.js
        # Indexing request.form with a key the client did not send makes
        # Flask answer with 400 Bad Request; json.loads additionally requires
        # each value to be valid JSON text.
        player_name = json.loads(request.form['nickname'])
        player_range = json.loads(request.form['range']).split(' ')  # player_range[0] => min value, player_range[1] => max value
        player_draws = json.loads(request.form['draws'])
        # Define a random list object (instantiating a class located in models.py)
        random_set = RandomSet(player_range[0], player_range[1], player_draws)
        # Create a random list by generating arbitrary values
        random_set.generate()
        # Convert the generated random list (Python) into JSON-compatible string, so we can hand it over to game.js
        # NOTE(review): random_set_json and player_name are computed but not
        # used afterwards; the response below is the rendered template.
        random_set_json = json.dumps(random_set.current_set)
    # INTERACTION BETWEEN JAVASCRIPT AND PYTHON (FLASK) USING AJAX AND JSONIFY: https://ayumitanaka13.medium.com/how-to-use-ajax-with-python-flask-729c0a8e5346
    # HOW PYTHON-JSON CONVERSION WORKS USING THE JSON MODULE: https://www.w3schools.com/python/python_json.asp
    return render_template("game.html")
# AJAX METHOD: https://ayumitanaka13.medium.com/how-to-use-ajax-with-python-flask-729c0a8e5346
# WHAT IS CURRENT_APP? LINK: https://flask.palletsprojects.com/en/2.0.x/appcontext/
# cd .. // go to the upper directory
# requirements.txt => # pip3 install -r requirements.txt to install the files
# COOKIES => WILL BE USED TO SKIP ENTER THE NAME STAGE IN SETUP!
# ADD DIFFICULY INDICATOR DEPENDING ON THE SCALE OF THE RANGE, AND SEPERATE THE LEADERBOARD BY DIFFICULTY LEVEL (EASY, MODERATE, HARD)
models.py
class RandomSet():
    """A machine-generated list of random integers for one game round."""

    def __init__(self, min_value, max_value, draws):
        """Store the bounds and the number of draws.

        All three arguments are converted with ``int`` so that numeric
        strings (for example values split out of a form field) are accepted
        as well as integers. Raises ValueError if a value is not numeric.
        """
        self.min_value = int(min_value)  # Minimum value that the random number can be
        self.max_value = int(max_value)  # Maximum value that the random numbers can be
        self.count = int(draws)  # Number of draws
        # A list that contains the set that computer generated, containing machine-picked arbitrary numbers
        self.current_set = []
        self.chances = 0  # Chances of winning, calculated by the computer

    # Generate a set containing completely arbitrary numbers
    def generate(self):
        """Append ``count`` random integers to ``current_set`` and update ``chances``."""
        # Imported here because the snippet shows no module-level import of random.
        import random

        for _ in range(self.count):
            # Add a random value generated by a computer to the list using a for loop and a RANDINT built-in function
            self.current_set.append(random.randint(
                self.min_value, self.max_value))
        # Calculate the chances and store it in the instance's variable
        self.chances = calculate_chances(self.current_set, self.count)


def calculate_chances(current_set, count):
    """
    Calculate the chances of winning,
    by using the permutation formula
    and converting it to a percentage.

    Returns a string of the form ``'<percentage> %'``. Raises ValueError when
    ``count`` is larger than ``len(current_set)``.
    """
    # Imported here because the snippet shows no module-level import of math.
    import math

    # itertools.permutations returns an iterator, which has no len(); the
    # number of ordered selections is n! / (n - count)! and is computed
    # directly instead of enumerating them.
    size = len(current_set)
    total = math.factorial(size) // math.factorial(size - count)
    return str(f'{(1 / total) * 100} %')
You can also check out the full code on my GitHub repository.

It turns out I had to double-JSON stringify the data that I was handing over to the server side.
So basically what it means is:
WHILE THE ORIGINAL CODE LOOKS LIKE THIS:
$.ajax({
url: '{{ url_for("main.game") }}',
type: 'POST',
data: {
nickname: answers[dict['name']],
range: answers[dict['range']],
draws: answers[dict['draws']]
},
THE CODE SHOULD LOOK LIKE THIS FOR IT TO WORK:
$.ajax({
url: '/game',
type: 'POST',
data: JSON.stringify({ // Make sure you surround the data variable(s) with JSON.stringify's MULTIPLE TIMES to avoid any potential error! Data HAS to be in JSON format.
nickname: JSON.stringify(answers[dict['name']]),
range: JSON.stringify(answers[dict['range']]),
draws: JSON.stringify(answers[dict['draws']])
}),
It was one simple mistake but took me an excruciatingly long time to catch.

How to export an Array to PDF using jsPDF?

first time using jsPDF.
I'm trying to export some data via PDF, however when I try to export the array it gives me an error. I have ASSUMED you export arrays using doc.table because I can't find any documentation; there is a tag for it on Stack Overflow, but I didn't find anyone with the same question either.
This is what I have so far
// Builds the order summary with jsPDF and saves it as "<id>.pdf": a logo,
// an order-information column at x=60, a student-information column at
// x=360, and finally the books array passed to doc.table.
// AIBLogo, estadoDePago, estadoDeDisponibilidad, total, totalPagado, nombre,
// escuela, grado, librosData and id all come from the enclosing scope.
const generatePDF = () => {
// NOTE(review): the fourth argument is the string 'false', not the boolean — confirm intent.
var doc = new jsPDF('landscape', 'px', 'a4', 'false');
doc.addImage(AIBLogo, 'PNG', 250,10,100,100)
// NOTE(review): 'Helvertica' looks like a misspelling of Helvetica — verify
// against the font names jsPDF accepts.
doc.setFont('Helvertica', "bold")
doc.text(60,150, "Informacion del Pedido");
doc.setFont('Helvertica', "Normal")
doc.text(60,170, "Estado del Pago: "+estadoDePago)
doc.text(60,190, "Estado de Disponibilidad: "+estadoDeDisponibilidad)
doc.text(60,210, "Total del Pedido: "+total)
doc.text(60,230, "Total Pagado: "+totalPagado)
doc.setFont('Helvertica', "bold")
doc.text(360,150, "Informacion del Estudiante");
doc.setFont('Helvertica', "Normal")
doc.text(360,170, "Nombre del Estudiante: "+nombre)
doc.text(360,190, "Escuela: "+escuela)
// NOTE(review): this line prints `grado` under the label "Estado del Pago" — confirm the label.
doc.text(360,210, "Estado del Pago: "+grado)
doc.text(360,240, "Direccion de Entrega: Retirar en institución")
// This is the call the question reports as failing; it passes only the data
// array and no column headers.
doc.table(100,100, librosData)
doc.save(id+".pdf")
}
Excluding the table bit it prints out like this:
I would like to add the data table after all that info, and to start new pages when it runs out of bounds, because the table can have up to 20 items.
UPDATE
I have tried the following, which I got from here, but it is not working either: StackOverflow Answer
// Column headers for the table, in display order.
var col = ["Descripcion", "Tipo", "Editorial","Precio"]
// NOTE(review): `row` is assigned without var/let/const in this snippet —
// confirm it is declared elsewhere.
row = []
// Turn each book object into an array whose order matches `col`.
for (let i = 0; i < librosData.length; i++){
var temp = [librosData[i].descripcion, librosData[i].tipo, librosData[i].editorial, librosData[i].precio];
row.push(temp)
}
// `doc` and `id` come from the surrounding generatePDF code.
doc.autoTable(col,row, {startY:380});
doc.save(id+".pdf")
I know all the data is coming correctly this is how the row is printing:
Any help or documentation is appreciated.
Final Update
To fix the error saying that autoTable is not a function, see:
Solution: StackOverflow

In the end to make the table I recommend this:
To do the table: How to export a table with jsPDF
To fix the compatibility: autoTable is not a function
To fix "deprecated autoTable initiation":
Initialize your autoTable the following way
import autoTable from 'jspdf-autotable'
// Call the imported autoTable function with the document as first argument,
// instead of calling doc.autoTable(...).
// `col` is wrapped in an array for `head` while `row` is passed as-is for
// `body` — presumably because `head` takes a list of header rows; confirm
// against the jspdf-autotable documentation.
autoTable(doc, {
head: [col],
body: row
})

Scrape Javascript-generated page using Scrapy

The following page gives access to product details by executing a Javascript request:
http://www.ooshop.com/ContentNavigation.aspx?TO_NOEUD_IDMO=N000000013143&FROM_NOEUD_IDMO=N000000013131&TO_NOEUD_IDFO=81080&NOEUD_NIVEAU=2&UNIVERS_INDEX=3
Each product has the following element:
<a id="ctl00_cphC_pn3T1_ctl01_rp_ctl00_ctl00_lbVisu" class="prodimg" href="javascript:__doPostBack('ctl00$cphC$pn3T1$ctl01$rp$ctl00$ctl00$lbVisu','')"><img id="ctl00_cphC_pn3T1_ctl01_rp_ctl00_ctl00_iVisu" title="Visualiser la fiche détail" class="image" onerror="this.src='/Media/images/null.gif';" src="Media/ProdImages/Produit/Vignettes/3270190199359.gif" alt="Dés de jambon" style="height:70px;width:70px;border-width:0px;margin-top:15px"></a>
I tried to use FormRequest from the Scrapy library to crawl these pages, but it does not seem to work:
<python>
import scrapy
from scrapy.http import FormRequest
from JStest.items import JstestItem
class ooshoptest2(scrapy.Spider):
    """Follow the ASP.NET ``__doPostBack`` product links on an ooshop listing page.

    Each product anchor has an href of the form
    ``javascript:__doPostBack('<event target>','')``. Instead of a real URL,
    the site expects the page's form to be re-submitted with ``__EVENTTARGET``
    set to that target, so ``parse`` issues one form POST per product and
    ``parse_level1`` handles the resulting detail page.
    """

    name = "ooshoptest2"
    allowed_domains = ["ooshop.com"]
    start_urls = ["http://www.ooshop.com/courses-en-ligne/ContentNavigation.aspx?TO_NOEUD_IDMO=N000000013143&FROM_NOEUD_IDMO=N000000013131&TO_NOEUD_IDFO=81080&NOEUD_NIVEAU=2&UNIVERS_INDEX=3"]

    def parse(self, response):
        """Yield one postback request for every product link on the page."""
        path = '//div[@class="blockInside"]//ul/li/a'
        for balise in response.xpath(path):
            href = balise.xpath('@href').extract_first(default='')
            # Take the first quoted argument of __doPostBack(...). It is sent
            # verbatim: wrapping it in extra quotes makes the server ignore it.
            target = href.partition("__doPostBack('")[2].partition("'")[0]
            if not target:
                # Not a postback link (plain URL or missing href); skip it.
                continue
            # from_response copies the page's hidden form fields (for ASP.NET
            # these normally include __VIEWSTATE and __EVENTVALIDATION), which
            # the server needs in order to accept the postback.
            # dont_click=True stops Scrapy from also simulating a click on a
            # submit button, which would compete with __EVENTTARGET.
            yield FormRequest.from_response(
                response,
                formdata={'__EVENTTARGET': target, '__EVENTARGUMENT': ''},
                callback=self.parse_level1,
                dont_click=True,
                dont_filter=True,
            )

    def parse_level1(self, response):
        """Print the product pop-up markup and yield an (empty) item for it."""
        path = '//div[@class="popContent"]'
        test = response.xpath(path).extract_first()
        if test is None:
            # The postback did not return the expected pop-up block; report it
            # instead of raising IndexError on an empty selector list.
            self.logger.warning("No popContent block found in %s", response.url)
            return
        print(test)
        item = JstestItem()
        yield item
Does anyone know how to make this work?
Many thanks!

Trouble parsing complex JSON response using JQuery (or just javascript)

Given the following function to grab JSON data from a Solr instance:
// Solr select query; the parameters ask for a JSON response (wt=json) grouped by title (group=true, group.field=title).
var url = "http://myserver:8080/solr/select?indent=on&version=2.2&q=(title:*Hollis* OR sub_title:*Hollis*+OR+creator:*Hollis*+OR+publisher:*Hollis*+OR+format:*Hollis*++OR+lcsh:*Hollis*++OR+loc_call_num_subject:*Hollis*+OR+note:*Hollis*++OR+toc:*Hollis*)AND+Match_Confidence:[.75+TO+*]&start=0&rows=3500&fl=Geocoded_Field,title,id_inst,Match_Confidence,Conjunct_Longitude1,Conjunct_Latitude1,Anchor,note,creator,format,language,pub_location,publisher,score&wt=json&group=true&group.field=title";
// Fetch the query result and log one title from the parsed response.
$.getJSON(url, function(data){
// NOTE(review): in the sample response, `groups` is an array and each group's documents sit under `doclist.docs`,
// so `groups.docs` is undefined here and indexing it with [0] throws -- verify against the sample data.
console.log("EXAMPLE TYPE:"+data.grouped.title.groups.docs[0].title);
});
How do I cycle through each response 'doc' (data posted below) and grab each "title" occurrence for example? I've tried numerous variations of data.grouped.title.groups.docs[0].title with no luck. I am thinking I just have the incorrect order in the data access string (data.grouped.title.groups.docs.title).
Sample data returned from query as copied and pasted from a browser:
{ "responseHeader":{
"status":0,
"QTime":902,
"params":{
"indent":"on",
"wt":"json",
"version":"2.2",
"rows":"3500", "fl":"Geocoded_Field,title,id_inst,Match_Confidence,Conjunct_Longitude1,Conjunct_Latitude1,Anchor,note,creator,format,language,pub_location,publisher,score",
"start":"0",
"q":"(title:*Hollis* OR sub_title:*Hollis* OR creator:*Hollis* OR publisher:*Hollis* OR format:*Hollis* OR lcsh:*Hollis* OR loc_call_num_subject:*Hollis* OR note:*Hollis* OR toc:*Hollis*)AND Match_Confidence:[.75 TO *]",
"group.field":"title",
"group":"true"}}, "grouped":{
"title":{
"matches":2533,
"groups":[{
"groupValue":"Thomas Hollis [and] Thomas Brand Hollis",
"doclist":{"numFound":3,"start":0,"maxScore":0.75592893,"docs":[
{
"title":"Thomas Hollis [and] Thomas Brand Hollis",
"Match_Confidence":0.894584,
"Conjunct_Latitude1":41.89,
"Conjunct_Longitude1":12.5,
"Geocoded_Field":[
"note"],
"pub_location":[
"1752"],
"Anchor":[
"Roma"],
"id_inst":[
"009360446"],
"language":["English"],
"format":["Other"],
"note":[
"Two bust portraits carved in bas-relief. One is of Thomas Hollis, the elder (1659-1731), and one of his friend and heir, Thomas Brand Hollis (ca.1719-1804). Portraits are framed together",
"Inscribed on front of each sculpture: Andrea Pozzi fece dal naturale; verso of Thomas Hollis inscribed: Ritratto del Sig: re Tommaso / Hollis, Cau: re Inglese, Termina= / to in Roma nel suo Giorno Nat= / alizio de i 14 Aprile 1752 in / Et di 32 Ani; verso of Thomas Brand Hollis inscribed: Ritratto dell'Illust: o Sig: re / Tommaso Brand, Caualiere / Inglese, Fatto in Roma / Nell' Anno 1752",
"Title taken from plaques",
"Framed and glazed"],
"creator":["Pozzi, Andrea, 1718-1769","Hollis, Thomas, 1720-1774, former owner"],
"score":0.75592893}]
}},
{
"groupValue":"The post of duty",
"doclist":{"numFound":24,"start":0,"maxScore":0.5459487,"docs":[
{
"title":"The post of duty",
"Match_Confidence":0.985783,
"Conjunct_Latitude1":42.4842,
"Conjunct_Longitude1":-76.4799,
"Geocoded_Field":[
"lcsh"],
"pub_location":[
"Coxsackie, N. Y"],
"Anchor":[
"Lansing"],
"id_inst":[
"006317718"],
"language":["English"],
"format":["Book"],
"note":[
"Published by request"],
"publisher":[
"F.C. Dedrick, Printer"],
"creator":["Zabriskie, Francis Nicoll, 1832-1891"],
"score":0.5459487}]
}},
{
"groupValue":"Discourses concerning government: in way of dialogue",
"doclist":{"numFound":1,"start":0,"maxScore":0.41996053,"docs":[
{
"title":"Discourses concerning government: in way of dialogue",
"Match_Confidence":0.95121,
"Conjunct_Latitude1":51.5142,
"Conjunct_Longitude1":-0.093145,
"Geocoded_Field":[
"pub_location"],
"pub_location":[
"London"],
"Anchor":[
"London"],
"id_inst":[
"006199101"],
"language":["English"],
"format":["Book"],
"note":[
"First published 1681 under title: Plato redivivus",
"Bound in old mottled calf, rebacked"],
"publisher":[
"Printed, and sold by A. Baldwin"],
"creator":["Neville, Henry, 1620-1694","Hollis, Thomas, 1720-1774, former owner"],
"score":0.41996053}]
}}]}}}

See this jsfiddle http://jsfiddle.net/ByxHV/
// Walk every title group in the grouped Solr response and log its
// group value together with the number of documents found for it.
data.grouped.title.groups.forEach(function (group) {
    console.log(group.groupValue, group.doclist.numFound);
});

I used JSON Pretty Print to examine the JSON
// Collect the title of every document in every title group, in response order.
var titleList = [];
$.each(data.grouped.title.groups, function (groupIndex, group) {
    // Each group keeps its documents under doclist.docs.
    var docs = group.doclist.docs;
    $.each(docs, function (docIndex, doc) {
        titleList.push(doc.title);
    });
});

Develop Reference

JavaScript is the programming language of the Web.

Scrapy crawling not working on ASPX website - javascript

Related

How can table_name be a property in my post query?

jQUERY, FLASK: Bad request (400) while trying to send a Javascript variable to a Python script

How to export an Array to PDF using jsPDF?

Scrape Javascript-generated page using Scrapy

Trouble parsing complex JSON response using JQuery (or just javascript)

Categories

Resources