Get content inside of script tag - javascript

Hello everyone, I'm trying to fetch the content inside of a script tag.
http://www.teknosa.com/urunler/145051447/samsung-hm1500-bluetooth-kulaklik
this is the website.
Also, this is the script tag whose content I want to read.
$.Teknosa.ProductDetail = {"ProductComputedIndex":145051447,"ProductName":"SAMSUNG HM1500 BLUETOOTH KULAKLIK","ProductSeoName":"samsung-hm1500-bluetooth-kulaklik","ProductBarcode":"8808993790425","ProductPriceInclTax":79.9,"ProductDiscountedPriceInclTax":null,"ProductStockQuantity":1,"ProductMinStockQuantity":null,"ProductShortDescription":null,"ProductFullDescription":null,"ProductModelName":"HM1500","ProductAdminComment":null,"ProductMetaTitle":null,"ProductMetaKeywords":null,"ProductMetaDescription":null,"ProductBrandId":299,"ProductBrandName":"SAMSUNG","ProductBrandImageName":"//img-teknosa.mncdn.com/StaticContent/images/Brand/SAMSUNG-medium.png","ProductCommentCout":29,"ProductQuestionAnswerCout":0,"ProductRatingStar":4,"ProductType":1,"ProductOriginalComputedIndex":null,"ProductIsSolo":false,"ProductIsClickCollect":true,"ProductStoreStockAmount":1,"ProductGroupDisplayName":null,"ProductOrigin":"PRC","ProductIsTss":false,"ProductIsKit":false,"AddBasketButtonType":0,"ProductViewType":0,"ProductDetailDefaultPicture":"145051447-1-samsung-hm1500-bluetooth-kulaklik.jpg","ProductRatingStarText":"Çok İyi","ProductPrice":"79,9","IsThereOutletProduct":false,"ProductIsActiveProductOriginal":false,"ProductErpCatalogCode":"_TELEKOM","ProductErpCategoryCode":"_BLUETOOTH_KULAKLIKLAR1636","ProductCategory":{"CategoryName":"Bluetooth Kulaklık ve Kit","CategorySeoName":"bluetooth-kulaklik-ve-kit","CategoryDescription":null,"CategoryParentId":134,"CategoryLevel":2,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":{"CategoryName":"Telefon 
Aksesuarları","CategorySeoName":"telefon-aksesuarlari","CategoryDescription":null,"CategoryParentId":108,"CategoryLevel":1,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":{"CategoryName":"Telefon","CategorySeoName":"telefon","CategoryDescription":null,"CategoryParentId":null,"CategoryLevel":0,"CategoryMetaTitle":null,"CategoryMetaKeywords":null,"CategoryMetaDescription":null,"Parent":null,"DisplayOrder":6,"StatusId":100110,"StartDate":"\/Date(1434351061000)\/","EndDate":null,"Id":108},"DisplayOrder":3,"StatusId":100110,"StartDate":"\/Date(1434351245000)\/","EndDate":null,"Id":134},"DisplayOrder":3,"StatusId":100110,"StartDate":"\/Date(1434351367000)\/","EndDate":null,"Id":173},"ProductDetailPictures":[{"ProductPictureName":"145051447-1-samsung-hm1500-bluetooth-kulaklik.jpg","ProductPictureOrder":1,"ProductPictureIsDefault":true},{"ProductPictureName":"145051447-2-samsung-hm1500-bluetooth-kulaklik.jpg","ProductPictureOrder":2,"ProductPictureIsDefault":false}],"ProductDetailAttributes":[{"Key":"Ağırlık","Value":"18.1","UnitItemName":"gr","ProductAttributeDisplayOrder":0,"DisplayOrder":2,"Description":null},{"Key":"Model","Value":"HM1500","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":4,"Description":null},{"Key":"Şarj Kullanım Süresi","Value":"2 Saat","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":80,"Description":null},{"Key":"Bekleme Süresi (Saat)","Value":"250 Saat (Maks.)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":116,"Description":null},{"Key":"Kullanım Mesafesi","Value":"10 m. 
(Maks.)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":145,"Description":null},{"Key":"Bluetooth Profili","Value":"HSP (Kulaklık), HFP (Ahizesiz)","UnitItemName":null,"ProductAttributeDisplayOrder":0,"DisplayOrder":149,"Description":null}],"ProductSuggestions":[],"ProductContents":[],"ProductKitItems":[],"ProductVideos":[],"ProductGroups":[],"ProductBadges":[{"BadgeItemBadgeId":7,"BadgeItemApplicationId":1,"BadgeItemText":null,"BadgeItemImageName":"//img-teknosa.mncdn.com/StaticContent/images/Badge/ucretsiz-kargo.png","BadgeItemDescription":null,"BadgeItemPagePosition":"ImageBottom","BadgeItemImagePosition":null,"BadgeItemDisplayView":"ProductDetail","BadgeItemType":"Image","BadgeItemDynamicType":"WebStock","BadgeItemDynamicTypeText1":null,"BadgeItemDynamicTypeText2":null,"BadgeItemDynamicTypeCalculationType":null,"BadgeItemDynamicTypeDisplayType":null,"BadgeItemEvaluationExpression":null,"BadgeItemClassName":null,"DisplayOrder":0,"StatusId":100110,"StartDate":"\/Date(1474440397000)\/","EndDate":null,"Id":5}],"DisplayOrder":1000,"StatusId":100110,"StartDate":"\/Date(1429000863000)\/","EndDate":null,"Id":4715};
And I tried this.
# Request the product page and pass the response to self.parseProduct.
# The 'splash' meta entry names the render.html endpoint with a 0.09 wait
# (presumably consumed by a Splash middleware so the page's JavaScript runs
# first — confirm); 'url' carries the original URL along in the meta dict.
yield scrapy.Request(response.urljoin(url), callback = self.parseProduct, meta={
'splash': {
'endpoint': 'render.html',
'args': {'wait': 0.09}},
'url': url
})
def parseProduct(self, response):
    """Collect the property names assigned under ``$.Teknosa.ProductDetail``.

    Takes the text of the script selected by
    ``/html/body/div[1]/div[2]/script[2]``, parses it with js2xml, and adds
    one empty dict to ``data_bundles`` for every matching assignment whose
    bracket-accessor property string can be read.
    """
    data_bundles = {}
    script = response.xpath('/html/body/div[1]/div[2]/script[2]/text()').extract_first()
    print(script)
    jstree = js2xml.parse(script)
    # XPath attribute tests are written with '@' (here: @name).
    for a in jstree.xpath('//assign[left//property/identifier/@name="$.Teknosa.ProductDetail" and right/object]'):
        bundle_prop = a.xpath('./left/bracketaccessor/property/string/text()')
        print(bundle_prop)
        # xpath() returns a list, never None; test for emptiness so that
        # indexing [0] cannot raise IndexError on a node without a match.
        if bundle_prop:
            curr_prop = bundle_prop[0]
            data_bundles[curr_prop] = {}
Thanks for your help.

This should do it:
response.xpath("//script[re:test(text(),'Teknosa.ProductDetail =','i')]").extract()
You can select the script tag that contains "Teknosa.ProductDetail =" in its text.
Edit:
If you want to load the JavaScript object from the script, extract the text of the script tag and load it with Python's json module.
import json

# Select the text of the <script> whose body assigns Teknosa.ProductDetail.
xp = "//script[re:test(text(),'Teknosa.ProductDetail =','i')]/text()"
# Raw string: "\{" in an ordinary literal is an invalid escape sequence.
# The greedy ".+" extends to the last "}" it can reach on that line, so the
# whole object literal after " = " is captured.
data = response.xpath(xp).re(r" = (\{.+\})")[0]
# The captured literal is valid JSON, so it can be decoded directly.
data = json.loads(data)
print(data['ProductBarcode'])
> '8808993790425'

Related

Displaying remote images in a firefox add-on panel

I'm trying to display remote images in a FireFox add-on panel, but the src attributes are being converted from something like this:
http://example.com/image.jpg
to something like this:
resource://addon_name/data/%22http://example.com/image.jpg%22
I can't figure out if I'm breaking a security policy or not.
In my add-on script (index.js) I'm retrieving image URLs using the sdk/request API and passing them to my content script (data/my-panel.js). My data/my-panel.js file is creating DOM elements in my panel file (data/popup.html) – including images – using the URLs passed from index.js. Here are the relevant bits of code:
index.js
var Request = require("sdk/request").Request;

// Panel that displays popup.html and runs my-panel.js as its content script.
var panel = require("sdk/panel").Panel({
  width: 500,
  height: 500,
  contentURL: "./popup.html",
  contentScriptFile: "./my-panel.js"
});

// Fetch the remote document, then open the panel and send it the parsed data.
Request({
  url: url,
  onComplete: function(response) {
    // Get the JSON data. Kept local so it does not leak an implicit global.
    var json = response.json;
    // Launch the popup/panel.
    panel.show();
    panel.port.emit("sendJSON", json);
  }
}).get();
data/my-panel.js
// Content script for the panel: for every entry of json.docs it appends one
// <li> (title and description text) to #myList and one <img> to #myImgs.

// Holders shared by every iteration of the handler below.
// `list` is declared but not used in this script.
var title;
var desc;
var list;
var titleTextNode;
var descTextNode;
// Runs each time the "sendJSON" message arrives on self.port.
self.port.on("sendJSON", function(json) {
json.docs.forEach(function(items) {
// JSON.stringify returns JSON text, so a string value comes back wrapped in
// double quotes (items.object is presumably a URL string — confirm).
title = JSON.stringify(items.sourceResource.title);
desc = JSON.stringify(items.sourceResource.description);
// `img` has no var declaration anywhere in this script.
img = JSON.stringify(items.object);
console.log(img);
var node = document.createElement("li"); // Create a <li> node
var imgTag = document.createElement("img"); // Create a <img> node
// The stringified values are used as the attribute values as-is.
imgTag.setAttribute('src', img);
imgTag.setAttribute('alt', desc);
imgTag.style.width= '25px';
titleTextNode = document.createTextNode(title);
descTextNode = document.createTextNode(desc);
node.appendChild(titleTextNode); // Append the text to <li>
node.appendChild(descTextNode); // Append the text to <li>
document.getElementById("myList").appendChild(node); // Append <li> to <ul> with id="myList"
document.getElementById("myImgs").appendChild(imgTag);
});
});
The console.log(img) line is displaying the URLs correctly, but not in popup.html...
<!DOCTYPE html>
<html>
<head>
</head>
<body>
<ul id="myList"></ul>
<p id="myImgs"></p>
</body>
</html>
How can I make the images' src attributes point directly to the remote URLs?
Thanks!
I figured out what I was doing wrong. Using JSON.stringify() on the img URL was adding double quotes around it. Removing it fixed the images:
img = items.object;
I'm not so sure about SDK permissions, but as a last resort you can turn the remote url into a resource URI like this -
var res = Services.io.getProtocolHandler("resource").QueryInterface(Ci.nsIResProtocolHandler);
res.setSubstitution("rawr", Services.io.newURI('http://www.bing.com/',null,null));
// now try navigating to resource://rawr it will load bing
Then you can load resource://rawr and it should work.

HTML tag appears empty when parsing it with BeautifulSoup but has content when opened in browser

I have an issue when parsing an HTML page with BS4. I have a hidden div in an HTML page whose content I want to read using BeautifulSoup. Its content is generated dynamically by a JavaScript function that is triggered via the body's onload.
The problem is: when I call the page in my browser, the tag has the content it is supposed to have. When I parse the same page via BS4, the tag is empty.
I could not find any information with regards to BS4 not being able to handle onload javascript-generated content, so not sure what the issue may be here.
Python script:
import urllib.request
from bs4 import BeautifulSoup
import time
import datetime
# Fetch every page listed in pageListFile, read the comma-separated text of
# its #testDivCSS element, and write each distinct entry once to resultFile.
eT = time.time()
version = 1
vNum = str(version)
# Zero-padded YYYYMMDD stamp used in the result file name.
dStr = datetime.datetime.now().strftime('%Y%m%d')
resultFile = 'output/classAndIdList-' + dStr + '-v' + vNum + '.txt'
pageListFile = 'input/quickListFR.txt'
urlRoot = 'http://dev.example.com/'
ciList = []
# The with-block closes both files even when a download or parse step raises.
with open(pageListFile, mode='r', encoding='utf-8') as f, open(resultFile, 'w') as fOut:
    for line in f:
        u = line.rstrip()
        url = urlRoot + u
        html_content = urllib.request.urlopen(url)
        time.sleep(1)
        html_text = html_content.read()
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(html_text, 'html.parser')
        ciTag = soup.find(id="testDivCSS")
        print(ciTag)
        if ciTag is None:
            # find() returns None when no element carries that id.
            print(u + '... testDivCSS not found')
            continue
        ciArray = ciTag.get_text().split(',')
        for c in ciArray:
            # Record and write each distinct entry once, in first-seen order.
            if c not in ciList:
                ciList.append(c)
                fOut.write(c + '\n')
                print(c)
        print(u + '... DONE')
Example result page via BeautifulSoup:
Example-page-1.html... DONE
<div id="testDivCSS" style="display: none;"> </div>
And the div in the browser (indicating that the php and javascript parts work fine):
<div id="testDivCSS" style="display: none;">div#menu_rightup,div#social,div#sidebar,div#specific,div#menu_rightdown,div#footer</div>
BeautifulSoup cannot handle content that is generated dynamically by JavaScript.
You may use browser automation tools (such as selenium) to help get the whole page (including dynamic part) first, then use BeautifulSoup to parse the page.
Refer to this question:
How to retrieve the values of dynamic html content using Python

dynamic html body script Var not defined?

I'm working with amazon payments.
I'm dynamically inserting two scripts, one in head and one in body. But the SCRIPT element in my document BODY says the variable "OffAmazonPayments" in HEAD is not defined. The Variable in question is dynamically inserted into the HEAD of the document before the script in BODY, and is available from the console.
# Creates two <script> elements — an inline one that defines
# window.onAmazonLoginReady, and one whose src is Amazon's sandbox
# Widgets.js — and inserts both before the document's first <script>.
setupAmzn: () ->
amazonLoginReady = document.createElement('script')
# Inline script body: sets the Amazon client id when the login library is ready.
amazonLoginReady.innerHTML = "
window.onAmazonLoginReady = function() {
amazon.Login.setClientId('CLIENT_ID_OBFU');
}
";
# External script: the browser fetches it from the src URL.
amazonWidgetJS = document.createElement('script');
amazonWidgetJS.src = "https://static-na.payments-amazon.com/OffAmazonPayments/us/sandbox/js/Widgets.js";
# Use the first existing <script> as the insertion anchor; both new elements
# go in front of it, the login-ready hook first.
s = document.getElementsByTagName('script')[0]
s.parentNode.insertBefore(amazonLoginReady, s)
s.parentNode.insertBefore(amazonWidgetJS, s)
# Builds the pay-button container (a button <div> plus an inline <script>)
# and appends it to the view's .proceed element.
renderAmzn: () ->
  console.log("adding the login and pay button scripts")
  # NOTE(review): the next line may originally have been the call
  # `@setupAmzn()` rather than a comment — confirm before enabling it.
  #setupAmzn()
  # create Login and Pay button
  payButtonContainer = document.createElement("div")
  payButtonContainer.setAttribute("class", "payButtonContainer")
  payButton = document.createElement("div")
  payButton.setAttribute("id", "AmazonPayButton")
  payButtonScript = document.createElement('script')
  payButtonScript.innerHTML = 'console.log(typeof OffAmazonPayments === "undefined")'
  # !!!!OffAmazonPayments is NOT in scope here
  # NOTE(review): presumably the externally loaded Widgets.js has not
  # finished loading when this inline script executes — confirm.
  payButtonContainer.appendChild payButton
  payButtonContainer.appendChild payButtonScript
  # Scope the lookup to this view with the view's own `$` helper.
  proceed = @$('.proceed')
  proceed.append(payButtonContainer)
renderAmzn() is called in the render function of my Backbone.view.
What am I doing wrong with the scoping of the variables? thank you.

'document.Form1.imgLoading' is null or not an object

Could someone help please, i'm stuck on the following error:
'document.Form1.imgLoading' is null or not an object control.js, line 55 character 2
Which is:
Line 55: document.Form1.imgLoading.style.width=370
From:
function setLoading(){
var strPath = "../images/"
var strFName = "searchingText.gif"
var strSearchPath = ""
//15/2if (!document.Form1.TextBoxLangID.value == "") { //SO LANG USED
// strSearchPath = strPath + document.Form1.TextBoxLangID.value + "/brand/" + strFName
//}
//else{
// strSearchPath = strPath + "brand/" + strFName
//15/2}
//document.Form1.imgSearchingText.src = strSearchPath
document.Form1.imgLoading.style.width=370
document.Form1.imgLoading.style.height=328
document.Form1.imgLoading.style.left = 327
document.Form1.imgLoading.style.top = 220
document.Form1.imgLoading.style.zIndex=2000'
The problem with your code is that you are referring to the control as form.controlname. In ASP.NET, the control names are generated like ctl$imgLoading, etc., depending on the master page / nested control configuration.
It is better to avoid document.Form1.imgLoading and instead refer to the element
by its id, using the native document.getElementById().
If you have the script code in the .aspx page, please use the code below to refer to the element:
// Look the image up by the client-side id that ASP.NET renders for the control.
var imgLoading=document.getElementById('<%= imgLoading.ClientID %>');
// CSS lengths need a unit; a bare number is ignored outside quirks mode.
imgLoading.style.width='370px';
If you have the script in a separate JS file,
make sure you set ClientIDMode="Static" (supported from ASP.NET 4.0 onward only) on the imgLoading control in the .aspx page, then:
// With a static client id the element can be looked up by its plain id.
var imgLoading=document.getElementById('imgLoading');
// CSS lengths need a unit; a bare number is ignored outside quirks mode.
imgLoading.style.width='370px';
I do not have enough reputation to comment, but I just want to say that if your <script> tags are in the <head> section, you should move them to the end of the <body> section, or run the script in the document's onload handler.
This is because the image that is in the <body> section of your document has yet to be loaded, so it is null or not an object.
Hope that helps.
If imgLoading is an asp:Image control, then you have to find it using the following code:
// CSS lengths need a unit; a bare number is ignored outside quirks mode.
document.getElementById('<%= imgLoading.ClientID %>').style.width = '370px'

flask forms and javascript

hello i have the html code below
<form>
<input type="text" id="filepathz" size="40" placeholder="Spot your project files">
<input type="button" id="spotButton" value="Spot">
</form>
the javascript code
// Run init once the page has finished loading.
window.onload = init;

// Attach the click handler to the Spot button.
function init() {
  document.getElementById("spotButton").onclick = handleButtonClick;
}
// Click handler for the Spot button: reads the #filepathz field and, when it
// is non-empty, navigates the current window to the checkme URL built from it.
function handleButtonClick(e) {
  var path = document.getElementById("filepathz").value;
  if (path == "") {
    alert("give a filepath");
    return;
  }
  var target = "http://localhost:5000/tx/checkme/filepathz=" + path;
  window.open (target,'_self',false);
}
and the python code on flask
def index():
    """Load the start page, where you select your project folder
    or load history projects from the local db.

    Renders ``init.html`` with the txclib version and the username returned
    by the project's host-credential lookup.
    """
    from txclib import get_version
    txc_version = get_version()
    prj = project.Project(path_to_tx)
    # Bind a default so the template still renders when the project lists no
    # resources; the loop body would otherwise never assign these names.
    username = password = None
    # Every resource rebinds the credentials, so the last resource's host wins.
    for res in prj.get_resource_list():
        hostname = prj.get_resource_host(res)
        username, password = prj.getset_host_credentials(hostname)
    return render_template('init.html', txc_version=txc_version, username=username)
@app.route('/tx/checkme/<filepathz>')
def checkme(filepathz):
    """Render init.html with the ``filepathz`` query-string value as txc_version."""
    # NOTE(review): this rebinds the URL path argument to the query-string
    # value; request.args.get returns None when the URL carries no
    # ?filepathz=... part.
    filepathz = request.args.get('filepathz')
    return render_template('init.html', txc_version=filepathz)
What am I doing wrong? I can't get the data from the form (filepathz) <--- I get None
You're not passing the variable correctly. There are two ways to pass the variable:
1) Pass it via get method:
http://localhost:5000/tx/checkme/?filepathz=" + path; (Note the '?')
Currently you are trying to get the variable from request.args but are not passing it in the request; that's why you get None.
2) Get it from the url with flask's url structure:
Do this in JS : http://localhost:5000/tx/checkme/" + path
And in your view:
@app.route('/tx/checkme/<filepathz>')
def checkme(filepathz):
    """Render init.html, passing the URL path segment through as txc_version."""
    # Flask passes the <filepathz> segment of the URL to the view as this
    # function argument, so it can be used directly.
    # NOTE(review): the default converter does not match '/', so a
    # multi-segment file path would need <path:filepathz> — confirm the
    # expected input.
    return render_template('init.html', txc_version=filepathz)

Categories

Resources