Web scraping data displayed inside button with no name - javascript

I'm trying to extract values stored inside different buttons on the webpage.
It seems button of each variant has no name, they are just called "variant__box", which are under "variants" div class.
As far as I can tell, values are loaded by javascript on each variant__box.
This is the website to get the data:
https://www.honda.co.uk/motorcycles/range/adventure/crf1100l-africa-twin-adventure-sports/specifications-and-price.html#/
This is the code I've written so far
Dim ie As Object
Dim html As New HTMLDocument
Dim address, str As String
Dim jobDetailsList As Object
Dim jobitem As Object
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
While ie.Busy Or ie.readyState < 4
DoEvents
Wend
Set html = ie.document
Set jobDetailsList = html.getElementsByClassName("variants")
For Each jobitem In jobDetailsList
jobitem.Click
str = jobitem.innerText
ActiveSheet.Cells(i, 5).Value = str
i = i + 1
Next jobitem
Set html = Nothing
ie.Quit
Set ie = Nothing
It returns nothing.

If you want to use the IE you can use the following code. But SIM's suggestion is better because IE is then omitted.
Sub ScrapeMotorCycleData()
Dim ie As Object
Dim address, str As String
Dim jobDetailsList As Object
Dim jobitem As Object
Dim i As Long
i = 2
address = "https://www.honda.co.uk/motorcycles/range/adventure/crf1100l-africa-twin-adventure-sports/specifications-and-price.html#/"
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
'The following line doesn't do what you want
'While ie.Busy Or ie.readyState < 4: DoEvents: Wend
'You nee a loop here to wait for loading the dynamic content
'Ask for the HTML part you want to scrape
'(No timeout included here, but it can be programmed)
Do
Set jobDetailsList = ie.document.getElementsByClassName("variant__wrapper")
Loop Until jobDetailsList.Length > 0
For Each jobitem In jobDetailsList
ActiveSheet.Cells(i, 5).Value = jobitem.innerText
i = i + 1
Next jobitem
ie.Quit
Set ie = Nothing
End Sub

Related

how to get value within java script tag in VBA

I hope you will be fine by the grace of God. As I get some values like price of an Item from a website successfully. But I am unable to get value like Seller ID="" which is in Script Tags. May I find this value through REGEX or something else. Please help.
Sub Test()
'Declaration
Dim ie As InternetExplorer
Dim ht As HTMLDocument
'Initialization
Set ie = New InternetExplorer
ie.navigate ("")
ie.Visible = True
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim Doc As HTMLDocument
Set Doc = ie.document
Dim sDD As String
sDD = Doc.getElementsByClassName("pdp-product-price")(0).getElementsByTagName("span")(0).innerText
MsgBox sDD
End Sub
Try this
Sub Test()
Dim ie As InternetExplorer, doc As HTMLDocument, sDD As String, sHTML As String
Set ie = New InternetExplorer
ie.Navigate ("https://www.daraz.pk/products/2019-kitchen-silicone-five-pointed-star-sink-filter-bathroom-floor-drains-shower-hair-sewer-filter-colanders-strainer-i132120426-s1292321303.html?spm=a2a0e.searchlist.list.62.50ac38869NB6RG&search=1")
ie.Visible = True
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
sDD = doc.getElementsByClassName("pdp-product-price")(0).getElementsByTagName("span")(0).innerText
Debug.Print sDD
sHTML = doc.body.outerHTML
Debug.Print Split(Split(sHTML, "seller_id=")(1), "&")(0)
End Sub

How to extract direct HTML data into VBA

<div class="r_title">
<h1 data-securitycontent="name">Fidelity® Japan Smaller Companies</h1>
<span class="gry"> FJSCX</span>
<span data-msat="span-securityInformation-star" class="r_star3"></span>
How would I go about extracting r_star3 from this? r_star3 represents 3 stars. So far, I'm able to get the inner text of it, but the stars are symbols so its blank and r_star3 seems to be its own class. I'd like to just extract r_star3 as a string and use if statements to see how many stars it is. Anything helps, thanks.
Edit:
Here's what I have so far using query selector, but the querySelector prints out [object HTMLSpanElement]. I only pasted in the relevant code. This is the site where the stars are located (right by the ticker signs).
.navigate "http://www.morningstar.com/funds/xnas/" & Range("A" & Row.Row).Value & "/quote.html"
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
While ie.readyState <> 4
Wend
Application.Wait (Now + TimeValue("0:00:04"))
Dim tblName As Object
Dim span As Object
On Error Resume Next
'FIND THE STAR (Work in Progress)
Set tblName = doc.getElementsByClassName("reports_nav")(0)
Set span = tblName.getElementsByTagName("span").Item(1)
Dim s As String, rating As Long
s = doc.querySelector("span[class*=""r_star""]")
MsgBox (s)
rating = Replace(Split(Split(s, "class=" & Chr$(34))(1), Chr$(34))(0), "r_star", vbNullString)
Range("C" & Row.Row).Value = rating
MsgBox (rating)
You can obtain target HTML with CSS selector e.g. the following will get the element in question:
span[data-msat="span-securityInformation-star"]
which returns:
Parse result:
You can then parse the OuterHTML from the returned element to get the start rating.
Code:
Option Explicit
Public Sub Get_Information()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "http://www.morningstar.com/funds/xnas/seatx/quote.html"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Dim a As Object, exitTime As Date
exitTime = Now + TimeSerial(0, 0, 5)
Do
DoEvents
On Error Resume Next
Set a = .document.querySelector("span[data-msat=""span-securityInformation-star""]") '<== Loop until time out checking if element has been found and set
On Error GoTo 0
If Now > exitTime Then Exit Do
Loop While a Is Nothing
If a Is Nothing Then Exit Sub
Dim rating As Long
rating = Replace(Split(Split(a.outerHTML, "class=" & Chr$(34))(1), Chr$(34))(0), "r_star", vbNullString)
MsgBox rating
.Quit
End With
End Sub

Searching websites using VBA

What I would like to do is to search a website using VBA, putting some words in the left box and getting results on the right.
The problem is that I don't know HTML and I don't know how to refer to this box. I use GetElementByID but I received error in line:
objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka".
"Object doesn't support this property or method".
Here's my code:
Sub www()
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Top = 0
objIE.Left = 0
objIE.Width = 800
objIE.Height = 600
objIE.AddressBar = 0
objIE.StatusBar = 0
objIE.Toolbar = 0
objIE.Visible = True
objIE.Navigate ("https://pl.pons.com/tłumaczenie-tekstu")
Do
DoEvents
Loop Until objIE.ReadyState = 4
pagesource = objIE.Document.Body.Outerhtml
objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka"
objIE.Document.GetElementByID("qKeyboardInputInitiator").Click
Do
DoEvents
Loop Until objIE.ReadyState = 4
End Sub
Without changing any language settings, the following translates "Hello"
Code:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, html As HTMLDocument, translation As String
Const TRANSLATION_STRING As String = "Hello"
With IE
.Visible = True
.navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"
While .Busy Or .readyState < 4: DoEvents: Wend
Set html = .document
With html
.querySelector("textarea.text-translation-source.source").Value = TRANSLATION_STRING
.querySelector("button.btn.btn-primary.submit").Click
Application.Wait Now + TimeSerial(0, 0, 3)
translation = .querySelector("div.translated_text").innerText
End With
Debug.Print translation
'Quit '<== Remember to quit application
End With
End Sub
View:
Print out in immediate window:
Edit:
Late bound version
Option Explicit
Public Sub GetInfo()
Dim IE As Object, html As Object
With CreateObject("InternetExplorer.Application")
.Visible = True
.navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"
While .Busy Or .readyState < 4: DoEvents: Wend
Set html = CreateObject("htmlfile")
Set html = .document
With html
.getElementsByClassName("text-translation-source source")(0).innerText = "Translate"
.getElementsByClassName("btn btn-primary submit")(0).Click
Application.Wait Now + TimeSerial(0, 0, 2)
Dim i As Long
For i = 0 To .getElementsByClassName("text-translation-target target").Length - 1
Debug.Print .getElementsByClassName("text-translation-target target")(i).innerText
Next i
Stop
End With
.Quit
End With
End Sub
Element with ID "text-translation-video-ad" is a DIV which does not have .Value property. You want to access text area which is descendant of mentioned DIV.
There are 2 elements with tag "textarea" on page, the one which interests you is 1st element, therefore (0) index. Tags in GetElementsByTagName must be capitalized.
objIE.Document.GetElementsByTagName("TEXTAREA")(0).Value = "piłka"
You can also resign from IE automation and take a faster and more reliable approach, without browser automation, which will give you response in JSON format. Setting reference to Microsoft HTML Object Library is required.
Option Explicit
Public Sub Scrape()
Dim WindHttp As Object: Set WindHttp = CreateObject("WinHTTP.WinHTTPRequest.5.1")
Dim htmlDoc As New HTMLDocument
Dim urlName As String, myWord As String, requestString As String
Dim myResults() As String
Dim resultNum As Long
urlName = "https://pl.pons.com/_translate/translate"
myWord = "piłka"
requestString = "source_language=pl&target_language=en&service=deepl&text=" & _
myWord & _
"&lookup=true&requested_by=Web&source_language_confirmed=true"
Set htmlDoc = postDocument(urlName, WindHttp, requestString)
myResults = Split(Replace(Split(Split(htmlDoc.body.innerText, ",")(1), ":")(1), Chr(34), vbNullString), vbCrLf)
For resultNum = LBound(myResults) To UBound(myResults)
Debug.Print myResults(resultNum)
Next resultNum
End Sub
Function postDocument(ByVal urlName As String, myRequest As Object, Optional requestString As String) As HTMLDocument
Set postDocument = New HTMLDocument
With myRequest
.Open "POST", urlName, False
.setRequestHeader "Cache-Control", "no-cache"
.setRequestHeader "Pragma", "no-cache"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
If requestString = vbNullString Then
.send
Else
.send requestString
End If
postDocument.body.innerHTML = .responseText
End With
End Function

Downloading File from IE using VBA

I am currently working on a VBA code that retrieves the top file from this website (http://infopost.bwpmlp.com/Posting/default.aspx?Mode=Display&Id=27&tspid=100000). I am able to click on the button using Javascript in my code, and I am able to click on open after the download is kicked off. However, I am having trouble saving the file. Because the workbook is being pulled from a website there really isn't a way to set it to the active workbook that I can think of. Currently when I do ActiveWorkbook.SaveAs the code is saving the blank workbook that I am testing the code out of. The file I downloaded seems to not open until the entire code is done running even after I try putting in breaks. Anyone have any ideas? My code is below. Thanks!
Option Explicit
Dim ie As InternetExplorer
Dim h As LongPtr
Private Declare PtrSafe Function FindWindowEx Lib "user32" Alias "FindWindowExA" (ByVal hWnd1 As LongPtr, ByVal hWnd2 As LongPtr, ByVal lpsz1 As String, ByVal lpsz2 As String) As LongPtr
Sub Texas_Gas()
Application.DisplayAlerts = True
Dim ie As Object
Dim IeHandle As Long, FileDownloadHandle As Long, OpenButtonHandle As Long, IePopupBarHandle As Long
Dim AutoMode As Boolean, FileDownloadClassicPopup As Boolean, DownloadComplete As Boolean
Dim Timeout As Date
Dim strSPICE As String, strLink As String
Dim PopupGap As Integer, i As Integer
Set ie = CreateObject("InternetExplorer.Application")
DownloadComplete = False
FileDownloadClassicPopup = False
FileDownloadHandle = 0
IePopupBarHandle = 0
With ie
.Visible = True
.navigate "http://infopost.bwpmlp.com/Posting/default.aspx? Mode=Display&Id=27&tspid=100000"
Do While .Busy: DoEvents: Loop
Do While .readyState <> 4: DoEvents: Loop
End With
ie.document.parentWindow.execScript "javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions(""dgITMatrix:0:lnkBtnDownload"", """", true, """", """", false, true))"
Dim o As IUIAutomation
Dim e As IUIAutomationElement
Dim iCnd As IUIAutomationCondition
Set o = New CUIAutomation
h = ie.Hwnd
h = FindWindowEx(h, 0, "Frame Notification Bar", vbNullString)
If h = 0 Then Exit Sub
Set e = o.ElementFromHandle(ByVal h)
Set iCnd = o.CreatePropertyCondition(UIA_NamePropertyId, "Open")
Dim Button As IUIAutomationElement
Set Button = e.FindFirst(TreeScope_Subtree, iCnd)
Dim InvokePattern As IUIAutomationInvokePattern
Set InvokePattern = Button.GetCurrentPattern(UIA_InvokePatternId)
InvokePattern.Invoke
ActiveWorkbook.SaveAs "I:\Cap_Rel\raw_scrapes\Texas_Gas_Transmission\parsed\Texas_Gas_Transmission_CapRel" & Format(Date - 1, "yyyymmdd") & "MACRO", FileFormat:=xlCSV
End Sub

Problems importing HTML from defined string

I have a piece of code that loads a website and clicks a link that opens a popup. The contents of this popup is what I need to be imported into Excel (VBA) so I can manipulate that data. The issue is that this link's web address always changes, but the link is always in the same place.
The following code defines the currently active IE instance's URL as "IEURL". I would like to use the code to import the table but I get an error "Run-time error '1004': The address of this site is not valid. Check the address and try again".
Sub Button1_Click()
Dim objIE As SHDocVw.InternetExplorer
Dim IEURL As String
LastRow = Range("A" & Rows.Count).End(xlUp).Offset(1).Row
Set objIE = New InternetExplorerMedium
'apiShowWindow objIE.hwnd, SW_MAXIMIZE
objIE.navigate "http://www.youtube.com"
objIE.Visible = True
Do While objIE.READYSTATE <> 4 And objIE.Busy
DoEvents
Loop
'Call Sleep
Application.Wait (Now + TimeValue("0:00:5"))
IEURL = objIE.LocationURL
ThisWorkbook.Sheets("Sheet1").Activate
Rows("6:250").Delete
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;IEURL", _
Destination:=Range("a6"))
.FieldNames = True
.RowNumbers = False
.FillAdjacentFormulas = False
.PreserveFormatting = False
.RefreshOnFileOpen = False
.BackgroundQuery = True
.RefreshStyle = xlInsertDeleteCells
.SavePassword = False
.SaveData = True
.AdjustColumnWidth = True
.RefreshPeriod = 0
.WebSelectionType = xlEntirePage
.WebFormatting = xlWebFormattingAll
.WebPreFormattedTextToColumns = False
.WebConsecutiveDelimitersAsOne = True
.WebSingleBlockTextImport = False
.WebDisableDateRecognition = False
.WebDisableRedirections = False
.Refresh BackgroundQuery:=False
End With
End Sub
Can anyone help me out here?
P.S. I just used YouTube as an example here, as it demonstrates the same problem as the actual website I am trying to import
The objIE.LocationURL property returns a string which you are storing in a string variable. When you later try to use that variable you should append it to a string rather than just putting the name of the variable inside the string. So change
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;IEURL", _
to
With ActiveSheet.QueryTables.Add(Connection:= _
"URL;" & IEURL, _
btw, in Excel 2013, your sample code fails at IEURL = objIE.LocationURL

Categories

Resources