How to extract direct HTML data into VBA - javascript

<div class="r_title">
<h1 data-securitycontent="name">Fidelity® Japan Smaller Companies</h1>
<span class="gry"> FJSCX</span>
<span data-msat="span-securityInformation-star" class="r_star3"></span>
How would I go about extracting r_star3 from this? r_star3 represents 3 stars. So far, I'm able to get the inner text of it, but the stars are symbols so its blank and r_star3 seems to be its own class. I'd like to just extract r_star3 as a string and use if statements to see how many stars it is. Anything helps, thanks.
Edit:
Here's what I have so far using query selector, but the querySelector prints out [object HTMLSpanElement]. I only pasted in the relevant code. This is the site where the stars are located (right by the ticker signs).
.navigate "http://www.morningstar.com/funds/xnas/" & Range("A" & Row.Row).Value & "/quote.html"
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
While ie.readyState <> 4
Wend
Application.Wait (Now + TimeValue("0:00:04"))
Dim tblName As Object
Dim span As Object
On Error Resume Next
'FIND THE STAR (Work in Progress)
Set tblName = doc.getElementsByClassName("reports_nav")(0)
Set span = tblName.getElementsByTagName("span").Item(1)
Dim s As String, rating As Long
s = doc.querySelector("span[class*=""r_star""]")
MsgBox (s)
rating = Replace(Split(Split(s, "class=" & Chr$(34))(1), Chr$(34))(0), "r_star", vbNullString)
Range("C" & Row.Row).Value = rating
MsgBox (rating)

You can obtain target HTML with CSS selector e.g. the following will get the element in question:
span[data-msat="span-securityInformation-star"]
which returns:
Parse result:
You can then parse the OuterHTML from the returned element to get the start rating.
Code:
Option Explicit
Public Sub Get_Information()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "http://www.morningstar.com/funds/xnas/seatx/quote.html"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Dim a As Object, exitTime As Date
exitTime = Now + TimeSerial(0, 0, 5)
Do
DoEvents
On Error Resume Next
Set a = .document.querySelector("span[data-msat=""span-securityInformation-star""]") '<== Loop until time out checking if element has been found and set
On Error GoTo 0
If Now > exitTime Then Exit Do
Loop While a Is Nothing
If a Is Nothing Then Exit Sub
Dim rating As Long
rating = Replace(Split(Split(a.outerHTML, "class=" & Chr$(34))(1), Chr$(34))(0), "r_star", vbNullString)
MsgBox rating
.Quit
End With
End Sub

Related

Web scraping data displayed inside button with no name

I'm trying to extract values stored inside different buttons on the webpage.
It seems button of each variant has no name, they are just called "variant__box", which are under "variants" div class.
As far as I can tell, values are loaded by javascript on each variant__box.
This is the website to get the data:
https://www.honda.co.uk/motorcycles/range/adventure/crf1100l-africa-twin-adventure-sports/specifications-and-price.html#/
This is the code I've written so far
Dim ie As Object
Dim html As New HTMLDocument
Dim address, str As String
Dim jobDetailsList As Object
Dim jobitem As Object
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
While ie.Busy Or ie.readyState < 4
DoEvents
Wend
Set html = ie.document
Set jobDetailsList = html.getElementsByClassName("variants")
For Each jobitem In jobDetailsList
jobitem.Click
str = jobitem.innerText
ActiveSheet.Cells(i, 5).Value = str
i = i + 1
Next jobitem
Set html = Nothing
ie.Quit
Set ie = Nothing
It returns nothing.
If you want to use the IE you can use the following code. But SIM's suggestion is better because IE is then omitted.
Sub ScrapeMotorCycleData()
Dim ie As Object
Dim address, str As String
Dim jobDetailsList As Object
Dim jobitem As Object
Dim i As Long
i = 2
address = "https://www.honda.co.uk/motorcycles/range/adventure/crf1100l-africa-twin-adventure-sports/specifications-and-price.html#/"
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
'The following line doesn't do what you want
'While ie.Busy Or ie.readyState < 4: DoEvents: Wend
'You nee a loop here to wait for loading the dynamic content
'Ask for the HTML part you want to scrape
'(No timeout included here, but it can be programmed)
Do
Set jobDetailsList = ie.document.getElementsByClassName("variant__wrapper")
Loop Until jobDetailsList.Length > 0
For Each jobitem In jobDetailsList
ActiveSheet.Cells(i, 5).Value = jobitem.innerText
i = i + 1
Next jobitem
ie.Quit
Set ie = Nothing
End Sub

How Excel VBA to make ticker in url for data scraping

Per previous case, I tried avoid ticker in url by adding an variable from .querySelector("#autocomplete_input").value = ThisWorkbook.Worksheets("SHEET1").Cells(1, 1). It's go to annual financials well but not success into quarter financials.querySelector("[value^='/investing/stock/" &.querySelector("#autocomplete_input").value &"/financials/income/quarter']"). It doesn’t work Is there any way I can use it, thanks?
Public Sub makeselections()
Dim ie As New InternetExplorer
With ie
.Visible = True
.Navigate2 "https://www.marketwatch.com/investing/stock/aapl/financials"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#autocomplete_input").value = ThisWorkbook.Worksheets("SHEET1").Cells(1, 1)
.querySelector("#investing_ac_button").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim ele As Object
With .document
Do
Set ele = .querySelector("[value^='/investing/stock/" &.querySelector("#autocomplete_input").value &"/financials/income/quarter']")
On Error Resume Next
Loop While ele Is Nothing
.querySelector("[value^='/investing/stock/" &.querySelector("#autocomplete_input").value &"/financials/income/quarter']").Selected = True
.querySelector(".financials select").FireEvent "onchange"
stop
End With
.Quit
End With
End Sub
Make sure you are working with lowercase tickers. Consider also using a timed loop as in this SO answer.
Public Sub MakeSelections()
Dim ie As New InternetExplorer, var As String
var = ThisWorkbook.Worksheets("Sheet1").Cells(1, 1).Value
With ie
.Visible = True
.Navigate2 "https://www.marketwatch.com/investing/stock/aapl/financials"
While .Busy Or .readyState < 4: DoEvents: Wend
With .document
.querySelector("#autocomplete_input").Value = var
.querySelector("#investing_ac_button").Click
End With
While .Busy Or .readyState < 4: DoEvents: Wend
Dim ele As Object
With .document
Do
On Error Resume Next
Set ele = .querySelector("[value^='/investing/stock/" & LCase$(var) & "/financials/income/quarter']")
On Error GoTo 0
Loop While ele Is Nothing
ele.Selected = True
.querySelector(".financials select").FireEvent "onchange"
Stop
End With
.Quit
End With
End Sub

Searching websites using VBA

What I would like to do is to search a website using VBA, putting some words in the left box and getting results on the right.
The problem is that I don't know HTML and I don't know how to refer to this box. I use GetElementByID but I received error in line:
objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka".
"Object doesn't support this property or method".
Here's my code:
Sub www()
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Top = 0
objIE.Left = 0
objIE.Width = 800
objIE.Height = 600
objIE.AddressBar = 0
objIE.StatusBar = 0
objIE.Toolbar = 0
objIE.Visible = True
objIE.Navigate ("https://pl.pons.com/tłumaczenie-tekstu")
Do
DoEvents
Loop Until objIE.ReadyState = 4
pagesource = objIE.Document.Body.Outerhtml
objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka"
objIE.Document.GetElementByID("qKeyboardInputInitiator").Click
Do
DoEvents
Loop Until objIE.ReadyState = 4
End Sub
Without changing any language settings, the following translates "Hello"
Code:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, html As HTMLDocument, translation As String
Const TRANSLATION_STRING As String = "Hello"
With IE
.Visible = True
.navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"
While .Busy Or .readyState < 4: DoEvents: Wend
Set html = .document
With html
.querySelector("textarea.text-translation-source.source").Value = TRANSLATION_STRING
.querySelector("button.btn.btn-primary.submit").Click
Application.Wait Now + TimeSerial(0, 0, 3)
translation = .querySelector("div.translated_text").innerText
End With
Debug.Print translation
'Quit '<== Remember to quit application
End With
End Sub
View:
Print out in immediate window:
Edit:
Late bound version
Option Explicit
Public Sub GetInfo()
Dim IE As Object, html As Object
With CreateObject("InternetExplorer.Application")
.Visible = True
.navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"
While .Busy Or .readyState < 4: DoEvents: Wend
Set html = CreateObject("htmlfile")
Set html = .document
With html
.getElementsByClassName("text-translation-source source")(0).innerText = "Translate"
.getElementsByClassName("btn btn-primary submit")(0).Click
Application.Wait Now + TimeSerial(0, 0, 2)
Dim i As Long
For i = 0 To .getElementsByClassName("text-translation-target target").Length - 1
Debug.Print .getElementsByClassName("text-translation-target target")(i).innerText
Next i
Stop
End With
.Quit
End With
End Sub
Element with ID "text-translation-video-ad" is a DIV which does not have .Value property. You want to access text area which is descendant of mentioned DIV.
There are 2 elements with tag "textarea" on page, the one which interests you is 1st element, therefore (0) index. Tags in GetElementsByTagName must be capitalized.
objIE.Document.GetElementsByTagName("TEXTAREA")(0).Value = "piłka"
You can also resign from IE automation and take a faster and more reliable approach, without browser automation, which will give you response in JSON format. Setting reference to Microsoft HTML Object Library is required.
Option Explicit
Public Sub Scrape()
Dim WindHttp As Object: Set WindHttp = CreateObject("WinHTTP.WinHTTPRequest.5.1")
Dim htmlDoc As New HTMLDocument
Dim urlName As String, myWord As String, requestString As String
Dim myResults() As String
Dim resultNum As Long
urlName = "https://pl.pons.com/_translate/translate"
myWord = "piłka"
requestString = "source_language=pl&target_language=en&service=deepl&text=" & _
myWord & _
"&lookup=true&requested_by=Web&source_language_confirmed=true"
Set htmlDoc = postDocument(urlName, WindHttp, requestString)
myResults = Split(Replace(Split(Split(htmlDoc.body.innerText, ",")(1), ":")(1), Chr(34), vbNullString), vbCrLf)
For resultNum = LBound(myResults) To UBound(myResults)
Debug.Print myResults(resultNum)
Next resultNum
End Sub
Function postDocument(ByVal urlName As String, myRequest As Object, Optional requestString As String) As HTMLDocument
Set postDocument = New HTMLDocument
With myRequest
.Open "POST", urlName, False
.setRequestHeader "Cache-Control", "no-cache"
.setRequestHeader "Pragma", "no-cache"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
If requestString = vbNullString Then
.send
Else
.send requestString
End If
postDocument.body.innerHTML = .responseText
End With
End Function

VBA webscraping across multiple tables and pages of Javascript website

I have a program written in VBA which scrapes the first table of the website i am working with. I added a component for it to loop through all the items on that page by clicking the next button to view the next 50 results.
What I am having trouble with is coding which table I am referencing. My code only takes the first table on the webpage and I need all the tables but I also need the program to click through all the results.
Here is my code:
Sub ETFDat()
Dim ie As Object, i As Long, strText As String
Dim jj As Long
Dim hBody As Object, hTR As Object, hTD As Object
Dim tb As Object, bb As Object, Tr As Object, Td As Object, ii As Long
Dim doc As Object, hTable As Object
Dim y As Long, z As Long, wb As Excel.Workbook, ws As Excel.Worksheet
Set wb = Excel.ActiveWorkbook
Set ws = wb.ActiveSheet
Set ie = CreateObject("InternetExplorer.Application")
ie.Visible = True
y = 1 'Column A in Excel
z = 1 'Row 1 in Excel
Sheets("Fund Basics").Activate
Cells.Select
Selection.Clear
ie.navigate "http://www.etf.com/channels/smart-beta-etfs/channels/smart- beta-etfs?qt-tabs=0#qt-tabs" ', , , , "Content-Type: application/x-www-form-urlencoded" & vbCrLf
Do While ie.busy: DoEvents: Loop
Do While ie.ReadyState <> 4: DoEvents: Loop
Set doc = ie.document
Set hTable = doc.getElementsByTagName("table") '.GetElementByID("tablePerformance")
ii = 1
Do While ii <= 17
For Each tb In hTable
Set hBody = tb.getElementsByTagName("tbody")
For Each bb In hBody
Set hTR = bb.getElementsByTagName("tr")
For Each Tr In hTR
Set hTD = Tr.getElementsByTagName("td")
y = 1 ' Resets back to column A
For Each Td In hTD
ws.Cells(z, y).Value = Td.innerText
y = y + 1
Next Td
DoEvents
z = z + 1
Next Tr
Exit For
Next bb
Exit For
Next tb
With doc
Set elems = .getElementsByTagName("a")
For Each e In elems
If (e.getAttribute("id") = "nextPage") Then
e.Click
Exit For
End If
Next e
End With
ii = ii + 1
Application.Wait (Now + TimeValue("00:00:05"))
Loop
MsgBox "Done"
End Sub
I think you would have an easier time if you called their json files and then parsed it in your VBS code.
http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/50/50/1
http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/50/50/2
http://www.etf.com/etf-finder-channel-tag/Smart-Beta%20ETFs/-aum/50/50/3
...

Downloading File from IE using VBA

I am currently working on a VBA code that retrieves the top file from this website (http://infopost.bwpmlp.com/Posting/default.aspx?Mode=Display&Id=27&tspid=100000). I am able to click on the button using Javascript in my code, and I am able to click on open after the download is kicked off. However, I am having trouble saving the file. Because the workbook is being pulled from a website there really isn't a way to set it to the active workbook that I can think of. Currently when I do ActiveWorkbook.SaveAs the code is saving the blank workbook that I am testing the code out of. The file I downloaded seems to not open until the entire code is done running even after I try putting in breaks. Anyone have any ideas? My code is below. Thanks!
Option Explicit
Dim ie As InternetExplorer
Dim h As LongPtr
Private Declare PtrSafe Function FindWindowEx Lib "user32" Alias "FindWindowExA" (ByVal hWnd1 As LongPtr, ByVal hWnd2 As LongPtr, ByVal lpsz1 As String, ByVal lpsz2 As String) As LongPtr
Sub Texas_Gas()
Application.DisplayAlerts = True
Dim ie As Object
Dim IeHandle As Long, FileDownloadHandle As Long, OpenButtonHandle As Long, IePopupBarHandle As Long
Dim AutoMode As Boolean, FileDownloadClassicPopup As Boolean, DownloadComplete As Boolean
Dim Timeout As Date
Dim strSPICE As String, strLink As String
Dim PopupGap As Integer, i As Integer
Set ie = CreateObject("InternetExplorer.Application")
DownloadComplete = False
FileDownloadClassicPopup = False
FileDownloadHandle = 0
IePopupBarHandle = 0
With ie
.Visible = True
.navigate "http://infopost.bwpmlp.com/Posting/default.aspx? Mode=Display&Id=27&tspid=100000"
Do While .Busy: DoEvents: Loop
Do While .readyState <> 4: DoEvents: Loop
End With
ie.document.parentWindow.execScript "javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions(""dgITMatrix:0:lnkBtnDownload"", """", true, """", """", false, true))"
Dim o As IUIAutomation
Dim e As IUIAutomationElement
Dim iCnd As IUIAutomationCondition
Set o = New CUIAutomation
h = ie.Hwnd
h = FindWindowEx(h, 0, "Frame Notification Bar", vbNullString)
If h = 0 Then Exit Sub
Set e = o.ElementFromHandle(ByVal h)
Set iCnd = o.CreatePropertyCondition(UIA_NamePropertyId, "Open")
Dim Button As IUIAutomationElement
Set Button = e.FindFirst(TreeScope_Subtree, iCnd)
Dim InvokePattern As IUIAutomationInvokePattern
Set InvokePattern = Button.GetCurrentPattern(UIA_InvokePatternId)
InvokePattern.Invoke
ActiveWorkbook.SaveAs "I:\Cap_Rel\raw_scrapes\Texas_Gas_Transmission\parsed\Texas_Gas_Transmission_CapRel" & Format(Date - 1, "yyyymmdd") & "MACRO", FileFormat:=xlCSV
End Sub

Categories

Resources