Searching websites using VBA - javascript

What I would like to do is to search a website using VBA, putting some words in the left box and getting results on the right.
The problem is that I don't know HTML and I don't know how to refer to this box. I use GetElementByID but I received error in line:
objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka".
"Object doesn't support this property or method".
Here's my code:
Sub www()
Set objIE = CreateObject("InternetExplorer.Application")
objIE.Top = 0
objIE.Left = 0
objIE.Width = 800
objIE.Height = 600
objIE.AddressBar = 0
objIE.StatusBar = 0
objIE.Toolbar = 0
objIE.Visible = True
objIE.Navigate ("https://pl.pons.com/tłumaczenie-tekstu")
Do
DoEvents
Loop Until objIE.ReadyState = 4
pagesource = objIE.Document.Body.Outerhtml
objIE.Document.GetElementByID("text-translation-video-ad").Value = "piłka"
objIE.Document.GetElementByID("qKeyboardInputInitiator").Click
Do
DoEvents
Loop Until objIE.ReadyState = 4
End Sub

Without changing any language settings, the following translates "Hello"
Code:
Option Explicit
Public Sub GetInfo()
Dim IE As New InternetExplorer, html As HTMLDocument, translation As String
Const TRANSLATION_STRING As String = "Hello"
With IE
.Visible = True
.navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"
While .Busy Or .readyState < 4: DoEvents: Wend
Set html = .document
With html
.querySelector("textarea.text-translation-source.source").Value = TRANSLATION_STRING
.querySelector("button.btn.btn-primary.submit").Click
Application.Wait Now + TimeSerial(0, 0, 3)
translation = .querySelector("div.translated_text").innerText
End With
Debug.Print translation
'Quit '<== Remember to quit application
End With
End Sub
View:
Print out in immediate window:
Edit:
Late bound version
Option Explicit
Public Sub GetInfo()
Dim IE As Object, html As Object
With CreateObject("InternetExplorer.Application")
.Visible = True
.navigate "https://pl.pons.com/t%C5%82umaczenie-tekstu"
While .Busy Or .readyState < 4: DoEvents: Wend
Set html = CreateObject("htmlfile")
Set html = .document
With html
.getElementsByClassName("text-translation-source source")(0).innerText = "Translate"
.getElementsByClassName("btn btn-primary submit")(0).Click
Application.Wait Now + TimeSerial(0, 0, 2)
Dim i As Long
For i = 0 To .getElementsByClassName("text-translation-target target").Length - 1
Debug.Print .getElementsByClassName("text-translation-target target")(i).innerText
Next i
Stop
End With
.Quit
End With
End Sub

Element with ID "text-translation-video-ad" is a DIV which does not have .Value property. You want to access text area which is descendant of mentioned DIV.
There are 2 elements with tag "textarea" on page, the one which interests you is 1st element, therefore (0) index. Tags in GetElementsByTagName must be capitalized.
objIE.Document.GetElementsByTagName("TEXTAREA")(0).Value = "piłka"
You can also resign from IE automation and take a faster and more reliable approach, without browser automation, which will give you response in JSON format. Setting reference to Microsoft HTML Object Library is required.
Option Explicit
Public Sub Scrape()
Dim WindHttp As Object: Set WindHttp = CreateObject("WinHTTP.WinHTTPRequest.5.1")
Dim htmlDoc As New HTMLDocument
Dim urlName As String, myWord As String, requestString As String
Dim myResults() As String
Dim resultNum As Long
urlName = "https://pl.pons.com/_translate/translate"
myWord = "piłka"
requestString = "source_language=pl&target_language=en&service=deepl&text=" & _
myWord & _
"&lookup=true&requested_by=Web&source_language_confirmed=true"
Set htmlDoc = postDocument(urlName, WindHttp, requestString)
myResults = Split(Replace(Split(Split(htmlDoc.body.innerText, ",")(1), ":")(1), Chr(34), vbNullString), vbCrLf)
For resultNum = LBound(myResults) To UBound(myResults)
Debug.Print myResults(resultNum)
Next resultNum
End Sub
Function postDocument(ByVal urlName As String, myRequest As Object, Optional requestString As String) As HTMLDocument
Set postDocument = New HTMLDocument
With myRequest
.Open "POST", urlName, False
.setRequestHeader "Cache-Control", "no-cache"
.setRequestHeader "Pragma", "no-cache"
.setRequestHeader "Content-type", "application/x-www-form-urlencoded"
If requestString = vbNullString Then
.send
Else
.send requestString
End If
postDocument.body.innerHTML = .responseText
End With
End Function

Related

Web scraping data displayed inside button with no name

I'm trying to extract values stored inside different buttons on the webpage.
It seems button of each variant has no name, they are just called "variant__box", which are under "variants" div class.
As far as I can tell, values are loaded by javascript on each variant__box.
This is the website to get the data:
https://www.honda.co.uk/motorcycles/range/adventure/crf1100l-africa-twin-adventure-sports/specifications-and-price.html#/
This is the code I've written so far
Dim ie As Object
Dim html As New HTMLDocument
Dim address, str As String
Dim jobDetailsList As Object
Dim jobitem As Object
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
While ie.Busy Or ie.readyState < 4
DoEvents
Wend
Set html = ie.document
Set jobDetailsList = html.getElementsByClassName("variants")
For Each jobitem In jobDetailsList
jobitem.Click
str = jobitem.innerText
ActiveSheet.Cells(i, 5).Value = str
i = i + 1
Next jobitem
Set html = Nothing
ie.Quit
Set ie = Nothing
It returns nothing.
If you want to use the IE you can use the following code. But SIM's suggestion is better because IE is then omitted.
Sub ScrapeMotorCycleData()
Dim ie As Object
Dim address, str As String
Dim jobDetailsList As Object
Dim jobitem As Object
Dim i As Long
i = 2
address = "https://www.honda.co.uk/motorcycles/range/adventure/crf1100l-africa-twin-adventure-sports/specifications-and-price.html#/"
Set ie = CreateObject("InternetExplorer.Application")
ie.navigate address 'the one mentioned above
ie.Visible = False
'The following line doesn't do what you want
'While ie.Busy Or ie.readyState < 4: DoEvents: Wend
'You nee a loop here to wait for loading the dynamic content
'Ask for the HTML part you want to scrape
'(No timeout included here, but it can be programmed)
Do
Set jobDetailsList = ie.document.getElementsByClassName("variant__wrapper")
Loop Until jobDetailsList.Length > 0
For Each jobitem In jobDetailsList
ActiveSheet.Cells(i, 5).Value = jobitem.innerText
i = i + 1
Next jobitem
ie.Quit
Set ie = Nothing
End Sub

how to get value within java script tag in VBA

I hope you will be fine by the grace of God. As I get some values like price of an Item from a website successfully. But I am unable to get value like Seller ID="" which is in Script Tags. May I find this value through REGEX or something else. Please help.
Sub Test()
'Declaration
Dim ie As InternetExplorer
Dim ht As HTMLDocument
'Initialization
Set ie = New InternetExplorer
ie.navigate ("")
ie.Visible = True
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim Doc As HTMLDocument
Set Doc = ie.document
Dim sDD As String
sDD = Doc.getElementsByClassName("pdp-product-price")(0).getElementsByTagName("span")(0).innerText
MsgBox sDD
End Sub
Try this
Sub Test()
Dim ie As InternetExplorer, doc As HTMLDocument, sDD As String, sHTML As String
Set ie = New InternetExplorer
ie.Navigate ("https://www.daraz.pk/products/2019-kitchen-silicone-five-pointed-star-sink-filter-bathroom-floor-drains-shower-hair-sewer-filter-colanders-strainer-i132120426-s1292321303.html?spm=a2a0e.searchlist.list.62.50ac38869NB6RG&search=1")
ie.Visible = True
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Set doc = ie.document
sDD = doc.getElementsByClassName("pdp-product-price")(0).getElementsByTagName("span")(0).innerText
Debug.Print sDD
sHTML = doc.body.outerHTML
Debug.Print Split(Split(sHTML, "seller_id=")(1), "&")(0)
End Sub

How to simulate an "Focus" and "typing" Events

Trying to simulate onfocus and typing event, but it not work
Sub Login(MyLogin, MyPass)
Dim IEapp As InternetExplorer
Dim IeDoc As Object
Dim ieTable As Object
TaskKill "iexplore.exe"
Set IEapp = New InternetExplorer
IEapp.Visible = True
IEapp.Navigate "https://example.com/portal/en/login"
Do While IEapp.Busy: DoEvents: Loop: Do Until IEapp.readyState = READYSTATE_COMPLETE: DoEvents: Loop
Set IeDoc = IEapp.Document
With IeDoc.forms(2)
.Name.Value = MyLogin
.Name.Focus
.FireEvent ("onkeypress")
.FireEvent ("onchange")
.Password.Value = MyPass
.Password.Focus
.FireEvent ("onkeypress")
.FireEvent ("onchange")
End With
IeDoc.getElementsByClassName("form__button form__button--login-site")(1).Click
End Sub
How to call focus and typing events? Sendkeys is bad solution as it have Excel bug with Numlock
The event listeners for those elements indicate input events are watched for. You can create those and then fire.
Internet Explorer:
Option Explicit
Public Sub LogIn()
Dim ie As New InternetExplorer
With ie
.Visible = True
.Navigate2 "https://www.darsgo.si/portal/en/login"
While .Busy Or .readyState < 4: DoEvents: Wend
.document.querySelector(".LoginHeader + p a").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Dim event_onInput As Object
Set event_onInput = .document.createEvent("HTMLEvents")
event_onInput.initEvent "input", True, False
With .document.querySelector("#name")
.Value = "bobBuilder#banana.com"
.dispatchEvent event_onInput
End With
With .document.querySelector("#password")
.Value = "something"
.dispatchEvent event_onInput
End With
.document.querySelector(".form__button").Click
While .Busy Or .readyState < 4: DoEvents: Wend
Stop
.Quit
End With
End Sub
Selenium:
If you are prepared to use selenium basic it works just fine as follows.
After installing selenium go VBE > Tools > References and add a reference to selenium type library. You should use the latest ChromeDriver. The ChromeDriver may come installed already in the selenium folder - otherwise it needs to be added there.
Option Explicit
Public Sub Login()
Dim d As WebDriver
Set d = New ChromeDriver
Const URL = "https://www.darsgo.si/portal/en/login"
With d
.Start "Chrome"
.get URL
.FindElementByCss(".choose-language-popup__list li:nth-of-type(2) a").Click
.FindElementByCss(".choose-language-popup__icon-continue").Click
.FindElementByCss("p.registerHeader a").Click
.FindElementById("name").SendKeys "bob#builder.com"
.FindElementById("password").SendKeys "verySecret"
.FindElementByCss(".form__button").Click
Stop
.Quit
End With
End Sub
I think this would work for you:
Sub Login()
Dim IEapp As InternetExplorer
Dim IeDoc as Object
Dim ieTable As Object
TaskKill "iexplore.exe"
Set IEapp = New InternetExplorer
IEapp.Visible = True
IEapp.navigate "https://example.com/portal/en/login"
Do While IEapp.Busy: DoEvents: Loop: Do Until IEapp.readyState = READYSTATE_COMPLETE: DoEvents: Loop
Set IeDoc = IEapp.document
With IeDoc.forms(2)
.elements("name").Value = MyLogin
.elements("password").Value = MyPass
End With
IeDoc.forms(2).submit
End Sub

How to extract direct HTML data into VBA

<div class="r_title">
<h1 data-securitycontent="name">Fidelity® Japan Smaller Companies</h1>
<span class="gry"> FJSCX</span>
<span data-msat="span-securityInformation-star" class="r_star3"></span>
How would I go about extracting r_star3 from this? r_star3 represents 3 stars. So far, I'm able to get the inner text of it, but the stars are symbols so its blank and r_star3 seems to be its own class. I'd like to just extract r_star3 as a string and use if statements to see how many stars it is. Anything helps, thanks.
Edit:
Here's what I have so far using query selector, but the querySelector prints out [object HTMLSpanElement]. I only pasted in the relevant code. This is the site where the stars are located (right by the ticker signs).
.navigate "http://www.morningstar.com/funds/xnas/" & Range("A" & Row.Row).Value & "/quote.html"
Do
DoEvents
Loop Until ie.readyState = READYSTATE_COMPLETE
Dim doc As HTMLDocument
Set doc = ie.document
While ie.readyState <> 4
Wend
Application.Wait (Now + TimeValue("0:00:04"))
Dim tblName As Object
Dim span As Object
On Error Resume Next
'FIND THE STAR (Work in Progress)
Set tblName = doc.getElementsByClassName("reports_nav")(0)
Set span = tblName.getElementsByTagName("span").Item(1)
Dim s As String, rating As Long
s = doc.querySelector("span[class*=""r_star""]")
MsgBox (s)
rating = Replace(Split(Split(s, "class=" & Chr$(34))(1), Chr$(34))(0), "r_star", vbNullString)
Range("C" & Row.Row).Value = rating
MsgBox (rating)
You can obtain target HTML with CSS selector e.g. the following will get the element in question:
span[data-msat="span-securityInformation-star"]
which returns:
Parse result:
You can then parse the OuterHTML from the returned element to get the start rating.
Code:
Option Explicit
Public Sub Get_Information()
Dim IE As New InternetExplorer
With IE
.Visible = True
.navigate "http://www.morningstar.com/funds/xnas/seatx/quote.html"
While .Busy = True Or .readyState < 4: DoEvents: Wend
Dim a As Object, exitTime As Date
exitTime = Now + TimeSerial(0, 0, 5)
Do
DoEvents
On Error Resume Next
Set a = .document.querySelector("span[data-msat=""span-securityInformation-star""]") '<== Loop until time out checking if element has been found and set
On Error GoTo 0
If Now > exitTime Then Exit Do
Loop While a Is Nothing
If a Is Nothing Then Exit Sub
Dim rating As Long
rating = Replace(Split(Split(a.outerHTML, "class=" & Chr$(34))(1), Chr$(34))(0), "r_star", vbNullString)
MsgBox rating
.Quit
End With
End Sub

Downloading File from IE using VBA

I am currently working on a VBA code that retrieves the top file from this website (http://infopost.bwpmlp.com/Posting/default.aspx?Mode=Display&Id=27&tspid=100000). I am able to click on the button using Javascript in my code, and I am able to click on open after the download is kicked off. However, I am having trouble saving the file. Because the workbook is being pulled from a website there really isn't a way to set it to the active workbook that I can think of. Currently when I do ActiveWorkbook.SaveAs the code is saving the blank workbook that I am testing the code out of. The file I downloaded seems to not open until the entire code is done running even after I try putting in breaks. Anyone have any ideas? My code is below. Thanks!
Option Explicit
Dim ie As InternetExplorer
Dim h As LongPtr
Private Declare PtrSafe Function FindWindowEx Lib "user32" Alias "FindWindowExA" (ByVal hWnd1 As LongPtr, ByVal hWnd2 As LongPtr, ByVal lpsz1 As String, ByVal lpsz2 As String) As LongPtr
Sub Texas_Gas()
Application.DisplayAlerts = True
Dim ie As Object
Dim IeHandle As Long, FileDownloadHandle As Long, OpenButtonHandle As Long, IePopupBarHandle As Long
Dim AutoMode As Boolean, FileDownloadClassicPopup As Boolean, DownloadComplete As Boolean
Dim Timeout As Date
Dim strSPICE As String, strLink As String
Dim PopupGap As Integer, i As Integer
Set ie = CreateObject("InternetExplorer.Application")
DownloadComplete = False
FileDownloadClassicPopup = False
FileDownloadHandle = 0
IePopupBarHandle = 0
With ie
.Visible = True
.navigate "http://infopost.bwpmlp.com/Posting/default.aspx? Mode=Display&Id=27&tspid=100000"
Do While .Busy: DoEvents: Loop
Do While .readyState <> 4: DoEvents: Loop
End With
ie.document.parentWindow.execScript "javascript:WebForm_DoPostBackWithOptions(new WebForm_PostBackOptions(""dgITMatrix:0:lnkBtnDownload"", """", true, """", """", false, true))"
Dim o As IUIAutomation
Dim e As IUIAutomationElement
Dim iCnd As IUIAutomationCondition
Set o = New CUIAutomation
h = ie.Hwnd
h = FindWindowEx(h, 0, "Frame Notification Bar", vbNullString)
If h = 0 Then Exit Sub
Set e = o.ElementFromHandle(ByVal h)
Set iCnd = o.CreatePropertyCondition(UIA_NamePropertyId, "Open")
Dim Button As IUIAutomationElement
Set Button = e.FindFirst(TreeScope_Subtree, iCnd)
Dim InvokePattern As IUIAutomationInvokePattern
Set InvokePattern = Button.GetCurrentPattern(UIA_InvokePatternId)
InvokePattern.Invoke
ActiveWorkbook.SaveAs "I:\Cap_Rel\raw_scrapes\Texas_Gas_Transmission\parsed\Texas_Gas_Transmission_CapRel" & Format(Date - 1, "yyyymmdd") & "MACRO", FileFormat:=xlCSV
End Sub

Categories

Resources