HTMLUnit not working with Ajax/Javascript - javascript

I am trying to extract data for a class project from a webpage (a page that shows search results). Specifically, it's this page:
http://www.target.com/c/xbox-one-games-video/-/N-55krw#navigation=true&category=55krw&searchTerm=&view_type=medium&sort_by=bestselling&faceted_value=&offset=60&pageCount=60&response_group=Items&isLeaf=true&parent_category_id=55kug&custom_price=false&min_price=from&max_price=to
I just want to extract the titles of the products.
I'm using the following code:
final WebClient webClient = new WebClient(BrowserVersion.CHROME);
webClient.getOptions().setThrowExceptionOnScriptError(false);
webClient.getOptions().setJavaScriptEnabled(true);
webClient.setAjaxController(new NicelyResynchronizingAjaxController());
final HtmlPage page = webClient.getPage(itemPageURL);
int tries = 20; // Amount of tries to avoid infinite loop
while (tries > 0) {
tries--;
synchronized(page) {
page.wait(2000); // How often to check
}
}
int numThreads = webClient.waitForBackgroundJavaScript(1000000l);
PrintWriter pw = new PrintWriter("test-target-search.txt");
pw.println(page.asXml());
pw.close();
The page that results does not have the product information that's shown on the web browser. I imagine the AJAX calls haven't completed? (not sure though.)
Any help would greatly be appreciated. Thanks!

You can use GET requests for such task. Control the page by the "pageCount" and "offset" argument in the URL, after retrieving the page (the example below does this for one page) you can use regex or whatever the content is in (JSON?) to extract the titles.
public static void main(String[] args)
{
try
{
WebClient webClient = new WebClient();
URL url = new URL(
"http://tws.target.com/searchservice/item/search_results/v1/by_keyword?callback=getPlpResponse&navigation=true&category=55krw&searchTerm=&view_type=medium&sort_by=bestselling&faceted_value=&offset=60&pageCount=60&response_group=Items&isLeaf=true&parent_category_id=55kug&custom_price=false&min_price=from&max_price=to");
WebRequest requestSettings = new WebRequest(url, HttpMethod.GET);
requestSettings.setAdditionalHeader("Accept", "*/*");
requestSettings.setAdditionalHeader("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8");
requestSettings.setAdditionalHeader("Referer", "http://www.target.com/c/xbox-one-games-video/-/N-55krw");
requestSettings.setAdditionalHeader("Accept-Language", "en-US,en;q=0.8");
requestSettings.setAdditionalHeader("Accept-Encoding", "gzip,deflate,sdch");
requestSettings.setAdditionalHeader("Accept-Charset", "ISO-8859-1,utf-8;q=0.7,*;q=0.3");
Page page = webClient.getPage(requestSettings);
System.out.println(page.getWebResponse().getContentAsString());
}
catch (Exception e)
{
e.printStackTrace();
}
}

Related

How to call Export PDF with Reserved.ReportViewerWebControl.axd....URL from BackEnd C# Code

My Aim is to call ExportBaseUrl Link , that is given by Rdlc Exprot PDF button , on C# side, i want to do that because there are 285 reports and each one have diff parameters so this will take a lot time.
I have worked on one solution but that take 15 min to load 2 page RDLC to pdf.
Its taking time due to Response coming late or some deadlock is happening,
This is what i am doing.
JS file
var reportViewerName = ControlName; //Name attribute of report viewer control.
var src_url = $find(reportViewerName)._getInternalViewer().ExportUrlBase + 'PDF';
var contentDisposition = 'AlwaysInline'; //Content Disposition instructs the server to either return the PDF being requested as an attachment or a viewable report.
var src_new = src_url.replace(/(ContentDisposition=).*?(&)/, '$1' + contentDisposition + '$2');
window.open("/printPDF.asx?url=" + encodeURIComponent("http://localhost:5402"+src_new));
PrintPDF.aspx File is like this
using iText.Kernel.Pdf;
using iText.Kernel.Pdf.Action;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Threading.Tasks;
using System.Web;
namespace WebApp.WebAPI
{
/// <summary>
/// Summary description for printPDF
/// </summary>
public class printPDF : IHttpHandler
{
public void ProcessRequest(HttpContext context)
{
context.Response.ContentType = "text/plain";
context.Response.Write("Hello World");
string url = context.Request.QueryString["url"];
// Download data.
DownloadFile(url, context.Server.MapPath("~/pdf/pdffile.pdf"), context.Request).Wait();
PdfDocument pdfDoc = new PdfDocument(new PdfReader(context.Server.MapPath("~/pdf/pdffile.pdf")), new PdfWriter(context.Server.MapPath("~/pdf/pdffileAuto.pdf")));
// add content
PdfAction action = PdfAction.CreateJavaScript("this.print({bUI: true, bSilent: true, bShrinkToFit: true});");
pdfDoc.GetCatalog().SetOpenAction(action);
pdfDoc.Close();
context.Response.Clear();
context.Response.ContentType = "application/pdf";
context.Response.AddHeader("Content-Disposition",
"AlwaysInline;filename=\"FileName.pdf\"");
context.Response.BinaryWrite(File.ReadAllBytes(context.Server.MapPath("~/pdf/pdffileAuto.pdf")));
context.Response.Flush();
context.Response.End();
}
public async Task DownloadFile(string url, string destinationPath, HttpRequest req)
{
var request = (HttpWebRequest)WebRequest.Create(url);
request.Method = "GET";
var encoding = new UTF8Encoding();
request.Headers.Add(HttpRequestHeader.AcceptLanguage, "en-IN");
request.Headers.Add(HttpRequestHeader.AcceptEncoding, "gzip, deflate");
request.Accept = "text/html, application/xhtml+xml, image/jxr, */*";
request.UserAgent = "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko";
request.KeepAlive = true;
request.Proxy = null;
request.CookieContainer = new CookieContainer();
Uri target = new Uri("http://localhost:5402/");
foreach (String item in req.Cookies)
{
request.CookieContainer.Add(new Cookie(item, req.Cookies[item].Value) { Domain = target.Host });
}
await request.GetResponseAsync().ContinueWith(t1 =>
{
using (var responseStream = t1.Result.GetResponseStream())
{
if (responseStream == null)
return;
int bufferSize = 1024;
byte[] buffer = new byte[bufferSize];
int bytesRead = 0;
using (FileStream fileStream = File.Create(destinationPath))
{
while ((bytesRead = responseStream.Read(buffer, 0, bufferSize)) != 0)
{
fileStream.Write(buffer, 0, bytesRead);
}
}
}
t1.Result.Close();
});
}
public bool IsReusable
{
get
{
return false;
}
}
}
}
line
await request.GetResponseAsync().ContinueWith(t1 =>
with async and without async takes around 15 min time once, second time it go to deadlock/Freez
also i had to add cookie due to the url was throwing 500 internal server error.
And if i call url direct on browser it runs in 1 sec.
So if anyone know what is issue or can help then that would be really big help.
Thanks for Help in Advance.
Ok
I found what issue is,
Issue is tab request PrintPDF.aspx and that page request other URL on same site.
So untill PrintPDF.aspx response complete to tab that (HttpWebRequest) is not being called.
Any reason why? i set maxconnection in web.config though
I had to fix it by making 2 diff files, first ASPX file which show page and generate pdf in thread and on page load call Ashx file which checks if file is generated or not, if generated then return file.
there was not other way, so i fixed it by 2 links redirect call.
Thank Everyone who helped me.

How to display and interact with OKHTTP html response in android Studio using webview or Web browser

I am building an android app. I have build a request using OKHTTP and I get the response as a string composed of html css and js content. This response is actualy a form that the user must use to allow the app to communicate with a given website.
Now I want the user to be able to see that response as an html page and clicks on a button to allow the communictaion. Only problem I don't know how to display that response as an html in webview or in the web browser.
From the MainActivity:
Authenticate myAouth = new Authenticate("myCostumerKey","mySecretKey");
try {
myResponse=myAouth.run("myUrlHere");
//System.out.println( myResponse);
} catch (Exception e) {
e.printStackTrace();
}
the Autheticate class
public class Authenticate {
private final OkHttpClient client;
String[] myResponse =new String[2];
public Authenticate( final String consumerKey, final String consumerSecret) {
client = new OkHttpClient.Builder()
.authenticator(new Authenticator() {
#Override public Request authenticate(Route route, Response response) throws IOException {
if (response.request().header("Authorization") != null) {
return null; // Give up, we've already attempted to authenticate.
}
System.out.println("Authenticating for response: " + response);
System.out.println("Challenges: " + response.challenges());
String credential = Credentials.basic(consumerKey, consumerSecret);
Request myRequest =response.request().newBuilder()
.header("Authorization", credential)
.build();
HttpUrl myURL = myRequest.url();
myResponse[0]= String.valueOf(myURL);
return myRequest;
}
})
.build();
}
#RequiresApi(api = Build.VERSION_CODES.KITKAT)
public String[] run(String url) throws Exception {
Request request = new Request.Builder()
.url(url)
.build();
try (Response response = client.newCall(request).execute()) {
if (!response.isSuccessful()) throw new IOException("Unexpected code " + response);
myResponse[1]=response.body().string();
System.out.println(" URL is "+myResponse[0]+" my response body is "+myResponse[1]);
}
return myResponse;
}}
Any help would be apriciated.
Kind Regards
You can use the following code to convert the String to HTML and then display it in a WebView
try {
String html = new String(response, "UTF-8");
String mime = "text/html";
String encoding = "utf-8";
myWebView.getSettings().setJavaScriptEnabled(true);
myWebView.loadDataWithBaseURL(null, html, mime, encoding, null);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
}
}

Ways to get data from localhost port using CefSharp

currently developing a .net C# application which is showing a web browser. But since visual studio web browser is still using ie7 and does not support quite lots of things, I plan to put in the CefSharp which is the Chromium. So, have you guys every try get some json data from a localhost server using CefSharp? I have tried two ways to get it but failed.
For C# in Visual Studio, I fired the Chromium browser like this:
var test = new CefSharp.WinForms.ChromiumWebBrowser(AppDomain.CurrentDomain.BaseDirectory + "html\\index.html")
{
Dock = DockStyle.Fill,
};
this.Controls.Add(test);
Then for the index.html, it is require to get data from local host port 1000 after it loaded. I have tried two ways for the javascript:
First using XMLHttpRequest:
var xmlhttp = new XMLHttpRequest();
var url = "http://localhost:1000/api/data1";
var services;
xmlhttp.onreadystatechange = function () {
if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
services = jQuery.parseJSON(xmlhttp.responseText);
}
}
xmlhttp.open("GET", url, true);
xmlhttp.send();
Secondly using jquery's .get():
$.get("http://localhost:1000/api/data1", function (data) {
var services = data;
});
But both ways can't return the data. If I put the index.html into normal browser like Chrome or Firefox, I am able to get the data.
Is it something missing in my coding? Any ideas what's wrong guys?
I am using Chromium web browser and making GET request to localhost for JSON. Along with this i am running a webserver which keeps on listening and return JSON.
Webserver:
public class WebServer
{
public WebServer()
{
}
void Process(object o)
{
Thread thread = new Thread(() => new WebServer().Start());
thread.Start();
HttpListenerContext context = o as HttpListenerContext;
HttpListenerResponse response = context.Response;
try
{
string json;
string url = context.Request.Url.ToString();
if (url.Contains("http://localhost:8888/json"))
{
List<SampleObject> list = new List<SampleObject>();
json = JsonConvert.SerializeObject(new
{
results = list
});
byte[] decryptedbytes = new byte[0];
decryptedbytes = System.Text.Encoding.UTF8.GetBytes(json);
response.AddHeader("Content-type", "text/json");
response.ContentLength64 = decryptedbytes.Length;
System.IO.Stream output = response.OutputStream;
try
{
output.Write(decryptedbytes, 0, decryptedbytes.Length);
output.Close();
}
catch (Exception e)
{
response.StatusCode = 500;
response.StatusDescription = "Server Internal Error";
response.Close();
Console.WriteLine(e.Message);
}
}
}
catch (Exception ex)
{
response.StatusCode = 500;
response.StatusDescription = "Server Internal Error";
response.Close();
Console.WriteLine(ex);
}
}
static byte[] GetBytes(string str)
{
byte[] bytes = new byte[str.Length * sizeof(char)];
System.Buffer.BlockCopy(str.ToCharArray(), 0, bytes, 0, bytes.Length);
return bytes;
}
public void Start()
{
HttpListener server = new HttpListener();
server.Prefixes.Add("http://localhost:8888/json");
server.Start();
while (true)
{
ThreadPool.QueueUserWorkItem(Process, server.GetContext());
}
}
}
public class SampleObject
{
string param1 { get; set; }
string param2 { get; set; }
string param3 { get; set; }
}
To Start Webserver:
Thread thread = new Thread(() => new WebServer().Start());
thread.Start();
Index.html
<!DOCTYPE html>
<html>
<head>
<script src="https://ajax.googleapis.com/ajax/libs/jquery/1.11.3/jquery.min.js"></script>
<script>
$(document).ready(function(){
$.get("http://localhost:8888/json", function (data) {
var jsonData= data;
});
});
</script>
</head>
<body>
<p>Example JSON Request.</p>
</body>
</html>
Before launching Index.html inside Chromium web browser, start running webserver to listen for requests. After document load event it makes a ajax call, then it hits the Webserver then Webserver returns JSON. You can test it using Chrome also. Start webserver and type the URL(http://localhost:8888/json) in address bar you will see returned JSON in Developers tools.
Note: Code is not tested. Hope it will work.

display byte array as image in angular js

I have an image in database. I want to retreive the image and display it in web page using angular js. After fetching data i have byte array. How do i send the data to the html page. I am having issues with the below code.. Kindly help.
When i click the link to view the image, page is sending 2 get requests to the server instead of one. It is sending request continuously for 2 times.
Note : I have tried using the below link.. But it didn't work
AngularJS - Show byte array content as image
Below is my js file
app.controller('aboutCtrl', function($scope,$http,$location) {
$scope.message = 'This is Add new order screen';
var url = '/Angular/login';
$http.get(url).success(function(result) {
$scope.image = result.image;
})
});
//html
<img data-ng-src="data:image/PNG;base64,{{image}}">
Java class file
public String getImage() throws FileNotFoundException{
String byteStr=null;
try
{
Connection con= DbConnect.getConnection();
String sql ="select * from image_data where id=1";
PreparedStatement ps=con.prepareStatement(sql);
ResultSet rs = ps.executeQuery();
while(rs.next())
{
Blob b=rs.getBlob(2);
byte barr[]=b.getBytes(1,(int)b.length());
byteStr = new String(barr);
}
}catch(Exception e){
e.printStackTrace();
}
return byteStr;
}
Servlet Code
protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
// TODO Auto-generated method stub
System.out.println("inside get");
JSONObject result= new JSONObject();
Retreive rt = new Retreive();
result.put("image", rt.getImage());
response.setContentType("application/json");
PrintWriter out = response.getWriter();
out.write(result.toString());
out.flush();
out.close();
}
Seems like your image is not base64 encoded string but still that byte array. You need to encoded it first in order to use it like this <img data-ng-src="data:image/PNG;base64,{{image}}">
Check the documentation AngularJS - Show byte array content as image
You can also try this approach https://gist.github.com/jonleighton/958841

Create application based on Website

I searched and tried a lot to develop an application which uses the content of a Website. I just saw the StackExchange app, which looks like I want to develop my application. The difference between web and application is here:
Browser:
App:
As you can see, there are some differences between the Browser and the App.
I hope somebody knows how to create an app like that, because after hours of searching I just found the solution of using a simple WebView (which is just a 1:1 like the browser) or to use Javascript in the app to remove some content (which is actually a bit buggy...).
To repeat: the point is, I want to get the content of a website (on start of the app) and to put it inside my application.
Cheers.
What you want to do is to scrape the websites in question by getting their html code and sorting it using some form of logic - I recomend xPath for this. then you can implement this data into some nice native interface.
You need however to be very aware that the data you get is not allways formated the way you want so all of your algorithems have to be very flexible.
the proccess can be cut into steps like this
retrive data from website (DefaultHttpClient and AsyncTask)
analyse and retrive relevant data (your relevant algorithm)
show data to user (Your interface implementation)
UPDATE
Bellow is some example code to fetch some data of a website it implements html-cleaner libary and you will need to implement this in your project.
class GetStationsClass extends AsyncTask<String, String, String> {
#Override
protected String doInBackground(String... params) {
HttpClient httpclient = new DefaultHttpClient();
httpclient.getParams().setParameter(CoreProtocolPNames.PROTOCOL_VERSION, HttpVersion.HTTP_1_1);
httpclient.getParams().setParameter(CoreProtocolPNames.HTTP_ELEMENT_CHARSET, "iso-8859-1");
HttpPost httppost = new HttpPost("http://ntlive.dk/rt/route?id=786");
httppost.setHeader("Accept-Charset", "iso-8859-1, unicode-1-1;q=0.8");
try {
// Add your data
List<NameValuePair> nameValuePairs = new ArrayList<NameValuePair>(3);
httppost.setEntity(new UrlEncodedFormEntity(nameValuePairs, "utf-8"));
// Execute HTTP Post Request
HttpResponse response = httpclient.execute(httppost);
int status = response.getStatusLine().getStatusCode();
String data = "";
if (status != HttpStatus.SC_OK) {
ByteArrayOutputStream ostream = new ByteArrayOutputStream();
response.getEntity().writeTo(ostream);
data = ostream.toString();
} else {
BufferedReader reader = new BufferedReader(new InputStreamReader(response.getEntity().getContent(),
"iso-8859-1"));
String line = null;
while ((line = reader.readLine()) != null) {
data += line;
}
XPath xpath = XPathFactory.newInstance().newXPath();
try {
Document document = readDocument(data);
NodeList nodes = (NodeList) xpath.evaluate("//*[#id=\"container\"]/ul/li", document,
XPathConstants.NODESET);
for (int i = 0; i < nodes.getLength(); i++) {
Node thisNode = nodes.item(i);
Log.v("",thisNode.getTextContent().trim);
}
} catch (XPathExpressionException e) {
e.printStackTrace();
}
}
} catch (Exception e) {
e.printStackTrace();
}
return null;
}
#Override
protected void onPostExecute(String result) {
super.onPostExecute(result);
//update user interface here
}
}
private Document readDocument(String content) {
Long timeStart = new Date().getTime();
TagNode tagNode = new HtmlCleaner().clean(content);
Document doc = null;
try {
doc = new DomSerializer(new CleanerProperties()).createDOM(tagNode);
return doc;
} catch (ParserConfigurationException e) {
e.printStackTrace();
} catch (Exception e) {
e.printStackTrace();
}
return doc;
}
to run the code above use
new getStationsClass.execute();

Categories

Resources