PhantomJS push all onNavigationRequested callbacks to array - javascript

I have a PhantomJS script that checks every redirection and logs it to the console from the page.onNavigationRequested callback.
But when I try to catch all the URLs passed to the page.onNavigationRequested callback, push them to an array, and finally show all the URLs at the end of the script, it only shows the first redirection URL.
Can you please check the script and advise?
var page = require('webpage').create();
var sys = require('system');
var fs = require('fs');
var response = {};
var arrayOfResponses = [];
var pageUrl = 'http://example.com/r1.php';

phantom.onError = function (msg, trace) {
    phantom.exit(1);
};

function forceExit(){
    phantom.exit(0);
}

page.onNavigationRequested = function(url, type, willNavigate, main) {
    arrayOfResponses.push(url);
}

response.content = arrayOfResponses;

page.open(pageUrl, function(status) {
    if ( status !== 'success' ) {
        phantom.exit( 1 );
    } else {
        phantom.exit( 0 );
    }
}, 100);

setTimeout(forceExit, 2000);
console.log(JSON.stringify(response));
Thank you in advance.

There are two issues with your script:
1. You make PhantomJS exit too early, right after the first URL is opened. It doesn't have time to follow redirects.
2. You wrote the script from top to bottom as if the program flow were linear/synchronous, but in JavaScript it is not: onNavigationRequested can be called many times.
So with that in mind, let's rewrite the script to collect all the redirects and exit if no new redirect is made for 2 seconds.
var page = require('webpage').create();
var response = {};
var arrayOfResponses = [];
var pageUrl = 'http://admin.weeqo.com/redirect/r1.php';
var exitTimeout;

// This will be called if no redirects are requested in 2 seconds
function forceExit(){
    // Just for fun we'll note the final URL
    var curURL = page.evaluate(function(){
        return document.location.href;
    });
    console.log("Final URL is " + curURL);

    // Prepare and output the report:
    response.content = arrayOfResponses;
    console.log("List of all requested URLs: " + JSON.stringify(response));

    // Now we can exit safely
    phantom.exit(0);
}

// This is called before each redirect
page.onNavigationRequested = function(url, type, willNavigate, main) {
    // Clear the timeout so that the script is not shut down,
    // because we have a new redirect
    if(exitTimeout) {
        clearTimeout(exitTimeout);
    }
    arrayOfResponses.push(url);
    console.log("Navigation requested: " + url);

    // Create a timeout that will shut down the script
    // in two seconds unless cancelled
    exitTimeout = setTimeout(forceExit, 2000);
}

// Open the first page
page.open(pageUrl, function(status) {
    // We only care about errors here, because
    // who knows how much time will pass before
    // we hit the last redirect
    if ( status !== 'success' ) {
        phantom.exit( 1 );
    }
});

Related

I need to pass variables to javascript called from php exec command

I have a php file that calls phantomjs via the exec command. The phantom.js file calls a url that needs to include variables. I need to send these variables from php along with the exec command and have the variables show up in the phantom.js url.
Here is my current hardcoded PHP:
$response = [];
exec('/usr/bin/phantomjs phantomjstest.js', $response);
$data = $response[19];
and phantom.js
var page = require('webpage').create();
console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent111';

page.open('https://www.aa.com/travelInformation/flights/status/detail?search=AA|1698|2019,1,23&ref=search', function(status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        var ua = page.evaluate(function() {
            return document.getElementById('aa-content-frame').innerHTML;
        });
        console.log(ua);
    }
    phantom.exit();
});
What I would like to do is change the php so that it passes 2 variables (from a form submit) to the javascript. Something like:
PHP:
exec('/usr/bin/phantomjs phantomjstest.js?variable1=$forminput1&variable2=$forminput2', $response);
JS
page.open('https://www.aa.com/travelInformation/flights/status/detail?search=AA|variable1|variable2&ref=search', function(status) {
Or I can construct the entire URL in the php and send it along with the exec command.
Any ideas on either method, or some other way to get from here to there, are most appreciated. Thanks.
Based on suggestions in comments, I have updated my PHP to contain:
exec("/usr/bin/phantomjs phantomjstest.js https://www.aa.com/travelInformation/flights/status/detail?search=AA|1698|2019,1,23&ref=search", $response);
and my JS to:
var page = require('webpage').create();
console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent111';

var system = require('system');
var args = require('system').args;
var address = system.args[0];

page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        var ua = page.evaluate(function() {
            return document.getElementById('aa-content-frame').innerHTML;
        });
        console.log(ua);
    }
    phantom.exit();
});
But all I get back is a null response. Any idea why the address is either not getting passed with PHP exec or not getting picked up and run by JS?
Thanks.
****** SOLUTION ********
There were a couple of things that needed fixing, and I want to thank those that commented below, as well as others on SO that offered solutions to similar issues.
First, the JS did not like the & symbol in the URL. I had to use # in the passed argument instead and then replace it on the JS side.
Second, it did not like the pipe symbols in the URL either, so I had to escape them.
The finished PHP looks like:
exec("/usr/bin/phantomjs phantomjstest.js https://www.aa.com/travelInformation/flights/status/detail?search=AA\|$flight\|$date#ref=search", $response);
and the JS like:
var page = require('webpage').create();
console.log('The default user agent is ' + page.settings.userAgent);
page.settings.userAgent = 'SpecialAgent111';

var system = require('system');
var args = require('system').args;
var address = system.args[1];
address = address.replace(/#/gi, "&");

page.open(address, function (status) {
    if (status !== 'success') {
        console.log('Unable to access network');
    } else {
        var ua = page.evaluate(function() {
            return document.getElementById('aa-content-frame').innerHTML;
        });
        console.log(ua);
    }
    phantom.exit();
});
Again, my thanks to all!!!!
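An aside on why the escaping was needed: exec() hands the command line to a shell, where an unquoted & sends the command to the background and | pipes it, so the URL never reached PhantomJS intact. Passing the two values as separate plain arguments and assembling the URL inside the script avoids the problem entirely. A minimal sketch of the JS side (the argument order and the invocation shown in the comment are assumptions, not from the original post):

var system = require('system');

// Hypothetical invocation: phantomjs phantomjstest.js 1698 2019,1,23
// system.args[0] is the script path, so real arguments start at index 1
var flight = system.args[1];
var date = system.args[2];

// Build the URL inside the script so the shell never sees | or &
var address = 'https://www.aa.com/travelInformation/flights/status/detail' +
    '?search=AA|' + flight + '|' + date + '&ref=search';

On the PHP side, the two values would simply be appended to the command string, ideally run through escapeshellarg() so the shell treats each one as a single literal argument.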

undefined is not a valid uri or options object. Nodejs

I am currently building a web scraper in NodeJS and I am facing a certain problem. After running my code, I receive this error:
undefined is not a valid uri or options object.
I am not sure how to get past this error. I've looked at these examples: Example One, Example Two
Here is all my code:
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');

var START_URL = "http://example.com";
var pagesVisited = {};
var numPagesVisited = 0;
var pagesToVisit = [];
var url = new URL(START_URL);
var baseUrl = url.protocol + "//" + url.hostname;

pagesToVisit.push(START_URL);
setInterval(crawl, 5000);

function crawl() {
    var nextPage = pagesToVisit.pop();
    if (nextPage in pagesVisited) {
        // We've already visited this page, so repeat the crawl
        setInterval(crawl, 5000);
    } else {
        // New page we haven't visited
        visitPage(nextPage, crawl);
    }
}

function visitPage(url, callback) {
    // Add page to our set
    pagesVisited[url] = true;
    numPagesVisited++;
    // Make the request
    console.log("Visiting page " + url);
    request(url, function(error, response, body) {
        // Check status code (200 is HTTP OK)
        console.log("Status code: " + response.statusCode);
        if (response.statusCode !== 200) {
            console.log(response.statusCode);
            callback();
            return;
        } else {
            console.log(error);
        }
        // Parse the document body
        var $ = cheerio.load(body);
        collectInternalLinks($);
        // In this short program, our callback is just calling crawl()
        callback();
    });
}

function collectInternalLinks($) {
    var relativeLinks = $("a[href^='/']");
    console.log("Found " + relativeLinks.length + " relative links on page");
    relativeLinks.each(function() {
        pagesToVisit.push(baseUrl + $(this).attr('href'));
    });
}
Once your pagesToVisit empties, url will be undefined, since calling pop on an empty array returns undefined.
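(A quick way to see this in a Node REPL:)

var pagesToVisit = [];
console.log(pagesToVisit.pop()); // prints: undefined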
I would add a check in visitPage that url is not undefined, e.g.
function visitPage(url, callback) {
    if (!url) {
        // We're done
        return;
    }
    // ...rest of visitPage unchanged
Or in crawl, check that pagesToVisit has elements, e.g.
function crawl() {
    var nextPage = pagesToVisit.pop();
    if (!nextPage) {
        // We're done!
        console.log('Crawl complete!');
    } else if (nextPage in pagesVisited) {
        // We've already visited this page, so repeat the crawl
        setInterval(crawl, 5000);
    } else {
        // New page we haven't visited
        visitPage(nextPage, crawl);
    }
}
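One caveat worth flagging in both versions (an editorial note, not part of the original answer): calling setInterval inside crawl registers a brand-new repeating timer every time an already-visited page is popped, so timers pile up as the crawl runs. A one-shot setTimeout in that branch is probably what was intended:

} else if (nextPage in pagesVisited) {
    // We've already visited this page; schedule a single retry
    // instead of registering yet another repeating timer
    setTimeout(crawl, 5000);
}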
Taking hints from Terry Lennox's answer, I modified the crawl() function slightly:
function crawl() {
    var nextPage = pagesToVisit.pop();
    if (nextPage in pagesVisited) {
        // We've already visited this page, so repeat the crawl
        setInterval(crawl, 5000);
    } else if (nextPage) {
        // New page we haven't visited
        visitPage(nextPage, crawl);
    }
}
All I am doing is checking whether the popped element exists before calling visitPage().
I get the following output:
Visiting page http://example.com
Status code: 200
response.statusCode: 200
null
Found 0 relative links on page
^C

Firefox WebExtension, store an array in the browser's storage

Can you store an array using browser.storage.local.set, or achieve the same result with a different method?
Details:
My extension currently redirects a website specified through the options.html form. Currently, when you specify a new website, the old one is replaced. Is there a way I can append to an array of websites to be redirected, instead of replacing the stored website?
options.js (processes information from the form in options.html):
function saveOptions(e) {
    e.preventDefault();
    browser.storage.local.set({
        url: document.querySelector("#url").value
    });
}

function restoreOptions() {
    function setCurrentChoice(result) {
        document.querySelector("#url").value = result.url || "reddit.com";
    }
    function onError(error) {
        console.log(`Error: ${error}`);
    }
    var getting = browser.storage.local.get("url");
    getting.then(setCurrentChoice, onError);
}

document.addEventListener("DOMContentLoaded", restoreOptions);
document.querySelector("form").addEventListener("submit", saveOptions);
redirect.js:
function onError(error) {
    console.log(`Error: ${error}`);
}

function onGot(item) {
    var url = "reddit.com";
    if (item.url) {
        url = item.url;
    }
    var host = window.location.hostname;
    if ((host == url) || (host == ("www." + url))) {
        window.location = chrome.runtime.getURL("redirect/redirect.html");
    }
}

var getting = browser.storage.local.get("url");
getting.then(onGot, onError);
One thought I had was to add a storage location per URL; however, i itself would also have to be stored to prevent it getting reset each time options.js is loaded. (Something similar to the code below.)
var i = 0;
browser.storage.local.set({
    url[i]: document.querySelector("#url").value
});
i++;
A more logical solution would be for the url storage location to be an array.
If there is a way for url to be an array then redirect.html could contain the following:
if ((url.includes(host)) || (url.includes("www." + host))) {
    window.location = chrome.runtime.getURL("redirect.html");
}
Fresh eyes have solved my problem.
In options.js:
function saveOptions(e) {
    e.preventDefault();
    var array = (document.querySelector("#url").value).split(",");
    browser.storage.local.set({
        url: array
    });
}
In redirect.js:
function onGot(item) {
    var url = "";
    if (item.url) {
        url = item.url;
    }
    var host = window.location.hostname;
    if ((url.includes(host)) || (url.includes("www." + host))) {
        window.location = chrome.runtime.getURL("redirect/redirect.html");
    }
}
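If you want true appending (adding one site at a time rather than re-entering a comma-separated list), a minimal sketch is to read the stored array, push onto it, and write it back. This assumes the url key holds an array as in the code above; addSite is a hypothetical helper:

function addSite(newSite) {
    browser.storage.local.get("url").then(function (item) {
        var sites = item.url || [];           // stored array, or empty on first run
        if (sites.indexOf(newSite) === -1) {  // skip duplicates
            sites.push(newSite);
        }
        return browser.storage.local.set({ url: sites });
    });
}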

Is Promise.all not working on the second time through? Why not?

I'm just finishing off this basic webscraper project for a tshirt website.
It enters through one hardcoded url, the home page. It searches for any product pages and adds them to a set of URLs. If it finds another link (the remainder), it scrapes that as well and finds any more product pages, adding those to the set too. Finally it scrapes the product pages in the set, grabs the tshirt data (price, img, title), converts it, and writes it to a CSV file.
For some reason, this is not working on the second run-through of the scrape with 'remainder'.
If I remove the second scrape of url, everything works out fine and the file gets written correctly. But if I want to get the other product pages, it seems to be failing somewhere.
Here is my code. I apologise for posting so much of it, but I don't know how it will be understood properly without the right context; hopefully it's been commented okay:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';

//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');

//hardcoded url
var url = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = new Set();
var remainder;
var tshirtArray = [];

const requestPromise = function(url) {
    return new Promise(function(resolve, reject) {
        request(url, function(error, response, html) {
            if (error) return reject(error);
            if (!error && response.statusCode == 200) {
                return resolve(html);
            }
        });
    });
}

// Go into webpage via url, load html and grab links with "shirt" in the url
function scrape(url) {
    console.log("Currently scraping " + url)
    return requestPromise(url)
        .then(function(html) {
            var $ = cheerio.load(html);
            var links = [];
            //get all the links
            $('a[href*=shirt]').each(function() {
                var a = $(this).attr('href');
                //add into link array
                links.push(url + a);
            });
            // return array of links
            return links;
        });
}

function nextStep(arrayOfLinks) {
    var promiseArray = [];
    console.log(arrayOfLinks);
    for (var i = 0; i < arrayOfLinks.length; i++) {
        promiseArray.push(requestPromise(arrayOfLinks[i]));
    }
    //return both the html of pages and their urls
    return Promise.all(promiseArray)
        .then(function(arrayOfHtml) {
            return {arrayOfHtml: arrayOfHtml, arrayOfUrls: arrayOfLinks};
        });
}

//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep(obj) {
    for (var i = 0; i < obj.arrayOfHtml.length; i++) {
        var $ = cheerio.load(obj.arrayOfHtml[i]);
        //if page has a submit it must be a product page
        if ($('[type=submit]').length !== 0) {
            //add page to set
            urlSet.add(obj.arrayOfUrls[i]);
            console.log(obj.arrayOfUrls[i]);
        } else if (remainder == undefined) {
            //if not a product page, add it to remainder so another scrape can be performed.
            remainder = obj.arrayOfUrls[i];
            console.log("The remainder is " + remainder)
        }
    }
    //return remainder for second run-through of scrape
    return remainder;
}

//iterate through urlSet (product pages) and grab html
function lastScraperPt1() {
    //call lastScraper so we can grab data from the set (product pages)
    //scrape set, product pages
    var promiseArray = [];
    for (var item of urlSet) {
        var url = item;
        promiseArray.push(requestPromise(url));
    }
    return Promise.all(promiseArray)
        .then(function(arrayOfHtml) {
            return arrayOfHtml;
        });
}

//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html) {
    for (var i = 0; i < html.length; i++) {
        var $ = cheerio.load(html[i]);
        //grab data and store as variables
        var price = $('.price').text();
        var imgURL = $('.shirt-picture').find('img').attr('src');
        var title = $('body').find('.shirt-details > h1').text().slice(4);

        var tshirtObject = {};
        //add values into tshirt object
        tshirtObject.Title = title;
        tshirtObject.Price = price;
        tshirtObject.ImageURL = imgURL;
        tshirtObject.URL = url;
        tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
        //add the object into the array of tshirts
        tshirtArray.push(tshirtObject);
    }
    convertJson2Csv();
}

//convert tshirt objects and save as CSV file
function convertJson2Csv() {
    //The scraper should generate a folder called `data` if it doesn't exist.
    var dir = './data';
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir);
    }
    var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
    //convert tshirt data into CSV and pass in fields
    var csv = json2csv({ data: tshirtArray, fields: fields });
    //Name of file will be the date
    var fileDate = moment().format('MM-DD-YY');
    var fileName = dir + '/' + fileDate + '.csv';
    //Write file
    fs.writeFile(fileName, csv, {overwrite: true}, function(err) {
        console.log('file saved');
        if (err) throw err;
    });
}

scrape(url) //scrape from original entry point
    .then(nextStep)
    .then(lastStep)
    .then(scrape) //scrape again but with remainder url
    .then(nextStep)
    .then(lastStep)
    .then(lastScraperPt1)
    .then(lastScraperPt2)
    .catch(function(err) {
        // handle any error from any request here
        console.log(err);
    });
I'm console logging the arrayOfLinks in nextStep so I can see that they are being grabbed properly; I just cannot work out why they aren't being passed through to lastStep properly.
Currently scraping http://shirts4mike.com/
[ 'http://shirts4mike.com/shirts.php',
'http://shirts4mike.com/shirts.php',
'http://shirts4mike.com/shirt.php?id=108',
'http://shirts4mike.com/shirt.php?id=107',
'http://shirts4mike.com/shirt.php?id=106',
'http://shirts4mike.com/shirt.php?id=105' ]
The remainder is http://shirts4mike.com/shirts.php
http://shirts4mike.com/shirt.php?id=108
http://shirts4mike.com/shirt.php?id=107
http://shirts4mike.com/shirt.php?id=106
http://shirts4mike.com/shirt.php?id=105
Currently scraping http://shirts4mike.com/shirts.php
[ 'http://shirts4mike.com/shirts.phpshirts.php',
'http://shirts4mike.com/shirts.phpshirt.php?id=101',
'http://shirts4mike.com/shirts.phpshirt.php?id=102',
'http://shirts4mike.com/shirts.phpshirt.php?id=103',
'http://shirts4mike.com/shirts.phpshirt.php?id=104',
'http://shirts4mike.com/shirts.phpshirt.php?id=105',
'http://shirts4mike.com/shirts.phpshirt.php?id=106',
'http://shirts4mike.com/shirts.phpshirt.php?id=107',
'http://shirts4mike.com/shirts.phpshirt.php?id=108' ]
BUT if I choose to only call the first scrape and don't call the second, like this:
scrape(url) //scrape from original entry point
    .then(nextStep)
    .then(lastStep)
    .then(lastScraperPt1)
    .then(lastScraperPt2)
    .catch(function(err) {
        // handle any error from any request here
        console.log(err);
    });
... Then everything works. I just don't get to all the urls.
What is happening here and how can I fix it? Thank you guys
The issue is that tshirtArray is not defined in convertJson2Csv(). In lastScraperPt2, pass tshirtArray to convertJson2Csv():

convertJson2Csv(tshirtArray)

and in convertJson2Csv:

function convertJson2Csv(tshirtArray) {
    // do stuff
}
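Equivalently (a sketch; the asker's final version below ends up doing exactly this), lastScraperPt2 can return the array and convertJson2Csv can be appended to the promise chain:

function lastScraperPt2(html) {
    // ...build a tshirtObject for each product page as before...
    return tshirtArray;
}

scrape(url)
    .then(nextStep)
    .then(lastStep)
    .then(lastScraperPt1)
    .then(lastScraperPt2)
    .then(convertJson2Csv); // receives tshirtArray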
One problem seems to be in your lastStep. It looks like you mean for remainder to be another array of urls; correct me if I'm wrong there. However, what's happening is that the first time the if($('[type=submit]').length !== 0) condition fails, you automatically go down to the next block, because remainder starts undefined. Whatever the current url is, you assign it to remainder. For the rest of the iterations of your for-loop, you will never again hit the condition where remainder == undefined. So you will only ever end up with one url assigned to remainder, while any more that you were hoping to get will simply be passed over.
You might want to define remainder as remainder = [];. And then instead of saying else if (remainder == undefined), you would just say
} else {
    remainder.push(obj.arrayOfUrls[i]);
}
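Put together, the adjusted lastStep might look like this (a sketch based on the original function, with remainder now an array in module scope):

var remainder = [];

function lastStep(obj) {
    for (var i = 0; i < obj.arrayOfHtml.length; i++) {
        var $ = cheerio.load(obj.arrayOfHtml[i]);
        if ($('[type=submit]').length !== 0) {
            // a submit button marks a product page
            urlSet.add(obj.arrayOfUrls[i]);
        } else {
            // collect every non-product page for the second pass
            remainder.push(obj.arrayOfUrls[i]);
        }
    }
    return remainder;
}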
However, then you're passing an array of urls to scrape when scrape is only expecting a single url. If this is what you want, and I am right in assuming that you mean for remainder to be an array of urls, you could define a new function as follows:
function scrapeRemainders(remainders) {
    var promises = [];
    remainders.forEach(function (url) {
        promises.push(requestPromise(url));
    });
    return Promise.all(promises).then(function (results) {
        return _.flattenDeep(results);
    });
}
Then, instead of the second scrape in your promise chain, you would replace it with scrapeRemainders. Also, for the _ in the previous function, you would need to npm install lodash and then var _ = require('lodash'). On a side note, lodash has nothing to do with promises, but it is a great tool for data manipulation. You should look into it when you have the chance.
Also, in lastScraperPt1, you can change
return Promise.all(promiseArray)
    .then(function(arrayOfHtml) {
        return arrayOfHtml;
    });
to
return Promise.all(promiseArray);
It does the same thing.
Hope this helps. If this does not answer your question, comment at me and I can change my answer accordingly.
All fixed, it was grabbing the wrong urls in scrape(). Though I only knew this after I logged the status codes to the console:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';

//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');

//hardcoded url
var urlHome = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = [];
var tshirtArray = [];

const requestPromise = function(url) {
    return new Promise(function(resolve, reject) {
        request(url, function(error, response, html) {
            if (error) {
                errorHandler(error);
                return reject(error);
            }
            if (!error && response.statusCode == 200) {
                return resolve(html);
            }
            if (response.statusCode !== 200) {
                console.log("response code is " + response.statusCode);
            }
            return resolve("");
        });
    });
}

// Go into webpage via url, load html and grab links with "shirt" in the url
function scrape(url) {
    console.log("Currently scraping " + url)
    return requestPromise(url)
        .then(function(html) {
            var $ = cheerio.load(html);
            var links = [];
            var URL = 'http://shirts4mike.com/';
            //get all the links
            $('a[href*=shirt]').each(function() {
                var a = $(this).attr('href');
                //add into link array
                links.push(URL + a);
            });
            // return array of links
            return links;
        });
}

function nextStep(arrayOfLinks) {
    var promiseArray = [];
    console.log(arrayOfLinks);
    for (var i = 0; i < arrayOfLinks.length; i++) {
        promiseArray.push(requestPromise(arrayOfLinks[i]));
    }
    //return both the html of pages and their urls
    return Promise.all(promiseArray)
        .then(function(arrayOfHtml) {
            return {arrayOfHtml: arrayOfHtml, arrayOfUrls: arrayOfLinks};
        });
}

//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep(obj) {
    for (var i = 0; i < obj.arrayOfHtml.length; i++) {
        var $ = cheerio.load(obj.arrayOfHtml[i]);
        //if page has a submit it must be a product page
        if ($('[type=submit]').length !== 0) {
            //add page to set
            urlSet.push(obj.arrayOfUrls[i]);
            console.log(obj.arrayOfUrls[i]);
        } else if (remainder == undefined) {
            //if not a product page, add it to remainder so another scrape can be performed.
            var remainder = obj.arrayOfUrls[i];
            console.log("The remainder is " + remainder)
        }
    }
    //return remainder for second run-through of scrape
    return remainder;
}

//iterate through urlSet (product pages) and grab html
function lastScraperPt1() {
    //scrape set, product pages
    var promiseArray = [];
    for (var item of urlSet) {
        var url = item;
        promiseArray.push(requestPromise(url));
    }
    return Promise.all(promiseArray)
        .then(function(arrayOfHtml) {
            return arrayOfHtml;
        });
}

//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html) {
    for (var i = 0; i < html.length; i++) {
        var $ = cheerio.load(html[i]);
        //grab data and store as variables
        var price = $('.price').text();
        var imgURL = $('.shirt-picture').find('img').attr('src');
        var title = $('body').find('.shirt-details > h1').text().slice(4);

        var tshirtObject = {};
        //add values into tshirt object
        tshirtObject.Title = title;
        tshirtObject.Price = price;
        tshirtObject.ImageURL = urlHome + imgURL;
        tshirtObject.URL = urlSet[i];
        tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
        //add the object into the array of tshirts
        tshirtArray.push(tshirtObject);
    }
    return tshirtArray;
}

//convert tshirt objects and save as CSV file
function convertJson2Csv(tshirtArray) {
    //The scraper should generate a folder called `data` if it doesn't exist.
    var dir = './data';
    if (!fs.existsSync(dir)) {
        fs.mkdirSync(dir);
    }
    var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
    //convert tshirt data into CSV and pass in fields
    var csv = json2csv({ data: tshirtArray, fields: fields });
    //Name of file will be the date
    var fileDate = moment().format('MM-DD-YY');
    var fileName = dir + '/' + fileDate + '.csv';
    //Write file
    fs.writeFile(fileName, csv, {overwrite: true}, function(err) {
        console.log('file saved');
        if (err) errorHandler(err);
    });
}

scrape(urlHome) //scrape from original entry point
    .then(nextStep)
    .then(lastStep)
    .then(scrape)
    .then(nextStep)
    .then(lastStep)
    .then(lastScraperPt1)
    .then(lastScraperPt2)
    .then(convertJson2Csv)
    .catch(function(err) {
        // handle any error from any request here
        console.log(err);
    });

//If the site is down, an error message describing the issue should appear in the console.
//This is to be tested by disabling wifi on your device.
//When an error occurs, log it to a file scraper-error.log. It should append to the bottom of the file with a time stamp and error
var errorHandler = function (error) {
    console.log(error.message);
    console.log('The scraper could not scrape data from ' + urlHome + '; there is either a problem with your internet connection or the site may be down');
    /**
     * create new date for log file
     */
    var loggerDate = new Date();
    /**
     * create message as a variable
     */
    var errLog = '[' + loggerDate + '] ' + error.message + '\n';
    /**
     * when the error occurs, log that to the error logger file
     */
    fs.appendFile('scraper-error.log', errLog, function (err) {
        if (err) throw err;
        console.log('There was an error. The error was logged to scraper-error.log');
    });
};

IndexedDB open DB request weird behavior

I have an app (questionnaire) that uses indexedDB.
We have one database and several stores in it.
Stores have data already stored in them.
At some point a dashboard html file is loaded. In this file I am calling a couple of functions:
function init(){
    adjustUsedScreenHeight();
    db_init();
    setInstitutionInstRow();
    loadRecommendations();
    loadResultsFromDB();
    fillEvaluations();
    document.addEventListener("deviceready", onDeviceReady, function(e) { console.log(e); });
}
The init() function is called on body onLoad.
setInstitutionInstRow() looks like this:
function setInstitutionInstRow(localId){
    //localId = 10;
    if (localId == undefined){
        console.log("Localid undefined: ");
        //open db, open objectstore;
        var request = indexedDB.open("kcapp_db", "1.0");
        request.onsuccess = function() {
            var db = request.result;
            var tx = db.transaction("LOCALINSTITUTIONS", "readonly");
            var store = tx.objectStore("LOCALINSTITUTIONS");
            tx.oncomplete = function(){
                db.close();
            }
            tx.onerror = function(){
                console.log("Transaction error on setInstInstRow");
            }
            var cursor = store.openCursor();
            cursor.onsuccess = function () {
                var match = cursor.result;
                console.log("Retrieved item: " + match.value.instid);
                // alert("Added new data");
                if (match){
                    setInstituionInstRow(match.value.instid);
                    console.log("Got localid: " + match.value.instid);
                }
                else
                    console.log("localinsid: it is empty ");
            };
            cursor.onerror = function () {
                console.log("Error: " + item.result.errorCode);
            }
        }
        request.onerror = function () {
            console.log("Error: " + request.result.errorCode);
        }
        request.oncomplete = function (){
            console.log("The transaction is done: setInstitutionRow()");
        }
        request.onupgradeneeded = function (){
            console.log("Upgrade needed ...");
        }
        request.onblocked = function(){
            console.log("DB is Blocked ...");
        }
    } else {
        instid = localId;
        var now = new Date();
        //console.log("["+now.getTime()+"]setInstituionInstRow - instid set to "+localId);
        //open db, open objectstore;
        var request = indexedDB.open("kcapp_db", "1.0");
        request.onsuccess = function() {
            var db = this.result;
            var tx = db.transaction("INSTITUTIONS", "readonly");
            var store = tx.objectStore("INSTITUTIONS");
            var item = store.get(localId);
            console.log(item);
            item.onsuccess = function () {
                console.log("Retrieved item: ");
                if (item.length > 0)
                    var lInstitution = item.result.value;
                kitaDisplayValue = lInstitution.krippe;
            };
            item.onerror = function () {
                console.log("Error: " + item.result.errorCode);
            }
        }
        request.onerror = function () {
            console.log("Error: " + request.result.errorCode);
        }
    }
}
Now the problem is that this request:

var request = indexedDB.open("kcapp_db", "1.0");

never fires any of its handlers (onsuccess, onerror, etc.). I debugged it with the Chrome dev tools, and it never gets into any of the above states.
Accordingly I am not getting any data from transactions.
And there are no errors in Chrome console.
Here is the request value from the Chrome dev tools (screenshot not included here): readyState is done, which means it should have fired an event (success, error, blocked, etc.). But it is not going into any of them.
I am looking into it and still cannot figure out why it is not working.
I should mention that the other functions called from init() behave the same way.
Looking forward to getting some help.
You may be using an invalid version parameter to the open function. Try indexedDB.open('kcapp_db', 1); instead.
Like Josh said, your version parameter should be an integer, not a string.
Your request object can get 4 events in response to the open request: success, error, upgradeneeded, or blocked. Add event listeners for all of those (e.g. request.onblocked = ...) and see which one is getting fired.
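A minimal sketch combining both suggestions (integer version, all four listeners attached up front):

var request = indexedDB.open("kcapp_db", 1); // integer version, not "1.0"

request.onsuccess = function (event) {
    var db = event.target.result;
    console.log("open succeeded");
    // ...transactions go here...
};
request.onerror = function (event) {
    console.log("open failed: " + event.target.error);
};
request.onupgradeneeded = function (event) {
    // also fires the first time the database is created
    console.log("upgrade needed, old version was " + event.oldVersion);
};
request.onblocked = function () {
    console.log("open blocked by another connection");
};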
I had that problem, but only with the onupgradeneeded event. I fixed it by changing the name used for the open request: at the beginning I had a very long name, and when I changed it to a short one it started working. I don't know if this was the real problem, but it was solved at that moment.
My code:
if (this.isSupported) {
    this.openRequest = indexedDB.open("OrdenesMant", 1);
    /**
     * Create the database with tables and primary keys
     */
    this.openRequest.onupgradeneeded = function(oEvent) {
        ...
Hope it works for you as well.
