Firefox WebExtension, store an array in the browser's storage - javascript

Can you store an array using browser.storage.local.set or achieve the same result with a different method?
Details:
My extension redirects a website specified through the options.html form. Currently, when you specify a new website the old one is replaced. Is there a way I can append to an array of websites to be redirected, instead of replacing the existing one?
options.js (processes information from the form in options.html):
function saveOptions(e) {
  e.preventDefault();
  browser.storage.local.set({
    url: document.querySelector("#url").value
  });
}
function restoreOptions() {
  function setCurrentChoice(result) {
    document.querySelector("#url").value = result.url || "reddit.com";
  }
  function onError(error) {
    console.log(`Error: ${error}`);
  }
  var getting = browser.storage.local.get("url");
  getting.then(setCurrentChoice, onError);
}
document.addEventListener("DOMContentLoaded", restoreOptions);
document.querySelector("form").addEventListener("submit", saveOptions);
redirect.js:
function onError(error) {
  console.log(`Error: ${error}`);
}
function onGot(item) {
  var url = "reddit.com";
  if (item.url) {
    url = item.url;
  }
  var host = window.location.hostname;
  if ((host == url) || (host == ("www." + url))) {
    window.location = chrome.runtime.getURL("redirect/redirect.html");
  }
}
var getting = browser.storage.local.get("url");
getting.then(onGot, onError);
One thought I had was to add a storage location per url; however, the counter i would also have to be stored to prevent it getting reset each time options.js is loaded. (Something similar to the below code.)
var i = 0;
browser.storage.local.set({
  ["url" + i]: document.querySelector("#url").value
});
i++;
A more logical solution would be for the url storage location to be an array.
If there is a way for url to be an array, then redirect.js could contain the following:
if (url.includes(host) || url.includes("www." + host)) {
  window.location = chrome.runtime.getURL("redirect.html");
}

Fresh eyes have solved my problem.
In options.js:
function saveOptions(e) {
  e.preventDefault();
  var array = (document.querySelector("#url").value).split(",");
  browser.storage.local.set({
    url: array
  });
}
In redirect.js:
function onGot(item) {
  var url = []; // default to an empty array so the .includes checks below are safe
  if (item.url) {
    url = item.url;
  }
  var host = window.location.hostname;
  if (url.includes(host) || url.includes("www." + host)) {
    window.location = chrome.runtime.getURL("redirect/redirect.html");
  }
}
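Note that this stores whatever is typed as a comma-separated list, replacing the whole array on every save. If I instead wanted to append a single new site to what is already stored, a minimal sketch would be (addUrl is a hypothetical helper, not code from my extension):
function addUrl(newUrl) {
  return browser.storage.local.get("url").then(function(result) {
    // default to an empty array on first run
    var urls = Array.isArray(result.url) ? result.url : [];
    if (!urls.includes(newUrl)) {
      urls.push(newUrl);
    }
    // write the whole array back
    return browser.storage.local.set({ url: urls });
  });
}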

Related

Creating global VAR in functions

So I'm having trouble getting a var in a function to be global. I have tried the following resource:
What is the scope of variables in JavaScript?
My previous question was marked as a duplicate, but after reviewing the link above it did not help with my issue.
Here is my previous question:
So I'm using OpenTok to create an online conferencing tool and need to grab the session details from an API on a different server. I've created a PHP script on the other server that grabs session information based on the session id provided by a URL parameter. I know that the PHP script and most of the JavaScript is working correctly, because when I console.log data from the parsed JSON it prints the correct information. However, when I try to put the variables into the credentials area I get the following error:
ReferenceError: thesession is not defined
Here is the code used to get the JSON from a PHP script on a separate server:
var url_string = window.location.href;
var url = new URL(url_string);
var session = url.searchParams.get("s");
if (session == '') {
  window.location.replace("http://www.google.com");
}
var getJSON = function(url, callback) {
  var xhr = new XMLHttpRequest();
  xhr.open('GET', url, true);
  xhr.responseType = 'json';
  xhr.onload = function() {
    var status = xhr.status;
    if (status === 200) {
      callback(null, xhr.response);
    } else {
      callback(status, xhr.response);
    }
  };
  xhr.send();
};
getJSON('http://192.168.64.2/api/meeting/?uid=' + session,
  function(err, data) {
    if (err !== null) {
      console.log('Error');
    }
    var thesession = data.sessionID;
    var thetoken = data.token;
    console.log(thesession);
    console.log(thetoken);
  });
let otCore;
const options = {
  credentials: {
    apiKey: "####",
    sessionId: thesession,
    token: thetoken
  },
And here is a screenshot of the console:
The top console log is "thesession" and the second console log is "thetoken". I have tried looking up the error but can't quite find one with the same usage as mine.
The desired outcome would be that I could take the data from the parsed JSON and use the result as the credentials, e.g. data.sessionID, which is bound to the var thesession.
I know this might be a scope issue, but I'm not sure how I could alter the code to make it work as intended.
Any help would be much appreciated, this one has really got me stumped :)
How would I alter the scope to get the desired function? I have reviewed the link that was given on the previous question, but this didn't help me with my issue.
var thesession = data.sessionID;
is defined within its execution context, which is the callback function you've passed to getJSON.
One step in the right direction is to reverse the assignment. Assign 'thesession' to the options object within the scope where 'thesession' exists.
const options = {
  credentials: {
    apiKey: "####",
    sessionId: null,
    token: thetoken
  }
};
getJSON('http://192.168.64.2/api/meeting/?uid=' + session,
  function(err, data) {
    if (err !== null) {
      console.log('Error');
    }
    var thesession = data.sessionID;
    var thetoken = data.token;
    console.log(thesession);
    console.log(thetoken);
    options.credentials.sessionId = thesession;
  });
However, it's important to realize that your program is not going to wait for this assignment. It will send the getJSON request, and then continue processing. Your options object won't have a sessionId until the getJSON call finishes and its callback has been invoked.
This would be a good opportunity to delve into Promises, which will help you better understand how to handle the non-blocking nature of JavaScript.
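For instance, a promise-flavoured sketch of the same flow (getJSONPromise is a hypothetical wrapper; the endpoint and fields come from the question):
function getJSONPromise(url) {
  return new Promise(function (resolve, reject) {
    var xhr = new XMLHttpRequest();
    xhr.open('GET', url, true);
    xhr.responseType = 'json';
    xhr.onload = function () {
      // resolve with the parsed body on 200, reject otherwise
      if (xhr.status === 200) {
        resolve(xhr.response);
      } else {
        reject(xhr.status);
      }
    };
    xhr.send();
  });
}
getJSONPromise('http://192.168.64.2/api/meeting/?uid=' + session)
  .then(function (data) {
    // the credentials are only assembled once the response exists
    const options = {
      credentials: {
        apiKey: "####",
        sessionId: data.sessionID,
        token: data.token
      }
    };
    // ...initialize otCore with options here
  });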
Your problem is that this line var thesession = data.sessionID is scoped within the function function(err, data) { ... }. In order to allow two functions to use the same variable, you need to make sure that the variable isn't declared somewhere they don't have access to.
It's the difference between this:
function func1() {
  var x = 3
}
function func2() {
  console.log(x)
}
func1();
func2();
and this:
var x;
function func1() {
  x = 3
}
function func2() {
  console.log(x)
}
func1();
func2();
Similarly, if you declare var thesession; at the start of your script (or at least outside that other function) and then just set it with thesession = data.sessionID, your final part will have access to your variable thesession.
Edit
In context:
var url_string = window.location.href;
var url = new URL(url_string);
var session = url.searchParams.get("s");
var thesession;
var thetoken;
if (session == '') {
  window.location.replace("http://www.google.com");
}
var getJSON = function(url, callback) {
  var xhr = new XMLHttpRequest();
  xhr.open('GET', url, true);
  xhr.responseType = 'json';
  xhr.onload = function() {
    var status = xhr.status;
    if (status === 200) {
      callback(null, xhr.response);
    } else {
      callback(status, xhr.response);
    }
  };
  xhr.send();
};
getJSON('http://192.168.64.2/api/meeting/?uid=' + session,
  function(err, data) {
    if (err !== null) {
      console.log('Error');
    }
    thesession = data.sessionID;
    thetoken = data.token;
    console.log(thesession);
    console.log(thetoken);
  });
let otCore;
const options = {
  credentials: {
    apiKey: "####",
    sessionId: thesession,
    token: thetoken
  },
As a side-note, I'd also recommend not using var and instead using let or const, depending on whether you want your variable to be mutable or not.
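For example (apiBase is just an illustrative name):
const apiBase = 'http://192.168.64.2/api/meeting/'; // never reassigned, so const
let thesession;                                     // reassigned in the callback, so let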

undefined is not a valid uri or options object. Nodejs

I am currently building a web scraper in NodeJS and I am facing a certain problem. After running my code, I receive this error:
undefined is not a valid uri or options object.
I am not sure how to bypass this error; I've looked at these examples: Example One, Example Two
Here is all my code:
var request = require('request');
var cheerio = require('cheerio');
var URL = require('url-parse');
var START_URL = "http://example.com";
var pagesVisited = {};
var numPagesVisited = 0;
var pagesToVisit = [];
var url = new URL(START_URL);
var baseUrl = url.protocol + "//" + url.hostname;
pagesToVisit.push(START_URL);
setInterval(crawl, 5000);
function crawl() {
  var nextPage = pagesToVisit.pop();
  if (nextPage in pagesVisited) {
    // We've already visited this page, so repeat the crawl
    setInterval(crawl, 5000);
  } else {
    // New page we haven't visited
    visitPage(nextPage, crawl);
  }
}
function visitPage(url, callback) {
  // Add page to our set
  pagesVisited[url] = true;
  numPagesVisited++;
  // Make the request
  console.log("Visiting page " + url);
  request(url, function(error, response, body) {
    // Check status code (200 is HTTP OK)
    console.log("Status code: " + response.statusCode);
    if (response.statusCode !== 200) {
      console.log(response.statusCode);
      callback();
      return;
    } else {
      console.log(error);
    }
    // Parse the document body
    var $ = cheerio.load(body);
    collectInternalLinks($);
    // In this short program, our callback is just calling crawl()
    callback();
  });
}
function collectInternalLinks($) {
  var relativeLinks = $("a[href^='/']");
  console.log("Found " + relativeLinks.length + " relative links on page");
  relativeLinks.each(function() {
    pagesToVisit.push(baseUrl + $(this).attr('href'));
  });
}
Once your pagesToVisit empties, the url will be undefined since calling pop on an empty array returns this value.
I would add a check in visitPage that url is not undefined, e.g.
function visitPage(url, callback) {
  if (!url) {
    // We're done
    return;
  }
Or in crawl, check that pagesToVisit has elements, e.g.
function crawl() {
  var nextPage = pagesToVisit.pop();
  if (!nextPage) {
    // We're done!
    console.log('Crawl complete!');
  } else if (nextPage in pagesVisited) {
    // We've already visited this page, so repeat the crawl
    setInterval(crawl, 5000);
  } else {
    // New page we haven't visited
    visitPage(nextPage, crawl);
  }
}
Taking hints from Terry Lennox's answer, I modified the crawl() function slightly:
function crawl() {
  var nextPage = pagesToVisit.pop();
  if (nextPage in pagesVisited) {
    // We've already visited this page, so repeat the crawl
    setInterval(crawl, 5000);
  } else if (nextPage) {
    // New page we haven't visited
    visitPage(nextPage, crawl);
  }
}
All I am doing is checking whether the popped element exists before calling visitPage().
I get the following output:
Visiting page http://example.com
Status code: 200
response.statusCode: 200
null
Found 0 relative links on page
^C

Three statuses being returned in Network tab

So I am consuming the Domainr API using this JavaScript code in my script.js:
const DOMAINRURLSTATUS = 'https://domainr.p.mashape.com/v2/status';
const DOMAINRKEY = '<api-key>';
// get data from domainr api
function getDataFromApi(value, callback) {
  const QUERY = {
    'mashape-key': DOMAINRKEY,
    domain: `${value}.com,${value}.net,${value}.org,${value}.biz`
  };
  $.getJSON(DOMAINRURLSTATUS, QUERY, callback);
}
// create the code that will display for the domain availability
function renderResult(result) {
  const OUTPUT = $('#domainContainer');
  OUTPUT.prop('hidden', false);
  // while (result.domain === ".net" || ".com" || ".biz" || ".org"){
  //   return `<div class="domainUnavail">LOADING<span class="sold">LOADING</span></div>`;
  // }
  if (result.summary == 'inactive') {
    return `<div class="domain">${result.domain}<span class="buyButton"><a href="https://www.namecheap.com/domains/registration/results.aspx?domain=${result.domain}" target="_blank">Buy it!</a></span></div>`;
  } else if (result.summary == 'active') {
    return `<div class="domainUnavail">${result.domain}<span class="sold">Unavailable</span></div>`;
  } else {
    return `<div class="domainUnavail">${result.domain}<span class="sold">Unavailable</span></div>`;
  }
}
// render results to page (will bring in renderResult code from above.)
function displayDomainResults(data) {
  const RESULTS = data.status.map((item, index) => renderResult(item));
  $('#domainResults').html(RESULTS);
}
// script for random name generation
function get_new_name() {
  const OUTPUT = $('#nameResult');
  OUTPUT.prop('hidden', false);
  $('#nameResult').html(`<img src="ajax-loader.gif">`);
  var xmlhttp;
  if (window.XMLHttpRequest) {
    xmlhttp = new XMLHttpRequest();
  } else {
    xmlhttp = new ActiveXObject('Microsoft.XMLHTTP');
  }
  // get value from drop down
  var minlen = document.getElementById('js-dropValue').value,
    maxlen = document.getElementById('js-dropValue').value,
    param = 'min=' + minlen + '&max=' + maxlen;
  // Get data from name generator
  xmlhttp.open(
    'GET',
    'https://cors-anywhere.herokuapp.com/https://uzby.com/api.php?' + param,
    true
  );
  xmlhttp.onreadystatechange = function() {
    if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
      document.getElementById('nameResult').innerHTML = xmlhttp.responseText;
    }
    const THISQUERY = xmlhttp.responseText;
    getDataFromApi(THISQUERY, displayDomainResults);
  };
  xmlhttp.send();
  return false;
}
I was getting three requests in my Network tab and was unsure why. I tried different refactors until I decided to reach out to the Domainr team, and they told me that the problem was with my code and not their API, specifically the onreadystatechange handler in my XMLHttpRequest.
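In hindsight, that diagnosis makes sense: in my handler above, the getDataFromApi call sits outside the readyState check, so it runs on every state change rather than once. A guarded version (my reconstruction, not the exact code they suggested) would look like:
xmlhttp.onreadystatechange = function() {
  if (xmlhttp.readyState == 4 && xmlhttp.status == 200) {
    document.getElementById('nameResult').innerHTML = xmlhttp.responseText;
    // only query the domain API once the response is actually complete
    getDataFromApi(xmlhttp.responseText, displayDomainResults);
  }
};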
They said, since I am using jQuery, to use its built-in ajax method instead of using XMLHttpRequest directly.
So I followed their suggestion and refactored the code like so:
const DOMAINRURLSTATUS = "https://domainr.p.mashape.com/v2/status"
const UZBYURL = "https://cors-anywhere.herokuapp.com/https://uzby.com/api.php?"
var DOMAINRKEY = config.DOMAINRKEY;
// get data from domainr api
function getDataFromDomainrApi(value, callback) {
const OUTPUT = $('#domainContainer');
OUTPUT.prop('hidden', false);
$('#domainResults').html('<br><img src="ajax-loader-light.gif" alt="ajax-loader">');
const QUERY = {
"mashape-key": DOMAINRKEY,
domain: `${value}.com,${value}.net,${value}.org,${value}.biz`
}
$.getJSON(DOMAINRURLSTATUS, QUERY, callback)
.fail(function(){alert("Domain connection not working?")});
}
// create the code that will display for the domain availability
function renderResult(result) {
if (result.summary == "inactive") {
return `<div class="domain">${result.domain}<span class="buyButton">Buy it!</span></div>`;
} else if (result.summary == "active") {
return `<div class="domainUnavail">${result.domain}<span class="sold">Unavailable</span></div>`;
} else {
return `<div class="domainUnavail">${result.domain}<span class="sold">Unavailable</span></div>`;
}
}
// render results to page (will bring in renderResult code from above.)
function displayDomainResults(data) {
const RESULTS = data.status.map((item, index) => renderResult(item));
$('#domainResults').html(RESULTS);
}
//Wait for user to click "create my name" button
function watchSubmit() {
$('.js-search-form').submit(event => {
event.preventDefault();
//unhide name section
$('#domainResults').html("");
const NAMEOUTPUT = $('#nameResult');
NAMEOUTPUT.prop('hidden', false);
// add loading image for latency
$('#nameResult').html('<img src="ajax-loader.gif" alt="ajax-loader">');
// set variables for get request
var minlen = $('#js-dropValue').val();
var maxlen = $('#js-dropValue').val();
var param = `min=${minlen}&max=${maxlen}`;
//Get random name result
$.ajax({
type: 'GET',
url: `${UZBYURL}${param}`,
success: function(result) {
const NAME = result;
$('#nameResult').html(NAME);
getDataFromDomainrApi(result, displayDomainResults);
},
error: function() {
alert('error loading names');
}
});
});
}
$(watchSubmit);
It did not change the behavior in the Network tab. Any suggestions as to what could be wrong with either version of my code, or which version is the better solution, regardless of whether it solves the aforementioned issue? Thank you.

Is Promise.all not working on the second time through? Why not?

I'm just finishing off this basic webscraper project for a tshirt website.
It enters through one hardcoded url, the home page. It searches for any product pages and adds them to a url set. If it finds another link (remainder), it scrapes that again and finds any more product pages. It adds the product pages to urlSet, then scrapes those again, grabs the tshirt data (price, img, title), converts it, and writes it to a CSV file.
For some reason, this is not working on the second run through of the scrape with 'remainder'.
If I remove the second scrape of url, everything works out fine and the file gets written correctly. But if I want to get the other product pages, it seems to be failing somewhere.
Here is my code. I apologise for posting so much of it, but I don't know how it will be understood properly without the right context; hopefully it's been commented okay:
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';
//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');
//hardcoded url
var url = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = new Set();
var remainder;
var tshirtArray = [];
const requestPromise = function(url) {
  return new Promise(function(resolve, reject) {
    request(url, function(error, response, html) {
      if (error) return reject(error);
      if (!error && response.statusCode == 200) {
        return resolve(html);
      }
    });
  });
};
// Go into webpage via url, load html and grab links shirt in url
function scrape(url) {
  console.log("Currently scraping " + url);
  return requestPromise(url)
    .then(function(html) {
      var $ = cheerio.load(html);
      var links = [];
      //get all the links
      $('a[href*=shirt]').each(function() {
        var a = $(this).attr('href');
        //add into link array
        links.push(url + a);
      });
      // return array of links
      return links;
    });
}
function nextStep(arrayOfLinks) {
  var promiseArray = [];
  console.log(arrayOfLinks);
  for (var i = 0; i < arrayOfLinks.length; i++) {
    promiseArray.push(requestPromise(arrayOfLinks[i]));
  }
  //return both the html of pages and their urls
  return Promise.all(promiseArray)
    .then(function(arrayOfHtml) {
      return {arrayOfHtml: arrayOfHtml, arrayOfUrls: arrayOfLinks};
    });
}
//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep(obj) {
  for (var i = 0; i < obj.arrayOfHtml.length; i++) {
    var $ = cheerio.load(obj.arrayOfHtml[i]);
    //if page has a submit it must be a product page
    if ($('[type=submit]').length !== 0) {
      //add page to set
      urlSet.add(obj.arrayOfUrls[i]);
      console.log(obj.arrayOfUrls[i]);
    } else if (remainder == undefined) {
      //if not a product page, add it to remainder so another scrape can be performed
      remainder = obj.arrayOfUrls[i];
      console.log("The remainder is " + remainder);
    }
  }
  //return remainder for second run-through of scrape
  return remainder;
}
//iterate through urlSet (product pages) and grab html
function lastScraperPt1() {
  //call lastScraper so we can grab data from the set (product pages)
  //scrape set, product pages
  var promiseArray = [];
  for (var item of urlSet) {
    var url = item;
    promiseArray.push(requestPromise(url));
  }
  return Promise.all(promiseArray)
    .then(function(arrayOfHtml) {
      return arrayOfHtml;
    });
}
//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html) {
  for (var i = 0; i < html.length; i++) {
    var $ = cheerio.load(html[i]);
    //grab data and store as variables
    var price = $('.price').text();
    var imgURL = $('.shirt-picture').find('img').attr('src');
    var title = $('body').find('.shirt-details > h1').text().slice(4);
    var tshirtObject = {};
    //add values into tshirt object
    tshirtObject.Title = title;
    tshirtObject.Price = price;
    tshirtObject.ImageURL = imgURL;
    tshirtObject.URL = url;
    tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
    //add the object into the array of tshirts
    tshirtArray.push(tshirtObject);
  }
  convertJson2Csv();
}
//convert tshirt objects and save as CSV file
function convertJson2Csv() {
  //The scraper should generate a folder called `data` if it doesn’t exist.
  var dir = './data';
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir);
  }
  var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
  //convert tshirt data into CSV and pass in fields
  var csv = json2csv({ data: tshirtArray, fields: fields });
  //Name of file will be the date
  var fileDate = moment().format('MM-DD-YY');
  var fileName = dir + '/' + fileDate + '.csv';
  //Write file
  fs.writeFile(fileName, csv, {overwrite: true}, function(err) {
    console.log('file saved');
    if (err) throw err;
  });
}
scrape(url) //scrape from original entry point
  .then(nextStep)
  .then(lastStep)
  .then(scrape) //scrape again but with remainder url
  .then(nextStep)
  .then(lastStep)
  .then(lastScraperPt1)
  .then(lastScraperPt2)
  .catch(function(err) {
    // handle any error from any request here
    console.log(err);
  });
I'm console logging the arrayOfLinks in nextStep so I can see that they are being grabbed properly; I just cannot work out why they aren't being passed through to lastStep properly.
Currently scraping http://shirts4mike.com/
[ 'http://shirts4mike.com/shirts.php',
'http://shirts4mike.com/shirts.php',
'http://shirts4mike.com/shirt.php?id=108',
'http://shirts4mike.com/shirt.php?id=107',
'http://shirts4mike.com/shirt.php?id=106',
'http://shirts4mike.com/shirt.php?id=105' ]
The remainder is http://shirts4mike.com/shirts.php
http://shirts4mike.com/shirt.php?id=108
http://shirts4mike.com/shirt.php?id=107
http://shirts4mike.com/shirt.php?id=106
http://shirts4mike.com/shirt.php?id=105
Currently scraping http://shirts4mike.com/shirts.php
[ 'http://shirts4mike.com/shirts.phpshirts.php',
'http://shirts4mike.com/shirts.phpshirt.php?id=101',
'http://shirts4mike.com/shirts.phpshirt.php?id=102',
'http://shirts4mike.com/shirts.phpshirt.php?id=103',
'http://shirts4mike.com/shirts.phpshirt.php?id=104',
'http://shirts4mike.com/shirts.phpshirt.php?id=105',
'http://shirts4mike.com/shirts.phpshirt.php?id=106',
'http://shirts4mike.com/shirts.phpshirt.php?id=107',
'http://shirts4mike.com/shirts.phpshirt.php?id=108' ]
BUT if I choose to only call the first scrape and don't call the second, like this:
scrape(url) //scrape from original entry point
  .then(nextStep)
  .then(lastStep)
  .then(lastScraperPt1)
  .then(lastScraperPt2)
  .catch(function(err) {
    // handle any error from any request here
    console.log(err);
  });
... Then everything works. I just don't get to all the urls.
What is happening here and how can I fix it? Thank you guys
The issue is that tshirtArray is not defined in convertJson2Csv(). In lastScraperPt2, pass tshirtArray to convertJson2Csv():
convertJson2Csv(tshirtArray)
and in convertJson2Csv:
function convertJson2Csv(tshirtArray) {
  // do stuff
}
One problem seems to be in your lastStep. It looks like you mean for remainder to be another array of urls. Correct me if I'm wrong there. However, what's happening is that the first time the if ($('[type=submit]').length !== 0) condition fails, you'll automatically go down to the next block, because remainder starts undefined. Whatever the current url is, you assign that one to remainder. For the rest of the iterations of your for-loop, you will never again hit the condition where remainder == undefined. So you will only ever end up with one url assigned to remainder, while any more that you were hoping to get will simply be passed over.
You might want to define remainder as remainder = [];. And then instead of saying else if (remainder == undefined), you would just say
} else {
  remainder.push(obj.arrayOfUrls[i]);
}
However, then you're passing an array of urls to scrape, when scrape is only expecting a single url. If this is what you want, and I am right in assuming that you mean for remainder to be an array of urls, you could define a new function as follows:
function scrapeRemainders(remainders) {
  var promises = [];
  remainders.forEach(function (url) {
    promises.push(requestPromise(url));
  });
  return Promise.all(promises).then(function (results) {
    return _.flattenDeep(results);
  });
}
Then, instead of the second scrape in your promise chain, you would replace it with scrapeRemainders. Also, for the _ in the previous function, you would need to npm install lodash and then var _ = require('lodash'). On a side note, lodash has nothing to do with promises, but it is a great tool for data manipulation. You should look into it when you have the chance.
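A quick illustration of what flattenDeep does:
var _ = require('lodash');
_.flattenDeep([1, [2, [3, [4]]], 5]); // => [1, 2, 3, 4, 5]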
Also, in lastScraperPt1, you can change
return Promise.all(promiseArray)
  .then(function(arrayOfHtml) {
    return arrayOfHtml;
  });
to
return Promise.all(promiseArray);
It does the same thing.
Hope this helps. If this does not answer your question, comment at me and I can change my answer accordingly.
All fixed, it was grabbing the wrong urls in scrape(), though I only knew this after I logged the status codes to the console. The sketch below shows the one-line culprit, and the full working code follows.
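In short (reconstructed from the before/after code; the example URLs show the effect):
// before: relative hrefs were appended to whatever page was being scraped
links.push(url + a);   // -> http://shirts4mike.com/shirts.phpshirt.php?id=101
// after: always resolve against the site root
var URL = 'http://shirts4mike.com/';
links.push(URL + a);   // -> http://shirts4mike.com/shirt.php?id=101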
//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
//Save the scraped data in a spreadsheet (CSV format).
'use strict';
//Modules being used:
var cheerio = require('cheerio');
var json2csv = require('json2csv');
var request = require('request');
var moment = require('moment');
var fs = require('fs');
//hardcoded url
var urlHome = 'http://shirts4mike.com/';
//url for tshirt pages
var urlSet = [];
var tshirtArray = [];
const requestPromise = function(url) {
  return new Promise(function(resolve, reject) {
    request(url, function(error, response, html) {
      if (error) {
        errorHandler(error);
        return reject(error);
      }
      if (!error && response.statusCode == 200) {
        return resolve(html);
      }
      if (response.statusCode !== 200) {
        console.log("response code is " + response.statusCode);
      }
      return resolve("");
    });
  });
};
// Go into webpage via url, load html and grab links shirt in url
function scrape(url) {
  console.log("Currently scraping " + url);
  return requestPromise(url)
    .then(function(html) {
      var $ = cheerio.load(html);
      var links = [];
      var URL = 'http://shirts4mike.com/';
      //get all the links
      $('a[href*=shirt]').each(function() {
        var a = $(this).attr('href');
        //add into link array
        links.push(URL + a);
      });
      // return array of links
      return links;
    });
}
function nextStep(arrayOfLinks) {
  var promiseArray = [];
  console.log(arrayOfLinks);
  for (var i = 0; i < arrayOfLinks.length; i++) {
    promiseArray.push(requestPromise(arrayOfLinks[i]));
  }
  //return both the html of pages and their urls
  return Promise.all(promiseArray)
    .then(function(arrayOfHtml) {
      return {arrayOfHtml: arrayOfHtml, arrayOfUrls: arrayOfLinks};
    });
}
//go through the html of each url and add to urlSet if there is a checkout button
//add to remainder otherwise to rescrape
function lastStep(obj) {
  for (var i = 0; i < obj.arrayOfHtml.length; i++) {
    var $ = cheerio.load(obj.arrayOfHtml[i]);
    //if page has a submit it must be a product page
    if ($('[type=submit]').length !== 0) {
      //add page to set
      urlSet.push(obj.arrayOfUrls[i]);
      console.log(obj.arrayOfUrls[i]);
    } else if (remainder == undefined) {
      //if not a product page, add it to remainder so another scrape can be performed
      var remainder = obj.arrayOfUrls[i];
      console.log("The remainder is " + remainder);
    }
  }
  //return remainder for second run-through of scrape
  return remainder;
}
//iterate through urlSet (product pages) and grab html
function lastScraperPt1() {
  //call lastScraper so we can grab data from the set (product pages)
  //scrape set, product pages
  var promiseArray = [];
  for (var item of urlSet) {
    var url = item;
    promiseArray.push(requestPromise(url));
  }
  return Promise.all(promiseArray)
    .then(function(arrayOfHtml) {
      return arrayOfHtml;
    });
}
//iterate over the html of the product pages and store data as objects
function lastScraperPt2(html) {
  for (var i = 0; i < html.length; i++) {
    var $ = cheerio.load(html[i]);
    //grab data and store as variables
    var price = $('.price').text();
    var imgURL = $('.shirt-picture').find('img').attr('src');
    var title = $('body').find('.shirt-details > h1').text().slice(4);
    var tshirtObject = {};
    //add values into tshirt object
    tshirtObject.Title = title;
    tshirtObject.Price = price;
    tshirtObject.ImageURL = urlHome + imgURL;
    tshirtObject.URL = urlSet[i];
    tshirtObject.Date = moment().format('MMMM Do YYYY, h:mm:ss a');
    //add the object into the array of tshirts
    tshirtArray.push(tshirtObject);
  }
  return tshirtArray;
}
//convert tshirt objects and save as CSV file
function convertJson2Csv(tshirtArray) {
  //The scraper should generate a folder called `data` if it doesn’t exist.
  var dir = './data';
  if (!fs.existsSync(dir)) {
    fs.mkdirSync(dir);
  }
  var fields = ['Title', 'Price', 'ImageURL', 'URL', 'Date'];
  //convert tshirt data into CSV and pass in fields
  var csv = json2csv({ data: tshirtArray, fields: fields });
  //Name of file will be the date
  var fileDate = moment().format('MM-DD-YY');
  var fileName = dir + '/' + fileDate + '.csv';
  //Write file
  fs.writeFile(fileName, csv, {overwrite: true}, function(err) {
    console.log('file saved');
    if (err) errorHandler(err);
  });
}
scrape(urlHome) //scrape from original entry point
  .then(nextStep)
  .then(lastStep)
  .then(scrape)
  .then(nextStep)
  .then(lastStep)
  .then(lastScraperPt1)
  .then(lastScraperPt2)
  .then(convertJson2Csv)
  .catch(function(err) {
    // handle any error from any request here
    console.log(err);
  });
//If the site is down, an error message describing the issue should appear in the console.
//This is to be tested by disabling wifi on your device.
//When an error occurs, log it to a file, scraper-error.log. It should append to the bottom of the file with a time stamp and error
var errorHandler = function (error) {
  console.log(error.message);
  console.log('The scraper could not scrape data from ' + urlHome + '. There is either a problem with your internet connection or the site may be down');
  /**
   * create new date for log file
   */
  var loggerDate = new Date();
  /**
   * create message as a variable
   */
  var errLog = '[' + loggerDate + '] ' + error.message + '\n';
  /**
   * when the error occurs, log that to the error logger file
   */
  fs.appendFile('scraper-error.log', errLog, function (err) {
    if (err) throw err;
    console.log('There was an error. The error was logged to scraper-error.log');
  });
};

Asana Javascript Oauth Error no route found

So I keep receiving an error when I'm trying to use OAuth with Asana's API. The error I'm receiving is "Error no route found". What am I doing wrong? I know that the request is most likely correct, but I believe it returns a hashed URL and I'm supposed to unhash it. This is sample code I am using from a Facebook OAuth, though, so perhaps the code is incorrect and Facebook-API specific.
Here is my code:
$(function () {
  checkHashLogin();
  $('#signon').click(function () {
    asanaLogin();
  });
});
var appID = ****************;
function asanaLogin() {
  var path = 'https://app.asana.com/-/oauth_authorize';
  var queryParams = ['client_id=' + appID,
    'redirect_uri=' + window.location,
    'response_type=token'];
  var query = queryParams.join('&');
  var url = path + query;
  window.location.replace(url);
}
function checkHashLogin() {
  if (window.location.hash.length > 3) {
    var hash = window.location.hash.substring(1);
    if (hash.split('=')[0] == 'access_token') {
      var path = "https://app.asana.com/-/oauth_authorize";
      var queryParams = [hash, 'callback=displayUser'];
      var query = queryParams.join('&');
      var url = path + query;
      //use jsonp to call the graph
      var script = document.createElement('script');
      script.src = url;
      document.body.appendChild(script);
    }
  }
}
function displayUser(user) {
  setTimeout(function () { }, 1000);
  if (user.id != null && user.id != "undefined") {
    //Do Stuff
  } else {
    alert('user error');
  }
}
Here is a photo of my app credentials. My redirect location is just local because I am not hosting it on a server yet.
Looks like you're doing url = path + query when you might need to do url = path + "?" + query - the query string isn't separated, which means you end up requesting a path like https://app.asana.com/-/oauth_authorizeclient_id=... which isn't recognized: hence, "no route found".
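For reference, the corrected assembly might look like this (the "?" is the actual fix; encoding the redirect URI is an extra precaution, not something your original code did):
var path = 'https://app.asana.com/-/oauth_authorize';
var queryParams = ['client_id=' + appID,
  'redirect_uri=' + encodeURIComponent(window.location),
  'response_type=token'];
var url = path + '?' + queryParams.join('&');
window.location.replace(url);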
Hope that helps!
