I am a newbie in nodejs.
I have this Script: book.js
// book.js -- keeps one page count in a module-level variable.
// NOTE(review): `page` is declared once at module scope, so every caller that
// receives this module's exports reads and writes the same variable; this is
// consistent with the "20 / 20" output reported in the question.
var page = 0;
// Stores `count` in the module-level `page` variable.
exports.setPageCount = function (count) {
page = count;
}
// Returns the current value of the module-level `page` variable.
exports.getPageCount = function(){
return page;
}
Along with the following script: scripts.js
// scripts.js -- requires './book' twice and sets a different page count
// through each reference, then prints what each reference reports.
var bookA = require('./book');
var bookB = require('./book');
bookA.setPageCount(10);
// Both references come from require('./book'); presumably they are the same
// exports object, so this call replaces the value set through bookA.
bookB.setPageCount(20);
console.log("Book A Pages : " + bookA.getPageCount());
console.log("Book B Pages : " + bookB.getPageCount());
The Output I get:
Book A Pages : 20
Book B Pages : 20
So, I modified script:
// Attempted fix: export a function so that `page` becomes a local variable.
// NOTE(review): the function body contains no `return`, and the
// `setPageCount :` / `getPageCount :` entries sit directly in the function
// body instead of inside an object literal, so no accessor object is produced.
module.exports = function(){
var page = 0;
setPageCount : function(count){
page = count;
},
getPageCount : function(){
return page;
}
}
I am expecting the following output:
Book A Pages : 10
Book B Pages : 20
But still getting the original outcome, does anyone have an idea where I made an error?
There are a few ways to go about this and your last attempt is almost a valid one -- modify your module like so:
module.exports = function() {
var pages = 0;
return {
getPageCount: function() {
return pages;
},
setPageCount: function(p) {
pages = p;
}
}
}
and your usage like so:
// './book' now exports a factory function; each call below yields a separate
// object with its own page count.
var bookFactory = require('./book');
var bookA = bookFactory();
var bookB = bookFactory();
bookA.setPageCount(10);
bookB.setPageCount(20);
// The two objects were produced by separate factory calls, so each reports
// the value that was set through it.
console.log("Book A Pages : " + bookA.getPageCount());
console.log("Book B Pages : " + bookB.getPageCount());
Related
I am working on my hello world project. I have two pages let's call them "configuration" and "add configuration" *.html. Each one has its own controller like this:
// Register both controller functions on the 'MissionControlApp' module under
// the names 'ConfigController' and 'AddConfigController'.
angular.module('MissionControlApp').controller('ConfigController', ConfigController);
angular.module('MissionControlApp').controller('AddConfigController', AddConfigController);
Now, each controller has some properties that very much overlap:
// Controller function registered as 'ConfigController'.
// Receives $routeParams, ConfigFactory and $window from its caller
// (presumably AngularJS dependency injection -- confirm). $window is not used
// in the visible part of the function.
function ConfigController($routeParams, ConfigFactory, $window){
var vm = this;
// The bare `vm.x;` statements below only read the property; they do not
// create or initialise it. They appear to serve as a list of expected fields.
vm.status;
vm.projectId = $routeParams.projectId;
vm.selectedProject;
vm.configurations;
vm.selectedConfig;
vm.selectedRecords;
vm.filteredConfig;
vm.newFile;
vm.fileWarningMsg = '';
// Adds the path held in vm.newFile to vm.selectedConfig.files, unless one of
// the configurations returned by ConfigFactory.getByEncodedUri already lists
// the same centralPath (compared case-insensitively).
vm.addFile = function(){
var filePath = vm.newFile;
var encodedUri = encodeURIComponent(filePath);
// Clear any warning left over from a previous attempt.
vm.fileWarningMsg='';
ConfigFactory
.getByEncodedUri(encodedUri).then(function(response){
var configFound = response.data;
var configNames = '';
var configMatched = false;
if(response.status === 200 && configFound.length > 0){
//find an exact match from text search result
for(var i = 0; i < configFound.length; i++) {
var config = configFound[i];
for(var j=0; j<config.files.length; j++){
var file = config.files[j];
if(file.centralPath.toLowerCase() === filePath.toLowerCase()){
configMatched = true;
configNames += ' [' + config.name + '] ';
// Leaves only the inner loop: each matching configuration is named once.
break;
}
}
}
}
if(configMatched){
vm.fileWarningMsg = 'Warning! File already exists in other configurations.\n' + configNames;
// Accepts any non-empty path that contains '.rvt' anywhere, not only as an extension.
} else if(filePath.length > 0 && filePath.includes('.rvt')){
var file1 = { centralPath: filePath };
vm.selectedConfig.files.push(file1);
// Reset the input field after a successful add.
vm.newFile = '';
} else{
vm.fileWarningMsg = 'Warning! Please enter a valid file.';
}
}, function(error){
// Rejection handler: surface the failure through vm.status.
vm.status = 'Unable to get configuration data: ' + error.message;
});
};
My AddConfigController also needs the same addFile() functionality, so I just copy-pasted the same code. Coming from C#, I am sure I could use some kind of class inheritance here and simply inherit from ConfigController and extend it... right?
Apologies if this is a super noob question; JS is a bit of a mystery to me.
// Controller function registered as 'AddConfigController'.
// Receives $routeParams, ConfigFactory and $window from its caller
// (presumably AngularJS dependency injection -- confirm). $window is not used
// in the visible part of the function.
function AddConfigController($routeParams, ConfigFactory, $window){
var vm = this;
// Bare property read; does not create or initialise vm.status.
vm.status;
vm.projectId = $routeParams.projectId;
vm.selectedProject = {};
vm.newConfig = {};
vm.newFile;
vm.fileWarningMsg = '';
// Checks the path held in vm.newFile against the configurations returned by
// ConfigFactory.getByEncodedUri (case-insensitive centralPath comparison) and,
// when there is no match, pushes it onto vm.selectedConfig.files.
vm.addFile = function(){
var filePath = vm.newFile;
var encodedUri = encodeURIComponent(filePath);
// Clear any warning left over from a previous attempt.
vm.fileWarningMsg='';
ConfigFactory
.getByEncodedUri(encodedUri).then(function(response){
var configFound = response.data;
var configNames = '';
var configMatched = false;
if(response.status === 200 && configFound.length > 0){
//find an exact match from text search result
for(var i = 0; i < configFound.length; i++) {
var config = configFound[i];
for(var j=0; j<config.files.length; j++){
var file = config.files[j];
if(file.centralPath.toLowerCase() === filePath.toLowerCase()){
configMatched = true;
configNames += ' [' + config.name + '] ';
// Leaves only the inner loop: each matching configuration is named once.
break;
}
}
}
}
if(configMatched){
vm.fileWarningMsg = 'Warning! File already exists in other configurations.\n' + configNames;
// Accepts any non-empty path that contains '.rvt' anywhere, not only as an extension.
} else if(filePath.length > 0 && filePath.includes('.rvt')){
var file1 = { centralPath: filePath };
// NOTE(review): vm.selectedConfig is never assigned in the visible part of
// this controller (only vm.newConfig is) -- this push looks like it would
// throw here; confirm whether vm.newConfig.files was intended.
vm.selectedConfig.files.push(file1);
vm.newFile = '';
} else{
vm.fileWarningMsg = 'Warning! Please enter a valid file.';
}
}, function(error){
// Rejection handler: surface the failure through vm.status.
vm.status = 'Unable to get configuration data: ' + error.message;
});
};
Since you asked about inheritance and you appear to be using ECMAScript 5, let me suggest taking a look at Object.create(). Specifically, the classical inheritance example.
That said, in AngularJS, a better solution would be to create a Service that manages files or configurations and put the addFile function in there. That way, both controllers could inject the service and call the same function when it is time to add a file. Likewise, other services and controllers that may need access to this functionality could inject it as well.
The following code is a simple scraper written in CasperJS.
// Scraper setup: create the Casper instance and read the positional CLI
// arguments (0: start URL, 1 and 2: page bounds used by processPage,
// 3: stored in `proxy`, which the visible code never reads).
var casper = require('casper').create();
var url = casper.cli.get(0);
var page1 = casper.cli.get(1);
var page2 = casper.cli.get(2);
//console.log(page2);
var proxy = casper.cli.get(3);
//alert(page1);
// Pattern used later to decide whether a collected href is worth opening.
var exp = /[-a-zA-Z0-9#:%_\+.~#?&//=]{2,256}\.[a-z]{2,4}\b(\/[-a-zA-Z0-9#:%_\+.~#?&//=]*)?/gi;
// `exp` is already a RegExp; this builds a second RegExp object from it.
var regex = new RegExp(exp);
var baseUrl = url;
//console.log(baseUrl);
// Selector of the pagination link that leads to the next listing page.
var nextBtn = "a.navigation-button.next";
// Accumulates the hrefs gathered from every listing page.
var allLinks = [];
// Open the start URL, wait for the pagination link, then hand over to processPage.
casper.start(baseUrl);
casper.waitForSelector(nextBtn, processPage);
casper.run();
// Collects links for pages page1..page2-1 and schedules a click on the "next"
// link after each collection.
// NOTE(review): the loop body runs synchronously, so every evaluate() call
// below executes against the page that is loaded when the loop starts; the
// thenClick() steps are only queued here and run after the loop has finished.
function processPage() {
for (var i = page1; i < page2; i = i + 1) {
console.log(i);
// Runs getPageData inside the page and appends the returned hrefs.
var pageData = this.evaluate(getPageData);
allLinks = allLinks.concat(pageData);
// Stop scheduling when the page has no "next" link.
if (!this.exists(nextBtn)) {
return;
};
this.thenClick(nextBtn).then(function() {
//this.echo(i);
this.echo(this.getCurrentUrl());
//this.wait(1000);
});
};
}
// Runs inside the page (it is handed to evaluate()): returns a plain array
// holding the href attribute of every element with class "pro-title".
function getPageData() {
    var titleNodes = document.getElementsByClassName('pro-title');
    var hrefs = [];
    for (var idx = 0; idx < titleNodes.length; idx += 1) {
        hrefs.push(titleNodes[idx].getAttribute('href'));
    }
    return hrefs;
}
// Step queued after the listing pages: open every collected link that matches
// the URL pattern and dump the fields scraped from the profile page.
casper.then(function () {
    this.each(allLinks, function (self, link) {
        if (link.match(regex)) {
            self.thenOpen(link, function () {
                // Declared locally: the original assigned to an undeclared
                // `jsonObj`, which leaked a global shared by every callback.
                var jsonObj = {};
                jsonObj.title = this.fetchText('a.profile-full-name');
                jsonObj.services = this.getHTML('div.info-list-text span:nth-child(2) span');
                // Replace every "&" character in the services markup with "and".
                jsonObj.services = jsonObj.services.replace(/&/g, "and");
                jsonObj.location = this.getHTML('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
                jsonObj.description = this.getHTML('div.profile-about div:nth-child(1)');
                require('utils').dump(jsonObj);
            });
        }
    });
});
I am executing this script as follows,
casperjs scraping.js http://www.houzz.com/professionals/c/Chicago--IL/p/15 1 3
The first CLI argument is the starting URL. The second and third arguments are the starting and ending page numbers of the scrape.
I am able to extract data from the first page, but I don't understand why I am not able to extract data from any of the subsequent pages.
You cannot mix synchronous and asynchronous code like this in processPage. The loop is immediately executed, but the click and the loading of the next page happens asynchronously. The evaluation of the page has to be done asynchronously:
/**
 * Queues one Casper step per page in the range [page1, page2). Each step
 * collects the links of the page that is loaded when the step runs and then
 * clicks the "next" link, so the following step sees the following page.
 *
 * Must be called with a Casper instance as `this` (it is used as the
 * waitForSelector callback). Reads `page1`, `page2`, `nextBtn` and
 * `getPageData`, and appends to `allLinks`.
 */
function processPage() {
    var casperInstance = this;

    // Queues the step for a single page. Taking the page number as a
    // parameter gives every step its own copy; closing over the loop's
    // `var i` directly made every step log the final value of `i`, because
    // the steps only run after the loop has finished.
    function queuePageStep(pageNumber) {
        casperInstance.then(function () {
            console.log(pageNumber);
            var pageData = this.evaluate(getPageData);
            allLinks = allLinks.concat(pageData);
            // No "next" link: nothing to click. This only ends the current
            // step; steps that are already queued still run.
            if (!this.exists(nextBtn)) {
                return;
            }
            this.thenClick(nextBtn).then(function () {
                this.echo(this.getCurrentUrl());
            });
        });
    }

    for (var i = page1; i < page2; i = i + 1) {
        queuePageStep(i);
    }
}
So I'm crawling a page, collecting links, then I would like to crawl those links to complete my dataset. Here's some code:
crawl.js:
// crawl.js setup: Casper instance with a 3000 ms wait timeout, a custom user
// agent, and includes/jquery.min.js listed under clientScripts.
var casper = require("casper").create({
waitTimeout: 3000,
pageSettings: {
userAgent: "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.8; rv:23.0) Gecko/20130404 Firefox/23.0"
},
clientScripts: ["includes/jquery.min.js"],
verbose: true
});
// Shared store module; links are accumulated on followers.followers.
var followers = require('./followers');
// Number of the listing page currently being processed.
var currentPage = 1;
// Short alias for Casper's XPath selector helper.
var x = require('casper').selectXPath;
// Counts the own enumerable properties of `obj`; inherited properties are
// skipped. Attached to the global Object constructor.
Object.size = function(obj) {
    var count = 0;
    for (var prop in obj) {
        if (!obj.hasOwnProperty(prop)) {
            continue;
        }
        count += 1;
    }
    return count;
};
/**
 * Casper step: captures a screenshot of the current listing page, merges the
 * page's links into followers.followers, clicks "Next ›" and runs itself
 * again once the URL has changed.
 *
 * Stops (processFollowers, then terminate) when page 5 is reached or no
 * "Next ›" element exists.
 * NOTE(review): the stop check runs before the links are collected, so the
 * links of the page on which the crawl stops are not added -- confirm that
 * this is intended.
 *
 * Must be called with the Casper instance as `this`.
 */
var collectFollowers = function() {
    var url;
    // Declared locally; the original `for (key in links)` assigned to an
    // undeclared variable and leaked a global named `key`.
    var key;
    this.echo("capturing page " + currentPage);
    this.capture("wowhead-p" + currentPage + ".png");
    // don't go too far down the rabbit hole
    if (currentPage >= 5 || !this.exists(x('//*[text()="Next ›"]'))) {
        processFollowers.call(casper);
        return terminate.call(casper);
    }
    currentPage++;
    this.echo("requesting next page: " + currentPage);
    // Remembered so the wait below can detect that navigation happened.
    url = this.getCurrentUrl();
    // Runs inside the page: maps each link's text to its href.
    var links = this.evaluate(function() {
        var obj = {};
        $('.listview-cleartext').map(function(){
            obj[$(this).text()] = $(this).attr('href');
        });
        return obj;
    });
    for (key in links) {
        followers.followers[key] = links[key];
    }
    this.echo("Page links: " + Object.size(followers.followers));
    this.thenClick(x('//*[text()="Next ›"]')).then(function() {
        // Continue with the next page once the URL differs from the one
        // recorded above; processFollowers is passed as the third argument.
        this.waitFor(function() {
            return url !== this.getCurrentUrl();
        }, collectFollowers, processFollowers);
    });
};
// Opens the page of every collected follower and tries to read its quest link.
// NOTE(review): per the answer that follows this code, the function passed to
// evaluate() runs in the sandboxed page context: `this` there is not the
// Casper instance, and `followers` / `key` from this script are not visible,
// so the assignments and this.echo() calls inside it cannot work as written.
var processFollowers = function() {
this.echo("Total followers:" + Object.size(followers.followers));
this.each(Object.keys(followers.followers), function(casper, key) {
this.thenOpen('http://wowhead.com' + followers.followers[key]).then(function() {
this.echo("On http://wowhead.com" + followers.followers[key]);
this.evaluate(function() {
this.echo("Inside the evaluate statement.");
if ($('a[href=#quests]').length) {
this.echo("Has quest!");
// Activate the quests tab before reading its first link.
$('a[href=#quests]').click();
var questURL = $('#tab-quests').show().find('.listview-cleartext').attr('href');
var questName = $('#tab-quests').show().find('.listview-cleartext').text();
this.echo("Quest URL: " + questURL);
followers.followers[key] = {"name": key, "quest": {"url": questURL, "name": questName}};
} else {
this.echo("Does not have quest!");
}
});
});
});
}
// Prints "Done." and then calls exit() on whatever echo() returned.
// Must be called with the Casper instance as `this`.
var terminate = function() {
    var echoed = this.echo("Done.");
    echoed.exit();
};
// Entry point: open the first listing page, wait for the "Next ›" element and
// start collecting. processFollowers is passed as the third argument
// (presumably the timeout handler -- confirm against the CasperJS docs).
casper.start("http://wowhead.com/followers=2");
casper.waitForSelector(x('//*[text()="Next ›"]'), collectFollowers, processFollowers);
casper.run();
followers.js:
// followers.js -- shared store for the links gathered while crawling.
// patchRequire is not defined in the visible code; presumably it is supplied
// by CasperJS for user modules -- confirm.
var require = patchRequire(require);
// NOTE(review): `utils` is not used in the visible part of this module.
var utils = require('utils');
// The same object is exported by reference, so properties added to
// exports.followers by other scripts are added to this object.
var followers = {};
exports.followers = followers;
followers is used to store a global variable, an object that I continually build and update as I crawl pages. So I go through 3 pages of data, collect links successfully, then begin to process them. As it stands, CasperJS appears to open each page successfully, however the evaluate function is never called.
I was able to get this functionality to work within PhantomJS with some async logic, but switched to casper because it appeared as though this would be taken care of under the hood. I've tried various combinations of thenOpen(), then() and open(), thenOpen() without the then(), etc.. What am I messing up?
casper.evaluate() runs in the sandboxed page context, in the same way as the PhantomJS version (page.evaluate()). It has no access to variables defined outside.
this inside of evaluate() refers to window and not casper and I doubt that there is such a function like window.echo(). If you want to receive console messages from the page context, you need to register to the remote.message event:
// Forward every "remote.message" event to Casper's own output, prefixed with
// "remote: ".
casper.on("remote.message", function (remoteText) {
    var line = "remote: " + remoteText;
    this.echo(line);
});
You have to explicitly pass the result out of the page context and add it there:
// Look up the quest link inside the page and hand back only plain data; the
// result is stored on this side because the page context cannot see
// `followers` or `key`. Messages are written with console.log so they can be
// picked up through the "remote.message" event.
var result = this.evaluate(function() {
    console.log("Inside the evaluate statement.");
    if ($('a[href=#quests]').length) {
        console.log("Has quest!");
        // Activate the quests tab before reading its first link.
        $('a[href=#quests]').click();
        var questURL = $('#tab-quests').show().find('.listview-cleartext').attr('href');
        var questName = $('#tab-quests').show().find('.listview-cleartext').text();
        console.log("Quest URL: " + questURL);
        // The original line ended in "}};" -- the extra brace was a syntax error.
        return {"url": questURL, "name": questName};
    } else {
        console.log("Does not have quest!");
        return null;
    }
});
// Only followers that have a quest are replaced with the richer record.
if (result) {
    followers.followers[key] = {name: key, quest: result};
}
I have been trying to learn how to write modules in JavaScript. With this attempt I am trying to load 10 pictures from Flickr on page load, and then load 10 more pictures once the user scrolls to the bottom of the page. This is not firing consistently and I am not sure why.
I would like to load 10 pictures at page load, and then 10 additional pictures each time the user scrolls down to the bottom of the page.
I think the issue is with the curPage property that is called using this.settings.curPage
curPage is incremented in the jaxPhotos method using this.settings.curPage++
I am not sure but I think the issue is with either the jaxPhotos method or the scrollMorePics method.
Here's a fiddle with my module:http://jsfiddle.net/R3Bt7/
Here's my HTML:
<div class="flickrContainer" data-options='{"searchQuery" : "candy", "tagQuery" : "candy", "tagMode": "all", "picsPerPage" : "10", "curPage" : 1}'>
</div>
Here's my JS:
/**
 * FlickrModule: constructor for a feed that requests Flickr search results
 * one page at a time -- once on construction, and again whenever a scroll
 * event finds the viewport within 50px of the bottom of the document.
 *
 * Settings come from the `options` entry of the container's jQuery data and
 * are merged over the defaults in setOptions(). The container element is
 * looked up once, when this script is evaluated.
 */
var FlickrModule = (function ($element) {
    /** Constructor: remembers the container element and starts the feed. */
    var flickrFeed = function () {
        this.$element = $element;
        this.init();
    };

    /** Reads the settings, requests the first page, then listens for scrolling. */
    flickrFeed.prototype.init = function () {
        this.setOptions()
            .jaxPhotos(this.settings.curPage)
            .onScrollHandler();
    };

    /**
     * Builds this.settings from the defaults overlaid with the element's options.
     * @returns {flickrFeed} this, for chaining
     */
    flickrFeed.prototype.setOptions = function () {
        var options = this.$element.data().options;
        var defaults = {
            searchQuery : '',
            tagQuery : '',
            tagMode : '',
            picsPerPage: '1',
            curPage: 1
        };
        this.settings = $.extend({}, defaults, options);
        return this;
    };

    /**
     * Requests one page of search results and, on success, appends a page
     * heading and one <img> per photo to document.body. Advances
     * settings.curPage as soon as the request has been issued.
     * @param {number} pageNumber page to request and to show in the heading
     * @returns {flickrFeed} this, for chaining
     */
    flickrFeed.prototype.jaxPhotos = function (pageNumber) {
        // ajax call to flickr json
        $.ajax({
            // The page comes from `pageNumber`. The original read
            // `this.settings.currPage` (a typo for curPage), which is always
            // undefined, so every request asked for "page=undefined".
            url: '//api.flickr.com/services/rest/?method=flickr.photos.search&api_key=xxxxxxxxxxxxxxxxxxxx&tags=' + this.settings.searchQuery + '&tag_mode=' + this.settings.tagMode + '&page=' + pageNumber + '&per_page=' + this.settings.picsPerPage + '&format=json&jsoncallback=?',
            dataType: 'jsonp',
            success: function (data) {
                // start assembling some dom elements to wrap around each page
                var pageTxtWrap = document.createElement('div'),
                    pageTxt = document.createElement('p');
                pageTxt.textContent = 'Page ' + pageNumber + ' - Scroll down for more pictures!';
                pageTxt.innerText = 'Page ' + pageNumber + ' - Scroll down for more pictures!';
                pageTxtWrap.className = 'pageTextWrap';
                pageTxtWrap.appendChild(pageTxt);
                var docFragPageHdr = document.createDocumentFragment();
                docFragPageHdr.appendChild(pageTxtWrap);
                document.body.appendChild(docFragPageHdr);
                // Shortcuts into the response; declared with `var` because the
                // original assigned them without a declaration (implicit globals).
                var flickr = data.photos,
                    flickrLength = flickr.photo.length;
                // run through the JSON we just got and assemble the pictures
                for (var i = 0; i < flickrLength; i++) {
                    var farmId = flickr.photo[i].farm,
                        serverId = flickr.photo[i].server,
                        photoId = flickr.photo[i].id,
                        secretId = flickr.photo[i].secret,
                        imgTitle = flickr.photo[i].title;
                    var flickImg = document.createElement('img');
                    flickImg.className = 'flickerImg';
                    flickImg.id = 'flickImg' + i;
                    flickImg.title = imgTitle;
                    flickImg.src = 'http://farm' + farmId + '.staticflickr.com/' + serverId + '/' + photoId + '_' + secretId + '_m.jpg';
                    var docFragFlickImg = document.createDocumentFragment();
                    docFragFlickImg.appendChild(flickImg);
                    document.body.appendChild(docFragFlickImg);
                }
            }
        });
        // increase curPage so the next call asks for the next page of pictures
        this.settings.curPage++;
        return this;
    };

    /**
     * Subscribes scrollMorePics (bound to this instance) to document scroll events.
     * @returns {flickrFeed} this, for chaining
     */
    flickrFeed.prototype.onScrollHandler = function () {
        $(document).on('scroll', this.scrollMorePics.bind(this));
        return this;
    };

    /**
     * Scroll handler: requests the next page when the bottom edge of the
     * viewport is within 50px of the end of the document.
     * NOTE(review): nothing here prevents several scroll events near the
     * bottom from each issuing a request.
     * @returns {flickrFeed} this
     */
    flickrFeed.prototype.scrollMorePics = function () {
        if ($(window).scrollTop() + $(window).height() > $(document).height() - 50) {
            console.log('Before ajax curPage = ', this.settings.curPage);
            this.jaxPhotos(this.settings.curPage);
            console.log('After ajax curPage = ', this.settings.curPage);
        }
        return this;
    };

    return flickrFeed;
}( $('.flickrContainer') ));
// Create the feed inside a function scope so `myModule` does not become a
// global. The constructor calls init() itself, so nothing else is needed here.
(function () {
var myModule = new FlickrModule();
})();
A small example on how you can access instance variables and methods based on your code:
// Same module with the container passed to the constructor instead of being
// captured by the wrapping function; jQuery is handed in as `$`.
var FlickrModule = (function ($) {
    // Constructor: stores the element on the instance, then runs init().
    function flickrFeed($element) {
        this.$element = $element;
        this.init();
    }

    // Demonstrates that instance state is reachable from prototype methods.
    flickrFeed.prototype.init = function () {
        var element = this.$element;
        console.log('init', element);
    };

    return flickrFeed;
})(jQuery);
// Passing a function to $() -- presumably jQuery's DOM-ready shorthand -- so
// the container is looked up when the callback runs, and then handed to the
// constructor.
$(function(){
var $container = $('.flickrContainer'),
fm = new FlickrModule($container);
});
http://jsfiddle.net/5nJqM/
Can PhantomJS be used as an alternative to BeautifulSoup?
I am trying to search on Etsy and visit all the links in term. In Python, I know how to do this (with BeautifulSoup) but today I want to see if I can do the same with PhantomJS. I'm not getting very far.
This script should search for "hello kitty" on Etsy, collect all of the product links of the form
<a class="listing-thumb" href=...></a> and print them in the console. Ideally I'd visit them later on and get the information I need. Right now it just freezes. Any ideas?
// Opens the Etsy search page and tries to print the href of every
// a.listing-thumb link.
var page = require('webpage').create();
var url = 'http://www.etsy.com/search?q=hello%20kitty';
page.open(url, function(status){
// list all the a.href links in the hello kitty etsy page
// NOTE(review): this returns the NodeList itself from the page context; per
// the answers that follow, evaluate() cannot hand DOM objects back to the
// script. `status` is not checked before evaluating.
var link = page.evaluate(function() {
return document.querySelectorAll('a.listing-thumb');
});
for(var i = 0; i < link.length; i++){ console.log(link[i].href); }
phantom.exit();
});
I have toyed with using CasperJS, which may be better designed for this.
PhantomJS evaluate() cannot serialize and return complex objects like HTMLElements or NodeLists, so you have to map them to serializable things before:
// Opens the Etsy search page and prints the href attribute of every
// a.listing-thumb link, one per line.
var page = require('webpage').create();
var url = 'http://www.etsy.com/search?q=hello%20kitty';
// NOTE(review): `status` is not checked, so a failed load is evaluated as well.
page.open(url, function(status) {
// list all the a.href links in the hello kitty etsy page
// Inside the page, the matched links are reduced to an array of strings so
// that only serializable data is returned to this script.
var links = page.evaluate(function() {
return [].map.call(document.querySelectorAll('a.listing-thumb'), function(link) {
return link.getAttribute('href');
});
});
console.log(links.join('\n'));
phantom.exit();
});
Note: here we use [].map.call() in order to treat a NodeList as a standard Array.
The only problem with your code is that you do not understand phantomjs scopes. You have phantom and page scopes. You tried to return JavaScript DOM object references (those can't be serialized) from page scope (page.evaluate runs in page scope) to phantom main scope. I think that is not possible. Here follows code that works:
var page = require('webpage').create();
var url = 'http://www.etsy.com/search?q=hello%20kitty';
// for debug (to see if page returns status code 200)
// Debug hook: reports responses whose URL equals `url` and, for a 200
// response, also prints the URL and every response header.
page.onResourceReceived = function(response) {
    // Ignore every resource other than the requested page itself.
    if (response.url !== url) {
        return;
    }
    console.log('Resorce: "' + response.url + '" status: ' + response.status);
    if (response.status !== 200) {
        return;
    }
    console.log(response.url);
    response.headers.forEach(function (header) {
        console.log(header.name + ': ' + header.value);
    });
};
// Runs when the page load finishes: collects the href of every
// a.listing-thumb link inside the page, prints them with their index and
// exits with code 0.
page.onLoadFinished = function(status){
    console.log('Status: ' + status);
    console.log('Starting evaluate...');
    // Only plain strings are returned from the page context. A missing result
    // is treated as "no links" here, before `.length` is read: the original
    // read links.length first and only guarded against a falsy value
    // afterwards, so that guard could never be reached.
    var links = page.evaluate(function() {
        var nodes = [],
            matches = document.querySelectorAll("a.listing-thumb");
        for(var i = 0; i < matches.length; ++i) {
            nodes.push(matches[i].href);
        }
        return nodes;
    }) || [];
    console.log('Done evaluate... count: ' + links.length);
    if (links.length > 0) {
        for(var i = 0; i < links.length; ++i) {
            console.log('(' + i + ') ' + links[i]);
        }
    } else {
        console.log("No match found!");
    }
    phantom.exit(0);
};
page.open(url);
Here is some code I recently wrote that scrapes URLs using PhantomJS. If you provide only a URL, it displays all URLs on the page; if you supply an argument of class|id followed by a "class/id name", it displays only the URLs found inside that class/id.
//////////////////////////////////////////////////////////
///// PhantomJS URL Scraper v.1.3 /////
//
// Copyrighted by +A.M.Danischewski 2016+ (c)
// This program may be reutilized without limits, provided this
// notice remain intact.
//
// Usage: phantomjs phantom_urls.js <URL> [["class"|"id"] [<query id/class name>]]
//
// Argument 1: URL -- "https://www.youtube.com/watch?v=8TniRMwL2Vg"
// Argument 2: "class" or "id"
// Argument 3: If Argument 2 was provided, "class name" or "id name"
//
// By default this program will display ALL urls from a user supplied URL.
// If a class name or id name is provided then only URL's from the class
// or id are displayed.
//
///////////////////////////////////
// Script setup: create the page object and read the command-line arguments.
// `querytype` and `queryclass` are declared here; the original assigned them
// without a declaration, which created implicit globals.
var page = require('webpage').create(),
    system = require('system'),
    address,
    querytype,
    queryclass;
// Only the script name was supplied: print the usage line and ask PhantomJS to exit.
if (system.args.length === 1) {
    console.log(' Usage: phantomjs phantom_urls.js <URL> [["class"|"id"] [<query id/class name>]]');
    phantom.exit();
}
address = system.args[1];    // URL to load
querytype = system.args[2];  // "class", "id", or undefined when omitted
queryclass = system.args[3]; // class name or id to restrict the output to
// Start loading the requested address. The callback only reports a failed
// load; the links are printed by the onLoadFinished handler.
page.open(address, function (status) {
    if (status === 'success') {
        return;
    }
    console.log('Error loading address: '+address);
});
// Relay each console message received from the page to this script's console.
page.onConsoleMessage = function (pageMessage) {
    console.log(pageMessage);
};
/**
 * Runs when the page load finishes: prints the href of every link selected by
 * the command-line arguments, one per line, then exits.
 *
 *   querytype "class" -> links inside every element with class `queryclass`
 *   querytype "id"    -> links inside the element whose id is `queryclass`
 *   anything else     -> every link on the page
 *
 * The class name / id is passed to page.evaluate() as an argument. The
 * original spliced it into a string of function source, which broke on names
 * containing a quote and let the argument inject code into the page.
 */
page.onLoadFinished = function (status) {
    // The three collectors run inside the page, so they must not reference
    // anything from this script other than their own parameter.
    function collectByClass(className) {
        var urls = [];
        var listings = document.getElementsByClassName(className);
        for (var i = 0; i < listings.length; i++) {
            var anchors = listings[i].querySelectorAll('a');
            for (var j = 0; j < anchors.length; j++) {
                urls.push(anchors[j].getAttribute('href'));
            }
        }
        return urls;
    }

    function collectById(elementId) {
        var urls = [];
        var listing = document.getElementById(elementId);
        // No element with that id: report no links instead of throwing.
        if (!listing) {
            return urls;
        }
        var anchors = listing.querySelectorAll('a');
        for (var i = 0; i < anchors.length; i++) {
            urls.push(anchors[i].getAttribute('href'));
        }
        return urls;
    }

    function collectAll() {
        return [].map.call(document.querySelectorAll('a'), function (link) {
            return link.getAttribute('href');
        });
    }

    var links;
    if (querytype === "class") {
        links = page.evaluate(collectByClass, queryclass);
    } else if (querytype === "id") {
        links = page.evaluate(collectById, queryclass);
    } else {
        links = page.evaluate(collectAll);
    }
    // Joined directly: the original turned the array into a string and then
    // replaced every comma with a newline, which also split URLs that
    // contain commas.
    console.log((links || []).join('\n'));
    phantom.exit();
};