I am using this script to store the user's history to a cookie for the last 10 pages accessed. So far I've got the script displaying the cookie data using the document.title, and url in a list.
My question is what would be the simplest way to add a page skip feature, that would let me omit certain pages from being added to the history cookie? Everything I've tried hasn't worked, as it's a little bit outside of my knowledge.
Thanks for your time and help.
JS:
(function($){
var history;
function getHistory() {
var tmp = $.cookie("history");
if (tmp===undefined || tmp===null) tmp = "";
if ($.trim(tmp)=="") tmp = [];
else tmp = tmp.split("||");
history = [];
$.each(tmp, function(){
var split = this.split("|");
history.push({
title: split[0],
url: split[1]
});
});
}
function saveHistory() {
var tmp = [];
$.each(history, function(){
tmp.push(this.title+"|"+this.url);
});
$.cookie("history",tmp.join("||"),{ expires: 60, path: "/" });
}
function addToHistory(title,url) {
var newHistory = []
$.each(history, function(){
if (this.url!=url) newHistory.push(this);
});
history = newHistory;
if (history.length>=10) {
history.shift();
}
history.push({
title: title,
url: url
});
saveHistory();
writeHistory();
}
function writeHistory() {
var list = $("<ul />");
$.each(history, function() {
var element = $("<li />");
var link = $("<a />");
link.attr("href",this.url);
link.text(this.title);
element.append(link);
list.append(element);
});
$("#history").empty().append(list);
}
$(document).ready(function(){
getHistory();
var url = document.location.href;
var split = url.split("#");
var title;
if (split.length > 1) {
title = $("#"+split[1]).text();
} else {
title = document.title;
}
if (title===undefined || title===null || $.trim(title)=="") title = url;
addToHistory(title,url);
url = split[0];
$("a[href^='#']").click(function(){
var link = $(this);
var href = link.attr("href");
var linkUrl = url+href;
var title = $(href).text();
if (title===undefined || title===null || $.trim(title)==="") title = linkUrl;
addToHistory(title,linkUrl);
});
});
})(jQuery);
HTML:
<div id="history"></div>
several ways you could approach this... You could keep an Array of urls not to save, or you could put something in the page that would let the script know not to save that page?...
function saveHistory(){
if ($('.no-save-history')) return false;
/*...*/
}
HTML:
< div id="history" class="no-save-history">
Related
I am trying to redirect from productlisting.html page to search.html using angular on click of a button but the URL appending in URL bar is:
/dashboard-html/product-listing1.html#/#%2Fsearch.html%3Fquery=biscuits
and I want it to be like this
/dashboard-html/search.html%?query=biscuits
My code script.js is:
Array.prototype.chunk = function (groupsize) {
var sets = [];
var chunks = this.length / groupsize;
for (var i = 0, j = 0; i < chunks; i++, j += groupsize) {
sets[i] = this.slice(j, j + groupsize);
}
return sets;
};
var data_bind=angular.module('my_app', []);
data_bind.controller('productController', function ($scope, $http, $location)
{
var isProductListingpage = false;
$scope.init = function (isProductListpage){
isProductListingpage = isProductListpage;
// if this is not search page, get the query params from url and get the products to show from server
if(!isProductListingpage)
{
var queryParam = location.search;
var request_url='http://dmsp1-kakash.boxfuse.io:9000/products?'+queryParam;
sendRequest(request_url);
}
}
function sendRequest(request_url)
{
$http({
method : 'GET',
url : request_url,
headers : {'Content-Type' : 'application/json'}
}).success(function(response){
if(response.errors){
$scope.errorName = response.errors.name ;
$scope.errorUserName = response.errors.username;
$scope.errorEmail = response.errors.email;
}
else
{
$scope.product = response.data.products;
$scope.productGroups = $scope.product.chunk(3);
}
});
}
$scope.search_field={};
$scope.searchdata= function()
{
var queryParam = "query="+encodeURI($scope.search_field.search);
var request_url='http://dmsp1-kakash.boxfuse.io:9000/products?'+queryParam;
// If this is product listing page, we need to redirect to search page,
// otherwise make a request to server
if(isProductListingpage) {
//$window.location.href = '/index.html'
$location.url('#/search.html?'+queryParam);
}
else {
sendRequest(request_url);
}
}
$scope.search_field1={};
$scope.search_page= function(event)
{
var $target = $(event.target);
if($target.hasClass('sub-category'))
{
var subCategory = $target.clone() //clone the element
.children() //select all the children
.remove() //remove all the children
.end() //again go back to selected element
.text();
var $temp = $target.closest('.submenu');
var $mainCategoryElement = $temp.siblings('.main-category').first();
var mainCategory = $mainCategoryElement.text();
// Encode both the values
subCategory = encodeURI(subCategory);
mainCategory = encodeURI(mainCategory);
var queryParam = 'cat='+mainCategory+"&subcat="+subCategory;
var request_url='http://dmsp1-kakash.boxfuse.io:9000/products?'+queryParam;
// If this is product listing page, we need to redirect to search page,
// otherwise make a request to server
if(isProductListingpage) {
$location.url('#/search.html?'+queryParam);
}
else {
sendRequest(request_url);
}
}
}
});``
you should use the default angular router or ui-router and define clear states.
The following code is a simple scraper written in CasperJS.
var casper = require('casper').create();
var url = casper.cli.get(0);
var page1 = casper.cli.get(1);
var page2 = casper.cli.get(2);
//console.log(page2);
var proxy = casper.cli.get(3);
//alert(page1);
var exp = /[-a-zA-Z0-9#:%_\+.~#?&//=]{2,256}\.[a-z]{2,4}\b(\/[-a-zA-Z0-9#:%_\+.~#?&//=]*)?/gi;
var regex = new RegExp(exp);
var baseUrl = url;
//console.log(baseUrl);
var nextBtn = "a.navigation-button.next";
var allLinks = [];
casper.start(baseUrl);
casper.waitForSelector(nextBtn, processPage);
casper.run();
function processPage() {
for (var i = page1; i < page2; i = i + 1) {
console.log(i);
var pageData = this.evaluate(getPageData);
allLinks = allLinks.concat(pageData);
if (!this.exists(nextBtn)) {
return;
};
this.thenClick(nextBtn).then(function() {
//this.echo(i);
this.echo(this.getCurrentUrl());
//this.wait(1000);
});
};
}
function getPageData(){
//return document.title;
var links = document.getElementsByClassName('pro-title');
links = Array.prototype.map.call(links,function(link){
return link.getAttribute('href');
});
return links;
};
casper.then(function(){
//require('utils').dump(allLinks);
this.each(allLinks,function(self,link){
if (link.match(regex)) {
self.thenOpen(link,function(a){
jsonObj = {};
jsonObj.title = this.fetchText('a.profile-full-name');
jsonObj.services = this.getHTML('div.info-list-text span:nth-child(2) span');
jsonObj.services = jsonObj.services.replace(/&/g,"and");
jsonObj.location = this.getHTML('div.pro-info-horizontal-list div.info-list-label:nth-child(3) div.info-list-text span');
//jsonObj.contact = this.fetchText('span.pro-contact-text');
jsonObj.description = this.getHTML('div.profile-about div:nth-child(1)');
//jsonObj.description.replace(/\s/g, '');
//require('utils').dump(jsonObj);
//jsonObj.description = jsonObj.description.replace(/[\t\n]/g,"");
//jsonObj = JSON.stringify(jsonObj, null, '\t');
//console.log(i);
require('utils').dump(jsonObj);
});
};
});
});
I am executing this script as follows,
casperjs scraping.js http://www.houzz.com/professionals/c/Chicago--IL/p/15 1 3
The first CLI argument is the starting URL. The second and third arguments are the starting and ending page numbers of the scrape.
I am able to extract data from the first page, but I don't understand why I am not able to extract data from any of the consequent pages.
You cannot mix synchronous and asynchronous code like this in processPage. The loop is immediately executed, but the click and the loading of the next page happens asynchronously. The evaluation of the page has to be done asynchronously:
function processPage() {
for (var i = page1; i < page2; i = i + 1) {
this.then(function(){
console.log(i);
var pageData = this.evaluate(getPageData);
allLinks = allLinks.concat(pageData);
if (!this.exists(nextBtn)) {
return;
}
this.thenClick(nextBtn).then(function() {
this.echo(this.getCurrentUrl());
});
});
};
}
I was asked to work on web crawler using phantomjs. However, as I read through the example, I was puzzled by some of the code:
Is this a loop? $("table[id^='post']").each(function(index)
What does this line of code mean? var entry = $(this);
How is the id captured? var id = entry.attr('id').substring(4);
This line var poster = entry.find('a.bigusername'); tries to get a username from the page. Is there a tutorial on how to make use of entry.find to scrap data off the page?
page.open(url1, function (status) {
// Check for page load success
if (status !== "success") {
console.log("Unable to access network");
phantom.exit(231);
} else {
if (page.injectJs("../lib/jquery-2.1.0.min.js") && page.injectJs("../lib/moment-with-langs.js") && page.injectJs("../lib/sugar.js") && page.injectJs("../lib/url.js")){
allResults = page.evaluate(function(url) {
var arr = [];
var title = $("meta[property='og:title']").attr('content');
title = title.trim();
$("table[id^='post']").each(function(index){
var entry = $(this);
var id = entry.attr('id').substring(4);
var poster = entry.find('a.bigusername');
poster = poster.text().trim();
var text = entry.find("div[id^='post_message_']");
//remove quotes of other posts
text.find(".quote").remove();
text.find("div[style='margin:20px; margin-top:5px; ']").remove();
text.find(".bbcode_container").remove();
text = text.text().trim();
var postDate = entry.find("td.thead");
postDate = postDate.first().text().trim();
var postUrl = entry.find("a[id^='postcount']");
if (postUrl){
postUrl = postUrl.attr('href');
postUrl = URL.resolve(url, postUrl);
}
else{
postUrl = url;
}
if (postDate.indexOf('Yesterday') >= 0){
postDate = Date.create(postDate).format('{yyyy}-{MM}-{dd} {HH}:{mm}');
}
else if (postDate.indexOf('Today') >= 0){
postDate = Date.create(postDate).format('{yyyy}-{MM}-{dd} {HH}:{mm}');
}
else{
var d = moment(postDate, 'DD-MM-YYYY, hh:mm A');
postDate = d.format('YYYY-MM-DD HH:mm');
}
var obj = {'id': id, 'title': title, 'poster': poster, 'text': text, 'url': postUrl, 'post_date' : postDate, 'location': 'Singapore', 'country': 'SG'};
arr.push(obj);
});
return arr;
}, url);
console.log("##START##");
console.log(JSON.stringify(allResults, undefined, 4));
console.log("##END##");
console.log("##URL=" + url);
fs.write("../cache/" + encodeURIComponent(url), page.content, "w");
phantom.exit();
}
}
});
Is this a loop? $("table[id^='post']").each(function(index)?
Yes
What does this line of code mean? var entry = $(this);
It assigns a jQuery object to variable entry
How is the id captured? var id = entry.attr('id').substring(4);
It uses jQuery which has attr() function.
I am using the galleria script for my website, with the facebook mod.
I want to modify it a bit, so the albumid that should be showing, is the ID given in query string.
My code is:
Galleria.run('#galleria', { facebook: 'album:000000000', width: 745, height: 550, lightbox: true});
Where i want album:000000000, to be album:-querystring albumid-
For example, my page is album.php?albumid=123456, i want the code to be:
Galleria.run('#galleria', { facebook: 'album:123456', width: 745, height: 550, lightbox: true});
Could someone help me with a certain code?
I can't claim too much familiarity with Galleria, but I have used the JS function below to grab query string variable values.
function parseURLParams(name, locat) {
var results = new RegExp('[\\?&]' + name + '=([^&#]*)').exec(locat);
if (results) {
return results[1] || "";
} else {
return "";
}
}
So if you include the above function in your project, you could potentially set up your code like so:
Galleria.run('#galleria', { facebook: 'album:' + parseURLParams("albumid", window.location.href), width: 745, height: 550, lightbox: true});
Hope it helps!
<script type="text/javascript">
$(document).ready(function () {
$('input.letter').click(function () {
//0- prepare values
var qsTargeted = 'letter=' + this.value; //"letter=A";
var windowUrl = '';
var qskey = qsTargeted.split('=')[0];
var qsvalue = qsTargeted.split('=')[1];
//1- get row url
var originalURL = window.location.href;
//2- get query string part, and url
if (originalURL.split('?').length > 1) //qs is exists
{
windowUrl = originalURL.split('?')[0];
var qs = originalURL.split('?')[1];
//3- get list of query strings
var qsArray = qs.split('&');
var flag = false;
//4- try to find query string key
for (var i = 0; i < qsArray.length; i++) {
if (qsArray[i].split('=').length > 0) {
if (qskey == qsArray[i].split('=')[0]) {
//exists key
qsArray[i] = qskey + '=' + qsvalue;
flag = true;
break;
}
}
}
if (!flag)// //5- if exists modify,else add
{
qsArray.push(qsTargeted);
}
var finalQs = qsArray.join('&');
//6- prepare final url
window.location = windowUrl + '?' + finalQs;
}
else {
//6- prepare final url
//add query string
window.location = originalURL + '?' + qsTargeted;
}
})
});
</script>
As seen below I'm trying to get #currentpage to pass client params
Can someone help out thanks.
$(document).ready(function() {
window.addEventListener("load", windowLoaded, false);
function windowLoaded() {
chrome.tabs.getSelected(null, function(tab) {
document.getElementById('currentpage').innerHTML = tab.url;
});
}
var url = $("currentpage");
// yes I relize this is the part not working.
var client = jQuery.param("currentpage");
var page = jQuery.param("currentpage");
var devurl = "http://#/?clientsNumber=" + client + "&pageName=" + page ;
});
This is a method to extract the params from a url
function getUrlParams(url) {
var paramMap = {};
var questionMark = url.indexOf('?');
if (questionMark == -1) {
return paramMap;
}
var parts = url.substring(questionMark + 1).split("&");
for (var i = 0; i < parts.length; i ++) {
var component = parts[i].split("=");
paramMap [decodeURIComponent(component[0])] = decodeURIComponent(component[1]);
}
return paramMap;
}
Here's how to use it in your code
var url = "?c=231171&p=home";
var params = getUrlParams(url);
var devurl = "http://site.com/?c=" + encodeURIComponent(params.c) + "&p=" + encodeURIComponent(params.p) + "&genphase2=true";
// devurl == "http://site.com/?c=231171&p=home&genphase2=true"
See it in action http://jsfiddle.net/mendesjuan/TCpsD/
Here's the code you posted with minimal changes to get it working, it also uses $.param as it's intended, that is to create a query string from a JS object, this works well since my suggested function returns an object from the url
$(document).ready(function() {
// This does not handle arrays because it's not part of the official specs
// PHP and some other server side languages support it but there's no official
// consensus
function getUrlParams(url) {
var paramMap = {};
var questionMark = url.indexOf('?');
if (questionMark == -1) {
return paramMap;
}
var parts = url.substring(questionMark + 1).split("&");
for (var i = 0; i < parts.length; i ++) {
var component = parts[i].split("=");
paramMap [decodeURIComponent(component[0])] = decodeURIComponent(component[1]);
}
return paramMap;
}
// no need for the extra load listener here, jquery.ready already puts
// your code in the onload
chrome.tabs.getSelected(null, function(tab) {
document.getElementById('currentpage').innerHTML = tab.url;
});
var url = $("currentpage");
var paramMap = getUrlParams(url);
// Add the genphase parameter to the param map
paramMap.genphase2 = true;
// Use jQuery.param to create the url to click on
var devurl = "http://site.com/?"+ jQuery.param(paramMap);
$('#mydev').click( function(){
window.open(devurl);
});
});