I was asked to work on web crawler using phantomjs. However, as I read through the example, I was puzzled by some of the code:
Is this a loop? $("table[id^='post']").each(function(index)
What does this line of code mean? var entry = $(this);
How is the id captured? var id = entry.attr('id').substring(4);
This line var poster = entry.find('a.bigusername'); tries to get a username from the page. Is there a tutorial on how to make use of entry.find to scrap data off the page?
page.open(url1, function (status) {
// Check for page load success
if (status !== "success") {
console.log("Unable to access network");
phantom.exit(231);
} else {
if (page.injectJs("../lib/jquery-2.1.0.min.js") && page.injectJs("../lib/moment-with-langs.js") && page.injectJs("../lib/sugar.js") && page.injectJs("../lib/url.js")){
allResults = page.evaluate(function(url) {
var arr = [];
var title = $("meta[property='og:title']").attr('content');
title = title.trim();
$("table[id^='post']").each(function(index){
var entry = $(this);
var id = entry.attr('id').substring(4);
var poster = entry.find('a.bigusername');
poster = poster.text().trim();
var text = entry.find("div[id^='post_message_']");
//remove quotes of other posts
text.find(".quote").remove();
text.find("div[style='margin:20px; margin-top:5px; ']").remove();
text.find(".bbcode_container").remove();
text = text.text().trim();
var postDate = entry.find("td.thead");
postDate = postDate.first().text().trim();
var postUrl = entry.find("a[id^='postcount']");
if (postUrl){
postUrl = postUrl.attr('href');
postUrl = URL.resolve(url, postUrl);
}
else{
postUrl = url;
}
if (postDate.indexOf('Yesterday') >= 0){
postDate = Date.create(postDate).format('{yyyy}-{MM}-{dd} {HH}:{mm}');
}
else if (postDate.indexOf('Today') >= 0){
postDate = Date.create(postDate).format('{yyyy}-{MM}-{dd} {HH}:{mm}');
}
else{
var d = moment(postDate, 'DD-MM-YYYY, hh:mm A');
postDate = d.format('YYYY-MM-DD HH:mm');
}
var obj = {'id': id, 'title': title, 'poster': poster, 'text': text, 'url': postUrl, 'post_date' : postDate, 'location': 'Singapore', 'country': 'SG'};
arr.push(obj);
});
return arr;
}, url);
console.log("##START##");
console.log(JSON.stringify(allResults, undefined, 4));
console.log("##END##");
console.log("##URL=" + url);
fs.write("../cache/" + encodeURIComponent(url), page.content, "w");
phantom.exit();
}
}
});
Is this a loop? $("table[id^='post']").each(function(index)?
Yes
What does this line of code mean? var entry = $(this);
It assigns a jQuery object to variable entry
How is the id captured? var id = entry.attr('id').substring(4);
It uses jQuery which has attr() function.
Related
I am working on my hello world project. I have two pages let's call them "configuration" and "add configuration" *.html. Each one has its own controller like this:
angular.module('MissionControlApp').controller('ConfigController', ConfigController);
angular.module('MissionControlApp').controller('AddConfigController', AddConfigController);
Now, each controller has some properties that very much overlap:
function ConfigController($routeParams, ConfigFactory, $window){
var vm = this;
vm.status;
vm.projectId = $routeParams.projectId;
vm.selectedProject;
vm.configurations;
vm.selectedConfig;
vm.selectedRecords;
vm.filteredConfig;
vm.newFile;
vm.fileWarningMsg = '';
vm.addFile = function(){
var filePath = vm.newFile;
var encodedUri = encodeURIComponent(filePath);
vm.fileWarningMsg='';
ConfigFactory
.getByEncodedUri(encodedUri).then(function(response){
var configFound = response.data;
var configNames = '';
var configMatched = false;
if(response.status === 200 && configFound.length > 0){
//find an exact match from text search result
for(var i = 0; i < configFound.length; i++) {
var config = configFound[i];
for(var j=0; j<config.files.length; j++){
var file = config.files[j];
if(file.centralPath.toLowerCase() === filePath.toLowerCase()){
configMatched = true;
configNames += ' [' + config.name + '] ';
break;
}
}
}
}
if(configMatched){
vm.fileWarningMsg = 'Warning! File already exists in other configurations.\n' + configNames;
} else if(filePath.length > 0 && filePath.includes('.rvt')){
var file1 = { centralPath: filePath };
vm.selectedConfig.files.push(file1);
vm.newFile = '';
} else{
vm.fileWarningMsg = 'Warning! Please enter a valid file.';
}
}, function(error){
vm.status = 'Unable to get configuration data: ' + error.message;
});
};
My AddConfigController also wants to have the same functionality for addFile() so I just copy pasted the same code, but coming from C# i am sure i can do some class inheritance here, and just inherit from ConfigController and extend...right?
If this is super noob question. then apologies. js is a bit of a mystery to me.
function AddConfigController($routeParams, ConfigFactory, $window){
var vm = this;
vm.status;
vm.projectId = $routeParams.projectId;
vm.selectedProject = {};
vm.newConfig = {};
vm.newFile;
vm.fileWarningMsg = '';
vm.addFile = function(){
var filePath = vm.newFile;
var encodedUri = encodeURIComponent(filePath);
vm.fileWarningMsg='';
ConfigFactory
.getByEncodedUri(encodedUri).then(function(response){
var configFound = response.data;
var configNames = '';
var configMatched = false;
if(response.status === 200 && configFound.length > 0){
//find an exact match from text search result
for(var i = 0; i < configFound.length; i++) {
var config = configFound[i];
for(var j=0; j<config.files.length; j++){
var file = config.files[j];
if(file.centralPath.toLowerCase() === filePath.toLowerCase()){
configMatched = true;
configNames += ' [' + config.name + '] ';
break;
}
}
}
}
if(configMatched){
vm.fileWarningMsg = 'Warning! File already exists in other configurations.\n' + configNames;
} else if(filePath.length > 0 && filePath.includes('.rvt')){
var file1 = { centralPath: filePath };
vm.selectedConfig.files.push(file1);
vm.newFile = '';
} else{
vm.fileWarningMsg = 'Warning! Please enter a valid file.';
}
}, function(error){
vm.status = 'Unable to get configuration data: ' + error.message;
});
};
Since you asked about inheritance and you appear to be using ECMAScript 5, let me suggest taking a look at Object.create(). Specifically, the classical inheritance example.
That said, in AngularJS, a better solution would be to create a Service that manages files or configurations and put the addFile function in there. That way, both controllers could inject the service and call the same function when it is time to add a file. Likewise, other services and controllers that may need access to this functionality could inject it as well.
Hello you wonderful people, I am trying to build JavaScript file to extract information from Wikipedia based on search value in the input field and then display the results with the title like link so the user can click the link and read about it. So far I am getting the requested information in(JSON)format from Mediawiki(Wikipedia) but I can't get it to display on the page. I think I have an error code after the JavaScript array.
I'm new at JavaScript any help, or hint will be appreciated.
Sorry my script is messy but I am experimenting a lot with it.
Thanks.
var httpRequest = false ;
var wikiReport;
function getRequestObject() {
try {
httpRequest = new XMLHttpRequest();
} catch (requestError) {
return false;
}
return httpRequest;
}
function getWiki(evt) {
if (evt.preventDefault) {
evt.preventDefault();
} else {
evt.returnValue = false;
}
var search = document.getElementsByTagName("input")[0].value;//("search").value;
if (!httpRequest) {
httpRequest = getRequestObject();
}
httpRequest.abort();
httpRequest.open("GET", "https://en.wikipedia.org/w/api.php?action=query&format=json&gsrlimit=3&generator=search&origin=*&gsrsearch=" + search , true);//("get", "StockCheck.php?t=" + entry, true);
//httpRequest.send();
httpRequest.send();
httpRequest.onreadystatechange = displayData;
}
function displayData() {
if(httpRequest.readyState === 4 && httpRequest.status === 200) {
wikiReport = JSON.parse(httpRequest.responseText);//for sunchronus request
//wikiReport = httpRequest.responseText;//for asynchronus request and response
//var wikiReport = httpRequest.responseXML;//processing XML data
var info = wikiReport.query;
var articleWiki = document.getElementsByTagName("article")[0];//creating the div array for displaying the results
var articleW = document.getElementById("results")[0];
for(var i = 0; i < info.length; i++)
{
var testDiv = document.createElement("results");
testDiv.append("<p><a href='https://en.wikipedia.org/?curid=" + query.pages[i].pageid + "' target='_blank'>" + query.info[i].title + "</a></p>");
testDiv.appendChild("<p><a href='https://en.wikipedia.org/?curid=" + query.info[i].pageid + "' target='_blank'>" + query.info[i].title + "</a></p>");
var newDiv = document.createElement("div");
var head = document.createDocumentFragment();
var newP1 = document.createElement("p");
var newP2 = document.createElement("p");
var newA = document.createElement("a");
head.appendChild(newP1);
newA.innerHTML = info[i].pages;
newA.setAttribute("href", info[i].pages);
newP1.appendChild(newA);
newP1.className = "head";
newP2.innerHTML = info[i].title;
newP2.className = "url";
newDiv.appendChild(head);
newDiv.appendChild(newP2);
articleWiki.appendChild(newDiv);
}
}
}
//
function createEventListener(){
var form = document.getElementsByTagName("form")[0];
if (form.addEventListener) {
form.addEventListener("submit", getWiki, false);
} else if (form.attachEvent) {
form.attachEvent("onsubmit", getWiki);
}
}
//createEventListener when the page load
if (window.addEventListener) {
window.addEventListener("load", createEventListener, false);
} else if (window.attachEvent) {
window.attachEvent("onload", createEventListener);
}
Mediawiki api link
https://en.wikipedia.org/w/api.php?action=query&format=json&gsrlimit=3&generator=search&origin=*&gsrsearch=
You are wrong some points.
1)
var articleW = document.getElementById("results")[0];
This is wrong. This will return a element is a reference to an Element object, or null if an element with the specified ID is not in the document. Doc is here (https://developer.mozilla.org/en-US/docs/Web/API/Document/getElementById)
The correct answer should be :
var articleW = document.getElementById("results");
2)
var info = wikiReport.query;
for(var i = 0; i < info.length; i++) {}
The info is object . it is not array , you can't for-loop to get child value.
wikiReport.query is not correct wiki data. The correct data should be wikiReport.query.pages. And use for-in-loop to get child element
The correct answer:
var pages = wikiReport.query.pages
for(var key in pages) {
var el = pages[key];
}
3) This is incorrect too
testDiv.appendChild("<p><a href='https://en.wikipedia.org/?curid=" + query.info[i].pageid + "' target='_blank'>" + query.info[i].title + "</a></p>");
The Node.appendChild() method adds a node to the end of the list of children of a specified parent node. You are using the method to adds a string . This will cause error. Change it to node element or use append method instead
I have created a sample test.You can check it at this link below https://codepen.io/anon/pen/XRjOQQ?editors=1011
I'm generating a series of variables in a loop (using JS), and I'm assigning them an .id and a .name based on the current index. At each loop I'm sending a request to the server using jQuery.post()method, but the returning response is just an empty variable.
Here's the code:
JavaScript
for ( var index = 0; index < 5; index++ ) {
var myVar = document.createElement('p');
myVar.id = 'myVarID' + index;
myVar.name = 'myVarName' + index;
//Send request to server
$(document).ready(function(){
var data = {};
var i = 'ind';
var id = myVar.id;
var name = myVar.name;
data[id] = name;
data[i] = index;
$.post("script.php", data, function(data){
console.log("Server response:", data);
});
});
}
PHP
<?php
$index = $_POST['ind'];
$myVar = $_POST['myVarID'.$index];
echo $myVar;
?>
Response: Server response: ''
If I instead set a static index in JS code, getting rid of the loop, so for example:
var index = 0;
I get the expected result: Server response: myVarName0
Why is this happening? And how can I solve it?
Assuming the php file is in order. I use this:
function doThing(url) {
getRequest(
url,
doMe,
null
);
}
function doMe(responseText) {
var container = document.getElementById('hahaha');
container.innerHTML = responseText;
}
function getRequest(url, success, error) {
var req = false;
try{
// most browsers
req = new XMLHttpRequest();
} catch (e){
// IE
try{
req = new ActiveXObject("Msxml2.XMLHTTP");
} catch (e) {
// try an older version
try{
req = new ActiveXObject("Microsoft.XMLHTTP");
} catch (e){
return false;
}
}
}
if (!req) return false;
if (typeof success != 'function') success = function () {};
if (typeof error!= 'function') error = function () {};
req.onreadystatechange = function(){
if(req .readyState == 4){
return req.status === 200 ?
success(req.responseText) : error(req.status)
;
}
}
var thing = "script.php?" + url;
req.open("GET", thing, true);
req.send(null);
return req;
}
then use it like this:
doThing("myVarID="+myVar.id+"&i="+index);
also, you will have to change your PHP to something like this:
<?php
$index = $_GET['ind'];
$myVar = $_GET['myVarID'.$index];
echo $myVar;
?>
Obviously this code needs to be edited to suit your own needs
the function doMe is what to do when the webpage responds, in that example I changed the element with the id hahaha to the response text.
This won't win you any prizes but it'll get the job done.
Solution
It is working fine removing:
$(document).ready()
Working code
for ( var index = 0; index < 5; index++ ) {
var myVar = document.createElement('p');
myVar.id = 'myVarID' + index;
myVar.name = 'myVarName' + index;
//Send request to server
var data = {};
var i = 'ind';
var id = myVar.id;
var name = myVar.name;
data[id] = name;
data[i] = index;
$.post("script.php", data, function(data){
console.log("Server response:", data);
});
}
I have a problem on parse.com in which i to take the id of an type and pass it to another function which uploads an image. Then take the type.id among with the image and post it to another function which saves the data to a class.
This is what i've tried until now without success.
--OnClick code
$('#submitId').on("click", function(e, f) {
e.preventDefault();
typeSave(typeid1);
//var objnew1 = typeSave();
console.log("inside onclick " + type2);
var fileUploadControl = $("#profilePhotoFileUpload")[0];
var file = fileUploadControl.files[0];
var name = file.name; //This does *NOT* need to be a unique name
var parseFile = new Parse.File(name, file);
parseFile.save().then(
function() {
//typeSave();
type2 = typeid1;
saveJobApp(parseFile, type2);
console.log("inside save onclick " + type2);
},
function(error) {
alert("error");
}
);
});
-- Type Code
var type;
var typeid1;
var type2;
function typeSave() {
var type = new Parse.Object("type");
var user = new Parse.Object("magazia");
//var bID = objbID;
//user.id = bID;
var cafebar = document.getElementById('cafe_bar').checked;
if (cafebar) {
var valueCafebar = true;
} else {
var valueCafebar = false;
}
var club = document.getElementById('club').checked;
if (club) {
var valueClub = true;
} else {
var valueClub = false;
}
var restaurant = document.getElementById('restaurant').checked;
if (restaurant) {
var valueRestaurant = true;
} else {
var valueRestaurant = false;
}
var pistes = document.getElementById('pistes').checked;
if (pistes) {
var valuePistes = true;
} else {
var valuePistes = false;
}
type.set("cafebar", valueCafebar);
type.set("club", valueClub);
type.set("restaurant", valueRestaurant);
type.set("pistes", valuePistes);
type.save(null, {
success: function(type) {
//saveJobApp(type.id);
var typeid1 = type.id;
console.log("inside type save " + typeid1);
//return ;
},
error: function(type, error) {
alert('Failed to create new object, with error code: ' + error.description);
}
});
}
-- Send Data to parse.com class code
function saveJobApp(objParseFile, type2) {
var jobApplication = new Parse.Object("magazia");
var email = document.getElementById('email').value;
var name = document.getElementById('name').value;
var description = document.getElementById('description').value;
var website = document.getElementById('website').value;
var phone = document.getElementById('phone').value;
var address = document.getElementById('address').value;
var latlon = document.getElementById('latlon').value;
var area = document.getElementById('area').value;
var value = latlon;
value = value.replace(/[\(\)]/g, '').split(', ');
console.log("inside saveJobApp " + type2);
var x = parseFloat(value[0]);
var y = parseFloat(value[1]);
var point = new Parse.GeoPoint(x, y);
jobApplication.set("image", objParseFile);
jobApplication.set("email", email);
jobApplication.set("phone", phone);
jobApplication.set("address", address);
jobApplication.set("name", name);
jobApplication.set("website", website);
jobApplication.set("description", description);
jobApplication.set("area", area);
jobApplication.set("latlon", point);
jobApplication.set("typeID", type2);
jobApplication.save(null, {
success: function(gameScore) {
// typeSave(jobApplication.id);
},
error: function(gameScore, error) {
alert('Failed to create new object, with error code: ' + error.description);
}
});
}
So resuming i am trying when i click the button to first run the typesave() function, after when it posts the type on the type class in parse, to take to type.id from the success function and send it to the parseFile.save().then
and then to send the objectFile and the type2 (which is the type.id) it in saveJobApp and them to save it in class magazia
What i get from the console.logs is this
Which means that my code post to the type class and takes the type.id
but it doesnt send it to the magazia class via the parsefile save.
Any idea of what am i missing?
I noticed your mistake is not about the functions but about trying to pass the type.id as a string and not as a function in the saveJobApp function.
if you try making it like this
function saveJobApp(objParseFile , objtype) {
var jobApplication = new Parse.Object("magazia");
var type = new Parse.Object("type");
type.id = objtype;
jobApplication.set("typeID", type);
I think it will work.
And also update the onclick and the ParseFile save code to this
$('#submitId').on("click", function(e) {
typeSave();
});
function PhotoUpload(objtype){
var fileUploadControl = $("#profilePhotoFileUpload")[0];
var file = fileUploadControl.files[0];
var name = file.name; //This does *NOT* need to be a unique name
var parseFile = new Parse.File(name, file);
parseFile.save().then(
function() {
saveJobApp(parseFile, objtype);
},
function(error) {
alert("error");
}
);
}
And the success function in typeSave()
should be something like this
type.save(null, {
success: function(type) {
PhotoUpload(type.id);
},
Hope this helps :)
I am using this script to store the user's history to a cookie for the last 10 pages accessed. So far I've got the script displaying the cookie data using the document.title, and url in a list.
My question is what would be the simplest way to add a page skip feature, that would let me omit certain pages from being added to the history cookie? Everything I've tried hasn't worked, as it's a little bit outside of my knowledge.
Thanks for your time and help.
JS:
(function($){
var history;
function getHistory() {
var tmp = $.cookie("history");
if (tmp===undefined || tmp===null) tmp = "";
if ($.trim(tmp)=="") tmp = [];
else tmp = tmp.split("||");
history = [];
$.each(tmp, function(){
var split = this.split("|");
history.push({
title: split[0],
url: split[1]
});
});
}
function saveHistory() {
var tmp = [];
$.each(history, function(){
tmp.push(this.title+"|"+this.url);
});
$.cookie("history",tmp.join("||"),{ expires: 60, path: "/" });
}
function addToHistory(title,url) {
var newHistory = []
$.each(history, function(){
if (this.url!=url) newHistory.push(this);
});
history = newHistory;
if (history.length>=10) {
history.shift();
}
history.push({
title: title,
url: url
});
saveHistory();
writeHistory();
}
function writeHistory() {
var list = $("<ul />");
$.each(history, function() {
var element = $("<li />");
var link = $("<a />");
link.attr("href",this.url);
link.text(this.title);
element.append(link);
list.append(element);
});
$("#history").empty().append(list);
}
$(document).ready(function(){
getHistory();
var url = document.location.href;
var split = url.split("#");
var title;
if (split.length > 1) {
title = $("#"+split[1]).text();
} else {
title = document.title;
}
if (title===undefined || title===null || $.trim(title)=="") title = url;
addToHistory(title,url);
url = split[0];
$("a[href^='#']").click(function(){
var link = $(this);
var href = link.attr("href");
var linkUrl = url+href;
var title = $(href).text();
if (title===undefined || title===null || $.trim(title)==="") title = linkUrl;
addToHistory(title,linkUrl);
});
});
})(jQuery);
HTML:
<div id="history"></div>
several ways you could approach this... You could keep an Array of urls not to save, or you could put something in the page that would let the script know not to save that page?...
function saveHistory(){
if ($('.no-save-history')) return false;
/*...*/
}
HTML:
< div id="history" class="no-save-history">