CasperJS loop through table and scrape data for JSON output - javascript

I'm actually trying to get some data from a website in CasperJs. The datas are stocked in a table.
I'm trying to get a proper JSON file after the scrap. A json with :
- name of the company,
- mail,
- website
- description of activity.
Until now I've been able to open the page and get the data but not precisely (mail and website are on the same ). So I've found how to select precisely each element I want.
But in this case I don't get all table information's, only first row...
I would know if somebody could help me, telling me where to look or how to make loop in my case ? Assume I'm not a professional developper, I'm training myself.
Here my code :
var casper = require('casper').create();
var url = 'http://www.rent2016.fr/pages/exposants';
var fs = require('fs');
var length;
casper.start(url);
casper.then(function() {
this.waitForSelector('table#myTable');
});
casper.then(function(){
var info = this.evaluate(function(){
var table_rows = document.querySelectorAll("tr"); //or better selector
return Array.prototype.map.call(table_rows, function(tr){
return {
nom : document.querySelector(".td-width h3").textContent,
description: document.querySelector(".td-width p").textContent,
mail : document.querySelector("td span a").textContent,
site : document.querySelector('td span a[href^="http"]').textContent,
};
});
});
fs.write('test_rent_stringify.json', JSON.stringify(info), 'w');
this.echo(JSON.stringify(info, undefined, 4));
});
casper.run(function() {
});
Here, we don't have loop : JSON repeat the first row information's. To get every rows informations you have to replace
nom : document.querySelector(".td-width h3").textContent,
by
nom : tr.children[1].textContent,
but in this case you can't precisely target the H3, the links... you get all the information. So actually I can :
loop through the rows and get informations, but they unusable
have only the first row informations but with good presentation
Thanks in advance !

In order to take information inside every element, you have to use tr.querySelector rather than document.querySelector.
The following loop works fine with the page:
var table_rows = document.querySelectorAll("tbody tr"); //or better selector
return Array.prototype.map.call(table_rows, function(tr) {
return {
nom: tr.querySelector(".td-width h3").textContent,
description: tr.querySelector(".td-width p").textContent,
mail: tr.querySelector('td span a[href^="mailto"]').textContent,
site: tr.querySelector('td span a:not([href^="mailto"])').textContent
};
});

Related

How to write a test of clicking on a table row?

A project includes a page on which exists a column showing foods associated with a meal and a (paginated) column of foods not associated with a meal. A script allows to click on a food in the paginated column, have that food removed from the list of available foods and appear in the list of associated foods. In the dev environment I can demonstrate that the script works.
In an effort to learn how to use Panther in testing I've tried to reproduce the effect of clicking on a food. The test code below hopes to show that the first row of the table of unassociated foods changes after a click. That test runs without error but fails. The question, then, is how or whether to make a test that shows a change in the table.
Edit: When I forced a Firefox client (static::createPantherClient(['browser' => static::FIREFOX]);) and adding PANTHER_NO_HEADLESS=trueto.env.test.local` the test ran slow enough for me to observe the first table row was removed. So somehow the test needs a way to read the new first row (and is different from how the test tries to do that now).
Edit 2: If I insert the lines $client->request('GET', 'http://diet/meal');$newCrawler = $client->clickLink('edit'); after executing the script I can get the test to pass. This seems to be a different test from expecting the test to pass without leaving and returning to the page. Or is it not possible to test without leaving?
Edit 3: Since Panther says it "can wait for asynchronously loaded elements to show up" I added a data attribute which increments on clicking. The test now includes the line $this->assertSelectorWillNotContain("document.querySelector('#mealid').getAttribute('data-rte')", $rteCount);. Nice, except that Panther returns Given css selector expression "document.querySelector('#mealid').getAttribute('data-rte')" is invalid even though a Firefox console with the identical selector returns an integer.
table row: <td data-foodid="185">dolor</td>
test code:
class MealTest extends PantherTestCase
{
public function testFood()
{
$client = static::createPantherClient();
$client->followRedirects();
$client->request('GET', 'http://diet/meal');
$this->assertPageTitleContains('Meals');
$crawler = $client->clickLink('edit');
$this->assertPageTitleContains('Edit Meal');
$foodLink = $crawler->filter('#meal_pantry td')->first();
$q = $foodLink->attr('data-foodid');
$client->executeScript("document.querySelector('#meal_pantry td').click()");
$client->waitFor('#meal_pantry');
$nextUp = $crawler->filter('#meal_pantry td')->first();
$p = $nextUp->attr('data-foodid');
$this->assertNotEquals($p, $q);
}
}
javascript:
$('td').on('click', function (e) {
var foodId = $(e.currentTarget).data('foodid');
var mealId = $("#mealid").data("mealid");
var tableId = $(this).parents('table').attr('id');
var pageLimit = $("#mealid").data("pagelimit");
$packet = JSON.stringify([foodId, mealId, tableId]);
$.post('http://diet/meal/' + mealId + '/editMealFood', $packet, function (response) {
editFoods = $.parseJSON(response);
var readyToEat = $.parseJSON(editFoods[0]);
var pantry = $.parseJSON(editFoods[1]);
var table = document.getElementById('ready_foods');
$('#ready_foods tr:not(:first)').remove();
$.each(readyToEat, function (key, food) {
row = table.insertRow(-1);
cell = row.insertCell(0);
cell.innerHTML = food;
});
var table = document.getElementById('meal_pantry');
$('#meal_pantry tr:not(:first)').remove();
$('li.active').removeClass('active');
$('li.page-item:nth-of-type(2)').addClass('active');
$.each(pantry.slice(0, pageLimit), function (key, array) {
food = array.split(",");
foodId = food[0];
foodName = food[1];
var row = table.insertRow(-1);
var cell = row.insertCell(0);
cell.innerHTML = foodName;
cell.setAttribute('data-foodid', foodId);
});
location.reload();
});
});
Turns out I've been using the wrong assertions. The eventual solution:
$client = static::createPantherClient();
$client->followRedirects();
$client->request('GET', 'http://diet/meal');
$this->assertPageTitleContains('Meals');
$crawler = $client->clickLink('edit');
$this->assertPageTitleContains('Edit Meal');
$rteCount = $crawler->filter('#mealid')->attr('data-rte');
$client->executeScript("document.querySelector('#meal_pantry td').click()");
$client->waitForAttributeToContain('#mealid', 'data-rte', $rteCount + 1);

For Loops in Blogger, Replacing all characters in a string with the give character Java Script for Blogger

This is a simple question i have,
I have a simple script here :
//<![CDATA[
$(document).ready(function() {
$('img').each(function(){
var $img = $(this);
var filename = $img.attr('src')
var returnt=filename.substring((filename.lastIndexOf('/'))+1,filename.lastIndexOf('.'));
var returna=filename.substring((filename.lastIndexOf('/'))+1,filename.lastIndexOf('.'));
<b:loop values='returnt.length'>
var ctitle=returnt.replace("%2B", " ");
var calt=returna.replace("%2B", " ");
</b:loop>
$img.attr('title', ctitle);
$img.attr('alt', calt);
});
});
//]]>
I wanted all the %2B in the returnt and returna to be replace with space and stored in ctitle and calt respectively, but without a loop it won't work.
I tried something like
for(int i=0;i<returnt.length;i++)
{
var ctitle=returnt.replace("%2B", " ");
var calt=returna.replace("%2B", " ");
}
But didn't work, now I have put the values='returnt.length' in the script but it's still not working.
I know something is missing and is wrong somewhere, please tell me how to do it.
I want the loop to work like the given above for loop.
And I tried .replaceAll("","") method but didn't work. :(
I want a method to replace all the characters or info on how to write the above for loop in blogger FOR LOOP method.
You can use regex matching in replace. The code will look like -
$(document).ready(function() {
$('img').each(function() {
var $img = $(this);
var filename = $img.attr('src')
var returnt = filename.substring((filename.lastIndexOf('/')) + 1, filename.lastIndexOf('.'));
var returna = filename.substring((filename.lastIndexOf('/')) + 1, filename.lastIndexOf('.'));
var ctitle = returnt.replace(/%2B/g," ");
var calt = returna.replace(/%2B/g," ");
$img.attr('title', ctitle);
$img.attr('alt', calt);
});
});
The b:loop tag is native to Blogger platform. Its values attribute only accepts data tags present in Blogger and not custom JavaScript variables. Refer to the documentation here
The general format for using loops is this:
<b:loop var='identifier' values='set-of-data'>
[repeated content goes here]
</b:loop>
The 'identifier' (i) part can be any name you choose, and will be used
to stand in for each new item in the list, each time through the loop.
The set of data you specify for the values can be any piece of data
described in the data tags article as being a list of items.

Jqgrid inline mode with select2

I have found the #Olegs answer for FORM based select2 integration to jQgid, but I need help to get it to work in inline mode,, this jsfiddle is my attempt to get my problem online somehow I'm new with fiddle so please be patient :)
http://jsfiddle.net/mkdizajn/Qaa7L/58/
function(){ ... } // empty fn, take a look on jsfiddle
On this fiddle I can't make it to work to simulate the issue I have in my local network but the problem with this select2 component is that when I update some record(via local or ajax), the grid does not pick up my change and it sends null for values where select2 fields are!
I'm sorry that I can't make jsfiddle to work like on my PC :(
Thanks for any help you can think off that may be the issue here..
P.S. one veeeery strange thing is that when I console.log( select2-fields ), before, and after the value is picked up correctly but I suspect that the grid loose that value somewhere in between .. and send null values to server..
I'm posting this in a good will that I think will help anyone if come to close incounter with similar problem like me..
I'll try to bullet this problem out step by step..
first, on my server side I generate one html tag somewhere near grid table that holds info what columns, fields are lookup type.. like this:
<div id="hold_lookup_<?=$unique_id?>" style="display: none"><?php echo $lokki; ?></div>
that gives me output like this:
<div id="hold_lookup_table1" style="display: none">col1+++col2+++col3</div>
define onselectrow event somewhere
$onSelectRow = "function(){
f = $(this).attr('id'); // grid name
try{
n = $('#hold_lookup_' + f).text().split('+++');
}catch(e){
console.log(e)
}
rez = ''; // results
temp = 'textarea[name='; // template
$.each(n, function(index, item){
rez += temp + item + '],'
});
rez = rez.slice(0,-1); // rezemo zadnji zarez
$( rez ).select2({ .. define my ajax, my init etc.. });
}";
$dg->add_event("jqGridInlineEditRow", $onSelectRow);
last but very tricky part is here.. I destroy select2 columns before sending to database in jqgrid.src file where function for SAVE inline method is.. like this
if (o.save) {
$($t).jqGrid('navButtonAdd', elem, {
caption: o.savetext || '',
title: o.savetitle || 'Save row',
buttonicon: o.saveicon,
position: "first",
id: $t.p.id + "_ilsave",
onClickButton: function() {
var sr = $t.p.savedRow[0].id;
rez = rez.split(',');
rez1 = '';
$.each(rez, function(index, item) {
rez1 += item + ','
})
rez1 = rez1.slice(0, -1);
rez1 = rez1.split(',');
$.each(rez1, function(index, item) {
$(item).select2('destroy');
});
you can see that I inserted the code onclickbutton event via same 'rez' variable that was defined in my php file where I created grid..
That's it, I hope that helped someone, event if not in this particular problem, but with methods that was used here :)
cheers, kreso

Form Not Found When Attempting to Fill Using CasperJS

I'm learning CasperJS and want to try a simple, somewhat useful task. I'd like to make a pdf copy of my gas bill, but I can't even log into the website.
I'm a customer of CT Natural Gas. The URL is:
https://www.cngcorp.com/wps/portal/cng/home/mycng/customerWebAccess/
Logging in should be simple. It should just be my account number and last name.
My code is:
var start_url = "https://www.cngcorp.com/wps/portal/cng/home/mycng/customerWebAccess/"
var casper = require('casper').create()
casper.start(start_url, function() {
this.fill('formid', {
'input1id': '000000000000',
'input2id': 'LastName'
}, true);
this.capture('cng.png');
})
casper.run()
My "real" code uses the full ID shown in the site's HTML, not "formid" or "input1ID". I did not include the full ID in the sample code above because I was unsure what those ID's really were. They look something like: viewns_7_LOIGMC7IA21234QWERASDF1234_:custWebLogin. So much for a "simple" ID. Maybe this is something generated from a WebSphere product?
Anyway, the form is not found. I get:
CasperError: Errors encountered while filling form: form not found
I've also hacked it a bit and put this in my start to see what the form "looks like":
listItems = this.evaluate(function () {
var nodes = document.querySelectorAll('form');
return [].map.call(nodes, function(node) {
return node.id;
})
})
this.echo(listItems);
That returns:
,viewns_7_LOIGMC7IA21234QWERASDF1234_:custWebLogin
I think the form id is messing this up. Can anyone offer any suggestions?
Wait for the form to be loaded to fill the form using the selectors.Use have waitForSelector(),waitFor(),wait() etc other than waitForResource()
casper.start('your_url_here',function(){
this.echo(this.getTitle());
});
casper.waitForResource("your_url_here",function() {
this.fillSelectors('#loginform', {
'input[name="Email"]' : 'your_email',
'input[name="Passwd"]': 'your_password'
}, true);
});
I think the DOM/CSS3 Selectores doesn't work with this ID but it works with xpath :
// Create a function for selectXPath :
var x = require('casper').selectXPath
var start_url = "https://www.cngcorp.com/wps/portal/cng/home/mycng/customerWebAccess/"
var casper = require('casper').create()
casper.start(start_url, function() {
// Query xpath with fillSelectors
this.fill(x('//*[#id="viewns_7_LOIGMC7IA26I00I9TQ9SLI1B12_:custWebLogin"]'), {
'input1id': '000000000000',
'input2id': 'LastName'
}, true);
this.capture('cng.png');
})
casper.run()
Do you need the full ID? Because maybe something like that could work :
this.fill("form[id^='viewns_7_LOIGMC7IA21234QWERASDF1234']", {
'input1id': '000000000000',
'input2id': 'LastName'
Or
this.fill("form[id*='LOIGMC7IA21234QWERASDF1234']", {
'input1id': '000000000000',
'input2id': 'LastName'

jqgrid edited cellurl with javascript

I am using struts2-jqgrid and javascript. when jqgrid load completed this get <s:url var="updateurl" action="pagosActualizar"/> and the jqgrid generate in view source html options_gridtable.cellurl = "/Cuenta_Corriente_LBS/pagosActualizar.action";, now the problem i have a var in javascript and want add a paramater to this url for example: options_gridtable.cellurl = "/Cuenta_Corriente_LBS/pagosActualizar.action?cod=1";. Any form of editad this line with javascript?
//JAVASCRIPT
$.subscribe('rowsel', function(event) {
var id = event.originalEvent.rowid;// this is the parameter for the url
var data = jQuery("#gridtable").jqGrid('getRowData',id);
var estado = data['estado.descripcion'];
var cod = data['correlativo'];
if(estado === 'PENDIENTE'){
jQuery("#gridtable").jqGrid('setColProp', 'fecha_acuerdo', {editable:true});
// here cod for edited url in jqgrid
}
}
//MYJSP
<s:url var="remoteurl" action="jsontableModificar"><s:param name="codigo"><s:property value="cuentacorriente.idcuentacorriente"/></s:param></s:url>
<s:url var="updateurl" action="pagosActualizar"/>
<sjg:grid
id="gridtable"
caption="Plan de Pagos Modificar"
dataType="json"
href="%{remoteurl}"
pager="false"
gridModel="gridModel"
autowidth="true"
sortable="true"
gridview="true"
cellEdit="true"
cellurl="%{updateurl}"
onCellSelectTopics="rowsel"
>
//VIEW SOURCE HTML
options_gridtable.datatype = "json";
options_gridtable.url = "/Cuenta_Corriente_LBS/jsontableModificar.action?codigo=12";
options_gridtable.cellurl = "/Cuenta_Corriente_LBS/pagosActualizar.action";
options_gridtable.height = 'auto';
options_gridtable.pgbuttons = true;
options_gridtable.pginput = true;
options_gridtable.gridview = true;
options_gridtable.autowidth = true;
options_gridtable.caption = "Plan de Pagos Modificar";
options_gridtable.autoencode = true;
options_gridtable.cellEdit = true;
options_gridtable.oncellselecttopics = "rowsel";
Well the flow is he user click in cell of jqgrid, later trigger a event that get the rowid and validated the field to be edited, the problem is that when edited the cell is use the cellurl but only send the content cell to edited and not the rowid that need for update the register, i need the two the rowid and cell content.
You can use "beforeSubmitCell" Event. Documentation is given in following link.
Documentation of "beforeSubmitCell"
Ex.
beforeSubmitCell:{cod:cod}.
1st cod is name of variable posted to a server
2nd cod is value of variable comes from your javascript
that is me banned acc, i found answer: jQuery("#gridtable").jqGrid('setGridParam',{editurl:"jsontable.action?searchString="+valor}).trigger('reloadGrid');
PD: Me banned because i was new, but now i know more.

Categories

Resources