I am scraping websites using CasperJS, and one of the tasks involves crawling a set of URLs generated by a for-loop counter. The URL looks like this:
www.example.com/page/no=
where the no is any number from 0-10 set by the for loop counter. The scraper then goes through all the pages, scrapes the data into a JSON object and repeats until no=10.
The data that I am trying to get is stored in discrete groups in each page- what I would like to work with is a single JSON object by joining all the scraped output from each page.
Imagine Page1 has Expense 1 and the object I am getting is { expense1 } and Page 2 has Expense 2 and object that I am getting is { expense2 }. What I would like to have is one JSON at the end of scraping that looks like this:
scrapedData = {
"expense1": expense1,
"expense2": expense2,
}
What I am having trouble with is joining all the JSON objects into one array.
I initialized an empty array, and then each object gets pushed to the array.
I have tried a check where, if the iterator i in the for loop is equal to 10, the JSON object is printed out, but that didn't seem to work. I looked it up and it seems object spread is an option, but I am not sure how to use it in this case.
Any pointers would be helpful. Should I be using any of the array functions like map?
// Question's original attempt: queue one CasperJS step per page number 0-10 and read the
// text of four `td[headers=…]` selectors on each page.
casper.then(function(){
var url = "https:example.net/secure/SaFinShow?url=";
//We create a for loop to go open the urls
// NOTE(review): `i` is never declared, so it is an implicit global shared by every callback below.
for (i=0; i<11; i++){
this.thenOpen(url+ i, function(response){
// NOTE(review): these assignments are undeclared too; `Date` and `Location` reuse the
// names of built-ins.
expense_amount = this.fetchText("td[headers='amount']");
Date = this.fetchText("td[headers='Date']");
Location = this.fetchText("td[headers='zipcode']");
id = this.fetchText("td[headers='id']");
singleExpense = {
"Expense_Amount": expense_amount,
"Date": Date,
"Location": Location,
"id": id
};
// NOTE(review): CasperJS runs queued thenOpen callbacks after this loop has finished, by
// which time `i` is 11 — so this condition presumably never holds and nothing is echoed.
if (i ===10){
// NOTE(review): `expenseArray` is not declared in this snippet — presumably initialised elsewhere.
expenseArray.push(JSON.stringify(singleExpense, null, 2))
this.echo(expenseArray);
}
});
};
});
Taking your example and expanding on it, you should be able to do something like:
// Initialize empty object to hold all of the expenses
var scrapedData = {};
// Queue one CasperJS step per page number 0-10; each step stores what it scraped in scrapedData.
casper.then(function(){
var url = "https:example.net/secure/SaFinShow?url=";
//We create a for loop to go open the urls
// NOTE(review): `i` is an undeclared (global) counter that the callbacks below read later.
for (i=0; i<11; i++){
this.thenOpen(url+ i, function(response){
expense_amount = this.fetchText("td[headers='amount']");
Date = this.fetchText("td[headers='Date']");
Location = this.fetchText("td[headers='zipcode']");
id = this.fetchText("td[headers='id']");
singleExpense = {
"Expense_Amount": expense_amount,
"Date": Date,
"Location": Location,
"id": id
};
// As we loop over each of the expenses add them to the object containing all of them
// NOTE(review): CasperJS runs this callback after the loop has ended, so `i` is presumably
// 11 for every page and each result overwrites the same 'expense11' key — confirm, and
// capture the page number per step if so.
scrapedData['expense'+i] = singleExpense;
});
};
});
After this runs the scrapedData variable should be of the form:
scrapedData = {
"expense1": expense1,
"expense2": expense2
}
Updated code
One problem with the above code is that inside the for loop when you loop over the expenses, the variables should be local. The variable names also should not be Date and Location since those are built-in names in JavaScript.
// Initialize empty object to hold all of the expenses
var scrapedData = {};

/**
 * Queues a CasperJS step that opens one expense page and stores the scraped
 * values in scrapedData under the key 'expense' + pageNumber.
 *
 * The page number is a function parameter so that each queued callback keeps
 * its own copy. CasperJS runs the callbacks only after the calling loop has
 * finished, so reading a shared loop counter inside the callback would yield
 * the counter's final value for every page.
 *
 * @param {Object} casperInstance - The CasperJS instance to queue the step on.
 * @param {string} url - URL prefix; the page number is appended to it.
 * @param {number} pageNumber - Page to open; also used to build the result key.
 */
function queueExpensePage(casperInstance, url, pageNumber) {
    casperInstance.thenOpen(url + pageNumber, function () {
        // Local variables hold the data for this particular expense.
        // `Date` and `Location` are avoided because they are built-in names.
        var expense_amount = this.fetchText("td[headers='amount']");
        var date = this.fetchText("td[headers='Date']");
        var location = this.fetchText("td[headers='zipcode']");
        var id = this.fetchText("td[headers='id']");
        var singleExpense = {
            "Expense_Amount": expense_amount,
            "Date": date,
            "Location": location,
            "id": id
        };
        // Add this page's expense to the object containing all of them
        scrapedData['expense' + pageNumber] = singleExpense;
    });
}

casper.then(function () {
    var url = "https:example.net/secure/SaFinShow?url=";
    // Queue one step per page number, 0 through 10
    for (var i = 0; i < 11; i++) {
        queueExpensePage(this, url, i);
    }
});
I am working on an nodejs app which reads data from a json file. Now I want to edit this json file in js (write to it). How do I do this ?
Here is my js code: `
var fs = require("fs")
//function for file input
/**
 * Reads a whole file synchronously and returns its contents as text.
 *
 * The file is decoded as UTF-8, the encoding JSON files use. Node's "ascii"
 * decoder clears the high bit of every byte, which corrupts any non-ASCII
 * character in the file.
 *
 * @param {string} filename - Path of the file to read.
 * @returns {string} The file's contents.
 * @throws {Error} Whatever fs.readFileSync throws, e.g. when the file does not exist.
 */
function getFile(filename) {
    return fs.readFileSync(filename, "utf8");
}
//parsing json
var jsonString = [getFile("File.json")]
var jsonObj = JSON.parse(jsonString)`
Modify the jsonObj as you want, create a new object or whatever, then write the file:
// writeFileSync needs a string (or Buffer), so serialise the modified object first
fs.writeFileSync("File.json", JSON.stringify(jsonObj, null, 2));
This will overwrite the file if it exists, so that way you edit the file.
You can load a json file by requiring it.
var contents = require('/path/to/file.json');
Iterate contents just like a regular object.
A JSON object, when parsed is, like any other JS object. Use object dot notation to access any data you want.
For example a value:
console.log(jsonObj.something.value)
For example a value in an array:
console.log(jsonObj.something[0].value)
From eyp
Modify the jsonObj as you want, create a new object or whatever, then write the file:
// writeFileSync needs a string (or Buffer), so serialise the modified object first
fs.writeFileSync("File.json", JSON.stringify(jsonObj, null, 2));
This will overwrite the file if it exists, so that way you edit the file.
With nodeJS, you can require a JSON file.
Supposing you get this JSON file :
//test.json
[
{
"name": "toto",
"code": "4"
},
{
"name": "test",
"code": "5"
}
]
Then, you can require this file and perform some modification :
var json = require('./test.json');
// Overwrite the name of every entry in the array loaded from test.json
for (var idx = 0; idx < json.length; idx++) {
    json[idx].name = 'test';
}
I am using this script to retrieve JSON data from a file into my page.
// Fetch json/data.json and write the parsed result, re-serialised as one string,
// into the #getJSON-results element.
$.getJSON('json/data.json', function(data) {
$('#getJSON-results').html(JSON.stringify(data));
});
<div id="getJSON-results"></div>
Right now it just displays all the data from the JSON file as a string on the page.
How would I take the data from my JSON file and place each array into a variable? My data in the JSON file looks like this:
[{"target": "summarize(first, \"1d\", \"sum\")", "datapoints": [[38.393148148148143, 1423958400], [90.800555555555633, 1424044800], [159.06037037037032, 1424131200], [245.5933333333335, 1424217600], [126.94796296296299, 1424304000], [120.37111111111113, 1424390400], [103.04148148148151, 1424476800], [99.273796296296368, 1424563200], [89.38203703703708, 1424649600], [92.970462962963012, 1424736000], [105.62666666666664, 1424822400], [110.33962962962967, 1424908800], [118.54981481481482, 1424995200], [100.08018518518523, 1425081600], [92.52277777777779, 1425168000], [98.647619047618974, 1425254400], [94.585000000000008, 1425340800], [85.568796296296284, 1425427200], [157.82222222222222, 1425513600], [109.7596296296296, 1425600000], [112.53324074074077, 1425686400], [89.392592592592649, 1425772800], [97.253518518518518, 1425859200], [73.424629629629635, 1425945600], [92.377592592592578, 1426032000], [76.117870370370397, 1426118400], [77.83953703703699, 1426204800], [66.643518518518533, 1426291200], [63.748055555555531, 1426377600], [137.30018518518517, 1426464000], [53.480648148148134, 1426550400]]},
{"target": "summarize(second, \"1d\", \"sum\")", "datapoints": [[2.7291600529100535, 1423958400], [5.7797089947089892, 1424044800], [3.4261574074074059, 1424131200], [5.0516335978835958, 1424217600], [6.2272420634920582, 1424304000], [11.752605820105822, 1424390400], [7.8688624338624269, 1424476800], [5.7305555555555525, 1424563200], [5.2784391534391499, 1424649600], [6.4652380952380897, 1424736000], [4.7690277777777741, 1424822400], [4.1451587301587258, 1424908800], [8.4178902116402039, 1424995200], [4.7948611111111061, 1425081600], [4.8153835978835939, 1425168000], [5.3873148148148111, 1425254400], [7.2819378306878262, 1425340800], [5.2084391534391488, 1425427200], [8.098492063492051, 1425513600], [5.6563822751322697, 1425600000], [5.3091468253968195, 1425686400], [4.7850396825396793, 1425772800], [3.8716931216931179, 1425859200], [3.1934325396825369, 1425945600], [3.2083531746031722, 1426032000], [3.3434391534391512, 1426118400], [3.6162235449735438, 1426204800], [3.2094179894179891, 1426291200], [2.3699537037037026, 1426377600], [4.3973544973544945, 1426464000], [2.1901388888888893, 1426550400]]},
{"target": "summarize(third, \"1d\", \"sum\")", "datapoints": [[5.3710185185185182, 1423958400], [11.25367724867724, 1424044800], [8.2990079365079268, 1424131200], [8.710694444444437, 1424217600], [9.6381216931216898, 1424304000], [9.3845105820105807, 1424390400], [9.7305820105820047, 1424476800], [8.6268055555555474, 1424563200], [10.589166666666673, 1424649600], [10.235462962962957, 1424736000], [10.455892857142853, 1424822400], [14.282407407407405, 1424908800], [17.774404761904758, 1424995200], [18.154120370370364, 1425081600], [16.249543650793651, 1425168000], [15.29764550264551, 1425254400], [16.267671957671972, 1425340800], [20.121488095238096, 1425427200], [27.007685185185196, 1425513600], [17.577962962962971, 1425600000], [17.020873015873018, 1425686400], [14.627685185185191, 1425772800], [15.824821428571433, 1425859200], [11.837579365079364, 1425945600], [13.292539682539683, 1426032000], [12.064074074074073, 1426118400], [12.279457671957676, 1426204800], [9.3799074074073978, 1426291200], [7.8777314814814732, 1426377600], [13.161825396825407, 1426464000], [7.2587499999999956, 1426550400]]}]
I am new to using JSON and would also appreciate any advice on the approach I'm taking.
How can I now make this data accessible outside the getJSON callback?
$.getJSON('json/data.json', function(data) {
// Both assignments target undeclared names, so they become globals once this callback runs
yourData = data;
makeMeGlobal = yourData[0];
});
// NOTE(review): $.getJSON is asynchronous, so this line presumably runs before the callback
// above has assigned makeMeGlobal.
console.log(makeMeGlobal.datapoints);
Changing your function to the following will result in an object you can then reference normally:
// Declared outside the callback so that later code can read it; it stays
// undefined until the request has completed.
var yourData;
$.getJSON('json/data.json', function(data) {
    yourData = data;
});
You could then get the first set of datapoints like this:
// yourData is the top-level array; each of its elements carries its own datapoints
yourData[0].datapoints
Should be:
data.forEach(function (obj) {
    // obj now has each JSON object in this array
    var test = obj.target;
});
When you get a JSON from AJAX, it's already ready for use in the browser. This is one of the nice perks of using JSON over XML.
It's already an array, because the original JSON string was parsed by jQuery. If you use stringify, you will get a string which contains the JSON representing the array (not useful for your purposes, as it's the same string returned by the server).
For example:
$.getJSON('json/data.json', function(data) {
    // jQuery has already parsed the response, so data is the array itself.
    data.forEach(function(entry) {
        // entry is one object from the array
        alert("I have an object which target is " + entry.target);
    });
});
You need to iterate through the JSON to retrieve the values you need and append the HTML. More info: https://stackoverflow.com/a/18238241/909535. Something like this:
`$.getJSON('json/data.json', function(data){
data.forEach(
function(val, index, array) {
//val.target will have target attribute's value
//val.datapoints is a array which you can iterate
}
);
}
);`
I am using wikipedia API my json response looks like,
"{
"query": {
"normalized": [
{
"from": "bitcoin",
"to": "Bitcoin"
}
],
"pages": {
"28249265": {
"pageid": 28249265,
"ns": 0,
"title": "Bitcoin",
"extract": "<p><b>Bitcoin</b>isapeer-to-peerpaymentsystemintroducedasopensourcesoftwarein2009.Thedigitalcurrencycreatedandlikeacentralbank,
andthishasledtheUSTreasurytocallbitcoinadecentralizedcurrency....</p>"
}
}
}
}"
this response is coming inside XMLHTTPObject ( request.responseText )
I am using eval to convert above string into json object as below,
// NOTE(review): eval executes whatever code the response text contains;
// JSON.parse(req.responseText) would parse the same data without executing it.
var jsonObject = eval('(' +req.responseText+ ')');
In the response, pages element will have dynamic number for the key-value pair as shown in above example ( "28249265" )
How can I get extract element from above json object if my pageId is different for different results.
Please note, parsing is not actual problem here,
If Parse it , I can acess extract as,
// A key made of digits cannot follow a dot; it has to be read with bracket notation
var data = jsonObject.query.pages["28249265"].extract;
In above line 28249265 is dynamic, This will be something different for different query
Assuming that you want to traverse all keys in "jsonObject.query.pages",
you can extract the data like this:
var pages = jsonObject.query.pages;
// k is declared locally so the loop does not create a global
for (var k in pages) { // here k represents the page no i.e. 28249265
    // skip anything inherited through the prototype chain
    if (!Object.prototype.hasOwnProperty.call(pages, k)) {
        continue;
    }
    var data = pages[k].extract;
    // what u wana do with data here
}
Or you may first extract all the page data into an array:
var pages = jsonObject.query.pages;
// Object.keys lists only the object's own keys (the page ids), and using it
// avoids the undeclared loop variable of a bare for-in
var datas = Object.keys(pages).map(function (pageId) {
    return pages[pageId].extract;
});
// what u wana do with data array here
You can achieve that using either of two methods:
obj = JSON.parse(json)
OR
obj = $.parseJSON(json);
UPDATE
Try this:
var obj = JSON.parse("your json data string");
//console.log(obj)
jQuery.each(obj.query.pages, function (pageId, page) {
    // every page is visited here, whatever its id happens to be
    var data = page.extract;
    alert(page.extract);
});
JSBIN Example
JSBIN
I'm a newbie in JavaScript. In this example there are geometrycontrols.js (for global controls) and markercontrol.js (for marker controls).
My problem is identifying the arrays where the "data" is saved...
In the reference I see a saveData function, but I have no idea how to work with this function...
On the other hand, in test.html I see output in the GLog at startup, and it prints "data", which makes me think it comes from an array...
My objective is to save the coordinates and all the other attributes to a MySQL database; once I discover where the "data" is, that is the easy part.
If someone who has worked with this example (or not) can help me, I'd be grateful.
ps: i'm really a newbie on javascript :P
edit1:
I was out for a time, and now I focus in geometrycontrols.js specially in: GeometryControls.prototype.saveData = function(opts){
var me = this;
if(opts.allData === true){
//me.saveAllData();
} else {
//construct a json data record
var geomInfo = opts.geomInfo, index = opts.geomInfo.index;
var record = geomInfo.storage[index];
var recordJSON = {};
recordJSON.type = record.type;
recordJSON.coordinates = [];
//determine geometry type, and copy geometry appropriately
if(record.type === "point"){
recordJSON.coordinates.push({lat:record.geometry.getLatLng().lat(),lng:record.geometry.getLatLng().lng()});
alert(recordJSON.coordinates);
} else {
alert("is not point");
var vertex;
for(var i=0;i<record.geometry.getVertexCount();i++){
vertex = record.geometry.getVertex(i);
recordJSON.coordinates.push({lat:vertex.lat(),lng:vertex.lng()});
}
}
//add title and description
recordJSON.title = record.title[0];
recordJSON.description = record.description[0];
//TODO add styles
recordJSON.style = ""; //TODO} //TODO Make separate prototype function?function postData(data){
//TODO
me.debug(data);
//alert(recordJSON.coordinates);
//alert(data);
};postData(me.serialize(recordJSON));}; `
When I alert(recordJSON.coordinates), the output is [object Object], and I have no idea why; in theory this array contains the coordinates...
Here is some code I have used to send the data to MySQL. It uses a little bit of jQuery to do the ajax magic (the line starting with the dollarsign is jQuery).
// Serialise the record to a JSON string and POST it to storage.php as the "data" field,
// logging both the raw record and the string on the way.
function postData(data){
    me.debug(data);
    var payload = JSON.stringify(data);
    me.debug(payload);
    $.post('storage.php', { data: payload });
};
postData(recordJSON);
As you can see I've modified the way the 'recordJSON' object gets sent to the postData function a bit too: I've removed the serialise function.
Next, create a PHP file (called 'storage.php' in my case) and put this in it:
<?php
// Decode the JSON string posted in the "data" field; passing true makes
// json_decode return an associative array.
$payload = isset($_POST['data']) ? $_POST['data'] : '';
$received = json_decode($payload, true);

// json_decode returns null for missing or malformed input, so stop early
// instead of indexing into a non-array.
if (!is_array($received)) {
    http_response_code(400);
    echo "no valid JSON record received";
    exit;
}

// The posted record carries type, coordinates, title, description and style;
// it has no "name" key, so the title is echoed instead.
echo "just received " . (isset($received['title']) ? $received['title'] : '');
?>
You now have an array in PHP that you can do with as you please.
In the examplecode above I've modified the jQuery post function a bit, so if it doesn't work, look there.
The data is stored in JSON format in this file: http://gmaps-utility-library-dev.googlecode.com/svn/trunk/geometrycontrols/examples/data/testdata.js -- it's pretty much self-documenting, just follow the example to set your coordinates.
Note that if you need to find the latitude and longitude for a given address this is a good site: http://itouchmap.com/latlong.html