I am trying to build a file of JSON data from repeated calls to a REST API. The final file to be written is the sum of the data received from all the calls. At present the file is written with the contents of the first call, then overwritten by the contents of the first + second call (see console output below the code).
As I have to make many calls, once the code is working I would like to write the file only once, after all the requests have finished and the JSON string has been built. Does anyone know how I would go about doing this? Maybe with a callback(?), which I still don't have the hang of, fired once the requests have finished or the JSON string has finished being built.
"use strict";
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../pathtocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
url: '',
method: 'GET',
accept: "application/json",
json: true,
};
var csvData = [];
var jsonData = "[";
fs.createReadStream(path)
.pipe(parse({delimiter: ','}))
.on('data', function(data) {
csvData.push(data[1]);
})
.on('end',function() {
var start = Date.now();
var records = csvData.length //2212 objects
console.log(records);
var dataLength = 2 //set low at moment
for (var i = 0; i < dataLength; i += 1) {
var url = shapes + csvData[i];
options.url = url; //set url query
request(options, function(error, response, body) {
var time = Date.now() - start;
var s = JSON.stringify(body.response);
console.log( '\n' + (Buffer.byteLength(s)/1000).toFixed(2)+
" kilobytes downloaded in: " + (time/1000) + " sec");
console.log(i)
buildJSON(s);
});
}
function buildJSON(s) {
var newStr = s.substring(1, s.length - 1);
jsonData += newStr + ',';
writeFile(jsonData);
}
function writeFile(jsonData) {
fs.writeFile(pathJSON, jsonData, function(err) {
if (err) {
return console.log(err);
} else {
console.log("file complete")
}
});
}
});
128.13 kilobytes downloaded in: 2.796 sec
2
file complete
256.21 kilobytes downloaded in: 3.167 sec
2
file complete
Perhaps writing to the file only after all requests are complete will help. In the current code, the writeFile function is called each time a request completes, which overwrites the file each time.
A quick way to fix this is to count completed requests (and failures) and write to the file only after all the requests are complete.
"use strict";
const fs = require('fs');
const request = require('request');
var parse = require('csv-parse');
const path = "../pathtocsv.csv";
const pathJSON = "../pathtoJSON.json";
var shapes = "https://url";
var options = {
url: '',
method: 'GET',
accept: "application/json",
json: true,
};
var csvData = [];
var jsonData = "[";
fs.createReadStream(path)
.pipe(parse({
delimiter: ','
}))
.on('data', function (data) {
csvData.push(data[1]);
})
.on('end', function () {
var start = Date.now();
var records = csvData.length //2212 objects
console.log(records);
var dataLength = 2 //set low at moment
var jsonsDownloaded = 0; // Counter to track complete JSON requests
var jsonsFailed = 0; // Counter to handle failed JSON requests
for (var i = 0; i < dataLength; i += 1) {
var url = shapes + csvData[i];
options.url = url; //set url query
request(options, function (error, response, body) {
if(error){
jsonsFailed++;
writeFile(jsonData);
return;
}
jsonsDownloaded++;
var time = Date.now() - start;
var s = JSON.stringify(body.response);
console.log('\n' + (Buffer.byteLength(s) / 1000).toFixed(2) +
" kilobytes downloaded in: " + (time / 1000) + " sec");
console.log(i); // note: with var, i has already advanced to dataLength here; use let (or capture i) for the real index
buildJSON(s);
});
}
function buildJSON(s) {
var newStr = s.substring(1, s.length - 1);
jsonData += newStr + ',';
writeFile(jsonData);
}
function writeFile(jsonData) {
    if (dataLength - (jsonsDownloaded + jsonsFailed) > 0) {
        return; // still waiting on outstanding requests
    }
    // drop the trailing comma and close the array so the file is valid JSON
    var output = jsonData.replace(/,$/, "") + "]";
    fs.writeFile(pathJSON, output, function (err) {
        if (err) {
            return console.log(err);
        } else {
            console.log("file complete");
        }
    });
}
});
Note:
Firing requests in quick succession (e.g. 2000 requests in a for loop) does not, in my experience, work well; try batching them, as in the sketch below. Also, this approach does not guarantee order (if that is important in your use case).
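For illustration, here is a minimal batching sketch (the runBatches helper and the batch size of 50 are hypothetical, not part of the original; it reuses request, shapes, and csvData from the code above). It fires requests in fixed-size groups and starts the next group only once every request in the current one has called back:
function runBatches(urls, batchSize, done) {
    var results = [];
    var index = 0;
    function nextBatch() {
        if (index >= urls.length) {
            return done(results); // every batch has finished
        }
        var batch = urls.slice(index, index + batchSize);
        index += batchSize;
        var pending = batch.length;
        batch.forEach(function (url) {
            request({ url: url, json: true }, function (error, response, body) {
                results.push(error ? null : body);
                pending -= 1;
                if (pending === 0) {
                    nextBatch(); // this batch is done; start the next one
                }
            });
        });
    }
    nextBatch();
}
// usage sketch:
// runBatches(csvData.map(function (c) { return shapes + c; }), 50, function (results) { /* write file here */ });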
An alternative would be to open your file in append mode. You can do this by passing an extra options object with the flag set to 'a' in your fs.writeFile call.
fs.writeFile(pathJSON, jsonData, {
flag: 'a'
}, function (err) {
if (err) {
return console.log(err);
}
});
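Note that with the 'a' flag every fs.writeFile call appends, so buildJSON should then pass only the newly received fragment (newStr + ',') rather than the whole accumulated jsonData string; otherwise every earlier fragment gets written again on each call.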
References:
fs.writeFile Docs
File system flags
Related
I am porting an old Ruby script over to JavaScript, setting the function up as a cron job so it will run on a schedule. The function queries our MySQL database, retrieves inventory information for our products, and then sends requests to a trading partner's API to update our inventory on their site.
Due to Node's asynchronicity I am running into issues. We need to chunk requests into 1000 items per request, and we are sending 10k products. The issue is that each request just sends the last 1000 items. The for loop inside the while loop moves on before the JSON request body has finished being built. I tried creating anonymous setTimeout functions in the while loop to handle it, as well as creating an object holding the request function and the variables to be passed and stuffing it into an array to iterate over once the while loop completes, but I get the same result either way. I'm not sure of the best way to handle this so that each request gets the correct batch of items. I also need to wait 3 minutes between each request of 1000 items so as not to hit the request cap.
query.on('end',()=>{
connection.release();
writeArray = itemArray.slice(0),
alteredArray = [];
var csv = json2csv({data: writeArray,fields:fields}),
timestamp = new Date(Date.now());
timestamp = timestamp.getFullYear() + '-' +(timestamp.getMonth() + 1) + '-' + timestamp.getDate()+ ' '+timestamp.getHours() +':'+timestamp.getMinutes()+':'+timestamp.getSeconds();
let fpath = './public/assets/archives/opalEdiInventory-'+timestamp+'.csv';
while(itemArray.length > 0){
alteredArray = itemArray.splice(0, 1000); // take the next 1000 items
for(let i = 0; i < alteredArray.length; i++){
jsonObjectArray.push({
sku: alteredArray[i]['sku'],
quantity: alteredArray[i]["quantity"],
overstockquantity: alteredArray[i]["osInv"],
warehouse: warehouse,
isdiscontinued: alteredArray[i]["disc"],
backorderdate: alteredArray[i]["etd"],
backorderavailability: alteredArray[i]["boq"]
});
}
var jsonObject = {
login: user,
password: password,
items: jsonObjectArray
};
postOptions.url = endpoint;
postOptions.body = JSON.stringify(jsonObject);
funcArray.push({
    func: function (postOptions) {
        request(postOptions, (err, res, body) => {
            if (err) { console.error(err); throw err; }
            console.log(body);
        });
    },
    vars: postOptions
});
jsonObjectArray.length = 0;
}
var mili = 180000;
for(let i = 0;i < funcArray.length; i++){
setTimeout(()=>{
var d = JSON.parse(funcArray[i]['vars'].body);
console.log(d);
console.log('request '+ i);
//funcArray[i]['func'](funcArray[i]['vars']);
}, mili * i);
}
});
});
You need async/await or Promises to handle async actions in Node.js.
I am not sure whether your Node version supports async/await, so I have tried a Promise-based solution.
query.on('end', () => {
connection.release();
writeArray = itemArray.slice(0),
alteredArray = [];
var csv = json2csv({ data: writeArray, fields: fields }),
timestamp = new Date(Date.now());
timestamp = timestamp.getFullYear() + '-' + (timestamp.getMonth() + 1) + '-' + timestamp.getDate() + ' ' + timestamp.getHours() + ':' + timestamp.getMinutes() + ':' + timestamp.getSeconds();
let fpath = './public/assets/archives/opalEdiInventory-' + timestamp + '.csv';
var calls = chunk(itemArray, 1000)
.map(function(chunk) {
var renameditemsArray = chunk.map((item) => new renamedItem(item, warehouse));
var postOptions = {};
postOptions.url = endpoint;
postOptions.body = JSON.stringify({
login: user,
password: password,
items: renameditemsArray
});
return postOptions;
});
sequenceBatch(calls, makeRequest)
.then(function() {
console.log('done');
})
.catch(function(err) {
console.log('failed', err)
});
function sequenceBatch(calls, cb) {
    var sequence = Promise.resolve();
    calls.forEach(function (callOptions, index) {
        sequence = sequence.then(() => {
            return new Promise(function (resolve, reject) {
                setTimeout(function () {
                    try {
                        cb(callOptions);
                        resolve(`callsequence${index + 1} done`);
                    }
                    catch (err) {
                        reject(`callsequence${index + 1} failed`);
                    }
                }, 180000); // wait 3 minutes before each chunk to stay under the rate cap
            });
        });
    });
    return sequence;
}
function makeRequest(postOptions) {
    request(postOptions, (err, res, body) => {
        if (err) {
            // throwing here would crash the process (this callback runs outside
            // sequenceBatch's try/catch), so just log the failure
            return console.error(err);
        }
        console.log(body);
    });
}
function chunk(arr, len) {
var chunks = [],
i = 0,
n = arr.length;
while (i < n) {
chunks.push(arr.slice(i, i += len));
}
return chunks;
}
function renamedItem(item, warehouse) {
this.sku = item['sku']
this.quantity = item["quantity"]
this.overstockquantity = item["osInv"]
this.warehouse = warehouse
this.isdiscontinued = item["disc"]
this.backorderdate = item["etd"]
this.backorderavailability= item["boq"]
}
});
Could you please try this snippet and let me know if it works? I couldn't test it since I made it up on the fly. The core logic is in the sequenceBatch function. The answer is based on another question which explains how timeouts and Promises work together.
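One design note on the sketch above: the 180000 ms timeout runs before every call, including the first, so ten chunks take roughly thirty minutes end to end. If you want the first request to fire immediately, skip the delay when index is 0.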
It turns out this wasn't a closure or async issue at all: the request object I was building used references to objects instead of shallow copies, so all the entries in the final array ended up linked to the same object reference.
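For anyone who hits the same thing, the fix is to snapshot the options at the moment each request is queued instead of pushing a reference to an object that keeps being mutated. A minimal sketch against the funcArray code above (Object.assign is just one way to take the shallow copy):
// Buggy: every entry stores a reference to the same postOptions object,
// so by the time the requests run they all see the final url/body.
funcArray.push({ func: makeRequest, vars: postOptions });
// Fixed: each entry keeps its own shallow copy of the options.
funcArray.push({ func: makeRequest, vars: Object.assign({}, postOptions) });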
I'm having problems trying to write a chunked buffer to a file. My problem is basically that the file only contains the last bytes of the output buffer. The chunk buffer is very large, and my output file is truncated; the last items are correct.
Here is my code
var reqChart = http.request(chart, function(res1) {
res1.setEncoding( 'utf8' );
res1.on('data', function (chunk) {
var fs = require('fs');
//var b = new Buffer(chunk.length);
var c = "";
for (var i = 0;i < chunk.length;i++) {
// b[i] = chunk[i];
c = c + chunk[i]
}
console.log(c);
fs.writeFile("rich.txt", c, "utf-8",function(err) {
if(err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
});
});
And if you are interested, here is the BEGINNING of the input I get in my file vs the BEGINNING of the console.log of my variable. I already tried writing the console.log output to a file, but got the same results.
File
0,0.120179572670496],[1498028100000,0.105581318680705],[1498028400000,0.167319933562371],[1498028700000,0.147574197583768],[1498029000000,0.114172853959319],[1498029300000,0.241186960587686],[1498029600000,1.45701290672775],[1498029900000,0.154756395075166],[1498030200000,0.0836631006369253],[1498030500000,0.0800486694467188],[1498030800000,0.0861569133026863],[1498031100000,0.092360872083502],[1498031400000,0.0790707908354345],[1498031700000,0.129877438815933],[1498032000000,0.118812121796025],[1498032300000,0.0989746376933164],[1498032600000,2.30001837257628],[1498032900000,0.313639093279438],[1498033200000,0.227936449562983],
Buffer/Variable/log
{"requestData":{"options":{"width":950,"gui_component_mode":1,"exporttocsv":1,"only_stacks_in_legend":false,"reverse_axis":false,"height":446,"datasetTitle":"","legend_average":false,"legend_maximum":false,"legend_minimum":false},"model_group":101,"name":"autil_17","model":"nmis_graph","dataset_id":0,"owner_cid":"c3","data_source_type":"chart","model_view":"graph","parameters":{"graph_type":"interface","resource_index":"17","nmis_data_type":null,"value_column":"value","node":"RT01459","axis":0,"end_date_raw":1498095300,"substitutions":{"time.start":1497922702,"time.end":1498095502},"time_column":"time","translation":"","field":"","lineType":"line","period":"2d","index_graph_type":"autil","resource":"interface","start_date_raw":1497922500,"resolution":300,"class":null},"data_source":"local_nmis","translation":null},"replyData":{"options":{"subtitleText":"ENTERPRISE_RT01","titleText":"Interface gigabitethernet0-2-3913","legend_raw":" Avg In Max In Avg 12.76 % Max 98.99 % \\n Avg Out Max Out Avg 4.98 % Max 52.49 % \\n","yAxis0TitleText":"% Avg Util"},"stacking":"normal","meta_data":{"time_start":1497922500,"start_date_input":"2017-06-19 20:35:00","end_date_input":"2017-06-21 20:35:00"},"data":[{"yAxis":0,"reverse_axis":0,"valueDecimals":2,"value_min":-98.9864025844157,"color":"#00BFFF","suffix":"","dataset_multiplier":1,"sum":-7373.17229868631,"connectNulls":0,"stack":1,"value_max":-0.0591203443255937,"name":"Avg In","data":[[1497922500000,-0.7137202476565],[1497922800000,-1.43305756579003],[1497923100000,-0.150464409649807],[1497923400000,-0.150475661479925],[1497923700000,-0.100369773564214],[1497924000000,-0.0893947123021048]
I thought maybe a timeout function or something was needed, but the log is writing the full info to the terminal, so maybe I'm missing something.
You realize that you reset c on every chunk, since it's locally scoped?
var c = ""; // declared once, outside the data handler, so it is not reset
res1.on('data', function (chunk) {
    c += chunk;
});
And you need to wait for the stream to finish:
res1.on("end",function(){
console.log(c);
var fs = require('fs');
fs.writeFile("rich.txt", c, "utf-8",function(err) {
if(err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
});
The problem you're having is that you're writing the file while you are still inside stream.on('data', ...). You need to accumulate all that data in a variable, and use the stream.on('end', ...) event to write it to a file. I hope this example helps:
var fs = require('fs');
// Some example of getting original data
var readableStream = fs.createReadStream('file.txt');
var data = '';
readableStream.on('data', function(chunk) {
data+=chunk;
});
readableStream.on('end', function() {
console.log(data);
fs.writeFile("rich.txt", data, "utf-8",function(err) {
if(err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
});
Now copying your code and modifying it:
var reqChart = http.request(chart, (res1) => {
res1.setEncoding('utf8');
var fs = require('fs');
var c = "";
res1.on('data', function (chunk) {
//var b = new Buffer(chunk.length);
for (var i = 0; i < chunk.length; i++) {
// b[i] = chunk[i];
c = c + chunk[i]
}
console.log(c);
});
res1.on('end', function () {
fs.writeFile("rich.txt", c, "utf-8", function (err) {
if (err) {
console.log(err);
} else {
console.log("The file was saved!");
}
});
})
});
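(Since res1.setEncoding('utf8') makes each chunk a string, the character-by-character loop above is redundant; c += chunk does the same thing.)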
The pipe function for streams would make your life a lot easier.
const fs = require('fs');
const http = require('http');
const output = fs.createWriteStream('output.html'); // change to rich.txt
const url = 'http://example.com'; // change to chart URL
http.get(url, response => {
response.pipe(output);
});
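One caveat: pipe does not forward errors from the readable stream to the writable one, so it's worth attaching 'error' handlers to both. A minimal addition to the snippet above:
http.get(url, response => {
    response.pipe(output);
    response.on('error', err => console.error(err));
    output.on('error', err => console.error(err));
});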
I'm trying to get the first 5 pages of search results with the Google Custom Search API.
So far I've tried to achieve this using nested functions, but with no luck.
I know that I'm messing up the callbacks; so far I've not figured out the correct way (without using a promises library) to solve my problem.
Could some of you point me in the right direction?
Thanks.
app.get('/assesment', function(req, res){
console.log('route: /assesment');
var api_key = '';
var customsearch = google.customsearch('v1');
var response = "";
var number_of_pages = 5;
var next_page = 1;
var exit = 0
const CX = 'XXXXX';
const API_KEY = 'XXXXX';
const SEARCH = 'Test Query';
console.log('start');
// console.log('QUERY PAGE: '+pages);
doSearch(CX, SEARCH, API_KEY, next_page, function(resp){
res.send(resp);
});
//
// Functions
//
function doSearch(_cx, _search, _api_key, _start, callback ){
var response = '';
customsearch.cse.list({ cx: _cx, q: _search, auth: _api_key, start: _start }, function (err, resp) {
if (err) {
response = JSON.stringify(err);
} else {
// Got the response from custom search
console.log('Result: ' + resp.searchInformation.formattedTotalResults);
if (resp.items && resp.items.length > 0) {
console.log('First result of '+resp.items.length+' is ' + resp.items[0].title);
for (var i = 0; i < resp.items.length; i++) {
response += resp.items[i].title+"<br>";
response += resp.items[i].link +"<br><hr>";
}
}
res = {
response: response,
next_page: resp.queries.nextPage
}
// res =
}
_start += 1;
if (_start < 6 ) {
doSearch(_cx, _search, _api_key, _start, _start*10+1,
function(resp){
response += resp;
});
}
if (callback && typeof callback === "function") callback(response);
});
};
});
You can use a third-party service like SerpApi to scrape Google and get back structured JSON.
Example using the Node.js library to get the 4th page of results (num: 10, start: 30, i.e. results 31-40):
var gsr = require('GoogleSearchResults')
let serp = new gsr.GoogleSearchResults("demo")
serp.json({
q: "Coffee",
num: 10,
start: 30,
location: "Portland"
}, (result) => {
console.log(result)
})
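If you'd rather stay with the Google API client and plain callbacks, the usual pattern is to recurse from inside the callback, accumulating results and invoking a single final callback after the last page. A minimal sketch, reusing CX, SEARCH, API_KEY, and customsearch from the question (fetchPages is a hypothetical helper):
function fetchPages(start, pagesLeft, acc, done) {
    customsearch.cse.list({ cx: CX, q: SEARCH, auth: API_KEY, start: start }, function (err, resp) {
        if (err) {
            return done(err, acc);
        }
        if (resp.items && resp.items.length > 0) {
            for (var i = 0; i < resp.items.length; i++) {
                acc += resp.items[i].title + "<br>" + resp.items[i].link + "<br><hr>";
            }
        }
        if (pagesLeft <= 1) {
            return done(null, acc); // last page reached; hand everything back at once
        }
        fetchPages(start + 10, pagesLeft - 1, acc, done); // each page starts 10 results later
    });
}
// usage inside the route handler:
fetchPages(1, 5, "", function (err, html) {
    if (err) console.log(err);
    res.send(html);
});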
I have a nodejs program that requests a series of XML files, parses them and then puts the output in an array which is written to disk as a CSV file.
The program mostly works; however, occasionally the files end up in the wrong order in the array.
I want the order of the results to be the same as the order of the URLs. The URLs are stored in an array, so when I get an XML file I check what the index of its URL was in the source array and insert the results at the same index in the destination array.
Can anyone see the flaw that is allowing the results to end up in the wrong order?
addResult = function (url, value, timestamp) {
data[config.sources.indexOf(url)] = {
value : value,
timestamp : timestamp,
url : url
};
numResults++;
if (numResults === config.sources.length) { //once all results are in build the output file
createOutputData();
}
}
fs.readFile("config.json", function (fileError, data) {
var eachSource, processResponse = function (responseError, response, body) {
if (responseError) {
console.log(responseError);
} else {
parseXML(body, {
explicitArray : false
}, function (xmlError, result) {
if (xmlError) {
console.log(xmlError);
}
addResult(response.request.uri.href, result.Hilltop.Measurement.Data.E.I1, moment(result.Hilltop.Measurement.Data.E.T));
});
}
};
if (fileError) {
console.log(fileError);
} else {
config = JSON.parse(data); //read in config file
for (eachSource = 0; eachSource < config.sources.length; eachSource++) {
config.sources[eachSource] = config.sources[eachSource].replace(/ /g, "%20"); //replace all " " with "%20"
request(config.sources[eachSource], processResponse); //request each source
}
}
});
var writeOutputData, createOutputData, numResults = 0, data = [], eachDataPoint, multipliedFlow = 0;
writeOutputData = function (output, attempts) {
csv.writeToPath(config.outputFile, [ output ], {
headers : false
}).on("finish", function () {
console.log("successfully wrote data to: ", config.outputFile);
}).on("error", function (err) { //on write error
console.log(err);
if (attempts < 2) { //if there has been less than 3 attempts try writing again after 500ms
setTimeout(function () {
writeOutputData(output, attempts + 1);
}, 500);
}
});
};
createOutputData = function () {
var csvTimestamp, output = [];
if (config.hasOwnProperty("timestampFromSource")) {
csvTimestamp = data.filter(function (a) {
return a.url === config.sources[config.timestampFromSource];
})[0].timestamp.format("HHmm");
console.log("timestamp from source [" + config.timestampFromSource + "]:", csvTimestamp);
} else {
csvTimestamp = data.sort(function (a, b) { //sort results from oldest to newest
return a.timestamp.unix() - b.timestamp.unix();
});
csvTimestamp = csvTimestamp[0].timestamp.format("HHmm");//use the oldest date for the timestamp
console.log("timestamp from oldest source:", csvTimestamp);
}
//build array to represent data to be written
output.push(config.plDestVar); //pl var head address first
output.push(config.sources.length + 1); //number of vars to import
output.push(csvTimestamp); //the date of the data
for (eachDataPoint = 0; eachDataPoint < data.length; eachDataPoint++) { //add each data point
if (config.flowMultiplier) {
multipliedFlow = Math.round(data[eachDataPoint].value * config.flowMultiplier); //round to 1dp and remove decimal by *10
} else {
multipliedFlow = Math.round(data[eachDataPoint].value * 10); //round to 1dp and remove decimal by *10
}
if (multipliedFlow > 32766) {
multipliedFlow = 32766;
} else if (multipliedFlow < 0) {
multipliedFlow = 0;
}
output.push(multipliedFlow);
}
console.log(output);
writeOutputData(output, 0); //write the results, 0 is signalling first attempt
};
I think that the URL-to-index code needs debugging.
Here is an example that instead uses an object pre-populated with URL keys inside the for loop.
var http = require('http');
var fs = require("fs");
var allRequestsComplete = function(results){
console.log("All Requests Complete");
console.log(results);
};
fs.readFile("urls.json", function (fileError, data) {
var responseCount = 0;
if (fileError) {
console.log(fileError);
} else {
var allResponses = {};
config = JSON.parse(data); //read in config file
var requestComplete = function(url, fileData){
responseCount++;
allResponses[url] = fileData;
if(responseCount===config.sources.length){
allRequestsComplete(allResponses);
}
};
for (var eachSource = 0; eachSource < config.sources.length; eachSource++) {
(function(url){
allResponses[url] = "Waiting";
http.get({host: url,path: "/"}, function(response) {
response.on('error', function (chunk) {
requestComplete(url, "ERROR");
});
var str = ''
response.on('data', function (chunk) {
str += chunk;
});
response.on('end', function () {
requestComplete(url, str);
});
});
}(config.sources[eachSource].replace(/ /g, "%20").replace("http://", "")));
}
}
});
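Note the immediately-invoked function wrapped around each request body: it captures url once per iteration, which is what makes the allResponses[url] bookkeeping safe inside the asynchronous callbacks (declaring the loop variable with let would achieve the same effect).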
I agree with @Kevin B: you cannot assume that async callbacks will return in the same order in which you sent them. However, you can ensure the order by passing an index along with each request through processResponse.
Say you change addResult to the following:
addResult = function (index, url, value, timestamp) {
data[index] = {
value : value,
timestamp : timestamp,
url : url
};
numResults++;
if (numResults === config.sources.length) { //once all results are in build the output file
createOutputData();
}
}
and use an extra function to make your request:
function doRequest(index, url) {
request(url, function(responseError, response, body) {
if (responseError) {
console.log(responseError);
} else {
parseXML(body, {
explicitArray : false
}, function (xmlError, result) {
if (xmlError) {
console.log(xmlError);
}
addResult(index, response.request.uri.href, result.Hilltop.Measurement.Data.E.I1, moment(result.Hilltop.Measurement.Data.E.T));
});
}
});
}
then you can also change your loop to:
for (eachSource = 0; eachSource < config.sources.length; eachSource++) {
config.sources[eachSource] = config.sources[eachSource].replace(/ /g, "%20"); //replace all " " with "%20"
doRequest(eachSource, config.sources[eachSource]); //request each source
}
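With the index threaded through doRequest, each response lands at data[index] regardless of arrival order, so createOutputData always sees the results in source order.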
var url = require('url');
var http = require('http');
var downloader = require("./downloader");
http.createServer(onRequest).listen(8080);
function onRequest(request,response)
{
if(request.method=='POST')
handlePost(request,response);
else
handleGet(request,response);
}
function handlePost(request,response)
{
var data = '';
request.on('data',function(chunk){
data+=chunk.toString();
});
request.on('end',downloadTrainStatus);
function downloadTrainStatus()
{
var downloadPromise = downloader.download(data);
downloadPromise.then(function (responses) {
var total = responses.length;
var result = [];
for( var i = 0 ; i < total ; i++)
result.push(JSON.parse(responses[i][1]));
response.writeHead(200);
response.write(JSON.stringify(result));
response.end();
}, function (err) {
console.log(err);
var result = { status : "error" };
response.writeHead(200);
response.write(JSON.stringify(result));
response.end();
})
}
}
function handleGet(request,response)
{
console.log("GET request");
response.writeHead(200);
response.write("Get request works !!");
response.end();
}
The above file, server.js, simply starts the web server. It uses the file downloader.js, included below. downloader.js just makes 10 or 20 parallel web requests to a URL which returns a JSON response. After running this process for half a day, its memory usage shoots up to 1.5 GB. Is there any memory leak in this code?
var Promise = require('bluebird')
var request = Promise.promisify(require('request'))
function download(json)
{
    var requests = []
    var data = JSON.parse(json);
    var stations = data.stations; // assumed: the station list travels in the payload alongside data.total
    for (var i = 0; i < data.total; i++)
    {
        var stationCode = stations[i].StationCode;   // computed but never passed to downloadStatus
        var journeyDay = stations[i].JourneyDayCode; // computed but never passed to downloadStatus
        requests.push(downloadStatus());
    }
    return Promise.all(requests);
}
function downloadStatus()
{
var url = "http://google.com";
var headers = {'User-Agent' : 'Apache-HttpClient/UNAVAILABLE (java 1.4)'};
var options = {
url: url ,
headers: headers,
timeout: 15 * 1000
}
return request(options);
}
module.exports.download = download;