Node.js process out of memory - javascript

I have written a service to download files from an external partner site. There are around 1000 files of 1 MB each. My process runs out of memory every time it reaches around 800 files.
How should I identify the root cause?
var request = require('sync-request');
var fs = require('graceful-fs')

function find_starting_url(xyz_category){
    feed_url = "<url>"
    response = request("GET", feed_url).getBody().toString()
    response = JSON.parse(response)
    apiListings = response['apiGroups']['affiliate']['apiListings']
    starting_url = apiListings[xyz_category]['availableVariants']['v0.1.0']['get']
    return starting_url
}

function get_all_files(feed_category, count, next_url, retry_count){
    var headers = {
        'Id': '<my_header>',
        'Token': '<my key>'
    }
    console.log(Date())
    console.log(count)
    if(next_url){
        products_url = next_url
    }
    else{
        products_url = find_starting_url(feed_category)
    }
    try{
        var products = request("GET", products_url, {"headers": headers}).getBody().toString()
        var parsed = JSON.parse(products)
        var home = process.env.HOME
        var fd = fs.openSync(home + "/data/abc/xyz/" + feed_category + "/" + count + ".json", 'w')
        fs.writeSync(fd, products)
        fs.closeSync(fd)
        next_url = parsed['nextUrl']
        count++;
        if(next_url){
            get_all_files(feed_category, count, next_url)
        }
    }catch(e){
        if(retry_count >= 5){
            console.log("TERRIBLE ENDING!!!", e)
        }else{
            retry_count++;
            console.log("some error... retrying ..", e)
            get_all_files(feed_category, count, next_url, retry_count)
        }
    }
}

var feed_category = process.argv[2]
get_all_files(feed_category, 1)

You're calling a synchronous function recursively, so every request and all the data from each request are retained in memory in your local variables until all of the requests are done and the recursive calls can finally unwind and free each set of local variables. That requires a monster amount of memory (as you have discovered).
It would be best to restructure your code so that the current request is processed and written to disk, and nothing from it is retained when the code moves on to the next request. The simplest way to do that is to use a while loop instead of a recursive call. In pseudocode:
initialize counter
while (more to do) {
process the next item
increment counter
}
I don't understand the details of what your code is trying to do well enough to propose a rewrite, but hopefully you can see how you can replace the recursion with the type of non-recursive structure above.
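For concreteness, here is a minimal, untested sketch of that restructuring applied to the code in the question. It keeps sync-request and the same helpers (find_starting_url, the headers, the retry limit of 5) and just swaps fs.writeFileSync for the open/write/close trio; each response goes out of scope as soon as it has been written to disk:

function get_all_files(feed_category) {
    var headers = { 'Id': '<my_header>', 'Token': '<my key>' };
    var next_url = find_starting_url(feed_category);
    var count = 1;
    while (next_url) {                      // one iteration per page, no recursion
        var retry_count = 0;
        while (retry_count < 5) {
            try {
                var products = request("GET", next_url, {"headers": headers}).getBody().toString();
                var parsed = JSON.parse(products);
                fs.writeFileSync(process.env.HOME + "/data/abc/xyz/" + feed_category + "/" + count + ".json", products);
                next_url = parsed['nextUrl'];   // falsy when there are no more pages
                count++;
                break;                          // success, leave the retry loop
            } catch (e) {
                retry_count++;
                console.log("some error... retrying ..", e);
                if (retry_count >= 5) {
                    console.log("TERRIBLE ENDING!!!", e);
                    return;
                }
            }
        }
    }
}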

It's because you are calling get_all_files recursively, and the response body is kept in memory for every single invocation, since every child call must complete before the parent call's memory can be released.

How to loop through GET/POST calls sequentially (waiting for previous) return?

I'm writing a Tampermonkey script for a web page and trying to extract data from other pages.
I'm trying to make a function with a loop inside that goes through a list, llcList, and retrieves data with an ajax GET, but I would like to wait for one request to finish before starting the next one.
A bonus would be if I could make it wait some extra time between requests.
What should happen:
send request for a llcList[0]
get return data, process it
wait some time
send new request for a llcList[1]
Is this possible? I tried a few methods, but every time the loop sends all the requests less than a second apart:
function F_Company_LLC(){
    for (i = 0; i < llcList.length; i++) {
        if(llcList[i][2]=="lab"){
            //run function 0
            //break;
        }
        else if(llcList[i][2]=="shop"){
            //run function 1
            //break;
        }
        else{
            F_GET_CompData(llcList, llcList[i][1], i, function(result){
                console.log(result);
            });
        }
    }
}

function F_GET_CompData(F_GET_CompData_list, CompID, F_GET_CompData_row, callback){
    $.ajax({
        method : "GET",
        url: base_link+"/company/edit_company/"+CompID,
        beforeSend: function(){runningRequest++;},
        success: function(data){
            //data processing
            runningRequest--;
        },
        error: function() {console.log("Get_ComData");}
    });
    callback(runningRequest);
}
This is a common scenario. Note that it's often unnecessary to process the calls sequentially, though; it's usually adequate to just send context with the ajax calls and piece everything together as it comes in semi-randomly, as shown in this answer.
One way to force sequential behavior is to chain the calls via the complete callback. Here is fully functional code that demonstrates the process; to try it, paste it into your browser console while on a Stack Overflow page:
var listO_pages = ["q/48/", "q/27/", "q/34/", "q/69/", "badpage"];
var numPages = listO_pages.length;

getPageN (0); //-- Kick off chained fetches

function getPageN (K) {
    if (K >= 0 && K < numPages) {
        let targPage = listO_pages[K];
        $.ajax ( {
            url: "https://stackoverflow.com/" + targPage,
            context: {arryIdx: K}, // Object Helps handle K==0, and other things
            success: processPage,
            complete: finishUpRequest,
            error: logError
        } );
    }
}

function processPage (sData, sStatus, jqXHR) {
    //-- Use DOMParser so that images and scripts don't get loaded (like jQuery methods would).
    var parser = new DOMParser ();
    var doc = parser.parseFromString (sData, "text/html");
    var payloadTable = doc.querySelector ("title");
    var pageTitle = "Not found!";
    if (payloadTable) {
        pageTitle = payloadTable.textContent.trim ();
    }
    var [tIdx, tPage] = getIdxAndPage (this); // Set by `context` property
    console.log (`Processed index ${tIdx} (${tPage}). Its title was: "${pageTitle}"`);
}

function finishUpRequest (jqXHR, txtStatus) {
    var nextIdx = this.arryIdx + 1;
    if (nextIdx < numPages) {
        var tPage = listO_pages[nextIdx];
        //-- The setTimeout is seldom needed, but added here per OP's request.
        setTimeout ( function () {
            console.log (`Fetching index ${nextIdx} (${tPage})...`);
            getPageN (nextIdx);
        }, 222);
    }
}

function logError (jqXHR, txtStatus, txtError) {
    var [tIdx, tPage] = getIdxAndPage (this); // Set by `context` property
    console.error (`Oopsie at index ${tIdx} (${tPage})!`, txtStatus, txtError, jqXHR);
}

function getIdxAndPage (contextThis) {
    return [contextThis.arryIdx, listO_pages[contextThis.arryIdx] ];
}
This typically outputs:
Processed index 0 (q/48/). Its title was: "Multiple submit buttons in an HTML form - Stack Overflow"
Fetching index 1 (q/27/)...
Processed index 1 (q/27/). Its title was: "datetime - Calculate relative time in C# - Stack Overflow"
Fetching index 2 (q/34/)...
Processed index 2 (q/34/). Its title was: "flex - Unloading a ByteArray in Actionscript 3 - Stack Overflow"
Fetching index 3 (q/69/)...
Processed index 3 (q/69/). Its title was: ".net - How do I calculate someone's age in C#? - Stack Overflow"
Fetching index 4 (badpage)...
GET https://stackoverflow.com/badpage?_=1512087299126 404 ()
Oopsie at index 4 (badpage)! error Object {...
-- depending on your Stack Overflow reputation.
Important: Do not attempt to use async: false techniques. They will just lock up your browser, occasionally crash it, and make debugging and partial results much harder.
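As a side note, if you would rather not hand-wire the complete callbacks, the same one-request-at-a-time behavior (including the extra wait asked for) can be expressed as a promise chain. This is only a sketch, assuming jQuery 3+ (whose $.get returns a then-able) and the same listO_pages array as above:

function delay (ms) {
    return new Promise (function (resolve) { setTimeout (resolve, ms); });
}

//-- Each iteration waits for the previous request (and the pause) to finish first.
listO_pages.reduce (function (chain, targPage, K) {
    return chain
        .then (function ()      { return $.get ("https://stackoverflow.com/" + targPage); })
        .then (function (sData) { console.log ("Processed index " + K + " (" + targPage + ")"); })
        .catch (function (err)  { console.error ("Oopsie at index " + K + " (" + targPage + ")!", err); })
        .then (function ()      { return delay (222); });  //-- Extra wait between requests
}, Promise.resolve ());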

Memory efficient message chunk processing using a XMLHttpRequest

I have an XMLHttpRequest with a progress event handler that is requesting a chunked page which continuously sends additional message chunks. If I do not set a responseType, I can access the response property of the XMLHttpRequest in each progress event and handle the additional message chunk. The problem with this approach is that the browser must keep the entire response in memory, and eventually the browser will crash due to this memory waste.
So, I tried a responseType of arraybuffer in the hope that I can slice the buffer to prevent the previous excessive memory waste. Unfortunately, the progress event handler is no longer capable of reading the response property of the XMLHttpRequest at this point. The event parameter of the progress event does not contain the buffer, either. Here is a short, self-contained example of my attempt at this (this is written for node.js):
var http = require('http');

// -- The server.
http.createServer(function(req, res) {
    if (req.url === '/stream') return serverStream(res);
    serverMain(res);
}).listen(3000);

// -- The server functions to send a HTML page with the client code, or a stream.
function serverMain(res) {
    res.writeHead(200, {'Content-Type': 'text/html'});
    res.write('<html><body>Hello World</body><script>');
    res.end(client.toString() + ';client();</script></html>');
}

function serverStream(res) {
    res.writeHead(200, {'Content-Type': 'text/html'});
    setInterval(function() {
        res.write('Hello World<br />\n');
    }, 1000);
}

// -- The client code which runs in the browser.
function client() {
    var xhr = new XMLHttpRequest();
    xhr.addEventListener('progress', function() {
        if (!xhr.response) return console.log('progress without response :-(');
        console.log('progress: ' + xhr.response.size);
    }, false);
    xhr.open('GET', '/stream', true);
    xhr.responseType = 'arraybuffer';
    xhr.send();
}
The progress event handler has no access to the response I wanted. How can I handle the message chunks in the browser in a memory-efficient way? Please do not suggest a WebSocket. I do not wish to use one just to process a read-only stream of message chunks.
XMLHttpRequest doesn't seem really designed for this kind of usage. The obvious solution is polling, which is a popular use of XMLHttpRequest but I'm guessing you don't want to miss data from your stream that would slip between the calls.
To my question, "Can the 'real' data chunks be identified in some way, or is it basically random data?", you answered, "With some effort, the chunks could be identified by adding an event-id of sorts to the server side."
Based on this premise, I propose:
The idea: cooperating concurrent listeners
1. Connect to the stream and set up the progress listener (referred to as listenerA()).
2. When a chunk arrives, process it and output it. Keep a reference to the ids of both the first and last chunk received by listenerA(). Count how many chunks listenerA() has received.
3. After listenerA() has received a certain amount of chunks, spawn another "thread" (connection + listener, listenerB()) doing the steps 1 and 2 in parallel to the first one, but keep the processed data in a buffer instead of outputting it.
4. When listenerA() receives the chunk with the same id as the first chunk received by listenerB(), send a signal to listenerB(), drop the first connection and kill listenerA().
5. When listenerB() receives the termination signal from listenerA(), dump the buffer to the output and keep processing normally.
6. Have listenerB() spawn listenerC() on the same conditions as before.
7. Keep repeating with as many connections + listeners as necessary.
By using two overlapping connections, you can prevent the possible loss of chunks that would result from dropping a single connection and then reconnecting.
Notes
This assumes the data stream is the same for all connections and doesn't introduce some individualized settings.
Depending on the output rate of the stream and the connection delay, the buffer dump during the transition from one connection to another might be noticeable.
You could also measure the total response size rather than the chunks count to decide when to switch to a new connection.
It might be necessary to keep a complete list of chunks ids to compare against rather than just the first and last one because we can't guarantee the timing of the overlap.
The responseType of XMLHttpRequest must be set to its default value of "" or "text", to return text. Other datatypes will not return a partial response. See https://xhr.spec.whatwg.org/#the-response-attribute
Test server in node.js
The following code is a node.js server that outputs a consistent stream of elements for testing purposes. You can open multiple connections to it; the output will be the same across sessions, minus possible server lag.
http://localhost:5500/stream
will return data where id is an incremented number
http://localhost:5500/streamRandom
will return data where id is a random 40 characters long string. This is meant to test a scenario where the id can not be relied upon for ordering the data.
var crypto = require('crypto');

// init + update nodeId
var nodeId = 0;
var nodeIdRand = '0000000000000000000000000000000000000000';
setInterval(function() {
    // regular id
    ++nodeId;
    // random id
    nodeIdRand = crypto.createHash('sha1').update(nodeId.toString()).digest('hex');
}, 1000);

// create server (port 5500)
var http = require('http');
http.createServer(function(req, res) {
    if(req.url === '/stream') {
        return serverStream(res);
    }
    else if(req.url === '/streamRandom') {
        return serverStream(res, true);
    }
}).listen(5500);

// serve nodeId
function serverStream(res, rand) {
    // headers
    res.writeHead(200, {
        'Content-Type' : 'text/plain',
        'Access-Control-Allow-Origin' : '*',
    });
    // remember last served id
    var last = null;
    // output interval
    setInterval(function() {
        // output on new node
        if(last != nodeId) {
            res.write('[node id="'+(rand ? nodeIdRand : nodeId)+'"]');
            last = nodeId;
        }
    }, 250);
}
Proof of concept, using aforementioned node.js server code
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
</head>
<body>
<button id="stop">stop</button>
<div id="output"></div>
<script>
/*
Listening to a never ending page load (http stream) without running out of
memory by using concurrent overlapping connections to prevent loss of data,
using only xmlHttpRequest, under the condition that the data can be identified.
listen arguments
url url of the http stream
chunkMax number of chunks to receive before switching to new connection
listen properties
output a reference to a DOM element with id "output"
queue an array filled with non-duplicate received chunks and metadata
lastFetcherId an incrementing number used to assign an id to new fetchers
fetchers an array listing all active fetchers
listen methods
fire internal use fire an event
stop external use stop all connections
fetch internal use starts a new connection
fetchRun internal use initialize a new fetcher object
Usage
var myListen = new listen('http://localhost:5500/streamRandom', 20);
will listen to url "http://localhost:5500/streamRandom"
will switch connections every 20 chunks
myListen.stop()
will stop all connections in myListen
*/
function listen(url, chunkMax) {
// main ref
var that = this;
// output element
that.output = document.getElementById('output');
// main queue
that.queue = [];
// last fetcher id
that.lastFetcherId = 0;
// list of fetchers
that.fetchers = [];
//********************************************************* event dispatcher
that.fire = function(name, data) {
document.dispatchEvent(new CustomEvent(name, {'detail':data}));
}
//******************************************************** kill all fetchers
that.stop = function() {
that.fire('fetch-kill', -1);
}
//************************************************************** url fetcher
that.fetch = function(fetchId, url, fetchRef) {
//console.log('start fetcher #'+fetchId);
var len = 0;
var xhr = new XMLHttpRequest();
var cb_progress;
var cb_kill;
// progress listener
xhr.addEventListener('progress', cb_progress = function(e) {
// extract chunk data
var chunkData = xhr.response.substr(len);
// chunk id
var chunkId = chunkData.match(/id="([a-z0-9]+)"/)[1];
// update response end point
len = xhr.response.length;
// signal end of chunk processing
that.fire('chunk-ready', {
'fetchId' : fetchId,
'fetchRef' : fetchRef,
'chunkId' : chunkId,
'chunkData' : chunkData,
});
}, false);
// kill switch
document.addEventListener('fetch-kill', cb_kill = function(e) {
// kill this fetcher or all fetchers (-1)
if(e.detail == fetchId || e.detail == -1) {
//console.log('kill fetcher #'+fetchId);
xhr.removeEventListener('progress', cb_progress);
document.removeEventListener('fetch-kill', cb_kill);
xhr.abort();
that.fetchers.shift(); // remove oldest fetcher from list
xhr = null;
delete xhr;
}
}, false);
// go
xhr.open('GET', url, true);
xhr.responseType = 'text';
xhr.send();
};
//****************************************************** start a new fetcher
that.fetchRun = function() {
// new id
var id = ++that.lastFetcherId;
//console.log('create fetcher #'+id);
// create fetcher with new id
var fetchRef = {
'id' : id, // self id
'queue' : [], // internal queue
'chunksIds' : [], // retrieved ids, also used to count
'hasSuccessor' : false, // keep track of next fetcher spawn
'ignoreId' : null, // when set, ignore chunks until this id is received (this id included)
};
that.fetchers.push(fetchRef);
// run fetcher
that.fetch(id, url, fetchRef);
};
//************************************************ a fetcher returns a chunk
document.addEventListener('chunk-ready', function(e) {
// shorthand
var f = e.detail;
// ignore flag is not set, process chunk
if(f.fetchRef.ignoreId == null) {
// store chunk id
f.fetchRef.chunksIds.push(f.chunkId);
// create queue item
var queueItem = {'id':f.chunkId, 'data':f.chunkData};
// chunk is received from oldest fetcher
if(f.fetchId == that.fetchers[0].id) {
// send to main queue
that.queue.push(queueItem);
// signal queue insertion
that.fire('queue-new');
}
// not oldest fetcher
else {
// use fetcher internal queue
f.fetchRef.queue.push(queueItem);
}
}
// ignore flag is set, current chunk id the one to ignore
else if(f.fetchRef.ignoreId == f.chunkId) {
// disable ignore flag
f.fetchRef.ignoreId = null;
}
//******************** check chunks count for fetcher, threshold reached
if(f.fetchRef.chunksIds.length >= chunkMax && !f.fetchRef.hasSuccessor) {
// remember the spawn
f.fetchRef.hasSuccessor = true;
// spawn new fetcher
that.fetchRun();
}
/***********************************************************************
check if the first chunk of the second oldest fetcher exists in the
oldest fetcher.
If true, then they overlap and we can kill the oldest fetcher
***********************************************************************/
if(
// is this the oldest fetcher ?
f.fetchId == that.fetchers[0].id
// is there a successor ?
&& that.fetchers[1]
// has oldest fetcher received the first chunk of its successor ?
&& that.fetchers[0].chunksIds.indexOf(
that.fetchers[1].chunksIds[0]
) > -1
) {
// get index of last chunk of the oldest fetcher within successor queue
var lastChunkId = that.fetchers[0].chunksIds[that.fetchers[0].chunksIds.length-1]
var lastChunkIndex = that.fetchers[1].chunksIds.indexOf(lastChunkId);
// successor has not reached its parent last chunk
if(lastChunkIndex < 0) {
// discard whole queue
that.fetchers[1].queue = [];
that.fetchers[1].chunksIds = [];
// set ignore id in successor to future discard duplicates
that.fetchers[1].ignoreId = lastChunkId;
}
// there is overlap
else {
/**
console.log('triming queue start: '+that.fetchers[1].queue.length
+" "+(lastChunkIndex+1)
+" "+(that.fetchers[1].queue.length-1)
);
/**/
var trimStart = lastChunkIndex+1;
var trimEnd = that.fetchers[1].queue.length-1;
// trim queue
that.fetchers[1].queue = that.fetchers[1].queue.splice(trimStart, trimEnd);
that.fetchers[1].chunksIds = that.fetchers[1].chunksIds.splice(trimStart, trimEnd);
//console.log('triming queue end: '+that.fetchers[1].queue.length);
}
// kill oldest fetcher
that.fire('fetch-kill', that.fetchers[0].id);
}
}, false);
//***************************************************** main queue processor
document.addEventListener('queue-new', function(e) {
// process chunks in queue
while(that.queue.length > 0) {
// get chunk and remove from queue
var chunk = that.queue.shift();
// output item to document
if(that.output) {
that.output.innerHTML += "<br />"+chunk.data;
}
}
}, false);
//****************************************************** start first fetcher
that.fetchRun();
};
// run
var process = new listen('http://localhost:5500/streamRandom', 20);
// bind global kill switch to button
document.getElementById('stop').addEventListener('click', process.stop, false);
</script>
</body>
</html>

Assemble paginated ajax data in a Bacon FRP stream

I'm learning FRP using Bacon.js, and would like to assemble data from a paginated API in a stream.
The module that uses the data has a consumption API like this:
// UI module, displays unicorns as they arrive
beautifulUnicorns.property.onValue(function(allUnicorns){
    console.log("Got "+ allUnicorns.length +" Unicorns");
    // ... some real display work
});
The module that assembles the data requests sequential pages from an API and pushes onto the stream every time it gets a new data set:
// beautifulUnicorns module
var curPage = 1
var stream = new Bacon.Bus()
var property = stream.toProperty()
// You have to add an empty subscriber, otherwise future onValues will not receive the initial value.
// https://github.com/baconjs/bacon.js/wiki/FAQ#why-isnt-my-property-updated
property.onValue(function(){})
var allUnicorns = [] // !!! stateful list of all unicorns ever received. Is this idiomatic for FRP?
var getNextPage = function(){
    /* get data for subsequent pages.
       Skipping for clarity */
}
var gotNextPage = function (resp) {
    Array.prototype.push.apply(allUnicorns, resp) // just adds the responses to the existing array reference
    stream.push(allUnicorns)
    curPage++
    if (curPage <= pageLimit) { getNextPage() }
}
How do I subscribe to the stream in a way that provides me a full list of all unicorns ever received? Is this flatMap or similar? I don't think I need a new stream out of it, but I don't know. I'm sorry, I'm new to the FRP way of thinking. To be clear, assembling the array works, it just feels like I'm not doing the idiomatic thing.
I'm not using jQuery or another ajax library for this, so that's why I'm not using Bacon.fromPromise
You also may wonder why my consuming module wants the whole set instead of just the incremental update. If it were just appending rows that could be ok, but in my case it's an infinite scroll and it should draw data if both: 1. data is available and 2. area is on screen.
This can be done with the .scan() method. You will also need a stream that emits the items of one page; you can create it with .repeat().
Here is draft code (sorry, not tested):
var itemsPerPage = Bacon.repeat(function(index) {
    var pageNumber = index + 1;
    if (pageNumber < PAGE_LIMIT) {
        return Bacon.fromCallback(function(callback) {
            // your method that talks to the server
            getDataForAPage(pageNumber, callback);
        });
    } else {
        return false;
    }
});

var allItems = itemsPerPage.scan([], function(allItems, itemsFromAPage) {
    return allItems.concat(itemsFromAPage);
});

// Here you go
allItems.onValue(function(allUnicorns){
    console.log("Got "+ allUnicorns.length +" Unicorns");
    // ... some real display work
});
As you noticed, you also won't need the .onValue(function(){}) hack or the external curPage state.
Here is a solution using flatMap and fold. When dealing with the network, you have to remember that the data can come back in a different order than you sent the requests - that's why the combination of fold and map.
var pages = Bacon.fromArray([1,2,3,4,5])

var requests = pages.flatMap(function(page) {
    return doAjax(page)
        .map(function(value) {
            return {
                page: page,
                value: value
            }
        })
}).log("Data received")

var allData = requests.fold([], function(arr, data) {
    return arr.concat([data])
}).map(function(arr) {
    // I would normally write this as a oneliner
    var sorted = _.sortBy(arr, "page")
    var onlyValues = _.pluck(sorted, "value")
    var inOneArray = _.flatten(onlyValues)
    return inOneArray
})

allData.log("All data")

function doAjax(page) {
    // This would actually be Bacon.fromPromise($.ajax...)
    // Math random to simulate the fact that requests can return out
    // of order
    return Bacon.later(Math.random() * 3000, [
        "Page"+page+"Item1",
        "Page"+page+"Item2"])
}
http://jsbin.com/damevu/4/edit

Chrome App: Cannot retrieve file load status

My Chrome app has a function that asks for a file to be loaded by another function, checks that the function has set a flag signifying success (External.curFile.lodd), then attempts to process it. My problem is that the flags are not set the first time I call the function, but when I call it a second time the flags are already set.
I had a feeling this has to do with Chrome file functions being asynchronous, so I had the first function idle for a bit while the file loads. The first load never succeeds, no matter how long I wait, but the second load always does!
Calling Function:
function load_by_lines_from_cur_dir( fileName, context ){ // determine the 'meaning' of a file line by line, return last 'meaning', otherwise 'null'
    var curLineMeaning = null;
    var lastLineValid = true;
    External.read_file_in_load_path(fileName); // 'External' loads 'fileName' and reads lines, REPLacement does not see this file
    // This is a dirty workaround that accounts for the fact that 'DirectoryEntry.getFile' is asynchronous, thus pre-parsing checks fail until loaded
    var counter = 0, maxLoops = 10;
    nuClock();
    do{
        sleep(500);
        counter++;
        preDebug.innerText += '\r\nLoop:' + counter + " , " + time_since_last();
    }while( !External.curFile.lodd && (counter < maxLoops) ); // idle and check if file loaded, 5000ms max
    preDebug.innerText += '\r\nLoaded?:' + External.curFile.lodd;
    preDebug.innerText += '\r\nLines?:' + External.curFile.lins;
    if( External.curFile.lodd ){ // The last load operation was successful, attempt to parse and interpret each line
        // parse and interpret lines, storing each meaning in 'curLineMeaning', until the last line is reached
        while(!External.curFile.rEOF){
            curLineMeaning = meaning( s( External.readln_from_current_file() ), context);
            preDebug.innerText += '\r\nNext Line?: ' + External.curFile.lnnm;
            preDebug.innerText += '\r\nEOF?: ' + External.curFile.rEOF;
        }
    } // else, return 'null'
    return curLineMeaning; // return the result of the last form
}
which calls the following:
External.read_file_in_load_path = function(nameStr){ // Read the lines of 'nameStr' into 'External.curFile.lins'
    External.curPath.objt.getFile( // call 'DirectoryEntry.getFile' to fetch a file in that directory
        nameStr,
        {create: false},
        function(fileEntry){ // action to perform on the fetched file, success
            External.curFile.name = nameStr; // store the file name for later use
            External.curFile.objt = fileEntry; // store the 'FileEntry' for later use
            External.curFile.objt.file( function(file){ // Returns 'File' object associated with selected file. Use this to read the file's content.
                var reader = new FileReader();
                reader.onload = function(e){
                    External.curFile.lodd = true; // File load success
                };
                reader.onloadend = function(e){
                    //var contents = e.target.result;
                    // URL, split string into lines: http://stackoverflow.com/questions/12371970/read-text-file-using-filereader
                    External.curFile.lins = e.target.result.split('\n'); // split the string result into individual lines
                };
                reader.readAsText(file);
                External.curFile.lnnm = 0; // Set current line to 0 for the newly-loaded file
                External.curFile.rEOF = false; // Reset EOF flag
                // let's try a message instead of a flag ...
                /*chrome.runtime.sendMessage({greeting: "hello"}, function(response) {
                    console.log(response.farewell);
                });*/
            } );
        },
        function(e){ External.curFile.lodd = false; } // There was an error
    );
};
This app is a dialect of Scheme. It's important that the app knows whether the source file has been loaded or not.
I didn't read through all of your code, but you can't kick off an asynchronous activity and then busy-wait for it to complete, because JavaScript is single threaded. No matter what's happened, the asynchronous function won't be executed until the script completes its current processing. In other words, asynchronous does not imply concurrent.
Generally speaking, if task A is to be performed after asynchronous task B completes, you should execute A from the completion callback for B. That's the straightforward, safe way to do it. Any shortcut, to achieve better responsiveness or to simplify the code, is going to have dependency or race-condition problems, and will require lots of horsing around to get right. Even then, it will be hard to prove that the code operates correctly on all platforms in all circumstances.
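To make that concrete for the code in the question, here is a rough, untested sketch: the loader takes a completion callback and the caller does its line parsing inside that callback instead of busy-waiting. The onDone parameter and the onMeaning continuation are names invented for this example; everything else is assumed to behave as in the question.

External.read_file_in_load_path = function(nameStr, onDone){ // now takes a completion callback
    External.curPath.objt.getFile(
        nameStr,
        {create: false},
        function(fileEntry){
            fileEntry.file( function(file){
                var reader = new FileReader();
                reader.onloadend = function(e){ // fires only after the whole file has been read
                    External.curFile.name = nameStr;
                    External.curFile.objt = fileEntry;
                    External.curFile.lins = e.target.result.split('\n');
                    External.curFile.lnnm = 0;
                    External.curFile.rEOF = false;
                    External.curFile.lodd = true;
                    onDone(true); // tell the caller the lines are ready
                };
                reader.readAsText(file);
            } );
        },
        function(e){ External.curFile.lodd = false; onDone(false); } // report the error to the caller
    );
};

function load_by_lines_from_cur_dir( fileName, context, onMeaning ){
    External.read_file_in_load_path(fileName, function(loaded){
        var curLineMeaning = null;
        if( loaded ){ // parse and interpret each line, as in the original
            while(!External.curFile.rEOF){
                curLineMeaning = meaning( s( External.readln_from_current_file() ), context);
            }
        }
        onMeaning(curLineMeaning); // the result is delivered asynchronously
    });
}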

A property not initialized... yet it should be?

I have a list containing folders, and I'm trying to get the count of the total number of files in these folders.
I manage to retrieve a ListItemCollection containing my folders. Then it starts being... picky.
ctx is my ClientContext, and collection my ListItemCollection.
function countFiles()
{
    var enumCollection = collection.getEnumerator();
    while(enumCollection.moveNext())
    {
        currentItem = enumCollection.get_current();
        var folder = currentItem.get_folder();
        if (folder === 'undefined')
            return;
        ctx.load(folder, 'ItemCount');
        ctx.executeQueryAsync(Function.createDelegate(this, function()
        {
            totalCount += folder.get_itemCount();
        }), Function.createDelegate(this, onQueryFailed));
    }
}
So it works... half of the time. If I have 6 items in my collection, I get 3 or 4 "The property or field 'ItemCount' has not been initialized" exceptions, and obviously my totalCount is wrong. I just can't seem to understand why, since the executeQueryAsync should not happen before the folder is actually loaded.
I'm very new to Javascript, so it may look horrid and be missing some essential code I didn't consider worthy of interest, feel free to ask if it is so.
Referencing closure variables (like folder in this case) from an asynchronous callback is generally a big problem. Thankfully it's easy to fix:
function countFiles()
{
    function itemCounter(folder) {
        return function() { totalCount += folder.get_itemCount(); };
    }
    var enumCollection = collection.getEnumerator();
    while(enumCollection.moveNext())
    {
        var folder = enumCollection.get_current().get_folder();
        if (folder === undefined) // not a string!
            return;
        ctx.load(folder, 'ItemCount');
        ctx.executeQueryAsync(itemCounter(folder), Function.createDelegate(this, onQueryFailed));
    }
}
(You don't need that .createDelegate() call because the function doesn't need this.)
Now, after that, you face the problem of knowing when that counter has been finally updated. Those asynchronous callbacks will eventually finish, but when? You could keep a separate counter, one for each query you start, and then decrement that in the callback. When it drops back to zero, then you'll know you're done.
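A hedged sketch of that counting idea, building on the code above: onAllCounted is a hypothetical callback for whatever should run once every folder has reported, and a real version would also decrement the counter in the error handler.

function countFiles(onAllCounted)
{
    var totalCount = 0;
    var pending = 0; // number of queries still in flight
    function itemCounter(folder) {
        return function() {
            totalCount += folder.get_itemCount();
            if (--pending === 0)           // this was the last outstanding query
                onAllCounted(totalCount);
        };
    }
    var enumCollection = collection.getEnumerator();
    while(enumCollection.moveNext())
    {
        var folder = enumCollection.get_current().get_folder();
        if (folder === undefined)
            return;
        pending++;
        ctx.load(folder, 'ItemCount');
        ctx.executeQueryAsync(itemCounter(folder), Function.createDelegate(this, onQueryFailed));
    }
}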
Since SP.ClientContext.executeQueryAsync is an asynchronous function, the loop will most likely finish before the first callback completes, so the behavior of the code above can be unexpected.
Instead, I would recommend another, cleaner approach for counting files (including files located under nested folders) using SharePoint JSOM.
How to count the total number of files in List using JSOM
The following function counts the number of list items in a list:
function getItemsCount(listTitle, complete){
    var ctx = SP.ClientContext.get_current();
    var list = ctx.get_web().get_lists().getByTitle(listTitle);
    var items = list.getItems(createQuery());
    ctx.load(items);
    ctx.executeQueryAsync(
        function() {
            complete(items.get_count());
        },
        function() {
            complete(-1);
        }
    );

    function createQuery()
    {
        var query = new SP.CamlQuery();
        query.set_viewXml('<View Scope="RecursiveAll"><Query><Where><Eq><FieldRef Name="FSObjType" /><Value Type="Integer">0</Value></Eq></Where></Query></View>');
        return query;
    }
}
Usage
getItemsCount('Documents', function(itemsCount){
    console.log(String.format('Total files count in Documents library: {0}', itemsCount));
});
