Create Firefox Addon to Watch and modify XHR requests & reponses - javascript

Update: I guess the subject gave a wrong notion that I'm looking for an existing addon. This is a custom problem and I do NOT want an existing solution.
I wish to WRITE (or more appropriately, modify and existing) Addon.
Here's my requirement:
I want my addon to work for a particular site only
The data on the pages are encoded using a 2 way hash
A good deal of info is loaded by XHR requests, and sometimes
displayed in animated bubbles etc.
The current version of my addon parses the page via XPath
expressions, decodes the data, and replaces them
The issue comes in with those bubblified boxes that are displayed
on mouse-over event
Thus, I realized that it might be a good idea to create an XHR
bridge that could listen to all the data and decode/encode on the fly
After a couple of searches, I came across nsITraceableInterface[1][2][3]
Just wanted to know if I am on the correct path. If "yes", then kindly
provide any extra pointers and suggestions that may be appropriate;
and if "No", then.. well, please help with correct pointers :)
Thanks,
Bipin.
[1]. https://developer.mozilla.org/en/NsITraceableChannel
[2]. http://www.softwareishard.com/blog/firebug/nsitraceablechannel-intercept-http-traffic/
[3]. http://www.ashita.org/howto-xhr-listening-by-a-firefox-addon/

nsITraceableChannel is indeed the way to go here. the blog posts by Jan Odvarko (softwareishard.com) and myself (ashita.org) show how to do this. You may also want to see http://www.ashita.org/implementing-an-xpcom-firefox-interface-and-creating-observers/, however it isn't really necessary to do this in an XPCOM component.
The steps are basically:
Create Object prototype implementing nsITraceableChannel; and create observer to listen to http-on-modify-request and http-on-examine-response
register observer
observer listening to the two request types adds our nsITraceableChannel object into the chain of listeners and make sure that our nsITC knows who is next in the chain
nsITC object provides three callbacks and each will be called at the appropriate stage: onStartRequest, onDataAvailable, and onStopRequest
in each of the callbacks above, our nsITC object must pass on the data to the next item in the chain
Below is actual code from a site-specific add-on I wrote that behaves very similarly to yours from what I can tell.
function TracingListener() {
//this.receivedData = [];
}
TracingListener.prototype =
{
originalListener: null,
receivedData: null, // array for incoming data.
onDataAvailable: function(request, context, inputStream, offset, count)
{
var binaryInputStream = CCIN("#mozilla.org/binaryinputstream;1", "nsIBinaryInputStream");
var storageStream = CCIN("#mozilla.org/storagestream;1", "nsIStorageStream");
binaryInputStream.setInputStream(inputStream);
storageStream.init(8192, count, null);
var binaryOutputStream = CCIN("#mozilla.org/binaryoutputstream;1",
"nsIBinaryOutputStream");
binaryOutputStream.setOutputStream(storageStream.getOutputStream(0));
// Copy received data as they come.
var data = binaryInputStream.readBytes(count);
//var data = inputStream.readBytes(count);
this.receivedData.push(data);
binaryOutputStream.writeBytes(data, count);
this.originalListener.onDataAvailable(request, context,storageStream.newInputStream(0), offset, count);
},
onStartRequest: function(request, context) {
this.receivedData = [];
this.originalListener.onStartRequest(request, context);
},
onStopRequest: function(request, context, statusCode)
{
try
{
request.QueryInterface(Ci.nsIHttpChannel);
if (request.originalURI && piratequesting.baseURL == request.originalURI.prePath && request.originalURI.path.indexOf("/index.php?ajax=") == 0)
{
var data = null;
if (request.requestMethod.toLowerCase() == "post")
{
var postText = this.readPostTextFromRequest(request, context);
if (postText)
data = ((String)(postText)).parseQuery();
}
var date = Date.parse(request.getResponseHeader("Date"));
var responseSource = this.receivedData.join('');
//fix leading spaces bug
responseSource = responseSource.replace(/^\s+(\S[\s\S]+)/, "$1");
piratequesting.ProcessRawResponse(request.originalURI.spec, responseSource, date, data);
}
}
catch (e)
{
dumpError(e);
}
this.originalListener.onStopRequest(request, context, statusCode);
},
QueryInterface: function (aIID) {
if (aIID.equals(Ci.nsIStreamListener) ||
aIID.equals(Ci.nsISupports)) {
return this;
}
throw Components.results.NS_NOINTERFACE;
},
readPostTextFromRequest : function(request, context) {
try
{
var is = request.QueryInterface(Ci.nsIUploadChannel).uploadStream;
if (is)
{
var ss = is.QueryInterface(Ci.nsISeekableStream);
var prevOffset;
if (ss)
{
prevOffset = ss.tell();
ss.seek(Ci.nsISeekableStream.NS_SEEK_SET, 0);
}
// Read data from the stream..
var charset = "UTF-8";
var text = this.readFromStream(is, charset, true);
// Seek locks the file so, seek to the beginning only if necko hasn't read it yet,
// since necko doesn't seek to 0 before reading (at lest not till 459384 is fixed).
if (ss && prevOffset == 0)
ss.seek(Ci.nsISeekableStream.NS_SEEK_SET, 0);
return text;
}
else {
dump("Failed to Query Interface for upload stream.\n");
}
}
catch(exc)
{
dumpError(exc);
}
return null;
},
readFromStream : function(stream, charset, noClose) {
var sis = CCSV("#mozilla.org/binaryinputstream;1", "nsIBinaryInputStream");
sis.setInputStream(stream);
var segments = [];
for (var count = stream.available(); count; count = stream.available())
segments.push(sis.readBytes(count));
if (!noClose)
sis.close();
var text = segments.join("");
return text;
}
}
hRO = {
observe: function(request, aTopic, aData){
try {
if (typeof Cc == "undefined") {
var Cc = Components.classes;
}
if (typeof Ci == "undefined") {
var Ci = Components.interfaces;
}
if (aTopic == "http-on-examine-response") {
request.QueryInterface(Ci.nsIHttpChannel);
if (request.originalURI && piratequesting.baseURL == request.originalURI.prePath && request.originalURI.path.indexOf("/index.php?ajax=") == 0) {
var newListener = new TracingListener();
request.QueryInterface(Ci.nsITraceableChannel);
newListener.originalListener = request.setNewListener(newListener);
}
}
} catch (e) {
dump("\nhRO error: \n\tMessage: " + e.message + "\n\tFile: " + e.fileName + " line: " + e.lineNumber + "\n");
}
},
QueryInterface: function(aIID){
if (typeof Cc == "undefined") {
var Cc = Components.classes;
}
if (typeof Ci == "undefined") {
var Ci = Components.interfaces;
}
if (aIID.equals(Ci.nsIObserver) ||
aIID.equals(Ci.nsISupports)) {
return this;
}
throw Components.results.NS_NOINTERFACE;
},
};
var observerService = Cc["#mozilla.org/observer-service;1"]
.getService(Ci.nsIObserverService);
observerService.addObserver(hRO,
"http-on-examine-response", false);
In the above code, originalListener is the listener we are inserting ourselves before in the chain. It is vital that you keep that info when creating the Tracing Listener and pass on the data in all three callbacks. Otherwise nothing will work (pages won't even load. Firefox itself is last in the chain).
Note: there are some functions called in the code above which are part of the piratequesting add-on, e.g.: parseQuery() and dumpError()

Tamper Data Add-on. See also the How to Use it page

You could try making a Greasemonkey script and overwriting the XMLHttpRequest.
The code would look something like:
function request () {
};
request.prototype.open = function (type, path, block) {
GM_xmlhttpRequest({
method: type,
url: path,
onload: function (response) {
// some code here
}
});
};
unsafeWindow.XMLHttpRequest = request;
Also note that you can turn a GM script into an addon for Firefox.

Related

Workaround for new Edge Indexeddb bug?

Microsoft released update kb4088776 in the past couple days, which has had a devastating effect on performance of indexedDb openCursor.
The simple fiddle here shows the problem. With the update, the "retrieval" time is 40 seconds or more. Prior to the update, it is around 1 second.
https://jsfiddle.net/L7q55ad6/23/
Relevant retrieval portion is here:
var _currentVer = 1;
function _openDatabase(fnSuccess) {
var _custDb = window.indexedDB.open("MyDatabase", _currentVer);
_custDb.onsuccess = function (event) {
var db = event.target.result;
fnSuccess(db);
}
_custDb.onerror = function (event) {
_custDb = null;
fnSuccess(null); // should use localData
}
_custDb.onupgradeneeded = function (event) {
var db = event.target.result;
var txn = event.target.transaction;
// Create an objectStore for this database
if (event.oldVersion < _currentVer) {
var customer = db.createObjectStore("customer", { keyPath: "guid" });
var index = customer.createIndex("by_id", "id", { unique: false });
}
};
}
function _retrieveCustomers(fn) {
_openDatabase(function (db) {
if (db == null)
{
alert("not supported");
return;
}
var customers = [];
var transaction = db.transaction("customer", "readonly");
var objectStore = transaction.objectStore("customer");
if (typeof objectStore.getAll === 'function') {
console.log("using getAll");
objectStore.getAll().onsuccess = function (event) {
fn(event.target.result);
};
}
else {
console.log("using openCursor");
objectStore.openCursor().onsuccess = function (event) {
var cursor = event.target.result;
if (cursor) {
customers.push(cursor.value);
cursor.continue();
}
else {
fn(customers);
}
};
}
});
}
The time to create and add the customers is basically normal, only the retrieval is bad. Edge has never supported the getAll method and it still doesn't after the update.
The only workaround I can think of would be to use localStorage instead, but unfortunately our data set is too large to fit into the 10MB limit. It is actually faster now to retrieve from our servers and convert the text to javascript objects, defeating the main purpose of indexeddb.
I don't have Edge so I can't test this, but does it happen with get too, or just openCursor? If get still performs well, you could store an index (in your example, the list of primary keys; in your real app, maybe something more complicated) in localStorage, and then use that to call get on each one.

Dynamics CRM 2016 Javascript forEach not supported

So I am trying to write javascript code for a ribbon button in Dynamics CRM 2016 that will grab a phone number from a list of Leads that can be seen in the Active Leads window.
However, when I try to run it, I get an error telling me
As I step into my code (I'm debugging), I see this error
Here is the code I am working with.
function updateSelected(SelectedControlSelectedItemIds, SelectedEntityTypeName) {
// this should iterate through the list
SelectedControlSelectedItemIds.forEach(
function (selected, index) {
//this should get the id and name of the selected lead
getPhoneNumber(selected, SelectedEntityTypeName);
});
}
//I should have the lead ID and Name here, but it is returning null
function getPhoneNumber(id, entityName) {
var query = "telephone1";
Sdk.WebApi.retrieveRecord(id, entityName, query, "",
function (result) {
var telephone1 = result.telephone1;
// I'm trying to capture the number and display it via alert.
alert(telephone1);
},
function (error) {
alert(error);
})
}
Any help is appreciated.
What you have is an javascript error. In js you can only use forEach on an array. SelectedControlSelectedItemIds is an object not an array.
To loop though an object, you can do the following.
for (var key in SelectedControlSelectedItemIds){
if(SelectedControlSelectedItemIds.hasOwnProperty(key)){
getPhoneNumber(SelectedControlSelectedItemIds[key], SelectedEntityTypeName)
}
}
Okay, so I figured it out. I had help, so I refuse to take full credit.
First, I had to download the SDK.WEBAPI.
I then had to add the webAPI to my Javascript Actions in the Ribbon Tool Bench.
Then, I had to create a function to remove the brackets around the
SelectedControlSelectedItemIds
Firstly, I had to use the API WITH the forEach method in order for it to work.
These are the revisions to my code.
function removeBraces(str) {
str = str.replace(/[{}]/g, "");
return str;
}
function updateSelected(SelectedControlSelectedItemIds, SelectedEntityTypeName) {
//alert(SelectedEntityTypeName);
SelectedControlSelectedItemIds.forEach(
function (selected, index) {
getPhoneNumber(removeBraces(selected), SelectedEntityTypeName);
// alert(selected);
});
}
function getPhoneNumber(id, entityName) {
var query = "telephone1";
SDK.WEBAPI.retrieveRecord(id, entityName, query, "",
function (result) {
var telephone1 = result.telephone1;
formatted = telephone1.replace(/[- )(]/g,'');
dialready = "1" + formatted;
withcolon = dialready.replace(/(.{1})/g,"$1:")
number = telephone1;
if (Xrm.Page.context.getUserName() == "Jerry Ryback") {
url = "http://111.222.333.444/cgi-bin/api-send_key";
} else if(Xrm.Page.context.getUserName() == "Frank Jane") {
url = "http://222.333.444.555/cgi-bin/api-send_key";
}
else if( Xrm.Page.context.getUserName() == "Bob Gilfred"){
url = "http://333.444.555.666/cgi-bin/api-send_key";
}
else if( Xrm.Page.context.getUserName() == "Cheryl Bradley"){
url = "http://444.555.666.777/cgi-bin/api-send_key";
}
else if( Xrm.Page.context.getUserName() == "Bill Dunny"){
url = "http://555.666.777.888/cgi-bin/api-send_key";
}
if (url != "") {
var params = "passcode=admin&keys=" + withcolon + "SEND";
var http = new XMLHttpRequest();
http.open("GET", url + "?" + params, true);
http.onreadystatechange = function () {
if (http.readyState == 4 && http.status == 200) {
alert(http.responseText);
}
}
http.send(null);
}
},
function (error) {
// alert(error);
})
}
To elaborate, once I successfully get the number, I remove the parenthesis, dashes and white-space. Then, I add a "1" to the beginning. Finally, I insert colons in between each number. Then, I create an HTTP command and send it to the office phone of whoever is using CRM at the time. The user eval and HTTP message is my code. I'm showing you all of this because it was a great learning experience, and this feature really adds to the functionality.
I hope some of you find this useful.
Thanks for the help.

The collection has not been initialized - Sharepoint Javascript

I'm getting the following error when attempting to get an enumerator for a collection of lists: "Uncaught Error: The collection has not been initialized. It has not been requested or the request has not been executed. It may need to be explicitly requested."
It happens on the line var listEnumerator = lists.getEnumerator(); it seems to me that there is an issue in my attempt to load lists into the client object with context.load(lists);
Here's the portion of my code that's causing the problem. I've marked the place just before the error is thrown.
//____________________________Required function for accessing the host site's info.___________________________________
function getQueryStringParameter(param) {
var params = document.URL.split("?")[1].split("&");
for (var i = 0; i < params.length; i = i + 1) {
var singleParam = params[i].split("=");
if (singleParam[0] == param) {
return singleParam[1];
}
}
}
//____________________________Begin checking for list_________________________
function checkForList(listToFind, typeOfListToCreateIfTheListIsMissing)
{
var hostUrl = decodeURIComponent(getQueryStringParameter("SPHostUrl"));
var hostcontext = new SP.AppContextSite(context, hostUrl);
var hostweb = hostcontext.get_web();
var lists = hostweb.get_lists();
context.load(lists);
context.executeQueryAsync(checkIfListExistsUsingEnumerator(listToFind, lists, hostweb, typeOfListToCreateIfTheListIsMissing), onQueryFailed);
}
//Failed to get lists for some reason
function onQueryFailed(sender, args) {
alert('We failed to retrieve lists. \n' + args.get_message() + '\n' + args.get_stackTrace());
}
//____________________________Does list exist?____________________________
function checkIfListExistsUsingEnumerator(listToFind, lists, hostweb, typeOfList)
{
var listExists = false;
//!!!!!!!!!!!!!!! ERROR HERE !!!!!!!!!!!!!!!!
var listEnumerator = lists.getEnumerator();
var title;
while (listEnumerator.moveNext())
{
title = listEnumerator.get_current().get_title();
if (title == listToFind)
{
listExists = true;
}
}
if (!listExists)
{
alert("It appears that a required list does not already exist. \nClick ok, and we'll automatically create one for you.");
//Create a new list
createList(listToFind, hostweb, typeOfList);
}
else if (listExists)
{
//Do nothing.
}
}
//____________________________If it doesn't, create one on the local site____________________________
function createList(nameOfNewList, hostweb, typeOfList) {
var listCreationInfo = new SP.ListCreationInformation();
listCreationInfo.set_title(nameOfNewList);
if (typeOfList === "events")
{
listCreationInfo.set_templateType(SP.ListTemplateType.events);
}
else if (typeOfList === "contacts")
{
listCreationInfo.set_templateType(SP.ListTemplateType.contacts);
}
var lists = hostweb.get_lists();
var newList = lists.add(listCreationInfo);
context.load(newList);
context.executeQueryAsync(onListCreationSuccess, onListCreationFail);
}
function onListCreationSuccess() {
alert('List created successfully!');
}
function onListCreationFail(sender, args) {
alert('Failed to create the list. ' + args.get_message());
}
I've looked at this question sharepoint javascript collection not initialized error which seems to be fairly similar to mine, but I'm having trouble implementing the solution provided there, making me think my error may be have a different cause.
I've also tried querying for the lists inside of the function that is throwing the error, but that doesn't seem to solve anything.
For a little background, these functions are attempting to read all lists from the app's host site, check to see if a specified list exists, and create a list if no matching list exists. If there's a better way of doing that than what I'm attempting, I'd be open to that too.
Any pointers?
Some things I've tried that don't seem to work:
Changing the Asynchronous query
context.executeQueryAsync(checkIfListExists(listToFind, hostweb, typeOfListToCreateIfTheListIsMissing), onQueryFailed);
to a Synchronous one.
context.executeQuery(checkIfListExists(listToFind, hostweb, typeOfListToCreateIfTheListIsMissing), onQueryFailed);
I've figured out an alternate, and shorter way to method of achieving the same goal I was trying to achieve before.
Instead of checking to see if a list does not already exist, I just try to create a list, and the Query fails to create a list if one is already there. (That's good because I don't want to overwrite the list if it is already there.)
I'm not totally sure if there are any undesired side effects of what I'm doing here, but in my tests it produced the desired behavior.
//____________________________Required function for accessing the host site's info.___________________________________
function getQueryStringParameter(param) {
var params = document.URL.split("?")[1].split("&");
for (var i = 0; i < params.length; i = i + 1) {
var singleParam = params[i].split("=");
if (singleParam[0] == param) {
return singleParam[1];
}
}
}
//____________________________Create a list if one does not already exist_________________________
function createList(listToCreate, typeOfList)
{
// Create an announcement SharePoint list with the name that the user specifies.
var hostUrl = decodeURIComponent(getQueryStringParameter("SPHostUrl"));
var hostContext = new SP.AppContextSite(currentContext, hostUrl);
var hostweb = hostContext.get_web();
var listCreationInfo = new SP.ListCreationInformation();
listCreationInfo.set_title(listToCreate);
if (typeOfList === "events")
{
listCreationInfo.set_templateType(SP.ListTemplateType.events);
}
else if (typeOfList === "contacts")
{
listCreationInfo.set_templateType(SP.ListTemplateType.contacts);
}
var lists = hostweb.get_lists();
var newList = lists.add(listCreationInfo);
currentContext.load(newList);
currentContext.executeQueryAsync(onListCreationSuccess, onListCreationFail);
}
function onListCreationSuccess() {
alert("We've created a list since one doesn't exist yet." );
}
function onListCreationFail(sender, args) {
alert("We didn't create the list. Here's why: " + args.get_message());
}

IndexedDB via Lawnchair gets larger with every save

I am using Lawnchair.js on a mobile app I am building at work targeting iOS, Android, and Windows phones. My question is I have a relatively simple function(see below), that reads data from an object and saves it in the indexeddb database. It's about 4MB of data and on the first go round when I inspect in Internet explorer(via internet options), I can see the database is about 7MB. If I reload the page and re-run the same function with the same data, it increases to 14MB and then 20MB. Im using the same keys so my understanding is that this should just update the record but it's almost as if it's just inserting all new records every time. I have also had similar behavior using Lawnchair on mobile safari using websql adapter. Has anyone seen this before or have any suggestions as to why this might be ??.
The following code is from a function I am using to populate the database.
populateDatabase: function(database,callback) {
'use strict';
var key;
try {
for(key in MasterData){
if(MasterData.hasOwnProperty(key)){
var itemInfo = DataConfig.checkForDataUpdates[DataConfig.keyMap[key]];
database.save({key:itemInfo["name"],hash:itemInfo["version"],url:itemInfo["url"],data:MasterData[key]});
}
}
callback(true);
} catch(e){
callback(false);
}
}
MasterData is the large data file and itemInfo contains the key name, a hash that is later used to check an api for updates, and the relative url of where to update from. After I create the database I pass it into this function and then pass back true if the inserts are successful and false otherwise.
As previously mentioned, I have seen similar issues in iOS where calling database.save() was allocating a lot of memory but not releasing it and eventually causing a crash if it populated the database and then tried to update some records. Removing Lawnchair from the equation has kept it from crashing but it is still allocating a lot of memory when saving data. Not sure if this is normal for persistent storage on mobile devices, a bug in Lawnchair, or me being a noob and doing something terribly wrong but I could use some pointers on this as well as why indexeddb just keeps getting larger and larger on every save (at least during initial testing in IE10)??
EDIT: Source Code for indexed-db adapter is here:
https://github.com/brianleroux/lawnchair/blob/master/src/adapters/indexed-db.js
and here is the code for the save function I am using:
save:function(obj, callback) {
var self = this;
if(!this.store) {
this.waiting.push(function() {
this.save(obj, callback);
});
return;
}
var objs = (this.isArray(obj) ? obj : [obj]).map(function(o){if(!o.key) { o.key = self.uuid()} return o})
var win = function (e) {
if (callback) { self.lambda(callback).call(self, self.isArray(obj) ? objs : objs[0] ) }
};
var trans = this.db.transaction(this.record, READ_WRITE);
var store = trans.objectStore(this.record);
for (var i = 0; i < objs.length; i++) {
var o = objs[i];
store.put(o, o.key);
}
store.transaction.oncomplete = win;
store.transaction.onabort = fail;
return this;
},
When Creating a new instance, Lawnchair uses the init function from the indexed-db adapter which is the following.
init:function(options, callback) {
this.idb = getIDB();
this.waiting = [];
this.useAutoIncrement = useAutoIncrement();
var request = this.idb.open(this.name, STORE_VERSION);
var self = this;
var cb = self.fn(self.name, callback);
if (cb && typeof cb != 'function') throw 'callback not valid';
var win = function() {
// manually clean up event handlers on request; this helps on chrome
request.onupgradeneeded = request.onsuccess = request.error = null;
if(cb) return cb.call(self, self);
};
var upgrade = function(from, to) {
// don't try to migrate dbs, just recreate
try {
self.db.deleteObjectStore('teststore'); // old adapter
} catch (e1) { /* ignore */ }
try {
self.db.deleteObjectStore(self.record);
} catch (e2) { /* ignore */ }
// ok, create object store.
var params = {};
if (self.useAutoIncrement) { params.autoIncrement = true; }
self.db.createObjectStore(self.record, params);
self.store = true;
};
request.onupgradeneeded = function(event) {
self.db = request.result;
self.transaction = request.transaction;
upgrade(event.oldVersion, event.newVersion);
// will end up in onsuccess callback
};
request.onsuccess = function(event) {
self.db = event.target.result;
if(self.db.version != (''+STORE_VERSION)) {
// DEPRECATED API: modern implementations will fire the
// upgradeneeded event instead.
var oldVersion = self.db.version;
var setVrequest = self.db.setVersion(''+STORE_VERSION);
// onsuccess is the only place we can create Object Stores
setVrequest.onsuccess = function(event) {
var transaction = setVrequest.result;
setVrequest.onsuccess = setVrequest.onerror = null;
// can't upgrade w/o versionchange transaction.
upgrade(oldVersion, STORE_VERSION);
transaction.oncomplete = function() {
for (var i = 0; i < self.waiting.length; i++) {
self.waiting[i].call(self);
}
self.waiting = [];
win();
};
};
setVrequest.onerror = function(e) {
setVrequest.onsuccess = setVrequest.onerror = null;
console.error("Failed to create objectstore " + e);
fail(e);
};
} else {
self.store = true;
for (var i = 0; i < self.waiting.length; i++) {
self.waiting[i].call(self);
}
self.waiting = [];
win();
}
}
request.onerror = function(ev) {
if (request.errorCode === getIDBDatabaseException().VERSION_ERR) {
// xxx blow it away
self.idb.deleteDatabase(self.name);
// try it again.
return self.init(options, callback);
}
console.error('Failed to open database');
};
},
I think you keep adding data instead of updating the present data.
Can you provide some more information about the configuration of the store. Are you using an inline or external key? If it's an internal what is the keypath.

Reading a file in real-time using Node.js

I need to work out the best way to read data that is being written to a file, using node.js, in real time. Trouble is, Node is a fast moving ship which makes finding the best method for addressing a problem difficult.
What I Want To Do
I have a java process that is doing something and then writing the results of this thing it does to a text file. It typically takes anything from 5 mins to 5 hours to run, with data being written the whole time, and can get up to some fairly hefty throughput rates (circa. 1000 lines/sec).
I would like to read this file, in real time, and then, using node aggregate the data and write it to a socket where it can be graphed on the client.
The client, graphs, sockets and aggregation logic are all done but I am confused about the best approach for reading the file.
What I Have Tried (or at least played with)
FIFO - I can tell my Java process to write to a fifo and read this using node, this is in fact how we have this currently implemted using Perl, but because everything else is running in node it makes sense to port the code over.
Unix Sockets - As above.
fs.watchFile - will this work for what we need?
fs.createReadStream - is this better than watchFile?
fs & tail -f - seems like a hack.
What, actually, is my Question
I am tending towards using Unix Sockets, this seems the fastest option. But does node have better built-in features for reading files from the fs in real time?
If you want to keep the file as a persistent store of your data to prevent a loss of stream in case of a system crash or one of the members in your network of running processes dies, you can still continue on writing to a file and reading from it.
If you do not need this file as a persistent storage of produced results from your Java process, then going with a Unix socket is much better for both the ease and also the performance.
fs.watchFile() is not what you need because it works on file stats as filesystem reports it and since you want to read the file as it is already being written, this is not what you want.
SHORT UPDATE: I am very sorry to realize that although I had accused fs.watchFile() for using file stats in previous paragraph, I had done the very same thing myself in my example code below! Although I had already warned readers to "take care!" because I had written it in just a few minutes without even testing well; still, it can be done better by using fs.watch() instead of watchFile or fstatSync if underlying system supports it.
For reading/writing from a file, I have just written below for fun in my break:
test-fs-writer.js: [You will not need this since you write file in your Java process]
var fs = require('fs'),
lineno=0;
var stream = fs.createWriteStream('test-read-write.txt', {flags:'a'});
stream.on('open', function() {
console.log('Stream opened, will start writing in 2 secs');
setInterval(function() { stream.write((++lineno)+' oi!\n'); }, 2000);
});
test-fs-reader.js: [Take care, this is just demonstration, check err objects!]
var fs = require('fs'),
bite_size = 256,
readbytes = 0,
file;
fs.open('test-read-write.txt', 'r', function(err, fd) { file = fd; readsome(); });
function readsome() {
var stats = fs.fstatSync(file); // yes sometimes async does not make sense!
if(stats.size<readbytes+1) {
console.log('Hehe I am much faster than your writer..! I will sleep for a while, I deserve it!');
setTimeout(readsome, 3000);
}
else {
fs.read(file, new Buffer(bite_size), 0, bite_size, readbytes, processsome);
}
}
function processsome(err, bytecount, buff) {
console.log('Read', bytecount, 'and will process it now.');
// Here we will process our incoming data:
// Do whatever you need. Just be careful about not using beyond the bytecount in buff.
console.log(buff.toString('utf-8', 0, bytecount));
// So we continue reading from where we left:
readbytes+=bytecount;
process.nextTick(readsome);
}
You can safely avoid using nextTick and call readsome() directly instead. Since we are still working sync here, it is not necessary in any sense. I just like it. :p
EDIT by Oliver Lloyd
Taking the example above but extending it to read CSV data gives:
var lastLineFeed,
lineArray;
function processsome(err, bytecount, buff) {
lastLineFeed = buff.toString('utf-8', 0, bytecount).lastIndexOf('\n');
if(lastLineFeed > -1){
// Split the buffer by line
lineArray = buff.toString('utf-8', 0, bytecount).slice(0,lastLineFeed).split('\n');
// Then split each line by comma
for(i=0;i<lineArray.length;i++){
// Add read rows to an array for use elsewhere
valueArray.push(lineArray[i].split(','));
}
// Set a new position to read from
readbytes+=lastLineFeed+1;
} else {
// No complete lines were read
readbytes+=bytecount;
}
process.nextTick(readFile);
}
Why do you think tail -f is a hack?
While figuring out I found a good example I would do something similar.
Real time online activity monitor example with node.js and WebSocket:
http://blog.new-bamboo.co.uk/2009/12/7/real-time-online-activity-monitor-example-with-node-js-and-websocket
Just to make this answer complete, I wrote you an example code which would run under 0.8.0 - (the http server is a hack maybe).
A child process is spawned running with tail, and since a child process is an EventEmitter with three streams (we use stdout in our case) you can just add the a listener with on
filename: tailServer.js
usage: node tailServer /var/log/filename.log
var http = require("http");
var filename = process.argv[2];
if (!filename)
return console.log("Usage: node tailServer filename");
var spawn = require('child_process').spawn;
var tail = spawn('tail', ['-f', filename]);
http.createServer(function (request, response) {
console.log('request starting...');
response.writeHead(200, {'Content-Type': 'text/plain' });
tail.stdout.on('data', function (data) {
response.write('' + data);
});
}).listen(8088);
console.log('Server running at http://127.0.0.1:8088/');
this module is an implementation of the principle #hasanyasin suggests:
https://github.com/felixge/node-growing-file
I took the answer from #hasanyasin and wrapped it up into a modular promise. The basic idea is that you pass a file and a handler function that does something with the stringified-buffer that is read from the file. If the handler function returns true, then the file will stop being read. You can also set a timeout that will kill reading if the handler doesn't return true fast enough.
The promiser will return true if the resolve() was called due to timeout, otherwise it will return false.
See the bottom for usage example.
// https://stackoverflow.com/a/11233045
var fs = require('fs');
var Promise = require('promise');
class liveReaderPromiseMe {
constructor(file, buffStringHandler, opts) {
/*
var opts = {
starting_position: 0,
byte_size: 256,
check_for_bytes_every_ms: 3000,
no_handler_resolution_timeout_ms: null
};
*/
if (file == null) {
throw new Error("file arg must be present");
} else {
this.file = file;
}
if (buffStringHandler == null) {
throw new Error("buffStringHandler arg must be present");
} else {
this.buffStringHandler = buffStringHandler;
}
if (opts == null) {
opts = {};
}
if (opts.starting_position == null) {
this.current_position = 0;
} else {
this.current_position = opts.starting_position;
}
if (opts.byte_size == null) {
this.byte_size = 256;
} else {
this.byte_size = opts.byte_size;
}
if (opts.check_for_bytes_every_ms == null) {
this.check_for_bytes_every_ms = 3000;
} else {
this.check_for_bytes_every_ms = opts.check_for_bytes_every_ms;
}
if (opts.no_handler_resolution_timeout_ms == null) {
this.no_handler_resolution_timeout_ms = null;
} else {
this.no_handler_resolution_timeout_ms = opts.no_handler_resolution_timeout_ms;
}
}
startHandlerTimeout() {
if (this.no_handler_resolution_timeout_ms && (this._handlerTimer == null)) {
var that = this;
this._handlerTimer = setTimeout(
function() {
that._is_handler_timed_out = true;
},
this.no_handler_resolution_timeout_ms
);
}
}
clearHandlerTimeout() {
if (this._handlerTimer != null) {
clearTimeout(this._handlerTimer);
this._handlerTimer = null;
}
this._is_handler_timed_out = false;
}
isHandlerTimedOut() {
return !!this._is_handler_timed_out;
}
fsReadCallback(err, bytecount, buff) {
try {
if (err) {
throw err;
} else {
this.current_position += bytecount;
var buff_str = buff.toString('utf-8', 0, bytecount);
var that = this;
Promise.resolve().then(function() {
return that.buffStringHandler(buff_str);
}).then(function(is_handler_resolved) {
if (is_handler_resolved) {
that.resolve(false);
} else {
process.nextTick(that.doReading.bind(that));
}
}).catch(function(err) {
that.reject(err);
});
}
} catch(err) {
this.reject(err);
}
}
fsRead(bytecount) {
fs.read(
this.file,
new Buffer(bytecount),
0,
bytecount,
this.current_position,
this.fsReadCallback.bind(this)
);
}
doReading() {
if (this.isHandlerTimedOut()) {
return this.resolve(true);
}
var max_next_bytes = fs.fstatSync(this.file).size - this.current_position;
if (max_next_bytes) {
this.fsRead( (this.byte_size > max_next_bytes) ? max_next_bytes : this.byte_size );
} else {
setTimeout(this.doReading.bind(this), this.check_for_bytes_every_ms);
}
}
promiser() {
var that = this;
return new Promise(function(resolve, reject) {
that.resolve = resolve;
that.reject = reject;
that.doReading();
that.startHandlerTimeout();
}).then(function(was_resolved_by_timeout) {
that.clearHandlerTimeout();
return was_resolved_by_timeout;
});
}
}
module.exports = function(file, buffStringHandler, opts) {
try {
var live_reader = new liveReaderPromiseMe(file, buffStringHandler, opts);
return live_reader.promiser();
} catch(err) {
return Promise.reject(err);
}
};
Then use the above code like this:
var fs = require('fs');
var path = require('path');
var Promise = require('promise');
var liveReadAppendingFilePromiser = require('./path/to/liveReadAppendingFilePromiser');
var ending_str = '_THIS_IS_THE_END_';
var test_path = path.join('E:/tmp/test.txt');
var s_list = [];
var buffStringHandler = function(s) {
s_list.push(s);
var tmp = s_list.join('');
if (-1 !== tmp.indexOf(ending_str)) {
// if this return never occurs, then the file will be read until no_handler_resolution_timeout_ms
// by default, no_handler_resolution_timeout_ms is null, so read will continue forever until this function returns something that evaluates to true
return true;
// you can also return a promise:
// return Promise.resolve().then(function() { return true; } );
}
};
var appender = fs.openSync(test_path, 'a');
try {
var reader = fs.openSync(test_path, 'r');
try {
var options = {
starting_position: 0,
byte_size: 256,
check_for_bytes_every_ms: 3000,
no_handler_resolution_timeout_ms: 10000,
};
liveReadAppendingFilePromiser(reader, buffStringHandler, options)
.then(function(did_reader_time_out) {
console.log('reader timed out: ', did_reader_time_out);
console.log(s_list.join(''));
}).catch(function(err) {
console.error('bad stuff: ', err);
}).then(function() {
fs.closeSync(appender);
fs.closeSync(reader);
});
fs.write(appender, '\ncheck it out, I am a string');
fs.write(appender, '\nwho killed kenny');
//fs.write(appender, ending_str);
} catch(err) {
fs.closeSync(reader);
console.log('err1');
throw err;
}
} catch(err) {
fs.closeSync(appender);
console.log('err2');
throw err;
}

Categories

Resources