Firefox Addon Reading Local XPI, File and Directory Listing - javascript

Following a chat in #amo-editors, I was wondering if the following are possible from a Firefox Addon:
1. Opening a local XPI for reading
2. Listing all files in the above XPI, with their sizes
3. Reading selected files

Absolutely possible.
1 + 2) You have to use nsIZipReader to read the XPI. This gives you all files within it.
3) To read the contents, use the zip reader's getInputStream function, wrap the result in an nsIScriptableInputStream instance, and then read it with entry.realSize as the argument, since read on the stream takes the number of characters to read.
MDN :: nsIZipWriter
MDN :: nsIZipReader
Edit: I was curious, and I think I got it. Here's an example of how to dump the contents of a zip (list all files within). See the console.log(entryPointer) that spits out the "zip path". It also reads the contents of the files.
var zr = Cc["#mozilla.org/libjar/zip-reader;1"].createInstance(Ci.nsIZipReader);
Cu.import('resource://gre/modules/osfile.jsm');
Cu.import('resource://gre/modules/FileUtils.jsm');
var reusableStreamInstance = Cc['#mozilla.org/scriptableinputstream;1'].createInstance(Ci.nsIScriptableInputStream);
//var pathExtFolder = OS.Path.join(OS.Constants.Path.profileDir, 'extensions');
var pathToXpiToRead = OS.Path.join(OS.Constants.Path.profileDir, 'extensions', 'PortableTester#jetpack.xpi');
var nsiFileXpi = new FileUtils.File(pathToXpiToRead);
//Services.ww.activeWindow.alert(pathToXpiToRead);
try {
    zr.open(nsiFileXpi); //if the file does not exist it throws here
    var entries = zr.findEntries('*');
    while (entries.hasMore()) {
        var entryPointer = entries.getNext(); //just a string of "zip path" (this means path to file in zip, and it uses forward slashes, remember)
        var entry = zr.getEntry(entryPointer); // should return true on `entry instanceof Ci.nsIZipEntry`
        console.log('entryPointer', entryPointer);
        /* CONSOLE OUTPUT
         * "entryPointer" "bootstrap.js" Scratchpad/1:18
         */
        console.info('entry', entry);
        /* CONSOLE OUTPUT
         * "entry" XPCWrappedNative_NoHelper { QueryInterface: QueryInterface(), compression: Getter, size: Getter, realSize: Getter, CRC32: Getter, isDirectory: Getter, lastModifiedTime: Getter, isSynthetic: Getter, permissions: Getter, compression: 8 } Scratchpad/1:19
         */
        if (!entry.isDirectory) {
            var inputStream = zr.getInputStream(entryPointer);
            reusableStreamInstance.init(inputStream);
            var fileContents = reusableStreamInstance.read(entry.realSize);
            console.log('contents of file=', fileContents);
        } else {
            console.log('is directory, no stream to read');
        }
    }
} catch (ex) {
    console.warn('exception occurred = ', ex);
    if (ex.name == 'NS_ERROR_FILE_NOT_FOUND') {
        Services.ww.activeWindow.alert('XPI at path does not exist!\n\nPath = ' + pathToXpiToRead);
    }
} finally {
    zr.close();
    console.log('zr closed');
    //Cu.forceGC(); //I'm not sure if I should do this here
}
I'm not sure if I should do a Cu.forceGC() in the finally block; maybe @nmaier can advise us on that.
I'm also not sure if I handled reading the input stream properly. It works, but I don't know how it behaves memory-wise; this is the first time I've used .read(entry.realSize).
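If memory use or binary-safe reading is a concern, one hedged alternative (a sketch only, assuming the legacy NetUtil.jsm module is available in this context) is to let NetUtil read the stream instead of nsIScriptableInputStream:
Cu.import('resource://gre/modules/NetUtil.jsm');

// Sketch: read the entry's uncompressed bytes in one call.
// entry.realSize is the uncompressed size reported by nsIZipEntry.
var inputStream = zr.getInputStream(entryPointer);
var fileContents = NetUtil.readInputStreamToString(inputStream, entry.realSize);
inputStream.close();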

Related

How to get unknown file path - the extension path - using Javascript?

I am trying to develop an extension for VSCode and I need to search for the exact file path where my extension is saved. I've tried this code:
var text;
const directoryPath = path.join(Os.homedir(), '.vscode/extensions');
fs.readdir(directoryPath, function (err, files) {
    if (err) {
        y2.appendLine('Unable to scan directory: ' + err);
    }
    for (let i = 0; i < files.length; i++) {
        if (files[i].startsWith("MY_EXTENSION")) {
            text = files[i];
            y2.appendLine(text);
            break;
        }
    }
});
y2.appendLine(text);
y2 is only an output channel I added.
The outcome of this code is:
undefined
MY_EXTENSION
If I take out the second y2.appendLine(text), I receive only MY_EXTENSION, but I don't understand how to make text global, or how to modify the value of the global variable text inside readdir.
I also don't understand why y2.appendLine(text) executes first and readdir only after it.
It is really important and I would be grateful to have your help!
I'm not sure what exactly you are trying to do there, but looking at your code it seems like you are trying to save some data for your extension in particular.
For this purpose, you probably want to use the globalStorage object instead.
You can learn more about it in the Data storage section and the ExtensionContext documentation.
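For illustration, a minimal sketch (inside the extension's activate function; the 'lastScan' key is just an example, not anything from your code) of the storage APIs mentioned above:
// extension.js
function activate(context) {
    // Key-value storage that persists across sessions.
    context.globalState.update('lastScan', Date.now());
    const lastScan = context.globalState.get('lastScan');

    // A directory URI reserved for this extension's own files.
    const storageDir = context.globalStorageUri;
    console.log(lastScan, storageDir.fsPath);
}

module.exports = { activate };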
For the path to your extension, use the context object and its properties:
extensionPath
The absolute file path of the directory containing the extension.
Shorthand notation for ExtensionContext.extensionUri.fsPath
(independent of the uri scheme).
extensionUri
The uri of the directory containing the extension.
From ExtensionContext. If it is just for the purpose of storage, see @mausworks's answer about globalStorage or storagePath, etc.
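A short sketch of reading the extension's own location from the same context object (nothing here beyond the standard ExtensionContext properties):
function activate(context) {
    // Absolute path of the directory containing the extension.
    console.log(context.extensionPath);
    // The same location as a vscode.Uri; extensionUri.fsPath is equivalent.
    console.log(context.extensionUri.fsPath);
}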
You could also put your original directory search in a function. Since fs.readdir is asynchronous, returning from inside its callback is lost; wrapping it in a Promise makes the result usable by the caller:
function searchExt() {
    const directoryPath = path.join(Os.homedir(), '.vscode/extensions');
    return new Promise((resolve) => {
        fs.readdir(directoryPath, (err, files) => {
            if (err) {
                y2.appendLine(`Unable to scan directory: ${err}`);
                resolve(null);
                return;
            }
            for (let i = 0; i < files.length; i++) {
                if (files[i].startsWith('MY_EXTENSION')) {
                    resolve(files[i]);
                    return;
                }
            }
            resolve(null);
        });
    });
}
searchExt().then((searchResult) => y2.appendLine(searchResult));

Node.js How to read a large JSON file line by line (without completely loading it in memory) [duplicate]

I have a file which stores many JavaScript objects in JSON form, and I need to read the file, create each of the objects, and do something with them (insert them into a db in my case). The JavaScript objects can be represented in a format:
Format A:
[{name: 'thing1'},
....
{name: 'thing999999999'}]
or Format B:
{name: 'thing1'} // <== My choice.
...
{name: 'thing999999999'}
Note that the ... indicates a lot of JSON objects. I am aware I could read the entire file into memory and then use JSON.parse() like this:
fs.readFile(filePath, 'utf-8', function (err, fileContents) {
    if (err) throw err;
    console.log(JSON.parse(fileContents));
});
However, the file could be really large, so I would prefer to use a stream to accomplish this. The problem I see with a stream is that the file contents could be broken into data chunks at any point, so how can I use JSON.parse() on such objects?
Ideally, each object would be read as a separate data chunk, but I am not sure on how to do that.
var importStream = fs.createReadStream(filePath, {flags: 'r', encoding: 'utf-8'});
importStream.on('data', function(chunk) {
    var pleaseBeAJSObject = JSON.parse(chunk);
    // insert pleaseBeAJSObject in a database
});
importStream.on('end', function(item) {
    console.log("Woot, imported objects into the database!");
});
Note, I wish to prevent reading the entire file into memory. Time efficiency does not matter to me. Yes, I could try to read a number of objects at once and insert them all at once, but that's a performance tweak - I need a way that is guaranteed not to cause a memory overload, no matter how many objects are contained in the file.
I can choose to use FormatA or FormatB or maybe something else, just please specify in your answer. Thanks!
To process a file line-by-line, you simply need to decouple the reading of the file and the code that acts upon that input. You can accomplish this by buffering your input until you hit a newline. Assuming we have one JSON object per line (basically, format B):
var stream = fs.createReadStream(filePath, {flags: 'r', encoding: 'utf-8'});
var buf = '';

stream.on('data', function(d) {
    buf += d.toString(); // when data is read, stash it in a string buffer
    pump(); // then process the buffer
});

function pump() {
    var pos;
    while ((pos = buf.indexOf('\n')) >= 0) { // keep going while there's a newline somewhere in the buffer
        if (pos == 0) { // if there's more than one newline in a row, the buffer will now start with a newline
            buf = buf.slice(1); // discard it
            continue; // so that the next iteration will start with data
        }
        processLine(buf.slice(0, pos)); // hand off the line
        buf = buf.slice(pos + 1); // and slice the processed data off the buffer
    }
}

function processLine(line) { // here's where we do something with a line
    if (line[line.length - 1] == '\r') line = line.substr(0, line.length - 1); // discard CR (0x0D)
    if (line.length > 0) { // ignore empty lines
        var obj = JSON.parse(line); // parse the JSON
        console.log(obj); // do something with the data here!
    }
}
Each time the file stream receives data from the file system, it's stashed in a buffer, and then pump is called.
If there's no newline in the buffer, pump simply returns without doing anything. More data (and potentially a newline) will be added to the buffer the next time the stream gets data, and then we'll have a complete object.
If there is a newline, pump slices off the buffer from the beginning to the newline and hands it off to processLine. It then checks again whether there's another newline in the buffer (the while loop). In this way, we can process all of the lines that were read in the current chunk.
Finally, processLine is called once per input line. If present, it strips off the carriage return character (to avoid issues with line endings – LF vs CRLF), and then calls JSON.parse on the line. At this point, you can do whatever you need to with your object.
Note that JSON.parse is strict about what it accepts as input; you must quote your identifiers and string values with double quotes. In other words, {name:'thing1'} will throw an error; you must use {"name":"thing1"}.
Because no more than a chunk of data will ever be in memory at a time, this will be extremely memory efficient. It will also be extremely fast. A quick test showed I processed 10,000 rows in under 15ms.
Just as I was thinking that it would be fun to write a streaming JSON parser, I also thought that maybe I should do a quick search to see if there's one already available.
Turns out there is.
JSONStream "streaming JSON.parse and stringify"
Since I just found it, I've obviously not used it, so I can't comment on its quality, but I'll be interested to hear if it works.
It does work. Consider the following JavaScript, using _.isString:
stream.pipe(JSONStream.parse('*'))
    .on('data', (d) => {
        console.log(typeof d);
        console.log("isString: " + _.isString(d));
    });
This will log objects as they come in if the stream is an array of objects. Therefore the only thing being buffered is one object at a time.
As of October 2014, you can just do something like the following (using JSONStream) - https://www.npmjs.org/package/JSONStream
var fs = require('fs'),
    JSONStream = require('JSONStream');

var getStream = function () {
    var jsonData = 'myData.json',
        stream = fs.createReadStream(jsonData, { encoding: 'utf8' }),
        parser = JSONStream.parse('*');
    return stream.pipe(parser);
};

getStream().pipe(MyTransformToDoWhateverProcessingAsNeeded).on('error', function (err) {
    // handle any errors
});
To demonstrate with a working example:
npm install JSONStream event-stream
data.json:
{
  "greeting": "hello world"
}
hello.js:
var fs = require('fs'),
    JSONStream = require('JSONStream'),
    es = require('event-stream');

var getStream = function () {
    var jsonData = 'data.json',
        stream = fs.createReadStream(jsonData, { encoding: 'utf8' }),
        parser = JSONStream.parse('*');
    return stream.pipe(parser);
};

getStream()
    .pipe(es.mapSync(function (data) {
        console.log(data);
    }));
$ node hello.js
// hello world
I had a similar requirement: I needed to read a large JSON file in Node.js, process the data in chunks, call an API, and save the results in MongoDB.
inputFile.json is like:
{
    "customers": [
        { /*customer data*/ },
        { /*customer data*/ },
        { /*customer data*/ }....
    ]
}
Now I used JSONStream and event-stream to achieve this sequentially (pausing the stream while each customer is processed).
var JSONStream = require("JSONStream");
var es = require("event-stream");

fileStream = fs.createReadStream(filePath, { encoding: "utf8" });
fileStream.pipe(JSONStream.parse("customers.*")).pipe(
    es.through(
        function (data) {
            console.log("printing one customer object read from file ::");
            console.log(data);
            this.pause();
            processOneCustomer(data, this);
            return data;
        },
        function end() {
            console.log("stream reading ended");
            this.emit("end");
        }
    )
);
function processOneCustomer(data, es) {
    DataModel.save(function (err, dataModel) {
        es.resume();
    });
}
I realize that you want to avoid reading the whole JSON file into memory if possible, however if you have the memory available it may not be a bad idea performance-wise. Using node.js's require() on a json file loads the data into memory really fast.
I ran two tests to see what the performance looked like on printing out an attribute from each feature from a 81MB geojson file.
In the 1st test, I read the entire geojson file into memory using var data = require('./geo.json'). That took 3330 milliseconds and then printing out an attribute from each feature took 804 milliseconds for a grand total of 4134 milliseconds. However, it appeared that node.js was using 411MB of memory.
In the second test, I used @arcseldon's answer with JSONStream + event-stream. I modified the JSONPath query to select only what I needed. This time the memory never went higher than 82MB; however, the whole thing now took 70 seconds to complete!
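For context, here is a minimal sketch of the two approaches compared above (the GeoJSON structure, the 'features.*' JSONPath, and the properties.name attribute are assumptions for illustration, not the exact query used in the tests):
// Test 1: load the whole file with require() - fast, but holds everything in memory.
const data = require('./geo.json');
data.features.forEach((feature) => console.log(feature.properties.name));

// Test 2: stream only the features with JSONStream - low memory, much slower in this comparison.
const fs = require('fs');
const JSONStream = require('JSONStream');
fs.createReadStream('./geo.json', { encoding: 'utf8' })
    .pipe(JSONStream.parse('features.*'))
    .on('data', (feature) => console.log(feature.properties.name));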
I wrote a module that can do this, called BFJ. Specifically, the method bfj.match can be used to break up a large stream into discrete chunks of JSON:
const bfj = require('bfj');
const fs = require('fs');

const stream = fs.createReadStream(filePath);

bfj.match(stream, (key, value, depth) => depth === 0, { ndjson: true })
    .on('data', object => {
        // do whatever you need to do with object
    })
    .on('dataError', error => {
        // a syntax error was found in the JSON
    })
    .on('error', error => {
        // some kind of operational error occurred
    })
    .on('end', error => {
        // finished processing the stream
    });
Here, bfj.match returns a readable, object-mode stream that will receive the parsed data items, and is passed 3 arguments:
A readable stream containing the input JSON.
A predicate that indicates which items from the parsed JSON will be pushed to the result stream.
An options object indicating that the input is newline-delimited JSON (this is to process format B from the question, it's not required for format A).
Upon being called, bfj.match will parse JSON from the input stream depth-first, calling the predicate with each value to determine whether or not to push that item to the result stream. The predicate is passed three arguments:
The property key or array index (this will be undefined for top-level items).
The value itself.
The depth of the item in the JSON structure (zero for top-level items).
Of course a more complex predicate can also be used as necessary according to requirements. You can also pass a string or a regular expression instead of a predicate function, if you want to perform simple matches against property keys.
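For example, a minimal sketch of the string-selector form (assuming, as described above, that a string selector is matched against property keys; the "name" key is just an illustration):
const bfj = require('bfj');
const fs = require('fs');

// Push every value stored under a "name" property to the result stream.
bfj.match(fs.createReadStream(filePath), 'name')
    .on('data', value => {
        console.log(value); // each matched "name" value
    })
    .on('end', () => {
        // finished processing
    });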
If you have control over the input file, and it's an array of objects, you can solve this more easily. Arrange to output the file with each record on one line, like this:
[
{"key": value},
{"key": value},
...
This is still valid JSON.
Then, use the node.js readline module to process them one line at a time.
var fs = require("fs");
var lineReader = require('readline').createInterface({
input: fs.createReadStream("input.txt")
});
lineReader.on('line', function (line) {
line = line.trim();
if (line.charAt(line.length-1) === ',') {
line = line.substr(0, line.length-1);
}
if (line.charAt(0) === '{') {
processRecord(JSON.parse(line));
}
});
function processRecord(record) {
// Process the records one at a time here!
}
I solved this problem using the split npm module. Pipe your stream into split, and it will "Break up a stream and reassemble it so that each line is a chunk".
Sample code:
var fs = require('fs'),
    split = require('split');

var stream = fs.createReadStream(filePath, { flags: 'r', encoding: 'utf-8' });
var lineStream = stream.pipe(split());

lineStream.on('data', function (chunk) {
    var json = JSON.parse(chunk);
    // ...
});
Using the @josh3736 answer, but for ES2021 and Node.js 16+ with async/await + Airbnb rules:
import fs from 'node:fs';

const file = 'file.json';

/**
 * @callback itemProcessorCb
 * @param {object} item The current item
 */

/**
 * Process each data chunk in a stream.
 *
 * @param {import('fs').ReadStream} readable The readable stream
 * @param {itemProcessorCb} itemProcessor A function to process each item
 */
async function processChunk(readable, itemProcessor) {
    let data = '';
    let total = 0;

    // eslint-disable-next-line no-restricted-syntax
    for await (const chunk of readable) {
        // join with the last partial result, remove CRs and split into lines
        const lines = (data + chunk).replace(/\r/g, '').split('\n');
        // clear last result
        data = '';

        // process lines
        let line = lines.shift();
        const items = [];

        while (line !== undefined) {
            // check that it isn't an empty line or an array delimiter
            if (line !== '' && !/[\[\]]+/.test(line)) {
                try {
                    // remove the trailing comma and parse the JSON
                    const json = JSON.parse(line.replace(/\s?(,)+\s?$/, ''));
                    items.push(json);
                } catch (error) {
                    // the last line may be only a partial line from this chunk,
                    // so keep it to join with the next chunk's data
                    data += line;
                }
            }

            // continue
            line = lines.shift();
        }

        total += items.length;

        // Process items in parallel
        await Promise.all(items.map(itemProcessor));
    }

    console.log(`${total} items processed.`);
}

// Process each item
async function processItem(item) {
    console.log(item);
}

// Init
try {
    const readable = fs.createReadStream(file, {
        flags: 'r',
        encoding: 'utf-8',
    });

    await processChunk(readable, processItem);
} catch (error) {
    console.error(error.message);
}
For a JSON like:
[
    { "name": "A", "active": true },
    { "name": "B", "active": false },
    ...
]
https.get(url1, function (response) {
    var data = "";
    response.on('data', function (chunk) {
        data += chunk.toString();
    })
    .on('end', function () {
        console.log(data);
    });
});
I think you need to use a database. MongoDB is a good choice in this case because it is JSON compatible.
UPDATE:
You can use the mongoimport tool to import JSON data into MongoDB.
mongoimport --collection collection --file collection.json

node.js - pngjs error: "Stream not writable" randomly

I am working with pngjs through many of its methods. Most of the time, they work fine. However, as in the following example, I sometimes get an error: "Stream is not writable"
var fs = require('fs'),
    PNG = require('pngjs').PNG;

var dst = new PNG({ width: 100, height: 50 });

fs.createReadStream('http://1.1m.yt/hry7Eby.png') //download this picture in order to examine the code.
    .pipe(new PNG())
    .on('parsed', function (data) {
        console.log(data);
    });
This case is not singular: I get this error on one random PNG image about once a day, across all of pngjs's methods, and that error obviously crashes my app.
(Note: you can't use the HTTP link I gave you with a read stream; you will have to download and rename it, and then do something like this:)
fs.createReadStream('1.png')
Thank you for your time and effort.
This seems to be a bug in the library, though I'm wary of saying so as I'm no expert in PNGs. The parser seems to complete while the stream is still writing. It encounters the IEND, and so calls this:
ParserAsync.prototype._finished = function() {
    if (this.errord) {
        return;
    }
    if (!this._inflate) {
        this.emit('error', 'No Inflate block');
    }
    else {
        // no more data to inflate
        this._inflate.end();
    }
    this.destroySoon();
};
If you comment out the this.destroySoon(); it finishes the image correctly, instead of eventually calling this function:
ChunkStream.prototype.end = function(data, encoding) {
    if (data) {
        this.write(data, encoding);
    }
    this.writable = false;

    // already destroyed
    if (!this._buffers) {
        return;
    }

    // enqueue or handle end
    if (this._buffers.length === 0) {
        this._end();
    }
    else {
        this._buffers.push(null);
        this._process();
    }
};
...which would otherwise end up setting the stream's writable flag to false or, if you comment that out, pushing a null value into the _buffers array and breaking ChunkStream._processRead.
I'm fairly certain this is a synchronicity problem between the time the zlib parser takes to complete and the time the stream takes to complete, since if you do this synchronously it works fine:
var data = fs.readFileSync('pic.png');
var png = PNG.sync.read(data);
var buff = PNG.sync.write(png);
fs.writeFileSync('out2.png', buff);

node.js directory search for file with name

I need help writing a Node.js application that searches for all sub-directories under the current directory whose names contain a specified string.
For example, the user wants to search for all directories that have the string 'test' in them.
What is the JS code I need to use?
I tried using this:
var walk = function(dir) {
    var results = []
    var list = fs.readdirSync(dir)
    list.forEach(function(file) {
        file = dir + '/' + file
        var stat = fs.statSync(file)
        if (stat && stat.isDirectory()) results = results.concat(walk(file))
        else results.push(file)
    })
    return results
}
Take a look at node-glob.
In your case you could use it like this. This pattern will give you all files in the folder whose names contain test at least once.
var glob = require("glob")
glob("+(test).js", options, function (er, files) {
// files is an array of filenames.
// If the `nonull` option is set, and nothing
// was found, then files is ["**/*.js"]
// er is an error object or null.
if (er) {
// omg something went wrong
throw new Exception(er);
}
var requiredFiles = files.map(function(filename) {
return require(filename);
});
// do something with the required files
});
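Since the question asks specifically for sub-directories whose names contain the string, here is a hedged sketch of one way to express that with node-glob (relying on the documented convention that a pattern ending in / matches only directories; adjust the pattern to your needs):
var glob = require("glob");

// Match every sub-directory, at any depth, whose name contains "test".
// The trailing slash restricts matches to directories.
glob("**/*test*/", function (er, dirs) {
    if (er) {
        throw er;
    }
    console.log(dirs); // array of matching directory paths
});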

Access to files from extension sometimes returns NS_ERROR_FILE_IS_LOCKED

Our extension (Add-on SDK) looks for new files in the folder C:\scan and sends them to a server. Every second the extension looks for the latest file creation time and records it as the latest (comparing the new file's creation time with the one from one second ago).
Files are put into C:\scan by a Brother 7050 scanner on Windows 7.
But sometimes in console.error we see:
Exception
message: "Component returned failure code: 0x8052000e (NS_ERROR_FILE_IS_LOCKED)
[nsIFileInputStream.init]",
result: 2152857614,
name: "NS_ERROR_FILE_IS_LOCKED"
I think the Brother 7050 application has not had time to unlock the file before our extension starts to read it.
Q: How can we read the latest file in the folder reliably, without the file-lock error?
/*
    adr - folder path
    array2 - array for search
    mode - search or not search in array2 (0-1)
*/
function getfilelist(adr, array2, mode)
{
    filelist2 = [];
    filelist2[0] = "";
    filelist2[1] = 0;
    var file = new FileUtils.File(adr);
    var enumerator = file.directoryEntries;
    while (enumerator.hasMoreElements())
    {
        inner = enumerator.getNext().QueryInterface(Ci.nsIFile);
        if (inner.isFile())
        {
            namearray = inner.leafName.split(".");
            r = namearray[namearray.length - 1];
            if (r == "jpg" || r == "jpeg")
            {
                if (mode == 0)
                {
                    if (inner.lastModifiedTime > filelist2[1])
                    {
                        filelist2[0] = inner.leafName;
                        filelist2[1] = inner.lastModifiedTime;
                    }
                }
                else if (mode == 1)
                {
                    if (inner.lastModifiedTime > array2[1] && inner.isReadable() == true)
                        return inner.leafName;
                }
            }
        }
    }
    if (mode == 0)
    {
        return filelist2;
    }
    return false;
}
The reason why you see NS_ERROR_FILE_IS_LOCKED is most likely that the file is still being written and you are trying to access it too early. However, it is also possible that some other software immediately locks the file to check it, e.g. your anti-virus.
Either way, there is no way to ignore the lock. Even if you could, you might get an incomplete file as a result. What you should do is note that exception and remember to try to read that file on the next run. Something along these lines:
var {Cr} = require("chrome");
var unaccessible = null;
setInterval(checknewfiles, 1000);
function checknewfiles()
{
var files = getfilelist(...);
if (unaccessible)
{
// Add any files that we failed to read before to the end of the list
files.push.apply(files, unaccessible);
unaccessible = null;
}
for (var file of files)
{
try
{
readfile(file);
}
except(e if e.result == Cr.NS_ERROR_FILE_IS_LOCKED)
{
if (!unaccessible)
unaccessible = [];
unaccessible.push(file);
}
}
}
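Conditional catch clauses were a non-standard SpiderMonkey extension; if they are not available, a hedged equivalent is a plain catch with an explicit check that rethrows everything else:
try
{
    readfile(file);
}
catch (e)
{
    // Only swallow the file-lock error; rethrow anything else.
    if (e.result != Cr.NS_ERROR_FILE_IS_LOCKED)
        throw e;
    if (!unaccessible)
        unaccessible = [];
    unaccessible.push(file);
}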
For reference:
Components.results
Chrome authority
Conditional catch clauses
for..of loop
