Trading RAM for CPU (performance issue) - javascript

I'm working on a program that deals with files; I can do several things with them, like renaming them, reading their contents, etc.
Today I'm initializing it as follows:
return new Promise((resolve, reject) => {
    glob("path/for/files/**/*", {
        nodir: true
    }, (error, files) => {
        files = files.map((file) => {
            // properties like full name, basename, extension, etc.
        });
        resolve(files);
    });
});
So, I read the contents of a specific directory, return all the files in an array, and then use Array.map to iterate over the array and replace each path with an object of properties.
Sometimes I work with 200,000 text files, so this is becoming a problem because it consumes too much RAM.
So, I want to replace this with a constructor function that uses lazy loading, but I've never done that before, so I'm looking for a helping hand.
Here's my code:
const path = require('path');

class File {
    constructor(path) {
        this.path = path;
    }

    extension() {
        return path.extname(this.path);
    }

    // etc
}
So, my main question is: should I just return the evaluated value of the property, or should I replace the property itself? Like this:
extension() {
    this.extension = path.extname(this.path);
}
I understand this is a trade-off: I'm going to trade memory for CPU usage.
Thank you.

If you want to reduce RAM usage, I suggest you store an extra metadata file for each path, as follows:
Keep the paths in memory, or only some of them, as necessary.
Save the file properties to the hard drive:
const fs = require('fs');

files.forEach((file) => {
    // collect the properties you want for the file
    // ...
    var json = { path: file, extension: extension /* , ... */ };
    // mark the metadata file so you can access it later, for example: put it in the same path with a suffix
    var metaFile = file + '_meta.json';
    fs.writeFile(metaFile, JSON.stringify(json), (err) => {
        if (err) throw err;
    });
});
Now all the metadata is on the hard drive. This way, I believe, you trade memory for disk space and CPU calls.
If you wish to get the properties of a file, just read and JSON.parse its corresponding metadata file.
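A minimal sketch of that lookup, assuming the _meta.json suffix convention from above (the readMeta helper name is mine, not from the original code):
const fs = require('fs');

// Hypothetical helper: read back the properties previously stored for a given file path.
function readMeta(filePath, callback) {
    fs.readFile(filePath + '_meta.json', 'utf8', (err, raw) => {
        if (err) return callback(err);
        callback(null, JSON.parse(raw));
    });
}

// Usage:
// readMeta('path/for/files/a.txt', (err, props) => console.log(props.extension));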

There's no reason to trade CPU for space. Just walk the tree and process files as they're found. The space needed for walking the tree is proportional to the tree depth if it's done depth first. This almost certainly has the same overhead as just creating the list of paths in your existing code.
For directory walking, the Node.js FAQ recommends node-findit. The documentation there is pretty clear. Your code will look something like:
var finder = require('findit')(root_directory);
var path = require('path');

var basenames = [];

finder.on('file', function (file, stat) {
    basenames.push(path.basename(file));
    // etc
});
Or you can wrap the captured values in an object if you like.
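For instance, a sketch of that wrapping (the property names are just illustrative; findit passes an fs.Stats object as the second argument of the 'file' event):
var finder = require('findit')(root_directory);
var path = require('path');

var files = [];

finder.on('file', function (file, stat) {
    // Store only what you need per file; anything else can be derived later.
    files.push({
        path: file,
        basename: path.basename(file),
        extension: path.extname(file),
        size: stat.size
    });
});

finder.on('end', function () {
    // The walk is finished; the files array is ready to use here.
});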

If you store only the path property, for your example the Node.js class instances take about 200k * (path.length * 2 + 6) bytes of memory.
If you want to use lazy loading for basenames, extensions, etc., use lazy getters:
const path = require('path');

class File {
    constructor(path) {
        this.path = path;
        this._basename = null;
        this._extname = null;
    }

    get extname() {
        return this._extname || (this._extname = path.extname(this.path));
    }

    get basename() {
        return this._basename || (this._basename = path.basename(this.path));
    }
}
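To illustrate how the getters behave (the file name is just an example): nothing is computed until a property is first read, and the result is then cached on the instance:
const file = new File('path/for/files/report.txt');

console.log(file.extname);  // '.txt', computed on first access
console.log(file.extname);  // '.txt', returned from this._extname, no recomputation
console.log(file.basename); // 'report.txt'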

Related

How would you be able to use the fast-json package in the browser?

I have been browsing for ways to improve my app's performance and came across these two packages. I am building a forum-style app that receives a bunch of information from APIs and regularly parses and stringifies it. I have already optimized my front-end JS as much as I could (it's just vanilla JS at this point), but it sometimes still struggles to load on older phones. I would love to try these packages as they could potentially relieve my issue, but they are both in NPM form.
I would like to use them in my browser UI scripts. Is there any way to use NPM packages directly in the browser, or can you get a single-script version of them?
Specifically:
https://www.npmjs.com/package/fast-json
https://www.npmjs.com/package/fast-json-stringify
Very simple!
Copy the code from fast-json/blob/master/lib/EventTree.js into your source code, but without the module.exports block (at the bottom of the file).
Copy the code from fast-json/blob/master/lib/FastJson.js into your source code, but without const { EventTree } = require('./EventTree'); (at the top) and without module.exports = FastJson; (at the bottom).
TEST TIME ^_^
const fastJson = new FastJson();
// Path is a string representing a javascript object path
fastJson.on('ireland.people', (value) => {
console.log('ireland.people ->', value);
});
// Paths can be also an array of keys
fastJson.on(['spain', 'people', '1', 'name'], (value) => {
console.log(['spain', 'people', '1','name'], '->', value);
});
// Wildcards can be used to match all items in object or array
fastJson.on('spain.people[*].name', (value) => {
console.log('spain.people[*].name ->', value);
});
fastJson.on('*.people[*].name', (value) => {
console.log('*.people[*].name ->', value);
});
fastJson.write(data);
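For reference, data here is a JSON string (or Buffer); a minimal shape matching the paths above could look like this (the object contents are assumed for illustration):
const data = JSON.stringify({
    ireland: { people: [{ name: 'Alice' }, { name: 'Brendan' }] },
    spain: { people: [{ name: 'Pablo' }, { name: 'Maria' }] }
});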

Webpack plugin API - update content of an asset and rebuild hash

I am writing a plugin that needs to swap the contents of a certain JSON file after all of the modules have been bundled. I implemented it in two steps: a loader replaces the content with a placeholder, and the plugin replaces the placeholder.
The loader looks like this:
const loader = function(source) {
    this.clearDependencies();
    return JSON.stringify('REGENERATED_JSON');
};
The plugin looks roughly like this:
compilation.hooks.optimizeChunkAssets.tapAsync(PLUGIN_NAME, (chunks, callback) => {
    chunks.forEach((chunk) => {
        chunk.files.forEach((filePath) => {
            const asset = compilation.assets[filePath];
            const source = asset.source();
            replacements.forEach((id) => {
                const pattern = 'JSON.parse("\\"REGENERATED_JSON\\"")';
                const index = source.indexOf(pattern);
                if (index < 0) return;
                const content = JSON.stringify(json_content, null, 2);
                const updatedSource = new ReplaceSource(asset);
                updatedSource.replace(index, index + pattern.length, content);
                compilation.assets[filePath] = updatedSource;
            });
        });
    });
    callback();
});
This code has several issues:
It's fragile because it's tied to the JSON.parse call. I wasn't able to trick webpack into treating the file as JavaScript after it was imported as JSON.
The content hash isn't being rebuilt, and neither is the file size assessment; the JSON might be very large, but webpack wouldn't know.
Is there a way to solve these problems within webpack?
This comment helped me solve the issue: https://github.com/webpack/webpack/issues/8830#issuecomment-580095801
In short:
Use compilation.hooks.finishModules.tap to get at the modules after every module has been parsed.
Inject a new loader there and then, or add information to the module(s) in question (directly on the module object).
Trigger a rebuild of those module(s) with compilation.rebuildModule(module, callback); use promisify from the built-in util package to convert it to a promise so you can handle multiple parallel rebuilds. A rough sketch follows.
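Here is a rough sketch of those steps, assuming webpack 4/5 plugin conventions; the plugin name, the .json test, and the regenerateJson helper are placeholders rather than part of the original answer:
const { promisify } = require('util');

class RegenerateJsonPlugin {
    apply(compiler) {
        compiler.hooks.compilation.tap('RegenerateJsonPlugin', (compilation) => {
            compilation.hooks.finishModules.tapPromise('RegenerateJsonPlugin', async (modules) => {
                const rebuild = promisify(compilation.rebuildModule.bind(compilation));
                // Pick out the module(s) whose content should be swapped (placeholder test).
                const targets = [...modules].filter(
                    (m) => m.resource && m.resource.endsWith('generated.json')
                );
                // Attach the fresh content directly to the module object so a loader
                // (or a later hook) can pick it up during the rebuild.
                for (const module of targets) {
                    module.buildInfo.regeneratedJson = regenerateJson(); // hypothetical helper
                }
                // Rebuilding recalculates the module's source, size and content hash.
                await Promise.all(targets.map((module) => rebuild(module)));
            });
        });
    }
}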

How to Properly merge 2 JS Objects on a single property

I have 3 JSON files. Let's call them file1.json, file2.json and file3.json.
They all look very similar and share this structure:
"orgItems": [...]
There is a top-level orgItems property. What I'm struggling with is that these are 3 large files, almost 20-30 MB each. I want to concatenate all of them into 1 file.
What's the best way to do this? I want to grab the orgItems object out of each of the 3 files and have it all in one object in one file. So I just want one file, combinedResponses.json, and it should have:
combinedResponses.json
"orgItems":[...file1.orgItems,...file2.orgItems,...file3.orgItems]
Take a look at JSONStream. It provides an easy way to stream JSON files (so you don't have to load them all into memory at once) and receive events for matching paths. You could pipe the input stream to an output stream that writes the concatenated file.
This is a bit outside my wheelhouse and I'm sure it could be improved, but I think something like this does what you want. You could tweak the JSONStream.parse argument to get only the objects you care about.
const JSONStream = require('JSONStream');
const fs = require('fs');

function appendData (sourceFile, outputStream) {
    const file = fs.createReadStream(sourceFile);
    const jsonStream = JSONStream.parse('*');

    jsonStream.on('data', data => outputStream.write(JSON.stringify(data)));

    file.pipe(jsonStream);
    file.on('end', () => {
        file.close();
    });
}

const out = fs.createWriteStream('data/concatted.json', {autoClose: true});

appendData('data/data-1.json', out);
appendData('data/data-2.json', out);
appendData('data/data-3.json', out);

Writing to file multiple times vs Saving it in memory and writing once

I hope it's okay to ask questions about optimization here; if not, let me know where I can and I'll delete this post.
Also, if it matters, I'm using Node.js.
I'm coming up to a point where I want to optimize my application. I'm still in the process of creating it, but I want to think long term from the beginning. With that said, I have two optimization questions:
1) In one of my functions I have an array. I loop through this array, and on each iteration I create some text and write/append it to the file. Meaning, if there are 5 items in the array, I'd write to the file 5 times. On the other hand, I could create a variable, append the text to that variable on each iteration, and write to the file once at the very end. The text in question/length of the array could range from a few items to potentially unlimited (but leaning much more toward the few).
2) What's faster: deleting a file, or writing an empty string to it? (When I write to the file I use appendFile, so it appends to the file if it exists or creates it if it doesn't, so either deleting the file or writing to it works.)
Thanks!
On the topic of optimization: I'm using a custom scroll wheel for one of my elements, and for a brief few seconds when I refresh the page the default scroll wheel appears before the custom one loads. The custom one is smaller, so the element's position flickers when it switches. Is there a way to not load the element until the scroll wheel CSS file is loaded, or something along those lines?
Generally, there are 3 main ways to do this, each with pros and cons:
Append to the file: always opening and closing the file descriptor is slow, but you are sure of what you have on the file system.
Stream to the file: open the file, write to it, and close it when necessary.
Buffer and write once: beware of program exit (SIGINT); the whole buffer could be lost if you don't manage it correctly.
So it depends on your goals: speed? robustness? a high rate of changes to one file?
After weighing those, to find out which one is fastest you can write a benchmark like this:
const Benchmark = require('benchmark');
const suite = new Benchmark.Suite;
const { Readable } = require('stream')
const util = require('util');
const fs = require('fs');
const append = util.promisify(fs.appendFile);

const text = new Array(500).fill('a'.repeat(5000))

// add tests
const bench = suite
    .add('Append sequentially', {
        defer: true,
        fn: function (deferred) {
            Promise.all(text.map(t => append('append.txt', `${t}\n`)))
                .then(() => deferred.resolve())
        }
    })
    .add('Write once', {
        defer: true,
        fn: function (deferred) {
            const out = text.reduce((a, b) => `${a}\n${b}`, '')
            fs.writeFile('write.txt', out, () => deferred.resolve())
        }
    })
    .add('Stream', {
        defer: true,
        fn: function (deferred) {
            // no-op read(): the data is pushed manually below
            const readable = new Readable({ read() {} })
            const writerStream = fs.createWriteStream('stream.txt')
                .on('finish', () => deferred.resolve())
            readable.pipe(writerStream);
            text.forEach(s => readable.push(`${s}\n`))
            readable.push(null)
        }
    })
    .on('cycle', function (event) {
        console.log(String(event.target));
    })
    .on('complete', function () {
        console.log('Fastest is ' + this.filter('fastest').map('name'));
    })

bench.run({ async: true })
That will output (on my pc):
Append sequentially x 8.69 ops/sec ±21.07% (45 runs sampled)
Write once x 52.22 ops/sec ±5.27% (63 runs sampled)
Stream x 37.76 ops/sec ±2.72% (63 runs sampled)
Fastest is Write once
with node 8

Issue with output list for learnyounode #6 MAKE IT MODULAR

Just started coding last Thursday, bear with me here:
My code for this question of the tutorial is returning a list of just the extension names from the directory, and not a list of the files with said extension. E.g. if I used a directory with 3 .js files and used js as my extension argument on the command line, then I would get
1. js
2. js
3. js
as the output. Here is the question from the tutorial and my code. THANK YOU!
the question from learnyounode tutorial number 6:
LEARN YOU THE NODE.JS FOR MUCH WIN!
─────────────────────────────────────
MAKE IT MODULAR
Exercise 6 of 13
This problem is the same as the previous but introduces the concept of modules. You will need to create two files to solve this.
Create a program that prints a list of files in a given directory, filtered by the extension of the files. The first argument is the directory name and the second argument is the extension filter. Print the list of files (one file per line) to the console. You must use asynchronous I/O.
You must write a module file to do most of the work. The module must export a single function that takes three arguments: the directory name, the filename extension string and a callback function, in that order. The filename extension argument must be the same as was passed to your program. i.e. don't turn it into a RegExp or prefix with "." or do anything else but pass it to your module where you can do what you need to make your filter work.
The callback function must be called using the idiomatic node(err, data) convention. This convention stipulates that unless there's an error, the first argument passed to the callback will be null, and the second will be your data. In this case, the data will be your filtered list of files, as an Array. If you receive an error, e.g. from your call to fs.readdir(), the callback must be called with the error, and only the error, as the first argument.
You must not print directly to the console from your module file, only from your original program.
In the case of an error bubbling up to your original program file, simply check for it and print an informative message to the console.
These four things are the contract that your module must follow.
Export a single function that takes exactly the arguments described.
Call the callback exactly once with an error or some data as described.
Don't change anything else, like global variables or stdout.
Handle all the errors that may occur and pass them to the callback.
The benefit of having a contract is that your module can be used by anyone who expects this contract. So your module could be used by anyone else who does learnyounode, or the verifier, and just work.
and my code is:
module (p6m.js):
var fs = require('fs'), ph = require('path'), exports = module.exports = {}

exports.f = function (path, ext, callbk) {
    fs.readdir(path, function (err, files) {
        if (err) {
            return callbk(err, null)
        }
        files = files.filter(
            function (file) {
                return ph.extname(file) === "." + ext
            }
        )
        return callbk(null, files)
    })
}
and my program (p6.js):
var p6m = require('./p6m'), path = process.argv[2], ext = process.argv[3]

p6m.f(path, ext, function (err, files) {
    if (err) { return console.log.error('Error occured:', err) };
    files.forEach(function (file) {
        console.log(file)
    })
})
I got the same problem with my code, because of the need to export a single function. So instead of exporting a module function like this:
exports = module.exports = {}
exports.f = function (path, ext, callbk) {...};
try doing it this way:
module.exports = function (path, ext, callbk) {...};
Because it's a single function, you don't need to give it a name like "f" as in this statement:
exports.f = function (path, ext, callbk) {...};
Whenever you import the module, you get this single function directly, since that is all the module exports.
You can try this piece of code; it works well for me.
module code: mymodule.js
var fs = require('fs');
var ph = require('path');

module.exports = function (path, ext, callbk) {
    var pathio = "." + ext;
    fs.readdir(path, function (err, files) {
        if (err)
            return callbk(err);
        else {
            var listf = []; // listf is the resultant list
            for (var i = 0; i < files.length; i++) {
                if (ph.extname(files[i]) === pathio) {
                    listf.push(files[i]);
                }
            }
            callbk(null, listf);
        }
    });
}
program code: moduletest.js
var mod = require('./mymodule');

mod(process.argv[2], process.argv[3], function (err, listf) {
    if (err) {
        console.log('Error!')
    } else {
        for (var i = 0; i < listf.length; i++) {
            console.log(listf[i]);
        }
    }
});
And do remember, the learnyounode series is very specific about its style of coding and syntax, so even if your logic is right you still won't pass; your code needs to be clean and optimized. I suggest referring to the discussions on nodeschool itself for the various issues you might run into in the learnyounode series.
That will work and output the right results, but what they are looking for is something like this:
module.exports = function() {};
Because they only want one function total in the exports.
You could also do something like this:
module.exports = FindFilesByExtension;

function FindFilesByExtension(path, ext, callback) {
    // your code
}
Here is my solution.
This is my module file, filteredls.js:
var fs = require('fs');
var path = require('path');

module.exports = function filterFiles(folder, extension, callback) {
    fs.readdir(folder, function(err, files) {
        if (err) return callback(err);
        var filesArray = [];
        files.forEach(function(file) {
            if (path.extname(file) === "." + extension) {
                filesArray.push(file);
            }
        });
        return callback(null, filesArray);
    });
}
And here is my test file, modular.js, which uses the module:
var ff = require('./filteredls.js');

ff(process.argv[2], process.argv[3], function(err, data) {
    if (err)
        return console.error(err);
    data.forEach(function(file) {
        console.log(file);
    });
});
And this is my result screenshot.
