Append JSON to a file using node streams - javascript

Based on what I have read here, I'm using fs.createWriteStream to write some JSON to a file. I am processing the data in chunks of about 50. So at the beginning of the script, I open my stream, and then use a function to pass it in along with some JSON, which is working pretty well for writing.
const myStream = fs.createWriteStream(
  path.join(RESULTS_DIR, `my-file.json`),
  {
    flags: 'a'
  }
)

function appendJsonToFile(stream, jsonToAppend) {
  return new Promise((resolve, reject) => {
    try {
      stream.write(JSON.stringify(jsonToAppend, null, 2))
      resolve('STREAM_WRITE_SUCCESS')
    } catch (streamError) {
      reject('STREAM_WRITE_FAILURE', streamError)
    }
  })
}
appendJsonToFile(myStream, someJson)
However, because each piece of data to be written is an array of objects, the structure I eventually get in my file will look like this:
[
  {
    "data": "test data 1",
  },
  {
    "data": "test data 2",
  }
][
  {
    "data": "test data 3",
  },
  {
    "data": "test data 4",
  }
]
How can I append these pieces of data so that the result is properly formatted JSON, rather than just a series of arrays?

If the file is always formatted as you show above, the four things you'd need to do are:
1. Find out the current length of the file and subtract 2 chars (\n], since there's no newline at the end),
2. Remove the first char of the JSON you're saving,
3. Save the file using r+ mode with the start option set to that position,
4. End the stream after each save.
Here's the link to the createWriteStream options. A minimal plain-fs sketch of these four steps follows below.
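A minimal sketch, assuming the file already exists and contains a pretty-printed JSON array as in the question (the function and variable names here are illustrative, not from the original answer):

const fs = require('fs');

// A rough sketch: open the file at (size - 2) so the trailing "\n]" gets
// overwritten, turn the new chunk's leading "[" into ",", and let the chunk's
// own trailing "]" close the array again.
function appendToJsonArrayFile(file, jsonToAppend) {
  return new Promise((resolve, reject) => {
    const start = fs.statSync(file).size - 2;                           // step 1
    const chunk = ',' + JSON.stringify(jsonToAppend, null, 2).slice(1); // step 2
    const ws = fs.createWriteStream(file, { flags: 'r+', start });      // step 3
    ws.write(chunk, (err) => {
      if (err) return reject(err);
      ws.end(() => resolve('STREAM_WRITE_SUCCESS'));                    // step 4
    });
  });
}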
Now the other thing is that point 4 makes this rather inefficient and calls into question whether streaming should be used here at all. I think it does make sense, but it depends on whether you need the file to be readable between writes - if not, you should put a transform stream in the middle, flush in between the writes, and only end the stream after all work is done (on beforeExit).
You can write this yourself, but I'm the author of a framework called scramjet which makes these cases much easier:
const scramjet = require('scramjet');
const fs = require('fs');
const path = require('path');

const myStream = new scramjet.DataStream();
const file = path.join(RESULTS_DIR, `my-file.json`);
const start = fs.statSync(file).size - 2;

myStream
  .flatten()
  .toJSONArray()
  .shift(1)
  .pipe(fs.createWriteStream(
    file,
    {flags: 'r+', start}
  ));
function appendJsonToFile(stream, jsonToAppend) {
  return new Promise((resolve, reject) => {
    try {
      stream.write(jsonToAppend)
      resolve('STREAM_WRITE_SUCCESS')
    } catch (streamError) {
      reject('STREAM_WRITE_FAILURE', streamError)
    }
  })
}

appendJsonToFile(myStream, someJson)

process.on('beforeExit', () => myStream.end());
You can use this as above, but if you'd prefer to work on this with plain node streams this should nudge you in the right direction.
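For instance, here is a rough plain-streams sketch of the transform idea mentioned above (my own illustration, with illustrative names; it reuses RESULTS_DIR from the question, and the file is only valid JSON once the stream ends):

const fs = require('fs');
const path = require('path');
const { Transform } = require('stream');

// Object-mode Transform: each incoming chunk is an array of records.
// It emits "[" before the first record, "," before every later one,
// and "]" when the stream is flushed, so the output is one valid JSON array.
class JsonArrayStream extends Transform {
  constructor() {
    super({ writableObjectMode: true });
    this.first = true;
  }
  _transform(records, _enc, cb) {
    for (const record of records) {
      this.push((this.first ? '[\n' : ',\n') + JSON.stringify(record, null, 2));
      this.first = false;
    }
    cb();
  }
  _flush(cb) {
    this.push(this.first ? '[]\n' : '\n]\n');
    cb();
  }
}

// Usage sketch: write each batch to jsonStream instead of the file stream,
// and end it once, right before the process exits.
const jsonStream = new JsonArrayStream();
jsonStream.pipe(fs.createWriteStream(path.join(RESULTS_DIR, 'my-file.json')));
process.on('beforeExit', () => jsonStream.end());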

I've reworked the code with error handling and a FILE NOT FOUND handler. Solution derived from Michał Karpacki's answer.
const path = require('path');
const fs = require('fs');

const getFolderPath = () => __dirname || process.cwd();
const getFilePath = (fileName) => path.join(getFolderPath(), `${fileName}`);

/**
 * @param {string} fileName - file name including its extension
 * @param {Array<*>} arrayData
 * @return {Promise<*>}
 */
const writeFileAsync = async (fileName, arrayData) => {
    const filePath = getFilePath(fileName);
    return new Promise((resolve, reject) => {
        try {
            // Overwrite the trailing "\n]" and append the new data as ", ... ]"
            const _WritableStream = fs.createWriteStream(filePath, {flags: 'r+', start: fs.statSync(filePath).size - 2});
            _WritableStream.write(JSON.stringify(arrayData, null, 2).replace(/\[/, ','), (streamError) => {
                if (streamError) return reject(['STREAM_WRITE_FAILURE', streamError]);
            });
            return resolve('STREAM_WRITE_SUCCESS');
        } catch (streamError) {
            /** ENOENT - no such file or directory: create the folder and write the file from scratch */
            if (streamError.code === 'ENOENT') {
                fs.mkdirSync(getFolderPath(), {recursive: true});
                return resolve(fs.writeFileSync(filePath, JSON.stringify(
                    Array.from({...arrayData, length: arrayData.length}), null, 2
                )));
            }
            /** RangeError - invalid start position for the write stream (file is smaller than 2 bytes) */
            if (streamError instanceof RangeError) {
                console.error(`> [ERR_OUT_OF_RANGE] =>`, streamError);
                const _WritableStream = fs.createWriteStream(filePath, {flags: 'r+'});
                return resolve(_WritableStream.write(JSON.stringify(arrayData, null, 2), (streamError) => {
                    if (streamError) return reject(['STREAM_WRITE_FAILURE', streamError]);
                }));
            }
            return reject(['STREAM_WRITE_FAILURE', streamError]);
        }
    });
};

(() => writeFileAsync('test1.json',
    [{
        key: "value 1"
    }, {
        key: "value 2"
    }]
))();
/* Output after 1st time run =>
[
  {
    "key": "value 1"
  },
  {
    "key": "value 2"
  }
]
*/
/* Output after 2nd time run =>
[
  {
    "key": "value 1"
  },
  {
    "key": "value 2"
  },
  {
    "key": "value 1"
  },
  {
    "key": "value 2"
  }
]
*/

Related

Iterate deeply nested object with unknown level and add remove key/value based on user provided conditions

Could anyone please guide me on how to achieve the challenge below?
I have thousands of mock API request/response JSON files. They are deeply nested, and they are all structured differently. I need to add/update/delete an entry at the specific location where user-provided conditions match. I am not sure how to approach this problem. I have tried something like the code below: I ask the user for a path at which to start looking, but that increases the time spent, since the user has to find the path in every file and pass that info to the API, and the code below only works up to 2 levels deep. I need to search the full tree for every place where all the user-provided conditions match, and add/update/delete data there. I took the conditions as an array of objects.
Draft Code
const _ = require("lodash");
const file = "./sample.json";
const actions = ["add", "delete", "update"];
const consumer = (file, key, where, data, action) => {
try {
const act = action.toLowerCase();
if(!actions.includes(act) throw new Error("invalid action provided");
if(_.isArray(where) && _.every(where, _.isObject())) throw new Error("no where clause condition provided");
let content = require(file);
let typeKeyContent = null;
let keyContent = _.get(content, key);
if(!keyContent) throw new Error("invalid key");
if(_.isArray(keyContent)) {
typeKeyContent = "array"
} else if (_.isObject(keyContent)) {
typeKeyContent = "object"
}
switch (act) {
case "add":
if (typeKeyContent === "array") {
// array logic
for (let i = 0; i < keyContent.length; i++) {
const result = where.every(element => {
for (let key in element) {
return keyContent[key] && element[key] === keyContent[key];
}
});
if (!result) {
console.log("attributes matching -> ", result);
return;
}
keyContent[i] = {...keyContent[i], ...data }
}
let newcontent = _.set(content, key, keyContent);
console.log("newcontent -> \n",JSON.stringify(newcontent, null, 2));
return;
}
const result = where.every(element => {
for (let key in element) {
return keyContent[key] && element[key] === keyContent[key];
}
});
if (!result) {
console.log("attributes matching -> ", result);
return;
}
keyContent = { ...keyContent, ...data };
let newcontent = _.set(content, key, keyContent);
console.log("newcontent -> \n",JSON.stringify(newcontent, null, 2));
// TODO :: store back in json file
break;
default:
console.log("reached default case");
return;
}
} catch(err) {
console.log("ERROR :: CONSUMER ::", error);
}
}
// AND based condition only
const conditions = [
{ name: "Essential Large" },
{ selected: true }
];
const newdata = { description: "our best service" } // wants to add new prop
consumer(file, "selected_items.essential", conditions, newdata, "add");
Sample JSON:
{
  "status": 200,
  "request": {},
  "response": {
    "ffs": false,
    "customer": {
      "customer_id": 1544248,
      "z_cx_id": 123456
    },
    "selected_items": {
      "essential": [
        {
          "id": 4122652,
          "name": "Essential Large",
          "selected": true,
          "description": "our best service" // will be added
        },
        {
          "id": 4122653,
          "name": "Essential Large",
          "selected": true,
          "description": "our best service" // will be added
        }
      ]
    },
    "service_partner": {
      "id": 3486,
      "name": "Some String",
      "street": "1234 King St."
    },
    "subject": "Project",
    "description": "Issue: (copy/paste service request details here Required"
  }
}
So you want to go through every key of a nested object right?
function forEvery(object, fn){
  // object is the object, fn is the function
  // this function goes through each item in an object loaded from a JSON string
  // fn takes in 4 arguments: the current element, that element's parent, the key, and the level of depth (starts at 1)
  var arr = []
  function recurse(obj, map, depth){
    Object.keys(obj).forEach((a, i) => {
      fn(obj[a], obj, a, depth) // because fn can affect the object, the if statement should come after it, not before
      if (typeof obj[a] == "object" && obj[a] != null){ // if the nested value is another object
        map.push(a); arr.push(map)
        recurse(obj[a], [...map], depth + 1)
      }
    })
  }
  recurse(object, [], 1)
}

// usage would be like:
// let customerCondition = /*some logic here*/
// let testObj = JSON.parse( (require('fs')).readFileSync('dirToSomeFile.json') )
// forEvery(testObj, customerCondition)
Here's a live example
let testObj={"status":200,"request":{},"response":{"ffs":false,"customer":{"customer_id":1544248,"z_cx_id":123456},"selected_items":{"essential":[{"id":4122652,"name":"Essential Large","selected":true},{"id":4122653,"name":"Essential Medium","selected":false}]},"service_partner":{"id":3486,"name":"Some String","street":"1234 King St."},"subject":"Project","description":"Issue: (copy/paste service request details here Required"}}
function forEvery(object, fn){
  // object is the object, fn is the function
  // this function goes through each item in an object loaded from a JSON string
  // fn takes in 4 arguments: the current element, that element's parent, the key, and the level of depth (starts at 1)
  var arr = []
  function recurse(obj, map, depth){
    Object.keys(obj).forEach((a, i) => {
      fn(obj[a], obj, a, depth) // because fn can affect the object, the if statement should come after it, not before
      if (typeof obj[a] == "object" && obj[a] != null){ // if the nested value is another object
        map.push(a); arr.push(map)
        recurse(obj[a], [...map], depth + 1)
      }
    })
  }
  recurse(object, [], 1)
}

// example usage
let userQuery = [{ name: "Essential Large" }, { selected: true }]; // the user query in the format you gave
let userCondition = {} // assuming each key across userQuery is unique, I set up a model object for comparisons later on
userQuery.forEach(obj => { // fill the model object :D
  Object.keys(obj).forEach(key => {
    userCondition[key] = obj[key]
  })
})

let testFn = (elem, parent, key, depth) => {
  // compare against the model object
  let condition = typeof elem != "object" ? false :
    Object.keys(userCondition)
      .every(item => userCondition[item] == elem[item])
  // true if it matches the user condition (meaning elem must be an object), false otherwise
  if (condition){
    console.log(parent[key], "will now be deleted")
    delete(parent[key]) // deletion example (if the user conditions match)
  }
}

forEvery(testObj, testFn)
console.log("and the changed object looks like", testObj)

How to parse through large JSONL data Node js

I am trying to read through a large JSONL file, maybe a couple hundred lines up to thousands or possibly a million; below is a sample of the data.
{"id":"gid://shopify/Product/1921569226808"}
{"id":"gid://shopify/ProductVariant/19435458986040","__parentId":"gid://shopify/Product/1921569226808"}
{"id":"gid://shopify/Product/1921569259576"}
{"id":"gid://shopify/ProductVariant/19435459018808","__parentId":"gid://shopify/Product/1921569259576"}
{"id":"gid://shopify/Product/1921569292344"}
{"id":"gid://shopify/ProductVariant/19435459051576","__parentId":"gid://shopify/Product/1921569292344"}
{"id":"gid://shopify/Product/1921569325112"}
{"id":"gid://shopify/ProductVariant/19435459084344","__parentId":"gid://shopify/Product/1921569325112"}
{"id":"gid://shopify/Product/1921569357880"}
{"id":"gid://shopify/ProductVariant/19435459117112","__parentId":"gid://shopify/Product/1921569357880"}
{"id":"gid://shopify/ProductVariant/19435458986123","__parentId":"gid://shopify/Product/1921569226808"}
So each line is a JSON object, either a Product or a Product child identified by __parentId. Given that the data may contain thousands of lines, what's the best way to read through it and return a regular JSON object like this?
{
  "id": "gid://shopify/Product/1921569226808",
  "childrens": [
    {"id": "gid://shopify/ProductImage/20771195224224", "__parentId": "gid://shopify/Product/1921569226808"},
    {"id": "gid://shopify/ProductImage/20771195344224", "__parentId": "gid://shopify/Product/1921569226808"},
    {"id": "gid://shopify/ProductImage/20771329344224", "__parentId": "gid://shopify/Product/1921569226808"}
  ]
}
The data is coming back from Shopify and they advice to:
Because nested connections are no longer nested in the response data
structure, the results contain the __parentId field, which is a
reference to an object's parent. This field doesn’t exist in the API
schema, so you can't explicitly query it. It's included automatically
in bulk operation result.
Read the JSONL file in reverse
Reading the JSONL file in reverse makes
it easier to group child nodes and avoids missing any that appear
after the parent node. For example, while collecting variants, there
won't be more variants further up the file when you come to the
product that the variants belong to. After you download the JSONL
file, read it in reverse, and then parse it so that any child nodes
are tracked before the parent node is discovered.
You can read more about all of this in Shopify's bulk operations documentation.
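To illustrate the reverse-reading approach Shopify describes, here is a rough sketch (my own, not from Shopify or the answers below; the file name is illustrative, and it loads the whole file into memory, which the streaming answers below avoid):

const fs = require('fs');

// Walk the JSONL lines from bottom to top, collecting children until their
// parent line is reached; when reading in reverse, children are guaranteed
// to be seen before their parent.
const lines = fs.readFileSync('bulk_operation.jsonl', 'utf8').trim().split('\n');
const pendingChildren = {}; // __parentId -> [child, child, ...]
const products = [];

for (let i = lines.length - 1; i >= 0; i--) {
  const node = JSON.parse(lines[i]);
  if (node.__parentId) {
    (pendingChildren[node.__parentId] = pendingChildren[node.__parentId] || []).push(node);
  } else {
    products.push({ ...node, childrens: pendingChildren[node.id] || [] });
  }
}

console.log(JSON.stringify(products, null, 2));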
Consider using streams so that you don't have to load the entire file in memory.
You can use readline (a native module) to process each line individually.
I took the line processing part from @terrymorse: https://stackoverflow.com/a/65484413/14793527
const readline = require('readline');
const fs = require('fs');

let res = {};

function processLine(line) {
  // each line arrives as a JSON string, so parse it first
  const obj = JSON.parse(line);
  const {id, __parentId} = obj;

  // if there is no `__parentId`, this is a parent
  if (typeof __parentId === 'undefined') {
    res[id] = {
      id,
      childrens: []
    };
    return res;
  }

  // this is a child, create its parent if necessary
  if (typeof res[__parentId] === 'undefined') {
    res[__parentId] = {
      id: __parentId,
      childrens: []
    }
  }

  // add child to parent's children
  res[__parentId].childrens.push(obj);
  return res;
}

const readInterface = readline.createInterface({
  input: fs.createReadStream('large.jsonl'),
  crlfDelay: Infinity
});

readInterface.on('line', processLine);

readInterface.on('close', function () {
  const resultArray = Object.values(res);
  console.log(resultArray);
});
Here's a technique that:
forms an object with properties of the parent ids
converts that object to an array
(input lines converted to an array for simplicity)
const lines = [
{ "id": "gid://shopify/Product/1921569226808" },
{ "id": "gid://shopify/ProductVariant/19435458986040", "__parentId": "gid://shopify/Product/1921569226808" },
{ "id": "gid://shopify/Product/1921569259576" },
{ "id": "gid://shopify/ProductVariant/19435459018808", "__parentId": "gid://shopify/Product/1921569259576" },
{ "id": "gid://shopify/Product/1921569292344" },
{ "id": "gid://shopify/ProductVariant/19435459051576", "__parentId": "gid://shopify/Product/1921569292344" },
{ "id": "gid://shopify/Product/1921569325112" },
{ "id": "gid://shopify/ProductVariant/19435459084344", "__parentId": "gid://shopify/Product/1921569325112" },
{ "id": "gid://shopify/Product/1921569357880" },
{ "id": "gid://shopify/ProductVariant/19435459117112", "__parentId": "gid://shopify/Product/1921569357880" },
{ "id": "gid://shopify/ProductVariant/19435458986123", "__parentId": "gid://shopify/Product/1921569226808" }
];
// form object keyed to parent ids
const result = lines.reduce((res, line) => {
  const {id, __parentId} = line;

  // if there is no `__parentId`, this is a parent
  if (typeof __parentId === 'undefined') {
    res[id] = {
      id,
      childrens: []
    };
    return res;
  }

  // this is a child, create its parent if necessary
  if (typeof res[__parentId] === 'undefined') {
    res[__parentId] = {
      id: __parentId,
      childrens: []
    }
  }

  // add child to parent's children
  res[__parentId].childrens.push(line);
  return res;
}, {});

// convert object to array
const resultArray = Object.values(result);

const pre = document.querySelector('pre');
pre.innerText = 'resultArray: ' + JSON.stringify(resultArray, null, 2);
<pre></pre>

How can I parse a large JSON file with repeating values in JavaScript?

I am parsing a large JSON file using JSONStream. This works, but it returns the file line by line, so when I try to restructure the data I can only get the data that is not repeating.
For example, this is the structure:
{
  "Test": {
    "id": 3454534344334554345434,
    "details": {
      "text": "78679786787"
    },
    "content": {
      "text": 567566767656776
    },
    "content": {
      "text": 567566767656776
    },
    "content": {
      "text": 567566767656776
    }
  }
}
I'm able to get Test.id or Test.details.id but I can only get the First Test.content per line.
I've tried to set in an array but this still gets only the first line of Test.content.
Is there another way to transform a large file other than using JSONStream?
Parsing code:
var JSONStream = require('JSONStream'),
    es = require('event-stream'),
    fs = require('fs'),
    path = require('path');

var getStream = function () {
    let stream = fs.createReadStream(path.join(__dirname, '../test.json'), {
            encoding: 'utf8'
        }),
        parser = JSONStream.parse('*.Test')
    return stream.pipe(parser);
};

getStream()
    .pipe(es.mapSync(function (data) {
        let dataObj = []
        dataObj.push(data)
        processData(dataObj)
    }))

function processData(d) {
    let js = JSON.parse(JSON.stringify(d))
    console.log(js)
    // js.forEach(function (value, index) {
    //     Object.keys(value).forEach(function (v, i) {});
    // })
}

JS build object recursively

I am attempting to build a file-structure index using nodeJS. I'm using the fs.readdir function to iterate the files, which works fine. My problem is descending into the directory structure and returning a full object with the correct structure.
I have a simple function named identify which, when given file name "myfile.txt" will return an object {name: "myfile", type: "txt"}, which will explain that part of the function below...
My problem is that nothing is being returned when I run the indexer into the "me" variable. The console.log(results) line does return, however. This leaves me quite confused.
Any help would be greatly appreciated!
indexer =
  function(directory){
    Self.indexleft++;
    var results = {};
    Self.client.readdir(directory, function(err, fileLst){
      if(err){ return; }
      for(var count = 0; count < fileLst.length; count++){
        var ident = identify(fileLst[count]);
        if(ident.type = 'dir'){
          var descendant = (directory !== '') ?
            directory + '\\' + ident.name : ident.name;
          ident.children = indexer(descendant);
        }
        //directory = (directory.split('\\').pop());
        results[ident.name] = ident;
      }
      console.log(results);
      return results;
    });
  }
var me = indexer(''); console.log(me);
EDIT::
I've actually got something working now, though it's not quite as elegant as I'd like. Below is what I did. If anyone has a suggestion on optimizing I'd be happy to hear it!!
Newest (working) Code:
var events = require('events'),
    event = new events.EventEmitter(),
    setToValue = function(obj, value, path) {
        path = path.split('\\');
        for (var i = 0; i < path.length - 1; i++)
            obj = obj[path[i]];
        obj[path[i]] = value;
    },
    identify = function(file){
        var split = file.split('.'),
            type = (split.length > 1) ? split.pop() : 'dir',
            filename = split.join('.');
        return { name: filename, type: type };
    };

Indexer = function(cli, dir, callback){
    this.client = cli;           // File reading client
    this.startDir = dir;         // Starting directory
    this.results = {};           // Result object
    this.running = 0;            // How many iterations of start() are running
    this.start(dir);             // Start indexing
    this.monit();                // start() never returns anything; monit() checks every 5 seconds and fires the callback once 0 iterations are running
    this.callbackDone = false;   // Checks whether the callback has already been fired. Important in case of interval staggering
    this.cb = callback;
}

Indexer.prototype = {
    start: function(directory){
        var Self = this;
        Self.running++;
        Self.client.readdir(directory, function(err, fileLst){
            if(err){ Self.running--; return; }
            for(var count = 0; count < fileLst.length; count++){
                var ident = identify(fileLst[count]);
                var descendant = (directory !== '') ? directory + '\\' + ident.name : ident.name;
                if(ident.type === 'dir'){
                    Self.start(descendant);
                }
                setToValue(Self.results, ident, descendant);
            }
            Self.running--;
            console.log('running' + Self.running);
        });
    },
    monit: function(){
        var Self = this;
        Self.intervalA = setInterval(function(){
            if(Self.running < 1){
                if(!Self.callbackDone){
                    Self.callbackDone = true;
                    Self.cb(Self.results);
                }
                clearInterval(Self.intervalA);
            }
        }, 5000)
    }
}

var ix = new Indexer(Self.client, '', function(res){
    console.log("Index Complete!");
    fs.writeFile(path.join(Self.localLibBase, '/index.json'), JSON.stringify(res), (err) => {
        console.log("FileWrite Complete!");
    });
});
Example of returned object structure :
{
  "Applications" : {
    "name" : "Applications",
    "type" : "dir",
    "Microsoft Exchange Server 2007" : {
      "name" : "Microsoft Exchange Server 2007",
      "type" : "dir",
      "Microsoft Exchange Server 2007 SP1" : {
        "name" : "Microsoft Exchange Server 2007 SP1",
        "type" : "iso"
      }
    }
  }
}
The result is only available asynchronously, so you are trying to output the result too soon. The inner code is only executed later.
You can solve this in many ways. A very nice solution to working with asynchronous code is using promises.
As you have a recursive call, you'll have to resolve that with promises too.
NB: Note you had a bug in the comparison with "dir": you assigned instead of comparing.
Here is how your code would look:
var indexer = function(directory) {
    // return a promise object
    return new Promise(function (resolve, reject) {
        Self.indexleft++;
        var results = {};
        Self.client.readdir(directory, function(err, fileLst){
            if(err) {
                reject(); // promise is rejected
                return;
            }
            // "Iterate" over the file list asynchronously
            (function nextFile(fileList) {
                if (!fileList.length) {
                    resolve(results); // promise is resolved
                    return;
                }
                var file = fileList.shift(); // shift off the first file
                var ident = identify(file);
                results[ident.name] = ident;
                if(ident.type === 'dir'){ // There was a bug here: equal sign!
                    var descendant = directory !== ''
                        ? directory + '\\' + ident.name : ident.name;
                    // recursively call indexer: it is again a promise!
                    indexer(descendant).then(function (result) {
                        ident.children = result;
                        // recursively continue with the next file from the list
                        nextFile(fileList);
                    });
                } else {
                    nextFile(fileList);
                }
            })(fileLst); // start first iteration with the full list
        });
    });
};

// Call as a promise. Result is passed asynchronously to the callback.
indexer('').then(function(me) {
    console.log(me);
});
I made some dummy functions for your external references to make this snippet work:
// Below code added to mimic the external references -- can be ignored
var filesystem = [
    "",
    "images",
    "images\\photo.png",
    "images\\backup",
    "images\\backup\\old_photo.png",
    "images\\backup\\removed_pic.jpg",
    "images\\panorama.jpg",
    "docs",
    "docs\\essay.doc",
    "readme.txt",
];
var Self = {
    indexleft: 0,
    client: {
        readdir: function (directory, callback) {
            var list = filesystem.filter( path =>
                path.indexOf(directory) == 0
                && path.split('\\').length == directory.split('\\').length + (directory !== '')
                && path !== directory
            ).map ( path => path.split('\\').pop() );
            setTimeout(callback.bind(null, 0, list), 100);
        }
    }
}
function identify(item) {
    return {
        name: item,
        type: item.indexOf('.') > -1 ? 'file' : 'dir'
    };
}
// Above code added to mimic the external references -- can be ignored

var indexer = function(directory) {
    // return a promise object
    return new Promise(function (resolve, reject) {
        Self.indexleft++;
        var results = {};
        Self.client.readdir(directory, function(err, fileLst){
            if(err) {
                reject(); // promise is rejected
                return;
            }
            // "Iterate" over the file list asynchronously
            (function nextFile(fileList) {
                if (!fileList.length) {
                    resolve(results); // promise is resolved
                    return;
                }
                var file = fileList.shift(); // shift off the first file
                var ident = identify(file);
                results[ident.name] = ident;
                if(ident.type === 'dir'){ // There was a bug here: equal sign!
                    var descendant = directory !== ''
                        ? directory + '\\' + ident.name : ident.name;
                    // recursively call indexer: it is again a promise!
                    indexer(descendant).then(function (result) {
                        ident.children = result;
                        // recursively continue with the next file from the list
                        nextFile(fileList);
                    });
                } else {
                    nextFile(fileList);
                }
            })(fileLst); // start first iteration with the full list
        });
    });
};

// Call as a promise. Result is passed asynchronously to the callback.
indexer('').then(function(me) {
    console.log(me);
});
It's not really obvious how you're expecting to get that returned object from the code you have, but I can help you get the object nonetheless.
The shape of the object is bad because you're using filenames as keys on the object but that's wrong. Keys should be identifiers known to your program, and since filenames can be almost anything, using a filename as a key is terrible.
For example, consider if a file was named name in your structure
{ "Applications" : {
"name" : "Applications",
"type" : "dir",
"name" : {
"name" : "name"
... } } }
Yep, it just broke. Don't worry tho, our solution won't run into such troubles.
const co = require('co')
const {stat, readdir} = require('fs')
const {extname, join} = require('path')

// "promisified" fs functions
const readdirp = path =>
  new Promise ((t, f) => readdir (path, (err, res) => err ? f (err) : t (res)))

const statp = fd =>
  new Promise ((t, f) => stat (fd, (err, stats) => err ? f (err) : t (stats)))

// tree data constructors
const Dir = (path, children) =>
  ({type: 'd', path, children})

const File = (path, ext) =>
  ({type: 'f', path, ext})

// your function
const indexer = function* (path) {
  const stats = yield statp (path)
  if (stats.isDirectory ())
    return Dir (path, yield (yield readdirp (path)) .map (p => indexer (join (path, p))))
  else
    return File (path, extname (path))
}
This is good design because we didn't tangle directory tree building in with whatever Self.client is. Parsing a directory and building a tree is its own thing, and if you need an Object to inherit that behaviour there are other ways to do it.
Ok, let's set up a sample tree of files and then run it
$ mkdir test
$ cd test
$ mkdir foo
$ touch foo/disk.iso foo/image.jpg foo/readme.txt
$ mkdir foo/bar
$ touch foo/bar/build foo/bar/code.js foo/bar/migrate.sql
Using indexer is easy
// co returns a Promise
// once indexer is done, you will have a fully built tree
co (indexer ('./test')) .then (
  tree => console.log (JSON.stringify (tree, null, ' ')),
  err => console.error (err.message)
)
Output (some \n removed for brevity)
{
  "type": "d",
  "path": "./foo",
  "children": [
    {
      "type": "d",
      "path": "foo/bar",
      "children": [
        { "type": "f", "path": "foo/bar/build", "ext": "" },
        { "type": "f", "path": "foo/bar/code.js", "ext": ".js" },
        { "type": "f", "path": "foo/bar/migrate.sql", "ext": ".sql" }
      ]
    },
    { "type": "f", "path": "foo/disk.iso", "ext": ".iso" },
    { "type": "f", "path": "foo/image.jpg", "ext": ".jpg" },
    { "type": "f", "path": "foo/readme.txt", "ext": ".txt" }
  ]
}
If you try indexer on a path to a file, it will not fail
co (indexer ('./test/foo/disk.iso')) .then (
  tree => console.log (JSON.stringify (tree, null, ' ')),
  err => console.error (err.message)
)
Output
{ "type": "f", "path": "./foo/disk.iso", "ext": ".iso" }

How could I pass 'context' with a stream?

I have a simple app running that pipes objects through a stream, like so:
new ReadStreamThatCreatesData()
  .pipe(new TransformerStream())
  .pipe(new WriteStreamThatActsOnData())
But I want the WriteStreamThatActsOnData to have access to a property from the ReadStreamThatCreatesData, without the TransformerStream having to know about it or be able to access it. The pseudocode of what I'd want is basically this:
new ReadStreamThatCreatesData()
  .storeContext((obj) => obj.property)
  .pipe(new TransformerStream())
  .retrieveContext((obj, context) => obj.property = context)
  .pipe(new WriteStreamThatActsOnData())
but given the nature of streams I don't really see how that's possible. Does anyone have any smart ideas on how I could do something like this?
One way I can think to do this is to pipe the ReadStreamThatCreatesData to a function that splits it into two different streams: one stream of the context properties you have pulled out of the item and another stream with the remainder of the object. You pipe the second stream into the TransformerStream and then pipe that output along with the context stream through a zip operator that combines the two streams back into one. Then you send that to the WriteStreamThatActsOnData.
I don't think there are built-in node.js functions to do this, but you can use another library such as RxJS or highland.
Here's an example implementation with highland:
'use strict';
/*
Put this content into input.txt. Make sure there are no blank lines in the file:
{ "secret": 1, "val": "a" }
{ "secret": 2, "val": "b" }
{ "secret": 3, "val": "c" }
{ "secret": 4, "val": "d" }
{ "secret": 5, "val": "e" }
After running, output.txt should have this content:
{"val":"A","secret":1}
{"val":"B","secret":2}
{"val":"C","secret":3}
{"val":"D","secret":4}
{"val":"E","secret":5}
*/
const fs = require('fs');
const stream = require('stream');
const highland = require('highland');
const input = fs.createReadStream('input.txt');
const output = fs.createWriteStream('output.txt');
function readStreamThatCreatesData() {
  return highland(input).split().map(JSON.parse);
}

class TransformerStream extends stream.Transform {
  constructor(options) {
    if (!options) {
      options = {};
    }
    options.objectMode = true;
    super(options);
  }

  _transform(item, enc, cb) {
    if (item.secret) {
      item.secret = 'removed';
    }
    item.val = item.val.toUpperCase();
    this.push(item);
    cb();
  }
};

function removeSecret(item) {
  delete item.secret;
  return item;
}

function extractSecret(item) {
  return item.secret;
}

const inputStream = readStreamThatCreatesData();
const secretStream = inputStream.fork().map(extractSecret);
const mainStream = inputStream.fork().map(removeSecret);

secretStream.zip(highland(mainStream.pipe(new TransformerStream())))
  .map((combined) => {
    const secret = combined[0];
    const item = combined[1];
    item.secret = secret;
    return JSON.stringify(item) + '\n';
  })
  .pipe(output);
