How could I pass 'context' with a stream? - javascript

I have a simple app running that pipes objects through a stream, like so:
new ReadStreamThatCreatesData()
  .pipe(new TransformerStream())
  .pipe(new WriteStreamThatActsOnData())
But I want the WriteStreamThatActsOnData to have access to a property from the ReadStreamThatCreatesData, without the TransformerStream having to know about it or be able to access it. The pseudocode of what I'd want is basically this:
new ReadStreamThatCreatesData()
  .storeContext((obj) => obj.property)
  .pipe(new TransformerStream())
  .retrieveContext((obj, context) => obj.property = context)
  .pipe(new WriteStreamThatActsOnData())
but given the nature of streams I don't really see how that's possible. Does anyone have any smart ideas on how I could do something like this?

One way I can think of to do this is to pipe the ReadStreamThatCreatesData into a function that splits it into two different streams: one stream of the context properties you have pulled out of each item, and another stream with the remainder of the object. You pipe the second stream into the TransformerStream, then pipe its output along with the context stream through a zip operator that combines the two streams back into one. Then you send that to the WriteStreamThatActsOnData.
I don't think there are built-in Node.js functions to do this, but you can use another library such as RxJS or highland.
Here's an example implementation with highland:
'use strict';
/*
Put this content into input.txt. Make sure there are no blank lines in the file:
{ "secret": 1, "val": "a" }
{ "secret": 2, "val": "b" }
{ "secret": 3, "val": "c" }
{ "secret": 4, "val": "d" }
{ "secret": 5, "val": "e" }
After running, output.txt should have this content:
{"val":"A","secret":1}
{"val":"B","secret":2}
{"val":"C","secret":3}
{"val":"D","secret":4}
{"val":"E","secret":5}
*/
const fs = require('fs');
const stream = require('stream');
const highland = require('highland');
const input = fs.createReadStream('input.txt');
const output = fs.createWriteStream('output.txt');
function readStreamThatCreatesData() {
  return highland(input).split().map(JSON.parse);
}

class TransformerStream extends stream.Transform {
  constructor(options) {
    if (!options) {
      options = {};
    }
    options.objectMode = true;
    super(options);
  }

  _transform(item, enc, cb) {
    // if the transformer could see the secret, it would overwrite it here
    if (item.secret) {
      item.secret = 'removed';
    }
    item.val = item.val.toUpperCase();
    this.push(item);
    cb();
  }
}

function removeSecret(item) {
  delete item.secret;
  return item;
}

function extractSecret(item) {
  return item.secret;
}

// fork the input into a stream of secrets and a stream of the remaining objects
const inputStream = readStreamThatCreatesData();
const secretStream = inputStream.fork().map(extractSecret);
const mainStream = inputStream.fork().map(removeSecret);

// run the main fork through the transformer, then zip the secrets back in
secretStream.zip(highland(mainStream.pipe(new TransformerStream())))
  .map((combined) => {
    const secret = combined[0];
    const item = combined[1];
    item.secret = secret;
    return JSON.stringify(item) + '\n';
  })
  .pipe(output);
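If you'd rather avoid a library, the same idea can be approximated with plain Node streams. Here's a minimal sketch (makeContextPair is my name, not a standard API); it assumes the middle stream emits exactly one output per input and preserves order, so a shared FIFO queue is enough to carry the context across:
const { Transform } = require('stream');

// Returns a store/retrieve pair of object-mode streams sharing a queue.
function makeContextPair(extract, restore) {
  const queue = [];
  const store = new Transform({
    objectMode: true,
    transform(obj, _enc, cb) {
      queue.push(extract(obj)); // stash the context for later
      cb(null, obj);
    },
  });
  const retrieve = new Transform({
    objectMode: true,
    transform(obj, _enc, cb) {
      cb(null, restore(obj, queue.shift())); // re-attach the context
    },
  });
  return { store, retrieve };
}

// Usage, mirroring the pseudocode in the question:
// const { store, retrieve } = makeContextPair(
//   (obj) => obj.property,
//   (obj, context) => Object.assign(obj, { property: context })
// );
// readStream.pipe(store).pipe(new TransformerStream()).pipe(retrieve).pipe(writeStream);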

Related

Multiple parameter-based routes for an API Endpoint in an Azure Function

I have a simple Function App in Azure that returns data in JSON.
https://myurl.com/api/symbolcountry/{id}?
It takes in a single id for a parameter. It works well.
However, I'd like to parameterize part of the URL:
https://myurl.com/api/{endpoint}/{id}?
I've never messed around with javascript and am going a bit nuts trying to figure this one out.
function.json file:
{
  "bindings": [
    {
      "authLevel": "anonymous",
      "type": "httpTrigger",
      "direction": "in",
      "name": "req",
      "methods": [
        "get"
      ],
      "route": "symbolcountry/{id}"
    },
    {
      "type": "http",
      "direction": "out",
      "name": "res"
    }
  ]
}
index.js file:
module.exports = function (context, req) {
  const method = req.method.toLowerCase();
  var payload = null;
  var entity = "";
  switch (method) {
    case "get":
      if (req.params.id) {
        entity = "symbolcountry"
        payload = { "SymbolID": req.params.id };
      }
      else {
        entity = "symbols"
      }
      break;
  }
}
Tried adding parameters in function.json to no avail.
If you want to access the parameters passed in the URL query string instead of the route params, use the keyword query.
Your code will look like this:
module.exports = function (context, req) {
  const method = req.method.toLowerCase();
  var payload = null;
  var entity = "";
  switch (method) {
    case "get":
      if (req.query.id) {
        entity = "symbolcountry";
        payload = {
          "SymbolId": req.query.id
        };
      }
      break;
  }
  context.res = {
    body: payload
  };
};
The above code will return the payload. If you want to pass parameters through routing, you can refer to this article by Joe Gatt.
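As for parameterizing the endpoint segment itself: Azure Functions route templates can capture multiple parameters. A minimal sketch, assuming you rename the route in function.json (the {endpoint} name is illustrative, and the ? makes id optional):
// function.json: "route": "{endpoint}/{id?}"

// index.js: route parameters arrive on req.params
module.exports = function (context, req) {
  const entity = req.params.endpoint; // e.g. "symbolcountry"
  const id = req.params.id;           // undefined when the segment is omitted
  context.res = {
    body: id ? { entity: entity, id: id } : { entity: entity }
  };
};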

Construct MongoDB query from GraphQL request

Let's say we query the server with this request, and we only want to get the user's email. My current implementation requests the whole User object from MongoDB, which I can imagine is extremely inefficient.
GQL
{
  user(id: "34567345637456") {
    email
  }
}
How would you go about creating a MongoDB filter that would only return the specified fields? E.g.:
JS object
{
  "email": 1
}
My current server is running Node.js, Fastify and Mercurius
which I can imagine is extremely inefficient.
Doing this task is an advanced feature with many pitfalls. I would suggest starting by building a simple extraction that reads all the fields. This solution works and does not return any additional fields to the client.
The pitfalls are:
nested queries
complex object composition
aliasing
multiple queries into one request
Here is an example that does what you are looking for.
It manages aliasing and multiple queries.
const Fastify = require('fastify')
const mercurius = require('mercurius')
const app = Fastify({ logger: true })
const schema = `
  type Query {
    select: Foo
  }
  type Foo {
    a: String
    b: String
  }
`

const resolvers = {
  Query: {
    select: async (parent, args, context, info) => {
      const currentQueryName = info.path.key
      // search the input query AST node
      const selection = info.operation.selectionSet.selections.find(
        (selection) => {
          return (
            selection.name.value === currentQueryName ||
            // the alias node is optional, so guard before reading it
            (selection.alias && selection.alias.value === currentQueryName)
          )
        }
      )
      // grab the fields requested by the user
      const project = selection.selectionSet.selections.map((selection) => {
        return selection.name.value
      })
      // do the query using the projection
      const result = {}
      project.forEach((fieldName) => {
        result[fieldName] = fieldName
      })
      return result
    },
  },
}

app.register(mercurius, {
  schema,
  resolvers,
  graphiql: true,
})

app.listen(3000)
Call it using:
query {
  one: select {
    a
  }
  two: select {
    a
    aliasMe: b
  }
}
Returns
{
  "data": {
    "one": {
      "a": "a"
    },
    "two": {
      "a": "a",
      "aliasMe": "b"
    }
  }
}
Expanding on Manuel Spigolon's original answer, where he stated that among the pitfalls of his implementation are nested queries and multiple queries in one request, which this implementation seeks to fix.
import _ from 'lodash'; // lodash is used for the deep merge below

function formFilter(context: any) {
  let filter: any = {};
  let getValues = (selection: any, parentObj?: string[]) => {
    selection.forEach((selection: any) => {
      // Check if the parentObj is defined
      if (parentObj)
        // Merge the two objects
        _.merge(filter, [...parentObj, null].reduceRight((obj, next) => {
          if (next === null) return ({ [selection.name?.value]: 1 });
          return ({ [next]: obj });
        }, {}));
      // Check for a nested selection set
      if (selection.selectionSet?.selections !== undefined) {
        // If the selection has a selection set, then we need to recurse
        if (!parentObj) getValues(selection.selectionSet?.selections, [selection.name.value]);
        // If the selection is nested
        else getValues(selection.selectionSet?.selections, [...parentObj, selection.name.value]);
      }
    });
  };
  // Start the recursive function
  getValues(context.operation.selectionSet.selections);
  return filter;
}
Input
{
  role(id: "61f1ccc79623d445bd2f677f") {
    name
    users {
      user_name
      _id
      permissions {
        roles
      }
    }
    permissions
  }
}
Output (JSON.stringify)
{
  "role": {
    "name": 1,
    "users": {
      "user_name": 1,
      "_id": 1,
      "permissions": {
        "roles": 1
      }
    },
    "permissions": 1
  }
}
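To actually hand this to MongoDB, the nested filter still has to be flattened into dot-notation projection paths, and the top-level query name stripped. A hedged sketch of that last step (the collection name and id handling are assumptions, not part of the answer above):
const { ObjectId } = require('mongodb');

// flatten { users: { user_name: 1 } } into { 'users.user_name': 1 }
function toProjection(obj, prefix = []) {
  return Object.entries(obj).reduce((acc, [key, val]) => {
    if (typeof val === 'object') Object.assign(acc, toProjection(val, [...prefix, key]));
    else acc[[...prefix, key].join('.')] = 1;
    return acc;
  }, {});
}

// inside the resolver: strip the top-level query key, then project
// const { role } = formFilter(info);
// const doc = await db.collection('roles')
//   .findOne({ _id: new ObjectId(args.id) }, { projection: toProjection(role) });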

How to parse through large JSONL data in Node.js

I am trying to read through a large JSONL file, maybe a few hundred lines up to thousands or possibly millions of lines; below is a sample of the data.
{"id":"gid://shopify/Product/1921569226808"}
{"id":"gid://shopify/ProductVariant/19435458986040","__parentId":"gid://shopify/Product/1921569226808"}
{"id":"gid://shopify/Product/1921569259576"}
{"id":"gid://shopify/ProductVariant/19435459018808","__parentId":"gid://shopify/Product/1921569259576"}
{"id":"gid://shopify/Product/1921569292344"}
{"id":"gid://shopify/ProductVariant/19435459051576","__parentId":"gid://shopify/Product/1921569292344"}
{"id":"gid://shopify/Product/1921569325112"}
{"id":"gid://shopify/ProductVariant/19435459084344","__parentId":"gid://shopify/Product/1921569325112"}
{"id":"gid://shopify/Product/1921569357880"}
{"id":"gid://shopify/ProductVariant/19435459117112","__parentId":"gid://shopify/Product/1921569357880"}
{"id":"gid://shopify/ProductVariant/19435458986123","__parentId":"gid://shopify/Product/1921569226808"}
So each line is a JSON object, either a Product or a Product child identified by __parentId. Given that the data may contain thousands of lines, what's the best way to read through it and return a regular JSON object, like this:
{
  "id": "gid://shopify/Product/1921569226808",
  "childrens": [
    { "id": "gid://shopify/ProductImage/20771195224224", "__parentId": "gid://shopify/Product/1921569226808" },
    { "id": "gid://shopify/ProductImage/20771195344224", "__parentId": "gid://shopify/Product/1921569226808" },
    { "id": "gid://shopify/ProductImage/20771329344224", "__parentId": "gid://shopify/Product/1921569226808" }
  ]
}
The data is coming back from Shopify, and they advise:
Because nested connections are no longer nested in the response data
structure, the results contain the __parentId field, which is a
reference to an object's parent. This field doesn’t exist in the API
schema, so you can't explicitly query it. It's included automatically
in bulk operation result.
Read the JSONL file in reverse Reading the JSONL file in reverse makes
it easier to group child nodes and avoids missing any that appear
after the parent node. For example, while collecting variants, there
won't be more variants further up the file when you come to the
product that the variants belong to. After you download the JSONL
file, read it in reverse, and then parse it so that any child nodes
are tracked before the parent node is discovered.
You can read more about all of this in Shopify's bulk operations documentation.
Consider using streams so that you don't have to load the entire file in memory.
You can use readline (a native module) to process each line individually.
I took the line processing part from @terrymorse: https://stackoverflow.com/a/65484413/14793527
const readline = require('readline');
const fs = require('fs');

let res = {};

function processLine(line) {
  // each line arrives as a string, so parse it first
  const item = JSON.parse(line);
  const { id, __parentId } = item;
  // if there is no `__parentId`, this is a parent
  if (typeof __parentId === 'undefined') {
    res[id] = {
      id,
      childrens: []
    };
    return res;
  }
  // this is a child, create its parent if necessary
  if (typeof res[__parentId] === 'undefined') {
    res[__parentId] = {
      id: __parentId,
      childrens: []
    };
  }
  // add child to parent's children
  res[__parentId].childrens.push(item);
  return res;
}

const readInterface = readline.createInterface({
  input: fs.createReadStream('large.jsonl'),
  crlfDelay: Infinity
});

readInterface.on('line', processLine);
readInterface.on('close', function () {
  const resultArray = Object.values(res);
  console.log(resultArray);
});
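On newer Node versions (11.14+), the same processing can also be written with readline's async iterator instead of event callbacks. A minimal sketch, reusing processLine and res from above:
async function buildTree(file) {
  const rl = readline.createInterface({
    input: fs.createReadStream(file),
    crlfDelay: Infinity,
  });
  for await (const line of rl) {
    if (line.trim()) processLine(line); // skip blank lines
  }
  return Object.values(res);
}

// buildTree('large.jsonl').then(console.log);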
Here's a technique that:
forms an object with properties of the parent ids
converts that object to an array
(input lines converted to an array for simplicity)
const lines = [
  { "id": "gid://shopify/Product/1921569226808" },
  { "id": "gid://shopify/ProductVariant/19435458986040", "__parentId": "gid://shopify/Product/1921569226808" },
  { "id": "gid://shopify/Product/1921569259576" },
  { "id": "gid://shopify/ProductVariant/19435459018808", "__parentId": "gid://shopify/Product/1921569259576" },
  { "id": "gid://shopify/Product/1921569292344" },
  { "id": "gid://shopify/ProductVariant/19435459051576", "__parentId": "gid://shopify/Product/1921569292344" },
  { "id": "gid://shopify/Product/1921569325112" },
  { "id": "gid://shopify/ProductVariant/19435459084344", "__parentId": "gid://shopify/Product/1921569325112" },
  { "id": "gid://shopify/Product/1921569357880" },
  { "id": "gid://shopify/ProductVariant/19435459117112", "__parentId": "gid://shopify/Product/1921569357880" },
  { "id": "gid://shopify/ProductVariant/19435458986123", "__parentId": "gid://shopify/Product/1921569226808" }
];

// form object keyed to parent ids
const result = lines.reduce((res, line) => {
  const { id, __parentId } = line;
  // if there is no `__parentId`, this is a parent
  if (typeof __parentId === 'undefined') {
    res[id] = {
      id,
      childrens: []
    };
    return res;
  }
  // this is a child, create its parent if necessary
  if (typeof res[__parentId] === 'undefined') {
    res[__parentId] = {
      id: __parentId,
      childrens: []
    };
  }
  // add child to parent's children
  res[__parentId].childrens.push(line);
  return res;
}, {});

// convert object to array
const resultArray = Object.values(result);

const pre = document.querySelector('pre');
pre.innerText = 'resultArray: ' + JSON.stringify(resultArray, null, 2);
<pre></pre>

Append JSON to a file using node streams

Based on what I have read here, I'm using fs.createWriteStream to write some JSON to a file. I am processing the data in chunks of about 50. So at the beginning of the script, I open my stream, and then use a function to pass it in, along with some JSON, which is working pretty well for writing.
const myStream = fs.createWriteStream(
  path.join(RESULTS_DIR, `my-file.json`),
  {
    flags: 'a'
  }
)

function appendJsonToFile(stream, jsonToAppend) {
  return new Promise((resolve, reject) => {
    try {
      stream.write(JSON.stringify(jsonToAppend, null, 2))
      resolve('STREAM_WRITE_SUCCESS')
    } catch (streamError) {
      reject('STREAM_WRITE_FAILURE', streamError)
    }
  })
}

appendJsonToFile(myStream, someJson)
However, because each piece of data to be written is an array of objects, the structure I eventually get in my file will look like this:
[
  {
    "data": "test data 1",
  },
  {
    "data": "test data 2",
  }
][
  {
    "data": "test data 3",
  },
  {
    "data": "test data 4",
  }
]
How can I append these pieces of data so that the result is properly formatted JSON, rather than just a series of arrays?
If the file is always formatted as you say above, the four things you'd need to do are:
Find out the current length of the file and subtract 2 chars (\n], since there's no newline at the end),
Remove the first char of the JSON you're saving,
Save the file using r+ mode with the start option set to that position,
End the stream after each save.
Here's the link to createWriteStream options.
Now, the other thing is that point 4 makes this rather inefficient and calls into question whether streaming should be used here at all. I think it does make sense, but it depends on whether you need the file to be readable between writes. If not, then you should use a transform stream in the middle, flush between writes, and end the stream once all work is done (on beforeExit).
You can do this yourself, but I'm the author of a framework called scramjet which makes these cases much easier:
const fs = require('fs');
const path = require('path');
const scramjet = require('scramjet');

const myStream = new scramjet.DataStream();
const file = path.join(RESULTS_DIR, `my-file.json`);
const start = fs.statSync(file).size - 2;

myStream
  .flatten()
  .toJSONArray()
  .shift(1)
  .pipe(fs.createWriteStream(
    file,
    { flags: 'r+', start }
  ));

function appendJsonToFile(stream, jsonToAppend) {
  return new Promise((resolve, reject) => {
    try {
      stream.write(jsonToAppend)
      resolve('STREAM_WRITE_SUCCESS')
    } catch (streamError) {
      reject('STREAM_WRITE_FAILURE', streamError)
    }
  })
}

appendJsonToFile(myStream, someJson)

process.on('beforeExit', () => myStream.end());
You can use this as above, but if you'd prefer to work on this with plain node streams this should nudge you in the right direction.
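For reference, a rough plain-Node sketch of the four steps above (it assumes the file already contains a pretty-printed JSON array, as produced by JSON.stringify with 2-space indent, so it ends in "\n]"):
const fs = require('fs');

function appendToJsonArrayFile(file, arrayToAppend) {
  const start = fs.statSync(file).size - 2; // step 1: position just before "\n]"
  const json = JSON.stringify(arrayToAppend, null, 2).replace(/^\[/, ','); // step 2: "[" -> ","
  return new Promise((resolve, reject) => {
    const ws = fs.createWriteStream(file, { flags: 'r+', start }); // step 3
    ws.write(json, (err) => {
      if (err) return reject(err);
      ws.end(() => resolve('STREAM_WRITE_SUCCESS')); // step 4: end after each save
    });
  });
}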
Here's the code reworked with error and FILE NOT FOUND handling. Solution derived from Michał Karpacki's answer.
const path = require('path');
const fs = require('fs');
const getFolderPath = () => __dirname || process.cwd();
const getFilePath = (fileName) => path.join(getFolderPath(), `${fileName}`);
/**
 * @param {string} fileName - Included File Name & its Extension
 * @param {Array<*>} arrayData
 * @return {Promise<*>}
 */
const writeFileAsync = async (fileName, arrayData) => {
  const filePath = getFilePath(fileName);
  return new Promise((resolve, reject) => {
    try {
      const _WritableStream = fs.createWriteStream(filePath, { flags: 'r+', start: fs.statSync(filePath).size - 2 });
      _WritableStream.write(JSON.stringify(arrayData, null, 2).replace(/\[/, ','), (streamError) => {
        // only reject when the write actually failed
        if (streamError) return reject(['STREAM_WRITE_FAILURE', streamError]);
        return resolve('STREAM_WRITE_SUCCESS');
      });
    } catch (streamError) {
      /** ERROR: NO SUCH FILE OR DIRECTORY! */
      if (streamError.code === 'ENOENT') {
        fs.mkdirSync(getFolderPath(), { recursive: true });
        return resolve(fs.writeFileSync(filePath, JSON.stringify(arrayData, null, 2)));
      }
      /** ERROR: OUT OF BOUNDS OF FILE SIZE RANGE - INVALID START POSITION FOR WRITE STREAM! */
      if (streamError instanceof RangeError) {
        console.error(`> [ERR_OUT_OF_RANGE] =>`, streamError);
        const _WritableStream = fs.createWriteStream(filePath, { flags: 'r+' });
        _WritableStream.write(JSON.stringify(arrayData, null, 2), (streamError) => {
          if (streamError) return reject(['STREAM_WRITE_FAILURE', streamError]);
          return resolve('STREAM_WRITE_SUCCESS');
        });
        return;
      }
      return reject(['STREAM_WRITE_FAILURE', streamError]);
    }
  });
};
(() => writeFileAsync('test1.json',
  [{
    key: "value 1"
  }, {
    key: "value 2"
  }]
))();
/* Output after 1st time run =>
[
{
"key": "value 1"
},
{
"key": "value 2"
}
]
*/
/* Output after 2nd time run =>
[
{
"key": "value 1"
},
{
"key": "value 2"
},
{
"key": "value 1"
},
{
"key": "value 2"
}
]
*/

Convert JSON object to an object which is not in JSON format

I have a JSON object with the structure below:
const inputObj = {
  "prop1": "val1",
  "prop2": {
    "prop2_1": "val2_1",
    "prop2_2": "val2_2"
  },
  "prop3": "val3"
}
My objective: I would like to take the properties, including the nested ones, and store the result in a txt file, but not in JSON format. To make it clear, here is my expected output in the txt file:
{
  prop1: {
    id: 'prop1'
  },
  prop2_prop2_1: {
    id: 'prop2.prop2_1'
  },
  prop2_prop2_2: {
    id: 'prop2.prop2_2'
  },
  prop3: {
    id: 'prop3'
  }
}
So far, I can write the non-nested properties, but still not in the structure I expected. Here is the result so far:
{
  "prop1": "prop1",
  "prop3": "prop3"
}
It's still in JSON format, not in the structure I expected, and the nested properties are still not caught (I'm still thinking about how to get them).
Here is the code so far that produces my current result:
const fs = require('fs')
const fileName = "./results.txt"

function getAllKeys(obj, path = [], result = []) {
  Object.entries(obj).forEach(([k, v]) => {
    if (typeof v === 'object') getAllKeys(v, path.concat(k), result)
    else result.push(path.concat(k).join("."))
  })
  return result
}
const inputToFile = getAllKeys(inputObj)
// console.log(inputToFile)
// result of the console.log
// prop1
// prop2.prop2_1
// prop2.prop2_2
// prop3
const newObj = {}
for (var i = 0; i < inputToFile.length; i++) {
  var input = inputToFile[i]
  var dotIndex = input.indexOf('.') // to check if it's from a nested property of the inputObj
  if (dotIndex === -1) {
    // no dot, so not a nested property
    newObj[input] = input.toString()
  } else {
    // the input contains a dot, which means a nested property
  }
}
fs.writeFileSync(fileName, JSON.stringify(newObj))
// if I use the above line, the result in the file is as I mentioned above. But if the code is like below:
const finals = JSON.stringify(newObj)
fs.writeFileSync(fileName, JSON.parse(finals))
// the output in the file is only [object Object], without double quotes
Update
The reason I need the result formatted like that is that I want to use react-intl. I already have the locale file (the translation), which looks like inputObj (the structure). Then I need to generate a file like the one below, so the library can translate it:
import { defineMessages } from 'react-intl';

const MessagesId = defineMessages({
  prop1: {
    id: 'prop1'
  },
  prop2_prop2_1: {
    id: 'prop2.prop2_1'
  },
  prop2_prop2_2: {
    id: 'prop2.prop2_2'
  },
  prop3: {
    id: 'prop3'
  }
})

export default MessagesId;
That's why I need it to be not like JSON. I already have thousands of lines for the translation, but I need to define them in MessagesId, and it would take so much time to do that manually.
PS: react-intl works; the problem is only the conversion, as in my initial question.
This script can handle multiple levels of nested objects.
const outputObj = {};

const convertNestedObj = (obj, parentKey = []) => {
  for (const key in obj) {
    const newParentKey = [...parentKey, key];
    if (typeof obj[key] === 'object') {
      // recurse into nested objects, carrying the key path along
      convertNestedObj(obj[key], newParentKey);
    } else {
      // leaf: underscore-joined key, dot-joined id (matching the expected output)
      outputObj[newParentKey.join('_')] = { id: newParentKey.join('.') };
    }
  }
};

convertNestedObj(inputObj);
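What this doesn't show is writing outputObj to the txt file in the desired non-JSON shape. A small follow-up sketch (the file name is an assumption):
const fs = require('fs');

// render each entry as `key: { id: '...' }`, joined into one object literal
const body = Object.entries(outputObj)
  .map(([key, val]) => `  ${key}: {\n    id: '${val.id}'\n  }`)
  .join(',\n');

fs.writeFileSync('./results.txt', `{\n${body}\n}\n`);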
