How to parse through large JSONL data Node js - javascript

I am trying to read through a large JSONL, maybe couple hundreds up to thousands or possibly million line, below is sample of of the data.
{"id":"gid://shopify/Product/1921569226808"}
{"id":"gid://shopify/ProductVariant/19435458986040","__parentId":"gid://shopify/Product/1921569226808"}
{"id":"gid://shopify/Product/1921569259576"}
{"id":"gid://shopify/ProductVariant/19435459018808","__parentId":"gid://shopify/Product/1921569259576"}
{"id":"gid://shopify/Product/1921569292344"}
{"id":"gid://shopify/ProductVariant/19435459051576","__parentId":"gid://shopify/Product/1921569292344"}
{"id":"gid://shopify/Product/1921569325112"}
{"id":"gid://shopify/ProductVariant/19435459084344","__parentId":"gid://shopify/Product/1921569325112"}
{"id":"gid://shopify/Product/1921569357880"}
{"id":"gid://shopify/ProductVariant/19435459117112","__parentId":"gid://shopify/Product/1921569357880"}
{"id":"gid://shopify/ProductVariant/19435458986123","__parentId":"gid://shopify/Product/1921569226808"}
So each line is json object, either its a Product, or a Product Child identified by __parentId, given that the data may contain thousands of lines, what's the best way to read through it and return a regular JSON object, like this.
{
"id": "gid://shopify/Product/1921569226808",
"childrens": {
{"id":"gid://shopify//ProductImage//20771195224224","__parentId":"gid:////shopify//Product//1921569226808"},
{"id":"gid:////shopify//ProductImage//20771195344224","__parentId":"gid:////shopify//Product//1921569226808"}
{"id":"gid:////shopify//ProductImage//20771329344224","__parentId":"gid:////shopify//Product//1921569226808"}
}
}
The data is coming back from Shopify and they advice to:
Because nested connections are no longer nested in the response data
structure, the results contain the __parentId field, which is a
reference to an object's parent. This field doesn’t exist in the API
schema, so you can't explicitly query it. It's included automatically
in bulk operation result.
Read the JSONL file in reverse Reading the JSONL file in reverse makes
it easier to group child nodes and avoids missing any that appear
after the parent node. For example, while collecting variants, there
won't be more variants further up the file when you come to the
product that the variants belong to. After you download the JSONL
file, read it in reverse, and then parse it so that any child nodes
are tracked before the parent node is discovered.
You can look for look here to read more about all of thisenter link description here.

Consider using streams so that you don't have to load the entire file in memory.
You can use readline (a native module) to process each line individually.
I took the line processing part from #terrymorse https://stackoverflow.com/a/65484413/14793527
const readline = require('readline');
const fs = require('fs');
let res = {};
function processLine(line) {
const {id, __parentId} = line;
// if there is no `__parentId`, this is a parent
if (typeof __parentId === 'undefined') {
res[line.id] = {
id,
childrens: []
};
return res;
}
// this is a child, create its parent if necessary
if (typeof res[__parentId] === 'undefined') {
res[__parentId] = {
id: __parentId,
childrens: []
}
}
// add child to parent's children
res[__parentId].childrens.push(line);
return res;
}
const readInterface = readline.createInterface({
input: fs.createReadStream('large.jsonl'),
output: process.stdout,
console: false
});
readInterface.on('line', processLine);
readInterface.on('close', function() {
const resultArray = Object.values(res);
console.log(resultArray);
});

Here's a technique that:
forms an object with properties of the parent ids
converts that object to an array
(input lines converted to an array for simplicity)
const lines = [
{ "id": "gid://shopify/Product/1921569226808" },
{ "id": "gid://shopify/ProductVariant/19435458986040", "__parentId": "gid://shopify/Product/1921569226808" },
{ "id": "gid://shopify/Product/1921569259576" },
{ "id": "gid://shopify/ProductVariant/19435459018808", "__parentId": "gid://shopify/Product/1921569259576" },
{ "id": "gid://shopify/Product/1921569292344" },
{ "id": "gid://shopify/ProductVariant/19435459051576", "__parentId": "gid://shopify/Product/1921569292344" },
{ "id": "gid://shopify/Product/1921569325112" },
{ "id": "gid://shopify/ProductVariant/19435459084344", "__parentId": "gid://shopify/Product/1921569325112" },
{ "id": "gid://shopify/Product/1921569357880" },
{ "id": "gid://shopify/ProductVariant/19435459117112", "__parentId": "gid://shopify/Product/1921569357880" },
{ "id": "gid://shopify/ProductVariant/19435458986123", "__parentId": "gid://shopify/Product/1921569226808" }
];
// form object keyed to parent ids
const result = lines.reduce((res, line) => {
const {id, __parentId} = line;
// if there is no `__parentId`, this is a parent
if (typeof __parentId === 'undefined') {
res[id] = {
id,
childrens: []
};
return res;
}
// this is a child, create its parent if necessary
if (typeof res[__parentId] === 'undefined') {
res[__parentId] = {
id: __parentId,
childrens: []
}
}
// add child to parent's children
res[__parentId].childrens.push(line);
return res;
}, {});
// convert object to array
const resultArray = Object.values(result);
const pre = document.querySelector('pre');
pre.innerText = 'resultArray: ' + JSON.stringify(resultArray, null, 2);
<pre></pre>

Related

Iterate deeply nested object with unknown level and add remove key/value based on user provided conditions

Could anyone please guide me how to achieve the below challenge which I am facing?
I have thousands of mock API request response JSON files. They are deeply nested, and they all are structured differently. I need to add/update/delete entry at the specfic location where the condition match which will be provided by user. I am not sure how to approach this problem? I have tried doing something like below. I am asking user for path for where to start looking. But this will increase time as user has to look for path in all file and pass that info to api. below code work upto 2 level only. need to search full tree where all user provides conditions matches, and at that place, I need to add/update/delete data. I took condition as an array of objects.
Draft Code
const _ = require("lodash");
const file = "./sample.json";
const actions = ["add", "delete", "update"];
const consumer = (file, key, where, data, action) => {
try {
const act = action.toLowerCase();
if(!actions.includes(act) throw new Error("invalid action provided");
if(_.isArray(where) && _.every(where, _.isObject())) throw new Error("no where clause condition provided");
let content = require(file);
let typeKeyContent = null;
let keyContent = _.get(content, key);
if(!keyContent) throw new Error("invalid key");
if(_.isArray(keyContent)) {
typeKeyContent = "array"
} else if (_.isObject(keyContent)) {
typeKeyContent = "object"
}
switch (act) {
case "add":
if (typeKeyContent === "array") {
// array logic
for (let i = 0; i < keyContent.length; i++) {
const result = where.every(element => {
for (let key in element) {
return keyContent[key] && element[key] === keyContent[key];
}
});
if (!result) {
console.log("attributes matching -> ", result);
return;
}
keyContent[i] = {...keyContent[i], ...data }
}
let newcontent = _.set(content, key, keyContent);
console.log("newcontent -> \n",JSON.stringify(newcontent, null, 2));
return;
}
const result = where.every(element => {
for (let key in element) {
return keyContent[key] && element[key] === keyContent[key];
}
});
if (!result) {
console.log("attributes matching -> ", result);
return;
}
keyContent = { ...keyContent, ...data };
let newcontent = _.set(content, key, keyContent);
console.log("newcontent -> \n",JSON.stringify(newcontent, null, 2));
// TODO :: store back in json file
break;
default:
console.log("reached default case");
return;
}
} catch(err) {
console.log("ERROR :: CONSUMER ::", error);
}
}
// AND based condition only
const conditions = [
{ name: "Essential Large" },
{ selected: true }
];
const newdata = { description: "our best service" } // wants to add new prop
consumer(file, "selected_items.essential", conditions, newdata, "add");
sample json
{
"status": 200,
"request": {},
"response": {
"ffs": false,
"customer": {
"customer_id": 1544248,
"z_cx_id": 123456
},
"selected_items": {
"essential": [
{
"id": 4122652,
"name": "Essential Large",
"selected": true,
"description": "our best service" // will be added
},
{
"id": 4122653,
"name": "Essential Large",
"selected": true,
"description": "our best service" // will be added
}
]
},
"service_partner": {
"id": 3486,
"name": "Some String",
"street": "1234 King St."
},
"subject": "Project",
"description": "Issue: (copy/paste service request details here Required"
}
}
So you want to go through every key of a nested object right?
function forEvery(object,fn){
//obj is the object, fn is the function
//this function should go through each item in an object loaded from JSON string
//fn takes in 3 arguments: current element, that element's parent, level of depth(starts at 1)
var arr=[]
function recurse(obj,map,depth){
Object.keys(obj).forEach((a,i)=>{
fn(obj[a],obj,a,depth) //because fn can affect the object so the if statement should after not before ;-;
if(typeof obj[a]=="object"&&obj[a]!=null){ //if nested value is another object
map.push(a); arr.push(map)
recurse(obj[a],[...map],depth+1)
}
})
}
recurse(object,[],1)
}
//usage would be like:
//let customerCondition=/*some logic here*/
//let testObj=JSON.parse( (require('fs')).readFileSync('dirToSomeFile.json') )
forEvery(testObj,customerCondition)
Here's a live example
let testObj={"status":200,"request":{},"response":{"ffs":false,"customer":{"customer_id":1544248,"z_cx_id":123456},"selected_items":{"essential":[{"id":4122652,"name":"Essential Large","selected":true},{"id":4122653,"name":"Essential Medium","selected":false}]},"service_partner":{"id":3486,"name":"Some String","street":"1234 King St."},"subject":"Project","description":"Issue: (copy/paste service request details here Required"}}
function forEvery(object,fn){
//obj is the object, fn is the function
//this function should go through each item in an object loaded from JSON string
//fn takes in 3 arguments: current element, that element's parent, level of depth(starts at 1)
var arr=[]
function recurse(obj,map,depth){
Object.keys(obj).forEach((a,i)=>{
fn(obj[a],obj,a,depth) //because fn can affect the object so the if statement should after not before ;-;
if(typeof obj[a]=="object"&&obj[a]!=null){ //if nested value is another object
map.push(a); arr.push(map)
recurse(obj[a],[...map],depth+1)
}
})
}
recurse(object,[],1)
}
//example usage
let userQuery=[{ name: "Essential Large" },{ selected: true }]; //the user query in the format you gave
let userCondition={} //assuming each key across userQuery is unique, I set a model object for comparisons later on
userQuery.forEach(obj=>{ //I fill the model object :D
Object.keys(obj).forEach(key=>{
userCondition[key]=obj[key]
})
})
let testFn=(elem,parent,key,depth)=>{
//I use comparisons with the model object
let condition=typeof elem!="object"?false:
Object.keys(userCondition)
.every(item=>userCondition[item]==elem[item])
//true if matches user condition(meaning elem must be an object), false otherwise
if(condition){
console.log(parent[key],"will now be deleted")
delete(parent[key]) //deletion example(if user conditions match)
}
}
forEvery(testObj,testFn)
console.log("and the changed object looks like",testObj)

Generate Treeview from Url in pure Javascript

I am getting the Url of files in the form of array like below
and I want to achieve an object like this
var mainObj = [
{
name: "Home",
files: ["excel doc 1.xlsx", "excel doc 2.xlsx"],
folders: [{
name: "Procedure",
files: ["excel doc 2.xlsx"],
folders: []
}],
},
{
name: "BusinessUnits",
files: [],
folders:[
{
name:"Administration",
files:[],
folders:[{
name: "AlKhorDocument",
files: [],
folders:[
{
name: "Album1",
files: [],
folders:[......]
}
]
}]
}
]
}
]
.......
kindly let me know if you can help in it.
By the way I want to achieve like that below
If you can suggest better, then it would help me..
By having only the path parts of the strings, you could reduce the array by reducing the folder structure and add the final file to the folder structure.
var data = ['Home/excel doc 1.xlsx', 'Home/excel doc 2.xlsx', 'Home/Procedure/excel doc', 'Home/Procedure/2.xlsx', 'BusinessUnits/Administration/AlKhorDocument/Album1/text.txt'],
result = data.reduce((r, p) => {
var path = p.split('/'),
file = path.pop(),
final = path.reduce((o, name) => {
var temp = (o.folders = o.folders || []).find(q => q.name === name);
if (!temp) o.folders.push(temp = { name });
return temp;
}, r);
(final.files = final.files || []).push(file);
return r;
}, {});
console.log(result);
.as-console-wrapper { max-height: 100% !important; top: 0; }
You need to perform some string parsing to split up the URL strings into different parts, gathering all information you need to create a tree-like structure.
Basically you could split all your URL strings into their sections and create the final data structure by analyzing all sub-sections of URL strings.
let urls = [
'http://host.com/Performance/excel doc 1.xlsx',
'http://host.com/BusinessUnits/Administration/AlKhorDocument/Album1/...',
// ...
];
let result = [];
urls.forEach(url => {
let relevantUrl = url.replace('http://host.com/', ''); // remove the unnecessary host name
let sections = relevantUrl.split('/'); // get all string sections from the URL
sections.forEach(section => {
// check if that inner object already exists
let innerObject = result.find(obj => obj.name === section);
if(!innerObject) {
// create an inner object for the section
innerObject = {
name: section,
files: [],
folders: []
};
}
// add the current URL section (as object) to the result
result.push(innerObject);
});
});
What you still have to deal with is saving the current sub-level of your section object, which you can do either iteratively or by calling a recursive function.

Convert JSON object to object which not in JSON format

I have a JSON object with the structure as below
const inputObj = {
"prop1": "val1",
"prop2": {
"prop2_1": "val2_1",
"prop2_2": "val2_2"
}
"prop3": "val3"
}
My objective: I would like to take the property, including the nested property, and store the result in a txt file, but not in JSON format. To make it clear, here is my expected output in the txt file:
{
prop1: {
id: 'prop1'
},
prop2_prop2_1: {
id: 'prop2.prop2_1'
},
prop2_prop2_2: {
id: 'prop2.prop2_2'
}
prop3: {
id: 'prop3'
}
}
So far, I could write the non nested property, but still not in the structure which I expected. Here is the result so far:
{
"prop1": "prop1",
"prop3": "prop3"
}
Its still in JSON format, not in the structure that I expected, and the nested property still not caught (I still thinking how to get it)
here is the code so far to make my current result:
const fs = require('fs')
const fileName = "./results.txt"
function getAllKeys(obj, path = [], result = []) {
Object.entries(obj).forEach(([k, v]) => {
if (typeof v === 'object') getAllKeys(v, path.concat(k), result)
else result.push(path.concat(k).join("."))
})
return result
}
const inputToFile = getAllKeys(inputObj)
// console.log(inputToFile)
// result of the console.log
// prop1
// prop2.prop2_1
// prop2.prop2_2
// prop3
const newObj = {}
for (var i = 0; i < inputToFile.length; i++) {
var input = inputToFile[i]
var dotIndex = input.indexOf('.') // to check if its from the nested JSON property of the inputObj
if (dotIndex === -1) {
// no dot or nested property in the JSON
newObj[input] = input.toString()
} else {
// if the input contain dot, which is a nested JSON
}
}
fs.writeFileSync(fileName, JSON.stringfy(newObj))
// if I use above line, the result in the file is as I had mention above. But, if the code is like below:
const finals = JSON.stringfy(newObj)
fs.writeFileSync(fileName, JSON.parse(finals))
// the output in the file is only "[Object object]" without double quote
Update
The reason why I need the result to be formatted like that, is because I want to use react-intl. I already have the locale file (the translation), which looks like the inputObj (the structure). Then, I need to make a file, which like this (below), so the lib could translate it:
import { defineMessages } from 'react-intl';
const MessagesId = defineMessages({
prop1: {
id: 'prop1'
},
prop2_prop2_1: {
id: 'prop2.prop2_1'
},
prop2_prop2_2: {
id: 'prop2.prop2_2'
},
prop3: {
id: 'prop3'
}
})
export default MessagesId;
Thats why, I need it to be not like JSON. Because I already have thousand codes for the translation, but need to define it in the MessagesId. It would be so much takes time rite if I do it manually .__.
Ps: the react-intl is works, the problem is only the converting as my initial questions
This script can handle multiple levels of nestied object.
const outputObj = {};
const convertNestedObj = (obj, parentKey = []) => {
for (key in obj) {
newParentKey = [...parentKey, key];
if (typeof obj[key] === 'object') {
convertNestedObj(obj[key], newParentKey);
} else {
outputObj[newParentKey.join('_')] = { id: newParentKey.join('_') };
}
}
};
convertNestedObj(inputObj);

Search for a related json data

How can i find data that is related to the already known data?
( I'm a newb. )
For example here is my json :
[
{ "id": "1", "log": "1","pass": "1111" },
{ "id": 2, "log": "2","pass": "2222" },
{ "id": 3, "log": "3","pass": "3333" }
]
Now i know that "log" is 1 and i want to find out the data "pass" that is related to it.
i've tried to do it so :
The POST request comes with log and pass data , i search the .json file for the same log value and if there is the same data then i search for related pass
fs.readFile("file.json", "utf8", function (err, data) {
var jsonFileArr = [];
jsonFileArr = JSON.parse(data); // Parse .json objekts
var log = loginData.log; // The 'log' data that comes with POST request
/* Search through .json file for the same data*/
var gibtLog = jsonFileArr.some(function (obj) {
return obj.log == log;
});
if (gotLog) { // If there is the same 'log'
var pass = loginData.pass; // The 'pass' data that comes with POST request
var gotPass = jsonFileArr.some(function (obj) {
// How to change this part ?
return obj.pass == pass;
});
}
else
console.log("error");
});
The problem is that when i use
var gotPass = jsonFileArr.some(function (obj) {
return obj.pass == pass;
});
it searches through the whole .json file and not through only one objekt.
Your main problem is that .some() returns a boolean, whether any of the elements match your predicate or not, but not the element itself.
You want .find() (which will find and return the first element matching the predicate):
const myItem = myArray.find(item => item.log === "1"); // the first matching item
console.log(myItem.pass); // "1111"
Note that it is possible for .find() to not find anything, in which case it returns undefined.
The .some() method returns a boolean that just tells you whether there is at least one item in the array that matches the criteria, it doesn't return the matching item(s). Try .filter() instead:
var jsonFileArr = JSON.parse(data);
var log = loginData.log;
var matchingItems = jsonFileArr.filter(function (obj) {
return obj.log == log;
});
if (matchingItems.length > 0) { // Was at least 1 found?
var pass = matchingItems[0].pass; // The 'pass' data that comes with the first match
} else
console.log("error"); // no matches
Using ES6 Array#find is probably the easiest, but you could also do (among other things)
const x = [{
"id": "1",
"log": "1",
"pass": "1111"
}, {
"id": 2,
"log": "2",
"pass": "2222"
}, {
"id": 3,
"log": "3",
"pass": "3333"
}];
let myItem;
for (let item of x) {
if (item.log === '1') {
myItem = item;
break;
}
}
console.log(myItem);

How to traverse JSON object locating particular property and pushing its contents to array?

I am working with a JSON object which can have a property ids at any leaf. I want to traverse the object and find all of the instances of the ids property and store each id in a collection.
Mocked up JSON Object (the ids property could be at much deeper property locations).
{
"id": "b38a683d-3fb6-408f-9ef6-f4b853ed1193",
"foo": {
"ids": [
{
"id": "bd0bf3bd-d6b9-4706-bfcb-9c867e47b881"
},
{
"id": "d1cc529d-d5d2-4460-b2bb-acf24a7c5999"
},
{
"id": "b68d0c8c-548e-472f-9b01-f25d4b199a71"
}
],
"baz": "super"
},
"bar": {
"ids": [
{
"id": "bd0bf3bd-d6b9-4706-bfcb-9c867e47b881"
},
{
"id": "d1cc529d-d5d2-4460-b2bb-acf24a7c5999"
},
{
"id": "b68d0c8c-548e-472f-9b01-f25d4b199a71"
}
]
}
}
I am using the following code to traverse the above JSON.
var jsonFile = require('./file_test.json'); // the above in my local directory
function traverse(obj, ids) {
for (var prop in obj) {
if (typeof obj[prop] == "object" && obj[prop]) {
if (prop == 'ids') {
for (var i = obj[prop].length - 1; i >= 0; i--) {
ids.push(obj[prop][i]._id);
};
}
traverse(obj[prop], ids);
}
}
}
var ids = new Array();
traverse(jsonFile, ids);
console.log('ids', ids);
The above nets the following:
ids
[
'b68d0c8c-548e-472f-9b01-f25d4b199a71',
'd1cc529d-d5d2-4460-b2bb-acf24a7c5999',
'bd0bf3bd-d6b9-4706-bfcb-9c867e47b881',
'b68d0c8c-548e-472f-9b01-f25d4b199a71',
'd1cc529d-d5d2-4460-b2bb-acf24a7c5999',
'bd0bf3bd-d6b9-4706-bfcb-9c867e47b881'
]
While my code works I am not convinced that I am doing this the most efficient or best way. Is there a better way to find all instances of the ids property? Perhaps without passing in an array but returning one? Or setting up for a callback with an ids array?
If the data was actually a JSON string, and not a JavaScript object, you could have something like:
// assuming `json` is the data string
var ids = [];
var data = JSON.parse(json, function(key, value) {
if (key === "id")
ids.push(value);
return value;
});
See reviver on JSON.parse method.
what you have is fine, but this is a little shorter and uses the .map function:
var jsonFile = require('./file_test.json'); // the above in my local directory
function traverse(obj) {
var ids = [];
for (var prop in obj) {
if (typeof obj[prop] == "object" && obj[prop]) {
if (prop == 'ids') {
ids = obj[prop].map(function(elem){
return elem.id;
})
}
ids =ids.concat(traverse(obj[prop]));
}
}
return ids;
}
var ids =traverse(jsonFile);
console.log('ids', ids);
What you're basically trying to do is a tree search of this JSON object, am I right? So if we assume that ids is always a leaf then we do not need to traverse those nodes as we know they are at the leaf and will contain what we want.
Change the if {...} traverse to if {...} else {traverse}
If it is possible to change the data structure of ids to a list of strings instead of a list of objects then you will be able to save the iteration over the array and just merge it onto the ids array passed in, but it depends completely on the context and whether or not you can make this change!
Sorry I'm not of more help!
Assuming ES5 is available natively or via a shim:
function gimmeIds(obj) {
return Object.keys(obj||{})
.reduce(function(ids, key) {
if(key === 'ids') {
return ids.concat(obj[key].map(function(idObj) {
return idObj.id;
}));
}
if(obj[key] && typeof obj[key] == 'object') {
return ids.concat(gimmeIds(obj[key]));
}
return ids;
}, []);
}
Using object-scan this becomes very simple. Note that you can easily specify what is targeted (in this case **.ids[*].id)
// const objectScan = require('object-scan');
const data = { id: 'b38a683d-3fb6-408f-9ef6-f4b853ed1193', foo: { ids: [{ id: 'bd0bf3bd-d6b9-4706-bfcb-9c867e47b881' }, { id: 'd1cc529d-d5d2-4460-b2bb-acf24a7c5999' }, { id: 'b68d0c8c-548e-472f-9b01-f25d4b199a71' }], baz: 'super' }, bar: { ids: [{ id: 'bd0bf3bd-d6b9-4706-bfcb-9c867e47b881' }, { id: 'd1cc529d-d5d2-4460-b2bb-acf24a7c5999' }, { id: 'b68d0c8c-548e-472f-9b01-f25d4b199a71' }] } };
const findIds = (input) => objectScan(['**.ids[*].id'], { rtn: 'value' })(input);
console.log(findIds(data));
/* => [ 'b68d0c8c-548e-472f-9b01-f25d4b199a71',
'd1cc529d-d5d2-4460-b2bb-acf24a7c5999',
'bd0bf3bd-d6b9-4706-bfcb-9c867e47b881',
'b68d0c8c-548e-472f-9b01-f25d4b199a71',
'd1cc529d-d5d2-4460-b2bb-acf24a7c5999',
'bd0bf3bd-d6b9-4706-bfcb-9c867e47b881' ]
*/
.as-console-wrapper {max-height: 100% !important; top: 0}
<script src="https://bundle.run/object-scan#13.7.1"></script>
Disclaimer: I'm the author of object-scan

Categories

Resources