JS RegEx: strings to JSON object - javascript

I need to convert this:
-[firstLink](https://webapp.com/topic/ab75ca14-dc7c-4c3f-9115-7b1b94f88ff6)
--[secondLink](https://webapp.com/source/d93f154c-fb1f-4967-a70d-7d120cacfb05)
-[thirdLink](https://webapp.com/topic/31b85921-c4af-48e5-81ae-7ce45f55df81)
into this:
const obj = [{
name: 'firstLink',
type: 'topic',
id: 'ab75ca14-dc7c-4c3f-9115-7b1b94f88ff6',
spacing: 1, // single "-"
}, {
name: 'secondLink',
type: 'source',
id: 'd93f154c-fb1f-4967-a70d-7d120cacfb05',
spacing: 2, // two "-"
}, {
name: 'thirdLink',
type: 'topic',
id: '31b85921-c4af-48e5-81ae-7ce45f55df81',
spacing: 1,
}]
Here is what I'm trying now:
const items = text.split('\n');
const re = /(-+)\[([^\]]+)\]\(([^)"]+)\)/
const arrays = items.map(function (item) {
return item.split(re);
})
For some reason, arrays contains a bunch of empty strings ("") at the start and end of each item.
1. So, first question, is how to get rid of it?
2. Is there alternative to this regex tester, which actually shows such issues? This one doesn't show empty strings in results and, probably, some other issues.
Next, i'll do this:
const result = arrays.map(function (singleArray) {
if (!singleArray[1]) return null
const spacing = singleArray[1].length;
const name = singleArray[2]
const typeRe = /(source|topic)/;
const uuidRe = /([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})/;
const type = typeRe.exec(singleArray[3])[1]
const id = uuidRe.exec(singleArray[3])[1]
return {
name, type, id, spacing,
}
})
And third question: this code looks very ugly, what am i doing wrong?

I think you're trying to solve this problem with a lot of regular expressions when the first one you have is enough on its own.
So for the matches got for the regex matches = line.match(re)
matches[1] is the run of hyphens, therefore spacing = matches[1].length
matches[2] is the hyperlink alt and for your case the name
matches[3] is the hyperlink src which needs to be split by /, the two last items are type and id, note that this assumes that the src will always have the same structure
/**
 * Parse lines of the form `-[name](https://host/type/id)` into descriptor
 * objects.
 *
 * Lines that do not contain such a link (blank lines, a trailing newline,
 * unrelated text) are skipped instead of throwing, because
 * `String.prototype.match` returns `null` when nothing matches.
 *
 * @param {string} content - Newline-separated text to parse.
 * @returns {{name: string, spacing: number, id: string, type: string}[]}
 *   One object per matching line. `spacing` is the number of leading hyphens;
 *   `type` and `id` are the last two `/`-separated segments of the link
 *   target, so this assumes the target always ends in `.../type/id`.
 */
function solve(content) {
  const re = /(-+)\[([^\]]+)\]\(([^)"]+)\)/;
  return content.split('\n').reduce((items, line) => {
    const matches = line.match(re);
    if (matches === null) {
      // Not a link line: nothing to extract.
      return items;
    }
    const [, hyp, alt, src] = matches;
    const segments = src.split('/');
    items.push({
      name: alt,
      spacing: hyp.length,
      id: segments[segments.length - 1],
      type: segments[segments.length - 2],
    });
    return items;
  }, []);
}
const lines = `-[firstLink](https://webapp.com/topic/ab75ca14-dc7c-4c3f-9115-7b1b94f88ff6)
--[secondLink](https://webapp.com/source/d93f154c-fb1f-4967-a70d-7d120cacfb05)
-[thirdLink](https://webapp.com/topic/31b85921-c4af-48e5-81ae-7ce45f55df81)`
console.log(solve(lines))

And this would be my solution with str.match() without using any capture groups, RegExp.exec() method and while loops.
var reg = /-+(?=\[)|[^[]+(?=])|[a-z]+(?=\/\w*-)|[\w-]+(?=\))/g,
data = '-[firstLink](https://webapp.com/topic/ab75ca14-dc7c-4c3f-9115-7b1b94f88ff6)\n--[secondLink](https://webapp.com/source/d93f154c-fb1f-4967-a70d-7d120cacfb05)\n-[thirdLink](https://webapp.com/topic/31b85921-c4af-48e5-81ae-7ce45f55df81)',
obj = data.match(reg).reduce((p,c,i) => (i%4 ? p[p.length-1].push(c) : p.push([c]) ,p),[])
.map(e => ({"name": e[1], "type": e[2], "id": e[3], "spacing": e[0].length}));
console.log(obj);

The needed array of objects can be obtained with few lines of code using String.split, String.match and Array.map functions(and I suppose, the following code doesn't look ugly at all):
const items = text.split(/\n/).map(function(v){
var parts = v.match(/([-]+?)\[(\w+?)\]\(https?:\/\/[^/]+\/(\w+?)\/([^)]+)\b/);
return {'name': parts[2], 'type': parts[3], 'id': parts[4], 'spacing': parts[1].length};
});
console.log(JSON.stringify(items, 0, 4));
The output:
[
{
"name": "firstLink",
"type": "topic",
"id": "ab75ca14-dc7c-4c3f-9115-7b1b94f88ff6",
"spacing": 1
},
{
"name": "secondLink",
"type": "source",
"id": "d93f154c-fb1f-4967-a70d-7d120cacfb05",
"spacing": 2
},
{
"name": "thirdLink",
"type": "topic",
"id": "31b85921-c4af-48e5-81ae-7ce45f55df81",
"spacing": 1
}
]

Here you go:
const array = text.split(/\n/);
let box, sliced;
const obj = array.map(line => {
box = {};
sliced = line.match(/\[[a-z]+\]/i)[0];
box.name = sliced.slice(1, sliced.length - 1);
box.spacing = line.match(/\-+/)[0].length;
box.type = line.replace(/.*\.com\/([a-z]+)\/.*/, '$1');
box.id = line.replace(/.*\/([a-z0-9\-]+)\)$/, '$1')
return box;
});

I think it's better to use RegEx.exec. Please try as my script.
// Matches one `-[name](link)` entry: group 1 is the run of hyphens, group 2
// the name, group 3 the link target. The `g` flag lets exec() walk through
// every entry in `data`, resuming after the previous match each time.
const regex = /(-+)\[(\w+)\]\(([^)]+)\)/img;
const data = '-[firstLink](https://webapp.com/topic/ab75ca14-dc7c-4c3f-9115-7b1b94f88ff6)\n--[secondLink](https://webapp.com/source/d93f154c-fb1f-4967-a70d-7d120cacfb05)\n-[thirdLink](https://webapp.com/topic/31b85921-c4af-48e5-81ae-7ce45f55df81)';
const items = [];
let groups;
// exec() returns null once there are no further matches.
while ((groups = regex.exec(data)) !== null) {
  items.push({
    // Store the hyphen count, not the hyphen string itself.
    spacing: groups[1].length,
    name: groups[2],
    link: groups[3],
  });
}
If your links are always in the same domain, extracting type and id could be place in single regex.
const regex = /(-+)\[(\w+)\]\(https:\/\/webapp\.com\/(\w+)\/([^)]+)\)/img;
const data = '-[firstLink](https://webapp.com/topic/ab75ca14-dc7c-4c3f-9115-7b1b94f88ff6)\n--[secondLink](https://webapp.com/source/d93f154c-fb1f-4967-a70d-7d120cacfb05)\n-[thirdLink](https://webapp.com/topic/31b85921-c4af-48e5-81ae-7ce45f55df81)';
var groups, items = [];
while(groups = regex.exec(data)){
items.push({
spacing: groups[1].length,
name: groups[2],
type: groups[3],
id: groups[4]
})
}
Regarding your question on why your split result contains two additional empty strings, you should read section Capturing parentheses in this guide
Hope this will help.

Related

find duplicate values in array of objects in javascript

I have two lists in javascript that are of same structure like below:
var required_documents = [{"id":1,"dt":1},{"id":2,"dt":2},{"id":3,"dt":3}];
var existing_documents = [{"id":1,"dt":1},{"id":2,"dt":2},{"id":3,"dt":4}];
I need to remove all records from database that are in existing documents list (i.e "dt") but NOT in required_documents list.
For the above scenario I should remove only {"id":3,"dt":4} and insert {"id":3,"dt":3}. I am not sure how I can compare on just one property. This is below that I found on SOF sometime ago but can't find it again apologies for not referencing it.
required_documents.forEach((obj) => {
const elementInArr2 = existing_documents.find((o) => o.dt === obj.dt);
console.log('found elementinarr: ' + obj.dt);
});
This returns unique objects like dt:1,dt:2,dt:3 but I need dt:4 from the existing documents list as it is the one that is not in the required documents list and needs to be deleted. How can I get just the one that is not in the required documents list.
Assuming both id and dt properties are significant, I would first create a means of hashing an entry and then build a hashed set of required_documents.
Then you can filter out anything from existing_documents that is in the set, leaving only the results you want.
const required_documents = [{"id":1,"dt":1},{"id":2,"dt":2},{"id":3,"dt":3}];
const existing_documents = [{"id":1,"dt":1},{"id":2,"dt":2},{"id":3,"dt":4}];
// a simple stringify hash
const createHash = ({ id, dt }) => JSON.stringify({ id, dt });
const requiredHashSet = new Set(required_documents.map(createHash));
const result = existing_documents.filter(
(doc) => !requiredHashSet.has(createHash(doc))
);
console.log(result);
The hash creation can be anything that produces a comparable entity that can uniquely identify a record.
You need to run it twice to make sure there are no duplicate elements left in the existing list, so create a function and reuse it.
var required_documents = [{"id":1,"dt":1},{"id":2,"dt":2},{"id":3,"dt":3}];
var existing_documents = [{"id":1,"dt":1},{"id":2,"dt":2},{"id":3,"dt":4}]
let output = [];
output = output.concat(extractUniqueValues(required_documents, output));
output = output.concat(extractUniqueValues(existing_documents, output));
console.log(output)
/**
 * Keep the entries of `input` whose `dt` is not already present in `output`.
 *
 * @param {Array<{dt: *}>} input - Candidate entries.
 * @param {Array<{dt: *}>} output - Entries collected so far.
 * @returns {Array} A new array; neither argument is modified.
 */
function extractUniqueValues(input, output) {
  // Loose equality is deliberate here: it is what the comparison has always used.
  const hasSameDt = (candidate) => (collected) => collected.dt == candidate.dt;
  return input.filter((candidate) => {
    const alreadyCollected = output.find(hasSameDt(candidate));
    return !alreadyCollected;
  });
}
You can do like below
// Documents that are required.
const required_documents = [
  { id: 1, dt: 1 },
  { id: 2, dt: 2 },
  { id: 3, dt: 3 },
];
// Documents that currently exist. After the loop below this holds only the
// entries that have no exact (id, dt) counterpart in required_documents.
const existing_documents = [
  { id: 1, dt: 1 },
  { id: 2, dt: 2 },
  { id: 3, dt: 4 },
];
for (const required of required_documents) {
  // Walk backwards so splice() never shifts an index we have yet to visit.
  for (let i = existing_documents.length - 1; i >= 0; i -= 1) {
    const existing = existing_documents[i];
    if (required.id === existing.id && required.dt === existing.dt) {
      existing_documents.splice(i, 1);
    }
    // required_documents is intentionally left untouched: appending to it
    // while iterating over it would keep the outer loop running forever.
  }
}
LOG not exist [{"dt": 4, "id": 3}]
LOG unique items [{"dt": 1, "id": 1}, {"dt": 2, "id": 2}, {"dt": 3, "id": 3}]
If you don't care about time complexity, something like this should work:
var new_documents = existing_documents.filter(ed => {
return required_documents.find(rd => rd.dt == ed.dt);
});
Edit Okay, I just reread your question and I'm a bit confused. Do you want the object {id: 3, dt: 3} inside the new array as well?

JS - Filter array of objects returning only a specific field (some kind of combination between map and filter)

I have the following array
const arr = [
{ id: 1, token: "aAdsDDwEwe43svdwe2Xua" },
{ id: 2, token: undefined }
];
And I need to filter out undefined tokens, and ignore the id field.
Something like:
const arr = [
{ id: 1, token: "aAdsDDwEwe43svdwe2Xua" },
{ id: 2, token: undefined },
];
const result = arr
.filter(({ token }) => token !== undefined)
.map(({ token }) => token);
console.log(result);
Is it possible to do it in O(n) ? I mean, without navigating through the list twice.
// Collect every defined token in a single pass. The accumulator is appended
// to in place; copying it on each step (`[...acc, token]`) would make the
// whole reduce quadratic in the length of `arr`.
const result = arr.reduce((tokens, { token }) => {
  if (token !== undefined) {
    tokens.push(token);
  }
  return tokens;
}, []);
Firstly it is O(n). Just because we run over a loop two times, it does not become O(n^2).
Additionally, if you simply use a for loop you will realise how simple it is:
// Sample records; the second one has no token.
const arr = [
  { id: 1, token: "aAdsDDwEwe43svdwe2Xua", extraField : "x" },
  { id: 2, token: undefined, extraField : "x" }
];
// Copies of the records that have a token, with `id` left out.
let ans = [];
arr.forEach((record) => {
  if (record.token === undefined) {
    return;
  }
  // Rest destructuring gathers every property except `id`.
  const { id, ...withoutId } = record;
  ans.push(withoutId);
});
console.log(ans);
Used spread operator (...), to remove a particular property (here id).
If you are looking for array methods, the above could also be achieved using a .forEach()

Add unique generated Identifier on array of json object

How to add uniqueId field in below JSON. This array has large number of data and needs to dynamic unique identifier on existing array.
[{"title":"Accompanying"},{"title":"Chamber music"},{"title":"Church
music"}......]
so, this should look as follow:
[{"title":"Accompanying", "uniqueId": 1},{"title":"Chamber music", "uniqueId": 2}..]
uniqueId- type, number or guid.
Note: don't know the "title" or what other fields could be, so, could not map the fields by name.
I would go for a simple for loop
let myArray = [{"title":"Accompanying"},{"title":"Chamber music"},{"title":"Church music"}];
let i = 0, ln = myArray.length;
for (i;i<ln;i++){
myArray[i].uniqueId = i+1;
}
console.log(myArray);
If this is a one time thing you could do the following:
const newArray = oldArray.map((x, i) => ({
// If the object is dynamic you can spread it out here and add the ID
...x,
// Use the items index in the array as a unique key
uniqueId: i,
}));
If you want to use a guid generator instead (I'd recommend that) just replace i with whatever you use to generate a GUID and ensure that any time you add to the collection you generate a new GUID for the data.
const newArray = oldArray.map((x) => ({ ...x, uniqueId: generateGuid() }));
const yourDynamicObjects = [
{
title: 'A title',
author: 'A. Author'
},
{
foo: 'bar',
},
{
quotient: 2,
irrational: Math.sqrt(2)
}
];
const updatedData = yourDynamicObjects.map((x, i) => ({ ...x, uniqueId: i, }));
console.log(updatedData);
You can use map and, in its callback function, use the index parameter to create uniqueId
item.title is not known actually as its dynamic array and so, could
not map with particular field names
In this case use Object.keys to get an array of all the keys . Then loop over it and add the key to a new object
let k = [{
  "title": "Accompanying"
}, {
  "title": "Chamber music"
}, {
  "title": "Church"
}]

/**
 * Return a shallow copy of every item with a 1-based `uniqueId` added.
 *
 * Keys are read from each item individually, so items with different fields
 * keep all of their own fields, and an empty array yields an empty array.
 *
 * @param {Object[]} items - Objects with arbitrary keys.
 * @returns {Object[]} New objects; the originals are not modified.
 */
function addUniqueIds(items) {
  return items.map(function(item, index) {
    let obj = {};
    Object.keys(item).forEach(function(elem) {
      obj[elem] = item[elem];
    });
    obj.uniqueId = index + 1;
    return obj;
  });
}

let n = addUniqueIds(k);
console.log(n)
Also you can use spread operator
// Sample input: objects whose field names are not known in advance.
let k = [
  { "title": "Accompanying" },
  { "title": "Chamber music" },
  { "title": "Church" }
]
// Copy each item's fields and add a 1-based uniqueId taken from its position.
let n = k.map((item, index) => ({ ...item, uniqueId: index + 1 }));
console.log(n)

flatten array of object into an array

const x = [{
name:"abc",
},{
name:"xyz"
}]
how to turn above array of object into an array?
expected output
x = ['abc','xyz']
I know I can do a native loop, use push to a new empty array but I'm looking for one line es2015/es6 or even lodash method
Simply use the map function:
const y = x.map(c => c.name);
const x = [{
name:"abc",
},{
name:"xyz"
}]
const names = x.map(c => c.name);
console.log(names);
Solution in Lodash (very similar to plain js):
const x = [{
  name:"abc",
},{
  name:"xyz"
}]
// Passing a property name to lodash's _.map plucks that property from each
// element. (The `=` was missing from this declaration.)
const names = _.map(x, 'name'); // => ['abc', 'xyz']
Edit
as requested also in plain js
const names = x.map(el => el.name);
or
const names = x.map(function(el) {
return el.name;
});
x = [{
name:"abc",
},{
name:"xyz"
}];
x = x.map(function (value) {
return value.name;
});
Use map()
let res = x.map(o => o.name);

Efficiently rename/re-map javascript/json object keys within array of objects

I have some structured JSON data like so. Let's assume this is interchangeable, via JSON.parse():
[
{
"title": "pineapple",
"uid": "ab982d34c98f"
},
{
"title": "carrots",
"uid": "6f12e6ba45ec"
}
]
I need it to look like this, remapping title to name, and uid to id with the result:
[
{
"name": "pineapple",
"id": "ab982d34c98f"
},
{
"name": "carrots",
"id": "6f12e6ba45ec"
}
]
The most obvious way of doing it is like this:
str = '[{"title": "pineapple","uid": "ab982d34c98f"},{"title": "carrots", "uid": "6f12e6ba45ec"}]';
var arr = JSON.parse(str);
for (var i = 0; i<arr.length; i++) {
arr[i].name = arr[i].title;
arr[i].id = arr[i].uid;
delete arr[i].title;
delete arr[i].uid;
}
str = '[{"title": "pineapple","uid": "ab982d34c98f"},{"title": "carrots", "uid": "6f12e6ba45ec"}]';
var arr = JSON.parse(str);
for (var i = 0; i<arr.length; i++) {
arr[i].name = arr[i].title;
arr[i].id = arr[i].uid;
delete arr[i].title;
delete arr[i].uid;
}
$('body').append("<pre>"+JSON.stringify(arr, undefined, 4)+"</pre>");
<script src="https://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
...or using something more complex (albeit not more efficient) like this.
This is all fine and dandy, but what if there were 200,000 objects in the array? This is a lot of processing overhead.
Is there a more efficient way to remap a key name? Possibly without looping through the entire array of objects? If your method is more efficient, please provide proof/references.
As I already mentioned in the comments, if you can make certain assumptions about the values of the objects, you could use a regular expression to replace the keys, for example:
str = str.replace(/"title":/g, '"name":');
It's not as "clean", but it might get the job done faster.
If you have to parse the JSON anyway, a more structured approach would be to pass a reviver function to JSON.parse and you might be able to avoid an additional pass over the array. This probably depends on how engine implement JSON.parse though (maybe they parse the whole string first and then make a second pass with the reviver function, in which case you wouldn't get any advantage).
var arr = JSON.parse(str, function(prop, value) {
switch(prop) {
case "title":
this.name = value;
return;
case "uid":
this.id = value;
return;
default:
return value;
}
});
Benchmarks, using the Node.js script below to test 3 times:
1389822740739: Beginning regex rename test
1389822740761: Regex rename complete
// 22ms, 22ms, 21ms
1389822740762: Beginning parse and remap in for loop test
1389822740831: For loop remap complete
// 69ms, 68ms, 68ms
1389822740831: Beginning reviver function test
1389822740893: Reviver function complete
// 62ms, 61ms, 60ms
It appears as if the regex (in this case) is the most efficient, but be careful when trying to parse JSON with regular expressions.
Test script, loading 100,230 lines of the OP's sample JSON:
fs = require('fs');
fs.readFile('test.json', 'utf8', function (err, data) {
if (err) {
return console.log(err);
}
console.log(new Date().getTime() + ": Beginning regex rename test");
var str = data.replace(/"title":/g, '"name":');
str = str.replace(/"uid":/g, '"id":');
JSON.parse(str);
console.log(new Date().getTime() + ": Regex rename complete");
console.log(new Date().getTime() + ": Beginning parse and remap in for loop test");
var arr = JSON.parse(data);
for (var i = 0; i < arr.length; i++) {
arr[i].name = arr[i].title;
arr[i].id = arr[i].uid;
delete arr[i].title;
delete arr[i].uid;
}
console.log(new Date().getTime() + ": For loop remap complete");
console.log(new Date().getTime() + ": Beginning reviver function test");
var arr = JSON.parse(data, function (prop, value) {
switch (prop) {
case "title":
this.name = value;
return;
case "uid":
this.id = value;
return;
default:
return value;
}
});
console.log(new Date().getTime() + ": Reviver function complete");
});
I asked this question a long time ago, and since then I've grown accustomed to using Array.prototype.map() to get the job done, more for stability and cleanliness of code than performance. While it's certainly not the most performant, it looks great:
var repl = orig.map(function(obj) {
return {
name: obj.title,
id: obj.uid
}
})
If you need a more flexible (and ES6-compatible function), try:
/**
 * Return copies of the objects in `a` with their keys renamed according to `r`.
 *
 * @param {Object[]} a - Objects to copy; they are not modified.
 * @param {Object} r - Map of old key -> new key. Only its own properties are
 *   consulted, so a key named `constructor` or `toString` is not renamed by
 *   accident through Object.prototype. Keys without a (truthy) mapping keep
 *   their name.
 * @returns {Object[]} New objects, one per input object. An input object with
 *   no keys yields `{}`.
 */
let replaceKeyInObjectArray = (a, r) => a.map(o =>
  // The explicit `{}` seed keeps reduce from throwing on an empty key list.
  Object.keys(o).reduce((renamed, key) => {
    const mapped = Object.prototype.hasOwnProperty.call(r, key) ? r[key] : undefined;
    renamed[mapped || key] = o[key];
    return renamed;
  }, {}))
e.g.
const arr = [{ abc: 1, def: 40, xyz: 50 }, { abc: 1, def: 40, xyz: 50 }, { abc: 1, def: 40, xyz: 50 }]
const replaceMap = { "abc": "yyj" }
replaceKeyInObjectArray(arr, replaceMap)
/*
[
{
"yyj": 1,
"def": 40,
"xyz": 50
},
{
"yyj": 1,
"def": 40,
"xyz": 50
},
{
"yyj": 1,
"def": 40,
"xyz": 50
}
]
*/
Here's another take on the OP's suggestion to use map() for clarity (not performance).
var newItems = items.map(item => ({
name: item.title,
id: item.uid
}));
This uses ES6 arrow functions and the shortcut syntaxes that are possible when there's only one parm passed to the function and only one statement in the body of the function.
Depending on your history with lambda expressions in various languages, this form may or may not resonate with you.
Be careful when returning an object literal in the arrow function shortcut syntax like this. Don't forget the additional parens around the object literal!
If you want to make it a little more reusable. Maybe this is a decent approach.
/**
 * Rename keys on every object in `arr`, in place.
 *
 * @param {Object[]} arr - Objects to modify.
 * @param {Object} lookup - Map of old key -> new key.
 * @returns {Object[]} The same `arr` instance that was passed in.
 */
function rekey(arr, lookup) {
  const renames = Object.keys(lookup);
  for (const obj of arr) {
    for (const fromKey of renames) {
      const toKey = lookup[fromKey];
      // Renaming a key to itself must be a no-op; copy-then-delete would drop it.
      if (toKey === fromKey) {
        continue;
      }
      // Test for presence, not truthiness, so 0, '' and false are renamed too.
      if (!Object.prototype.hasOwnProperty.call(obj, fromKey)) {
        continue;
      }
      obj[toKey] = obj[fromKey];
      delete obj[fromKey];
    }
  }
  return arr;
}
var arr = [{ apple: 'bar' }, { apple: 'foo' }];
var converted = rekey(arr, { apple: 'kung' });
console.log(converted);
Using ES6:
/**
 * Build new objects from `arr` in which `oldField` is stored under `newField`.
 * All other keys are copied under their own name; the inputs are not modified.
 */
const renameFieldInArrayOfObjects = (arr, oldField, newField) =>
  arr.map((record) => {
    const renamed = {};
    Object.keys(record).forEach((key) => {
      const target = key === oldField ? newField : key;
      renamed[target] = record[key];
    });
    return renamed;
  });
Using ES7:
/**
 * Build new objects from `arr` in which `oldField` is stored under `newField`,
 * using object spread to grow the copy one key at a time. Inputs are not
 * modified.
 */
const renameFieldInArrayOfObjects = (arr, oldField, newField) =>
  arr.map((record) =>
    Object.keys(record).reduce((renamed, key) => {
      const target = key === oldField ? newField : key;
      return { ...renamed, [target]: record[key] };
    }, {})
  );
You can use an npm package named node-json-transform.
Your data :
const data = [
{
title: 'pineapple',
uid: 'ab982d34c98f',
},
{
title: 'carrots',
uid: '6f12e6ba45ec',
},
];
Your mapping :
const map = {
item: {
name: 'title',
id: 'uid',
},
};
And use the package :
const DataTransform = require("node-json-transform").DataTransform;
const dataTransform = DataTransform(data, map);
const result = dataTransform.transform();
console.log(result);
Result :
[
{
name: 'pineapple',
id: 'ab982d34c98f'
},
{
name: 'carrots',
id: '6f12e6ba45ec'
}
]
Maybe it's not the best way for performance, but it's quite elegant.
var jsonObj = [/*sample array in question*/ ]
Based on different benchmarks discussed below, fastest solution is native for:
var arr = [];
for(var i = 0, len = jsonObj .length; i < len; i++) {
arr.push( {"name": jsonObj[i].title, "id" : jsonObj[i].uid});
}
Alternatively, without using a framework, I think this would be option 2:
var arr = []
jsonObj.forEach(function(item) { arr.push({"name": item.title, "id" : item.uid }); });
There is always debate between using native and non-native functions. If I remember correctly, lodash argued they were faster than underscore because they use non-native functions for key operations.
However different browsers will produce sometimes very different results. I always looked for the best average.
For benchmarks you can take a look at this:
http://jsperf.com/lo-dash-v1-1-1-vs-underscore-v1-4-4/8
/**
 * Replace every occurrence of `value` in `str` with `replace`.
 *
 * The text is scanned once, so the call always terminates, including when
 * `replace` itself contains `value` (e.g. "a" -> "aa"). Both `value` and
 * `replace` are treated as literal text; `$`-sequences in `replace` are not
 * interpreted.
 *
 * @param {string} value - Text to search for. An empty string matches nothing.
 * @param {string} replace - Text to insert in place of each occurrence.
 * @param {string} str - Text to search in.
 * @returns {string} The rewritten text; `str` itself is not modified.
 */
function replaceElem(value, replace, str) {
  const needle = String(value);
  if (needle === '') {
    // An empty needle is "found" at every position; there is nothing to replace.
    return str;
  }
  return str.split(needle).join(String(replace));
}
call this from main
// Key name to look for and the name to put in its place. The JSON in question
// spells the key "title".
var value = "title";
var replace = "name";
// Strings are immutable, so the rewritten text is the return value; keep it.
// NOTE(review): a bare "title" also matches inside values — confirm whether
// the quoted form '"title":' should be searched for instead.
str = replaceElem(value, replace, str);

Categories

Resources