JSON Schema extract the required fields - javascript

I need to get a list of the required fields out of a JSON-Schema+Data.
Currently, we are using AJV to get error messages in our forms with JSON Schema and it is working great.
I need a way to get all the required fields (even if filled) in order to mark those fields with * as "required". required fields might change depending on the schema and data combinations.
Also tried hacking tv4 to extract the required fields without success.
Please help.
Example for such schema:
{
"type": "object",
"required": [
"checkbox"
],
"properties": {
"checkbox": {
"type": "boolean"
},
"textbox": {
"type": "string"
}
},
"oneOf": [
{
"required": [
"textbox"
],
"properties": {
"checkbox": {
"enum": [
true
]
}
}
},
{
"properties": {
"checkbox": {
"enum": [
false
]
}
}
}
],
"additionalProperties": false
}

Rereading your question the easiest way to do what you'd like would be to
get the Json data on page load,
iterate over the json data to remove valid values (see sample 1),
Call tv4.validateMultiple(data, schema),
check the result object and get the required fields (see sample 2).
sample 1
// Overwrite every own property of `data` with an invalid value so that a
// subsequent validation run reports an error for each field.
for (const prop of Object.keys(data)) {
  // Set the value to null, -1, or some other universally bad value.
  data[prop] = null;
}
sample 2
let result = tv4.validateMultiple(data, schema);
let required = result.errors;

We solved it by:
Forking tv4 (tv4 - because it was easy to edit):
https://github.com/mikila85/tv4
outputting an array of "Requireds".
We iterated over each required field, emptying its data and sending data+schema to AJV for validation (AJV and not tv4 because it's faster at parsing).
By doing that we could know individually which required field is required for the given data.
these are the working functions we came out with (not the cleanest but will help get the idea)
/**
 * Runs tv4 against the current form model/schema and returns the `requireds`
 * paths it reports, with duplicates removed (first occurrence wins).
 *
 * @returns {Array} unique entries of the `requireds` array, in original order
 */
function getAllRequiredFields() {
    var reported = tv4.validateMultiple($scope.formModel, $scope.formSchema).requireds;
    var uniquePaths = [];
    reported.forEach(function (path, position) {
        // Keep a path only at the position where it first appears.
        if (reported.indexOf(path) === position) {
            uniquePaths.push(path);
        }
    });
    return uniquePaths;
}
/**
 * Flags, for the current model, every field that is actually required.
 *
 * For each candidate path from getAllRequiredFields() a copy of the model is
 * made, the value at that path is deleted, and the copy is validated. If the
 * validation fails with an error whose `path` equals the candidate path, the
 * error's `inputName` is marked `isRequired` in `localValidation` and appended
 * to `requiredFieldsPath`. Logs the elapsed time.
 *
 * NOTE(review): `validateForm` is presumably a compiled AJV validator (it is
 * called as a function and exposes `.errors`) - confirm against its definition.
 */
function getRequiredFields() {
    var startedAt = performance.now();
    // Should be called on every model change because of the optimization in tv4 for the data+schema.
    var candidatePaths = getAllRequiredFields();
    angular.forEach(candidatePaths, function (requiredPath) {
        var modelWithoutValue = angular.copy($scope.formModel);
        deleteValue(modelWithoutValue, requiredPath);
        if (validateForm(modelWithoutValue)) {
            // Still valid without the value, so it is not required for this data.
            return;
        }
        var matchingError = getErrorObjectsArray(validateForm.errors).find(function (error) {
            return error.path === requiredPath;
        });
        if (!matchingError) {
            return;
        }
        var inputName = matchingError.inputName;
        localValidation[inputName] = localValidation[inputName] || {};
        localValidation[inputName].isRequired = true;
        requiredFieldsPath.push(inputName);
    });
    var finishedAt = performance.now();
    console.log("form checking took " + (finishedAt - startedAt) + " milliseconds.");
}

This function grabs schema indices recursively, so maybe you could adapt it a little
// https://github.com/pubkey/rxdb/blob/master/src/rx-schema.js
/**
 * Recursively collects the paths of all schema nodes that have a truthy
 * `index` attribute.
 *
 * Each hit is returned as a one-element array `[path]`. On the top-level call
 * (`prePath === ''`) the entries of `jsonID.compoundIndexes` are appended.
 * `properties` keys do not contribute to the dotted path.
 *
 * @param {Object} jsonID - schema (or sub-schema) object to scan
 * @param {string} [prePath=''] - dotted path of `jsonID` inside the root schema
 * @returns {Array<Array<string>>} index definitions, de-duplicated by value
 */
export function getIndexes(jsonID, prePath = '') {
    let indexes = [];
    Object.entries(jsonID).forEach(([key, obj]) => {
        // `typeof null === 'object'`, so null values (e.g. `default: null`)
        // must be skipped before touching `obj.index` or recursing.
        if (obj === null || obj === undefined) {
            return;
        }
        const path = key === 'properties' ? prePath : util.trimDots(prePath + '.' + key);
        if (obj.index) {
            indexes.push([path]);
        }
        if (typeof obj === 'object' && !Array.isArray(obj)) {
            indexes = indexes.concat(getIndexes(obj, path));
        }
    });

    if (prePath === '') {
        indexes = indexes.concat(jsonID.compoundIndexes || []);
    }

    // Entries are arrays, so uniqueness has to be decided by content;
    // identity comparison (indexOf) never treats two equal arrays as duplicates.
    const seen = new Set();
    return indexes.filter(elem => {
        const fingerprint = JSON.stringify(elem);
        if (seen.has(fingerprint)) {
            return false;
        }
        seen.add(fingerprint);
        return true;
    });
}

Related

Iterate deeply nested object with unknown level and add remove key/value based on user provided conditions

Could anyone please guide me how to achieve the below challenge which I am facing?
I have thousands of mock API request/response JSON files. They are deeply nested, and they are all structured differently. I need to add/update/delete an entry at the specific location where the conditions provided by the user match. I am not sure how to approach this problem. I have tried doing something like the code below, where I ask the user for the path at which to start looking. But this increases the effort, as the user has to look up the path in every file and pass that info to the API. The code below works only up to 2 levels. I need to search the full tree for the place where all user-provided conditions match, and at that place I need to add/update/delete data. I took the conditions as an array of objects.
Draft Code
const _ = require("lodash");
const file = "./sample.json";
const actions = ["add", "delete", "update"];
/**
 * Applies `action` to the value found at `key` inside the JSON file `file`,
 * for every candidate object that satisfies all `where` conditions (AND).
 *
 * Only "add" is implemented: `data` is merged into each matching object (or
 * into the value at `key` itself when it is a single object). Any other
 * allowed action falls through to the default branch. The result is only
 * logged; nothing is written back to disk.
 *
 * @param {string} file - module path of the JSON file, loaded with require()
 * @param {string} key - lodash path of the value to operate on
 * @param {Array<Object>} where - non-empty list of condition objects
 * @param {Object} data - properties to merge into each match
 * @param {string} action - one of `actions`, case-insensitive
 */
const consumer = (file, key, where, data, action) => {
    try {
        const act = action.toLowerCase();
        if (!actions.includes(act)) throw new Error("invalid action provided");
        // Reject a missing, empty or malformed where clause. `_.isObject` is
        // passed as the predicate rather than being called.
        if (!_.isArray(where) || where.length === 0 || !_.every(where, _.isObject)) {
            throw new Error("no where clause condition provided");
        }

        const content = require(file);
        const keyContent = _.get(content, key);
        if (!keyContent) throw new Error("invalid key");

        // True when `candidate` carries every key/value pair of every condition.
        const matchesAll = (candidate) =>
            _.isObject(candidate) &&
            where.every((condition) =>
                Object.keys(condition).every((prop) => candidate[prop] === condition[prop]));

        switch (act) {
            case "add": {
                let updated;
                if (_.isArray(keyContent)) {
                    // Test each element on its own and leave non-matching ones untouched.
                    let matched = false;
                    updated = keyContent.map((item) => {
                        if (!matchesAll(item)) return item;
                        matched = true;
                        return { ...item, ...data };
                    });
                    if (!matched) {
                        console.log("attributes matching -> ", false);
                        return;
                    }
                } else {
                    if (!matchesAll(keyContent)) {
                        console.log("attributes matching -> ", false);
                        return;
                    }
                    updated = { ...keyContent, ...data };
                }
                const newcontent = _.set(content, key, updated);
                console.log("newcontent -> \n", JSON.stringify(newcontent, null, 2));
                // TODO :: store back in json file
                break;
            }
            default:
                console.log("reached default case");
                return;
        }
    } catch (err) {
        console.log("ERROR :: CONSUMER ::", err);
    }
};
// AND based condition only
const conditions = [
{ name: "Essential Large" },
{ selected: true }
];
const newdata = { description: "our best service" } // wants to add new prop
consumer(file, "selected_items.essential", conditions, newdata, "add");
sample json
{
"status": 200,
"request": {},
"response": {
"ffs": false,
"customer": {
"customer_id": 1544248,
"z_cx_id": 123456
},
"selected_items": {
"essential": [
{
"id": 4122652,
"name": "Essential Large",
"selected": true,
"description": "our best service" // will be added
},
{
"id": 4122653,
"name": "Essential Large",
"selected": true,
"description": "our best service" // will be added
}
]
},
"service_partner": {
"id": 3486,
"name": "Some String",
"street": "1234 King St."
},
"subject": "Project",
"description": "Issue: (copy/paste service request details here Required"
}
}
So you want to go through every key of a nested object right?
/**
 * Walks every entry of a (JSON-like) object depth-first, parents before children.
 *
 * @param {Object} object - root object to traverse
 * @param {Function} fn - called as fn(value, parent, key, depth); depth starts at 1.
 *   fn may mutate `parent` (e.g. delete or replace `parent[key]`).
 */
function forEvery(object,fn){
  const visit=(node,depth)=>{
    for(const key of Object.keys(node)){
      fn(node[key],node,key,depth)
      // Re-read after the callback: it may have deleted or replaced the entry.
      const child=node[key]
      if(child!=null&&typeof child=="object"){
        visit(child,depth+1)
      }
    }
  }
  visit(object,1)
}
//usage would be like:
//let customerCondition=/*some logic here*/
//let testObj=JSON.parse( (require('fs')).readFileSync('dirToSomeFile.json') )
forEvery(testObj,customerCondition)
Here's a live example
let testObj={"status":200,"request":{},"response":{"ffs":false,"customer":{"customer_id":1544248,"z_cx_id":123456},"selected_items":{"essential":[{"id":4122652,"name":"Essential Large","selected":true},{"id":4122653,"name":"Essential Medium","selected":false}]},"service_partner":{"id":3486,"name":"Some String","street":"1234 King St."},"subject":"Project","description":"Issue: (copy/paste service request details here Required"}}
/**
 * Depth-first traversal of an object loaded from a JSON string.
 *
 * For every own key, `fn(value, parent, key, depth)` is invoked first (depth
 * starts at 1); afterwards, if the entry still holds a non-null object, the
 * traversal descends into it. Running the callback first lets it delete or
 * replace the entry before any descent happens.
 *
 * @param {Object} object - root of the traversal
 * @param {Function} fn - visitor callback
 */
function forEvery(object,fn){
  function descend(current,level){
    Object.keys(current).forEach(name=>{
      fn(current[name],current,name,level)
      const next=current[name]
      const isNested=next!==null&&next!==undefined&&typeof next=="object"
      if(isNested){
        descend(next,level+1)
      }
    })
  }
  descend(object,1)
}
//example usage
let userQuery=[{ name: "Essential Large" },{ selected: true }]; //the user query in the format you gave
let userCondition={} //assuming each key across userQuery is unique, I set a model object for comparisons later on
userQuery.forEach(obj=>{ //I fill the model object :D
Object.keys(obj).forEach(key=>{
userCondition[key]=obj[key]
})
})
/**
 * Visitor for forEvery: deletes `parent[key]` when `elem` is an object whose
 * properties loosely equal every key/value pair in `userCondition`.
 *
 * @param {*} elem - value currently being visited
 * @param {Object} parent - object that holds `elem`
 * @param {string} key - key of `elem` inside `parent`
 * @param {number} depth - nesting depth supplied by forEvery (unused here)
 */
let testFn=(elem,parent,key,depth)=>{
  // `typeof null` is "object", so null has to be ruled out explicitly before
  // its properties are read; only real objects can match the user condition.
  let condition=elem!==null&&typeof elem=="object"&&
    Object.keys(userCondition)
      .every(item=>userCondition[item]==elem[item])
  if(condition){
    console.log(parent[key],"will now be deleted")
    delete(parent[key]) //deletion example (if user conditions match)
  }
}
forEvery(testObj,testFn)
console.log("and the changed object looks like",testObj)

How the java script complex object and complex array iterate?

Below is running code snippet for the javascript object and array.
I have one jsonObj and here the ResultElementLevel could be the array or
object.
According to I just put if else condition and compare if Array and 'object'.
My question is,How would it be possible without if else condition?
can we write one function which compare object and Array inside single if.
The jsonObj is populating dynamically.
Here it would be possible CHECK object is also come into the Array or Object.
var jsonObj = {
"Response": {
"Errors": {
"Check": {
"_attributes": {
"id": "51416",
"name": "lucyocftest090601"
},
"CheckLevel": {
},
"ResultElementLevel": {
"_text": "Line No (2) [Missing Reporting Category] "
}
}
},
"Success": {
}
}
}
iterateObjorArr(jsonObj);
/**
 * Builds one upload record per entry of Response.Errors.Check.ResultElementLevel
 * and logs the resulting list.
 *
 * ResultElementLevel may be an array of entries or a single object; any other
 * type yields an empty list.
 *
 * @param {Object} jsonObj - parsed response containing Response.Errors.Check
 */
function iterateObjorArr(jsonObj){
    const check = jsonObj.Response.Errors.Check;
    const checkID = check._attributes.id;
    const checkName = check._attributes.name;
    const level = check.ResultElementLevel;

    // Bring both accepted shapes into a single list of entries.
    let entries = [];
    if (Array.isArray(level)) {
        entries = level;
    } else if (typeof (level) === 'object') {
        entries = [level];
    }

    const errorUploadArr = [];
    entries.forEach(function (entry) {
        errorUploadArr.push({
            'id': checkID,
            'checkName': checkName,
            'status': 'failed',
            'errors/warnings': entry._text
        });
    });
    console.log("errorUploadArr", errorUploadArr);
}
You can test to see if resultElementLevel has the length property or not using hasOwnProperty(). Arrays have a length while objects do not (generally):
if (resultElementLevel.hasOwnProperty('length')) {
// Handle it as an array
} else {
// Handle as an object
}
This will, however, only work if the object assigned to resultElementLevel is guaranteed to not have a length property.
My question is,How would it be possible without if else condition? can we write one function which compare object and Array inside single if.
I don't think you'd want to get rid of the condition, but being able to deal with the passed data the same way, wether it's an array, a single item, or null/undefined
You could normalize the data first
/**
 * Normalises a value to an array: null/undefined become [], arrays are
 * returned as-is (same reference), anything else is wrapped in a new array.
 * Array-like objects are wrapped too; see isArrayLike for how they could be
 * converted instead.
 */
function toArray(value){
  if (value == null) {
    return [];
  }
  if (Array.isArray(value)) {
    return value;
  }
  return [value];
}
//Objects that look like Arrays
/**
 * Reports whether `value` is a non-null object whose `length` is an unsigned
 * 32-bit integer (i.e. unchanged by `>>> 0`).
 */
function isArrayLike(value){
  if (value === null || typeof value !== "object") {
    return false;
  }
  const length = value.length;
  return length === (length >>> 0);
}
so that from here on, you always deal with an Array:
let errorUploadArr = toArray(checkArr.ResultElementLevel)
.map(function(item){
return {
id: checkID,
checkName: checkName,
status: status,
"errors/warnings": item._text
};
});
var jsonObj = {
Response: {
Errors: {
Check: {
_attributes: {
id: "51416",
name: "lucyocftest090601"
},
CheckLevel: {},
ResultElementLevel: {
_text: "Line No (2) [Missing Reporting Category] "
}
}
},
Success: {}
}
};
iterateObjorArr(jsonObj);
/**
 * Returns `value` as an array: [] for null/undefined, the array itself when
 * it already is one, otherwise a one-element array holding the value.
 */
function toArray(value) {
  const isMissing = value == null;
  if (isMissing) return [];
  return Array.isArray(value) ? value : [value];
}
//Objects that look like Arrays
/**
 * True for non-null objects whose `length` survives the unsigned 32-bit
 * conversion (`>>> 0`) unchanged; false for everything else.
 */
function isArrayLike(value) {
  const isObject = value !== null && typeof value === "object";
  if (!isObject) return false;
  const { length } = value;
  return length === (length >>> 0);
}
/**
 * Logs one upload record per ResultElementLevel entry of
 * Response.Errors.Check. The entries are normalised with toArray(), so a
 * single object, an array, or a missing value are all handled the same way.
 *
 * @param {Object} jsonObj - parsed response containing Response.Errors.Check
 */
function iterateObjorArr(jsonObj) {
  const check = jsonObj.Response.Errors.Check;
  const id = check._attributes.id;
  const checkName = check._attributes.name;

  // Shapes a single entry into the record that gets uploaded.
  const toRecord = (entry) => ({
    id,
    checkName,
    status: "failed",
    "errors/warnings": entry._text
  });

  const errorUploadArr = toArray(check.ResultElementLevel).map((entry) => toRecord(entry));
  console.log("errorUploadArr", errorUploadArr);
}
.as-console-wrapper{top:0;max-height:100%!important}

Recursive function returning empty array

I am having issue with my recursive function getPath, as it is returning an empty array, when it should be returning an array that looks something like this:
['main', 'children', 'name']
I am not sure if the logic part is right, but that isn't what the question is about; the question is: why is my array empty? It is pushing data onto the array, but the final result is an empty array.
let dataScope = [{
"name": "main",
"location": [":data"]
}, {
"name": "child",
"location": ["main", "children"]
}]
/**
 * Looks up the dataScope entry whose `name` loosely equals `key` and returns
 * its `location` array (the stored array itself, not a copy), or [] when no
 * entry matches.
 */
function getLocation(key) {
  const entry = dataScope.find(candidate => candidate.name == key)
  if (entry) {
    return entry.location
  }
  return []
}
/**
 * Resolves the first entry of `items` through getLocation() and recurses on
 * the location it finds.
 *
 * As written, the only values ever pushed onto `path` are the results of the
 * recursive calls, and the innermost call always returns [], so the function
 * always returns an empty array. It also consumes its input: `items.shift()`
 * removes the first element, and because getLocation() hands back the stored
 * `location` array, the recursion shifts elements out of dataScope as well.
 *
 * @param {Array<string>} items - names to resolve (mutated)
 * @returns {Array} always []
 */
function getPath(items) {
  const path = []
  const head = items.shift()
  if (!head) {
    return path
  }
  const loc = getLocation(head)
  const isResolvable = loc.length > 0 && loc.join('.') != ':data'
  if (isResolvable) {
    path.push(...getPath(loc))
    console.log('added to array')
  }
  return path
}
console.log(getPath(['child', 'name']))
You don't do anything with loc so, it seems nothing gets pushed to the array
Note: I'm still trying to get to grips with why your original code results in an empty array - however, this code produces the expected result :p
let dataScope = [{
"name": "main",
"location": [":data"]
}, {
"name": "child",
"location": ["main", "children"]
}]
/**
 * Returns the `location` array of the dataScope entry named `key` (loose
 * equality on `name`), or an empty array when there is no such entry.
 */
function getLocation(key) {
  const found = dataScope.find(({ name }) => name == key);
  return found ? found.location : [];
}
/**
 * Tracing variant of getPath: resolves the first entry of `items` to its
 * location, recursively resolves that location (on a copy, so the stored data
 * is not mutated), then appends the location itself. Every call logs its
 * input, intermediate path and result, indented by `indent` spaces.
 *
 * @param {Array<string>} items - names to resolve; the first one is shifted off
 * @param {number} [indent=0] - number of leading spaces for the log lines
 * @returns {Array<string>} path of the first item's ancestors followed by its own location
 */
function getPath(items, indent = 0) {
  const label = items.join(',');
  const pad = ' '.repeat(indent);
  console.log(`${pad}called with ${label}`);

  const path = [];
  const head = items.shift();
  if (head) {
    const loc = getLocation(head);
    const hasParents = loc.length > 0 && loc.join('.') != ':data';
    if (hasParents) {
      // Recurse on a copy so the location stored in dataScope stays intact.
      path.push(...getPath(loc.slice(), indent + 4));
      console.log(`${pad}${label} has path [${path.join(',')}]`);
    }
    path.push(...loc); // add loc to the path
  }

  console.log(`${pad}${label} returns [${path.join(',')}]`);
  return path;
}
console.log(`[${getPath(['child', 'name'])}]`)
First, you're passing an array of names to getPath, but then later you're passing the location array. Which one should it be? The logic needs tweaking. Also, there's nothing in the dataset using the value "name", so your test is incorrect as well.
Its because you're doing recursion sending dataScope location but you implemented getPath expecting dataScope keys:
let dataScope = [{
"name": "main",
"location": [":data"]
}, {
"name": "child",
"location": ["main", "children"]
}]
/**
 * Finds the dataScope entry whose `name` loosely equals `key`.
 *
 * @param {string} key - entry name to look up
 * @returns {Array} the entry's `location` array, or [] if nothing matches
 */
function getLocation(key) {
  const hit = dataScope.find(item => item.name == key)
  if (!hit) {
    return []
  }
  return hit.location
}
/**
 * Builds a path by concatenating the locations of the given dataScope keys.
 *
 * The first key is shifted off `keys` and resolved with getLocation(); when it
 * has a location other than ':data', that location is appended and the
 * remaining keys are resolved recursively. The recursive result is appended
 * too, so every remaining key contributes to the path instead of being
 * silently discarded.
 *
 * @param {Array<string>} keys - dataScope names to resolve (consumed by shift())
 * @returns {Array<string>} concatenated locations
 */
function getPath(keys) { // changing items name to keys for clarification
  const path = []
  const key = keys.shift()
  if (key) {
    const loc = getLocation(key)
    if (loc.length > 0 && loc.join('.') != ':data') {
      path.push(...loc) // push locs into array
      path.push(...getPath(keys)) // append the path of the remaining keys
      console.log('added to array')
    }
  }
  return path
}
console.log(getPath(['child', 'main']))
You will not have :data into your path result because of this statement: loc.join('.') != ':data'. If you remove it you will get your expected output.

MongoDB retrieve all keys with Node.js [duplicate]

I'd like to get the names of all the keys in a MongoDB collection.
For example, from this:
db.things.insert( { type : ['dog', 'cat'] } );
db.things.insert( { egg : ['cat'] } );
db.things.insert( { type : [] } );
db.things.insert( { hello : [] } );
I'd like to get the unique keys:
type, egg, hello
You could do this with MapReduce:
mr = db.runCommand({
"mapreduce" : "my_collection",
"map" : function() {
for (var key in this) { emit(key, null); }
},
"reduce" : function(key, stuff) { return null; },
"out": "my_collection" + "_keys"
})
Then run distinct on the resulting collection so as to find all the keys:
db[mr.result].distinct("_id")
["foo", "bar", "baz", "_id", ...]
With Kristina's answer as inspiration, I created an open source tool called Variety which does exactly this: https://github.com/variety/variety
You can use aggregation with the new $objectToArray aggregation operator in version 3.4.4 to convert all top key-value pairs into document arrays, followed by $unwind and $group with $addToSet to get distinct keys across the entire collection. (Use $$ROOT for referencing the top level document.)
db.things.aggregate([
{"$project":{"arrayofkeyvalue":{"$objectToArray":"$$ROOT"}}},
{"$unwind":"$arrayofkeyvalue"},
{"$group":{"_id":null,"allkeys":{"$addToSet":"$arrayofkeyvalue.k"}}}
])
You can use the following query for getting keys in a single document.
db.things.aggregate([
{"$match":{_id: "<<ID>>"}}, /* Replace with the document's ID */
{"$project":{"arrayofkeyvalue":{"$objectToArray":"$$ROOT"}}},
{"$project":{"keys":"$arrayofkeyvalue.k"}}
])
A cleaned up and reusable solution using pymongo:
from pymongo import MongoClient
from bson import Code
def get_keys(db, collection):
client = MongoClient()
db = client[db]
map = Code("function() { for (var key in this) { emit(key, null); } }")
reduce = Code("function(key, stuff) { return null; }")
result = db[collection].map_reduce(map, reduce, "myresults")
return result.distinct('_id')
Usage:
get_keys('dbname', 'collection')
>> ['key1', 'key2', ... ]
If your target collection is not too large, you can try this under mongo shell client:
var allKeys = {};
db.YOURCOLLECTION.find().forEach(function(doc){Object.keys(doc).forEach(function(key){allKeys[key]=1})});
allKeys;
If you are using mongodb 3.4.4 and above then you can use below aggregation using $objectToArray and $group aggregation
db.collection.aggregate([
{ "$project": {
"data": { "$objectToArray": "$$ROOT" }
}},
{ "$project": { "data": "$data.k" }},
{ "$unwind": "$data" },
{ "$group": {
"_id": null,
"keys": { "$addToSet": "$data" }
}}
])
Here is the working example
Try this:
// Print the top-level keys of one document from the `things` collection.
// Only a single document is inspected, so keys that occur solely in other
// documents are not listed.
var doc = db.things.findOne();
for (var key in doc) print(key);
Using python. Returns the set of all top-level keys in the collection:
#Using pymongo and connection named 'db'
reduce(
lambda all_keys, rec_keys: all_keys | set(rec_keys),
map(lambda d: d.keys(), db.things.find()),
set()
)
Here is the sample worked in Python:
This sample returns the results inline.
from pymongo import MongoClient
from bson.code import Code
mapper = Code("""
function() {
for (var key in this) { emit(key, null); }
}
""")
reducer = Code("""
function(key, stuff) { return null; }
""")
distinctThingFields = db.things.map_reduce(mapper, reducer
, out = {'inline' : 1}
, full_response = True)
## do something with distinctThingFields['results']
I am surprised that no one here has answered using plain JavaScript and a Set to filter out duplicate values automatically. A simple example for the mongo shell is below:
var allKeys = new Set()
db.collectionName.find().forEach( function (o) {for (key in o ) allKeys.add(key)})
for(let key of allKeys) print(key)
This will print all possible unique keys in the collection name: collectionName.
I think the best way do this as mentioned here is in mongod 3.4.4+ but without using the $unwind operator and using only two stages in the pipeline. Instead we can use the $mergeObjects and $objectToArray operators.
In the $group stage, we use the $mergeObjects operator to return a single document where key/value are from all documents in the collection.
Then comes the $project where we use $map and $objectToArray to return the keys.
let allTopLevelKeys = [
{
"$group": {
"_id": null,
"array": {
"$mergeObjects": "$$ROOT"
}
}
},
{
"$project": {
"keys": {
"$map": {
"input": { "$objectToArray": "$array" },
"in": "$$this.k"
}
}
}
}
];
Now if we have a nested documents and want to get the keys as well, this is doable. For simplicity, let consider a document with simple embedded document that look like this:
{field1: {field2: "abc"}, field3: "def"}
{field1: {field3: "abc"}, field4: "def"}
The following pipeline yield all keys (field1, field2, field3, field4).
let allFistSecondLevelKeys = [
{
"$group": {
"_id": null,
"array": {
"$mergeObjects": "$$ROOT"
}
}
},
{
"$project": {
"keys": {
"$setUnion": [
{
"$map": {
"input": {
"$reduce": {
"input": {
"$map": {
"input": {
"$objectToArray": "$array"
},
"in": {
"$cond": [
{
"$eq": [
{
"$type": "$$this.v"
},
"object"
]
},
{
"$objectToArray": "$$this.v"
},
[
"$$this"
]
]
}
}
},
"initialValue": [
],
"in": {
"$concatArrays": [
"$$this",
"$$value"
]
}
}
},
"in": "$$this.k"
}
}
]
}
}
}
]
With a little effort, we can get the key for all subdocument in an array field where the elements are object as well.
This works fine for me:
// Collect the distinct top-level field names of every document in the collection.
// The names are stored as keys of a prototype-less object rather than an Array:
// assigning a string to an Array's `length` property throws a RangeError, so a
// document field called "length" would otherwise abort the scan.
var arrayOfFieldNames = Object.create(null);
var items = db.NAMECOLLECTION.find();
while (items.hasNext()) {
    var item = items.next();
    for (var index in item) {
        arrayOfFieldNames[index] = index;
    }
}
for (var index in arrayOfFieldNames) {
    print(index);
}
Maybe slightly off-topic, but you can recursively pretty-print all keys/fields of an object:
/**
 * Prints every enumerable key of `item`, indented by four spaces per nesting
 * level, and descends into values whose typeof is "object".
 *
 * @param {*} item - value to inspect; non-objects are ignored
 * @param {number} level - current nesting level (0 for the root)
 */
function _printFields(item, level) {
    var isObject = (typeof item) == "object"
    if (!isObject) {
        return
    }
    for (var fieldName in item) {
        var indentation = " ".repeat(level * 4)
        print(indentation + fieldName)
        var fieldValue = item[fieldName]
        if ((typeof fieldValue) == "object") {
            _printFields(fieldValue, level + 1)
        }
    }
}
/**
 * Entry point: prints the fields of `item` starting at nesting level 0.
 *
 * @param {*} item - object whose keys should be printed
 */
function printFields(item) {
    var rootLevel = 0
    _printFields(item, rootLevel)
}
Useful when all objects in a collection has the same structure.
To get a list of all the keys minus _id, consider running the following aggregate pipeline:
var keys = db.collection.aggregate([
{ "$project": {
"hashmaps": { "$objectToArray": "$$ROOT" }
} },
{ "$group": {
"_id": null,
"fields": { "$addToSet": "$hashmaps.k" }
} },
{ "$project": {
"keys": {
"$setDifference": [
{
"$reduce": {
"input": "$fields",
"initialValue": [],
"in": { "$setUnion" : ["$$value", "$$this"] }
}
},
["_id"]
]
}
}
}
]).toArray()[0]["keys"];
I know I am late to the party, but if you want a quick solution in python finding all keys (even the nested ones) you could do with a recursive function:
def get_keys(dl, keys=None):
    """Return the distinct keys found anywhere inside a nested dict/list structure.

    :param dl: a dict, a list, or any other value (other values contribute no keys)
    :param keys: accumulator list shared by the recursive calls; leave as None
    :return: list of unique keys, in no particular order
    """
    # Test against None explicitly: `keys or []` would replace an *empty*
    # accumulator with a fresh list, so keys found below a top-level list
    # would be collected into a throwaway list and lost.
    if keys is None:
        keys = []
    if isinstance(dl, dict):
        keys.extend(dl.keys())
        for value in dl.values():
            get_keys(value, keys)
    elif isinstance(dl, list):
        for element in dl:
            get_keys(element, keys)
    return list(set(keys))
and use it like:
dl = db.things.find_one({})
get_keys(dl)
if your documents do not have identical keys you can do:
dl = db.things.find({})
list(set(list(map(get_keys, dl))[0]))
but this solution can for sure be optimized.
Generally this solution is basically solving finding keys in nested dicts, so this is not mongodb specific.
Based on #Wolkenarchitekt answer: https://stackoverflow.com/a/48117846/8808983, I write a script that can find patterns in all keys in the db and I think it can help others reading this thread:
"""
Python 3
This script get list of patterns and print the collections that contains fields with this patterns.
"""
import argparse
import pymongo
from bson import Code
# initialize mongo connection:
def get_db():
client = pymongo.MongoClient("172.17.0.2")
db = client["Data"]
return db
def get_commandline_options():
description = "To run use: python db_fields_pattern_finder.py -p <list_of_patterns>"
parser = argparse.ArgumentParser(description=description)
parser.add_argument('-p', '--patterns', nargs="+", help='List of patterns to look for in the db.', required=True)
return parser.parse_args()
def report_matching_fields(relevant_fields_by_collection):
print("Matches:")
for collection_name in relevant_fields_by_collection:
if relevant_fields_by_collection[collection_name]:
print(f"{collection_name}: {relevant_fields_by_collection[collection_name]}")
# pprint(relevant_fields_by_collection)
def get_collections_names(db):
"""
:param pymongo.database.Database db:
:return list: collections names
"""
return db.list_collection_names()
def get_keys(db, collection):
"""
See: https://stackoverflow.com/a/48117846/8808983
:param db:
:param collection:
:return:
"""
map = Code("function() { for (var key in this) { emit(key, null); } }")
reduce = Code("function(key, stuff) { return null; }")
result = db[collection].map_reduce(map, reduce, "myresults")
return result.distinct('_id')
def get_fields(db, collection_names):
fields_by_collections = {}
for collection_name in collection_names:
fields_by_collections[collection_name] = get_keys(db, collection_name)
return fields_by_collections
def get_matches_fields(fields_by_collections, patterns):
    """Keep, per collection, only the fields that contain at least one pattern.

    :param dict fields_by_collections: collection name -> list of field names
    :param list patterns: substrings to look for in each field name
    :return dict: collection name -> list of matching field names (possibly empty)
    """
    def contains_pattern(field):
        # Every pattern is tested against the field; one hit is enough.
        hits = [pattern for pattern in patterns if pattern in field]
        return len(hits) > 0

    return {
        collection_name: [field for field in fields if contains_pattern(field)]
        for collection_name, fields in fields_by_collections.items()
    }
def main(patterns):
"""
:param list patterns: List of strings to look for in the db.
"""
db = get_db()
collection_names = get_collections_names(db)
fields_by_collections = get_fields(db, collection_names)
relevant_fields_by_collection = get_matches_fields(fields_by_collections, patterns)
report_matching_fields(relevant_fields_by_collection)
if __name__ == '__main__':
args = get_commandline_options()
main(args.patterns)
As per the MongoDB documentation, a combination of distinct
Finds the distinct values for a specified field across a single collection or view and returns the results in an array.
and indexes collection operations are what would return all possible values for a given key, or index:
Returns an array that holds a list of documents that identify and describe the existing indexes on the collection
So in a given method one could do use a method like the following one, in order to query a collection for all it's registered indexes, and return, say an object with the indexes for keys (this example uses async/await for NodeJS, but obviously you could use any other asynchronous approach):
/**
 * Collects the distinct values of indexed keys of a collection.
 *
 * The first key name of every index returned by `collection.indexes()` is
 * gathered. If `index` is a non-empty string, only that key is queried (and it
 * must be among the gathered names); otherwise every gathered key is queried.
 *
 * @param {Object} collection - object exposing async `indexes()` and `distinct(key)`
 * @param {string} [index] - single index key to query
 * @returns {Promise<Object>} map of key -> array of distinct values; in the
 *   query-everything mode a key whose lookup failed maps to the string 'failed'
 * @throws {string} "No Key For Index ...", "No Name For Index" or
 *   "No Such Index In Collection"; errors from `indexes()`, and from
 *   `distinct()` in single-key mode, are rethrown unchanged
 */
async function GetFor(collection, index) {
    const final = {};
    const indexNames = [];
    const vals = [];

    // Stores the distinct values of `key` in `final`; on failure records the
    // marker 'failed' and rethrows.
    const distinctFor = async (key) => {
        if (!key) throw "Key Is Undefined";
        try {
            final[key] = await collection.distinct(key);
        } catch (e) {
            final[key] = 'failed';
            throw e;
        }
    };

    const currentIndexes = await collection.indexes();

    // Gather the first key name of every index definition.
    for (const position in currentIndexes) {
        const definition = currentIndexes[position];
        if (!definition) {
            throw "No Key For Index " + definition;
        }
        const keyNames = Object.keys(definition.key);
        if (keyNames.length === 0) {
            throw "No Name For Index";
        }
        indexNames.push(keyNames[0]);
    }

    // Check if a specific index was queried, otherwise, iterate for all existing indexes
    if (index && typeof index === "string") {
        if (indexNames.indexOf(index) === -1) throw "No Such Index In Collection";
        await distinctFor(index);
        return final;
    }

    let missingKey;
    for (const position in indexNames) {
        const key = indexNames[position];
        if (!key) {
            missingKey = "No Key For Index " + position;
            break;
        }
        // Best effort: a failed lookup is already recorded as 'failed' in
        // `final`, so its rejection is deliberately swallowed here.
        vals.push(distinctFor(key).then(() => undefined, () => undefined));
    }
    if (missingKey) {
        throw missingKey;
    }
    await Promise.all(vals);
    return final;
}
So querying a collection with the basic _id index, would return the following (test collection only has one document at the time of the test):
Mongo.MongoClient.connect(url, function (err, client) {
assert.equal(null, err);
let collection = client.db('my db').collection('the targeted collection');
GetFor(collection, '_id')
.then(function () {
//returns
// { _id: [ 5ae901e77e322342de1fb701 ] }
})
.catch(function (err) {
//manage your error..
})
});
Mind you, this uses methods native to the NodeJS Driver. As some other answers have suggested, there are other approaches, such as the aggregate framework. I personally find this approach more flexible, as you can easily create and fine-tune how to return the results. Obviously, this only addresses top-level attributes, not nested ones.
Also, to guarantee that all documents are represented should there be secondary indexes (other than the main _id one), those indexes should be set as required.
We can achieve this by Using mongo js file. Add below code in your getCollectionName.js file and run js file in the console of Linux as given below :
mongo --host 192.168.1.135 getCollectionName.js
db_set = connect("192.168.1.135:27017/database_set_name"); // for Local testing
// db_set.auth("username_of_db", "password_of_db"); // if required
db_set.getMongo().setSlaveOk();
var collectionArray = db_set.getCollectionNames();
// For every non-system collection, print its name followed by the distinct
// top-level field names found across its documents.
collectionArray.forEach(function(collectionName){
    if ( collectionName == 'system.indexes' || collectionName == 'system.profile' || collectionName == 'system.users' ) {
        return;
    }
    print("\nCollection Name = "+collectionName);
    print("All Fields :\n");
    // Field names are kept as keys of a prototype-less object rather than an
    // Array: assigning a string to an Array's `length` property throws a
    // RangeError, so a document field called "length" would abort the scan.
    var arrayOfFieldNames = Object.create(null);
    var items = db_set[collectionName].find();
    // var items = db_set[collectionName].find().sort({'_id':-1}).limit(100); // if you want fast & scan only last 100 records of each collection
    while(items.hasNext()) {
        var item = items.next();
        for(var index in item) {
            arrayOfFieldNames[index] = index;
        }
    }
    for (var index in arrayOfFieldNames) {
        print(index);
    }
});
quit();
Thanks #ackuser
Following the thread from #James Cropcho's answer, I landed on the following which I found to be super easy to use. It is a binary tool, which is exactly what I was looking for:
mongoeye.
Using this tool it took about 2 minutes to get my schema exported from command line.
I know this question is 10 years old but there is no C# solution and this took me hours to figure out. I'm using the .NET driver and System.Linq to return a list of the keys.
// Map step (JavaScript source passed to MongoDB): emit every top-level key of the document with a null value.
var map = new BsonJavaScript("function() { for (var key in this) { emit(key, null); } }");
// Reduce step: the values are irrelevant, only the distinct keys matter, so it always returns null.
var reduce = new BsonJavaScript("function(key, stuff) { return null; }");
// Default map-reduce options; nothing is customised here.
var options = new MapReduceOptions<BsonDocument, BsonDocument>();
var result = await collection.MapReduceAsync(map, reduce, options);
// Each result document carries the emitted key in "_id"; project those to strings.
var list = result.ToEnumerable().Select(item => item["_id"].ToString());
This one-liner extracts all keys from a collection into a comma-separated, sorted string:
db.<collection>.find().map((x) => Object.keys(x)).reduce((a, e) => {for (el of e) { if(!a.includes(el)) { a.push(el) } }; return a}, []).sort((a, b) => a.toLowerCase() > b.toLowerCase()).join(", ")
The result of this query typically looks like this:
_class, _id, address, city, companyName, country, emailId, firstName, isAssigned, isLoggedIn, lastLoggedIn, lastName, location, mobile, printName, roleName, route, state, status, token
I extended Carlos LM's solution a bit so it's more detailed.
Example of a schema:
// Sample document: a few scalar fields plus an array (`ls`) of sub-documents,
// each of which holds a nested object (`p`).
var schema = {
  _id: 123,
  id: 12,
  t: 'title',
  p: 4.5,
  ls: [
    { l: 'lemma', p: { pp: 8.9 } },
    { l: 'lemma2', p: { pp: 8.3 } }
  ]
};
Type into the console:
/**
 * Print an indented outline of an object's fields and their types.
 *
 * Every key is printed as `key <type>`. Object-valued keys are printed as
 * `key <object>:` (or `key <array>:` for arrays) and followed by their own
 * fields one level deeper. For arrays only the first element is described.
 * `null` values are printed as `key <null>` rather than as an empty object.
 *
 * @param {Object} schema - document (or sub-document) to describe
 * @param {number} [i=1] - nesting level; the indentation is `i - 1` spaces
 * @param {boolean} [limit=false] - when truthy, stop after the first key
 *   (used internally so that only the first array element is printed)
 */
var schemafy = function(schema, i, limit) {
  var depth = (typeof i !== 'undefined') ? i : 1;
  var firstKeyOnly = (typeof limit !== 'undefined') ? limit : false;
  var indent = Array(depth).join(' ');
  // `key` is declared locally: as an implicit global it was shared by every
  // recursion level and threw a ReferenceError in strict mode / ES modules.
  for (var key in schema) {
    var value = schema[key];
    if (value === null) {
      // typeof null === 'object', so handle it before the object branch.
      print(indent + key + ' <null>');
    } else if (typeof value === 'object') {
      var isArray = Array.isArray(value);
      print(indent + key + ' <' + (isArray ? 'array' : 'object') + '>:');
      schemafy(value, depth + 1, isArray);
    } else {
      print(indent + key + ' <' + (typeof value) + '>');
    }
    if (firstKeyOnly) {
      break;
    }
  }
};
Run:
// Describe the structure of a single document fetched with findOne().
schemafy(db.collection.findOne());
Output
_id <number>
id <number>
t <string>
p <number>
ls <array>:
 0 <object>:
  l <string>
  p <object>:
   pp <number>
I was trying to write in nodejs and finally came up with this:
// Collect every top-level field name of "collectionName" with map-reduce,
// writing the keys to the temporary collection "allFieldNames", then send the
// distinct names to the client and clean the temporary collection up.
db.collection('collectionName').mapReduce(
  function() {
    // Map: emit each top-level key of the current document.
    for (var key in this) {
      emit(key, null);
    }
  },
  function(key, stuff) {
    // Reduce: the values are irrelevant, only the distinct keys matter.
    return null;
  }, {
    "out": "allFieldNames"
  },
  function(err, results) {
    if (err) {
      // Map-reduce failed, so "allFieldNames" may be missing or stale:
      // report the error (as the rejection handler below does) and stop.
      res.send(err);
      delteCollection(db, 'allFieldNames');
      return;
    }
    var fields = db.collection('allFieldNames').distinct('_id');
    fields
      .then(function(data) {
        var finalData = {
          "status": "success",
          "fields": data
        };
        res.send(finalData);
        delteCollection(db, 'allFieldNames');
      })
      .catch(function(err) {
        res.send(err);
        delteCollection(db, 'allFieldNames');
      });
  });
After reading the newly created collection "allFieldNames", delete it.
// Remove every document from the temporary "allFieldNames" collection, then
// close the database connection. The callback arguments are not inspected.
db.collection("allFieldNames").remove({}, function (removeErr, removeResult) {
  db.close();
});
I have a simpler workaround...
While inserting a document into your main collection "things", also record its attributes in a separate collection, say "things_attributes".
So every time you insert into "things", fetch the document from "things_attributes", compare its keys with the keys of your new document, and if any new key is present, append it to that document and save it again.
That way "things_attributes" will hold only one document with the unique keys, which you can easily retrieve whenever you need it by using findOne().

Search for a related json data

How can i find data that is related to the already known data?
( I'm a newb. )
For example here is my json :
[
{ "id": "1", "log": "1","pass": "1111" },
{ "id": 2, "log": "2","pass": "2222" },
{ "id": 3, "log": "3","pass": "3333" }
]
Now I know that "log" is 1 and I want to find the "pass" value that is related to it.
I've tried to do it like this:
The POST request comes with log and pass data. I search the .json file for the same log value, and if it is found, I then search for the related pass.
// Read file.json and check whether the login data from the POST request
// (`loginData`) matches one of the stored entries.
fs.readFile("file.json", "utf8", function (err, data) {
    if (err) {
        // Without this guard JSON.parse(undefined) would throw below.
        console.log("error");
        return;
    }
    var jsonFileArr = [];
    jsonFileArr = JSON.parse(data); // Parse the .json objects
    var log = loginData.log; // The 'log' data that comes with POST request
    /* Search through the .json file for the same data */
    // Declared as `gotLog` to match the check below; it was `gibtLog` before,
    // which made the `if (gotLog)` test throw a ReferenceError.
    var gotLog = jsonFileArr.some(function (obj) {
        return obj.log == log;
    });
    if (gotLog) { // If there is the same 'log'
        var pass = loginData.pass; // The 'pass' data that comes with POST request
        var gotPass = jsonFileArr.some(function (obj) {
            // How to change this part ?
            return obj.pass == pass;
        });
    }
    else
        console.log("error");
});
The problem is that when i use
// .some() tests the predicate against every element of jsonFileArr,
// not only against the entry whose `log` matched.
var gotPass = jsonFileArr.some(function (obj) {
return obj.pass == pass;
});
it searches through the whole .json file and not only through the one matching object.
Your main problem is that .some() returns a boolean, whether any of the elements match your predicate or not, but not the element itself.
You want .find() (which will find and return the first element matching the predicate):
// Look up the entry by its `log` value, then read the `pass` stored on that same entry.
const myItem = myArray.find(item => item.log === "1"); // the first matching item
console.log(myItem.pass); // "1111"
Note that it is possible for .find() to not find anything, in which case it returns undefined.
The .some() method returns a boolean that just tells you whether there is at least one item in the array that matches the criteria, it doesn't return the matching item(s). Try .filter() instead:
// Parse the file contents and keep only the entries whose `log` equals the
// one sent with the request; the first match (if any) supplies `pass`.
var jsonFileArr = JSON.parse(data);
var log = loginData.log;
var matchingItems = jsonFileArr.filter((entry) => entry.log == log);
if (matchingItems.length === 0) {
  console.log("error"); // no matches
} else {
  var pass = matchingItems[0].pass; // The 'pass' data that comes with the first match
}
Using ES6 Array#find is probably the easiest, but you could also do (among other things)
// Sample records to search through.
const x = [
  { "id": "1", "log": "1", "pass": "1111" },
  { "id": 2, "log": "2", "pass": "2222" },
  { "id": 3, "log": "3", "pass": "3333" },
];

// Manual alternative to Array#find: walk the array by index and stop at the
// first record whose `log` is '1'. `myItem` stays undefined when none matches.
let myItem;
for (let idx = 0; idx < x.length; idx += 1) {
  const candidate = x[idx];
  if (candidate.log === '1') {
    myItem = candidate;
    break;
  }
}
console.log(myItem);

Categories

Resources