Using Javascript aggregation in druid queries

Using Javascript aggregation in druid queries - javascript

I am trying to write javascript aggregator for my druid queries. i need to to calculate average of a metric "Base_SalesRank".
So far i have been able to this by writing:
{
"queryType": "groupBy",
"dataSource": "marketdata",
"granularity": "all",
"dimensions" : ["Item"],
"filter": { "type": "and", "fields" : [{"type": "selector", "dimension": "Item", "value": "MN10CESWW"}]},
"intervals": ["2018-06-28T00:00Z/2018-07-04T00:00Z"],
"aggregations" : [
{ "type" : "count", "name" : "rows" },
{ "type" : "doubleSum", "name" : "Base_SalesRank", "fieldName" : "Base_SalesRank" }
],
"postAggregations" : [{
"type": "javascript",
"name": "Target DOS Average",
"fieldNames": ["Base_SalesRank", "rows"],
"function": "function(Base_SalesRank, rows) {return Base_SalesRank/ rows;}"
}]
}
But I noticed that many values in Base_SalesRank is 0.
[ {
"timestamp" : "2018-06-28T05:06:03.000Z",
"result" : {
"pagingIdentifiers" : {
"marketdata_2018-06-28T00:00:00.000Z_2018-06-29T00:00:00.000Z_2018-07-06T08:11:02.499Z" : 3
},
"dimensions" : [ "Item" ],
"metrics" : [ "Base_SalesRank" ],
"events" : [ {
"segmentId" : "marketdata_2018-06-28T00:00:00.000Z_2018-06-29T00:00:00.000Z_2018-07-06T08:11:02.499Z",
"offset" : 0,
"event" : {
"timestamp" : "2018-06-28T07:10:02.000Z",
"Item" : "MN10CESWW",
"Base_SalesRank" : 0
}
},
{
"segmentId" : "marketdata_2018-06-28T00:00:00.000Z_2018-06-29T00:00:00.000Z_2018-07-06T08:11:02.499Z",
"offset" : 3,
"event" : {
"timestamp" : "2018-06-28T07:20:21.000Z",
"Item" : "MN10CESWW",
"Base_SalesRank" : 5558
}
} ]
}
} ]
So i am not getting true average. Now i need to weed out these 0 values and then calucate average. We can do this by using filters
{"type": "not", "field": {"type": "selector", "dimension": "Base_SalesRank", "value": "0"}}
But I have constraint that I have to perform this filter operation inside the javascript function only.

You can achieve the same with just adding a having query -
"having": {
"type": "greaterThan",
"aggregation": "Base_SalesRank",
"value": 0
}
If you want to do the same in javascript function than it can be done as below -
You should add a dimension (key/value) say "isValid" as "0" or "1" during pre-ingestion json data based on if Base_SalesRank is 0 than "isValid" will be 0 else 1.
Apply filter on this field in your query.
Use the rows in your post Aggregration.

Related

Tabulator - Tree Structure - Branch Totals

I have a tabulator table with below data / configuration;
Data;
var data_tab =
[ {
"code": "A",
"desc_tr": "Top Level",
"mylink": [
{
"code": "A.1",
"desc_tr": "Sub Level 1",
"mylink": [
{
"code": "A.1.1",
"desc_tr": "Sub Level 2",
"mylink": [
{
"code": "A.1.1.1",
"desc_tr": "Sub Level 3",
"mylink": [
{
"code": "A.1.1.1.001",
"desc_tr": "Item 1 at Last Level",
"income": "5",
},
{
"code": "A.1.1.1.002",
"desc_tr": "Item 2 at Last Level",
"income": "2",
}
]
}
]
}
]
},
]
} ]
Config / parameters for table;
let tmp_Tabulator = new Tabulator( "#tabulator_table",
{
data : data_tab,
columns : stg_Tabulator[ src_coldata ],
reactiveData : false,
dataTree : true,
dataTreeBranchElement : false,
dataTreeChildIndent : 0,
dataTreeChildField : "mylink",
height : 683,
}
)
This is the config / parameters for columns;
tbl_Sample : [
{
title : "G",
field : "",
width : 51,
formatter : null,
hozAlign : "left",
formatterParams : null
},
{
title : "CODE",
field : "code",
width : 100,
hozAlign : "left",
formatterParams : null
},
{
title : "DESCRIPTION",
field : "desc_tr",
width : 400,
formatter : null,
hozAlign : "left",
formatterParams : null
},
{
title : "INCOME",
field : "income",
width : 160,
formatter : "money",
hozAlign : "right",
formatterParams : fmt_num_2,
topCalc : "sum",
},
],
This is how table looks like when rendered ;
What should I do to populate income fields at upper levels ?
Thanks in advance...
PS : This is just a sample data and I do not want to do it at source data side (by changing query / json structure ect.) as it is very resource demanding.

Remove all properties in a deeply nested JavaScript object based on a particular condition

I have an object that looks something like this (it is an Avro Schema):
{
"type": "record",
"namespace": "company.car.v1",
"name": "CarV1",
"fields": [
{
"name": "plateNumber",
"type": "string"
},
{
"name": "ownerId",
"type": "string"
},
{
"name" : "details",
"type" : {
"type" : "record",
"name" : "DetailsV1",
"fields" : [
{
"name": "engine",
"type": {
"type": "record",
"name": "EngineV1",
"fields": [
{
"name": "size",
"type": "int",
"default": 0
},
{
"name": "valvesCount",
"type": "int",
"default": 0
}
]
}
},
{
"name" : "color",
"type" : "string",
"default" : "NONE"
},
{
"name" : "rimSize",
"type" : "int",
"default" : "NONE"
}
]},
"default" : {}
},
{
"name": "isBrandNew",
"type": "boolean"
}
]
}
My main goal is to have a function that takes such an object as input and extract only some particular fields and produce a subSet of this schema.
So a function that looks something like this reduceSchema(avroSchema, [paths])
For example the:
function reduceSchema(avroSchemaOnTop, ['ownerId', 'details.engine.size']
And then this would produce the desired output:
{
"type":"record",
"namespace":"company.car.v1",
"name":"CarV1",
"fields":[
{
"name":"ownerId",
"type":"string"
},
{
"name":"details",
"type":{
"type":"record",
"name":"DetailsV1",
"fields":[
{
"name":"engine",
"type":{
"type":"record",
"name":"EngineV1",
"fields":[
{
"name":"size",
"type":"int",
"default":0
}
]
}
}
]
},
"default":{}
}
]
}
Currently I am able to attach a property keepThisField to every field (and its parent-tree) that I want to keep, in this case the details.engine.size and ownerId
{
"type": "record",
"namespace": "company.car.v1",
"name": "CarV1",
"fields": [
{
"name": "plateNumber",
"type": "string"
},
{
"name": "ownerId",
"type": "string",
"keepThisField": "true"
},
{
"name" : "details",
"keepThisField": "true"
"type" : {
"type" : "record",
"name" : "DetailsV1",
"fields" : [
{
"name": "engine",
"type": {
"type": "record",
"name": "EngineV1",
"fields": [
{
"name": "size",
"type": "int",
"default": 0,
"keepThisField": "true"
},
{
"name": "valvesCount",
"type": "int",
"default": 0
}
]
}
},
{
"name" : "color",
"type" : "string",
"default" : "NONE"
},
{
"name" : "rimSize",
"type" : "int",
"default" : "NONE"
}
]},
"default" : {}
},
{
"name": "isBrandNew",
"type": "boolean"
}
]
}
What I need now is a mechanism to be able to remove all other fields (in a deeply nested manner) that don't have the property keepThisField in them, and then after the keepThisField property itself. So we are left with the desired output.
Anyone have an idea how one can achieve the removal process in a generic manner in JavaScript?
Update:
This is what I have tried with the flatMap:
function fn(o) {
const hasMore = _.get(o, 'type.fields');
if (o.keepThisField === true) {
if (hasMore) {
const retObj = {
...o,
type: {
...o.type,
fields: _.flatMap(o.type.fields, fn),
}
};
_.unset(retObj, 'keepThisField');
return retObj;
}
const cpO = o;
_.unset(cpO, 'keepThisField');
return ({
...cpO,
});
}
return [];
}
parentSchema.fields = _.flatMap(parentSchema.fields, fn)

[Answering my own question]
A potential solution for the removal part of the process. Credits to #Kinglish for the reference to this Stack Overflow question to use the flatMap.
function fn(o) {
const hasMore = _.get(o, 'type.fields');
if (o.keepThisField === true) {
if (hasMore) {
const retObj = {
...o,
type: {
...o.type,
fields: _.flatMap(o.type.fields, fn),
}
};
_.unset(retObj, 'keepThisField');
return retObj;
}
const cpO = o;
_.unset(cpO, 'keepThisField');
return ({
...cpO,
});
}
return [];
}
parentSchema.fields = _.flatMap(parentSchema.fields, fn)

Elasticsearch sorting by custom item weight

I have stored the documents which include status property. I would like to sort the documents by status priority (not status alphabetically). I have followed previous answers and composed the following function which still doesnt work as expected; the documents are sorted by status names (alphabetically):
function getESSortingByStatusQuery(query, order) {
let statusOrder = ['BLUE', 'RED', 'BLACK', 'YELLOW', 'GREEN'];
if(order == 'desc'){
statusOrder.reverse();
}
const functions = statusOrder.map((item) => {
const idx = statusOrder.indexOf(item);
return {filter: {match: {statusColor: item}},
weight: (idx + 1) * 50}
});
const queryModified = {
"function_score": {
"query": {"match_all": {}}, // this is for testing purposes and should be replaced with original query
"boost": "5",
"functions": functions,
"score_mode": "multiply",
"boost_mode": "replace"
}
}
return queryModified;
}
I would be thankful if anyone suggested the way to sort items according to predefined priority of the property (in this case status).

Below is a sample custom sort script which I think is what you are looking for. I've added sample mapping, documents, query and the response as how it appears.
Mapping:
PUT color_index
{
"mappings": {
"properties": {
"color":{
"type": "keyword"
},
"product":{
"type": "text"
}
}
}
}
Sample Documents:
POST color_index/_doc/1
{
"color": "BLUE",
"product": "adidas and nike"
}
POST color_index/_doc/2
{
"color": "GREEN",
"product": "adidas and nike and puma"
}
POST color_index/_doc/3
{
"color": "GREEN",
"product": "adidas and nike"
}
POST color_index/_doc/4
{
"color": "RED",
"product": "nike"
}
POST color_index/_doc/5
{
"color": "RED",
"product": "adidas and nike"
}
Query:
POST color_index/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "*",
"query": "adidas OR nike"
}
}
]
}
},
"sort": [
{ "_score": { "order": "desc"} }, <---- First sort by score
{ "_script": { <---- Second sort by Colors
"type": "number",
"script": {
"lang": "painless",
"source": "if(params.scores.containsKey(doc['color'].value)) { return params.scores[doc['color'].value];} return 100000;",
"params": {
"scores": {
"BLUE": 0,
"RED": 1,
"BLACK": 2,
"YELLOW": 3,
"GREEN": 4
}
}
},
"order": "asc"
}
}
]
}
Firstly it would return documents sorted by its score, and then it would apply the second sorting logic to that result.
For the second sorting, i.e. using script sort, notice how I have added the numeric values to the colors in the scores section. You would need to construct your query accordingly.
The logic as how it works is in the source section which I believe is self-explainable, where I used doc['color'].value as that was my field on which I'm applying custom sort logic.
Response:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.5159407,
"_source" : {
"color" : "BLUE",
"product" : "adidas and nike"
},
"sort" : [
0.5159407, <--- This value is score(desc by nature)
0.0 <--- This value comes from script sort as its BLUE and I've used value 0 in the script which is in 'asc' order
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "5",
"_score" : 0.5159407,
"_source" : {
"color" : "RED",
"product" : "adidas and nike"
},
"sort" : [
0.5159407,
1.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.5159407,
"_source" : {
"color" : "GREEN",
"product" : "adidas and nike"
},
"sort" : [
0.5159407,
4.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.40538198,
"_source" : {
"color" : "GREEN",
"product" : "adidas and nike and puma"
},
"sort" : [
0.40538198,
4.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "4",
"_score" : 0.10189847,
"_source" : {
"color" : "RED",
"product" : "nike"
},
"sort" : [
0.10189847,
1.0
]
}
]
}
}
Notice the first three documents, it has exact value of product but different color and you can see that they are grouped together as we first sorted by _score then we sort that by color
Let me know if this helps!

Here's the code sample of sorting result. I think this will helps you. If you don't want to get entire documents as result you can filter results using includes.
GET testindex/_search
{
"_source": {
"includes": [
"filed1"
]
},
"aggs": {
"emp_figures": {
"terms": {
"field": "status"
}
}
}
}
This is the sample result you should retrieve
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"hits": {
"total": 84968,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "type",
"_id": "0001",
"_score": 1,
"_source": {
"filed1": "color1,
}
},
{
"_index": "test",
"_type": "type",
"_id": "0002",
"_score": 1,
"_source": {
"filed1": "color2,
}
}
}
}
}

Cannot access a field in JSON

I am trying to access an image in a JSON response however the field I need to access is an id value that is unique or rather is random. We are fetching this data from a server so we cannot hard code the id's.
The JSON is as follows:
{ "error" : { "occured" : "false" },
"errors" : [ ],
"executiontime" : 2500,
"metadata" : { },
"value" : [ { "activity_duration" : "1 hour, ½ day & full day packages",
"adult_rate_high_period_high_price" : 275,
"adult_rate_high_period_low_price" : 49,
"adult_rate_low_period_high_price" : "",
"adult_rate_low_period_low_price" : "",
"amenities" : [ ],
"assets" : { "logo" : { "436209" : { "asset_type" : "image",
"caption" : "",
"credit" : "",
"description" : "",
"exists" : "true",
"height" : 82,
"label" : "Copy of Monarch logo",
"latitude" : 0,
"longitude" : 0,
"market" : "$",
"o_id" : 3221685,
"type_o_id" : 2543991,
"unique_id" : 436209,
"url" : "http://c0481729.cdn2.cloudfiles.rackspacecloud.com/p-DD951E3E-C7AF-F22C-77E98D299833B38F-2544001.jpg",
"width" : 220
} },
We are trying to display the business logo for each amenity. To do this I need to access the url field in the above JSON. How do I access the url field under assest.

The Problem is to get the id of the Logo 436209.
var theid;
var l = obj.value[0].assets.logo
for (var p in l) {
if (l[p].hasOwnProperty('unique_id')) {
theid = l[p].unique_id;
break;
}
}
This is untestet. The idee is to use the in-operator to iterate over the properties of the logo-object and get the propterty that has the unique_id.

Correction:
obj.value[0].assets.logo["436209"].url = 'foo';
// or
var foo = obj.value[0].assets.logo["436209"].url;
This assumes that your object is well formed and continues with more parts of obj.value[0].
Specifically, if your object were completed, perhaps, like this:
var obj = {
"error": { "occured": "false" },
"errors": [],
"executiontime": 2500,
"metadata": {},
"value": [{
"activity_duration": "1 hour, ½ day & full day packages",
"adult_rate_high_period_high_price": 275,
"adult_rate_high_period_low_price": 49,
"adult_rate_low_period_high_price": "",
"adult_rate_low_period_low_price": "",
"amenities": [],
"assets": {
"logo": {
"436209": {
"asset_type": "image",
"caption": "",
"credit": "",
"description": "",
"exists": "true",
"height": 82,
"label": "Copy of Monarch logo",
"latitude": 0,
"longitude": 0,
"market": "$",
"o_id": 3221685,
"type_o_id": 2543991,
"unique_id": 436209,
"url": "http://c0481729.cdn2.cloudfiles.rackspacecloud.com/p-DD951E3E-C7AF-F22C-77E98D299833B38F-2544001.jpg",
"width": 220
}
}
}
}]
};

Multi options in select2

I am trying to use select2 to get remote JSON data and display it with mutli levels.
http://ivaynberg.github.io/select2/index.html
This is my response
{
"Company": [
{
"name": "athenahealth Inc"
},
{
"name": "Localiza Rent a Car"
},
{
"name": "M and B Switchgears"
}
],
"Functional": [
{
"name": "arranger"
},
{
"name": "ambassadors"
}
],
"Persons": [
{
"name": "Moustapha al"
},
{
"name": "Saleh al"
}
]
}
I want to show the result in Multi-Value format - http://ivaynberg.github.io/select2/index.html#multi
So far i am able to fetch data from server side , but then i have no idea how to enable the multi select option.

JSON in following format will work fine
Related issue - https://github.com/ivaynberg/select2/issues/58
{ "Data" : [ {
"id" :1 ,
"text" : "Subsection" ,
"children" : [{
"id" : 2,
"text" : "Paru"
},
{
"id" : 3,
"text" : "Vinu"
}]
},
{ "id" : 4 ,
"text" : "Family" ,
"children" : [{
"id" : 5,
"text" : "ChildVM"
},
{
"id" : 6,
"text" : "ChildPM"
}]
}
]
}

Develop Reference

JavaScript is the programming language of the Web.

Using Javascript aggregation in druid queries - javascript

Related

Tabulator - Tree Structure - Branch Totals

Remove all properties in a deeply nested JavaScript object based on a particular condition

Elasticsearch sorting by custom item weight

Cannot access a field in JSON

Multi options in select2

Categories

Resources