Elasticsearch sorting by custom item weight - javascript

I have stored the documents which include status property. I would like to sort the documents by status priority (not status alphabetically). I have followed previous answers and composed the following function which still doesnt work as expected; the documents are sorted by status names (alphabetically):
function getESSortingByStatusQuery(query, order) {
let statusOrder = ['BLUE', 'RED', 'BLACK', 'YELLOW', 'GREEN'];
if(order == 'desc'){
statusOrder.reverse();
}
const functions = statusOrder.map((item) => {
const idx = statusOrder.indexOf(item);
return {filter: {match: {statusColor: item}},
weight: (idx + 1) * 50}
});
const queryModified = {
"function_score": {
"query": {"match_all": {}}, // this is for testing purposes and should be replaced with original query
"boost": "5",
"functions": functions,
"score_mode": "multiply",
"boost_mode": "replace"
}
}
return queryModified;
}
I would be thankful if anyone suggested the way to sort items according to predefined priority of the property (in this case status).

Below is a sample custom sort script which I think is what you are looking for. I've added sample mapping, documents, query and the response as how it appears.
Mapping:
PUT color_index
{
"mappings": {
"properties": {
"color":{
"type": "keyword"
},
"product":{
"type": "text"
}
}
}
}
Sample Documents:
POST color_index/_doc/1
{
"color": "BLUE",
"product": "adidas and nike"
}
POST color_index/_doc/2
{
"color": "GREEN",
"product": "adidas and nike and puma"
}
POST color_index/_doc/3
{
"color": "GREEN",
"product": "adidas and nike"
}
POST color_index/_doc/4
{
"color": "RED",
"product": "nike"
}
POST color_index/_doc/5
{
"color": "RED",
"product": "adidas and nike"
}
Query:
POST color_index/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "*",
"query": "adidas OR nike"
}
}
]
}
},
"sort": [
{ "_score": { "order": "desc"} }, <---- First sort by score
{ "_script": { <---- Second sort by Colors
"type": "number",
"script": {
"lang": "painless",
"source": "if(params.scores.containsKey(doc['color'].value)) { return params.scores[doc['color'].value];} return 100000;",
"params": {
"scores": {
"BLUE": 0,
"RED": 1,
"BLACK": 2,
"YELLOW": 3,
"GREEN": 4
}
}
},
"order": "asc"
}
}
]
}
Firstly it would return documents sorted by its score, and then it would apply the second sorting logic to that result.
For the second sorting, i.e. using script sort, notice how I have added the numeric values to the colors in the scores section. You would need to construct your query accordingly.
The logic as how it works is in the source section which I believe is self-explainable, where I used doc['color'].value as that was my field on which I'm applying custom sort logic.
Response:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.5159407,
"_source" : {
"color" : "BLUE",
"product" : "adidas and nike"
},
"sort" : [
0.5159407, <--- This value is score(desc by nature)
0.0 <--- This value comes from script sort as its BLUE and I've used value 0 in the script which is in 'asc' order
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "5",
"_score" : 0.5159407,
"_source" : {
"color" : "RED",
"product" : "adidas and nike"
},
"sort" : [
0.5159407,
1.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.5159407,
"_source" : {
"color" : "GREEN",
"product" : "adidas and nike"
},
"sort" : [
0.5159407,
4.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.40538198,
"_source" : {
"color" : "GREEN",
"product" : "adidas and nike and puma"
},
"sort" : [
0.40538198,
4.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "4",
"_score" : 0.10189847,
"_source" : {
"color" : "RED",
"product" : "nike"
},
"sort" : [
0.10189847,
1.0
]
}
]
}
}
Notice the first three documents, it has exact value of product but different color and you can see that they are grouped together as we first sorted by _score then we sort that by color
Let me know if this helps!

Here's the code sample of sorting result. I think this will helps you. If you don't want to get entire documents as result you can filter results using includes.
GET testindex/_search
{
"_source": {
"includes": [
"filed1"
]
},
"aggs": {
"emp_figures": {
"terms": {
"field": "status"
}
}
}
}
This is the sample result you should retrieve
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"hits": {
"total": 84968,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "type",
"_id": "0001",
"_score": 1,
"_source": {
"filed1": "color1,
}
},
{
"_index": "test",
"_type": "type",
"_id": "0002",
"_score": 1,
"_source": {
"filed1": "color2,
}
}
}
}
}

Related

Tabulator - Tree Structure - Branch Totals

I have a tabulator table with below data / configuration;
Data;
var data_tab =
[ {
"code": "A",
"desc_tr": "Top Level",
"mylink": [
{
"code": "A.1",
"desc_tr": "Sub Level 1",
"mylink": [
{
"code": "A.1.1",
"desc_tr": "Sub Level 2",
"mylink": [
{
"code": "A.1.1.1",
"desc_tr": "Sub Level 3",
"mylink": [
{
"code": "A.1.1.1.001",
"desc_tr": "Item 1 at Last Level",
"income": "5",
},
{
"code": "A.1.1.1.002",
"desc_tr": "Item 2 at Last Level",
"income": "2",
}
]
}
]
}
]
},
]
} ]
Config / parameters for table;
let tmp_Tabulator = new Tabulator( "#tabulator_table",
{
data : data_tab,
columns : stg_Tabulator[ src_coldata ],
reactiveData : false,
dataTree : true,
dataTreeBranchElement : false,
dataTreeChildIndent : 0,
dataTreeChildField : "mylink",
height : 683,
}
)
This is the config / parameters for columns;
tbl_Sample : [
{
title : "G",
field : "",
width : 51,
formatter : null,
hozAlign : "left",
formatterParams : null
},
{
title : "CODE",
field : "code",
width : 100,
hozAlign : "left",
formatterParams : null
},
{
title : "DESCRIPTION",
field : "desc_tr",
width : 400,
formatter : null,
hozAlign : "left",
formatterParams : null
},
{
title : "INCOME",
field : "income",
width : 160,
formatter : "money",
hozAlign : "right",
formatterParams : fmt_num_2,
topCalc : "sum",
},
],
This is how table looks like when rendered ;
What should I do to populate income fields at upper levels ?
Thanks in advance...
PS : This is just a sample data and I do not want to do it at source data side (by changing query / json structure ect.) as it is very resource demanding.

Elasticsearch only one record based on userid?

In post index, postid is primary key and userid is foreign key.
i want all post but only post from one userid, such that only one user have the one post in results sort by postdate(optional latest first)
//Actual Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u1",
postid: "p2"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
},
{
userid: "u3",
postid: "p5"
},
{
userid: "u3",
postid: "p6"
}
]
needed as below
//Expecting Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
}
]
I think you can use top hit for this. Here the sample for this :
DELETE my-index-000001
PUT my-index-000001
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
PUT my-index-000001/_doc/1
{"userid": "u1", "postid": "p1", "postdate": "2021-03-01"}
PUT my-index-000001/_doc/2
{"userid": "u1", "postid": "p2", "postdate": "2021-03-02"}
PUT my-index-000001/_doc/3
{"userid": "u2", "postid": "p3", "postdate": "2021-03-03"}
PUT my-index-000001/_doc/4
{"userid": "u3", "postid": "p4", "postdate": "2021-03-04"}
PUT my-index-000001/_doc/5
{"userid": "u3", "postid": "p5", "postdate": "2021-03-05"}
PUT my-index-000001/_doc/6
{"userid": "u3", "postid": "p6", "postdate": "2021-03-06"}
These are the sample index creating steps. And here the query :
GET my-index-000001/_search
{
"size": 0,
"aggs": {
"top_users": {
"terms": {
"field": "userid",
"size": 100
},
"aggs": {
"top": {
"top_hits": {
"sort": [
{
"postdate": {
"order": "desc"
}
}
],
"_source": {
"includes": [ "postdate", "postid" ]
},
"size": 1
}
}
}
}
}
}
And, inside the resultset you can see the top post for the every users inside the aggregations:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"top_users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "u3",
"doc_count" : 3,
"top" : {
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "6",
"_score" : null,
"_source" : {
"postdate" : "2021-03-06",
"postid" : "p6"
},
"sort" : [
1614988800000
]
}
]
}
}
},
{
"key" : "u1",
"doc_count" : 2,
"top" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"postdate" : "2021-03-02",
"postid" : "p2"
},
"sort" : [
1614643200000
]
}
]
}
}
},
{
"key" : "u2",
"doc_count" : 1,
"top" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"postdate" : "2021-03-03",
"postid" : "p3"
},
"sort" : [
1614729600000
]
}
]
}
}
}
]
}
}
}
Assuming an index mapping of the form:
PUT user_posts
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
You could:
aggregate on the userid and order the IDs alphabetically
sub-aggregate on the postid and sort the post by posttime descending via a max aggregation.
filter the response through the filter_path option to only retrieve what you need
POST user_posts/_search?filter_path=aggregations.*.buckets.key,aggregations.*.buckets.*.buckets.key
{
"size": 0,
"aggs": {
"by_userid": {
"terms": {
"field": "userid",
"order": {
"_key": "asc"
},
"size": 100
},
"aggs": {
"by_latest_postid": {
"terms": {
"field": "postid",
"size": 1,
"order": {
"latest_posttime": "desc"
}
},
"aggs": {
"latest_posttime": {
"max": {
"field": "postdate"
}
}
}
}
}
}
}
}
Yielding:
{
"aggregations" : {
"by_userid" : {
"buckets" : [
{
"key" : "u1",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p1"
}
]
}
},
{
"key" : "u2",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p3"
}
]
}
},
{
"key" : "u3",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p4"
}
]
}
}
]
}
}
}
which you can then post-process as you normally would:
...
const response = await ...; // transform the above request for use in the ES JS lib of your choice
const result = response.aggregations.by_userid.buckets.map(b => {
return {
userid: b.key,
postid: b.by_latest_postid.buckets && b.by_latest_postid.buckets[0].key
}
})
You can use the top hits sub-aggregation. So first do a terms aggregation by userId, then you can use top-hits with a sort by post-date to get the latest post by each user.
I should say that if you have many userIds and you want the top hit for each one, you should probably use composite aggregation as your top-level agg, and not terms.

Using Javascript aggregation in druid queries

I am trying to write javascript aggregator for my druid queries. i need to to calculate average of a metric "Base_SalesRank".
So far i have been able to this by writing:
{
"queryType": "groupBy",
"dataSource": "marketdata",
"granularity": "all",
"dimensions" : ["Item"],
"filter": { "type": "and", "fields" : [{"type": "selector", "dimension": "Item", "value": "MN10CESWW"}]},
"intervals": ["2018-06-28T00:00Z/2018-07-04T00:00Z"],
"aggregations" : [
{ "type" : "count", "name" : "rows" },
{ "type" : "doubleSum", "name" : "Base_SalesRank", "fieldName" : "Base_SalesRank" }
],
"postAggregations" : [{
"type": "javascript",
"name": "Target DOS Average",
"fieldNames": ["Base_SalesRank", "rows"],
"function": "function(Base_SalesRank, rows) {return Base_SalesRank/ rows;}"
}]
}
But I noticed that many values in Base_SalesRank is 0.
[ {
"timestamp" : "2018-06-28T05:06:03.000Z",
"result" : {
"pagingIdentifiers" : {
"marketdata_2018-06-28T00:00:00.000Z_2018-06-29T00:00:00.000Z_2018-07-06T08:11:02.499Z" : 3
},
"dimensions" : [ "Item" ],
"metrics" : [ "Base_SalesRank" ],
"events" : [ {
"segmentId" : "marketdata_2018-06-28T00:00:00.000Z_2018-06-29T00:00:00.000Z_2018-07-06T08:11:02.499Z",
"offset" : 0,
"event" : {
"timestamp" : "2018-06-28T07:10:02.000Z",
"Item" : "MN10CESWW",
"Base_SalesRank" : 0
}
},
{
"segmentId" : "marketdata_2018-06-28T00:00:00.000Z_2018-06-29T00:00:00.000Z_2018-07-06T08:11:02.499Z",
"offset" : 3,
"event" : {
"timestamp" : "2018-06-28T07:20:21.000Z",
"Item" : "MN10CESWW",
"Base_SalesRank" : 5558
}
} ]
}
} ]
So i am not getting true average. Now i need to weed out these 0 values and then calucate average. We can do this by using filters
{"type": "not", "field": {"type": "selector", "dimension": "Base_SalesRank", "value": "0"}}
But I have constraint that I have to perform this filter operation inside the javascript function only.
You can achieve the same with just adding a having query -
"having": {
"type": "greaterThan",
"aggregation": "Base_SalesRank",
"value": 0
}
If you want to do the same in javascript function than it can be done as below -
You should add a dimension (key/value) say "isValid" as "0" or "1" during pre-ingestion json data based on if Base_SalesRank is 0 than "isValid" will be 0 else 1.
Apply filter on this field in your query.
Use the rows in your post Aggregration.

Nested queries don't work with cosmosdb-mongodb

My document in cosmosdb looks like this
{
"todayDate": "2017-12-08",
"data": [
{
"group": {"priority": 1, "total": 10},
"severity": 1
},
{
"group": {"priority": 2, "total": 13},
"priority": 2
}
]
}
The following query when issued from either mongoShell for cosmosdb in azure portal or using my spring data mongodb project works fine and returns results in no time:
db.myCollection.find({ "$or" : [ { "data" : { "$elemMatch" : { "priority" : 1}} , "$or" : [ { "data" : { "$elemMatch" : { "group.priority" : 1}}}] }]})
However, the following query on the same lines with more OR conditions which basically is two of the above queries with OR operator, hangs indefinitely:
db.myCollection.find({ "$or": [ { "data" : { "$elemMatch" : { "priority" : 1}} , "$or" : [ { "data" : { "$elemMatch" : { "group.priority" : 1}}}] }, { "data" : { "$elemMatch" : { "severity" : 2}} , "$or" : [ { "data" : { "$elemMatch" : { "group.severity" : 2}}}] } ] })
Is there anything wrong with the last query that makes it hang indefinitely? Even if I replace initial OR with AND, still the same result i.e. hangs indefinitely.
I created 3 documents in my cosmos db according to the document template you provided.
[
{
"id": "1",
"todayDate": "2017-12-08",
"data": [
{
"group": {
"severity": 1,
"total": 10
},
"severity": 1
},
{
"group": {
"priority": 1,
"total": 13
},
"priority": 1
}
]
},
{
"id": "2",
"todayDate": "2017-12-09",
"data": [
{
"group": {
"priority": 3,
"total": 10
},
"severity": 1
},
{
"group": {
"priority": 3,
"total": 13
},
"priority": 1
}
]
},
{
"id": "3",
"todayDate": "2017-12-10",
"data": [
{
"group": {
"priority": 1,
"total": 10
},
"severity": 1
},
{
"group": {
"priority": 2,
"total": 13
},
"priority": 2
}
]
}
]
Then I use Robo 3T tool to execute your sql.
db.coll.find({
"$or": [
{ "data" : { "$elemMatch" : { "priority" : 1}} ,
"$or" : [
{ "data" : { "$elemMatch" : { "group.priority" : 1}}}
] },
{ "data" : { "$elemMatch" : { "severity" : 2}} ,
"$or" : [
{ "data" : { "$elemMatch" : { "group.severity" : 2}}}
] }
]
})
result:
The syntax of the $or that I found on the official document is:
{ $or: [ { <expression1> }, { <expression2> }, ... , { <expressionN> } ] }
It seems that your SQL can be executed normally though it is different from the above syntax. Per my experience, $or is generally used to be nested with $and (MongoDB Nested OR/AND Where?) ,so I do not quite understand what is the purpose of your $or nested here.
Surely, an indefinite hang is probably because the data is too large so that SQL runs too long and you need to optimize your SQL.
Hope it helps you.Any concern ,please let me know.
Update Answer:
I have properly modified my 3 sample documents then query 2 eligible documents via the SQL you provided.
SQL:
db.coll.find(
{
"$and": [
{
"$or": [
{
"data": {
"$elemMatch": {
"priority": 2
}
}
},
{
"data": {
"$elemMatch": {
"group.priority": 2
}
}
}
]
},
{
"$or": [
{
"data": {
"$elemMatch": {
"severity": 1
}
}
},
{
"data": {
"$elemMatch": {
"group.severity": 1
}
}
}
]
}
]
}
)
Results:
So , I think your SQL is correct. Is the data in the database very large? If you've been hanging for a long time, did you have seen timeout error messages? Or you could check RUs setting's issue.

Multi options in select2

I am trying to use select2 to get remote JSON data and display it with mutli levels.
http://ivaynberg.github.io/select2/index.html
This is my response
{
"Company": [
{
"name": "athenahealth Inc"
},
{
"name": "Localiza Rent a Car"
},
{
"name": "M and B Switchgears"
}
],
"Functional": [
{
"name": "arranger"
},
{
"name": "ambassadors"
}
],
"Persons": [
{
"name": "Moustapha al"
},
{
"name": "Saleh al"
}
]
}
I want to show the result in Multi-Value format - http://ivaynberg.github.io/select2/index.html#multi
So far i am able to fetch data from server side , but then i have no idea how to enable the multi select option.
JSON in following format will work fine
Related issue - https://github.com/ivaynberg/select2/issues/58
{ "Data" : [ {
"id" :1 ,
"text" : "Subsection" ,
"children" : [{
"id" : 2,
"text" : "Paru"
},
{
"id" : 3,
"text" : "Vinu"
}]
},
{ "id" : 4 ,
"text" : "Family" ,
"children" : [{
"id" : 5,
"text" : "ChildVM"
},
{
"id" : 6,
"text" : "ChildPM"
}]
}
]
}

Categories

Resources