Elasticsearch only one record based on userid? - javascript

In post index, postid is primary key and userid is foreign key.
i want all post but only post from one userid, such that only one user have the one post in results sort by postdate(optional latest first)
//Actual Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u1",
postid: "p2"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
},
{
userid: "u3",
postid: "p5"
},
{
userid: "u3",
postid: "p6"
}
]
needed as below
//Expecting Result
[
{
userid: "u1",
postid: "p1"
},
{
userid: "u2",
postid: "p3"
},
{
userid: "u3",
postid: "p4"
}
]

I think you can use top hit for this. Here the sample for this :
DELETE my-index-000001
PUT my-index-000001
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
PUT my-index-000001/_doc/1
{"userid": "u1", "postid": "p1", "postdate": "2021-03-01"}
PUT my-index-000001/_doc/2
{"userid": "u1", "postid": "p2", "postdate": "2021-03-02"}
PUT my-index-000001/_doc/3
{"userid": "u2", "postid": "p3", "postdate": "2021-03-03"}
PUT my-index-000001/_doc/4
{"userid": "u3", "postid": "p4", "postdate": "2021-03-04"}
PUT my-index-000001/_doc/5
{"userid": "u3", "postid": "p5", "postdate": "2021-03-05"}
PUT my-index-000001/_doc/6
{"userid": "u3", "postid": "p6", "postdate": "2021-03-06"}
These are the sample index creating steps. And here the query :
GET my-index-000001/_search
{
"size": 0,
"aggs": {
"top_users": {
"terms": {
"field": "userid",
"size": 100
},
"aggs": {
"top": {
"top_hits": {
"sort": [
{
"postdate": {
"order": "desc"
}
}
],
"_source": {
"includes": [ "postdate", "postid" ]
},
"size": 1
}
}
}
}
}
}
And, inside the resultset you can see the top post for the every users inside the aggregations:
{
"took" : 3,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 6,
"relation" : "eq"
},
"max_score" : null,
"hits" : [ ]
},
"aggregations" : {
"top_users" : {
"doc_count_error_upper_bound" : 0,
"sum_other_doc_count" : 0,
"buckets" : [
{
"key" : "u3",
"doc_count" : 3,
"top" : {
"hits" : {
"total" : {
"value" : 3,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "6",
"_score" : null,
"_source" : {
"postdate" : "2021-03-06",
"postid" : "p6"
},
"sort" : [
1614988800000
]
}
]
}
}
},
{
"key" : "u1",
"doc_count" : 2,
"top" : {
"hits" : {
"total" : {
"value" : 2,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "2",
"_score" : null,
"_source" : {
"postdate" : "2021-03-02",
"postid" : "p2"
},
"sort" : [
1614643200000
]
}
]
}
}
},
{
"key" : "u2",
"doc_count" : 1,
"top" : {
"hits" : {
"total" : {
"value" : 1,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "my-index-000001",
"_type" : "_doc",
"_id" : "3",
"_score" : null,
"_source" : {
"postdate" : "2021-03-03",
"postid" : "p3"
},
"sort" : [
1614729600000
]
}
]
}
}
}
]
}
}
}

Assuming an index mapping of the form:
PUT user_posts
{
"mappings": {
"properties": {
"userid": {
"type": "keyword"
},
"postid": {
"type": "keyword"
},
"postdate": {
"type": "date"
}
}
}
}
You could:
aggregate on the userid and order the IDs alphabetically
sub-aggregate on the postid and sort the post by posttime descending via a max aggregation.
filter the response through the filter_path option to only retrieve what you need
POST user_posts/_search?filter_path=aggregations.*.buckets.key,aggregations.*.buckets.*.buckets.key
{
"size": 0,
"aggs": {
"by_userid": {
"terms": {
"field": "userid",
"order": {
"_key": "asc"
},
"size": 100
},
"aggs": {
"by_latest_postid": {
"terms": {
"field": "postid",
"size": 1,
"order": {
"latest_posttime": "desc"
}
},
"aggs": {
"latest_posttime": {
"max": {
"field": "postdate"
}
}
}
}
}
}
}
}
Yielding:
{
"aggregations" : {
"by_userid" : {
"buckets" : [
{
"key" : "u1",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p1"
}
]
}
},
{
"key" : "u2",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p3"
}
]
}
},
{
"key" : "u3",
"by_latest_postid" : {
"buckets" : [
{
"key" : "p4"
}
]
}
}
]
}
}
}
which you can then post-process as you normally would:
...
const response = await ...; // transform the above request for use in the ES JS lib of your choice
const result = response.aggregations.by_userid.buckets.map(b => {
return {
userid: b.key,
postid: b.by_latest_postid.buckets && b.by_latest_postid.buckets[0].key
}
})

You can use the top hits sub-aggregation. So first do a terms aggregation by userId, then you can use top-hits with a sort by post-date to get the latest post by each user.
I should say that if you have many userIds and you want the top hit for each one, you should probably use composite aggregation as your top-level agg, and not terms.

Related

Modify the query to get the expected result

I am trying to modify query to get expected output.I am able to write the query but not getting the output as expected so that I may bind in the front end.
Actual output:-
{
"_id" : null,
"first" : 3571.0,
"second" : 24.0
}
Expected output:-
{ "_id" : null,
"opertion":edit,
"count" : 3571.0,
}
{ "_id" : null,
"opertion":read,
"count" : 24,
}
{ "_id" : null,
"opertion":update,
"count" : 9000,
}
Myquery:-
db.getCollection('blog').aggregate([
{ "$group": {
"_id": null,
"first": {
"$sum": {
"$cond": [{ "$in": ["$Operation", ["edit1", "edit2"]] }, 1, 0]
}
},
"second": {
"$sum": {
"$cond": [{ "$in": ["$Operation", ["read1", "read2"]] }, 1, 0]
}
}
},
},
])
if you have collection which is like as below:
[
{
"_id" : 1,
"operation" : "edit1" # some extra fields
},
{
"_id" : 2,
"operation" : "read1"
},
{
"_id" : 3,
"operation" : "update1"
}
]
by using $project and $cond you can rename the "read1", "read2" to read or updates to update, or edits to edit then by grouping on the new operation field you can get the count of each operation.
you can use this query:
db.aggregate([
{
"$project": {
"new_operation":
{
"$cond": [
{"$in":
["$Operation", ["edit1", "edit2"]]
}, "edit", {
"$cond": [
{"$in":
["$operation", ["read1", "read2"]]
}, "read", "update"]
}
]
}
}
},
{
"$group": {
"_id": "$new_operation",
"count": {"$sum": 1}
}
}
])

Elasticsearch sorting by custom item weight

I have stored the documents which include status property. I would like to sort the documents by status priority (not status alphabetically). I have followed previous answers and composed the following function which still doesnt work as expected; the documents are sorted by status names (alphabetically):
function getESSortingByStatusQuery(query, order) {
let statusOrder = ['BLUE', 'RED', 'BLACK', 'YELLOW', 'GREEN'];
if(order == 'desc'){
statusOrder.reverse();
}
const functions = statusOrder.map((item) => {
const idx = statusOrder.indexOf(item);
return {filter: {match: {statusColor: item}},
weight: (idx + 1) * 50}
});
const queryModified = {
"function_score": {
"query": {"match_all": {}}, // this is for testing purposes and should be replaced with original query
"boost": "5",
"functions": functions,
"score_mode": "multiply",
"boost_mode": "replace"
}
}
return queryModified;
}
I would be thankful if anyone suggested the way to sort items according to predefined priority of the property (in this case status).
Below is a sample custom sort script which I think is what you are looking for. I've added sample mapping, documents, query and the response as how it appears.
Mapping:
PUT color_index
{
"mappings": {
"properties": {
"color":{
"type": "keyword"
},
"product":{
"type": "text"
}
}
}
}
Sample Documents:
POST color_index/_doc/1
{
"color": "BLUE",
"product": "adidas and nike"
}
POST color_index/_doc/2
{
"color": "GREEN",
"product": "adidas and nike and puma"
}
POST color_index/_doc/3
{
"color": "GREEN",
"product": "adidas and nike"
}
POST color_index/_doc/4
{
"color": "RED",
"product": "nike"
}
POST color_index/_doc/5
{
"color": "RED",
"product": "adidas and nike"
}
Query:
POST color_index/_search
{
"query": {
"bool": {
"must": [
{
"query_string": {
"default_field": "*",
"query": "adidas OR nike"
}
}
]
}
},
"sort": [
{ "_score": { "order": "desc"} }, <---- First sort by score
{ "_script": { <---- Second sort by Colors
"type": "number",
"script": {
"lang": "painless",
"source": "if(params.scores.containsKey(doc['color'].value)) { return params.scores[doc['color'].value];} return 100000;",
"params": {
"scores": {
"BLUE": 0,
"RED": 1,
"BLACK": 2,
"YELLOW": 3,
"GREEN": 4
}
}
},
"order": "asc"
}
}
]
}
Firstly it would return documents sorted by its score, and then it would apply the second sorting logic to that result.
For the second sorting, i.e. using script sort, notice how I have added the numeric values to the colors in the scores section. You would need to construct your query accordingly.
The logic as how it works is in the source section which I believe is self-explainable, where I used doc['color'].value as that was my field on which I'm applying custom sort logic.
Response:
{
"took" : 5,
"timed_out" : false,
"_shards" : {
"total" : 1,
"successful" : 1,
"skipped" : 0,
"failed" : 0
},
"hits" : {
"total" : {
"value" : 5,
"relation" : "eq"
},
"max_score" : null,
"hits" : [
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "1",
"_score" : 0.5159407,
"_source" : {
"color" : "BLUE",
"product" : "adidas and nike"
},
"sort" : [
0.5159407, <--- This value is score(desc by nature)
0.0 <--- This value comes from script sort as its BLUE and I've used value 0 in the script which is in 'asc' order
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "5",
"_score" : 0.5159407,
"_source" : {
"color" : "RED",
"product" : "adidas and nike"
},
"sort" : [
0.5159407,
1.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "3",
"_score" : 0.5159407,
"_source" : {
"color" : "GREEN",
"product" : "adidas and nike"
},
"sort" : [
0.5159407,
4.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "2",
"_score" : 0.40538198,
"_source" : {
"color" : "GREEN",
"product" : "adidas and nike and puma"
},
"sort" : [
0.40538198,
4.0
]
},
{
"_index" : "color_index",
"_type" : "_doc",
"_id" : "4",
"_score" : 0.10189847,
"_source" : {
"color" : "RED",
"product" : "nike"
},
"sort" : [
0.10189847,
1.0
]
}
]
}
}
Notice the first three documents, it has exact value of product but different color and you can see that they are grouped together as we first sorted by _score then we sort that by color
Let me know if this helps!
Here's the code sample of sorting result. I think this will helps you. If you don't want to get entire documents as result you can filter results using includes.
GET testindex/_search
{
"_source": {
"includes": [
"filed1"
]
},
"aggs": {
"emp_figures": {
"terms": {
"field": "status"
}
}
}
}
This is the sample result you should retrieve
{
"took": 11,
"timed_out": false,
"_shards": {
"total": 2,
"successful": 2,
"failed": 0
},
"hits": {
"total": 84968,
"max_score": 1,
"hits": [
{
"_index": "test",
"_type": "type",
"_id": "0001",
"_score": 1,
"_source": {
"filed1": "color1,
}
},
{
"_index": "test",
"_type": "type",
"_id": "0002",
"_score": 1,
"_source": {
"filed1": "color2,
}
}
}
}
}

Group by Day and Item Total, but Output Item Names as Keys

I've been trying these examples : https://docs.mongodb.com/manual/reference/operator/aggregation/push/ and
https://docs.mongodb.com/manual/reference/operator/aggregation/addToSet/
Sample documents:
{ "_id" : 1, "item" : "abc", "price" : 10, "quantity" : 2, "date" : ISODate("2014-01-01T08:00:00Z") }
{ "_id" : 2, "item" : "jkl", "price" : 20, "quantity" : 1, "date" : ISODate("2014-02-03T09:00:00Z") }
{ "_id" : 3, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-03T09:05:00Z") }
{ "_id" : 4, "item" : "abc", "price" : 10, "quantity" : 10, "date" : ISODate("2014-02-15T08:00:00Z") }
{ "_id" : 5, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T09:05:00Z") }
{ "_id" : 6, "item" : "xyz", "price" : 5, "quantity" : 5, "date" : ISODate("2014-02-15T12:05:10Z") }
{ "_id" : 7, "item" : "xyz", "price" : 5, "quantity" : 10, "date" : ISODate("2014-02-15T14:12:12Z") }
But my need is kind of mixes of them. In push example, the results look like:
{
"_id" : { "day" : 46, "year" : 2014 },
"itemsSold" : [
{ "item" : "abc", "quantity" : 10 },
{ "item" : "xyz", "quantity" : 10 },
{ "item" : "xyz", "quantity" : 5 },
{ "item" : "xyz", "quantity" : 10 }
]
}
{
"_id" : { "day" : 34, "year" : 2014 },
"itemsSold" : [
{ "item" : "jkl", "quantity" : 1 },
{ "item" : "xyz", "quantity" : 5 }
]
}
{
"_id" : { "day" : 1, "year" : 2014 },
"itemsSold" : [ { "item" : "abc", "quantity" : 2 } ]
}
And in $addToSet example, results look like:
{ "_id" : { "day" : 46, "year" : 2014 }, "itemsSold" : [ "xyz", "abc" ] }
{ "_id" : { "day" : 34, "year" : 2014 }, "itemsSold" : [ "xyz", "jkl" ] }
{ "_id" : { "day" : 1, "year" : 2014 }, "itemsSold" : [ "abc" ] }
What I want is going to be like:
{ "_id" : { "day" : 46, "year" : 2014 }, "itemsSold" : { "xyz": 25, "abc": 10 } }
{ "_id" : { "day" : 34, "year" : 2014 }, "itemsSold" : { "xyz": 5, "jkl": 1 ] }
{ "_id" : { "day" : 1, "year" : 2014 }, "itemsSold" : { "abc": 2 } }
Is this possible? If it is, any guide, direction would be helpful.
Based on your data you want two $group stages, in order to first collect per "item" and then to add those item details to an array.
Depending on your MongoDB version you have available is how you process the rest. For MongoDB 3.6 ( of from 3.4.7 ) you can use $arrayToObject in order to reshape the data:
db.collection.aggregate([
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"dayOfYear": { "$dayOfYear": "$date" },
"item": "$item"
},
"total": { "$sum": "$quantity" }
}},
{ "$group": {
"_id": {
"year": "$_id.year",
"dayOfYear": "$_id.dayOfYear"
},
"itemsSold": { "$push": { "k": "$_id.item", "v": "$total" } }
}},
{ "$sort": { "_id": -1 } },
{ "$addFields": {
"itemsSold": { "$arrayToObject": "$itemsSold" }
}}
])
Or with earlier versions, you can simply post process the results. All the "aggregation" work is done before the last stage anyway:
db.collection.aggregate([
{ "$group": {
"_id": {
"year": { "$year": "$date" },
"dayOfYear": { "$dayOfYear": "$date" },
"item": "$item"
},
"total": { "$sum": "$quantity" }
}},
{ "$group": {
"_id": {
"year": "$_id.year",
"dayOfYear": "$_id.dayOfYear"
},
"itemsSold": { "$push": { "k": "$_id.item", "v": "$total" } }
}},
{ "$sort": { "_id": -1 } },
/*
{ "$addFields": {
"itemsSold": { "$arrayToObject": "$itemsSold" }
}}
*/
]).map( d => Object.assign( d,
{
itemsSold: d.itemsSold.reduce((acc,curr) =>
Object.assign(acc, { [curr.k]: curr.v }),
{}
)
}
))
Either way produces the same desired result:
{
"_id" : {
"year" : 2014,
"dayOfYear" : 46
},
"itemsSold" : {
"xyz" : 25,
"abc" : 10
}
}
{
"_id" : {
"year" : 2014,
"dayOfYear" : 34
},
"itemsSold" : {
"jkl" : 1,
"xyz" : 5
}
}
{
"_id" : {
"year" : 2014,
"dayOfYear" : 1
},
"itemsSold" : {
"abc" : 2
}
}
So you can do things with new aggregation features, but really that end result is just "reshaping" which is usually best left to client processing instead.

Join two collection in mongoDB and extract out data in node js

I am using MongoDB 3.6 for my project.
I have 2 collections "users" and "follow". I want to extract out details of user's followers and following (like an Instagram app).
users collection
{
"id" : "1",
"name" : "abc",
"age" : "26"
},
{
"id" : "2",
"name" : "xyz",
"age" : "22"
},
{
"id" : "3",
"name" : "qwe",
"age" : "23"
}
follow collection
{
"id" : "2",
"follow id" : "1"
},
{
"id" : "3",
"follow id" : "1"
},
{
"id" : "1",
"follow id" : "2"
},
{
"id" : "2",
"follow id" : "3"
},
{
"id" : "1",
"follow id" : "3"
}
Now i want following list of id 2 So id 2 is following id 1 and id 3
So, Output should be like this
{
"id" : "1",
"name" : "abc",
"age" : "26"
},
{
"id" : "3",
"name" : "qwe",
"age" : "23"
}
For that, I am using $lookup aggregation. But this is not giving the desired output which I want.
Here is my code -
Follow.aggregate([
{
$lookup:{
from:"users",
localField:"id",
foreignField:"id",
as:"fromItems"
}
},
{
$replaceRoot:{newRoot: {$mergeObjects: [ { $arrayElemAt: ["$fromItems", 0 ] }, "$$ROOT" ] } }
},
{ $project :
{
fromItems : 0
}
}
], callback)
For more understanding please refer the image
To get following list of id 2 you can use following query:
Follow.aggregate([
{
$match: { "id": "2" }
},
{
$lookup:{
from:"users",
localField:"follow id",
foreignField:"id",
as:"fromItems"
}
},
{
$replaceRoot:{newRoot: {$mergeObjects: [ { $arrayElemAt: ["$fromItems", 0 ] }, "$$ROOT" ] } }
},
{ $project :
{
id : "$follow id",
name: 1,
age: 1
}
}
])
So the point here is that you have a relation between id and follow id and after $lookup phase follow id becomes the new id since it's parent-child relation.
EDIT:
3.4 solution below
Follow.aggregate([
{
$match: { "id": "2" }
},
{
$lookup:{
from:"users",
localField:"follow id",
foreignField:"id",
as:"fromItems"
}
},
{
$project: {
id: "$follow id",
from: { $arrayElemAt: ["$fromItems", 0 ] }
}
},
{ $project :
{
id : 1,
name: "$from.name",
age: "$from.age"
}
}
])

How to extract value of nested object array?

I'm trying to extract all links IDs of the object array shown below. This is how I was trying to get that:
const linkIDs = array
.filter(d => d.links)
.map(d => d.links)
But this gives me a nested array, which is not what I wanted.
[
{
"id: "1",
"links": [
{
"id" : "Dn59y87PGhkJXpaiZ",
"type" : "article"
},
{
"id" : "PGhkJXDn59y87paiZ",
"type" : "article"
}
]
},
{
"id: "2",
"links": [
{
"id" : "GhkJXpaiZDn59y87P",
"type" : "article"
}
]
},
{
"id": "3"
}
]
So in this example I need the result
[ "Dn59y87PGhkJXpaiZ", "PGhkJXDn59y87paiZ", "GhkJXpaiZDn59y87P" ]
You can do like bellow, without using any other library.
var data = [
{
"id": "1",
"links": [
{
"id" : "Dn59y87PGhkJXpaiZ",
"type" : "article"
},
{
"id" : "PGhkJXDn59y87paiZ",
"type" : "article"
}
]
},
{
"id": "2",
"links": [
{
"id" : "GhkJXpaiZDn59y87P",
"type" : "article"
}
]
},
{
"id": "3"
}
];
var result = data.filter(e => e.links)
.map(e => e.links.map(link => link.id))
.reduce((a, b) => a.concat(b), []);
console.log(result);
I propose a more readable syntax in plain JS:
var data = [
{
"id": "1",
"links": [
{
"id" : "Dn59y87PGhkJXpaiZ",
"type" : "article"
},
{
"id" : "PGhkJXDn59y87paiZ",
"type" : "article"
}
]
},
{
"id": "2",
"links": [
{
"id" : "GhkJXpaiZDn59y87P",
"type" : "article"
}
]
},
{
"id": "3"
}
];
var result = data.filter(e => e.links)
.map(e => e.links)
.flat()
.map(e => e.id)
console.log(result);
You need to produce your array before mapping. Reduce in Js is very useful function ;)
arr = [
{
"id": "1",
"links": [
{
"id" : "Dn59y87PGhkJXpaiZ",
"type" : "article"
},
{
"id" : "PGhkJXDn59y87paiZ",
"type" : "article"
}
]
},
{
"id": "2",
"links": [
{
"id" : "GhkJXpaiZDn59y87P",
"type" : "article"
}
]
},
{
"id": "3"
}
];
var result = arr.filter(a=>a.links).reduce((acc, a) => {
return acc.concat(a.links)
}, []).map(a=>a.id);
console.log(result);
You can use lodash's flatMap() , where each filtered item is transformed using map().
DEMO
var data = [
{
"id": 1,
"links": [
{
"id": "Dn59y87PGhkJXpaiZ",
"type": "article"
},
{
"id": "PGhkJXDn59y87paiZ",
"type": "article"
}
]
},
{
"id": "2",
"links": [
{
"id": "GhkJXpaiZDn59y87P",
"type": "article"
}
]
},
{
"id": "3"
}
];
var result = _.flatMap(data, item =>
_(item.links)
.map(v => (v.id))
.value()
);
console.log(result);
<script src="https://cdnjs.cloudflare.com/ajax/libs/lodash.js/4.12.0/lodash.js"></script>

Categories

Resources