mongodb group by multiple keys values vise versa - php

I have a user collection with following data
[
{
"user_id": "5625c95ac2d34f27148b64fa",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "562744ccc2d34f27148b6eb7",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "56248eb9c2d34f2f148b5a18",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "561f40bac2d34f17148b462c",
"friend_id": "561f3e06c2d34f27148b45f6"
},
{
"user_id": "561f40bac2d34f17148b462c",
"friend_id": "5620de97c2d34f2f148b578f"
},
{
"user_id": "56276b52c2d34f27148b7128",
"friend_id": "561f40bac2d34f17148b462c"
},
{
"user_id": "561f40bac2d34f17148b462c",
"friend_id": "56276b52c2d34f27148b7128"
}
]
i need to fetch the documents in which combination of user_id and friend_id not repeated. i.e in the above example last two documents user_id repeated in friend_id of next document.
I tried with mongo aggrigate and group by but could not reduce it.

In order to do this you basically need to combine both user_id and friend_id values in a uniquely sorted combination. This means creating an array for each document with those members and sorting that array so that the order is always the same.
Then you can $group on that sorted array content to see which documents contain that same combination and then only return those that do not share that same combination.
This leads to this aggregate statement:
db.collection.aggregate([
{ "$project": {
"user_id": 1,
"friend_id": 1,
"combined": {
"$map": {
"input": ["A","B"],
"as": "el",
"in": {
"$cond": [
{ "$eq": [ "$$el", "A" ] },
"$user_id",
"$friend_id"
]
}
}
}
}},
{ "$unwind": "$combined" },
{ "$sort": { "combined": 1 } },
{ "$group": {
"_id": "$_id",
"combined": { "$push": "$combined" },
"user_id": { "$first": "$user_id" },
"friend_id": { "$first": "$friend_id" }
}},
{ "$group": {
"_id": "$combined",
"docs": { "$push": {
"_id": "$_id",
"user_id": "$user_id",
"friend_id": "$friend_id"
}}
}},
{ "$redact": {
"$cond": {
"if": { "$ne": [{ "$size": "$docs" }, 1] },
"then": "$$PRUNE",
"else": "$$KEEP"
}
}}
])
The PHP translation for laravel means to need to access the raw collection object from the manager, where "collection" is the actual name of the collection in MongoDB:
$result = DB::collection("collection")->raw(function($collection) {
return $collection->aggregate(
array(
array(
'$project' => array(
'user_id' => 1,
'friend_id' => 1,
'combined' => array(
'$map' => array(
'input' => array("A","B"),
'as' => 'el',
'in' => array(
'$cond' => array(
array( '$eq' => array( '$el', 'A' ) ),
'$user_id',
'$friend_id'
)
)
)
)
)
),
array( '$unwind' =>'$combined' ),
array( '$sort' => array( 'combined' => 1 ) ),
array(
'$group' => array(
'_id' => '$_id',
'combined' => array( '$push' => '$combined' ),
'user_id' => array( '$first' => '$user_id' ),
'friend_id' => array( '$first' => '$friend_id' )
)
),
array(
'$group' => array(
'_id' => '$combined',
'docs' => array(
'$push' => array(
'_id' => '$_id',
'user_id' => '$user_id',
'friend_id' => 'friend_id'
)
)
)
),
array(
'$redact' => array(
'$cond' => array(
'if' => array( '$ne' => array( array( '$size' => '$docs'), 1) ),
'then' => '$$PRUNE',
'else' => '$$KEEP'
)
)
)
)
);
});
Or if your MongoDB version is less than 2.6, and you lack operators like $map and $redact, then you can still do this, but not as efficiently:
$result = DB::collection("collection")->raw(function($collection) {
return $collection->aggregate(
array(
array(
'$project' => array(
'user_id' => 1,
'friend_id' => 1,
'type' => array( '$const' => array( 'A', 'B' ) )
)
),
array( '$unwind' => '$type' ),
array(
'$group' => array(
'_id' => '$_id',
'user_id' => array( '$first' => '$user_id' ),
'friend_id' => array( '$first' => '$friend_id' ),
'combined' => array(
'$push' => array(
'$cond' => array(
array( '$eq' => array( '$type', 'A' ) ),
'$user_id',
'$friend_id'
)
)
)
)
)
array( '$unwind' =>'$combined' ),
array( '$sort' => array( 'combined' => 1 ) ),
array(
'$group' => array(
'_id' => '$_id',
'combined' => array( '$push' => '$combined' ),
'user_id' => array( '$first' => '$user_id' ),
'friend_id' => array( '$first' => '$friend_id' )
)
),
array(
'$group' => array(
'_id' => '$combined',
'docs' => array(
'$push' => array(
'_id' => '$_id',
'user_id' => '$user_id',
'friend_id' => 'friend_id'
)
),
'count' => array( '$sum' => 1 )
)
),
array( '$match' => array( 'count' => 1 ) )
)
);
});
Where the first three stages mimic what the first stage is doing in the first example listing by putting both values in a single array. Of course the last two stages by "counting" the array members while grouping and then filtering out anything that does not have a "count" of 1.
In either case this leaves you with output that only lists the documents where that combination does not occur in either order:
{
"_id" : [ "561f40bac2d34f17148b462c", "5625c95ac2d34f27148b64fa" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0cc9"),
"user_id" : "5625c95ac2d34f27148b64fa",
"friend_id" : "561f40bac2d34f17148b462c"
}
]
}
{
"_id" : [ "561f3e06c2d34f27148b45f6", "561f40bac2d34f17148b462c" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0ccc"),
"user_id" : "561f40bac2d34f17148b462c",
"friend_id" : "561f3e06c2d34f27148b45f6"
}
]
}
{
"_id" : [ "561f40bac2d34f17148b462c", "56248eb9c2d34f2f148b5a18" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0ccb"),
"user_id" : "56248eb9c2d34f2f148b5a18",
"friend_id" : "561f40bac2d34f17148b462c"
}
]
}
{
"_id" : [ "561f40bac2d34f17148b462c", "5620de97c2d34f2f148b578f" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0ccd"),
"user_id" : "561f40bac2d34f17148b462c",
"friend_id" : "5620de97c2d34f2f148b578f"
}
]
}
{
"_id" : [ "561f40bac2d34f17148b462c", "562744ccc2d34f27148b6eb7" ],
"docs" : [
{
"_id" : ObjectId("56306f6cd2387ad4c95b0cca"),
"user_id" : "562744ccc2d34f27148b6eb7",
"friend_id" : "561f40bac2d34f17148b462c"
}
]
}
You can pretty up the output, but this serves the purpose of showing the ordered combination used along with the original document data.

Related

Query get data in cakephp 3

I have problem with query cakephp3.x . i want return data have like project_id return a list name inside project_id,
But project_id is unique.unduplicated project_id.
sorry my english.please help.
$result = $query->find('all', [
'fields' => [
'project_id' => 't_project_member.project_id',
'project_name' => 't_project.name',
'member_name' => 'concat(t_member.first_name," ",t_member.last_name)'
],
'join' => [
['table' => 't_project',
'type' => 'LEFT',
'conditions' => 't_project.id = t_project_member.project_id'],
['table' => 't_member',
'type' => 'LEFT',
'conditions' => 't_member.id = t_project_member.member_id'],
],
'conditions' => ['t_project.id' => $project_id]
]);
Result:
{
"result": [
{
"project_id": 4,
"project_name": "Ueno Rebrand : Business cards #1",
"member_name": "User_3 c"
},
{
"project_id": 4,
"project_name": "Ueno Rebrand : Business cards #1",
"member_name": "User_4 d"
},
{
"project_id": 4,
"project_name": "Ueno Rebrand : Business cards #1",
"member_name": "User_5 e"
},
]
}
I want return data like below.
{
"result": [
{
"project_id": 4,
"project_name": "Ueno Rebrand : Business cards #1",
"member_name": {
"User_3 c",
"User_4 d",
"User_5 e
}
},
]
}
Thank you very much.
$result = $query->find('all', [
'fields' => [
'project_id' => 't_project_member.project_id',
'project_name' => 't_project.name',
'member_name' => 'concat(t_member.first_name," ",t_member.last_name)'
],
'join' => [
['table' => 't_project',
'type' => 'LEFT',
'conditions' => 't_project.id = t_project_member.project_id'],
['table' => 't_member',
'type' => 'LEFT',
'conditions' => 't_member.id = t_project_member.member_id'],
],
'group' => '`t_project_member`.`project_id`',
'conditions' => ['t_project.id' => $project_id]
]);

Multiple $group mongo request PHP

I would like to make a request with two $ group.
Here you can see the structure of my data :
{
"_id" : ObjectId("573495af4e998fec800041a7"),
"uniqid" : "not573495aeda725",
"status" : "waiting,
"date" : ISODate("2016-05-12T14:39:42.000+0000"),
"id_transaction" : null,
"hash_file" : null,
"user" : "Michel",
"desc" : "undefined",
"pharmacy" : "p56cdc980ba57f"
},
{
"_id" : ObjectId("573495af4e998fec800041a7"),
"uniqid" : "not573495aeda725",
"status" : "waiting,
"date" : ISODate("2016-05-12T14:39:42.000+0000"),
"id_transaction" : null,
"hash_file" : null,
"user" : "Julien",
"desc" : "undefined",
"pharmacy" : "p72gdf210xs68t"
}
Here is where i am in my aggregate from now :
$pipeline = array(
array(
'$project' => array(
'year' => array('$year' => array('$add' => array('$date',$offset))),
'month' => array('$month' => array('$add' => array('$date',$offset))),
'day' => array('$dayOfMonth' => array('$add' => array('$date',$offset))),
'hour' => array('$hour' => array('$add' => array('$date',$offset))),
"uniqid" => 1,
"status" => 1,
"date" => 1,
"pharmacy" => 1
)
),
array(
'$match' => array(
'$and' => array(
array(
"status" => array('$ne' => null)
),
array(
"status" => array('$ne' => "error")
),
array(
"date" => array('$lte' => new MongoDate(time()))
),
array(
"date" => array('$gt' => new MongoDate(strtotime($sixMonthAgo->format('Y-m-d'))))
)
)
)
),
array(
'$group' => array(
'_id' => array(
"month" => array('$month' => '$date'),
"year" => array('$year' => '$date'),
),
'count' => array('$sum' => 1)
)
),
array(
'$group' => array(
'_id' => array(
),
'count' => array('$sum' => 1)
)
)
);
I would like to have the number on lines where status = "waiting" grouping my request by "year/month" and by "pharmacy".
Thank you so much !
array(
'$group' => array(
'_id' => array(
"month" => array('$month' => '$date'),
"year" => array('$year' => '$date'),
"status"
),
'count' => array('$sum' => 1)
)
),
then instead of grouping - use match for _id.status ="waiting"
You do not need any $project ... Mongo Query should be -
[
{
"$match": {
"status": "waiting",
"date": {
"$gt": {
"$date": "DATE_SIX_MONTH_AGO"
},
"$lt": {
"$date": "CURRENT_DATE"
}
}
}
},
{
"$group": {
"_id": {
"month": {
"$month": "$date"
},
"year": {
"$year": "$date"
},
"pharmacy": "$pharmacy"
},
"count": {
"$sum": 1
}
}
}
]
In PHP it should be -
$pipeline = array(
array(
'$match' => array(
"status" => "waiting",
"date" => array(
'$lte' => new MongoDate(time()),
'$gt' => new MongoDate(strtotime($sixMonthAgo->format('Y-m-d')))
)
)
),
array(
'$group' => array(
'_id' => array(
"month" => array('$month' => '$date'),
"year" => array('$year' => '$date'),
"pharmacy" => '$pharmacy'
),
'count' => array('$sum' => 1)
)
)
);
I did not test in PHP, check and correct the syntax errors if any.

How to create an elasticsearch bool query with php client

I need to run one ES bool query which is looks something like this
{
"aggs": {
"column1": {
"terms": {
"field": "column1.raw",
"size": 5
}
}
},
"query": {
"filtered": {
"filter": {
"bool": {
"must": [
{
"range": {
"time": {
"gt": "2015-02-19 00:00:00",
"lt": "2015-02-20 00:00:00"
}
}
}
],
"should": [
{
"term": {
"column1.raw": "value1"
}
},
{
"term": {
"column1.raw": "value2"
}
}
]
}
}
}
}
}
I have done the things upto should and now need to formulate terms part. As it is having the same index value multiple times , how can I formulate the array for it ?
I am using the following PHP code now :
foreach ($terms as $term) {
$termFIlter = array(
"term" => array(
"column1.raw" => $term
)
);
}
$options['body']['query'] = array(
"filtered" => array(
"filter" => array(
"bool" => array(
"must" => array(
"range" => array(
"time" => array(
"gt" => $start,
"lt" => $end
)
)
),
"should" => $termFIlter
)
)
)
);
take this example please change fields name.set what are fields your are using .
$options = array(
'fields' => array('title', 'content', 'profile_id', 'type', 'name', 'description', 'date', 'url'),
'from' => 0,
'size' => 10,
'query' => array(
($type ?
array(
'bool' => array(
'must' => array(
array('term' => array('_all' => $term)),
array('term' => array('type' => $type))
)
)
) :
array('match' => array('_all' => $term))
)
)
);
In elasticsearch php client documentation this is how you handle needing to put multiple index values of match:
$params = [
'index' => 'my_index',
'type' => 'my_type',
'body' => [
'query' => [
'bool' => [
'must' => [
[ 'match' => [ 'testField' => 'abc' ] ],
[ 'match' => [ 'testField2' => 'xyz' ] ],
]
]
]
]
];
So I think it would be the same for your query but with term. So like you showed in your edit or like this:
$options['body']['query'] = array(
"filtered" => array(
"filter" => array(
"bool" => array(
"must" => array(
"range" => array(
"time" => array(
"gt" => $start,
"lt" => $end
)
)
),
"should" => [
[
"term" => [
"column1.raw" => "value1"
]
],
[
"term" => [
"column1.raw" => "value2"
]
]
]
)
)
)
);

Problems with converting MongoDB query using aggregate to PHP

I have the following (working) MongoDB query to generate a list of the hashtag count.
db.twitter.aggregate([
{
$group: {
_id: "$status.entities.hashtags.text",
hashtags: {
$addToSet : "$status.entities.hashtags.text"
}
}
},
{ $unwind : "$hashtags" },
{ $unwind : "$hashtags" },
{ $group : { _id : "$hashtags", count: { $sum : 1 } } },
{ $sort : { count : -1, _id : 1 } }
]);
Now I try to convert this query to PHP code (for laravel):
$cursor = DB::collection('twitter')->raw(function($collection)
{
return $collection->aggregate(array(
array(
'$group' => array(
'_id' => '$status.entities.hashtags.text',
'hashtags' => array(
'$addToSet' => '$status.entities.hashtags.text',
),
),
),
array(
'$unwind' => '$hashtags',
),
array(
'$unwind' => '$hashtags',
),
array(
'$group' => array(
'_id' => '$hashtags', '
count' => array(
'$sum => 1',
),
),
),
array(
'$sort' => array(
'count' => '-1',
'_id' => '1',
),
),
));
});
dd($cursor);
What I can derive from the Laravel-MongoDB docs is that the raw query input works the same as in PHP mongodb.
The error returned is this:
MongoResultException (15951)
localhost:27017: exception: the group aggregate field 'count' must be defined as an expression inside an object
You solved this but I can tell you where you was wrong:
'$sum => 1',
Should be:
array('$sum' => 1)
Rewrote the array and now it works:
$cursor = DB::collection('twitter')->raw(function($collection)
{
return $collection->aggregate([
[
'$group' => [
'_id' => '$status.entities.hashtags.text',
'hashtags' => [
'$addToSet' => '$status.entities.hashtags.text'
]
]
],
[ '$unwind' => '$hashtags' ],
[ '$unwind' => '$hashtags' ],
[ '$group' => [ '_id' => [ '$toLower' => '$hashtags' ], 'count' => [ '$sum' => 1 ] ] ],
[ '$sort' => [ 'count' => -1, '_id' => 1 ] ]
]);
});
Just replaced the {} by [] and the : by => and that did the trick!

Aggregate by country, state and city

Suppose we have a list of companies so that each company is assigned city, state (province) and country.
I'm looking for a solution, to create a 3-level aggregation as already mentioned here. Unfortunately, I am not able to translate the raw MongoDB query to PHP.
Current exception:
exception: invalid operator '$push'
Database Item:
{
"_id" : ObjectId("52c85fafc8526a4d0d21e0be"),
"city" : "Freiburg",
"country" : "DE",
"state" : "DE-BW",
"_coords" : {
"latitude" : 47.9990077,
"longitude" : 7.842104299999999
}
}
Source:
$collection = $this->getClient()->getCollection('companies');
$ops = array(
array(
'$group' => array(
'_id' => array(
'country' => '$country',
'state' => '$state',
'city' => '$city'
),
),
),
array(
'$group' => array(
'_id' => array(
'country' => '$_id.country',
'state' => '$_id.state'
),
),
),
array(
'$group' => array(
'_id' => array(
'country' => '$_id.country',
'state' => array('$push' => '$_id.state')
),
),
),
array(
'$sort' => array(
'state' => 1
),
)
);
$results = $collection->aggregate($ops);
Expected result:
[
{
"country" : "DE",
"states" :
[
{
"state" : "DE-BW",
"cities" : [
{
"city": "Freiburg",
"_coords": {
"latitude" : 47.9990077,
"longitude" : 7.842104299999999
}
},
...
]
},
...
]
},
...
]
You want two $group levels here. Optionally using $addToSet rather than $push for uniqueness:
In basic JSON notation for the rest of the people:
db.country.aggregate([
{ "$group": {
"_id": {
"country": "$country",
"state": "$state"
},
"cities": {
"$addToSet": {
"city": "$city",
"_coords": "$_coords"
}
}
}},
{ "$group": {
"_id": "$_id.country",
"states": {
"$addToSet": {
"state": "$_id.state",
"cities": "$cities"
}
}
}}
])
And in PHP notation for you:
$collection->aggregate(
array(
array(
'$group' => array(
'_id' => array(
'country' => 'country',
'state' => '$state'
),
'$cities' => array(
'$addToSet' => array(
'city' => '$city',
'_coords' => '$_coords'
)
)
)
),
array(
'$group' => array(
'_id' => '$_id.country',
'states' => array(
'$addToSet' => array(
'state' => '$_id.state',
'cities' => '$cities'
)
)
)
)
)
);
But seriously, learn how to use json_decode. JSON is pretty much the "lingua franca" for general data structure representation, not to detract from YAML or simplified XML or others. It really is not hard to translate these things into a native language representation, especially when many libraies exist to do so.

Categories