Related
We have a big database. We collect newsletters and I want to make a trending page. The goal is to make the page realtime and fast! We want to display trending newsletters from the past 2 hours, 4 hours, 24 hours, past week, and past month.
I've worked with MongoDB for a while and I try to keep things simple. I want a new collection, trending, that stores the visitors of the newsletter pages in a time bucket. On every visit, I want to add the information of the newsletter to the object that holds the trending newsletters for that time and $inc the hits field for statistics.
My objects are:
{
"_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b39"),
"bucket" : "last2hours",
"language" : "nl",
"time" : "2019-08-08_00",
"newsletters" : {
"5d4b29ba8ddf870fe15628c7" : {
"_id" : ObjectId("5d4b29ba8ddf870fe15628c7"),
"_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015",
"subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015",
"date" : ISODate("2015-12-04T13:15:03.000+0000"),
"publisher" : {
"_id" : ObjectId("557ebcc54c79597761fd71c2"),
"_slug" : "dalstra-nl",
"name" : "dalstra.nl",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29af8ddf870fe15624ba" : {
"_id" : ObjectId("5d4b29af8ddf870fe15624ba"),
"_slug" : "the-carolina-weddings-show",
"subject" : "The Carolina Weddings Show",
"date" : ISODate("2015-12-04T13:13:54.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29ad8ddf870fe15623f4" : {
"_id" : ObjectId("5d4b29ad8ddf870fe15623f4"),
"_slug" : "newport-gangster-tour",
"subject" : "Newport Gangster Tour",
"date" : ISODate("2015-12-04T13:13:22.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29bb8ddf870fe15628f3" : {
"_id" : ObjectId("5d4b29bb8ddf870fe15628f3"),
"_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more",
"subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.",
"date" : ISODate("2015-12-04T13:15:05.000+0000"),
"publisher" : {
"_id" : ObjectId("5581f0b54c7959e82bfd71c2"),
"_slug" : "springwise-com",
"name" : "springwise.com",
"taal" : "nl"
},
"hits" : NumberInt(2)
}
}
}
{
"_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b3b"),
"bucket" : "last2hours",
"language" : "nl",
"time" : "2019-08-08_01",
"newsletters" : {
"5d4b29ba8ddf870fe15628c7" : {
"_id" : ObjectId("5d4b29ba8ddf870fe15628c7"),
"_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015",
"subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015",
"date" : ISODate("2015-12-04T13:15:03.000+0000"),
"publisher" : {
"_id" : ObjectId("557ebcc54c79597761fd71c2"),
"_slug" : "dalstra-nl",
"name" : "dalstra.nl",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29af8ddf870fe15624ba" : {
"_id" : ObjectId("5d4b29af8ddf870fe15624ba"),
"_slug" : "the-carolina-weddings-show",
"subject" : "The Carolina Weddings Show",
"date" : ISODate("2015-12-04T13:13:54.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29ad8ddf870fe15623f4" : {
"_id" : ObjectId("5d4b29ad8ddf870fe15623f4"),
"_slug" : "newport-gangster-tour",
"subject" : "Newport Gangster Tour",
"date" : ISODate("2015-12-04T13:13:22.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29bb8ddf870fe15628f3" : {
"_id" : ObjectId("5d4b29bb8ddf870fe15628f3"),
"_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more",
"subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.",
"date" : ISODate("2015-12-04T13:15:05.000+0000"),
"publisher" : {
"_id" : ObjectId("5581f0b54c7959e82bfd71c2"),
"_slug" : "springwise-com",
"name" : "springwise.com",
"taal" : "nl"
},
"hits" : NumberInt(2)
}
}
}
{
"_id" : ObjectId("5d4b4ca5a6bba5f7ffb23b3d"),
"bucket" : "last4hours",
"language" : "nl",
"time" : "2019-08-08_00",
"newsletters" : {
"5d4b29ba8ddf870fe15628c7" : {
"_id" : ObjectId("5d4b29ba8ddf870fe15628c7"),
"_slug" : "nieuwsbrief-dalstra-reizen-touring-december-2015",
"subject" : "Nieuwsbrief Dalstra Reizen Touring december 2015",
"date" : ISODate("2015-12-04T13:15:03.000+0000"),
"publisher" : {
"_id" : ObjectId("557ebcc54c79597761fd71c2"),
"_slug" : "dalstra-nl",
"name" : "dalstra.nl",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29af8ddf870fe15624ba" : {
"_id" : ObjectId("5d4b29af8ddf870fe15624ba"),
"_slug" : "the-carolina-weddings-show",
"subject" : "The Carolina Weddings Show",
"date" : ISODate("2015-12-04T13:13:54.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29ad8ddf870fe15623f4" : {
"_id" : ObjectId("5d4b29ad8ddf870fe15623f4"),
"_slug" : "newport-gangster-tour",
"subject" : "Newport Gangster Tour",
"date" : ISODate("2015-12-04T13:13:22.000+0000"),
"publisher" : {
"_id" : ObjectId("503b950fffa67e2c790007d7"),
"_slug" : "livingsocialcom",
"name" : "Livingsocial.com",
"taal" : "nl"
},
"hits" : NumberInt(1)
},
"5d4b29bb8ddf870fe15628f3" : {
"_id" : ObjectId("5d4b29bb8ddf870fe15628f3"),
"_slug" : "springwise-daily-shoe-insoles-control-devices-through-kicking-and-more",
"subject" : "Springwise Daily | Shoe insoles control devices through kicking, and more.",
"date" : ISODate("2015-12-04T13:15:05.000+0000"),
"publisher" : {
"_id" : ObjectId("5581f0b54c7959e82bfd71c2"),
"_slug" : "springwise-com",
"name" : "springwise.com",
"taal" : "nl"
},
"hits" : NumberInt(2)
}
}
}
The goal here is to only have to query by bucket, language, and time. So, if I want to see the trending newsletters of the last 2 hours, I query {bucket: 'last2hours', language: 'nl', time: '2019-08-08_00'}, and then I have all the information I need. There is no need for aggregation, and this findOne query is fast.
So I made a method to update the trending collection:
/**
 * Record one page view of a newsletter in every trending time slot.
 *
 * For each bucket type the hit is written to the current time slot and to the
 * following `steps - 1` slots, so a later reader only has to fetch the single
 * document matching {bucket, language, time} to get the counts for that window.
 * Each write is an upsert: the slot document is created when it does not exist,
 * the newsletter's display fields are refreshed and its `hits` counter is
 * incremented by one.
 *
 * @param array $newsletter Newsletter document; reads `_id`, `_slug`, `subject`,
 *                          `date` and `publisher` (`_id`, `_slug`, `name`, `taal`).
 * @return void
 */
public function setNewsletterTrendingStatistics($newsletter){
    // Bucket definitions: number of consecutive slots a hit is written to,
    // the slot size, and the date() format that produces the slot key.
    $trend_buckets = array(
        'last2hours' => array('steps' => 2,  'step' => 'hour', 'format' => 'Y-m-d_H'),
        'last4hours' => array('steps' => 4,  'step' => 'hour', 'format' => 'Y-m-d_H'),
        'last1day'   => array('steps' => 24, 'step' => 'hour', 'format' => 'Y-m-d_H'),
        'lastweek'   => array('steps' => 7,  'step' => 'day',  'format' => 'Y-m-d'),
        'lastmonth'  => array('steps' => 31, 'step' => 'day',  'format' => 'Y-m-d'),
    );

    // Snapshot the clock once so every slot key is derived from the same instant;
    // otherwise a call that straddles an hour/day boundary yields inconsistent keys.
    $now = time();

    $language = $newsletter['publisher']['taal'];

    // All fields of this newsletter live under newsletters.<id> in the slot document.
    $field = 'newsletters.' . (string) $newsletter['_id'];

    // The update document is identical for every slot, so build it only once.
    $update = array(
        '$set' => array(
            $field . '._id'       => $newsletter['_id'],
            $field . '._slug'     => $newsletter['_slug'],
            $field . '.subject'   => $newsletter['subject'],
            $field . '.date'      => $newsletter['date'],
            $field . '.publisher' => array(
                '_id'   => $newsletter['publisher']['_id'],
                '_slug' => $newsletter['publisher']['_slug'],
                'name'  => $newsletter['publisher']['name'],
                'taal'  => $newsletter['publisher']['taal'],
            ),
        ),
        '$inc' => array(
            $field . '.hits' => 1,
        ),
    );
    $options = array('upsert' => true);
    $collection = $this->FW->mdb->{$this->config['collections']['newsletters_trending']};

    // Add the stats to each slot of each bucket.
    foreach ($trend_buckets as $bucket => $settings) {
        for ($i = 0; $i < $settings['steps']; $i++) {
            $query = array(
                'bucket'   => $bucket,
                'time'     => date($settings['format'], strtotime('+' . $i . ' ' . $settings['step'], $now)),
                'language' => $language,
            );
            $collection->updateOne($query, $update, $options);
        }
    }
}
First of all, is this a good approach? Is there a better approach? Second, I want to count unique hits, so I need to save an IP address. I want to count unique hits in the update query so I don't have to count them in the findOne query. What's the best way to achieve this? I know I can use $addToSet to keep a unique array of IP addresses, but then I still need to count those unique IP addresses.
So I ended up doing this:
I made buckets for each trending container (last 2 hours, last 4 hours, today, last week, last month) for every hour.
I fill these containers on every page view with an update query that does $inc by 1.
Every hour a cron job combines these stats, so the 2-hour buckets feed the 4-hour buckets, the 4-hour buckets feed today, and so on.
This seems like the best approach, and the stats are live.
I have a MongoDB array from which I want to remove a whole block if the nested array of that block is empty.
I have attached the array below
{
"_id" : ObjectId("5b17b991c440782b5a218cd1"),
"vendor_view_id" : 741733,
"product" : [
{
"id" : ObjectId("5b86546540c1c414543e4333"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae8348b7ae0d9538e45ab46"),
"condition_id" : [ ],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-08-05",
"date_status_change" : "2018-08-29-08-08-05",
"status" : 0
},
{
"id" : ObjectId("5b8654ba40c1c4145d1f5473"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae834b17ae0d9538e45ab48"),
"condition_id" : [ ],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-09-30",
"date_status_change" : "2018-08-29-08-09-30",
"status" : 0
},
{
"id" : ObjectId("5b8655a840c1c415080b0a33"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae834a67ae0d9538e45ab47"),
"condition_id" : [
{
"_id" : ObjectId("5ae977da7ff1706f3b7dc47a"),
"status" : 0,
"date_added" : "2018-08-29-08-13-28"
}
],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-13-28",
"date_status_change" : "2018-08-29-08-13-28",
"status" : 0
}
]
}
I would like to delete the array block where product.condition_id is empty
So far I have tried this:
$this->collection_name->collection->updateOne([
'_id' => $vendor_id,
],
[
'$unset' =>
[
'product.$.condition_id' =>
[
'$size'=>0,
]
]
])
EDIT 1:
db.collection_name.collection({_id : ObjectId('5b17b991c440782b5a218cd1'),
"product.condition_id.$":{ "$exists": false }},
{ "$unset": { "product.$": "" }});
still not working
// Remove every element of the `product` array whose `condition_id` array is empty.
db.collection_name.update(
{}, // empty filter: every document in the collection is considered
{$pull : {product : {condition_id : {$size : 0} }}},
{ multi: true } // multi: true updates all documents that meet the query criteria, not just the first
)
If the condition_id array of a product element is empty, that element is pulled from the product array.
Output:
{
"_id" : ObjectId("5b17b991c440782b5a218cd1"),
"vendor_view_id" : 741733,
"product" : [
{
"id" : ObjectId("5b8655a840c1c415080b0a33"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae834a67ae0d9538e45ab47"),
"condition_id" : [
{
"_id" : ObjectId("5ae977da7ff1706f3b7dc47a"),
"status" : 0,
"date_added" : "2018-08-29-08-13-28"
}
],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-13-28",
"date_status_change" : "2018-08-29-08-13-28",
"status" : 0
}
]
}
You can convert your JSON to a PHP array with json_decode($json, true), process it, and then turn it back into JSON afterwards:
json_encode($arr)
Note that in your case you can walk the decoded array and check whether each product's condition_id is empty.
My collections are like this:
{
"_index" : "test_index",
"_type" : "test_type",
"_id" : "10000",
"_score" : 1.0,
"_source" : {
"user_id" : 12,
"index_date" : {
"date" : "2018-02-06 14:25:49.816952",
"timezone_type" : 3,
"timezone" : "UTC"
},
"rating" : null,
"orders" : [
{
"hour" : "08",
"count" : 1
},
{
"hour" : "10",
"count" : 1
}
],
"products" : [
{
"p_id" : 970111,
"count" : 4
},
{
"p_id" : 1280811,
"count" : 1
}
]
}
},
and I tried to access {"hour":"10"}.
My query is:
$query = new Query\Nested();
$query->setPath('orders');
$term = new Term();
$term->setTerm('orders.hour', $order->getCreatedAt()->format('H'));
$query->setQuery($term);
dump($finder->find($query));die;
but I got the following error:
[Elastica\Exception\ResponseException]
failed to create query: {
"nested" : {
"query" : {
"term" : {
"orders.hour" : {
"value" : "12",
"boost" : 1.0
}
}
},
"path" : "orders",
"ignore_unmapped" : false,
"score_mode" : "avg",
"boost" : 1.0
}
} [index: test_index] [reason: all shards failed]
Your documents do not look like they are mapped for nested queries.
I assume that finder is your repository manager and that it is defined as the orders repository. Your code should look something like this:
$finder = $this->get('fos_elastica.repository_manager')->getRepository('YourBundle:order');
$boolquery = new Query\BoolQuery();
$term = new Query\Term();
$term->setTerm('hour', $order->getCreatedAt()->format('H'));
$boolquery->addMust($term);
$finder->find($boolquery);
When trying a "JOIN" operation with $lookup, the result count is OK but the "as" field is empty.
I have two collections and I need to get the user details from subscribercol using the user_id in employer_jobscol.
subscribercol
{
"_id" : ObjectId("58187e7551d244640626d7e1"),
"type" : "job_seeker",
"firstname" : "vishnu",
"lastname" : "kumar pv",
"email_array" : {
"primary" : "test#test.com",
"secondary" : "test#test.test",
"verified" : false
},
"address_array" : {
"address" : "test address22d",
"streetname" : "test222d",
"pincode" : "test222d",
"city" : "dddd",
"state" : "ALASKA2d",
"country" : "Argentinad"
},
"phone_array" : {
"primary" : "",
"secondary" : "",
"verified" : ""
},
"languages" : [
"english",
"malayalam",
"english2"
]
}
employer_jobscol
{
"_id" : ObjectId("582ada6b51d244073e2a7541"),
"employer_id" : ObjectId("58187e7551d244640626d7e1"),
"job_id" : "testjob16946",
"job_title" : "Test Job 25",
"category" : "IT",
"vacancies" : "5",
"salary" : "200000",
"location" : "Kollam",
"employer_name" : "test test",
"mobile" : "9123456987",
"video" : "",
"image" : "",
"work_place" : "option1",
"email" : "test#test.test",
"skills" : [
"php"
],
"isActive" : true,
"applied_users" : [
{
"user_id" : ObjectId("581b364751d2445c311cf6f1"),
"accepted" : false
},
{
"user_id" : ObjectId("58187e7551d244640626d7e1"),
"accepted" : false
}
]
}
My database query is here (executed with Robomongo):
db.getCollection('employer_jobscol').aggregate([ {
$unwind: "$applied_users"
},
{
$lookup:
{
from: "subscribercol",
localField: "user_id",
foreignField: "_id",
as: "subscribercol_docs"
}
}
])
Result is
{
"_id" : ObjectId("582ada6b51d244073e2a7541"),
"employer_id" : ObjectId("58187e7551d244640626d7e1"),
"job_id" : "testjob16946",
"job_title" : "Test Job 25",
"category" : "IT",
"vacancies" : "5",
"salary" : "200000",
"location" : "Kollam",
"employer_name" : "test test",
"mobile" : "9123456987",
"video" : "",
"image" : "",
"work_place" : "option1",
"email" : "test#test.test",
"skills" : [
"php"
],
"isActive" : true,
"applied_users" : {
"user_id" : ObjectId("58187e7551d244640626d7e1"),
"accepted" : false
},
"subscribercol_docs" : []
}
Here subscribercol_docs is an empty array, but I need the user info (name, address, etc.).
That is because there is no top-level user_id field in the local document; the field is "applied_users.user_id".
Try this
// Join each applied user of a job with the matching document in subscribercol.
db.getCollection('employer_jobscol').aggregate([ {
// Emit one document per element of applied_users; applied_users then holds a single object.
$unwind: "$applied_users"
},
{
$lookup:
{
from: "subscribercol",
localField: "applied_users.user_id", // user_id is nested inside applied_users, so the dotted path is required
foreignField: "_id",
as: "subscribercol_docs"
}
}
])
I'm trying to calculate a sum of all records in my db, and I need to avoid duplicates. I wrote this code to group the records, but it doesn't work for me.
$pipeline = [
['$match' =>
$criteria->getCondition()],
['$group' =>
['_id' => '$order_id', 'total' => ['$sum' => '$'.$column]]]
];
$this->getDbConnection()->aggregate('ticket_cache', $pipeline);
Test request:
db.getCollection('ticket_cache').aggregate(
{
"$match":
{"event_id":64}
},
{
"$group" :
{"_id":"$order_id", "total": {"$sum":"$payment_amount"}}
})
Result:
/* 1 */
{
"result" : [
{
"_id" : NumberLong(7002),
"total" : 9000.0000000000000000
}
],
"ok" : 1.0000000000000000
}
Data in the db:
/* 1 */
{
"result" : [
{
"_id" : ObjectId("553f8b4fbfabe2772f8b4f51"),
"event_id" : NumberLong(64),
"ticket_id" : NumberLong(8563),
"ticket_code" : NumberLong(22062299),
"ticket_type_id" : NumberLong(391),
"ticket_created" : NumberLong(1430227620),
"ticket_deleted" : NumberLong(0),
"ticket_user_id" : NumberLong(2),
"ticket_used" : NumberLong(0),
"order_id" : NumberLong(7002),
"order_code" : NumberLong(517005),
"order_created" : NumberLong(1430227620),
"order_deleted" : NumberLong(0),
"order_sales_pipeline" : NumberLong(18),
"order_invoice_id" : NumberLong(4202),
"order_invoice_amount" : 3000.0000000000000000,
"order_invoice_created" : NumberLong(1430227641),
"order_invoice_deleted" : NumberLong(0),
"order_invoice_code" : NumberLong(420155),
"payment_id" : NumberLong(4365),
"payment_amount" : 3000.0000000000000000,
"payment_currency" : NumberLong(4),
"payment_author_id" : NumberLong(1),
"payment_type_id" : NumberLong(27),
"payment_created" : NumberLong(1430227641),
"payment_deleted" : NumberLong(0),
"create_time" : ISODate("2015-04-28T13:29:51.328Z")
},
{
"_id" : ObjectId("553f8b4fbfabe2772f8b4f4f"),
"event_id" : NumberLong(64),
"ticket_id" : NumberLong(8561),
"ticket_code" : NumberLong(49287433),
"ticket_type_id" : NumberLong(391),
"ticket_created" : NumberLong(1430227620),
"ticket_deleted" : NumberLong(0),
"ticket_user_id" : NumberLong(2),
"ticket_used" : NumberLong(0),
"order_id" : NumberLong(7002),
"order_code" : NumberLong(517005),
"order_created" : NumberLong(1430227620),
"order_deleted" : NumberLong(0),
"order_sales_pipeline" : NumberLong(18),
"order_invoice_id" : NumberLong(4202),
"order_invoice_amount" : 3000.0000000000000000,
"order_invoice_created" : NumberLong(1430227641),
"order_invoice_deleted" : NumberLong(0),
"order_invoice_code" : NumberLong(420155),
"payment_id" : NumberLong(4365),
"payment_amount" : 3000.0000000000000000,
"payment_currency" : NumberLong(4),
"payment_author_id" : NumberLong(1),
"payment_type_id" : NumberLong(27),
"payment_created" : NumberLong(1430227641),
"payment_deleted" : NumberLong(0),
"create_time" : ISODate("2015-04-28T13:29:51.316Z")
},
{
"_id" : ObjectId("553f8b4fbfabe2772f8b4f50"),
"event_id" : NumberLong(64),
"ticket_id" : NumberLong(8562),
"ticket_code" : NumberLong(24016753),
"ticket_type_id" : NumberLong(391),
"ticket_created" : NumberLong(1430227620),
"ticket_deleted" : NumberLong(0),
"ticket_user_id" : NumberLong(2),
"ticket_used" : NumberLong(0),
"order_id" : NumberLong(7002),
"order_code" : NumberLong(517005),
"order_created" : NumberLong(1430227620),
"order_deleted" : NumberLong(0),
"order_sales_pipeline" : NumberLong(18),
"order_invoice_id" : NumberLong(4202),
"order_invoice_amount" : 3000.0000000000000000,
"order_invoice_created" : NumberLong(1430227641),
"order_invoice_deleted" : NumberLong(0),
"order_invoice_code" : NumberLong(420155),
"payment_id" : NumberLong(4365),
"payment_amount" : 3000.0000000000000000,
"payment_currency" : NumberLong(4),
"payment_author_id" : NumberLong(1),
"payment_type_id" : NumberLong(27),
"payment_created" : NumberLong(1430227641),
"payment_deleted" : NumberLong(0),
"create_time" : ISODate("2015-04-28T13:29:51.326Z")
}
],
"ok" : 1.0000000000000000
}
Where did I go wrong?
Can you try the query below? It assumes that the payment amount is always the same for all tickets of an order. Have a look at the $addToSet accumulator: http://docs.mongodb.org/manual/reference/operator/aggregation/addToSet/.
// Sum payment amounts for an event while counting an amount repeated within one order only once.
db.getCollection('ticket_cache').aggregate(
// Keep only the tickets of event 64.
{ "$match": {"event_id":64} },
// Per order, collect the set of distinct payment amounts (duplicates collapse to one entry).
{ "$group" :
{"_id":"$order_id", "total": {"$addToSet":"$payment_amount"}}
},
// Emit one document per distinct amount.
{"$unwind": "$total"},
// Constant _id puts every document in a single group, giving one overall sum.
{"$group": {"_id": "null", "totalOdr": {"$sum": "$total"}}}
)