MongoDB not using Index - possible collation issue?
We have a MongoDB query, fully indexed, that should be scanning at most 4/5 rows. However, the query appears to use only one element of the index (the integer) and to ignore the string portion.
We are using a case-insensitive collation (strength=2), but it makes no difference if we specify this or not. Documentation: https://docs.mongodb.com/manual/core/index-case-insensitive/
Do collations use indexes? Is there a more efficient way, or do we need to de-normalise manually? NOTE: we're not actually using any non-standard characters; the collation is specified purely for case insensitivity.
Version (supports collation):
MongoDB server version: 3.6.13
db.version() => 3.6.13
db.adminCommand( { getParameter: 1, featureCompatibilityVersion: 1 } )
gives:
{
"featureCompatibilityVersion" : {
"version" : "3.6"
},
"ok" : 1,
"operationTime" : Timestamp(1565754388, 51),
"$clusterTime" : {
"clusterTime" : Timestamp(1565754388, 51),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
DB Structure (summarised) is
{
"PrimaryID": (int)XXX,
"aTables": {
"userExt": [
{
"userExtPlatform": (int)XXX,
"userExtID": (string)XXX,
"userExtActive": (int 1 | 0)XXX,
},
{
"userExtPlatform": (int)XXX,
"userExtID": (string)XXX,
"userExtActive": (int 1 | 0)XXX,
},
...
],
"userOtherData": [
{
"otherDataField1": XXX,
"otherDataField2": XXX,
},
...
],
...
}
}
The index is set up as follows (note: the collation is specified as {locale: "en", strength: 2}):
{
"v" : 2,
"key" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"name" : "extPlatform",
"background" : false,
"ns" : "archive.users",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
}
}
The query (trying with/without specifying collation)
use archive;
// Variant 1: no collation is given on the query.
// Each dotted path below is a separate condition on the `aTables.userExt` array;
// the conditions are not wrapped in $elemMatch.
db.users.find(
{
"aTables.userExt.userExtPlatform": 4,
"aTables.userExt.userExtID": "AStringValue",
"aTables.userExt.userExtActive": 1,
"deleted": { "$exists": false }
}
)
// Variant 2: the identical filter, with the case-insensitive collation
// ({ locale: "en", strength: 2 }) given explicitly on the cursor.
db.users.find(
{
"aTables.userExt.userExtPlatform": 4,
"aTables.userExt.userExtID": "AStringValue",
"aTables.userExt.userExtActive": 1,
"deleted": { "$exists": false }
}
).collation( { locale: "en", strength: 2 } )
Note: Removing the 'deleted' clause makes no difference to the speed / results / explain.
Here is the explain, and it shows a vast number of keys and documents queried.
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "archive.users",
"indexFilterSet" : false,
"parsedQuery" : {
"$and" : [
{
"aTables.userExt.userExtActive" : {
"$eq" : 1
}
},
{
"aTables.userExt.userExtID" : {
"$eq" : "PrivateStringRemoved"
}
},
{
"aTables.userExt.userExtPlatform" : {
"$eq" : 4
}
},
{
"$nor" : [
{
"deleted" : {
"$exists" : true
}
}
]
}
]
},
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"aTables.userExt.userExtActive" : {
"$eq" : 1
}
},
{
"aTables.userExt.userExtID" : {
"$eq" : "PrivateStringRemoved"
}
},
{
"$nor" : [
{
"deleted" : {
"$exists" : true
}
}
]
}
]
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"indexName" : "extPlatform",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"aTables.userExt.userExtPlatform" : [
"aTables.userExt"
],
"aTables.userExt.userExtID" : [
"aTables.userExt"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"aTables.userExt.userExtPlatform" : [
"[4.0, 4.0]"
],
"aTables.userExt.userExtID" : [
"[MinKey, MaxKey]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 1304,
"totalKeysExamined" : 290114,
"totalDocsExamined" : 290114,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"$and" : [
{
"aTables.userExt.userExtActive" : {
"$eq" : 1
}
},
{
"aTables.userExt.userExtID" : {
"$eq" : "PrivateStringRemoved"
}
},
{
"$nor" : [
{
"deleted" : {
"$exists" : true
}
}
]
}
]
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 1245,
"works" : 290115,
"advanced" : 0,
"needTime" : 290114,
"needYield" : 0,
"saveState" : 2267,
"restoreState" : 2267,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 290114,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 290114,
"executionTimeMillisEstimate" : 270,
"works" : 290115,
"advanced" : 290114,
"needTime" : 0,
"needYield" : 0,
"saveState" : 2267,
"restoreState" : 2267,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"indexName" : "extPlatform",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"aTables.userExt.userExtPlatform" : [
"aTables.userExt"
],
"aTables.userExt.userExtID" : [
"aTables.userExt"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"aTables.userExt.userExtPlatform" : [
"[4.0, 4.0]"
],
"aTables.userExt.userExtID" : [
"[MinKey, MaxKey]"
]
},
"keysExamined" : 290114,
"seeks" : 1,
"dupsTested" : 290114,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "api-mdb-archive-03",
"port" : 27017,
"version" : "3.6.13",
"gitVersion" : "db3c76679b7a3d9b443a0e1b3e45ed02b88c539f"
},
"ok" : 1,
"operationTime" : Timestamp(1565753056, 9),
"$clusterTime" : {
"clusterTime" : Timestamp(1565753056, 9),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
The log output also confirms that it's checking a huge number of documents, and that collation comes through.
2019-08-14T03:23:53.912+0000 I COMMAND [conn20679] command archive.users appName: "MongoDB Shell" command: find { find: "users", filter: { aTables.userExt.userExtPlatform: 4.0, aTables.userExt.userExtID: "PrivateStringRemoved", aTables.userExt.userExtActive: 1.0, deleted: { $exists: false } }, collation: { locale: "en", strength: 2.0 }, lsid: { id: UUID("3178aa31-5ee9-4a79-9848-f01c1842f542") }, $clusterTime: { clusterTime: Timestamp(1565753015, 41), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } }, $db: "archive" } planSummary: IXSCAN { aTables.userExt.userExtPlatform: 1, aTables.userExt.userExtID: 1 } keysExamined:289966 docsExamined:289966 cursorExhausted:1 numYields:2267 nreturned:0 reslen:228 locks:{ Global: { acquireCount: { r: 4536 } }, Database: { acquireCount: { r: 2268 } }, Collection: { acquireCount: { r: 2268 } } } protocol:op_msg 1546ms
2019-08-14T03:24:16.864+0000 I COMMAND [conn20679] command archive.users appName: "MongoDB Shell" command: explain { explain: { find: "users", filter: { aTables.userExt.userExtPlatform: 4.0, aTables.userExt.userExtID: "PrivateStringRemoved", aTables.userExt.userExtActive: 1.0, deleted: { $exists: false } }, collation: { locale: "en", strength: 2.0 } }, verbosity: "executionStats", lsid: { id: UUID("3178aa31-5ee9-4a79-9848-f01c1842f542") }, $clusterTime: { clusterTime: Timestamp(1565753033, 128), signature: { hash: BinData(0, 0000000000000000000000000000000000000000), keyId: 0 } }, $db: "archive" } numYields:2267 reslen:3578 locks:{ Global: { acquireCount: { r: 4536 } }, Database: { acquireCount: { r: 2268 } }, Collection: { acquireCount: { r: 2268 } } } protocol:op_msg 1341ms
For completeness, this is driven by PHP, but verified in Mongo CMD as above. Here is the PHP:
// Filter on fields of the embedded `aTables.userExt` array elements.
// userExtPlatform is the integer and userExtID is the string, matching the
// document structure and the shell query shown above.
// NOTE: these are independent dotted-path conditions (no $elemMatch), which is
// the query shape whose explain output is being investigated.
$aParams = [
    'aTables.userExt.userExtPlatform' => 4,
    'aTables.userExt.userExtID' => 'PrivateStringRemoved',
    'aTables.userExt.userExtActive' => 1,
    // Only documents that have no `deleted` field at all.
    'deleted' => [
        '$exists' => false
    ]
];

$aOptions = [
    'readPreference' => new \MongoDB\Driver\ReadPreference(\MongoDB\Driver\ReadPreference::RP_NEAREST),
    // Pagination window supplied by the caller.
    'skip' => $start,
    'limit' => $limit,
    // Return plain PHP arrays for the root document, embedded documents and arrays.
    'typeMap' => [
        'root' => 'array',
        'document' => 'array',
        'array' => 'array'
    ],
    // Case-insensitive comparison; same locale/strength as the `extPlatform` index.
    'collation' => [
        'locale' => 'en',
        'strength' => 2
    ],
];

try {
    $aResults = $collectionArchive->find($aParams, $aOptions);
} catch (\Exception $exception) {
    // Re-throw any driver failure as the application's own exception type,
    // passing the original message along.
    throw new ArchiverException('Mongo Error', ArchiverRequest::ERROR_MONGO, $exception->getMessage());
}
Posting here, for reassurance if anyone searches. (Based on other answers)
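Having played around, the following syntax is the correct one. You need to group the conditions on the array elements into a single $elemMatch, as below. Without $elemMatch, each dotted-path condition may be satisfied by a different element of the aTables.userExt array, so MongoDB cannot combine the bounds on both keys of the multikey index and only bounds userExtPlatform; with $elemMatch, all conditions must hold for the same element, so the bounds on both userExtPlatform and userExtID are used.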
// Match documents that have at least one `aTables.userExt` element for which
// all three conditions hold at once (same array element).
db.users.find(
{
"aTables.userExt" : {
"$elemMatch" : {
"userExtPlatform": 4,
"userExtID": "AStringValue",
"userExtActive": 1
}
}
}
// Same case-insensitive collation as the index; explain("executionStats")
// returns the plan together with the keys/documents actually examined.
).collation( { locale: "en", strength: 2 } ).explain("executionStats")
As requested: here is the explain:
{
"queryPlanner" : {
"plannerVersion" : 1,
"namespace" : "archive.users",
"indexFilterSet" : false,
"parsedQuery" : {
"aTables.userExt" : {
"$elemMatch" : {
"$and" : [
{
"userExtActive" : {
"$eq" : 1
}
},
{
"userExtID" : {
"$eq" : "AStringValue"
}
},
{
"userExtPlatform" : {
"$eq" : 4
}
}
]
}
}
},
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"winningPlan" : {
"stage" : "FETCH",
"filter" : {
"aTables.userExt" : {
"$elemMatch" : {
"$and" : [
{
"userExtPlatform" : {
"$eq" : 4
}
},
{
"userExtID" : {
"$eq" : "AStringValue"
}
},
{
"userExtActive" : {
"$eq" : 1
}
}
]
}
}
},
"inputStage" : {
"stage" : "IXSCAN",
"keyPattern" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"indexName" : "extPlatform",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"aTables.userExt.userExtPlatform" : [
"aTables.userExt"
],
"aTables.userExt.userExtID" : [
"aTables.userExt"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"aTables.userExt.userExtPlatform" : [
"[4.0, 4.0]"
],
"aTables.userExt.userExtID" : [
"[\")MOK9C5S)?Q1\u0001\u0010\", \")MOK9C5S)?Q1\u0001\u0010\"]"
]
}
}
},
"rejectedPlans" : [ ]
},
"executionStats" : {
"executionSuccess" : true,
"nReturned" : 0,
"executionTimeMillis" : 4,
"totalKeysExamined" : 0,
"totalDocsExamined" : 0,
"executionStages" : {
"stage" : "FETCH",
"filter" : {
"aTables.userExt" : {
"$elemMatch" : {
"$and" : [
{
"userExtPlatform" : {
"$eq" : 4
}
},
{
"userExtID" : {
"$eq" : "AStringValue"
}
},
{
"userExtActive" : {
"$eq" : 1
}
}
]
}
}
},
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"docsExamined" : 0,
"alreadyHasObj" : 0,
"inputStage" : {
"stage" : "IXSCAN",
"nReturned" : 0,
"executionTimeMillisEstimate" : 0,
"works" : 1,
"advanced" : 0,
"needTime" : 0,
"needYield" : 0,
"saveState" : 0,
"restoreState" : 0,
"isEOF" : 1,
"invalidates" : 0,
"keyPattern" : {
"aTables.userExt.userExtPlatform" : 1,
"aTables.userExt.userExtID" : 1
},
"indexName" : "extPlatform",
"collation" : {
"locale" : "en",
"caseLevel" : false,
"caseFirst" : "off",
"strength" : 2,
"numericOrdering" : false,
"alternate" : "non-ignorable",
"maxVariable" : "punct",
"normalization" : false,
"backwards" : false,
"version" : "57.1"
},
"isMultiKey" : true,
"multiKeyPaths" : {
"aTables.userExt.userExtPlatform" : [
"aTables.userExt"
],
"aTables.userExt.userExtID" : [
"aTables.userExt"
]
},
"isUnique" : false,
"isSparse" : false,
"isPartial" : false,
"indexVersion" : 2,
"direction" : "forward",
"indexBounds" : {
"aTables.userExt.userExtPlatform" : [
"[4.0, 4.0]"
],
"aTables.userExt.userExtID" : [
"[\")MOK9C5S)?Q1\u0001\u0010\", \")MOK9C5S)?Q1\u0001\u0010\"]"
]
},
"keysExamined" : 0,
"seeks" : 1,
"dupsTested" : 0,
"dupsDropped" : 0,
"seenInvalidated" : 0
}
}
},
"serverInfo" : {
"host" : "api-mdb-archive-03",
"port" : 27017,
"version" : "3.6.13",
"gitVersion" : "db3c76679b7a3d9b443a0e1b3e45ed02b88c539f"
},
"ok" : 1,
"operationTime" : Timestamp(1565870195, 8),
"$clusterTime" : {
"clusterTime" : Timestamp(1565870195, 8),
"signature" : {
"hash" : BinData(0,"AAAAAAAAAAAAAAAAAAAAAAAAAAA="),
"keyId" : NumberLong(0)
}
}
}
I have a MongoDB document containing an array, and I want to remove a whole element (block) of that array if the nested array inside that element is empty.
I have attached the array below
{
"_id" : ObjectId("5b17b991c440782b5a218cd1"),
"vendor_view_id" : 741733,
"product" : [
{
"id" : ObjectId("5b86546540c1c414543e4333"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae8348b7ae0d9538e45ab46"),
"condition_id" : [ ],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-08-05",
"date_status_change" : "2018-08-29-08-08-05",
"status" : 0
},
{
"id" : ObjectId("5b8654ba40c1c4145d1f5473"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae834b17ae0d9538e45ab48"),
"condition_id" : [ ],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-09-30",
"date_status_change" : "2018-08-29-08-09-30",
"status" : 0
},
{
"id" : ObjectId("5b8655a840c1c415080b0a33"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae834a67ae0d9538e45ab47"),
"condition_id" : [
{
"_id" : ObjectId("5ae977da7ff1706f3b7dc47a"),
"status" : 0,
"date_added" : "2018-08-29-08-13-28"
}
],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-13-28",
"date_status_change" : "2018-08-29-08-13-28",
"status" : 0
}
]
}
I would like to delete each block of the product array where product.condition_id is empty.
So far I have tried this:
// Attempted update: select the vendor document by _id and try to drop the
// product entries whose condition_id array is empty.
$this->collection_name->collection->updateOne([
'_id' => $vendor_id,
],
[
// NOTE(review): `$size` is a query operator, but here it is passed as the
// value of an `$unset` field rather than used as a match condition, and the
// filter above does not reference the `product` array that the positional `$`
// relies on. Presumably this is why nothing is removed; confirm against the
// MongoDB update-operator documentation.
'$unset' =>
[
'product.$.condition_id' =>
[
'$size'=>0,
]
]
])
EDIT 1:
// Second attempt: filter on the document _id plus a condition on the product
// array, then $unset the matched array element.
// NOTE(review): `collection(...)` does not look like a shell update method;
// presumably `update`/`updateOne` was intended. Also, $unset on a positional
// array element is documented to leave a null in place rather than removing
// the element. Confirm both points.
db.collection_name.collection({_id : ObjectId('5b17b991c440782b5a218cd1'),
"product.condition_id.$":{ "$exists": false }},
{ "$unset": { "product.$": "" }});
still not working
// Remove from the `product` array every element whose `condition_id` array is empty.
db.collection_name.update(
{}, // empty filter: every document in the collection is a candidate
{$pull : {product : {condition_id : {$size : 0} }}},
{ multi: true } // multi: true applies the update to all documents that meet the query criteria
)
If the condition_id array of a product element is empty, that element is pulled (removed) from the product array.
Output:
{
"_id" : ObjectId("5b17b991c440782b5a218cd1"),
"vendor_view_id" : 741733,
"product" : [
{
"id" : ObjectId("5b8655a840c1c415080b0a33"),
"vendor_user_id" : ObjectId("5b17b992c440782b5a218cd2"),
"product_type_id" : ObjectId("5ae834a67ae0d9538e45ab47"),
"condition_id" : [
{
"_id" : ObjectId("5ae977da7ff1706f3b7dc47a"),
"status" : 0,
"date_added" : "2018-08-29-08-13-28"
}
],
"shipping_cost" : 100,
"date_added" : "2018-08-29-08-13-28",
"date_status_change" : "2018-08-29-08-13-28",
"status" : 0
}
]
}
You can convert your JSON into a PHP array with json_decode($json, true) (the second argument makes it return arrays rather than objects), so you can process it before turning it back into JSON afterwards:
json_encode($arr)
Note that, in your case, you can check the depth of the array to see whether product.condition_id is empty.
I am using the following MongoDB query, and it only shows me the age of each of my personas.
// Compute the age in whole years for each active demographic entry.
db.amplifyindex.aggregate([
// One output document per element of the `demographic` array.
{ $unwind: "$demographic" },
// Keep only the entries flagged "active" that have a date of birth.
{ $match : { 'demographic.is_latest':"active",
'demographic.date_of_birth' : { $exists : true} } },
// Age in milliseconds: current date minus date of birth.
{ $project : {"ageInMillis" : {$subtract : [new Date(),
"$demographic.date_of_birth"] } } },
// Milliseconds to years; 31558464000 ms is 365.26 days.
{ $project : {"age" : {$divide : ["$ageInMillis", 31558464000] }}},
// Drop the fractional part (age minus age mod 1) to get whole years.
{ $project : {"age" : {$subtract : ["$age", {$mod : ["$age",1]}]}}},
])
The result object by this query is:
{
"_id" : ObjectId("58a42cbbdb5d880c1e000029"),
"age" : 29.0
}
I want to get the data grouped by age range, together with the number of personas in each range, like this:
{
"_id" : ObjectId("58a42cbbdb5d880c1e000029"),
"age" : "10 - 30",
"personas" : 10
}
The complete data structure in my scenario is as follows:
{
"_id" : ObjectId("58a42cbbdb5d880c1e000029"),
"persona_email" : "milton.ullrich#gmail.com",
"company_id" : "1",
"date_added" : ISODate("2017-02-15T10:23:15.000Z"),
"demographic" : [
{
"persona_fname" : "Hayden",
"middle_name" : "Jacobs",
"persona_lname" : "Schmeler",
"gender" : "male",
"date_of_birth" : ISODate("1987-06-16T19:00:00.000Z"),
"marital_status" : "single",
"height" : "2.1 feet",
"weight" : "5 kg",
"measurement" : {
"waist" : "34 inch"
},
"disabilities" : "No",
"race" : "Asian",
"nationality" : "Jordan",
"life_cycle" : "empty nest",
"children" : {
"gender" : "female",
"date_of_birth" : ISODate("1987-06-16T19:00:00.000Z")
},
"medicare_no" : "4916725587565",
"driving_licence_no" : "PACBALM420M",
"id_no" : "4485872783336",
"passport_no" : "OCRFHCNR",
"residential_address" : {
"unit_no" : 603,
"street_no" : "34863 Ondricka Viaduct Apt. 154",
"street_name" : "84799 Little Wall",
"suburb" : "West Virginia",
"postal_code" : "23873",
"state" : "Dominica",
"country" : "Cook Islands"
},
"work_address" : {
"unit_no" : "6011865161287875",
"street_no" : "630 Beer Underpass Suite 372",
"street_name" : "87672 Lind Burg",
"suburb" : "West Virginia",
"postal_code" : "84356-3662",
"state" : "Qatar",
"country" : "British Indian Ocean Territory (Chagos Archipelago)"
},
"shipping_address" : {
"unit_no" : "6011865161287875",
"street_no" : "8292 Langosh Drive Suite 065",
"street_name" : "9844 Nicolas Mount",
"suburb" : "West Virginia",
"postal_code" : "07014",
"state" : "Togo",
"country" : "Tunisia"
},
"job_title" : "Geoscientists",
"employer_name" : "Aufderhar Group",
"income" : 72577,
"phone_numbers" : {
"work_phone" : "993-783-7499",
"home_phone" : "701.546.7016",
"mobile" : "1-346-729-4392"
},
"emails" : {
"work_emails" : "nikolaus.asa#gmail.com",
"personal_emails" : "emmitt68#gmail.com"
},
"languages" : "italian",
"data_source" : "soldi",
"date_added" : ISODate("2017-02-15T10:23:15.000Z"),
"source" : "soldi",
"is_latest" : "inactive"
},
{
"persona_fname" : "Julien",
"middle_name" : "Hirthe",
"persona_lname" : "Schaefer",
"gender" : "female",
"date_of_birth" : ISODate("1987-06-16T19:00:00.000Z"),
"marital_status" : "single",
"height" : "2.1 feet",
"weight" : "5 kg",
"measurement" : {
"waist" : "34 inch"
},
"disabilities" : "No",
"race" : "Asian",
"nationality" : "Sudan",
"life_cycle" : "single",
"children" : {
"gender" : "female",
"date_of_birth" : ISODate("1987-06-16T19:00:00.000Z")
},
"medicare_no" : "4024007131689860",
"driving_licence_no" : "OGOSUOIFSDN",
"id_no" : "5523297913341227",
"passport_no" : "WWZYTE489ZR",
"residential_address" : {
"unit_no" : 603,
"street_no" : "338 Mueller Gardens Suite 397",
"street_name" : "6302 Catalina Isle",
"suburb" : "West Virginia",
"postal_code" : "97534",
"state" : "Antarctica (the territory South of 60 deg S)",
"country" : "Turks and Caicos Islands"
},
"work_address" : {
"unit_no" : "6011865161287875",
"street_no" : "8561 Jesus Ridges Apt. 662",
"street_name" : "1869 Josiah Wall Apt. 347",
"suburb" : "West Virginia",
"postal_code" : "14810",
"state" : "Zambia",
"country" : "Mongolia"
},
"shipping_address" : {
"unit_no" : "6011865161287875",
"street_no" : "514 Heller Center",
"street_name" : "835 Paxton Cliffs Suite 040",
"suburb" : "West Virginia",
"postal_code" : "48942-3845",
"state" : "Palau",
"country" : "Belize"
},
"job_title" : "Geoscientists",
"employer_name" : "Aufderhar Group",
"income" : 72577,
"phone_numbers" : {
"work_phone" : "(553) 892-7614 x573",
"home_phone" : "796.308.3001 x88799",
"mobile" : "+1-876-339-1755"
},
"emails" : {
"work_emails" : "prohaska.katelynn#hotmail.com",
"personal_emails" : "ozulauf#jenkins.com"
},
"languages" : "german",
"data_source" : "soldi",
"date_added" : ISODate("2017-02-15T10:23:15.000Z"),
"source" : "soldi",
"is_latest" : "active"
}
]
}
Within your $group pipeline stage, create the age ranges as the _id key; this can be done using the $concat and $cond operators. Consider running the following pipeline to get the desired result:
// Count active personas per age bracket.
db.amplifyindex.aggregate([
// One output document per element of the `demographic` array.
{ "$unwind": "$demographic" },
// Keep only the entries flagged "active".
{ "$match": { "demographic.is_latest": "active" } },
{
// Age in (fractional) years: milliseconds since date of birth divided by
// the number of milliseconds in a 365-day year.
"$project": {
"age": {
"$divide": [
{
"$subtract": [
new Date(),
// A null/missing date of birth is replaced by the current date, so the
// age is not positive and lands in the "Unknown" bracket below.
{ "$ifNull": ["$demographic.date_of_birth", new Date()] }
]
},
1000 * 86400 * 365
]
}
}
},
{
"$group": {
// Bracket label: every $cond yields "" except the one whose range contains
// the age, so the concatenation is exactly that bracket's label.
// Ages are fractional, so e.g. "10 - 30" covers 10 <= age < 31.
"_id": {
"$concat": [
{ "$cond": [ { "$lte": [ "$age", 0 ] }, "Unknown", ""] },
{ "$cond": [ { "$and": [ { "$gt": ["$age", 0 ] }, { "$lt": ["$age", 10] } ]}, "Under 10", ""] },
{ "$cond": [ { "$and": [ { "$gte": ["$age", 10] }, { "$lt": ["$age", 31] } ]}, "10 - 30", ""] },
{ "$cond": [ { "$and": [ { "$gte": ["$age", 31] }, { "$lt": ["$age", 51] } ]}, "31 - 50", ""] },
{ "$cond": [ { "$and": [ { "$gte": ["$age", 51] }, { "$lt": ["$age", 71] } ]}, "51 - 70", ""] },
{ "$cond": [ { "$gte": [ "$age", 71 ] }, "Over 70", ""] }
]
},
// Number of personas that fall into the bracket.
"personas": { "$sum": 1 }
}
},
// Expose the bracket label as `age` and hide `_id`.
{ "$project": { "_id": 0, "age": "$_id", "personas": 1 } }
])
In the above pipeline, I've taken out the query for null date fields and replaced it with the $ifNull operator within the $project pipeline. This returns the current date where the date of birth field is null or missing and thus yields an age of 0 years, which falls into a special "Unknown" age bracket; this is why all the documents are included, including the ones where the date of birth field is missing.
The $concat operator is used to combine all of the $cond expressions, and this combination is crucial: it acts as a case statement, where otherwise you would need nested $cond operators.
To understand this design, take the third $cond expression
{
"$cond": [
{
"$and": [
{ "$gte": ["$age", 10] },
{ "$lt": ["$age", 31] }
]
},
"10 - 30", // matching if
"" // else
]
},
which essentially expresses the logic
if ( "$age" >= 10 && "$age" < 31 ) { return "10 - 30"; }
else return "";
$concat is useful when used with the $cond operators as the expression will return an empty value except the matching one, which in turn will return the age group.
For example, an age of 30.645411 would return
"$concat": [ "", "", "10 - 30", "", "", "" ]
which then gives you an _id with value of "10 - 30".