mirror of
https://github.com/Andreaierardi/Master-DataScience-Notes.git
synced 2025-01-27 19:57:38 +01:00
130 lines
3.8 KiB
JavaScript
130 lines
3.8 KiB
JavaScript
/* MongoDB operators to study (aggregation framework):
-- $project
-- $match
-- $group
-- $sum
-- $sort
-- $unwind

*/
|
||
|
||
/* Retrieve the number of businesses in Nevada.
   The "country" field holds the code "NV" for these documents.
   count() depends only on the filter, so no projection is passed to find(). */
db.business.find({ country: "NV" }).count();
|
||
|
||
/* Count the Nevada businesses with the aggregation framework. */
db.business.aggregate([
    /* keep only the documents whose country code is "NV" */
    { $match: { country: "NV" } },
    /* group the remaining documents by country and add 1 per document */
    { $group: { _id: "$country", businesses: { $sum: 1 } } }
]);
|
||
|
||
|
||
/* Retrieve the number of businesses for each country, sorted by group key.
   A '$' in front of a field name means "use the value of this field in the
   incoming document": here "$country" makes $group build one group per
   distinct value of the country field. */
db.business.aggregate([
    { $group: { _id: { country: "$country" }, businesses: { $sum: 1 } } },
    /* ascending order on the group key */
    { $sort: { _id: 1 } }
]);
|
||
|
||
/* Retrieve just the count for Nevada by filtering the per-country counts.
   Note: this computes the count of every group and only then filters the
   result. For efficiency it is better to $match before the $group stage.
   The $group stage builds _id as an embedded document { country: <value> },
   so the filter has to address the nested key "_id.country"; comparing _id
   itself with the string "NV" would never match. */
db.business.aggregate([
    { $group: { _id: { country: "$country" }, businesses: { $sum: 1 } } },
    { $match: { "_id.country": "NV" } }
]);
|
||
|
||
|
||
/* Retrieve the average number of stars for the businesses of each country,
   sorted by group key. The output field is labelled after what it contains:
   the average of the "stars" field within each country group. */
db.business.aggregate([
    { $group: { _id: { country: "$country" }, "Average number of stars": { $avg: "$stars" } } },
    { $sort: { _id: 1 } }
]);
|
||
|
||
|
||
/* Retrieve the number of reviews for each business in Nevada.
   "Business name" is a new output field filled from the stored "name" field
   (the field the other queries in this file read); writing
   "Business name": 1 would only include an existing field with that exact
   name. $size returns the length of the "reviews" array. */
db.business.aggregate([
    { $match: { country: "NV" } },
    { $project: {
        _id: 0,
        "Business name": "$name",
        "Number of reviews": { $size: "$reviews" }
    } }
]);
|
||
|
||
|
||
/* Retrieve the average number of businesses per country.
   Stage 1 counts the businesses of each country; stage 2 averages those
   counts. */
db.business.aggregate([
    { $group: { _id: { country: "$country" }, businesses: { $sum: 1 } } },
    /* the constant _id puts every per-country count into a single group */
    { $group: { _id: 1, "Average number of businesses": { $avg: "$businesses" } } }
]);
|
||
|
||
|
||
/* Retrieve the businesses that have been reviewed in Nov 2013. */
/* This is a WRONG solution, kept as a counter-example.
   "reviews" is an array, and each condition below is checked against the
   array as a whole: one review may satisfy the lower bound while a different
   review satisfies the upper bound. A business with one review after
   2013-11-01 and another before 2013-11-30 therefore matches even if none of
   its reviews falls inside November 2013. */
db.business.find({
    $and: [
        { "reviews.review_date": { $gte: new Date("2013-11-01") } },
        { "reviews.review_date": { $lte: new Date("2013-11-30") } }
    ]
});
|
||
|
||
|
||
/* Retrieve the businesses that have been reviewed in Nov 2013: a correct
   solution.
   $unwind emits one document per review, so after it "reviews" is a single
   embedded document and both bounds are checked against the same review.
   The range is half-open, [2013-11-01, 2013-12-01): new Date("2013-11-30")
   is midnight at the start of that day, so an "$lte 2013-11-30" bound would
   drop reviews written later on November 30.
   The output has one document per matching review, so a business reviewed
   several times in the month appears several times.
   NOTE(review): assumes review_date is stored as a Date value (confirm). */
db.business.aggregate([
    { $unwind: "$reviews" },
    { $match: {
        "reviews.review_date": {
            $gte: new Date("2013-11-01"),
            $lt: new Date("2013-12-01")
        }
    } },
    { $project: { _id: 0, name: 1, "reviews.review_date": 1 } }
]);
|
||
|
||
|
||
/* Retrieve, for each business, the average number of stars computed from the
   reviews it received. */
db.business.aggregate([
    /* one document per review */
    { $unwind: "$reviews" },
    /* regroup the reviews by business (id + name) and average their scores */
    { $group: {
        _id: { id: "$_id", name: "$name" },
        "average stars per business": { $avg: "$reviews.review_score" }
    } }
]);
|
||
|
||
|
||
/* Retrieve the number of reviews written by each reviewer. */
db.business.aggregate([
    /* one document per review */
    { $unwind: "$reviews" },
    /* one group per reviewer id, adding 1 for every review */
    { $group: {
        _id: "$reviews.reviewer_id",
        "Number of reviews": { $sum: 1 }
    } }
]);
|
||
|
||
|
||
/* Retrieve the reviewers that provided more than 10 reviews.
   The final $match must filter on the exact field produced by $group
   ("Number of reviews"); a filter on a field that $group does not produce
   matches no document and yields an empty result. */
db.business.aggregate([
    { $unwind: "$reviews" },
    { $group: { _id: "$reviews.reviewer_id", "Number of reviews": { $sum: 1 } } },
    { $match: { "Number of reviews": { $gt: 10 } } }
]);
|
||
|
||
|
||
/* Retrieve the total number of reviews provided in 2013. */
db.business.aggregate([
    /* one document per review */
    { $unwind: "$reviews" },
    /* keep only the year extracted from the review date */
    { $project: { _id: 0, year: { $year: "$reviews.review_date" } } },
    { $match: { year: 2013 } },
    /* a single group (year 2013) that counts the remaining reviews */
    { $group: { _id: "$year", "Number of reviews": { $sum: 1 } } }
]);
|
||
|
||
|
||
/* Retrieve the total number of reviews year by year, in ascending year
   order. */
db.business.aggregate([
    /* one document per review */
    { $unwind: "$reviews" },
    /* keep only the year extracted from the review date */
    { $project: { _id: 0, year: { $year: "$reviews.review_date" } } },
    /* one group per year, adding 1 for every review */
    { $group: { _id: "$year", "Number of reviews": { $sum: 1 } } },
    { $sort: { _id: 1 } }
]);
|