2020-06-22 15:23:21 +02:00

907 lines
46 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# MongoDB queries\n",
"\n",
"We import PyMongo, a Python library containing tools for working with MongoDB.\n",
"\n",
"The doc of the library can be found here: https://docs.mongodb.com."
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [],
"source": [
"import pymongo"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Then, we connect to the MongoDB collection named 'businesses'."
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [],
"source": [
"mdb = pymongo.MongoClient()['businesses']\n",
"businesses = mdb['businesses']"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Using ```find_one()``` we get one document that satisfies the specified query criteria on the collection. Notice here that the query argument is empty.\n",
"\n",
"If multiple documents satisfy the query, this method returns the first document according to the natural order which reflects the order of documents on the disk."
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"example = businesses.find_one()"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'_id': ObjectId('5edf46601bca82ff74bb96ef'),\n",
" 'id': 'FYWN1wneV18bWNgQjJ2GNg',\n",
" 'city': 'Ahwatukee',\n",
" 'country': 'AZ',\n",
" 'stars': 4,\n",
" 'is_active': True,\n",
" 'categories': ['Dentists',\n",
" 'Health & Medical',\n",
" 'General Dentistry',\n",
" 'Oral Surgeons',\n",
" 'Orthodontists',\n",
" 'Cosmetic Dentists'],\n",
" 'reviews': [{'reviewer_id': 'MWxrPSz87wx559-Rg3YL-Q',\n",
" 'reviewer_name': 'Jessica',\n",
" 'yelp_since': datetime.datetime(2012, 1, 9, 0, 0),\n",
" 'review_date': datetime.datetime(2013, 3, 12, 0, 0),\n",
" 'review_score': 1},\n",
" {'reviewer_id': 'O412lFp-8M8VpRwdzl0S0A',\n",
" 'reviewer_name': 'Christina',\n",
" 'yelp_since': datetime.datetime(2014, 4, 21, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 4, 21, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': '4ZR5wwn8NST5aqCa3ueTFg',\n",
" 'reviewer_name': 'F.',\n",
" 'yelp_since': datetime.datetime(2012, 11, 19, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 6, 28, 0, 0),\n",
" 'review_score': 1},\n",
" {'reviewer_id': '9G8nsgZ-7o3an9HN4N08Hw',\n",
" 'reviewer_name': 'Lisa',\n",
" 'yelp_since': datetime.datetime(2010, 11, 5, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 6, 17, 0, 0),\n",
" 'review_score': 1},\n",
" {'reviewer_id': 'KdXsniAzdczdlxi31ftbvA',\n",
" 'reviewer_name': 'Jeremy',\n",
" 'yelp_since': datetime.datetime(2015, 11, 29, 0, 0),\n",
" 'review_date': datetime.datetime(2017, 9, 28, 0, 0),\n",
" 'review_score': 1},\n",
" {'reviewer_id': 'ZeNWkf6fdzZWyat8gdGcqA',\n",
" 'reviewer_name': 'Brandon',\n",
" 'yelp_since': datetime.datetime(2015, 12, 29, 0, 0),\n",
" 'review_date': datetime.datetime(2017, 5, 4, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'YGQv5HXLKu8X6K9yGgTQ7Q',\n",
" 'reviewer_name': 'John',\n",
" 'yelp_since': datetime.datetime(2014, 3, 12, 0, 0),\n",
" 'review_date': datetime.datetime(2016, 2, 9, 0, 0),\n",
" 'review_score': 1},\n",
" {'reviewer_id': '05QcvAw7bO4Lcm0bCNejiw',\n",
" 'reviewer_name': 'Craig',\n",
" 'yelp_since': datetime.datetime(2011, 9, 21, 0, 0),\n",
" 'review_date': datetime.datetime(2011, 9, 21, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'XGL7VDkeUyM5nKQspJBTNw',\n",
" 'reviewer_name': 'Linda',\n",
" 'yelp_since': datetime.datetime(2011, 8, 23, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 4, 13, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'hf27xTME3EiCp6NL6VtWZQ',\n",
" 'reviewer_name': 'Andrew',\n",
" 'yelp_since': datetime.datetime(2010, 9, 22, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 4, 18, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'jTvKkNTat9QftSxL_FMcQw',\n",
" 'reviewer_name': 'Linz',\n",
" 'yelp_since': datetime.datetime(2011, 6, 19, 0, 0),\n",
" 'review_date': datetime.datetime(2017, 2, 22, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': '5i3woonzbCpNQS9oPYuG6A',\n",
" 'reviewer_name': 'Sandra',\n",
" 'yelp_since': datetime.datetime(2011, 11, 18, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 5, 23, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'EYnO5JexaaHrFwWOQt4DQw',\n",
" 'reviewer_name': 'Laura',\n",
" 'yelp_since': datetime.datetime(2012, 8, 3, 0, 0),\n",
" 'review_date': datetime.datetime(2012, 11, 7, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'x_9_Xr0_X5YVQ5X4ha6NXw',\n",
" 'reviewer_name': 'Travis',\n",
" 'yelp_since': datetime.datetime(2013, 1, 25, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 8, 7, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'LItF9hg6J8PED74sAwf82w',\n",
" 'reviewer_name': 'Revonne',\n",
" 'yelp_since': datetime.datetime(2012, 10, 3, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 6, 9, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': '2Rh41EBjWI137N2m5VHJJA',\n",
" 'reviewer_name': 'Anna',\n",
" 'yelp_since': datetime.datetime(2012, 10, 24, 0, 0),\n",
" 'review_date': datetime.datetime(2013, 6, 6, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': '_7m3vnjuw77WATm5TmFFrQ',\n",
" 'reviewer_name': 'Ginger',\n",
" 'yelp_since': datetime.datetime(2014, 10, 22, 0, 0),\n",
" 'review_date': datetime.datetime(2016, 6, 20, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': '8XE--xEhu5i-Xlrt9cXgzA',\n",
" 'reviewer_name': 'John',\n",
" 'yelp_since': datetime.datetime(2011, 7, 7, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 8, 12, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'mG7i3mzWqyQ3SA-SwCpA_A',\n",
" 'reviewer_name': 'Derek',\n",
" 'yelp_since': datetime.datetime(2013, 1, 6, 0, 0),\n",
" 'review_date': datetime.datetime(2016, 2, 18, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'IG6XMgQyWF2_RW1nKEioxA',\n",
" 'reviewer_name': 'Michele',\n",
" 'yelp_since': datetime.datetime(2014, 4, 28, 0, 0),\n",
" 'review_date': datetime.datetime(2015, 1, 20, 0, 0),\n",
" 'review_score': 5},\n",
" {'reviewer_id': 'YcmNpPM0ag94g4T0zAtdcg',\n",
" 'reviewer_name': 'Jenn',\n",
" 'yelp_since': datetime.datetime(2011, 12, 26, 0, 0),\n",
" 'review_date': datetime.datetime(2016, 5, 7, 0, 0),\n",
" 'review_score': 4},\n",
" {'reviewer_id': 'vMf4nEiuCf7ig4k0fMNuzg',\n",
" 'reviewer_name': 'Jeary',\n",
" 'yelp_since': datetime.datetime(2013, 1, 3, 0, 0),\n",
" 'review_date': datetime.datetime(2014, 5, 8, 0, 0),\n",
" 'review_score': 4},\n",
" {'reviewer_id': 'DHQWSHz6b7Jsy-A04m1cIA',\n",
" 'reviewer_name': 'Ursula',\n",
" 'yelp_since': datetime.datetime(2011, 8, 10, 0, 0),\n",
" 'review_date': datetime.datetime(2012, 7, 26, 0, 0),\n",
" 'review_score': 3}]}"
]
},
"execution_count": 73,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"example"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q1: businesses in the Arizona (AZ) country with 5 stars\n",
"\n",
"In this first example, we want to find all the businesses in the AZ country which received a review with 5 stars.\n",
"\n",
"We will use ```find()```, which selects documents in a collection and returns a cursor to the selected documents.\n",
"\n",
"We will also set the arguments query and projection. The first one, specifies the selection filter using query operators. The second one, specifies the fields to return in the documents that match the query filter.\n",
"\n",
"Recall that: \n",
"- A projection can explicitly include several fields by setting them to 1.\n",
"- You can remove the ```_id``` field from the results by setting it to 0 in the projection."
]
},
{
"cell_type": "code",
"execution_count": 93,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'city': 'Chandler', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Goodyear', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Phoenix', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Cave Creek', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Phoenix', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Mesa', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Phoenix', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Peoria', 'country': 'AZ', 'stars': 5}\n",
"{'city': 'Phoenix', 'country': 'AZ', 'stars': 5}\n"
]
}
],
"source": [
"q = {\"country\" : \"AZ\", \"stars\": 5}\n",
"p = {'_id' : 0, 'country' : 1, 'city' : 1, 'stars' : 1}\n",
"cursor = businesses.find(q, p)\n",
"for record in cursor.limit(20):\n",
" print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q2: businesses that have at least five reviews\n",
"\n",
"In this query we want to find the businesses that have at least five reviews. We can address the query using the aggregation framework. MongoDB aggregation framework is modeled on the concept of data processing pipelines: documents enter a multi-stage pipeline that transforms the documents into a final aggregated result. Using the aggregation framework we will:\n",
"- ``unwind`` on the reviews field. Recall that the unwind command deconstructs an array field from the input documents to output a document for each element. Each output document is the input document with the value of the array field replaced by the element.\n",
"- ``group`` on the _id field. The command groups input documents by the specified _id expression. When grouping we will compute the sum of the elements with the same _id.\n",
"- ``match`` on the computed number of reviews. The command filters the documents to pass only the documents that match the specified condition(s) to the next pipeline stage."
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'_id': ObjectId('5edf470b1bca82ff74bb9742'), 'Number of reviews': 24}\n",
"{'_id': ObjectId('5edf47151bca82ff74bb9748'), 'Number of reviews': 16}\n",
"{'_id': ObjectId('5edf476d1bca82ff74bb977c'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf46f81bca82ff74bb9738'), 'Number of reviews': 21}\n",
"{'_id': ObjectId('5edf47561bca82ff74bb976e'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf46841bca82ff74bb96fe'), 'Number of reviews': 12}\n",
"{'_id': ObjectId('5edf46601bca82ff74bb96ef'), 'Number of reviews': 23}\n",
"{'_id': ObjectId('5edf46c91bca82ff74bb971f'), 'Number of reviews': 25}\n",
"{'_id': ObjectId('5edf47001bca82ff74bb973c'), 'Number of reviews': 17}\n",
"{'_id': ObjectId('5edf46681bca82ff74bb96f3'), 'Number of reviews': 116}\n",
"{'_id': ObjectId('5edf47401bca82ff74bb9761'), 'Number of reviews': 26}\n",
"{'_id': ObjectId('5edf47881bca82ff74bb978c'), 'Number of reviews': 12}\n",
"{'_id': ObjectId('5edf47b81bca82ff74bb97a9'), 'Number of reviews': 8}\n",
"{'_id': ObjectId('5edf46c11bca82ff74bb971b'), 'Number of reviews': 25}\n",
"{'_id': ObjectId('5edf46c01bca82ff74bb971a'), 'Number of reviews': 9}\n",
"{'_id': ObjectId('5edf468b1bca82ff74bb9701'), 'Number of reviews': 65}\n",
"{'_id': ObjectId('5edf47b31bca82ff74bb97a6'), 'Number of reviews': 18}\n",
"{'_id': ObjectId('5edf47b51bca82ff74bb97a7'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf47bf1bca82ff74bb97ad'), 'Number of reviews': 109}\n",
"{'_id': ObjectId('5edf47a81bca82ff74bb979f'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf46da1bca82ff74bb9728'), 'Number of reviews': 80}\n",
"{'_id': ObjectId('5edf479c1bca82ff74bb9798'), 'Number of reviews': 8}\n",
"{'_id': ObjectId('5edf47021bca82ff74bb973d'), 'Number of reviews': 12}\n",
"{'_id': ObjectId('5edf46bc1bca82ff74bb9718'), 'Number of reviews': 20}\n",
"{'_id': ObjectId('5edf46661bca82ff74bb96f2'), 'Number of reviews': 9}\n",
"{'_id': ObjectId('5edf47491bca82ff74bb9766'), 'Number of reviews': 37}\n",
"{'_id': ObjectId('5edf46ba1bca82ff74bb9717'), 'Number of reviews': 78}\n",
"{'_id': ObjectId('5edf46f61bca82ff74bb9737'), 'Number of reviews': 34}\n",
"{'_id': ObjectId('5edf47091bca82ff74bb9741'), 'Number of reviews': 46}\n",
"{'_id': ObjectId('5edf46901bca82ff74bb9703'), 'Number of reviews': 21}\n",
"{'_id': ObjectId('5edf474d1bca82ff74bb9769'), 'Number of reviews': 20}\n",
"{'_id': ObjectId('5edf47391bca82ff74bb975d'), 'Number of reviews': 17}\n",
"{'_id': ObjectId('5edf472f1bca82ff74bb9757'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf46c31bca82ff74bb971c'), 'Number of reviews': 11}\n",
"{'_id': ObjectId('5edf475b1bca82ff74bb9771'), 'Number of reviews': 82}\n",
"{'_id': ObjectId('5edf47621bca82ff74bb9775'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf476f1bca82ff74bb977d'), 'Number of reviews': 11}\n",
"{'_id': ObjectId('5edf47b21bca82ff74bb97a5'), 'Number of reviews': 43}\n",
"{'_id': ObjectId('5edf476a1bca82ff74bb977a'), 'Number of reviews': 16}\n",
"{'_id': ObjectId('5edf47341bca82ff74bb975a'), 'Number of reviews': 37}\n",
"{'_id': ObjectId('5edf46821bca82ff74bb96fd'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf47251bca82ff74bb9751'), 'Number of reviews': 21}\n",
"{'_id': ObjectId('5edf46a81bca82ff74bb970f'), 'Number of reviews': 8}\n",
"{'_id': ObjectId('5edf46fd1bca82ff74bb973a'), 'Number of reviews': 16}\n",
"{'_id': ObjectId('5edf47361bca82ff74bb975b'), 'Number of reviews': 13}\n",
"{'_id': ObjectId('5edf47ab1bca82ff74bb97a1'), 'Number of reviews': 53}\n",
"{'_id': ObjectId('5edf47c11bca82ff74bb97ae'), 'Number of reviews': 66}\n",
"{'_id': ObjectId('5edf46861bca82ff74bb96ff'), 'Number of reviews': 20}\n",
"{'_id': ObjectId('5edf472a1bca82ff74bb9754'), 'Number of reviews': 263}\n",
"{'_id': ObjectId('5edf469c1bca82ff74bb9709'), 'Number of reviews': 23}\n",
"{'_id': ObjectId('5edf46de1bca82ff74bb972a'), 'Number of reviews': 9}\n",
"{'_id': ObjectId('5edf478e1bca82ff74bb9790'), 'Number of reviews': 30}\n",
"{'_id': ObjectId('5edf46701bca82ff74bb96f6'), 'Number of reviews': 9}\n",
"{'_id': ObjectId('5edf46db1bca82ff74bb9729'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf470e1bca82ff74bb9744'), 'Number of reviews': 30}\n",
"{'_id': ObjectId('5edf47581bca82ff74bb976f'), 'Number of reviews': 373}\n",
"{'_id': ObjectId('5edf47421bca82ff74bb9762'), 'Number of reviews': 19}\n",
"{'_id': ObjectId('5edf47791bca82ff74bb9783'), 'Number of reviews': 18}\n",
"{'_id': ObjectId('5edf47ae1bca82ff74bb97a3'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf46ad1bca82ff74bb9711'), 'Number of reviews': 17}\n",
"{'_id': ObjectId('5edf47451bca82ff74bb9764'), 'Number of reviews': 97}\n",
"{'_id': ObjectId('5edf46881bca82ff74bb9700'), 'Number of reviews': 9}\n",
"{'_id': ObjectId('5edf47261bca82ff74bb9752'), 'Number of reviews': 16}\n",
"{'_id': ObjectId('5edf46d61bca82ff74bb9726'), 'Number of reviews': 33}\n",
"{'_id': ObjectId('5edf47311bca82ff74bb9758'), 'Number of reviews': 22}\n",
"{'_id': ObjectId('5edf46c71bca82ff74bb971e'), 'Number of reviews': 14}\n",
"{'_id': ObjectId('5edf476c1bca82ff74bb977b'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf468e1bca82ff74bb9702'), 'Number of reviews': 34}\n",
"{'_id': ObjectId('5edf47a11bca82ff74bb979b'), 'Number of reviews': 38}\n",
"{'_id': ObjectId('5edf47b01bca82ff74bb97a4'), 'Number of reviews': 29}\n",
"{'_id': ObjectId('5edf475d1bca82ff74bb9772'), 'Number of reviews': 30}\n",
"{'_id': ObjectId('5edf47711bca82ff74bb977e'), 'Number of reviews': 32}\n",
"{'_id': ObjectId('5edf47641bca82ff74bb9776'), 'Number of reviews': 11}\n",
"{'_id': ObjectId('5edf46721bca82ff74bb96f7'), 'Number of reviews': 15}\n",
"{'_id': ObjectId('5edf46d01bca82ff74bb9723'), 'Number of reviews': 13}\n",
"{'_id': ObjectId('5edf46f31bca82ff74bb9735'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf46941bca82ff74bb9705'), 'Number of reviews': 46}\n",
"{'_id': ObjectId('5edf474a1bca82ff74bb9767'), 'Number of reviews': 37}\n",
"{'_id': ObjectId('5edf47761bca82ff74bb9781'), 'Number of reviews': 15}\n",
"{'_id': ObjectId('5edf472d1bca82ff74bb9756'), 'Number of reviews': 21}\n",
"{'_id': ObjectId('5edf47821bca82ff74bb9788'), 'Number of reviews': 75}\n",
"{'_id': ObjectId('5edf467a1bca82ff74bb96fa'), 'Number of reviews': 23}\n",
"{'_id': ObjectId('5edf46df1bca82ff74bb972b'), 'Number of reviews': 12}\n",
"{'_id': ObjectId('5edf46cc1bca82ff74bb9721'), 'Number of reviews': 11}\n",
"{'_id': ObjectId('5edf46981bca82ff74bb9707'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf46961bca82ff74bb9706'), 'Number of reviews': 16}\n",
"{'_id': ObjectId('5edf47891bca82ff74bb978d'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf47931bca82ff74bb9793'), 'Number of reviews': 14}\n",
"{'_id': ObjectId('5edf479d1bca82ff74bb9799'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf47721bca82ff74bb977f'), 'Number of reviews': 12}\n",
"{'_id': ObjectId('5edf479f1bca82ff74bb979a'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf47a31bca82ff74bb979c'), 'Number of reviews': 131}\n",
"{'_id': ObjectId('5edf473b1bca82ff74bb975e'), 'Number of reviews': 11}\n",
"{'_id': ObjectId('5edf47a51bca82ff74bb979d'), 'Number of reviews': 10}\n",
"{'_id': ObjectId('5edf469a1bca82ff74bb9708'), 'Number of reviews': 39}\n",
"{'_id': ObjectId('5edf47c71bca82ff74bb97b2'), 'Number of reviews': 9}\n",
"{'_id': ObjectId('5edf472c1bca82ff74bb9755'), 'Number of reviews': 8}\n",
"{'_id': ObjectId('5edf47441bca82ff74bb9763'), 'Number of reviews': 8}\n",
"{'_id': ObjectId('5edf47541bca82ff74bb976d'), 'Number of reviews': 76}\n",
"{'_id': ObjectId('5edf46a31bca82ff74bb970c'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf47901bca82ff74bb9791'), 'Number of reviews': 25}\n",
"{'_id': ObjectId('5edf46751bca82ff74bb96f8'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf46d41bca82ff74bb9725'), 'Number of reviews': 349}\n",
"{'_id': ObjectId('5edf46d21bca82ff74bb9724'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf47951bca82ff74bb9794'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf47831bca82ff74bb9789'), 'Number of reviews': 7}\n",
"{'_id': ObjectId('5edf478c1bca82ff74bb978f'), 'Number of reviews': 28}\n",
"{'_id': ObjectId('5edf46c51bca82ff74bb971d'), 'Number of reviews': 232}\n",
"{'_id': ObjectId('5edf47b61bca82ff74bb97a8'), 'Number of reviews': 26}\n",
"{'_id': ObjectId('5edf47211bca82ff74bb974f'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf46ef1bca82ff74bb9733'), 'Number of reviews': 16}\n",
"{'_id': ObjectId('5edf46aa1bca82ff74bb9710'), 'Number of reviews': 55}\n",
"{'_id': ObjectId('5edf46f41bca82ff74bb9736'), 'Number of reviews': 18}\n",
"{'_id': ObjectId('5edf47c51bca82ff74bb97b1'), 'Number of reviews': 167}\n",
"{'_id': ObjectId('5edf46621bca82ff74bb96f0'), 'Number of reviews': 11}\n",
"{'_id': ObjectId('5edf467d1bca82ff74bb96fb'), 'Number of reviews': 38}\n",
"{'_id': ObjectId('5edf46cb1bca82ff74bb9720'), 'Number of reviews': 213}\n",
"{'_id': ObjectId('5edf47471bca82ff74bb9765'), 'Number of reviews': 9}\n",
"{'_id': ObjectId('5edf47321bca82ff74bb9759'), 'Number of reviews': 68}\n",
"{'_id': ObjectId('5edf475a1bca82ff74bb9770'), 'Number of reviews': 26}\n",
"{'_id': ObjectId('5edf471e1bca82ff74bb974d'), 'Number of reviews': 42}\n",
"{'_id': ObjectId('5edf46641bca82ff74bb96f1'), 'Number of reviews': 18}\n",
"{'_id': ObjectId('5edf47071bca82ff74bb9740'), 'Number of reviews': 34}\n",
"{'_id': ObjectId('5edf47601bca82ff74bb9774'), 'Number of reviews': 140}\n",
"{'_id': ObjectId('5edf477b1bca82ff74bb9784'), 'Number of reviews': 15}\n",
"{'_id': ObjectId('5edf47a91bca82ff74bb97a0'), 'Number of reviews': 33}\n",
"{'_id': ObjectId('5edf469e1bca82ff74bb970a'), 'Number of reviews': 6}\n",
"{'_id': ObjectId('5edf46fb1bca82ff74bb9739'), 'Number of reviews': 8}\n",
"{'_id': ObjectId('5edf47c81bca82ff74bb97b3'), 'Number of reviews': 13}\n",
"{'_id': ObjectId('5edf473e1bca82ff74bb9760'), 'Number of reviews': 34}\n",
"{'_id': ObjectId('5edf47ca1bca82ff74bb97b4'), 'Number of reviews': 14}\n",
"{'_id': ObjectId('5edf47171bca82ff74bb9749'), 'Number of reviews': 87}\n"
]
}
],
"source": [
"cursor = businesses.aggregate([{ \"$unwind\": \"$reviews\" },\n",
" { \"$group\": { \"_id\": \"$_id\", \"Number of reviews\": { \"$sum\": 1 } } },\n",
" { \"$match\": { \"Number of reviews\": { \"$gt\": 5 } } }\n",
" ]);\n",
"for record in cursor:\n",
" print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q3: find the businesses that are categorized as \"Restaurants\" and \"Arts & Entertainment\" (both categories)\n",
"\n",
"The ``$in`` operator selects the documents where the value of a field equals any value in the specified array.\n",
"\n",
"``$and`` performs a logical AND operation on an array of one or more expressions (e.g. ``<expression1>``, ``<expression2>``, etc.) and selects the documents that satisfy all the expressions in the array."
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': '1_3nOM7s9WqnJWTNu2-i8Q', 'categories': ['Restaurants', 'French', 'Gastropubs', 'Festivals', 'Arts & Entertainment']}\n",
"{'id': 'n7V4cD-KqqE3OXk0irJTyA', 'categories': ['American (New)', 'Arcades', 'Restaurants', 'Arts & Entertainment', 'Gastropubs']}\n",
"{'id': 'M3uV9Y3EDSpy9d4YwyNSAQ', 'categories': ['Arts & Entertainment', 'Restaurants', 'Ramen', 'Japanese', 'Bars', 'Nightlife', 'Music Venues']}\n"
]
}
],
"source": [
"q = {\"$and\": [{ \"categories\": { \"$in\": [ 'Restaurants' ]}},\n",
" { \"categories\": { \"$in\": [ 'Arts & Entertainment']}}\n",
" ]\n",
" }\n",
"\n",
"p = {\"_id\": 0, \"id\": 1, \"categories\": 1}\n",
"\n",
"cursor = businesses.find(q, p);\n",
"\n",
"for record in cursor:\n",
" print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q4: find the number of restaurants in Nevada\n",
"\n",
"In this second query, we want to find the total number of restaurants in Nevada.\n",
"\n",
"A first possible query is:"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'_id': {'country': 'NV'}, 'Number of restaurants': 8}\n"
]
}
],
"source": [
"cursor = businesses.aggregate( [ \n",
"\t{ \"$match\": { \"country\": \"NV\", \"categories\": { \"$in\": [ 'Restaurants' ] } } },\n",
" {\"$group\": {\"_id\": {\"country\": \"$country\"}, \"Number of restaurants\": {\"$sum\": 1 } } }\n",
"] );\n",
"\n",
"for record in cursor:\n",
" print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here it follows an alternative query:"
]
},
{
"cell_type": "code",
"execution_count": 109,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.7/site-packages/ipykernel_launcher.py:3: DeprecationWarning: count is deprecated. Use Collection.count_documents instead.\n",
" This is separate from the ipykernel package so we can avoid doing imports until\n"
]
},
{
"data": {
"text/plain": [
"8"
]
},
"execution_count": 109,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"businesses.find({\n",
" \"country\": \"NV\",\n",
" \"categories\": {\"$in\": [\"Restaurants\"]}\n",
"}\n",
").count()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Another one here below:"
]
},
{
"cell_type": "code",
"execution_count": 111,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'_id': {'country': 'NV'}, 'Number of restaurants': 8}\n"
]
}
],
"source": [
"cursor = businesses.aggregate( [ \n",
"\t{ \"$match\": { \"country\": \"NV\" } },\n",
"\t{ \"$unwind\": \"$categories\" },\n",
"\t{ \"$match\": { \"categories\": \"Restaurants\" } },\n",
" {\"$group\": {\"_id\": {\"country\": \"$country\"}, \"Number of restaurants\": {\"$sum\": 1 } } }\n",
"] );\n",
"\n",
"for record in cursor:\n",
" print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q5: find the average stars per business"
]
},
{
"cell_type": "code",
"execution_count": 113,
"metadata": {},
"outputs": [],
"source": [
"p = {'$project': {'_id': 0, 'id' : 1, 'reviewer_id' : '$reviews.reviewer_id', 'stars': '$reviews.review_score'}}"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [],
"source": [
"u = {'$unwind': '$reviews'}"
]
},
{
"cell_type": "code",
"execution_count": 117,
"metadata": {
"scrolled": true
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': 'MWxrPSz87wx559-Rg3YL-Q', 'stars': 1}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': 'O412lFp-8M8VpRwdzl0S0A', 'stars': 5}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': '4ZR5wwn8NST5aqCa3ueTFg', 'stars': 1}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': '9G8nsgZ-7o3an9HN4N08Hw', 'stars': 1}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': 'KdXsniAzdczdlxi31ftbvA', 'stars': 1}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': 'ZeNWkf6fdzZWyat8gdGcqA', 'stars': 5}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': 'YGQv5HXLKu8X6K9yGgTQ7Q', 'stars': 1}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': '05QcvAw7bO4Lcm0bCNejiw', 'stars': 5}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': 'XGL7VDkeUyM5nKQspJBTNw', 'stars': 5}\n",
"{'id': 'FYWN1wneV18bWNgQjJ2GNg', 'reviewer_id': 'hf27xTME3EiCp6NL6VtWZQ', 'stars': 5}\n"
]
}
],
"source": [
"l = {'$limit': 10}\n",
"cursor = businesses.aggregate([u, p, l])\n",
"for record in cursor:\n",
" print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now we can group and sort:"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [],
"source": [
"g = {'$group': {'_id': '$id', 'score': {'$avg': '$stars'}, 'count': {'$sum': 1}}}\n",
"s = {'$sort': {'score': 1}}"
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'_id': 'Kul8tFT48hZQJkeNK5jLBQ', 'score': 1.0, 'count': 10}\n",
"{'_id': 'rDMptJYWtnMhpQu_rRXHng', 'score': 1.0909090909090908, 'count': 11}\n",
"{'_id': 'oZBISmgb-GjOdj-ik8UfjA', 'score': 1.2857142857142858, 'count': 7}\n",
"{'_id': 'z0BQG6LJOmd8E7cNuMtH0A', 'score': 1.3333333333333333, 'count': 3}\n",
"{'_id': '7YIy1tXOor9VCwvaSjuBHg', 'score': 1.391304347826087, 'count': 46}\n",
"{'_id': 'OD2hnuuTJI9uotcKycxg1A', 'score': 1.4444444444444444, 'count': 9}\n",
"{'_id': 'F0fEKpTk7gAmuSFI0KW1eQ', 'score': 1.6666666666666667, 'count': 3}\n",
"{'_id': 'KQPW8lFf1y5BT2MxiSZ3QA', 'score': 1.6666666666666667, 'count': 18}\n",
"{'_id': 'NmZtoE3v8RdSJEczYbMT9g', 'score': 1.8, 'count': 5}\n",
"{'_id': 'GDOAg680Gmi6S1MlhU7B1g', 'score': 1.8, 'count': 5}\n",
"{'_id': 'A_Ij4SwFmlRbVtRnsdSzWA', 'score': 1.8, 'count': 5}\n",
"{'_id': 'aWnASLfWj1G6ptH4SR5RRA', 'score': 2.0, 'count': 3}\n",
"{'_id': 'Q1MzgzH263RgYX4TU4xQ2Q', 'score': 2.0, 'count': 4}\n",
"{'_id': 'c6Shr51XcbvAeXp6hb_Exg', 'score': 2.0, 'count': 18}\n",
"{'_id': '1nhf9BPXOBFBkbRkpsFaxA', 'score': 2.0, 'count': 6}\n",
"{'_id': 'Wpt0sFHcPtV5MO9He7yMKQ', 'score': 2.15, 'count': 20}\n",
"{'_id': 'PMH4oUa-bWELKogdtkWewg', 'score': 2.1875, 'count': 16}\n",
"{'_id': 'zWb-kwaANIPMS_AFBbVOew', 'score': 2.25, 'count': 8}\n",
"{'_id': 'EfYExaDrwhj0k611u5lYMg', 'score': 2.25, 'count': 12}\n",
"{'_id': 'Dj0S-Oe4ytRJzMGUPgYUkw', 'score': 2.25, 'count': 4}\n",
"{'_id': 'Pp_ca_wyn1vsKBNR94ZRXw', 'score': 2.3333333333333335, 'count': 6}\n",
"{'_id': '0953KSZ26LmJ7CDacVzpYg', 'score': 2.3333333333333335, 'count': 3}\n",
"{'_id': '2pD9wZWXDNsZf_MXd8rQtg', 'score': 2.3333333333333335, 'count': 6}\n",
"{'_id': '-nHkhiuerqmfBG3v2v9O-g', 'score': 2.4, 'count': 5}\n",
"{'_id': '98YzjvO45DRRvqrNv0hhAA', 'score': 2.427480916030534, 'count': 131}\n",
"{'_id': '2v-8QQfMLX2PCz-0S6gISQ', 'score': 2.4285714285714284, 'count': 7}\n",
"{'_id': 'yh-NN-dCaxxleNcI-_bsUA', 'score': 2.4285714285714284, 'count': 7}\n",
"{'_id': 'kKx8iCJkomVQBdWHnmmOiA', 'score': 2.5, 'count': 4}\n",
"{'_id': 't8yi2l7pZF43Rlf9_lHdDA', 'score': 2.5172413793103448, 'count': 29}\n",
"{'_id': 'sJ0MYSAIVK28cMzh-s-NPA', 'score': 2.5588235294117645, 'count': 34}\n",
"{'_id': 'XPZT3zLrkLkFSkBVxSsM3w', 'score': 2.5714285714285716, 'count': 21}\n",
"{'_id': 'VSGcuYDV3q-AAZ9ZPq4fBQ', 'score': 2.5714285714285716, 'count': 7}\n",
"{'_id': 'iOudjMD1sFvD9QTltPQiTg', 'score': 2.6, 'count': 15}\n",
"{'_id': 'QkG3KUXwqZBW18A9k1xqCA', 'score': 2.6486486486486487, 'count': 37}\n",
"{'_id': 'S4a42azhoOmr0fpeNkggHg', 'score': 2.6666666666666665, 'count': 3}\n",
"{'_id': '4srfPk1s8nlm1YusyDUbjg', 'score': 2.6666666666666665, 'count': 6}\n",
"{'_id': 'AZUCA3oiGou2wLnAYG1P6g', 'score': 2.6666666666666665, 'count': 3}\n",
"{'_id': 'GX83LORhO99mdNFz9_cZdQ', 'score': 2.6666666666666665, 'count': 3}\n",
"{'_id': 'nAFI7JZEhmvUKMKSA_5Chg', 'score': 2.7142857142857144, 'count': 7}\n",
"{'_id': 'UlI0TksGFiIXtcbtg2KalQ', 'score': 2.736842105263158, 'count': 38}\n",
"{'_id': 'iPa__LOhse-hobC2Xmp-Kw', 'score': 2.764705882352941, 'count': 34}\n",
"{'_id': '8DShNS-LuFqpEWIp0HxijA', 'score': 2.7777777777777777, 'count': 9}\n",
"{'_id': 'dTWfATVrBfKj7Vdn0qWVWg', 'score': 2.8333333333333335, 'count': 6}\n",
"{'_id': 'Vl9UFifPCWTGKdogF5gnvw', 'score': 2.875, 'count': 8}\n",
"{'_id': 'spDZkD6cp0JUUm6ghIWHzA', 'score': 2.9, 'count': 80}\n",
"{'_id': 'He-G7vWjzVUysIKrfNbPUQ', 'score': 2.909090909090909, 'count': 11}\n",
"{'_id': 'l09JfMeQ6ynYs5MCJtrcmQ', 'score': 2.9166666666666665, 'count': 12}\n",
"{'_id': 'IQSlT5jGE6CCDhSG0zG3xg', 'score': 2.95, 'count': 20}\n",
"{'_id': 'nigYwB_m1TQ1WosjSWi-Hw', 'score': 3.0, 'count': 5}\n",
"{'_id': 'VWNvhS3kUYU9qxiFudb6Ag', 'score': 3.0, 'count': 4}\n",
"{'_id': 'zV_aclADLjx2KOql9F_FTw', 'score': 3.0, 'count': 4}\n",
"{'_id': 'XG0twdTiMzUS3v3p9OZbJA', 'score': 3.0, 'count': 3}\n",
"{'_id': '1_3nOM7s9WqnJWTNu2-i8Q', 'score': 3.0, 'count': 8}\n",
"{'_id': 'NC03ZDFpIYpEi2aiFTmGPg', 'score': 3.0, 'count': 6}\n",
"{'_id': 'vzx1WdVivFsaN4QYrez2rw', 'score': 3.0, 'count': 3}\n",
"{'_id': 'zdC6e26U7tS1XtWcJPKCcA', 'score': 3.0, 'count': 3}\n",
"{'_id': 'aFBCmJUYrPeol_P75QX7Jw', 'score': 3.045977011494253, 'count': 87}\n",
"{'_id': 'SJAggfn8ta7XLD98XD2mkw', 'score': 3.090909090909091, 'count': 11}\n",
"{'_id': 'evwSIx2mmvpN0V0QteOAsA', 'score': 3.090909090909091, 'count': 11}\n",
"{'_id': 'T5CdfrZWw-uW9Y5L_sddqQ', 'score': 3.111111111111111, 'count': 9}\n",
"{'_id': 'n7V4cD-KqqE3OXk0irJTyA', 'score': 3.1174785100286533, 'count': 349}\n",
"{'_id': '1WBkAuQg81kokZIPMpn9Zg', 'score': 3.1551724137931036, 'count': 232}\n",
"{'_id': 'lZ18HyZhfrrN_VAJIpyhxQ', 'score': 3.16, 'count': 25}\n",
"{'_id': 'HcEJ2YiSLX_mT3RE0hvKWQ', 'score': 3.1904761904761907, 'count': 21}\n",
"{'_id': 'VXH7zXcZzXlmAVN8GSjGRQ', 'score': 3.2, 'count': 5}\n",
"{'_id': 'AXb5gCwqHl-_v6ZIMb1mXQ', 'score': 3.2325581395348837, 'count': 43}\n",
"{'_id': 'mtTxLi9CZNOsDqOTJH3pQw', 'score': 3.25, 'count': 4}\n",
"{'_id': 'FmGZh8ZwEnSO860_2eiYHw', 'score': 3.25, 'count': 4}\n",
"{'_id': 'HAX1zec191t7QkT2sBZ76A', 'score': 3.25, 'count': 4}\n",
"{'_id': 'PEKloTo1IkW_gyxp15e8Dg', 'score': 3.2857142857142856, 'count': 7}\n",
"{'_id': 'AR6mrWO89rq-ku_t_E71AA', 'score': 3.2857142857142856, 'count': 7}\n",
"{'_id': 'fNMVV_ZX7CJSDWQGdOM8Nw', 'score': 3.2857142857142856, 'count': 7}\n",
"{'_id': 'WUiDaFQRZ8wKYGLvmjFjAw', 'score': 3.3289473684210527, 'count': 76}\n",
"{'_id': '7SBM_0TcfYTs2oMD-vgaQA', 'score': 3.3333333333333335, 'count': 3}\n",
"{'_id': 'sG3jr8wLMpncvwvu8JM1Dw', 'score': 3.3333333333333335, 'count': 3}\n",
"{'_id': 'zjySmTfL9WiMDVgp8-Jp3w', 'score': 3.3333333333333335, 'count': 3}\n",
"{'_id': 'gAy4LYpsScrj8POnCW6btQ', 'score': 3.3333333333333335, 'count': 6}\n",
"{'_id': 'HyE2qiMaz7HMJWDhunHA4A', 'score': 3.3461538461538463, 'count': 26}\n",
"{'_id': 'BnuzcebyB1AfxH0kjNWqSg', 'score': 3.4, 'count': 25}\n",
"{'_id': 'VdlPZg2NAu8t8GkdbPLecg', 'score': 3.4106463878326996, 'count': 263}\n",
"{'_id': 'bA21m-qbgN_GNR6g-AlfYw', 'score': 3.433333333333333, 'count': 30}\n",
"{'_id': 'NoxzrJbWS4xD9ft0ygD3JA', 'score': 3.433333333333333, 'count': 30}\n",
"{'_id': 'o1fTwfqN0sDFNpV1CkOPPg', 'score': 3.4375, 'count': 16}\n",
"{'_id': 'y7g-pgAFx37vKytP5oRD0Q', 'score': 3.4444444444444446, 'count': 18}\n",
"{'_id': 'veXxt8rGY_RJPpA5QkHT9Q', 'score': 3.463917525773196, 'count': 97}\n",
"{'_id': 'Gu-xs3NIQTj3Mj2xYoN2aw', 'score': 3.4705882352941178, 'count': 34}\n",
"{'_id': 'PfOCPjBrlQAnz__NXj9h_w', 'score': 3.4827586206896552, 'count': 116}\n",
"{'_id': 'c6Q3HP4cmWZbD9GX8kr4IA', 'score': 3.5, 'count': 8}\n",
"{'_id': '8XQmnE-v6RrsHiqEz6QXdg', 'score': 3.5, 'count': 4}\n",
"{'_id': 'JdzoBklGL66IPFxGkJ2lQA', 'score': 3.5, 'count': 4}\n",
"{'_id': 'c_XbaJqhm-5ycSBOkVUBmg', 'score': 3.5454545454545454, 'count': 33}\n",
"{'_id': 'Sx0C2RsDgrG3RxBBUoBqTw', 'score': 3.5555555555555554, 'count': 9}\n",
"{'_id': '1K4qrnfyzKzGgJPBEcJaNQ', 'score': 3.6153846153846154, 'count': 39}\n",
"{'_id': 'RV_NgH8wT1TuOs2qNt1WDg', 'score': 3.6486486486486487, 'count': 37}\n",
"{'_id': 'b2I2DXtZVnpUMCXp1JON7A', 'score': 3.6666666666666665, 'count': 9}\n",
"{'_id': 'shNBn6mTKxZ124yPC_H02Q', 'score': 3.6666666666666665, 'count': 3}\n",
"{'_id': 'z1hRMOmEvw7jx4cq4-8Yfw', 'score': 3.6666666666666665, 'count': 6}\n",
"{'_id': 'ZvvX30vgzBD2Ezd1zM33IQ', 'score': 3.6666666666666665, 'count': 3}\n",
"{'_id': '0lCpaZpmjCFEmtti9O9ZdQ', 'score': 3.6666666666666665, 'count': 3}\n",
"{'_id': 'd7AtQMYDUh40DeJinidFNQ', 'score': 3.6666666666666665, 'count': 3}\n",
"{'_id': 'v3tBN6s8BhotnjA7GncQnQ', 'score': 3.6666666666666665, 'count': 3}\n",
"{'_id': '-McKyjNSqS1h9dDJH3dyUA', 'score': 3.6666666666666665, 'count': 15}\n",
"{'_id': 'MTH-AcNyWfsBa9sXp04HcQ', 'score': 3.6707317073170733, 'count': 82}\n",
"{'_id': 'DPQnTnNw2PJj7DdENM98Cw', 'score': 3.68, 'count': 25}\n",
"{'_id': 'tRVx2c89coruPRwYhGTcTw', 'score': 3.7051282051282053, 'count': 78}\n",
"{'_id': 'KX_4sldJKgkEWaYVfSWjwQ', 'score': 3.727272727272727, 'count': 11}\n",
"{'_id': 'Pd52CjgyEU3Rb8co6QfTPw', 'score': 3.769230769230769, 'count': 13}\n",
"{'_id': 'XSrN9gtLHC8MtUnSobZfDQ', 'score': 3.7777777777777777, 'count': 18}\n",
"{'_id': 'EJFdWX908N8Yc2XG0Lky8A', 'score': 3.8, 'count': 5}\n",
"{'_id': 'xcgFnd-MwkZeO5G2HQ0gAQ', 'score': 3.8157894736842106, 'count': 38}\n",
"{'_id': '5XejqzaFmtkZMstJS5Iy-w', 'score': 3.8378378378378377, 'count': 37}\n",
"{'_id': 'kyXEnWKQGWSThY6EcjORuw', 'score': 3.853211009174312, 'count': 109}\n",
"{'_id': '5GAXZ7gJ81TSR0-Q6AMp_A', 'score': 3.857142857142857, 'count': 7}\n",
"{'_id': 'w5WBrukfSuEyTjYUYlV4Ug', 'score': 3.875, 'count': 16}\n",
"{'_id': 'NFTh6mj3X2AnHJCKIFUk5Q', 'score': 3.8823529411764706, 'count': 17}\n",
"{'_id': 'mLP-jIIRdoaYM7fJqqFurw', 'score': 3.8867924528301887, 'count': 53}\n",
"{'_id': 'lHYiCS-y8AFjUitv6MGpxg', 'score': 3.9047619047619047, 'count': 21}\n",
"{'_id': '7gquCdaFoHZCcLYDttpHtw', 'score': 3.9411764705882355, 'count': 17}\n",
"{'_id': 'FYWN1wneV18bWNgQjJ2GNg', 'score': 3.9565217391304346, 'count': 23}\n",
"{'_id': '1cLXGXThDYZ5WK_KpBLtkw', 'score': 3.970059880239521, 'count': 167}\n",
"{'_id': 'EiZYdEo9p2K6r6CnZ5gqsw', 'score': 4.0, 'count': 28}\n",
"{'_id': 'MmR06_kNAbmOPK-0pKvGtA', 'score': 4.0, 'count': 3}\n",
"{'_id': 'htKaC4cHY4wlB4Wqb8CDnQ', 'score': 4.0, 'count': 4}\n",
"{'_id': '4SBY4CHiMD8YOCEU9_fdnw', 'score': 4.0, 'count': 3}\n",
"{'_id': 'kCoE3jvEtg6UVz5SOD3GVw', 'score': 4.0, 'count': 5}\n",
"{'_id': 'd2HSFutxpaYsCPglJPRe1Q', 'score': 4.0, 'count': 13}\n",
"{'_id': 'CfEJGhzBFkWKoe-UTT2jiw', 'score': 4.0, 'count': 12}\n",
"{'_id': 'pR6Fy9cP3oAzM_DtloLV5w', 'score': 4.0, 'count': 4}\n",
"{'_id': 'o9eMRCWt5PkpLDE0gOPtcQ', 'score': 4.0, 'count': 5}\n",
"{'_id': 'gIVjdDWRnS1yDVpPF3oUeg', 'score': 4.0, 'count': 8}\n",
"{'_id': '8y56fOiKhtCnqaiYB2S2Qg', 'score': 4.0, 'count': 4}\n",
"{'_id': 'o8cX77mJ1nHMMo0URPS5bg', 'score': 4.0, 'count': 5}\n",
"{'_id': 'M3uV9Y3EDSpy9d4YwyNSAQ', 'score': 4.015151515151516, 'count': 66}\n",
"{'_id': 'W1Yr6c2XDx_RBjb6WsV-aQ', 'score': 4.05, 'count': 140}\n",
"{'_id': '_F3AMoo_zdl-he384ISQbw', 'score': 4.084507042253521, 'count': 213}\n",
"{'_id': 'KVPGEIZb0i5EBYOrlMOziQ', 'score': 4.111111111111111, 'count': 9}\n",
"{'_id': 'l5ijDMpYdyKbF0CxiCVO_Q', 'score': 4.125, 'count': 24}\n",
"{'_id': 'FXHfcFVEfI1vVngW2gVOpw', 'score': 4.1454545454545455, 'count': 55}\n",
"{'_id': 'AtdXq_gu9NTE5rx4ct_dGg', 'score': 4.166666666666667, 'count': 6}\n",
"{'_id': 'B5EZlEDH6AVDk8tQGHAGqg', 'score': 4.176470588235294, 'count': 34}\n",
"{'_id': 'nJ3mXjItS8WcwhYbzbfDQw', 'score': 4.2, 'count': 5}\n",
"{'_id': 'zzMu-6SmqhpvHxVRM6tx9g', 'score': 4.2, 'count': 5}\n",
"{'_id': 'oB5KH-jYU93w-QnHas6EMA', 'score': 4.2105263157894735, 'count': 19}\n",
"{'_id': 'Uy3_5nLo3sYkAuSX6mjdmg', 'score': 4.2727272727272725, 'count': 33}\n",
"{'_id': '-ooEO2YqDQVYNHnSF2BPfw', 'score': 4.2727272727272725, 'count': 11}\n",
"{'_id': 'NVaM_cKKJT3WWyP21PANzw', 'score': 4.333333333333333, 'count': 3}\n",
"{'_id': 'uZd7UVMib1EYUFFbfWkGdw', 'score': 4.333333333333333, 'count': 3}\n",
"{'_id': 'AVtMhvR_3bhL9VkRrAG6dQ', 'score': 4.333333333333333, 'count': 3}\n",
"{'_id': 'cYuGJKCCazgCnsGGlBxaEQ', 'score': 4.375, 'count': 8}\n",
"{'_id': 'c7X2SdKxVJMaOnFROO8WEg', 'score': 4.380952380952381, 'count': 21}\n",
"{'_id': 'd6ngz4WmMaQL8Xdfur7bwQ', 'score': 4.4, 'count': 5}\n",
"{'_id': 'PJ-VbAtIOso1dqd2frQqqg', 'score': 4.4, 'count': 75}\n",
"{'_id': '8GL63JMAiD0mI-Z1Y-kT2Q', 'score': 4.4, 'count': 5}\n",
"{'_id': 'v2GJWvZqEAjUc22hZUYzYw', 'score': 4.416666666666667, 'count': 12}\n",
"{'_id': '1Jp_hmPNUZArNqzpbm7B0g', 'score': 4.45, 'count': 20}\n",
"{'_id': 'cehTmoCXPi0a3FwCE3Tq2Q', 'score': 4.5, 'count': 6}\n",
"{'_id': '94KziT6DQ9XlBET3WzIv_w', 'score': 4.5, 'count': 4}\n",
"{'_id': '5q6Xh-UcJa78bp6dzyaE7w', 'score': 4.522788203753351, 'count': 373}\n",
"{'_id': 'bOOgAB_CEWWsxalAthnRSw', 'score': 4.543478260869565, 'count': 46}\n",
"{'_id': 'M5jiUpAuliJtCYOF0cI3wA', 'score': 4.566666666666666, 'count': 30}\n",
"{'_id': 'Z-4KHBDbdwyC7PwA_JE5Xw', 'score': 4.571428571428571, 'count': 14}\n",
"{'_id': 'JmKgz6n7zn24F-WkgT-kiA', 'score': 4.571428571428571, 'count': 7}\n",
"{'_id': 'P5TLch0Fu9p3o6W2hRSz0g', 'score': 4.6, 'count': 5}\n",
"{'_id': 'pOyt4Fon5n783Jc-hrSq-A', 'score': 4.642857142857143, 'count': 14}\n",
"{'_id': '7m9ux0KgTe2NwUU0XA_9xQ', 'score': 4.642857142857143, 'count': 14}\n",
"{'_id': 'gSr8zPc8M4eTxwGgPb5AsA', 'score': 4.653846153846154, 'count': 26}\n",
"{'_id': 'xMP2oDrKFqyUrN0Uww85EQ', 'score': 4.666666666666667, 'count': 3}\n",
"{'_id': 'XOSRcvtaKc_Q5H1SAzN20A', 'score': 4.666666666666667, 'count': 3}\n",
"{'_id': 'WSZL9uQ9JMOrrulMKEl7Tw', 'score': 4.6875, 'count': 32}\n",
"{'_id': 'F31RycVVooeIOp9jsXmg6g', 'score': 4.6875, 'count': 16}\n",
"{'_id': 'ZmMCgM4RCqCXJ0Lswu6yxw', 'score': 4.705882352941177, 'count': 17}\n",
"{'_id': 'h2XsV6mR6c7QURhlsi0RqA', 'score': 4.730769230769231, 'count': 26}\n",
"{'_id': 'YvxQOMstU2MVDr2uNHs_vw', 'score': 4.738095238095238, 'count': 42}\n",
"{'_id': 'VBHEsoXQb2AQ76J9l8h1uQ', 'score': 4.739130434782608, 'count': 23}\n",
"{'_id': 'onb5syYKz4Wf3cwQWuqv1A', 'score': 4.769230769230769, 'count': 13}\n",
"{'_id': 'EsMcGiZaQuG1OOvL9iUFug', 'score': 4.8, 'count': 15}\n",
"{'_id': '0FMKDOU8TJT1x87OKYGDTg', 'score': 4.815384615384615, 'count': 65}\n",
"{'_id': 'e3rNvRnupvSMu6BABlDUuQ', 'score': 4.823529411764706, 'count': 68}\n",
"{'_id': 'Y0eMNa5C-YU1RQOZf9XvVA', 'score': 4.826086956521739, 'count': 23}\n",
"{'_id': 'VZ37HCZVruFm-w_Mkl1aEQ', 'score': 4.9375, 'count': 16}\n",
"{'_id': 'ykJM7EuGziATZ5u2qIT08g', 'score': 5.0, 'count': 4}\n",
"{'_id': 'vIAQV1_p48AsKP-B9iULwg', 'score': 5.0, 'count': 6}\n",
"{'_id': 'dA7KJReGl9PMi3Tg_mBHuw', 'score': 5.0, 'count': 3}\n",
"{'_id': 'DCQ3qzzmsm1JZxvW8-bCgQ', 'score': 5.0, 'count': 12}\n",
"{'_id': 'LL01hTt_eVdp-ws4zMDsrw', 'score': 5.0, 'count': 3}\n",
"{'_id': 'x5s5yvoI3QFLBZE6JNNhyg', 'score': 5.0, 'count': 5}\n",
"{'_id': 'TGWhGNusxyMaA4kQVBNeew', 'score': 5.0, 'count': 7}\n",
"{'_id': 'YhV93k9uiMdr3FlV4FHjwA', 'score': 5.0, 'count': 4}\n",
"{'_id': 'strJsTvTHqWS18GVJQEHoA', 'score': 5.0, 'count': 22}\n",
"{'_id': 'lj0MiK5_fyv9df2twnsI7g', 'score': 5.0, 'count': 3}\n",
"{'_id': 'EXsmUB2mJyJsCpCV-56Ujw', 'score': 5.0, 'count': 16}\n",
"{'_id': 'r6Jw8oRCeumxu7Y1WRxT7A', 'score': 5.0, 'count': 4}\n",
"{'_id': 'aLM-0HupwCE5r7bMIcQ2TQ', 'score': 5.0, 'count': 3}\n",
"{'_id': 'DAG8irsIf0hMsEgDbrOE_A', 'score': 5.0, 'count': 5}\n",
"{'_id': 'nbhBRhZtdaZmMMeb2i02pg', 'score': 5.0, 'count': 3}\n",
"{'_id': 'swZaqSjwq4UF8QhEtacx5A', 'score': 5.0, 'count': 5}\n",
"{'_id': '5qpi5VQ_S2Itm8xvRm1k8Q', 'score': 5.0, 'count': 9}\n",
"{'_id': 'ORUUPE_Ahrcop9K7h6rvVQ', 'score': 5.0, 'count': 3}\n",
"{'_id': 'ok38fApaT1TBEU-IH85BvA', 'score': 5.0, 'count': 5}\n",
"{'_id': '6aFAEeJ3nS-iWGt7Tn7S0Q', 'score': 5.0, 'count': 5}\n",
"{'_id': 'OcUVZniPo7CnIG-Tv83XcQ', 'score': 5.0, 'count': 3}\n"
]
}
],
"source": [
"cursor = businesses.aggregate([u, p, g, s])\n",
"for record in cursor:\n",
" print(record)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Q6: find the countries where the number of restaurants are more than 10"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'_id': {'country': 'ON'}, 'Number of restaurants': 17}\n",
"{'_id': {'country': 'AZ'}, 'Number of restaurants': 12}\n"
]
}
],
"source": [
"cursor = businesses.aggregate( [ \n",
"\t{ \"$match\": { \"categories\": { \"$in\": [ 'Restaurants' ] } } },\n",
" {\"$group\": {\"_id\": {\"country\": \"$country\"}, \"Number of restaurants\": {\"$sum\": 1 } } },\n",
" { \"$match\": { \"Number of restaurants\": { \"$gt\": 10 } } }\n",
"] )\n",
"\n",
"for record in cursor:\n",
" print(record)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}