π $group and $unwind
In SQL, the GROUP BY statement groups rows that have the same values into summary rows, like "find the number of customers in each country." The GROUP BY statement is often used with aggregate functions (COUNT(), MAX(), MIN(), SUM(), AVG()) to group the result-set by one or more columns.
The $group stage in MongoDBβs Aggregation Framework is equivalent to GROUP BY in SQL. It allows us to group documents by a specific field and apply aggregate functions like sum, average, count, min, and max.
π¬ How does $group work?β
The $group stage groups documents based on a field and performs calculations on grouped data.
Syntaxβ
{
$group: {
_id: <expression>,
<field>: { <accumulator>: <expression> }
}
}
_id: The field to group by (usenullto aggregate all documents together)<accumulator>: An aggregation operator ($sum,$avg,$min,$max,$push,$addToSet, etc.)
π Example 1: Count the number of books published every yearβ
MongoDB queryβ
db.books.aggregate([
{
$group: {
_id: "$year",
totalBooks: { $sum: 1 },
},
},
]);
Equivalent SQL queryβ
SELECT year, COUNT(*) AS totalBooks
FROM books
GROUP BY year;
Sample outputβ
[
{"_id": 1980,"totalBooks": 42},
{"_id": 2000,"totalBooks": 490},
{"_id": 1981,"totalBooks": 45},
...
]
π Example 2: Without using $group, count the number of books published every yearβ
MongoDB queryβ
db.books.aggregate([
{
$sortByCount: "$year",
},
]);
Sample output remains the same as beforeβ
[
{"_id": 1980,"totalBooks": 42},
{"_id": 2000,"totalBooks": 490},
{"_id": 1981,"totalBooks": 45},
...
]
π Example 3: Find the total number of pages published every year.β
MongoDB queryβ
db.books.aggregate([
{
$group: {
_id: "$year",
totalPages: { $sum: "$pages" },
},
},
]);
Equivalent SQL queryβ
SELECT year, SUM(rating) AS totalPages
FROM books
GROUP BY year;
Sample outputβ
[
{ "_id": 1955, "totalPages": 664 },
{ "_id": 1952, "totalPages": 416 },
{ "_id": 1899, "totalPages": 128 }
...
]
πΉ How does $unwind work?β
The $unwind stage deconstructs an array field and creates one output document for each element in the array.
{
$unwind: "<array field>"
}
<array field>: The path to the array field you want to deconstruct
π Example 1: Return one document for each authorβ
MongoDB Queryβ
db.books.aggregate([
{
$unwind: "$authors"
}
]);
Equivalent SQL queryβ
SELECT
b._id,
b.title,
authors
FROM books b
CROSS JOIN UNNEST(b.authors) AS authors
WHERE b._id = '1567189644';
Sample outputβ
[
{ "_id": "0740714104", "title": "Who's Your Daddy?", "author": "Phyllis Wright-Herman" }
{ "_id": "0740714104", "title": "Who's Your Daddy?", "author": "Timothy Mikkelsen" }
{ "_id": "0740714104", "title": "Who's Your Daddy?", "author": "Ltd. MikWrgiht" }
]
- This returns one document per author entry in the authors array.
π Challengeβ
π 1. Find the average book rating of all booksβ
Answer
- JavaScript
- C#
- Python
- Java
await reviews.aggregate([
{
$group: {
_id: "$bookId",
avgRating: { $avg: "$rating" }
}
}
]).toArray();
public class BookRating
{
[BsonId]
public string BookId { get; set; }
[BsonElement("avgRating")]
public double AvgRating { get; set; }
}
var pipeline = reviewsCollection.Aggregate()
.Group(
r => r.BookId,
g => new BookRating
{
BookId = g.Key,
AvgRating = g.Average(r => r.Rating.Value)
}
);
var booksWithAvg = pipeline.ToList();
books_with_avg_rating = reviews.aggregate([
{
"$group": {
"_id": "$bookId",
"avgRating": {"$avg": "$rating"}
}
}
])
for book in books_with_avg_rating:
print(book)
AggregateIterable<Document> result = reviews.aggregate(
List.of(
group(
"$bookId",
Accumulators.avg("avg", "$rating")
),
limit(5)
)
);
for (Document doc : result) {
System.out.println("result: " + doc.toJson());
}
π 2. Find the top 5 users with the most number of reviews (Hint: use the name field in the reviews collection)β
Answer
There are 2 ways to solve this-
- $group with $sort
- $sortByCount
- JavaScript
- C#
- Python
- Java
- $group with $sum + $sort
- $sortByCount
await reviews.aggregate([
{
$group: {
_id: "$name",
totalReviews: { $sum: 1 },
},
},
{
$sort: {
totalReviews: -1,
},
},
{
$limit: 5,
},
]).toArray();
await reviews.aggregate([
{
$sortByCount: "$name",
},
{
$limit: 5,
},
]).toArray();
- $group with $sum + $sort
- $sortByCount
public class ReviewerCount
{
[BsonId]
public string Name { get; set; }
[BsonElement("totalReviews")]
public int TotalReviews { get; set; }
}
var pipeline = reviewsCollection.Aggregate()
.Group(
r => r.Name,
g => new ReviewerCount
{
Name = g.Key,
TotalReviews = g.Count()
}
)
.SortByDescending(r => r.TotalReviews)
.Limit(5);
var result = await pipeline.ToListAsync();
var pipeline = reviewsCollection.Aggregate()
.AppendStage<BsonDocument>(
new BsonDocument("$sortByCount", "$name")
)
.Limit(5);
var result = pipeline.ToList();
- $group with $sum + $sort
- $sortByCount
reviewers_count = reviews.aggregate([
{
"$group": {
"_id": "$name",
"totalReviews": {"$sum": 1}
}
},
{
"$sort": {"totalReviews": -1}
},
{
"$limit": 5
}
])
for reviewer in reviewers_count:
print(reviewer)
reviewers_count = reviews.aggregate([
{
"$sortByCount": "$name"
},
{
"$limit": 5
}
])
for reviewer in reviewers_count:
print(reviewer)
- $group with $sum + $sort
- $sortByCount
AggregateIterable<Document> result = reviews.aggregate(
List.of(
group(
"$name",
Accumulators.sum("totalReviews", 1)
),
sort(descending("totalReviews")),
limit(5)
)
);
for (Document doc : result) {
System.out.println("user: " + doc.toJson());
}
AggregateIterable<Document> result = reviews.aggregate(
List.of(
Aggregates.sortByCount("$name"),
limit(5)
)
);
for (Document doc : result) {
System.out.println("user: " + doc.toJson());
}
π 3. Return one document per author for book id 1567189644β
Answer
- JavaScript
- C#
- Python
- Java
await books.aggregate([
{
$match: {
_id: "1567189644"
}
},
{
$unwind: {
path: "$authors"
}
},
{
$project: {
"_id": 0,
"title": 1,
"author": "$authors.name"
}
}
]).toArray();
var pipeline = booksCollection.Aggregate()
.Match(b => b.Id == "1567189644")
.Unwind(b => b.Authors)
.Project(b => new
{
b.Title,
b.Authors.Name
});
var results = pipeline.ToList();
books_with_authors = books.aggregate([
{
"$match": {
"_id": "1567189644"
}
},
{
"$unwind": "$authors"
},
{
"$project": {
"_id": 0,
"title": 1,
"authors.name": 1
}
}
])
AggregateIterable<Document> result = books.aggregate(
List.of(
match(eq("_id", "1567189644")),
unwind("$authors"),
project(
fields(
excludeId(),
include("title", "authors.name")
)
)
)
);
// Iterate through the results
for (Document doc : result) {
System.out.println("book: " + doc.toJson());
}