MongoDB – Boise Code Camp 2012

Welcome to Boise Code Camp, 2012. This year I decided to present on MongoDB. In a previous presentation on MongoDB and some video training I cover installation and configuration and using replica sets.
For the code camp this year I explore some of the differences between schema design for a relational database and schema design for MongoDB. You can copy and paste the commands below directly onto the MongoDB command line to follow along.
To begin with, it’s important to know how to get help and see what databases and collections are available. The default MongoDB ID generation is also useful to know about.
1 2 3 4 5 6 7 8 9 10 11 | help show dbs // discuss default IDs and how they work use exploreid db.dropDatabase() use exploreid // db.collection.insert(object) db.things.insert({'name': 'Daniel Watrous'}) db.things.find() |
Schema Design
Now let’s build the course tracker database. The MongoDB website provides some great instruction and a few videos on its schema design for MongoDB page. First have a look at the relational schema and then the adjusted MongoDB schema.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 | // build coursetracker database use coursetracker db.dropDatabase() use coursetracker // create student records db.students.insert({"_id": "john.shore", "name": {"first": "John", "last": "Shore"}, "email": "john.shore@gmail.com", "major": "Electrical Engineering"}) db.students.find() db.students.insert({"_id": "jeff", "name": {"first": "Jeff", "last": "Holland"}, "email": "jeff@yahoo.com", "major": "Business"}) db.students.insert({"_id": "scott", "name": {"first": "Scott", "last": "Mills"}, "email": "scott@hotmail.com", "major": "Humanities/Art"}) db.students.insert({"_id": "rcotter", "name": {"first": "Ray", "last": "Cotter"}, "email": "rcotter@msn.com", "major": "Computer Science"}) db.students.insert({"_id": "lee2331", "name": {"first": "Lee", "last": "Aldwell"}, "email": "lee2331@aol.com", "major": "Graphic Design"}) db.students.find() // have a look to see that the students collection now exists show collections // create course records db.courses.insert({"_id": "HIST-1010", "name": "History of the World 1010", "description": "A bunch of really interesting things that actually happened", "students": [], "ratings": []}) db.courses.find() db.courses.insert({"_id": "ENGCOMP-1010", "name": "English Composition 1010", "description": "If you can't write well, you've got nothing!", "students": [], "ratings": []}) db.courses.insert({"_id": "ART-1050", "name": "Artistic Interpretation 1050", "description": "Discover your inner beholder", "students": [], "ratings": []}) db.courses.find() // create instructor records db.instructors.insert({"_id": "wally.r.binns", "name": {"first": "Wally", "middle": "r", "last": "Binns"}, "email": "wally.r.binns@ssu.edu", "bio": "I was born in the middle of my mother\"s doctoral dissertation on Faraday Cage isolation. I\"ve been an academic ever since...", "publications": [], "courses": []}) db.instructors.find() db.instructors.insert({"_id": "gerald.waterford.iii", "name": {"first": "Gerald", "last": "Waterford", "suffix": "III"}, "email": "gerald.waterford.iii@ssu.edu", "bio": "My father's father was a great man. My father, not so much. I am restoring the family honor.", "publications": [], "courses": []}) db.instructors.insert({"_id": "kim.b", "name": {"prefix": "Mrs.", "first": "Kim", "last": "Binnley"}, "email": "kim.b@ssu.edu", "bio": "My mother told me 'Don't let those dopes push you around'. My life has been a constant struggle against dopeness ever since. Sigh...", "publications": [], "courses": []}) db.instructors.find().pretty() // now we can see all three of our collections show collections |
At this point we have records for primary entities, but all the arrays that would hold course assignments, publications, etc. are empty. Let’s start adding some additional details to those. primary documents.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 | // add some publications var pub = {"title": "Inverted Celestial Poetry", "source": "http://www.pubcentral.com/poetry/inverted-celestial-poetry"} pub // db.collection.update( criteria, objNew, upsert, multi ) db.instructors.update({"_id": "wally.r.binns"}, {$push: {"publications": pub1}}, false, false) db.instructors.update({"_id": "gerald.waterford.iii"}, {$push: {"publications": {"title": "Grow, grow, little Dandelion", "source": "http://www.hopefulstories.com/my-dandelion"}}}, false, false) db.instructors.update({"_id": "gerald.waterford.iii"}, {$push: {"publications": {"title": "The teapot and the spoon", "source": "http://www.dishsoap.com/teapot-spoon"}}}, false, false) // now lets assign some courses to instructors db.courses.find({}, {'_id': 1, 'name': 1}) db.instructors.update({"_id": "kim.b"}, {$push: {"courses": "ART-1050"}}, false, false) db.instructors.update({"_id": "gerald.waterford.iii"}, {$push: {"courses": "ENGCOMP-1010"}}, false, false) db.instructors.update({"_id": "gerald.waterford.iii"}, {$push: {"courses": "HIST-1010"}}, false, false) db.instructors.find().pretty() // now lets add some students to courses db.courses.update({"_id": "ART-1050"}, {$push: {"students": "scott"}}, false, false) db.courses.update({"_id": "ART-1050"}, {$push: {"students": "jeff"}}, false, false) db.courses.update({"_id": "ART-1050"}, {$push: {"students": "rcotter"}}, false, false) db.courses.update({"_id": "HIST-1010"}, {$push: {"students": "scott"}}, false, false) db.courses.update({"_id": "HIST-1010"}, {$push: {"students": "john.shore"}}, false, false) db.courses.update({"_id": "ENGCOMP-1010"}, {$push: {"students": "john.shore"}}, false, false) db.courses.update({"_id": "ENGCOMP-1010"}, {$push: {"students": "jeff"}}, false, false) db.courses.update({"_id": "ENGCOMP-1010"}, {$push: {"students": "rcotter"}}, false, false) db.courses.update({"_id": "ENGCOMP-1010"}, {$push: {"students": "lee2331"}}, false, false) db.courses.update({"_id": "ENGCOMP-1010"}, {$push: {"students": "scott"}}, false, false) db.courses.find().pretty() |
Map Reduce
Now we have details about which instructors teach which courses. We know which students are registered for each course. Let’s explore how we can get a clean aggregate view of these details. In SQL we would construct a query and include the grouping and calculations we wanted to get these details. In MongoDB we need to use Map Reduce.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 | // find out how many courses each student has map_number_courses = function() { this.students.forEach(function(value) { emit(value, 1); }); } reduce_number_courses = function(key, values) { var result = 0; values.forEach(function(value) { result += value; }); return result; } result = db.courses.mapReduce(map_number_courses, reduce_number_courses, {out: {replace: "numbercourses"}}); show collections db.numbercourses.find() |
It’s nice to see by _id how many courses each student has, but what if we want to combine details from two different collections into a single result. It turns out we can accomplish this with MongoDB too. However, we need to run two separate map reduce executions to get all the data we need in one place.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 | // produce a listing of student full name and how many courses he/she is taking map_course_report_names = function() { emit(this._id, {name: this.name.first+' '+this.name.last, numbercourses: 0}); } map_course_report_number = function() { this.students.forEach(function(value) { emit(value, {name: '', numbercourses: 1}); }); } reduce_course_report = function(key, values) { var result = {name: '', numbercourses: 0}; values.forEach(function(value) { result.numbercourses += value.numbercourses; if (result.name === '') { result.name = value.name; } }); return result; } result = db.courses.mapReduce(map_course_report_number, reduce_course_report, {out: {replace: "coursereport"}}); result = db.students.mapReduce(map_course_report_names, reduce_course_report, {out: {reduce: "coursereport"}}); show collections db.coursereport.find() |
You might notice that neither of these map reduce examples lend themselves to the type of ad hoc queries that are possible in SQL. One potential downside to this is that extracting informal data in various ways may be more tedious. A potential upside is that once you have defined the map, reduce structure that will produce the results you want, the output is cached in a very fast, indexable collection.
Inspiration
Some of the mapping techniques above were found here:
http://tebros.com/2011/07/using-mongodb-mapreduce-to-join-2-collections/