So, I was trying to do a query similar to the following:
select field_a, max(field_b) as maxb from tableA group by field_a
But, when I found field_a, I needed another field from that record. This would be straightforward in SQL with a subselect or join, but MongoDB on a sharded DB requires some Map-Reduce foo.
The problem is that reduce only allows you to return one value. But, I needed both the accumulation value AND another field to come back. Solved via this test program:
from pprint import pprint, pformat from pymongo.code import Code from pymongo import Connection conn = Connection() db = conn["test"] db.testcol.remove() db.testcol.insert({ "name" : "bob", "val" : 5, "host":"blah1"}) db.testcol.insert({ "name" : "bob", "val" : 3, "host":"blah2"}) db.testcol.insert({ "name" : "bob", "val" : 7, "host":"blah3"}) db.testcol.insert({ "name" : "john", "val" : 2, "host":"blah4"}) db.testcol.insert({ "name" : "john", "val" : 4, "host":"blah5"}) db.testcol.insert({ "name" : "john", "val" : 8, "host":"blah6"}) db.testcol.insert({ "name" : "john", "val" : 6, "host":"blah7"}) #print "Data: " #print pformat(list(db.testcol.find())) mapfunc = Code(''' function() { emit( this.name, { name : this.name, val: this.val, host: this.host }); } ''') redfunc = Code(''' function(key, values) { var latestRec = { val: 0, host: "", name: ""}; for (var i=1; i < values.length; i++) { thisName = values[i].name thisVal = values[i].val; thisHost = values[i].host; if (thisVal > latestRec.val) { latestRec.name = thisName latestRec.val = thisVal; latestRec.host = thisHost; } } return latestRec } ''') res = db.testcol.map_reduce(mapfunc, redfunc, "myresults") for doc in res.find(): print doc print "*" * 80 conn.close()