So, I was trying to do a query similar to the following:
select field_a, max(field_b) as maxb from tableA group by field_a
But, when I found field_a, I needed another field from that record. This would be straightforward in SQL with a subselect or join, but MongoDB on a sharded DB requires some Map-Reduce foo.
The problem is that reduce only allows you to return one value. But, I needed both the accumulation value AND another field to come back. Solved via this test program:
from pprint import pprint, pformat
from pymongo.code import Code
from pymongo import Connection
conn = Connection()
db = conn["test"]
db.testcol.remove()
db.testcol.insert({ "name" : "bob", "val" : 5, "host":"blah1"})
db.testcol.insert({ "name" : "bob", "val" : 3, "host":"blah2"})
db.testcol.insert({ "name" : "bob", "val" : 7, "host":"blah3"})
db.testcol.insert({ "name" : "john", "val" : 2, "host":"blah4"})
db.testcol.insert({ "name" : "john", "val" : 4, "host":"blah5"})
db.testcol.insert({ "name" : "john", "val" : 8, "host":"blah6"})
db.testcol.insert({ "name" : "john", "val" : 6, "host":"blah7"})
#print "Data: "
#print pformat(list(db.testcol.find()))
mapfunc = Code(''' function() { emit( this.name, { name : this.name, val: this.val, host: this.host }); } ''')
redfunc = Code(''' function(key, values) {
var latestRec = { val: 0, host: "", name: ""};
for (var i=1; i < values.length; i++) {
thisName = values[i].name
thisVal = values[i].val;
thisHost = values[i].host;
if (thisVal > latestRec.val) {
latestRec.name = thisName
latestRec.val = thisVal;
latestRec.host = thisHost;
}
}
return latestRec
} ''')
res = db.testcol.map_reduce(mapfunc, redfunc, "myresults")
for doc in res.find():
print doc
print "*" * 80
conn.close()
No comments:
Post a Comment