统计每个分组里的最大最小记录
mongo实现
{ "_id" : "01001", "city" : "AGAWAM", "pop" : 15338, "state" : "MA"}
完整json见:
需求
得到每个state拥有最多人口的城市和拥有最小人口的城市以及对应的人口数
db.zipcodes.aggregate( {$group: {_id:{state:"$state",city:"$city"}, popPerCity:{$sum:"$pop"} } }, {$sort: {popPerCity:1} }, {$group: { _id:"$_id.state", biggestCity:{$last:"$_id.city"}, biggestPop: {$last:"$popPerCity"}, smallestCity: {$first:"$_id.city"}, smallestPop: {$first:"$popPerCity"} }})
效果
{ "_id" : "DE", "biggestCity" : "NEWARK", "biggestPop" : 111674, "smallestCity" : "BETHEL", "smallestPop" : 108 }{ "_id" : "MS", "biggestCity" : "JACKSON", "biggestPop" : 204788, "smallestCity" : "CHUNKY", "smallestPop" : 79 }...
见:
Mysql实现
相比mongo的直观 就要绕很多了
方案一
# 需要设置一个较大值 默认的1024还不够用SET SESSION group_concat_max_len = 20480;select state, substring_index(group_concat(city order by pop ),",",1) smallestCity, min(pop),substring_index(group_concat(city order by pop ),",",-1) biggestCity, max(pop) from (select state, city, sum(pop) pop from zipcode group by state, city) a group by state ;
参考
方案二
# 每个state分组里面分别按pop升序、降序排序 人为分配一个序号 均取序号一就得到了该分组的起止记录select b.state, b.city smallestCity, b.pop smallestPop, c.city biggestCity, c.pop biggestPop from( select state,city,pop,@rank:=if(@current_state=state, @rank+1, 1) rank, @current_state:=state from(select state, city, sum(pop) pop from zipcode group by state, city) a,(select @current_state:=NULL, @rank:=NULL) varsorder by a.state,a.pop) b ,( select state,city,pop,@rank:=if(@current_state=state, @rank+1, 1) rank, @current_state:=state from(select state, city, sum(pop) pop from zipcode group by state, city) a,(select @current_state:=NULL, @rank:=NULL) varsorder by a.state,a.pop desc) cwhere b.state = c.state and b.rank = 1 and c.rank = 1
补充
建表语句
CREATE TABLE `zipcode` ( `id` int(11) NOT NULL AUTO_INCREMENT, `zipcode` varchar(10) NOT NULL, `city` varchar(30) NOT NULL, `pop` int(11) NOT NULL, `state` varchar(5) NOT NULL, PRIMARY KEY (`id`), UNIQUE KEY `zipcode` (`zipcode`), KEY `idx_state_city` (`state`,`city`)) ENGINE=InnoDB DEFAULT CHARSET=utf8
json ==> batch insert sql
jq -c '[._id, .city, .pop, .state]' zips.json | sed 's/\[\(.*\)\]$/\1/' | awk -F, '{print "insert into zipcode select null," $1"," $2","$3","$4";"}'