cocoeval.py 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. __author__ = 'tsungyi'
  2. import numpy as np
  3. import datetime
  4. import time
  5. from collections import defaultdict
  6. import mask
  7. import copy
  8. class COCOeval:
  9. # Interface for evaluating detection on the Microsoft COCO dataset.
  10. #
  11. # The usage for CocoEval is as follows:
  12. # cocoGt=..., cocoDt=... # load dataset and results
  13. # E = CocoEval(cocoGt,cocoDt); # initialize CocoEval object
  14. # E.params.recThrs = ...; # set parameters as desired
  15. # E.evaluate(); # run per image evaluation
  16. # E.accumulate(); # accumulate per image results
  17. # E.summarize(); # display summary metrics of results
  18. # For example usage see evalDemo.m and http://mscoco.org/.
  19. #
  20. # The evaluation parameters are as follows (defaults in brackets):
  21. # imgIds - [all] N img ids to use for evaluation
  22. # catIds - [all] K cat ids to use for evaluation
  23. # iouThrs - [.5:.05:.95] T=10 IoU thresholds for evaluation
  24. # recThrs - [0:.01:1] R=101 recall thresholds for evaluation
  25. # areaRng - [...] A=4 object area ranges for evaluation
  26. # maxDets - [1 10 100] M=3 thresholds on max detections per image
  27. # useSegm - [1] if true evaluate against ground-truth segments
  28. # useCats - [1] if true use category labels for evaluation # Note: if useSegm=0 the evaluation is run on bounding boxes.
  29. # Note: if useCats=0 category labels are ignored as in proposal scoring.
  30. # Note: multiple areaRngs [Ax2] and maxDets [Mx1] can be specified.
  31. #
  32. # evaluate(): evaluates detections on every image and every category and
  33. # concats the results into the "evalImgs" with fields:
  34. # dtIds - [1xD] id for each of the D detections (dt)
  35. # gtIds - [1xG] id for each of the G ground truths (gt)
  36. # dtMatches - [TxD] matching gt id at each IoU or 0
  37. # gtMatches - [TxG] matching dt id at each IoU or 0
  38. # dtScores - [1xD] confidence of each dt
  39. # gtIgnore - [1xG] ignore flag for each gt
  40. # dtIgnore - [TxD] ignore flag for each dt at each IoU
  41. #
  42. # accumulate(): accumulates the per-image, per-category evaluation
  43. # results in "evalImgs" into the dictionary "eval" with fields:
  44. # params - parameters used for evaluation
  45. # date - date evaluation was performed
  46. # counts - [T,R,K,A,M] parameter dimensions (see above)
  47. # precision - [TxRxKxAxM] precision for every evaluation setting
  48. # recall - [TxKxAxM] max recall for every evaluation setting
  49. # Note: precision and recall==-1 for settings with no gt objects.
  50. #
  51. # See also coco, mask, pycocoDemo, pycocoEvalDemo
  52. #
  53. # Microsoft COCO Toolbox. version 2.0
  54. # Data, paper, and tutorials available at: http://mscoco.org/
  55. # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  56. # Licensed under the Simplified BSD License [see coco/license.txt]
  57. def __init__(self, cocoGt=None, cocoDt=None):
  58. '''
  59. Initialize CocoEval using coco APIs for gt and dt
  60. :param cocoGt: coco object with ground truth annotations
  61. :param cocoDt: coco object with detection results
  62. :return: None
  63. '''
  64. self.cocoGt = cocoGt # ground truth COCO API
  65. self.cocoDt = cocoDt # detections COCO API
  66. self.params = {} # evaluation parameters
  67. self.evalImgs = defaultdict(list) # per-image per-category evaluation results [KxAxI] elements
  68. self.eval = {} # accumulated evaluation results
  69. self._gts = defaultdict(list) # gt for evaluation
  70. self._dts = defaultdict(list) # dt for evaluation
  71. self.params = Params() # parameters
  72. self._paramsEval = {} # parameters for evaluation
  73. self.stats = [] # result summarization
  74. self.ious = {} # ious between all gts and dts
  75. if not cocoGt is None:
  76. self.params.imgIds = sorted(cocoGt.getImgIds())
  77. self.params.catIds = sorted(cocoGt.getCatIds())
  78. def _prepare(self):
  79. '''
  80. Prepare ._gts and ._dts for evaluation based on params
  81. :return: None
  82. '''
  83. #
  84. def _toMask(objs, coco):
  85. # modify segmentation by reference
  86. for obj in objs:
  87. t = coco.imgs[obj['image_id']]
  88. if type(obj['segmentation']) == list:
  89. if type(obj['segmentation'][0]) == dict:
  90. print 'debug'
  91. obj['segmentation'] = mask.frPyObjects(obj['segmentation'],t['height'],t['width'])
  92. if len(obj['segmentation']) == 1:
  93. obj['segmentation'] = obj['segmentation'][0]
  94. else:
  95. # an object can have multiple polygon regions
  96. # merge them into one RLE mask
  97. obj['segmentation'] = mask.merge(obj['segmentation'])
  98. elif type(obj['segmentation']) == dict and type(obj['segmentation']['counts']) == list:
  99. obj['segmentation'] = mask.frPyObjects([obj['segmentation']],t['height'],t['width'])[0]
  100. elif type(obj['segmentation']) == dict and \
  101. type(obj['segmentation']['counts'] == unicode or type(obj['segmentation']['counts']) == str):
  102. pass
  103. else:
  104. raise Exception('segmentation format not supported.')
  105. p = self.params
  106. if p.useCats:
  107. gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
  108. dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds, catIds=p.catIds))
  109. else:
  110. gts=self.cocoGt.loadAnns(self.cocoGt.getAnnIds(imgIds=p.imgIds))
  111. dts=self.cocoDt.loadAnns(self.cocoDt.getAnnIds(imgIds=p.imgIds))
  112. if p.useSegm:
  113. _toMask(gts, self.cocoGt)
  114. _toMask(dts, self.cocoDt)
  115. self._gts = defaultdict(list) # gt for evaluation
  116. self._dts = defaultdict(list) # dt for evaluation
  117. for gt in gts:
  118. self._gts[gt['image_id'], gt['category_id']].append(gt)
  119. for dt in dts:
  120. self._dts[dt['image_id'], dt['category_id']].append(dt)
  121. self.evalImgs = defaultdict(list) # per-image per-category evaluation results
  122. self.eval = {} # accumulated evaluation results
  123. def evaluate(self):
  124. '''
  125. Run per image evaluation on given images and store results (a list of dict) in self.evalImgs
  126. :return: None
  127. '''
  128. tic = time.time()
  129. print 'Running per image evaluation... '
  130. p = self.params
  131. p.imgIds = list(np.unique(p.imgIds))
  132. if p.useCats:
  133. p.catIds = list(np.unique(p.catIds))
  134. p.maxDets = sorted(p.maxDets)
  135. self.params=p
  136. self._prepare()
  137. # loop through images, area range, max detection number
  138. catIds = p.catIds if p.useCats else [-1]
  139. computeIoU = self.computeIoU
  140. self.ious = {(imgId, catId): computeIoU(imgId, catId) \
  141. for imgId in p.imgIds
  142. for catId in catIds}
  143. evaluateImg = self.evaluateImg
  144. maxDet = p.maxDets[-1]
  145. self.evalImgs = [evaluateImg(imgId, catId, areaRng, maxDet)
  146. for catId in catIds
  147. for areaRng in p.areaRng
  148. for imgId in p.imgIds
  149. ]
  150. self._paramsEval = copy.deepcopy(self.params)
  151. toc = time.time()
  152. print 'DONE (t=%0.2fs).'%(toc-tic)
  153. def computeIoU(self, imgId, catId):
  154. p = self.params
  155. if p.useCats:
  156. gt = self._gts[imgId,catId]
  157. dt = self._dts[imgId,catId]
  158. else:
  159. gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
  160. dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
  161. if len(gt) == 0 and len(dt) ==0:
  162. return []
  163. dt = sorted(dt, key=lambda x: -x['score'])
  164. if len(dt) > p.maxDets[-1]:
  165. dt=dt[0:p.maxDets[-1]]
  166. if p.useSegm:
  167. g = [g['segmentation'] for g in gt]
  168. d = [d['segmentation'] for d in dt]
  169. else:
  170. g = [g['bbox'] for g in gt]
  171. d = [d['bbox'] for d in dt]
  172. # compute iou between each dt and gt region
  173. iscrowd = [int(o['iscrowd']) for o in gt]
  174. ious = mask.iou(d,g,iscrowd)
  175. return ious
  176. def evaluateImg(self, imgId, catId, aRng, maxDet):
  177. '''
  178. perform evaluation for single category and image
  179. :return: dict (single image results)
  180. '''
  181. #
  182. p = self.params
  183. if p.useCats:
  184. gt = self._gts[imgId,catId]
  185. dt = self._dts[imgId,catId]
  186. else:
  187. gt = [_ for cId in p.catIds for _ in self._gts[imgId,cId]]
  188. dt = [_ for cId in p.catIds for _ in self._dts[imgId,cId]]
  189. if len(gt) == 0 and len(dt) ==0:
  190. return None
  191. for g in gt:
  192. if 'ignore' not in g:
  193. g['ignore'] = 0
  194. if g['iscrowd'] == 1 or g['ignore'] or (g['area']<aRng[0] or g['area']>aRng[1]):
  195. g['_ignore'] = 1
  196. else:
  197. g['_ignore'] = 0
  198. # sort dt highest score first, sort gt ignore last
  199. # gt = sorted(gt, key=lambda x: x['_ignore'])
  200. gtind = [ind for (ind, g) in sorted(enumerate(gt), key=lambda (ind, g): g['_ignore']) ]
  201. gt = [gt[ind] for ind in gtind]
  202. dt = sorted(dt, key=lambda x: -x['score'])[0:maxDet]
  203. iscrowd = [int(o['iscrowd']) for o in gt]
  204. # load computed ious
  205. N_iou = len(self.ious[imgId, catId])
  206. ious = self.ious[imgId, catId][0:maxDet, np.array(gtind)] if N_iou >0 else self.ious[imgId, catId]
  207. T = len(p.iouThrs)
  208. G = len(gt)
  209. D = len(dt)
  210. gtm = np.zeros((T,G))
  211. dtm = np.zeros((T,D))
  212. gtIg = np.array([g['_ignore'] for g in gt])
  213. dtIg = np.zeros((T,D))
  214. if not len(ious)==0:
  215. for tind, t in enumerate(p.iouThrs):
  216. for dind, d in enumerate(dt):
  217. # information about best match so far (m=-1 -> unmatched)
  218. iou = min([t,1-1e-10])
  219. m = -1
  220. for gind, g in enumerate(gt):
  221. # if this gt already matched, and not a crowd, continue
  222. if gtm[tind,gind]>0 and not iscrowd[gind]:
  223. continue
  224. # if dt matched to reg gt, and on ignore gt, stop
  225. if m>-1 and gtIg[m]==0 and gtIg[gind]==1:
  226. break
  227. # continue to next gt unless better match made
  228. if ious[dind,gind] < iou:
  229. continue
  230. # match successful and best so far, store appropriately
  231. iou=ious[dind,gind]
  232. m=gind
  233. # if match made store id of match for both dt and gt
  234. if m ==-1:
  235. continue
  236. dtIg[tind,dind] = gtIg[m]
  237. dtm[tind,dind] = gt[m]['id']
  238. gtm[tind,m] = d['id']
  239. # set unmatched detections outside of area range to ignore
  240. a = np.array([d['area']<aRng[0] or d['area']>aRng[1] for d in dt]).reshape((1, len(dt)))
  241. dtIg = np.logical_or(dtIg, np.logical_and(dtm==0, np.repeat(a,T,0)))
  242. # store results for given image and category
  243. return {
  244. 'image_id': imgId,
  245. 'category_id': catId,
  246. 'aRng': aRng,
  247. 'maxDet': maxDet,
  248. 'dtIds': [d['id'] for d in dt],
  249. 'gtIds': [g['id'] for g in gt],
  250. 'dtMatches': dtm,
  251. 'gtMatches': gtm,
  252. 'dtScores': [d['score'] for d in dt],
  253. 'gtIgnore': gtIg,
  254. 'dtIgnore': dtIg,
  255. }
  256. def accumulate(self, p = None):
  257. '''
  258. Accumulate per image evaluation results and store the result in self.eval
  259. :param p: input params for evaluation
  260. :return: None
  261. '''
  262. print 'Accumulating evaluation results... '
  263. tic = time.time()
  264. if not self.evalImgs:
  265. print 'Please run evaluate() first'
  266. # allows input customized parameters
  267. if p is None:
  268. p = self.params
  269. p.catIds = p.catIds if p.useCats == 1 else [-1]
  270. T = len(p.iouThrs)
  271. R = len(p.recThrs)
  272. K = len(p.catIds) if p.useCats else 1
  273. A = len(p.areaRng)
  274. M = len(p.maxDets)
  275. precision = -np.ones((T,R,K,A,M)) # -1 for the precision of absent categories
  276. recall = -np.ones((T,K,A,M))
  277. # create dictionary for future indexing
  278. _pe = self._paramsEval
  279. catIds = _pe.catIds if _pe.useCats else [-1]
  280. setK = set(catIds)
  281. setA = set(map(tuple, _pe.areaRng))
  282. setM = set(_pe.maxDets)
  283. setI = set(_pe.imgIds)
  284. # get inds to evaluate
  285. k_list = [n for n, k in enumerate(p.catIds) if k in setK]
  286. m_list = [m for n, m in enumerate(p.maxDets) if m in setM]
  287. a_list = [n for n, a in enumerate(map(lambda x: tuple(x), p.areaRng)) if a in setA]
  288. i_list = [n for n, i in enumerate(p.imgIds) if i in setI]
  289. # K0 = len(_pe.catIds)
  290. I0 = len(_pe.imgIds)
  291. A0 = len(_pe.areaRng)
  292. # retrieve E at each category, area range, and max number of detections
  293. for k, k0 in enumerate(k_list):
  294. Nk = k0*A0*I0
  295. for a, a0 in enumerate(a_list):
  296. Na = a0*I0
  297. for m, maxDet in enumerate(m_list):
  298. E = [self.evalImgs[Nk+Na+i] for i in i_list]
  299. E = filter(None, E)
  300. if len(E) == 0:
  301. continue
  302. dtScores = np.concatenate([e['dtScores'][0:maxDet] for e in E])
  303. # different sorting method generates slightly different results.
  304. # mergesort is used to be consistent as Matlab implementation.
  305. inds = np.argsort(-dtScores, kind='mergesort')
  306. dtm = np.concatenate([e['dtMatches'][:,0:maxDet] for e in E], axis=1)[:,inds]
  307. dtIg = np.concatenate([e['dtIgnore'][:,0:maxDet] for e in E], axis=1)[:,inds]
  308. gtIg = np.concatenate([e['gtIgnore'] for e in E])
  309. npig = len([ig for ig in gtIg if ig == 0])
  310. if npig == 0:
  311. continue
  312. tps = np.logical_and( dtm, np.logical_not(dtIg) )
  313. fps = np.logical_and(np.logical_not(dtm), np.logical_not(dtIg) )
  314. tp_sum = np.cumsum(tps, axis=1).astype(dtype=np.float)
  315. fp_sum = np.cumsum(fps, axis=1).astype(dtype=np.float)
  316. for t, (tp, fp) in enumerate(zip(tp_sum, fp_sum)):
  317. tp = np.array(tp)
  318. fp = np.array(fp)
  319. nd = len(tp)
  320. rc = tp / npig
  321. pr = tp / (fp+tp+np.spacing(1))
  322. q = np.zeros((R,))
  323. if nd:
  324. recall[t,k,a,m] = rc[-1]
  325. else:
  326. recall[t,k,a,m] = 0
  327. # numpy is slow without cython optimization for accessing elements
  328. # use python array gets significant speed improvement
  329. pr = pr.tolist(); q = q.tolist()
  330. for i in range(nd-1, 0, -1):
  331. if pr[i] > pr[i-1]:
  332. pr[i-1] = pr[i]
  333. inds = np.searchsorted(rc, p.recThrs)
  334. try:
  335. for ri, pi in enumerate(inds):
  336. q[ri] = pr[pi]
  337. except:
  338. pass
  339. precision[t,:,k,a,m] = np.array(q)
  340. self.eval = {
  341. 'params': p,
  342. 'counts': [T, R, K, A, M],
  343. 'date': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
  344. 'precision': precision,
  345. 'recall': recall,
  346. }
  347. toc = time.time()
  348. print 'DONE (t=%0.2fs).'%( toc-tic )
  349. def summarize(self):
  350. '''
  351. Compute and display summary metrics for evaluation results.
  352. Note this functin can *only* be applied on the default parameter setting
  353. '''
  354. def _summarize( ap=1, iouThr=None, areaRng='all', maxDets=100 ):
  355. p = self.params
  356. iStr = ' {:<18} {} @[ IoU={:<9} | area={:>6} | maxDets={:>3} ] = {}'
  357. titleStr = 'Average Precision' if ap == 1 else 'Average Recall'
  358. typeStr = '(AP)' if ap==1 else '(AR)'
  359. iouStr = '%0.2f:%0.2f'%(p.iouThrs[0], p.iouThrs[-1]) if iouThr is None else '%0.2f'%(iouThr)
  360. areaStr = areaRng
  361. maxDetsStr = '%d'%(maxDets)
  362. aind = [i for i, aRng in enumerate(['all', 'small', 'medium', 'large']) if aRng == areaRng]
  363. mind = [i for i, mDet in enumerate([1, 10, 100]) if mDet == maxDets]
  364. if ap == 1:
  365. # dimension of precision: [TxRxKxAxM]
  366. s = self.eval['precision']
  367. # IoU
  368. if iouThr is not None:
  369. t = np.where(iouThr == p.iouThrs)[0]
  370. s = s[t]
  371. # areaRng
  372. s = s[:,:,:,aind,mind]
  373. else:
  374. # dimension of recall: [TxKxAxM]
  375. s = self.eval['recall']
  376. s = s[:,:,aind,mind]
  377. if len(s[s>-1])==0:
  378. mean_s = -1
  379. else:
  380. mean_s = np.mean(s[s>-1])
  381. print iStr.format(titleStr, typeStr, iouStr, areaStr, maxDetsStr, '%.3f'%(float(mean_s)))
  382. return mean_s
  383. if not self.eval:
  384. raise Exception('Please run accumulate() first')
  385. self.stats = np.zeros((12,))
  386. self.stats[0] = _summarize(1)
  387. self.stats[1] = _summarize(1,iouThr=.5)
  388. self.stats[2] = _summarize(1,iouThr=.75)
  389. self.stats[3] = _summarize(1,areaRng='small')
  390. self.stats[4] = _summarize(1,areaRng='medium')
  391. self.stats[5] = _summarize(1,areaRng='large')
  392. self.stats[6] = _summarize(0,maxDets=1)
  393. self.stats[7] = _summarize(0,maxDets=10)
  394. self.stats[8] = _summarize(0,maxDets=100)
  395. self.stats[9] = _summarize(0,areaRng='small')
  396. self.stats[10] = _summarize(0,areaRng='medium')
  397. self.stats[11] = _summarize(0,areaRng='large')
  398. def __str__(self):
  399. self.summarize()
  400. class Params:
  401. '''
  402. Params for coco evaluation api
  403. '''
  404. def __init__(self):
  405. self.imgIds = []
  406. self.catIds = []
  407. # np.arange causes trouble. the data point on arange is slightly larger than the true value
  408. self.iouThrs = np.linspace(.5, 0.95, np.round((0.95-.5)/.05)+1, endpoint=True)
  409. self.recThrs = np.linspace(.0, 1.00, np.round((1.00-.0)/.01)+1, endpoint=True)
  410. self.maxDets = [1,10,100]
  411. self.areaRng = [ [0**2,1e5**2], [0**2, 32**2], [32**2, 96**2], [96**2, 1e5**2] ]
  412. self.useSegm = 0
  413. self.useCats = 1