mask.py 4.0 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182
  1. __author__ = 'tsungyi'
  2. import pycocotools._mask as _mask
  3. # Interface for manipulating masks stored in RLE format.
  4. #
  5. # RLE is a simple yet efficient format for storing binary masks. RLE
  6. # first divides a vector (or vectorized image) into a series of piecewise
  7. # constant regions and then for each piece simply stores the length of
  8. # that piece. For example, given M=[0 0 1 1 1 0 1] the RLE counts would
  9. # be [2 3 1 1], or for M=[1 1 1 1 1 1 0] the counts would be [0 6 1]
  10. # (note that the odd-numbered counts, i.e. the 1st, 3rd, ..., are always the numbers of zeros). Instead of
  11. # storing the counts directly, additional compression is achieved with a
  12. # variable bitrate representation based on a common scheme called LEB128.
  13. #
  14. # Compression is greatest given large piecewise constant regions.
  15. # Specifically, the size of the RLE is proportional to the number of
  16. # *boundaries* in M (or for an image the number of boundaries in the y
  17. # direction). Assuming fairly simple shapes, the RLE representation is
  18. # O(sqrt(n)) where n is number of pixels in the object. Hence space usage
  19. # is substantially lower, especially for large simple objects (large n).
  20. #
  21. # Many common operations on masks can be computed directly using the RLE
  22. # (without need for decoding). This includes computations such as area,
  23. # union, intersection, etc. All of these operations are linear in the
  24. # size of the RLE, in other words they are O(sqrt(n)) where n is the area
  25. # of the object. Computing these operations on the original mask is O(n).
  26. # Thus, using the RLE can result in substantial computational savings.
  27. #
  28. # The following API functions are defined:
  29. # encode - Encode binary masks using RLE.
  30. # decode - Decode binary masks encoded via RLE.
  31. # merge - Compute union or intersection of encoded masks.
  32. # iou - Compute intersection over union between masks.
  33. # area - Compute area of encoded masks.
  34. # toBbox - Get bounding boxes surrounding encoded masks.
  35. # frPyObjects - Convert polygon, bbox, and uncompressed RLE to encoded RLE mask.
  36. #
  37. # Usage:
  38. # Rs = encode( masks )
  39. # masks = decode( Rs )
  40. # R = merge( Rs, intersect=False )
  41. # o = iou( dt, gt, iscrowd )
  42. # a = area( Rs )
  43. # bbs = toBbox( Rs )
  44. # Rs = frPyObjects( [pyObjects], h, w )
  45. #
  46. # In the API the following formats are used:
  47. # Rs - [dict] Run-length encoding of binary masks
  48. # R - dict Run-length encoding of binary mask
  49. # masks - [hxwxn] Binary mask(s) (must have type np.ndarray(dtype=uint8) in column-major order)
  50. # iscrowd - [nx1] list of np.ndarray. 1 indicates corresponding gt image has crowd region to ignore
  51. # bbs - [nx4] Bounding box(es) stored as [x y w h]
  52. # poly - Polygon stored as [[x1 y1 x2 y2...],[x1 y1 ...],...] (2D list)
  53. # dt,gt - May be either bounding boxes or encoded masks
  54. # Both poly and bbs are 0-indexed (bbox=[0 0 1 1] encloses first pixel).
  55. #
  56. # Finally, a note about the intersection over union (iou) computation.
  57. # The standard iou of a ground truth (gt) and detected (dt) object is
  58. # iou(gt,dt) = area(intersect(gt,dt)) / area(union(gt,dt))
  59. # For "crowd" regions, we use a modified criterion. If a gt object is
  60. # marked as "iscrowd", we allow a dt to match any subregion of the gt.
  61. # Choosing gt' in the crowd gt that best matches the dt can be done using
  62. # gt'=intersect(dt,gt). Since by definition union(gt',dt)=dt, computing
  63. # iou(gt,dt,iscrowd) = iou(gt',dt) = area(intersect(gt,dt)) / area(dt)
  64. # For crowd gt regions we use the modified criterion described above for the iou.
  65. #
  66. # To compile run "python setup.py build_ext --inplace"
  67. # Please do not contact us for help with compiling.
  68. #
  69. # Microsoft COCO Toolbox. version 2.0
  70. # Data, paper, and tutorials available at: http://mscoco.org/
  71. # Code written by Piotr Dollar and Tsung-Yi Lin, 2015.
  72. # Licensed under the Simplified BSD License [see coco/license.txt]
  73. encode = _mask.encode
  74. decode = _mask.decode
  75. iou = _mask.iou
  76. merge = _mask.merge
  77. area = _mask.area
  78. toBbox = _mask.toBbox
  79. frPyObjects = _mask.frPyObjects