detector.py 8.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216
  1. #!/usr/bin/env python
"""
Do windowed detection by classifying a number of images/crops at once,
optionally using the selective search window proposal method.

This implementation follows ideas in
    Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik.
    Rich feature hierarchies for accurate object detection and semantic
    segmentation.
    http://arxiv.org/abs/1311.2524

The selective_search_ijcv_with_python code required for the selective search
proposal mode is available at
    https://github.com/sergeyk/selective_search_ijcv_with_python
"""
  14. import numpy as np
  15. import os
  16. import caffe
  17. class Detector(caffe.Net):
  18. """
  19. Detector extends Net for windowed detection by a list of crops or
  20. selective search proposals.
  21. Parameters
  22. ----------
  23. mean, input_scale, raw_scale, channel_swap : params for preprocessing
  24. options.
  25. context_pad : amount of surrounding context to take s.t. a `context_pad`
  26. sized border of pixels in the network input image is context, as in
  27. R-CNN feature extraction.
  28. """
  29. def __init__(self, model_file, pretrained_file, mean=None,
  30. input_scale=None, raw_scale=None, channel_swap=None,
  31. context_pad=None):
  32. caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)
  33. # configure pre-processing
  34. in_ = self.inputs[0]
  35. self.transformer = caffe.io.Transformer(
  36. {in_: self.blobs[in_].data.shape})
  37. self.transformer.set_transpose(in_, (2, 0, 1))
  38. if mean is not None:
  39. self.transformer.set_mean(in_, mean)
  40. if input_scale is not None:
  41. self.transformer.set_input_scale(in_, input_scale)
  42. if raw_scale is not None:
  43. self.transformer.set_raw_scale(in_, raw_scale)
  44. if channel_swap is not None:
  45. self.transformer.set_channel_swap(in_, channel_swap)
  46. self.configure_crop(context_pad)
  47. def detect_windows(self, images_windows):
  48. """
  49. Do windowed detection over given images and windows. Windows are
  50. extracted then warped to the input dimensions of the net.
  51. Parameters
  52. ----------
  53. images_windows: (image filename, window list) iterable.
  54. context_crop: size of context border to crop in pixels.
  55. Returns
  56. -------
  57. detections: list of {filename: image filename, window: crop coordinates,
  58. predictions: prediction vector} dicts.
  59. """
  60. # Extract windows.
  61. window_inputs = []
  62. for image_fname, windows in images_windows:
  63. image = caffe.io.load_image(image_fname).astype(np.float32)
  64. for window in windows:
  65. window_inputs.append(self.crop(image, window))
  66. # Run through the net (warping windows to input dimensions).
  67. in_ = self.inputs[0]
  68. caffe_in = np.zeros((len(window_inputs), window_inputs[0].shape[2])
  69. + self.blobs[in_].data.shape[2:],
  70. dtype=np.float32)
  71. for ix, window_in in enumerate(window_inputs):
  72. caffe_in[ix] = self.transformer.preprocess(in_, window_in)
  73. out = self.forward_all(**{in_: caffe_in})
  74. predictions = out[self.outputs[0]]
  75. # Package predictions with images and windows.
  76. detections = []
  77. ix = 0
  78. for image_fname, windows in images_windows:
  79. for window in windows:
  80. detections.append({
  81. 'window': window,
  82. 'prediction': predictions[ix],
  83. 'filename': image_fname
  84. })
  85. ix += 1
  86. return detections
  87. def detect_selective_search(self, image_fnames):
  88. """
  89. Do windowed detection over Selective Search proposals by extracting
  90. the crop and warping to the input dimensions of the net.
  91. Parameters
  92. ----------
  93. image_fnames: list
  94. Returns
  95. -------
  96. detections: list of {filename: image filename, window: crop coordinates,
  97. predictions: prediction vector} dicts.
  98. """
  99. import selective_search_ijcv_with_python as selective_search
  100. # Make absolute paths so MATLAB can find the files.
  101. image_fnames = [os.path.abspath(f) for f in image_fnames]
  102. windows_list = selective_search.get_windows(
  103. image_fnames,
  104. cmd='selective_search_rcnn'
  105. )
  106. # Run windowed detection on the selective search list.
  107. return self.detect_windows(zip(image_fnames, windows_list))
  108. def crop(self, im, window):
  109. """
  110. Crop a window from the image for detection. Include surrounding context
  111. according to the `context_pad` configuration.
  112. Parameters
  113. ----------
  114. im: H x W x K image ndarray to crop.
  115. window: bounding box coordinates as ymin, xmin, ymax, xmax.
  116. Returns
  117. -------
  118. crop: cropped window.
  119. """
  120. # Crop window from the image.
  121. crop = im[window[0]:window[2], window[1]:window[3]]
  122. if self.context_pad:
  123. box = window.copy()
  124. crop_size = self.blobs[self.inputs[0]].width # assumes square
  125. scale = crop_size / (1. * crop_size - self.context_pad * 2)
  126. # Crop a box + surrounding context.
  127. half_h = (box[2] - box[0] + 1) / 2.
  128. half_w = (box[3] - box[1] + 1) / 2.
  129. center = (box[0] + half_h, box[1] + half_w)
  130. scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w))
  131. box = np.round(np.tile(center, 2) + scaled_dims)
  132. full_h = box[2] - box[0] + 1
  133. full_w = box[3] - box[1] + 1
  134. scale_h = crop_size / full_h
  135. scale_w = crop_size / full_w
  136. pad_y = round(max(0, -box[0]) * scale_h) # amount out-of-bounds
  137. pad_x = round(max(0, -box[1]) * scale_w)
  138. # Clip box to image dimensions.
  139. im_h, im_w = im.shape[:2]
  140. box = np.clip(box, 0., [im_h, im_w, im_h, im_w])
  141. clip_h = box[2] - box[0] + 1
  142. clip_w = box[3] - box[1] + 1
  143. assert(clip_h > 0 and clip_w > 0)
  144. crop_h = round(clip_h * scale_h)
  145. crop_w = round(clip_w * scale_w)
  146. if pad_y + crop_h > crop_size:
  147. crop_h = crop_size - pad_y
  148. if pad_x + crop_w > crop_size:
  149. crop_w = crop_size - pad_x
  150. # collect with context padding and place in input
  151. # with mean padding
  152. context_crop = im[box[0]:box[2], box[1]:box[3]]
  153. context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w))
  154. crop = np.ones(self.crop_dims, dtype=np.float32) * self.crop_mean
  155. crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop
  156. return crop
  157. def configure_crop(self, context_pad):
  158. """
  159. Configure crop dimensions and amount of context for cropping.
  160. If context is included, make the special input mean for context padding.
  161. Parameters
  162. ----------
  163. context_pad : amount of context for cropping.
  164. """
  165. # crop dimensions
  166. in_ = self.inputs[0]
  167. tpose = self.transformer.transpose[in_]
  168. inv_tpose = [tpose[t] for t in tpose]
  169. self.crop_dims = np.array(self.blobs[in_].data.shape[1:])[inv_tpose]
  170. #.transpose(inv_tpose)
  171. # context padding
  172. self.context_pad = context_pad
  173. if self.context_pad:
  174. in_ = self.inputs[0]
  175. transpose = self.transformer.transpose.get(in_)
  176. channel_order = self.transformer.channel_swap.get(in_)
  177. raw_scale = self.transformer.raw_scale.get(in_)
  178. # Padding context crops needs the mean in unprocessed input space.
  179. mean = self.transformer.mean.get(in_)
  180. if mean is not None:
  181. inv_transpose = [transpose[t] for t in transpose]
  182. crop_mean = mean.copy().transpose(inv_transpose)
  183. if channel_order is not None:
  184. channel_order_inverse = [channel_order.index(i)
  185. for i in range(crop_mean.shape[2])]
  186. crop_mean = crop_mean[:, :, channel_order_inverse]
  187. if raw_scale is not None:
  188. crop_mean /= raw_scale
  189. self.crop_mean = crop_mean
  190. else:
  191. self.crop_mean = np.zeros(self.crop_dims, dtype=np.float32)