123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216 |
- #!/usr/bin/env python
- """
- Do windowed detection by classifying a number of images/crops at once,
- optionally using the selective search window proposal method.
- This implementation follows ideas in
- Ross Girshick, Jeff Donahue, Trevor Darrell, Jitendra Malik.
- Rich feature hierarchies for accurate object detection and semantic
- segmentation.
- http://arxiv.org/abs/1311.2524
- The selective_search_ijcv_with_python code required for the selective search
- proposal mode is available at
- https://github.com/sergeyk/selective_search_ijcv_with_python
- """
- import numpy as np
- import os
- import caffe
class Detector(caffe.Net):
    """
    Detector extends Net for windowed detection by a list of crops or
    selective search proposals.

    Parameters
    ----------
    model_file, pretrained_file : forwarded to `caffe.Net.__init__` together
        with `caffe.TEST`.
    mean, input_scale, raw_scale, channel_swap : params for preprocessing
        options. Each one is handed to the `caffe.io.Transformer` only when
        it is not None.
    context_pad : amount of surrounding context to take s.t. a `context_pad`
        sized border of pixels in the network input image is context, as in
        R-CNN feature extraction.
    """

    def __init__(self, model_file, pretrained_file, mean=None,
                 input_scale=None, raw_scale=None, channel_swap=None,
                 context_pad=None):
        caffe.Net.__init__(self, model_file, pretrained_file, caffe.TEST)

        # configure pre-processing
        in_ = self.inputs[0]
        self.transformer = caffe.io.Transformer(
            {in_: self.blobs[in_].data.shape})
        self.transformer.set_transpose(in_, (2, 0, 1))
        if mean is not None:
            self.transformer.set_mean(in_, mean)
        if input_scale is not None:
            self.transformer.set_input_scale(in_, input_scale)
        if raw_scale is not None:
            self.transformer.set_raw_scale(in_, raw_scale)
        if channel_swap is not None:
            self.transformer.set_channel_swap(in_, channel_swap)

        self.configure_crop(context_pad)

    def detect_windows(self, images_windows):
        """
        Do windowed detection over given images and windows. Windows are
        extracted then warped to the input dimensions of the net.

        Parameters
        ----------
        images_windows: (image filename, window list) iterable. It is
            consumed in a single pass, so one-shot iterators (e.g. the
            result of `zip` on Python 3) are accepted.

        Returns
        -------
        detections: list of {filename: image filename, window: crop
            coordinates, prediction: prediction vector} dicts, one per
            window, in input order. Empty when no windows were given.
        """
        # Extract windows, remembering which (image, window) pair produced
        # each crop so the input never has to be iterated a second time.
        window_inputs = []
        sources = []
        for image_fname, windows in images_windows:
            image = caffe.io.load_image(image_fname).astype(np.float32)
            for window in windows:
                window_inputs.append(self.crop(image, window))
                sources.append((image_fname, window))

        # Nothing to classify: avoid indexing window_inputs[0] below.
        if not window_inputs:
            return []

        # Run through the net (warping windows to input dimensions).
        in_ = self.inputs[0]
        caffe_in = np.zeros((len(window_inputs), window_inputs[0].shape[2])
                            + self.blobs[in_].data.shape[2:],
                            dtype=np.float32)
        for ix, window_in in enumerate(window_inputs):
            caffe_in[ix] = self.transformer.preprocess(in_, window_in)
        out = self.forward_all(**{in_: caffe_in})
        predictions = out[self.outputs[0]]

        # Package predictions with images and windows.
        return [
            {
                'window': window,
                'prediction': predictions[ix],
                'filename': image_fname,
            }
            for ix, (image_fname, window) in enumerate(sources)
        ]

    def detect_selective_search(self, image_fnames):
        """
        Do windowed detection over Selective Search proposals by extracting
        the crop and warping to the input dimensions of the net.

        Parameters
        ----------
        image_fnames: list

        Returns
        -------
        detections: list of {filename: image filename, window: crop
            coordinates, prediction: prediction vector} dicts.
        """
        # Imported lazily so the module is only required in this mode.
        import selective_search_ijcv_with_python as selective_search

        # Make absolute paths so MATLAB can find the files.
        image_fnames = [os.path.abspath(f) for f in image_fnames]
        windows_list = selective_search.get_windows(
            image_fnames,
            cmd='selective_search_rcnn'
        )

        # Run windowed detection on the selective search list.
        return self.detect_windows(zip(image_fnames, windows_list))

    def crop(self, im, window):
        """
        Crop a window from the image for detection. Include surrounding context
        according to the `context_pad` configuration.

        Parameters
        ----------
        im: H x W x K image ndarray to crop.
        window: bounding box coordinates as ymin, xmin, ymax, xmax.

        Returns
        -------
        crop: cropped window. With context padding enabled this is an array
            of shape `self.crop_dims` pre-filled with `self.crop_mean`, into
            which the resized context crop is pasted.
        """
        # Crop window from the image. Slice bounds are cast to int because
        # the coordinates may arrive as floats, which NumPy refuses to use
        # as indices.
        crop = im[int(window[0]):int(window[2]),
                  int(window[1]):int(window[3])]

        if self.context_pad:
            box = np.array(window, dtype=np.float64)
            crop_size = self.blobs[self.inputs[0]].width  # assumes square
            scale = crop_size / (1. * crop_size - self.context_pad * 2)

            # Crop a box + surrounding context.
            half_h = (box[2] - box[0] + 1) / 2.
            half_w = (box[3] - box[1] + 1) / 2.
            center = (box[0] + half_h, box[1] + half_w)
            scaled_dims = scale * np.array((-half_h, -half_w, half_h, half_w))
            box = np.round(np.tile(center, 2) + scaled_dims)
            full_h = box[2] - box[0] + 1
            full_w = box[3] - box[1] + 1
            scale_h = crop_size / full_h
            scale_w = crop_size / full_w
            # amount out-of-bounds, in network-input pixels
            pad_y = int(round(max(0, -box[0]) * scale_h))
            pad_x = int(round(max(0, -box[1]) * scale_w))

            # Clip box to image dimensions.
            im_h, im_w = im.shape[:2]
            box = np.clip(box, 0., [im_h, im_w, im_h, im_w])
            clip_h = box[2] - box[0] + 1
            clip_w = box[3] - box[1] + 1
            assert(clip_h > 0 and clip_w > 0)
            crop_h = int(round(clip_h * scale_h))
            crop_w = int(round(clip_w * scale_w))
            # Keep the pasted region inside the crop_size x crop_size canvas.
            if pad_y + crop_h > crop_size:
                crop_h = crop_size - pad_y
            if pad_x + crop_w > crop_size:
                crop_w = crop_size - pad_x

            # collect with context padding and place in input
            # with mean padding
            y0, x0, y1, x1 = (int(v) for v in box)
            context_crop = im[y0:y1, x0:x1]
            context_crop = caffe.io.resize_image(context_crop, (crop_h, crop_w))
            crop = np.ones(self.crop_dims, dtype=np.float32) * self.crop_mean
            crop[pad_y:(pad_y + crop_h), pad_x:(pad_x + crop_w)] = context_crop

        return crop

    def configure_crop(self, context_pad):
        """
        Configure crop dimensions and amount of context for cropping.
        If context is included, make the special input mean for context padding.

        Sets `self.crop_dims` and `self.context_pad`; `self.crop_mean` is set
        only when `context_pad` is truthy.

        Parameters
        ----------
        context_pad : amount of context for cropping.
        """
        # crop dimensions: undo the transformer's transpose on the blob's
        # per-item shape. argsort yields the inverse permutation.
        in_ = self.inputs[0]
        tpose = self.transformer.transpose[in_]
        inv_tpose = np.argsort(tpose)
        self.crop_dims = np.array(self.blobs[in_].data.shape[1:])[inv_tpose]

        # context padding
        self.context_pad = context_pad
        if self.context_pad:
            channel_order = self.transformer.channel_swap.get(in_)
            raw_scale = self.transformer.raw_scale.get(in_)
            # Padding context crops needs the mean in unprocessed input space.
            mean = self.transformer.mean.get(in_)
            if mean is not None:
                crop_mean = mean.copy().transpose(inv_tpose)
                if channel_order is not None:
                    # Inverse of the channel permutation; works for tuples,
                    # lists and ndarrays alike.
                    channel_order_inverse = np.argsort(channel_order)
                    crop_mean = crop_mean[:, :, channel_order_inverse]
                if raw_scale is not None:
                    # Out-of-place so an integer-typed mean does not fail.
                    crop_mean = crop_mean / raw_scale
                self.crop_mean = crop_mean
            else:
                self.crop_mean = np.zeros(self.crop_dims, dtype=np.float32)
|