123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173 |
- #!/usr/bin/env python
- """
- detector.py is an out-of-the-box windowed detector
- callable from the command line.
- By default it configures and runs the Caffe reference ImageNet model.
- Note that this model was trained for image classification and not detection,
- and finetuning for detection can be expected to improve results.
- The selective_search_ijcv_with_python code required for the selective search
- proposal mode is available at
- https://github.com/sergeyk/selective_search_ijcv_with_python
- TODO:
- - batch up image filenames as well: don't want to load all of them into memory
- - come up with a batching scheme that preserves order / keeps a unique ID
- """
- import numpy as np
- import pandas as pd
- import os
- import argparse
- import time
- import caffe
# Window-generation strategies accepted by the --crop_mode option.
CROP_MODES = [
    'list',
    'selective_search',
]
# Column labels for the four window coordinates, in the order used when
# detection windows are unpacked into dataframe columns.
COORD_COLS = [
    'ymin',
    'xmin',
    'ymax',
    'xmax',
]
def _build_arg_parser(pycaffe_dir):
    """Create the command-line parser for the detector script.

    pycaffe_dir is the directory of this script; the default model
    definition, weights and mean-file paths are joined onto it.
    """
    parser = argparse.ArgumentParser()
    # Required arguments: input and output.
    parser.add_argument(
        "input_file",
        help="Input txt/csv filename. If .txt, must be list of filenames.\
        If .csv, must be comma-separated file with header\
        'filename, xmin, ymin, xmax, ymax'"
    )
    parser.add_argument(
        "output_file",
        help="Output h5/csv filename. Format depends on extension."
    )
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(pycaffe_dir,
                "../models/bvlc_reference_caffenet/deploy.prototxt"),
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(pycaffe_dir,
                "../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"),
        help="Trained model weights file."
    )
    parser.add_argument(
        "--crop_mode",
        default="selective_search",
        choices=CROP_MODES,
        help="How to generate windows for detection."
    )
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
             "Set to '' for no mean subtraction."
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        help="Multiply input features by this scale to finish preprocessing."
    )
    parser.add_argument(
        "--raw_scale",
        type=float,
        default=255.0,
        help="Multiply raw input by this scale before preprocessing."
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
    )
    parser.add_argument(
        "--context_pad",
        type=int,
        default=16,
        help="Amount of surrounding context to collect in input window."
    )
    return parser


def _load_inputs(input_file):
    """Read the image list (.txt) or the window table (.csv) to process.

    Returns a list of filename strings for .txt input, or a DataFrame
    indexed by 'filename' for .csv input.

    Raises ValueError when the extension is neither txt nor csv.
    """
    lowered = input_file.lower()
    if lowered.endswith('txt'):
        with open(input_file) as f:
            # Skip blank lines so they are not passed on as image filenames.
            return [line.strip() for line in f if line.strip()]
    if lowered.endswith('csv'):
        inputs = pd.read_csv(input_file, sep=',', dtype={'filename': str})
        inputs.set_index('filename', inplace=True)
        return inputs
    raise ValueError("Unknown input file type: not in txt or csv.")


def _save_detections(df, output_file):
    """Write the detections frame to CSV or HDF5, chosen by file extension."""
    if output_file.lower().endswith('csv'):
        # Expand the per-window feature vector into one column per class.
        # The number of classes is taken from the feature width rather than
        # from a constant, since none is defined in this file.
        feats = np.vstack(df['feat'])
        class_cols = ['class{}'.format(x) for x in range(feats.shape[1])]
        df[class_cols] = pd.DataFrame(
            data=feats, index=df.index, columns=class_cols)
        df.to_csv(output_file, columns=COORD_COLS + class_cols)
    else:
        # Any other extension is written as HDF5 under the key 'df'.
        df.to_hdf(output_file, key='df', mode='w')


def main(argv):
    """Run windowed detection from the command line and save the results.

    Parses the options, builds a caffe.Detector, loads the input list or
    window table, runs detection in the selected crop mode, and writes the
    detections to the output file (CSV or HDF5 depending on extension).

    argv is the full argument vector including the program name at
    index 0 (e.g. sys.argv). Passing None makes argparse read sys.argv.

    Raises ValueError for an unrecognized input extension, or when
    --crop_mode list is combined with an input that is not a .csv table.
    """
    pycaffe_dir = os.path.dirname(__file__)
    parser = _build_arg_parser(pycaffe_dir)
    # Honor the argv handed in by the caller; drop the program name.
    args = parser.parse_args(None if argv is None else argv[1:])

    mean, channel_swap = None, None
    if args.mean_file:
        mean = np.load(args.mean_file)
        if mean.shape[1:] != (1, 1):
            # Average away the two trailing axes, leaving one value per
            # entry of the leading axis (presumably per channel).
            mean = mean.mean(1).mean(1)
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")

    # Make detector.
    detector = caffe.Detector(args.model_def, args.pretrained_model, mean=mean,
            input_scale=args.input_scale, raw_scale=args.raw_scale,
            channel_swap=channel_swap,
            context_pad=args.context_pad)

    # Load input. The timer started here also covers detection below.
    t = time.time()
    print("Loading input...")
    inputs = _load_inputs(args.input_file)

    # Detect.
    if args.crop_mode == 'list':
        if not isinstance(inputs, pd.DataFrame):
            # A plain filename list carries no window coordinates.
            raise ValueError(
                "crop_mode 'list' requires a .csv input file with "
                "window coordinates.")
        # Unpack sequence of (image filename, windows).
        images_windows = [
            (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values)
            for ix in inputs.index.unique()
        ]
        detections = detector.detect_windows(images_windows)
    else:
        detections = detector.detect_selective_search(inputs)
    print("Processed {} windows in {:.3f} s.".format(len(detections),
                                                     time.time() - t))

    # Collect into dataframe with labeled fields.
    df = pd.DataFrame(detections)
    df.set_index('filename', inplace=True)
    df[COORD_COLS] = pd.DataFrame(
        data=np.vstack(df['window']), index=df.index, columns=COORD_COLS)
    del df['window']

    # Save results.
    t = time.time()
    _save_detections(df, args.output_file)
    print("Saved to {} in {:.3f} s.".format(args.output_file,
                                            time.time() - t))
if __name__ == '__main__':
    # Executed as a script: hand the process argument vector to main().
    import sys

    main(sys.argv)
|