#!/usr/bin/env python
"""
detect.py is an out-of-the-box windowed detector
callable from the command line.

By default it configures and runs the Caffe reference ImageNet model.
Note that this model was trained for image classification and not detection,
and fine-tuning for detection can be expected to improve results.

The selective_search_ijcv_with_python code required for the selective search
proposal mode is available at
https://github.com/sergeyk/selective_search_ijcv_with_python

TODO:
- batch up image filenames as well: don't want to load all of them into memory
- come up with a batching scheme that preserves order / keeps a unique ID
"""
import numpy as np
import pandas as pd
import os
import argparse
import time

import caffe

CROP_MODES = ['list', 'selective_search']
COORD_COLS = ['ymin', 'xmin', 'ymax', 'xmax']


def main(argv):
    pycaffe_dir = os.path.dirname(__file__)

    parser = argparse.ArgumentParser()
    # Required arguments: input and output.
    parser.add_argument(
        "input_file",
        help="Input txt/csv filename. If .txt, must be list of filenames.\
        If .csv, must be comma-separated file with header\
        'filename, xmin, ymin, xmax, ymax'"
    )
    parser.add_argument(
        "output_file",
        help="Output h5/csv filename. Format depends on extension."
    )
    # Optional arguments.
    parser.add_argument(
        "--model_def",
        default=os.path.join(pycaffe_dir,
                             "../models/bvlc_reference_caffenet/deploy.prototxt"),
        help="Model definition file."
    )
    parser.add_argument(
        "--pretrained_model",
        default=os.path.join(pycaffe_dir,
                             "../models/bvlc_reference_caffenet/bvlc_reference_caffenet.caffemodel"),
        help="Trained model weights file."
    )
    parser.add_argument(
        "--crop_mode",
        default="selective_search",
        choices=CROP_MODES,
        help="How to generate windows for detection."
    )
    parser.add_argument(
        "--gpu",
        action='store_true',
        help="Switch for gpu computation."
    )
    parser.add_argument(
        "--mean_file",
        default=os.path.join(pycaffe_dir,
                             'caffe/imagenet/ilsvrc_2012_mean.npy'),
        help="Data set image mean of H x W x K dimensions (numpy array). " +
             "Set to '' for no mean subtraction."
    )
    parser.add_argument(
        "--input_scale",
        type=float,
        help="Multiply input features by this scale to finish preprocessing."
    )
    parser.add_argument(
        "--raw_scale",
        type=float,
        default=255.0,
        help="Multiply raw input by this scale before preprocessing."
    )
    parser.add_argument(
        "--channel_swap",
        default='2,1,0',
        help="Order to permute input channels. The default converts " +
             "RGB -> BGR since BGR is the Caffe default by way of OpenCV."
    )
    parser.add_argument(
        "--context_pad",
        type=int,
        default=16,
        help="Amount of surrounding context to collect in input window."
    )
    args = parser.parse_args()
    mean, channel_swap = None, None
    if args.mean_file:
        mean = np.load(args.mean_file)
        if mean.shape[1:] != (1, 1):
            # Collapse a full mean image to per-channel mean values.
            mean = mean.mean(1).mean(1)
    if args.channel_swap:
        channel_swap = [int(s) for s in args.channel_swap.split(',')]

    if args.gpu:
        caffe.set_mode_gpu()
        print("GPU mode")
    else:
        caffe.set_mode_cpu()
        print("CPU mode")
    # Make detector.
    detector = caffe.Detector(args.model_def, args.pretrained_model, mean=mean,
                              input_scale=args.input_scale,
                              raw_scale=args.raw_scale,
                              channel_swap=channel_swap,
                              context_pad=args.context_pad)

    # Load input.
    t = time.time()
    print("Loading input...")
    if args.input_file.lower().endswith('txt'):
        with open(args.input_file) as f:
            inputs = [_.strip() for _ in f.readlines()]
    elif args.input_file.lower().endswith('csv'):
        inputs = pd.read_csv(args.input_file, sep=',', dtype={'filename': str})
        inputs.set_index('filename', inplace=True)
    else:
        raise Exception("Unknown input file type: not in txt or csv.")

    # Detect.
    if args.crop_mode == 'list':
        # Unpack sequence of (image filename, windows).
        images_windows = [
            (ix, inputs.iloc[np.where(inputs.index == ix)][COORD_COLS].values)
            for ix in inputs.index.unique()
        ]
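        # For reference, images_windows is a sequence of
        # (image filename, window array) pairs; the values below are
        # purely illustrative:
        #   [('images/cat.jpg', np.array([[20, 10, 200, 240],
        #                                 [60, 50, 280, 300]])), ...]
        # with each row ordered as COORD_COLS: [ymin, xmin, ymax, xmax].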
        detections = detector.detect_windows(images_windows)
    else:
        detections = detector.detect_selective_search(inputs)
    print("Processed {} windows in {:.3f} s.".format(len(detections),
                                                     time.time() - t))
    # Collect into dataframe with labeled fields.
    df = pd.DataFrame(detections)
    df.set_index('filename', inplace=True)
    df[COORD_COLS] = pd.DataFrame(
        data=np.vstack(df['window']), index=df.index, columns=COORD_COLS)
    del df['window']
    # Save results.
    t = time.time()
    if args.output_file.lower().endswith('csv'):
        # csv
        # Enumerate the class scores, one column per class, inferring the
        # number of classes from the feature vector length.
        feats = np.vstack(df['feat'])
        class_cols = ['class{}'.format(x) for x in range(feats.shape[1])]
        df[class_cols] = pd.DataFrame(
            data=feats, index=df.index, columns=class_cols)
        df.to_csv(args.output_file, columns=COORD_COLS + class_cols)
    else:
        # h5
        df.to_hdf(args.output_file, 'df', mode='w')
    print("Saved to {} in {:.3f} s.".format(args.output_file,
                                            time.time() - t))


if __name__ == "__main__":
    import sys

    main(sys.argv)
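# Reading the saved results back is a one-liner in pandas; the paths here are
# illustrative and should match whatever was passed as output_file above:
#
#   df = pd.read_hdf('detections.h5', 'df')   # h5 output
#   df = pd.read_csv('detections.csv')        # csv output
#
# Each row holds one window's coordinates (COORD_COLS) plus the per-class
# scores: the class{i} columns for csv output, the raw 'feat' column for h5.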