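r"""Converts the raw abc dataset annotations to TFRecords for object_detection.

Example usage (the script filename below is an assumption; adjust it to
wherever this file is saved):

  python create_abc_tf_record.py \
      --data_dir=/research/abc/data/raw_data \
      --output_dir=/research/abc/data \
      --label_map_path=/research/abc/data/abc_label_map.pbtxt \
      --num_shards=1
"""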
import hashlib
import io
import logging
import os
import random
import re

import contextlib2
from lxml import etree
import numpy as np
import PIL.Image
import tensorflow as tf

from object_detection.dataset_tools import tf_record_creation_util
from object_detection.utils import dataset_util
from object_detection.utils import label_map_util

flags = tf.app.flags
flags.DEFINE_string('data_dir', '/research/abc/data/raw_data',
                    'Root directory to raw pet dataset.')
flags.DEFINE_string('output_dir', '/research/abc/data',
                    'Path to directory to output TFRecords.')
flags.DEFINE_string('label_map_path', '/research/abc/data/abc_label_map.pbtxt',
                    'Path to label map proto')
flags.DEFINE_boolean('faces_only', True, 'If True, generates bounding boxes '
                     'for pet faces. Otherwise generates bounding boxes (as '
                     'well as segmentations for full pet bodies). Note that '
                     'in the latter case, the resulting files are much larger.')
flags.DEFINE_string('mask_type', 'png', 'How to represent instance '
                    'segmentation masks. Options are "png" or "numerical".')
flags.DEFINE_integer('num_shards', 1, 'Number of TFRecord shards')
FLAGS = flags.FLAGS
def get_class_name_from_filename(file_name):
  """Gets the class name from a file.

  Args:
    file_name: The file name to get the class name from,
      e.g. "american_pit_bull_terrier_105.jpg".

  Returns:
    A string of the class name.
  """
  match = re.match(r'([A-Za-z_]+)(_[0-9]+\.jpg)', file_name, re.I)
  return match.groups()[0]
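# Example: get_class_name_from_filename('american_pit_bull_terrier_105.jpg')
# returns 'american_pit_bull_terrier' (everything before the trailing
# _<number>.jpg suffix, matched case-insensitively).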
def dict_to_tf_example(data,
                       label_map_dict,
                       image_subdirectory,
                       ignore_difficult_instances=False,
                       faces_only=True,
                       mask_type='png'):
  """Convert XML derived dict to tf.Example proto.

  Notice that this function normalizes the bounding box coordinates provided
  by the raw data.

  Args:
    data: dict holding PASCAL XML fields for a single image (obtained by
      running dataset_util.recursive_parse_xml_to_dict).
    label_map_dict: A map from string label names to integer ids.
    image_subdirectory: String specifying subdirectory within the Pascal
      dataset directory holding the actual image data.
    ignore_difficult_instances: Whether to skip difficult instances in the
      dataset (default: False).
    faces_only: If True, generates bounding boxes for pet faces. Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.

  Returns:
    example: The converted tf.Example.

  Raises:
    ValueError: if the image pointed to by data['filename'] is not a valid JPEG.
  """
  img_path = os.path.join(image_subdirectory, data['filename'])
  with tf.gfile.GFile(img_path, 'rb') as fid:
    encoded_jpg = fid.read()
  encoded_jpg_io = io.BytesIO(encoded_jpg)
  image = PIL.Image.open(encoded_jpg_io)
  if image.format != 'JPEG':
    raise ValueError('Image format not JPEG')
  key = hashlib.sha256(encoded_jpg).hexdigest()
  width = int(data['size']['width'])
  height = int(data['size']['height'])

  xmins = []
  ymins = []
  xmaxs = []
  ymaxs = []
  classes = []
  classes_text = []
  truncated = []
  poses = []
  difficult_obj = []
  masks = []
  if 'object' in data:
    for obj in data['object']:
      difficult = bool(int(obj['difficult']))
      if ignore_difficult_instances and difficult:
        continue
      difficult_obj.append(int(difficult))

      if faces_only:
        xmin = float(obj['bndbox']['xmin'])
        xmax = float(obj['bndbox']['xmax'])
        ymin = float(obj['bndbox']['ymin'])
        ymax = float(obj['bndbox']['ymax'])

      xmins.append(xmin / width)
      ymins.append(ymin / height)
      xmaxs.append(xmax / width)
      ymaxs.append(ymax / height)
      class_name = get_class_name_from_filename(data['filename'])
      classes_text.append(class_name.encode('utf8'))
      classes.append(label_map_dict[class_name])
      truncated.append(int(obj['truncated']))
      poses.append(obj['pose'].encode('utf8'))

  feature_dict = {
      'image/height': dataset_util.int64_feature(height),
      'image/width': dataset_util.int64_feature(width),
      'image/filename': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/source_id': dataset_util.bytes_feature(
          data['filename'].encode('utf8')),
      'image/key/sha256': dataset_util.bytes_feature(key.encode('utf8')),
      'image/encoded': dataset_util.bytes_feature(encoded_jpg),
      'image/format': dataset_util.bytes_feature('jpeg'.encode('utf8')),
      'image/object/bbox/xmin': dataset_util.float_list_feature(xmins),
      'image/object/bbox/xmax': dataset_util.float_list_feature(xmaxs),
      'image/object/bbox/ymin': dataset_util.float_list_feature(ymins),
      'image/object/bbox/ymax': dataset_util.float_list_feature(ymaxs),
      'image/object/class/text': dataset_util.bytes_list_feature(classes_text),
      'image/object/class/label': dataset_util.int64_list_feature(classes),
      'image/object/difficult': dataset_util.int64_list_feature(difficult_obj),
      'image/object/truncated': dataset_util.int64_list_feature(truncated),
      'image/object/view': dataset_util.bytes_list_feature(poses),
  }

  example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
  return example
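# The feature keys above follow the TF Object Detection API's standard
# tf.Example schema (image/encoded, image/object/bbox/*, image/object/class/*),
# so the records should be readable by the framework's stock decoders. Note
# that only the faces_only bounding-box path is populated in this version: the
# masks list is initialized but no mask features are written, regardless of
# mask_type.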
def create_tf_record(output_filename,
                     num_shards,
                     label_map_dict,
                     annotations_dir,
                     image_dir,
                     examples,
                     faces_only=True,
                     mask_type='png'):
  """Creates a TFRecord file from examples.

  Args:
    output_filename: Path to where output file is saved.
    num_shards: Number of shards for output file.
    label_map_dict: The label map dictionary.
    annotations_dir: Directory where annotation files are stored.
    image_dir: Directory where image files are stored.
    examples: Examples to parse and save to tf record.
    faces_only: If True, generates bounding boxes for pet faces. Otherwise
      generates bounding boxes (as well as segmentations for full pet bodies).
    mask_type: 'numerical' or 'png'. 'png' is recommended because it leads to
      smaller file sizes.
  """
  with contextlib2.ExitStack() as tf_record_close_stack:
    output_tfrecords = tf_record_creation_util.open_sharded_output_tfrecords(
        tf_record_close_stack, output_filename, num_shards)
    for idx, example in enumerate(examples):
      if idx % 100 == 0:
        logging.info('On image %d of %d', idx, len(examples))
      xml_path = os.path.join(annotations_dir, 'xmls', example + '.xml')

      if not os.path.exists(xml_path):
        logging.warning('Could not find %s, ignoring example.', xml_path)
        continue
      with tf.gfile.GFile(xml_path, 'r') as fid:
        xml_str = fid.read()
      xml = etree.fromstring(xml_str)
      data = dataset_util.recursive_parse_xml_to_dict(xml)['annotation']

      try:
        tf_example = dict_to_tf_example(
            data,
            label_map_dict,
            image_dir,
            faces_only=faces_only,
            mask_type=mask_type)
        if tf_example:
          shard_idx = idx % num_shards
          output_tfrecords[shard_idx].write(tf_example.SerializeToString())
      except ValueError:
        logging.warning('Invalid example: %s, ignoring.', xml_path)
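# Sharding in create_tf_record is round-robin (idx % num_shards). With the
# default --num_shards=1 a single file is still written, with a shard suffix
# appended to output_filename by open_sharded_output_tfrecords
# (e.g. abc_train.record-00000-of-00001).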
def main(_):
  data_dir = FLAGS.data_dir
  label_map_dict = label_map_util.get_label_map_dict(FLAGS.label_map_path)

  logging.info('Reading from abc dataset.')
  image_dir = os.path.join(data_dir, 'images')
  annotations_dir = os.path.join(data_dir, 'annotations')
  examples_path = os.path.join(annotations_dir, 'trainval.txt')
  examples_list = dataset_util.read_examples_list(examples_path)
  print(examples_list)

  random.seed(42)
  random.shuffle(examples_list)
  num_examples = len(examples_list)
  num_train = int(0.8 * num_examples)
  train_examples = examples_list[:num_train]
  val_examples = examples_list[num_train:]
  logging.info('%d training and %d validation examples.',
               len(train_examples), len(val_examples))
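  # The split is deterministic: with the shuffle seeded at 42, the same 80/20
  # train/validation partition is produced on every run.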
  train_output_path = os.path.join(FLAGS.output_dir, 'abc_train.record')
  val_output_path = os.path.join(FLAGS.output_dir, 'abc_val.record')

  create_tf_record(
      train_output_path,
      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
      train_examples,
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
  create_tf_record(
      val_output_path,
      FLAGS.num_shards,
      label_map_dict,
      annotations_dir,
      image_dir,
      val_examples,
      faces_only=FLAGS.faces_only,
      mask_type=FLAGS.mask_type)
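  # Optional spot check (a sketch, not part of the conversion itself): read a
  # record back with the TF 1.x record iterator. The glob accounts for the
  # shard suffix added to the output paths.
  #
  #   for shard_path in tf.gfile.Glob(train_output_path + '*'):
  #     for record in tf.python_io.tf_record_iterator(shard_path):
  #       example = tf.train.Example.FromString(record)
  #       print(example.features.feature['image/filename'].bytes_list.value)
  #       break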
if __name__ == '__main__':
  tf.app.run()