Object Detection-YOLOv2 Input And Output Encoding

微信扫一扫,分享到朋友圈

Object Detection-YOLOv2 Input And Output Encoding

本文主要学习在PASCAL VOC2012数据集上训练YOLOv2时的Input Encoding和Output Encoding。

Import PASCAL VOC2012 data

import matplotlib.pyplot as plt
import numpy as np
import os, cv2
import sys
print(sys.version)

PASCAL VOC2012支持的Object类别如下:

# The 20 object categories of PASCAL VOC2012. The position of a name in this
# list is the class index used elsewhere in this file.
LABELS = [
    'aeroplane', 'bicycle', 'bird', 'boat', 'bottle',
    'bus', 'car', 'cat', 'chair', 'cow',
    'diningtable', 'dog', 'horse', 'motorbike', 'person',
    'pottedplant', 'sheep', 'sofa', 'train', 'tvmonitor',
]

假设PASCAL VOC2012数据集的本地路径如下:

# Local folders holding the VOC2012 JPEG images and their XML annotations.
train_image_folder, train_annot_folder = (
    "../ObjectDetectionRCNN/VOCdevkit/VOC2012/JPEGImages/",
    "../ObjectDetectionRCNN/VOCdevkit/VOC2012/Annotations/",
)

通过前文定义的parse_annotation准备训练数据。

from backend import parse_annotation
# Seed NumPy's global RNG so that later random operations are reproducible.
np.random.seed(10)
# parse_annotation returns a pair; the first element is the collection of
# training instances and the second is bound to seen_train_labels.
parsed = parse_annotation(train_annot_folder, train_image_folder, labels=LABELS)
train_image, seen_train_labels = parsed
n_train = len(train_image)
print("N train = {}".format(n_train))

Input/Output Encoding

YOLO的Input Encoding是非常简单的,只需要读取一张图片,然后将其缩放到指定的大小即可。

Input的图片缩放之后,Output的xmin, ymin, xmax, ymax都需要按照对应的比率缩放。

import copy
class ImageReader(object):
    '''
    Read an image file, resize it to a fixed (IMAGE_H, IMAGE_W) size and
    rescale its bounding-box annotations by the same ratios.
    '''

    def __init__(self, IMAGE_H, IMAGE_W, norm=None):
        '''
        IMAGE_H : the height of the rescaled image, e.g., 416
        IMAGE_W : the width of the rescaled image, e.g., 416
        norm    : optional callable applied to the resized image
                  (skipped when None)
        '''
        self.IMAGE_H = IMAGE_H
        self.IMAGE_W = IMAGE_W
        self.norm = norm

    def encode_core(self, image, reorder_rgb=True):
        '''
        Resize `image` to the target size, optionally reverse its channel
        axis, then apply `norm` if one was supplied.
        '''
        # cv2.resize expects dsize as (width, height); passing (H, W) would
        # transpose the target size for non-square images.
        image = cv2.resize(image, (self.IMAGE_W, self.IMAGE_H))
        if reorder_rgb:
            # reverse the last (channel) axis, e.g. BGR -> RGB
            image = image[:, :, ::-1]
        if self.norm is not None:
            image = self.norm(image)
        return image

    def _rescale_objects(self, objects, h, w):
        '''
        Return a deep copy of `objects` whose xmin/xmax/ymin/ymax are mapped
        from an image of height `h` and width `w` to the target size.
        Values are truncated to int and clamped to [0, IMAGE_W] / [0, IMAGE_H].
        The input list is left untouched.
        '''
        rescaled = copy.deepcopy(objects)
        for obj in rescaled:
            for attr in ['xmin', 'xmax']:
                obj[attr] = int(obj[attr] * float(self.IMAGE_W) / w)
                obj[attr] = max(min(obj[attr], self.IMAGE_W), 0)
            for attr in ['ymin', 'ymax']:
                obj[attr] = int(obj[attr] * float(self.IMAGE_H) / h)
                obj[attr] = max(min(obj[attr], self.IMAGE_H), 0)
        return rescaled

    def fit(self, train_instance):
        '''
        read in and resize the image, annotations are resized accordingly.
        -- Input --
        train_instance : either a file name, or a dictionary containing
                         filename, height, width and object, e.g.
            {'filename': 'ObjectDetectionRCNN/VOCdevkit/VOC2012/JPEGImages/2008_000054.jpg',
             'height':   333,
             'width':    500,
             'object': [{'name': 'bird',
                         'xmax': 318,
                         'xmin': 284,
                         'ymax': 184,
                         'ymin': 100},
                        {'name': 'bird',
                         'xmax': 198,
                         'xmin': 112,
                         'ymax': 209,
                         'ymin': 146}]
            }
        -- Output --
        (image, all_objs) when the instance has an 'object' entry,
        otherwise just the image.
        -- Raises --
        IOError when the image cannot be read.
        '''
        if not isinstance(train_instance, dict):
            train_instance = {'filename': train_instance}
        image_name = train_instance['filename']
        image = cv2.imread(image_name)
        # cv2.imread signals failure by returning None; check before touching
        # .shape so the caller gets a meaningful error.
        if image is None:
            raise IOError('Cannot find {}'.format(image_name))
        # original size, needed to compute the rescaling ratios
        h, w = image.shape[:2]
        image = self.encode_core(image, reorder_rgb=True)
        if 'object' not in train_instance:
            return image
        all_objs = self._rescale_objects(train_instance['object'], h, w)
        return image, all_objs

ImageReader的用法如下。原始的图片大小为(500, 486, 3),Encode之后图片的大小为(416, 416, 3),xmin, xmax, ymin和ymax也按相同的比率做对应的缩放。

def normalize(image):
    """Return `image` divided by 255."""
    scaled = image / 255.
    return scaled
banner = "*" * 30

# Show the raw record of the first training instance.
print(banner)
print("Input")
timage = train_image[0]
for field, value in timage.items():
    print("  {}: {}".format(field, value))

# Encode it to 416x416 and show the rescaled annotations and image.
print(banner)
print("Output")
inputEncoder = ImageReader(IMAGE_H=416, IMAGE_W=416, norm=normalize)
image, all_objs = inputEncoder.fit(timage)
print("          {}".format(all_objs))
plt.imshow(image)
plt.title("image.shape={}".format(image.shape))
plt.show()

***********************************

Input

object: [{'name': 'person', 'xmin': 174, 'ymin': 101, 'xmax': 349, 'ymax': 351}]

filename: ../ObjectDetectionRCNN/VOCdevkit/VOC2012/JPEGImages/2007_000027.jpg

width: 486

height: 500

**********************************

Output

[{'name': 'person', 'xmin': 148, 'ymin': 84, 'xmax': 298, 'ymax': 292}]


Assign Object To Anchor Box

YOLOv2使用K-means Clustering得到了Anchor Box的大小和数量。


训练图片(Training Image)中的每一个Object,都被分配给包含该Object中心点的Grid Cell,以及该Grid Cell中与该Object的IOU最大的Anchor Box。

BestAnchorBoxFinder实现找到Object的最佳匹配的Anchor Box的功能。

class BestAnchorBoxFinder(object):
    '''
    Pick, for a box of given width and height, the anchor box with the
    highest IoU when both are placed with their top-left corner at (0, 0).
    '''

    def __init__(self, ANCHORS):
        '''
        ANCHORS: a np.array of even number length e.g.
        _ANCHORS = [4,2, ##  width=4, height=2,  flat large anchor box
                    2,4, ##  width=2, height=4,  tall large anchor box
                    1,1] ##  width=1, height=1,  small anchor box
        '''
        # pair up (width, height); a trailing unpaired value is ignored
        self.anchors = [BoundBox(0, 0, anchor_w, anchor_h)
                        for anchor_w, anchor_h in zip(ANCHORS[::2], ANCHORS[1::2])]

    def _interval_overlap(self, interval_a, interval_b):
        '''
        Length of the overlap of two 1-D intervals given as (start, end)
        with start <= end; 0 when they do not overlap.
        '''
        x1, x2 = interval_a
        x3, x4 = interval_b
        return max(0, min(x2, x4) - max(x1, x3))

    def bbox_iou(self, box1, box2):
        '''
        Intersection over union of two boxes exposing xmin, ymin, xmax, ymax.
        Returns 0.0 when the union has zero area (both boxes degenerate)
        instead of dividing by zero.
        '''
        intersect_w = self._interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
        intersect_h = self._interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])
        intersect = intersect_w * intersect_h
        w1, h1 = box1.xmax - box1.xmin, box1.ymax - box1.ymin
        w2, h2 = box2.xmax - box2.xmin, box2.ymax - box2.ymin
        union = w1 * h1 + w2 * h2 - intersect
        if union == 0:
            return 0.0
        return float(intersect) / union

    def find(self, center_w, center_h):
        '''
        center_w, center_h : width and height of the object's box, in the
                             same unit as the anchors.
        Returns (best_anchor, max_iou): the index of the anchor with the
        largest IoU (the first one on ties) and that IoU. Both are -1 when
        there are no anchors.
        '''
        best_anchor = -1
        max_iou = -1
        # Only the shape matters, so the box is shifted to the origin just
        # like the anchors.
        shifted_box = BoundBox(0, 0, center_w, center_h)
        for i, anchor in enumerate(self.anchors):
            iou = self.bbox_iou(shifted_box, anchor)
            if max_iou < iou:
                best_anchor = i
                max_iou = iou
        return (best_anchor, max_iou)
class BoundBox:
    """Box described by two corners, plus optional confidence and class scores."""

    def __init__(self, xmin, ymin, xmax, ymax, confidence=None, classes=None):
        # corner coordinates
        self.xmin = xmin
        self.ymin = ymin
        self.xmax = xmax
        self.ymax = ymax
        # the fields below are used during inference
        # probability
        self.confidence = confidence
        # class probabilities [c1, c2, .. cNclass]
        self.set_class(classes)

    def set_class(self, classes):
        """Store `classes` and record the index of its largest entry as `label`."""
        self.classes = classes
        self.label = np.argmax(classes)

    def get_label(self):
        """Return the index stored by the last set_class call."""
        return self.label

    def get_score(self):
        """Return the entry of `classes` at position `label`."""
        best = self.label
        return self.classes[best]

BestAnchorBoxFinder的示例用法如下:

# Anchor box width and height found in https://fairyonice.github.io/Part_1_Object_Detection_with_Yolo_for_VOC_2014_data_anchor_box_clustering.html
# Flat array of anchor shapes: even positions are widths, odd positions are
# heights.
_ANCHORS01 = np.array([
    0.08285376, 0.13705531,
    0.20850361, 0.39420716,
    0.80552421, 0.77665105,
    0.42194719, 0.62385487,
])
separator = ".." * 40

print(separator)
print("The three example anchor boxes:")
for count, (anchor_w, anchor_h) in enumerate(zip(_ANCHORS01[::2], _ANCHORS01[1::2])):
    print("anchor box index={}, w={}, h={}".format(count, anchor_w, anchor_h))

print(separator)
print("Allocate bounding box of various width and height into the three anchor boxes:")
babf = BestAnchorBoxFinder(_ANCHORS01)
# Try every (w, h) combination from {0.1, 0.3, 0.5, 0.7}.
for w_tenths in range(1, 9, 2):
    w = w_tenths / 10.
    for h_tenths in range(1, 9, 2):
        h = h_tenths / 10.
        best_anchor, max_iou = babf.find(w, h)
        message = "bounding box (w = {}, h = {}) --> best anchor box index = {}, iou = {:03.2f}"
        print(message.format(w, h, best_anchor, max_iou))

输出的结果如下:

The three example anchor boxes:
anchor box index=0, w=0.08285376, h=0.13705531
anchor box index=1, w=0.20850361, h=0.39420716
anchor box index=2, w=0.80552421, h=0.77665105
anchor box index=3, w=0.42194719, h=0.62385487
..........................
Allocate bounding box of various width and height into the three anchor boxes:
bounding box (w = 0.1, h = 0.1) --> best anchor box index = 0, iou = 0.63
bounding box (w = 0.1, h = 0.3) --> best anchor box index = 0, iou = 0.38
bounding box (w = 0.1, h = 0.5) --> best anchor box index = 1, iou = 0.42
bounding box (w = 0.1, h = 0.7) --> best anchor box index = 1, iou = 0.35
bounding box (w = 0.3, h = 0.1) --> best anchor box index = 0, iou = 0.25
bounding box (w = 0.3, h = 0.3) --> best anchor box index = 1, iou = 0.57
bounding box (w = 0.3, h = 0.5) --> best anchor box index = 3, iou = 0.57
bounding box (w = 0.3, h = 0.7) --> best anchor box index = 3, iou = 0.65
bounding box (w = 0.5, h = 0.1) --> best anchor box index = 1, iou = 0.19
bounding box (w = 0.5, h = 0.3) --> best anchor box index = 3, iou = 0.44
bounding box (w = 0.5, h = 0.5) --> best anchor box index = 3, iou = 0.70
bounding box (w = 0.5, h = 0.7) --> best anchor box index = 3, iou = 0.75
bounding box (w = 0.7, h = 0.1) --> best anchor box index = 1, iou = 0.16
bounding box (w = 0.7, h = 0.3) --> best anchor box index = 3, iou = 0.37
bounding box (w = 0.7, h = 0.5) --> best anchor box index = 2, iou = 0.56
bounding box (w = 0.7, h = 0.7) --> best anchor box index = 2, iou = 0.78

Bounding Box Encoding


VOC2012数据中,每个Object的记录格式如下:

[{'name': 'person', 'xmin': 150, 'ymin': 84, 'xmax': 300, 'ymax': 294}]

YOLOv2中Bounding Box的坐标格式如下:

[{'name': 'person', 'center_x': 225, 'center_y': 189, 'center_w': 150, 'center_h': 210}]

Rescale Bounding Box

上述Bounding Box的单位是像素(pixel),在神经网络训练中需要对Bounding Box坐标标准化,将bx,bw缩放到[0, GRID_W], by,bh缩放到[0, GRID_H]。

YOLOv2将图片划分为13×13的网格,如果Bounding Box的中心y值为150px,标准化之后y值为:150 / 416 * 13 = 4.6875。

def rescale_centerxy(obj, config):
    '''
    Return the box centre (center_x, center_y) expressed in grid-cell units.

    obj:     dictionary containing xmin, xmax, ymin, ymax
    config : dictionary containing IMAGE_W, GRID_W, IMAGE_H and GRID_H
    '''
    # size of one grid cell, in the same unit as the box coordinates
    cell_w = float(config['IMAGE_W']) / config['GRID_W']
    cell_h = float(config['IMAGE_H']) / config['GRID_H']
    x_mid = .5 * (obj['xmin'] + obj['xmax'])
    y_mid = .5 * (obj['ymin'] + obj['ymax'])
    return (x_mid / cell_w, y_mid / cell_h)
def rescale_cebterwh(obj, config):
    '''
    Return the box size (center_w, center_h) expressed in grid-cell units.

    obj:     dictionary containing xmin, xmax, ymin, ymax
    config : dictionary containing IMAGE_W, GRID_W, IMAGE_H and GRID_H
    '''
    # size of one grid cell, in the same unit as the box coordinates
    cell_w = float(config['IMAGE_W']) / config['GRID_W']
    cell_h = float(config['IMAGE_H']) / config['GRID_H']
    box_w = obj['xmax'] - obj['xmin']
    box_h = obj['ymax'] - obj['ymin']
    return (box_w / cell_w, box_h / cell_h)

Rescale Bounding Box的示例代码如下:

# Worked example: express one box given in pixels in grid-cell units.
obj    = {'xmin': 150, 'ymin': 84, 'xmax': 300, 'ymax': 294}
config = {"IMAGE_W":416,"IMAGE_H":416,"GRID_W":13,"GRID_H":13}
center_x, center_y = rescale_centerxy(obj,config)
center_w, center_h = rescale_cebterwh(obj,config)
# The two header messages previously contained typos ("cebter", "abd").
print("center_x and center_w should range between 0 and {}".format(config["GRID_W"]))
print("center_y and center_h should range between 0 and {}".format(config["GRID_H"]))
print("center_x = {:06.3f} range between 0 and {}".format(center_x, config["GRID_W"]))
print("center_y = {:06.3f} range between 0 and {}".format(center_y, config["GRID_H"]))
print("center_w = {:06.3f} range between 0 and {}".format(center_w, config["GRID_W"]))
print("center_h = {:06.3f} range between 0 and {}".format(center_h, config["GRID_H"]))

Keras’s BatchGenerator

理清楚数据的处理逻辑之后,结合上述讨论的Input和Output Encoding构造Keras的Batch Generator。Batch Generator的作用是在神经网络训练过程中,批量提供输入数据(Input Batch)和Ground Truth数据(Output Batch)。

Input Batch

x_batch的shape为: (BATCH_SIZE, IMAGE_H, IMAGE_W, N_Channels),IMAGE_H、IMAGE_W是图像的高和宽,N_Channels是图像的通道数。

Output Batch

y_batch的shape为(BATCH_SIZE, GRID_H, GRID_W, BOX, 4 + 1 + N classes)。GRID_H和GRID_W是YOLOv2划分的网格数量;Box是预定义的Anchor Box的数量。

y_batch[iframe, igrid_h, igrid_w, ianchor, :4]是(igrid_h, igrid_w)网格(Grid Cell)中包含第ianchor个anchor的Bounding Box坐标(center_x, center_y, center_w, center_h);如果网格(igrid_h, igrid_w)的第ianchor个anchor中不包含任何Object,Bounding Box坐标(center_x, center_y, center_w, center_h)的坐标值都为0。

y_batch[iframe, igrid_h, igrid_w, ianchor, 4]的值表示网格(igrid_h, igrid_w)的第ianchor个Anchor中是否包含Object。如果包含Object,该值为1,否则为0。

y_batch[iframe, igrid_h, igrid_w, ianchor, 5:]表达网格(igrid_h, igrid_w)的第ianchor个Anchor中分类信息。

b_batch的shape为(BATCH_SIZE, 1, 1, 1, TRUE_BOX_BUFFER, 4)。

b_batch[iframe, 0, 0, 0, ibuffer, :]是第iframe张图片中第ibuffer个Object的Bounding Box(center_x, center_y, center_w, center_h)。b_batch只是为了方便计算loss,YOLOv2的loss函数比较复杂,后面单独分析。

from keras.utils import Sequence
class SimpleBatchGenerator(Sequence):
    '''
    Batch generator producing ([x_batch, b_batch], y_batch) triples: resized
    input images, a buffer of ground-truth boxes, and the grid/anchor encoded
    ground truth.
    '''

    def __init__(self, images, config, norm=None, shuffle=True):
        '''
        images : list of training instances, each accepted by ImageReader.fit
                 and carrying an 'object' entry.
        config : dictionary containing necessary hyper parameters for training. e.g.,
            {
            'IMAGE_H'         : 416,
            'IMAGE_W'         : 416,
            'GRID_H'          : 13,
            'GRID_W'          : 13,
            'LABELS'          : ['aeroplane',  'bicycle', 'bird',  'boat',      'bottle',
                                 'bus',        'car',      'cat',  'chair',     'cow',
                                 'diningtable','dog',    'horse',  'motorbike', 'person',
                                 'pottedplant','sheep',  'sofa',   'train',   'tvmonitor'],
            'ANCHORS'         : array([ 1.07709888,   1.78171903,
                                        2.71054693,   5.12469308,
                                       10.47181473,  10.09646365,
                                        5.48531347,   8.11011331]),
            'BATCH_SIZE'      : 16,
            'TRUE_BOX_BUFFER' : 50,
            }
        norm    : optional callable forwarded to ImageReader.
        shuffle : when true, `images` is shuffled now and after every epoch.

        Note: `config` is stored by reference and gains the keys 'BOX' and
        'CLASS'; `images` is shuffled in place.
        '''
        # Harmless for a plain base class; a base class that defines its own
        # initialiser needs it to run.
        super(SimpleBatchGenerator, self).__init__()
        self.config = config
        self.config["BOX"] = int(len(self.config['ANCHORS'])/2)
        self.config["CLASS"] = len(self.config['LABELS'])
        self.images = images
        self.bestAnchorBoxFinder = BestAnchorBoxFinder(config['ANCHORS'])
        self.imageReader = ImageReader(config['IMAGE_H'], config['IMAGE_W'], norm=norm)
        self.shuffle = shuffle
        if self.shuffle:
            np.random.shuffle(self.images)

    def __len__(self):
        '''Number of batches per epoch (the last one may reuse earlier images).'''
        return int(np.ceil(float(len(self.images))/self.config['BATCH_SIZE']))

    def __getitem__(self, idx):
        '''
        == input ==
        idx : non-negative integer value e.g., 0
        == output ==
        x_batch: The numpy array of shape  (BATCH_SIZE, IMAGE_H, IMAGE_W, N channels).
            x_batch[iframe,:,:,:] contains a iframeth frame of size  (IMAGE_H,IMAGE_W).
        y_batch:
            The numpy array of shape  (BATCH_SIZE, GRID_H, GRID_W, BOX, 4 + 1 + N classes).
            BOX = The number of anchor boxes.
            y_batch[iframe,igrid_h,igrid_w,ianchor,:4] contains (center_x,center_y,center_w,center_h)
            of ianchorth anchor at  grid cell=(igrid_h,igrid_w) if the object exists in
            this (grid cell, anchor) pair, else they simply contain 0.
            y_batch[iframe,igrid_h,igrid_w,ianchor,4] contains 1 if the object exists in this
            (grid cell, anchor) pair, else it contains 0.
            y_batch[iframe,igrid_h,igrid_w,ianchor,5 + iclass] contains 1 if the iclass^th
            class object exists in this (grid cell, anchor) pair, else it contains 0.
        b_batch:
            The numpy array of shape (BATCH_SIZE, 1, 1, 1, TRUE_BOX_BUFFER, 4).
            b_batch[iframe,0,0,0,ibuffer,:] contains ibufferth object's
            (center_x,center_y,center_w,center_h) in iframeth frame.
            If ibuffer > N objects in iframeth frame, then the values are simply 0.
            TRUE_BOX_BUFFER has to be some large number, so that the frame with the
            biggest number of objects can also record all objects.
            The order of the objects do not matter.
            This is just a hack to easily calculate loss.

        When there are fewer images than BATCH_SIZE, the leading dimension of
        all three arrays is the number of images instead.
        '''
        batch_size = self.config['BATCH_SIZE']
        l_bound = idx * batch_size
        r_bound = (idx + 1) * batch_size
        if r_bound > len(self.images):
            # Last batch: slide the window back so it stays full, but never
            # before the start of the list (a negative bound would make the
            # slice and the array sizes disagree).
            r_bound = len(self.images)
            l_bound = max(0, r_bound - batch_size)
        n_frames = r_bound - l_bound

        ## prepare empty storage space: this will be output
        # input images
        x_batch = np.zeros((n_frames, self.config['IMAGE_H'], self.config['IMAGE_W'], 3))
        # buffer of up to TRUE_BOX_BUFFER ground-truth boxes per frame
        b_batch = np.zeros((n_frames, 1, 1, 1, self.config['TRUE_BOX_BUFFER'], 4))
        # desired network output
        y_batch = np.zeros((n_frames, self.config['GRID_H'], self.config['GRID_W'],
                            self.config['BOX'], 4+1+len(self.config['LABELS'])))

        for instance_count, train_instance in enumerate(self.images[l_bound:r_bound]):
            # read and resize the image, rescale object's position and size
            img, all_objs = self.imageReader.fit(train_instance)
            # construct output from object's x, y, w, h
            true_box_index = 0
            for obj in all_objs:
                # skip empty boxes and classes that are not being trained on
                if not (obj['xmax'] > obj['xmin'] and obj['ymax'] > obj['ymin']
                        and obj['name'] in self.config['LABELS']):
                    continue
                center_x, center_y = rescale_centerxy(obj, self.config)
                # the grid cell containing the box centre
                grid_x = int(np.floor(center_x))
                grid_y = int(np.floor(center_y))
                if not (grid_x < self.config['GRID_W'] and grid_y < self.config['GRID_H']):
                    continue
                obj_indx = self.config['LABELS'].index(obj['name'])
                center_w, center_h = rescale_cebterwh(obj, self.config)
                box = [center_x, center_y, center_w, center_h]
                best_anchor, max_iou = self.bestAnchorBoxFinder.find(center_w, center_h)
                # assign ground truth x, y, w, h, confidence and class probs to y_batch
                # it could happen that the same grid cell contain 2 similar shape objects
                # as a result the same anchor box is selected as the best anchor box by the multiple objects
                # in such case, the object is over written
                y_batch[instance_count, grid_y, grid_x, best_anchor, 0:4] = box  # center_x, center_y, w, h
                y_batch[instance_count, grid_y, grid_x, best_anchor, 4] = 1.     # ground truth confidence is 1
                # clear the class bits of a previously written object so the
                # slot stays one-hot after an overwrite
                y_batch[instance_count, grid_y, grid_x, best_anchor, 5:] = 0
                y_batch[instance_count, grid_y, grid_x, best_anchor, 5+obj_indx] = 1  # class probability of the object
                # assign the true box to b_batch
                b_batch[instance_count, 0, 0, 0, true_box_index] = box
                # wrap around when a frame has more objects than the buffer holds
                true_box_index += 1
                true_box_index = true_box_index % self.config['TRUE_BOX_BUFFER']
            x_batch[instance_count] = img
        return [x_batch, b_batch], y_batch

    def on_epoch_end(self):
        '''Reshuffle the images in place when shuffling is enabled.'''
        if self.shuffle:
            np.random.shuffle(self.images)

Experiment with BatchGenerator

前面_ANCHORS01的值界于[0, 1]之间,所以先对其进行缩放调整。

GRID_H,  GRID_W  = 13 , 13
# Work on a copy: the slice assignments below write in place, and without the
# copy they would also rescale _ANCHORS01 (and rescale it again on every re-run).
ANCHORS          = _ANCHORS01.copy()
# even positions are widths, odd positions are heights
ANCHORS[::2]     = ANCHORS[::2]*GRID_W
ANCHORS[1::2]    = ANCHORS[1::2]*GRID_H
ANCHORS

缩放之后结果如下:

array([1.07709888, 1.78171903, 2.71054693, 5.12469308, 10.47181473,

10.09646365, 5.48531347, 8.11011331])

IMAGE_H, IMAGE_W = 416, 416
BATCH_SIZE = 16
TRUE_BOX_BUFFER = 50

# Hyper parameters handed to the batch generator.
generator_config = dict(
    IMAGE_H=IMAGE_H,
    IMAGE_W=IMAGE_W,
    GRID_H=GRID_H,
    GRID_W=GRID_W,
    LABELS=LABELS,
    ANCHORS=ANCHORS,
    BATCH_SIZE=BATCH_SIZE,
    TRUE_BOX_BUFFER=TRUE_BOX_BUFFER,
)

train_batch_generator = SimpleBatchGenerator(
    train_image,
    generator_config,
    norm=normalize,
    shuffle=True,
)
# Fetch the batch with index 3 to inspect it below.
batch_inputs, y_batch = train_batch_generator.__getitem__(idx=3)
x_batch, b_batch = batch_inputs

Input/Output Shape

查看x_batch、b_batch、y_batch的数据的shape大小。

# Print the shape of each array next to its expected layout.
for template, batch in (
        ("x_batch: (BATCH_SIZE, IMAGE_H, IMAGE_W, N channels)           = {}", x_batch),
        ("y_batch: (BATCH_SIZE, GRID_H, GRID_W, BOX, 4 + 1 + N classes) = {}", y_batch),
        ("b_batch: (BATCH_SIZE, 1, 1, 1, TRUE_BOX_BUFFER, 4)            = {}", b_batch),
):
    print(template.format(batch.shape))
x_batch: (BATCH_SIZE, IMAGE_H, IMAGE_W, N channels)           = (16, 416, 416, 3)
y_batch: (BATCH_SIZE, GRID_H, GRID_W, BOX, 4 + 1 + N classes) = (16, 13, 13, 4, 25)
b_batch: (BATCH_SIZE, 1, 1, 1, TRUE_BOX_BUFFER, 4)            = (16, 1, 1, 1, 50, 4)

检查图片中的哪一个Grid Cell、哪一个Anchor Box中包含Objects。

iframe = 1


def check_object_in_grid_anchor_pair(irow):
    """Print every (grid row, grid column, anchor) slot of frame `irow` in
    y_batch whose confidence entry equals 1, together with its class name."""
    frame = y_batch[irow]
    label_names = np.array(LABELS)
    for igrid_h in range(generator_config["GRID_H"]):
        for igrid_w in range(generator_config["GRID_W"]):
            for ianchor in range(generator_config["BOX"]):
                vec = frame[igrid_h, igrid_w, ianchor, :]
                # vec[4] is the ground truth confidence
                if vec[4] != 1:
                    continue
                # names of the classes whose entry in vec[5:] is non-zero
                class_nm = label_names[np.where(vec[5:])]
                assert len(class_nm) == 1
                print("igrid_h={:02.0f},igrid_w={:02.0f},iAnchor={:02.0f}, {}".format(
                    igrid_h, igrid_w, ianchor, class_nm[0]))


check_object_in_grid_anchor_pair(iframe)

igrid_h=06,igrid_w=03,iAnchor=00, dog

igrid_h=08,igrid_w=10,iAnchor=03, motorbike

Visualize BatchGenerator

将BatchGenerator生成的x_batch和y_batch结果可视化出来,以便更直观地观察数据。

def plot_image_with_grid_cell_partition(irow):
    """Show frame `irow` of x_batch with the grid-cell boundaries drawn on top
    and one tick label per grid cell on each axis."""
    plt.figure(figsize=(15, 15))
    plt.imshow(x_batch[irow])
    # per axis: the function drawing a boundary line and the one setting ticks
    axis_tools = {
        "W": (plt.axvline, plt.xticks),
        "H": (plt.axhline, plt.yticks),
    }
    for wh in ["W", "H"]:
        n_cells = generator_config["GRID_" + wh]    ## 13
        n_pixels = generator_config["IMAGE_" + wh]  ## 416
        draw_line, set_ticks = axis_tools[wh]
        for count in range(n_cells):
            draw_line(n_pixels * count / n_cells, color="yellow", alpha=0.3)
        # ticks sit in the middle of each grid cell
        tick_positions = [(i + 0.5) * n_pixels / n_cells for i in range(n_cells)]
        tick_labels = ["iGRID{}={}".format(wh, i) for i in range(n_cells)]
        set_ticks(tick_positions, tick_labels)
def plot_grid(irow):
    """Draw, on the current figure, the centre mark and outline of every box
    stored in frame `irow` of y_batch, one colour per box."""
    import seaborn as sns
    color_palette = list(sns.xkcd_rgb.values())
    # factors converting grid-cell units back to pixels
    multx = generator_config["IMAGE_W"]/generator_config["GRID_W"]
    multy = generator_config["IMAGE_H"]/generator_config["GRID_H"]
    frame = y_batch[irow]
    iobj = 0
    for igrid_h in range(generator_config["GRID_H"]):
        for igrid_w in range(generator_config["GRID_W"]):
            for ianchor in range(generator_config["BOX"]):
                vec = frame[igrid_h, igrid_w, ianchor, :]
                # vec[4] is the ground truth confidence
                if vec[4] != 1:
                    continue
                x, y, w, h = vec[:4]
                c = color_palette[iobj]
                iobj += 1
                xmin, xmax = x - 0.5*w, x + 0.5*w
                ymin, ymax = y - 0.5*h, y + 0.5*h
                # center
                plt.text(x*multx, y*multy,
                         "X", color=c, fontsize=23)
                # the four sides of the box
                sides = (
                    ([xmin, xmin], [ymin, ymax]),
                    ([xmin, xmax], [ymin, ymin]),
                    ([xmax, xmax], [ymax, ymin]),
                    ([xmin, xmax], [ymax, ymax]),
                )
                for side_x, side_y in sides:
                    plt.plot(np.array(side_x)*multx,
                             np.array(side_y)*multy, color=c, linewidth=10)
# Draw frame `iframe` with its grid overlay, add the boxes on top, then display.
plot_image_with_grid_cell_partition(iframe)
plot_grid(iframe)
plt.show()

igrid_h=06,igrid_w=03,iAnchor=00, dog

​igrid_h=08,igrid_w=10,iAnchor=03, motorbike

More examples

# Repeat the printout and the plots for frames 5 through 9 of the batch.
for irow in range(5, 10):
    print("-" * 30)
    check_object_in_grid_anchor_pair(irow)
    plot_image_with_grid_cell_partition(irow)
    plot_grid(irow)
    plt.show()

​igrid_h=10,igrid_w=02,iAnchor=01, chair

igrid_h=11,igrid_w=06,iAnchor=01, chair

igrid_h=09,igrid_w=11,iAnchor=01, person

参考材料

https://fairyonice.github.io/Part%202_Object_Detection_with_Yolo_using_VOC_2014_data_input_and_output_encoding.html#Example-useage-for-BestAnchorBoxFinder

常见MFC函数

上一篇

你也可能喜欢

Object Detection-YOLOv2 Input And Output Encoding

长按储存图像,分享给朋友