本文為建立在現有開源工具Augmentor上的進階工具(擴增部分為該開源工具)

適用於影像檔擴增,影像可為黑白或彩色圖,標註檔案需為yolo格式,label bbox為矩形

前言

在做產學的時候,因為有大量資料要處理,為了能讓模型有更好的表現,需要擴增一些資料,但若是該資料是帶有label的,則也會需要連同label一起擴增,因此就產生了這篇文章的契機

思路

我的想法是使用最直覺的方法:將bbox使用有色(下面使用紅色)矩形來覆蓋,如下圖,將覆蓋完的影像擴增後(本方法使用的是傾斜擴增),再偵測色塊位置,即可得出bbox在擴增之後的位置

car

步驟解析

依據自定義的方式使用aug_matrix函式(屬於Augmentor內的函式)定義傾斜矩陣matrix
使用read_file、yolobbox2bbox等函式讀取檔案與label,將yolo標記檔轉為影像座標後,把所有bbox存於dic變數裡
遍歷dic中的每個bbox並做以下動作:

  1. 使用drawredpoint函式將bbox以紅色矩形覆蓋於影像上
  2. 使用do函式(屬於Augmentor內的函式)將影像進行擴增(依先前定義好的擴增矩陣matrix)
  3. 利用getbboxfromimage函式對擴增完畢後的影像進行顏色偵測,偵測所有紅色像素所在位置並取最大及最小值作為bbox的位置
  4. 使用bbox2yolobbox函式將位置轉回yolo格式,並寫出擴增後資料

完整程式碼

本程式碼適用於影像檔擴增,影像可為黑白或彩色圖,標註檔案需為yolo格式,label bbox為矩形

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
import cv2
import os
import math
import numpy as np
import random
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw

def yolobbox2bbox(x, y, w, h, dw, dh):
    """Convert a normalised YOLO box to pixel corner coordinates.

    Args:
        x, y: Box centre as fractions of the image width / height.
        w, h: Box width / height as fractions of the image width / height.
        dw, dh: Image width / height in pixels.

    Returns:
        ``(left, top, right, bottom)`` as ints. Every coordinate is clamped
        into the image: ``0 .. dw - 1`` horizontally and ``0 .. dh - 1``
        vertically.
    """
    def _clamp(value, upper):
        # Truncate toward zero first (as int() does), then force the result
        # into [0, upper] so a box lying partly or wholly outside the image
        # can never yield an out-of-range pixel index.
        return max(0, min(int(value), upper))

    left = _clamp((x - w / 2) * dw, dw - 1)
    right = _clamp((x + w / 2) * dw, dw - 1)
    top = _clamp((y - h / 2) * dh, dh - 1)
    bottom = _clamp((y + h / 2) * dh, dh - 1)
    return (left, top, right, bottom)

def bbox2yolobbox(box, dw, dh):
    """Convert pixel corner coordinates to a normalised YOLO box.

    Args:
        box: ``(left, top, right, bottom)`` in pixels.
        dw, dh: Image width / height in pixels.

    Returns:
        ``(x_centre, y_centre, width, height)`` as fractions of the image
        size, each rounded to six decimal places.
    """
    left, top, right, bottom = box[0], box[1], box[2], box[3]
    centre_x = (left + right) / 2.0 / dw
    centre_y = (top + bottom) / 2.0 / dh
    rel_width = (right - left) / dw
    rel_height = (bottom - top) / dh
    return tuple(
        round(value, 6)
        for value in (centre_x, centre_y, rel_width, rel_height)
    )

def read_file(file, dw, dh):
    """Parse a YOLO label file into a mapping of pixel boxes to class ids.

    Each non-empty line is expected to be ``class x y w h`` separated by
    whitespace, with ``x y w h`` normalised to the image size.

    Args:
        file: An open text file (or any iterable of lines).
        dw, dh: Image width / height in pixels, used to de-normalise boxes.

    Returns:
        A dict mapping ``(left, top, right, bottom)`` pixel tuples, as
        produced by ``yolobbox2bbox``, to the integer class id. Two labels
        that convert to the same pixel box collapse into one entry.

    Raises:
        IndexError: If a non-empty line has fewer than five fields.
        ValueError: If a field cannot be parsed as a number.
    """
    dic = {}
    for line in file:
        # split() with no argument tolerates tabs, repeated spaces and the
        # trailing newline; blank lines produce an empty list and are skipped.
        fields = line.split()
        if not fields:
            continue
        class_id = int(fields[0])
        x, y, w, h = (
            float(fields[1]),
            float(fields[2]),
            float(fields[3]),
            float(fields[4]),
        )
        dic[yolobbox2bbox(x, y, w, h, dw, dh)] = class_id
    return dic

def drawredpoint(img, box):
    """Cover ``box`` on ``img`` with a filled pure-red rectangle.

    Args:
        img: The image to draw on; it is passed to ``ImageDraw.Draw``.
        box: ``(left, top, right, bottom)`` in pixels.

    Returns:
        The same ``img`` object that was passed in.
    """
    top_left = (box[0], box[1])
    bottom_right = (box[2], box[3])
    ImageDraw.Draw(img).rectangle([top_left, bottom_right], fill="#ff0000")
    return img

def getbboxfromimage(image):
    """Locate the bounding box of all pure-red pixels in an RGB image.

    Args:
        image: An RGB image convertible with ``np.array`` (for example a PIL
            image), giving an array indexed as ``[row, column, channel]``.

    Returns:
        ``(x1, y1, x2, y2)`` as Python ints: the smallest and largest column
        and row indices (inclusive) at which a pixel equals ``(255, 0, 0)``.

    Raises:
        ValueError: If the image contains no pure-red pixel, e.g. because
            the marked rectangle was transformed entirely out of the frame.
    """
    pixels = np.array(image)
    # Compare directly in RGB order; this matches the same pixels as
    # converting to BGR and testing against [0, 0, 255].
    red_mask = np.all(pixels == (255, 0, 0), axis=2)
    row_idx, col_idx = np.nonzero(red_mask)
    if col_idx.size == 0:
        raise ValueError("no pure-red (255, 0, 0) pixels found in image")

    return (
        int(col_idx.min()),
        int(row_idx.min()),
        int(col_idx.max()),
        int(row_idx.max()),
    )

def writeback(file_name, dic, image, image_path, label_path):
    """Write the augmented labels and image to disk.

    Args:
        file_name: Base name (no extension) for both output files.
        dic: Mapping of 4-tuples (box values) to class ids; one label line
            ``class v0 v1 v2 v3`` is written per entry.
        image: Object with a ``save(path)`` method, e.g. a PIL image.
        image_path: Output directory for the ``.jpg``. It is concatenated
            directly with ``file_name``, so it must end with a separator.
        label_path: Output directory for the ``.txt``; same rule applies.
    """
    new_image_path = image_path + file_name + '.jpg'
    new_label_path = label_path + file_name + '.txt'

    # The context manager closes the label file even if a write fails.
    with open(new_label_path, 'w') as label_file:
        for box, class_id in dic.items():
            fields = (class_id, box[0], box[1], box[2], box[3])
            line = ' '.join(str(value) for value in fields) + '\n'
            print(line)
            label_file.write(line)
    image.save(new_image_path)

def aug_matrix(skew_type, w, h, skew_magnitude=None):
    """Build the eight perspective coefficients for a random skew.

    The four image corners are displaced outward by a random amount chosen
    according to ``skew_type``, and the coefficients that map each displaced
    corner back onto its original corner are solved for. The result is used
    as the ``data`` argument of a PIL perspective transform.

    Args:
        skew_type: One of ``"ALL"``, ``"RANDOM"``, ``"TILT"``,
            ``"TILT_LEFT_RIGHT"``, ``"TILT_TOP_BOTTOM"`` or ``"CORNER"``.
        w, h: Image width / height in pixels.
        skew_magnitude: Maximum skew as a fraction of ``max(w, h)``. When
            omitted, the module-level ``magnitude`` setting is used.

    Returns:
        A NumPy array of shape ``(8,)`` holding the perspective coefficients.

    Raises:
        ValueError: If ``skew_type`` is not recognised, or the magnitude is
            too small to allow a skew of at least one pixel.
    """
    valid_types = (
        "ALL", "RANDOM", "TILT", "TILT_LEFT_RIGHT", "TILT_TOP_BOTTOM", "CORNER",
    )
    if skew_type not in valid_types:
        raise ValueError(
            "unknown skew_type %r; expected one of %s"
            % (skew_type, ", ".join(valid_types))
        )
    if skew_magnitude is None:
        # Fall back to the script-level setting.
        skew_magnitude = magnitude

    # Corner points are (horizontal, vertical) pairs, listed clockwise from
    # the top-left corner.
    left, top, right, bottom = 0, 0, w, h
    original_plane = [(left, top), (right, top), (right, bottom), (left, bottom)]

    max_skew_amount = int(math.ceil(max(w, h) * skew_magnitude))
    if max_skew_amount < 1:
        raise ValueError("skew magnitude too small for a skew of at least 1 pixel")
    skew_amount = random.randint(1, max_skew_amount)

    skew = skew_type
    if skew == "RANDOM":
        skew = random.choice(["TILT", "TILT_LEFT_RIGHT", "TILT_TOP_BOTTOM", "CORNER"])

    if skew in ("TILT", "TILT_LEFT_RIGHT", "TILT_TOP_BOTTOM"):
        direction_range = {
            "TILT": (0, 3),
            "TILT_LEFT_RIGHT": (0, 1),
            "TILT_TOP_BOTTOM": (2, 3),
        }[skew]
        skew_direction = random.randint(*direction_range)
        tilted_planes = {
            # Left tilt: the left edge is stretched vertically.
            0: [(left, top - skew_amount), (right, top),
                (right, bottom), (left, bottom + skew_amount)],
            # Right tilt: the right edge is stretched vertically.
            1: [(left, top), (right, top - skew_amount),
                (right, bottom + skew_amount), (left, bottom)],
            # Forward tilt: the top edge is stretched horizontally.
            2: [(left - skew_amount, top), (right + skew_amount, top),
                (right, bottom), (left, bottom)],
            # Backward tilt: the bottom edge is stretched horizontally.
            3: [(left, top), (right, top),
                (right + skew_amount, bottom), (left - skew_amount, bottom)],
        }
        new_plane = tilted_planes[skew_direction]
    elif skew == "CORNER":
        skew_direction = random.randint(0, 7)
        # Directions 2k and 2k+1 move corner k outward, horizontally (even)
        # or vertically (odd).
        corner_shifts = [
            (-skew_amount, 0), (0, -skew_amount),   # top-left
            (skew_amount, 0), (0, -skew_amount),    # top-right
            (skew_amount, 0), (0, skew_amount),     # bottom-right
            (-skew_amount, 0), (0, skew_amount),    # bottom-left
        ]
        new_plane = list(original_plane)
        corner = skew_direction // 2
        shift_x, shift_y = corner_shifts[skew_direction]
        corner_x, corner_y = new_plane[corner]
        new_plane[corner] = (corner_x + shift_x, corner_y + shift_y)
    else:
        # "ALL": every corner moves outward by an independent random amount
        # on both axes.
        def _offset():
            return random.randint(1, skew_amount)

        new_plane = [
            (left - _offset(), top - _offset()),
            (right + _offset(), top - _offset()),
            (right + _offset(), bottom + _offset()),
            (left - _offset(), bottom + _offset()),
        ]

    # Two linear equations per corner pair; solving them gives the eight
    # coefficients that send each new corner to its original corner.
    equations = []
    for (new_x, new_y), (old_x, old_y) in zip(new_plane, original_plane):
        equations.append([new_x, new_y, 1, 0, 0, 0, -old_x * new_x, -old_x * new_y])
        equations.append([0, 0, 0, new_x, new_y, 1, -old_y * new_x, -old_y * new_y])

    # The builtin float replaces np.float, which NumPy 1.24 removed.
    A = np.array(equations, dtype=float)
    B = np.array(original_plane, dtype=float).reshape(8)

    perspective_skew_coefficients_matrix = np.dot(np.linalg.pinv(A), B)
    return np.array(perspective_skew_coefficients_matrix).reshape(8)

def do(image, matrix):
    """Apply the perspective transform described by ``matrix`` to ``image``.

    Args:
        image: Image providing ``size`` and ``transform`` (a PIL image).
        matrix: Perspective coefficients passed as the transform data.

    Returns:
        The result of ``image.transform``: same output size as the input,
        perspective method, bicubic resampling.
    """
    output_size = image.size
    warped = image.transform(
        output_size,
        Image.PERSPECTIVE,
        matrix,
        resample=Image.BICUBIC,
    )
    return warped

# Augmentation settings.
# skew_type is one of: "ALL", "RANDOM", "TILT", "TILT_LEFT_RIGHT",
# "TILT_TOP_BOTTOM", "CORNER".
skew_type = 'TILT_TOP_BOTTOM'
# Maximum skew as a fraction of the longer image side (read by aug_matrix).
magnitude = 0.3

# Input / output directories. They are concatenated directly with file
# names, so each must end with a path separator.
label_path = 'C:/Users/user/Desktop/aug/label_org/'
image_path = r'C:/Users/user/Desktop/aug/image_org/'
newimagepath = r'C:/Users/user/Desktop/aug/image_tilt/'
newlabelpath = 'C:/Users/user/Desktop/aug/label_tilt/'

files = os.listdir(image_path)

for file in files:
    # splitext keeps dotted base names intact (e.g. "a.b.jpg" -> "a.b").
    file_name, extension = os.path.splitext(file)
    if extension.lower() != '.jpg':
        continue
    print(file_name)

    newfilename = file_name + '_aug'

    img = cv2.imread(image_path + file)
    if img is None:
        # cv2.imread returns None instead of raising when it cannot decode.
        print('skipping unreadable image: ' + file)
        continue
    rows, cols = img.shape[:2]
    img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))

    label_file = label_path + file_name + '.txt'
    if not os.path.isfile(label_file):
        print('skipping image without label file: ' + file)
        continue
    with open(label_file, 'r') as f1:
        dic = read_file(f1, cols, rows)

    new_dic = {}
    # One matrix per image so every box and the image get the same skew.
    matrix = aug_matrix(skew_type, cols, rows)

    for box in dic:
        print(box)
        # Mark this box alone in red on a fresh copy, skew the copy, then
        # read the red region back to find where the box ended up.
        image_org = drawredpoint(img.copy(), box)
        image_tilt = do(image_org, matrix)
        try:
            bbox_tilt = getbboxfromimage(image_tilt)
        except ValueError:
            # No red pixel left in the frame: drop this label rather than
            # aborting the whole run.
            print('box left the frame after augmentation, skipped: ' + str(box))
            continue
        yolobbox_tilt = bbox2yolobbox(bbox_tilt, cols, rows)
        new_dic[yolobbox_tilt] = dic[box]

    newimage = do(img.copy(), matrix)
    writeback(newfilename, new_dic, newimage, newimagepath, newlabelpath)

成品展示

左上角為原圖,其餘為擴增後資料
仔細看圖片車頭部分是有框框的,但不明顯,圖片太糊了哭哭
car

心得

此方法雖然可行,但無法確保萬無一失
目前已發現之缺點為若是bbox內物件呈梯形狀且梯形上長下寬,若是擴增方式為後傾,則最後bbox會有多餘部分

此方法目前是遍歷所有bbox且逐一進行畫矩形、擴增、偵測位置的步驟,多餘部分過多,效能不佳,還可以加以改善