From 5f498b3c114e6616aee1cf762d9f85bc6c90e2f5 Mon Sep 17 00:00:00 2001 From: wenlong Date: Mon, 13 Mar 2017 16:20:45 +0800 Subject: [PATCH 1/4] read source code and add some common: --- .gitignore | 1 + demo.py | 76 +++++++++++++++++++++------------ tools/preprocess_pascal_voc.py | 2 + tools/train.py | 2 + yolo/__init__.pyc | Bin 0 -> 203 bytes yolo/dataset/__init__.pyc | Bin 0 -> 195 bytes yolo/dataset/dataset.py | 3 ++ yolo/dataset/dataset.pyc | Bin 0 -> 886 bytes yolo/dataset/text_dataset.py | 21 ++++++--- yolo/dataset/text_dataset.pyc | Bin 0 -> 4740 bytes yolo/net/net.py | 26 ++++++++--- yolo/net/yolo_net.py | 25 ++++++++++- yolo/net/yolo_tiny_net.py | 33 ++++++++++---- yolo/solver/solver.py | 3 ++ yolo/solver/yolo_solver.py | 21 ++++++--- yolo/utils/process_config.py | 6 +++ 16 files changed, 168 insertions(+), 51 deletions(-) create mode 100644 .gitignore create mode 100644 yolo/__init__.pyc create mode 100644 yolo/dataset/__init__.pyc create mode 100644 yolo/dataset/dataset.pyc create mode 100644 yolo/dataset/text_dataset.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0d20b64 --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +*.pyc diff --git a/demo.py b/demo.py index bbf8179..3551baa 100644 --- a/demo.py +++ b/demo.py @@ -1,3 +1,5 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- import sys sys.path.append('./') @@ -7,18 +9,32 @@ import cv2 import numpy as np -classes_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train","tvmonitor"] +classes_name = ["aeroplane", "bicycle", "bird", "boat", "bottle", + "bus", "car", "cat", "chair", "cow", + "diningtable", "dog", "horse", "motorbike", "person", + "pottedplant", "sheep", "sofa", "train","tvmonitor"] +common_params = { 'image_size': 448, + 'num_classes': 20, + 'batch_size':1} + +net_params = {'cell_size': 7, + 'boxes_per_cell':2, + 'weight_decay': 0.0005} def process_predicts(predicts): + """ + 对于规范化的输出结果对于特定的用户可能觉得不习惯,那么实现一个接口,将规范化 + 的结果重新编写为用户习惯的数据类型 + """ p_classes = predicts[0, :, :, 0:20] C = predicts[0, :, :, 20:22] coordinate = predicts[0, :, :, 22:] - + # 训练的模型设置超参数 net_params, 其中cell大小设置为7 p_classes = np.reshape(p_classes, (7, 7, 1, 20)) C = np.reshape(C, (7, 7, 2, 1)) - P = C * p_classes + P = C * p_classes # P size = (7, 7, 2, 20) #print P[5,1, 0, :] @@ -51,36 +67,42 @@ def process_predicts(predicts): return xmin, ymin, xmax, ymax, class_num -common_params = {'image_size': 448, 'num_classes': 20, - 'batch_size':1} -net_params = {'cell_size': 7, 'boxes_per_cell':2, 'weight_decay': 0.0005} - -net = YoloTinyNet(common_params, net_params, test=True) - -image = tf.placeholder(tf.float32, (1, 448, 448, 3)) -predicts = net.inference(image) +def main(): -sess = tf.Session() -np_img = cv2.imread('cat.jpg') -resized_img = cv2.resize(np_img, (448, 448)) -np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB) + net = YoloTinyNet(common_params, net_params, test=True) + # tensorflow中声明占位符号image, 这在后面run的时候 + # feed_dict中会出现该占位符和对应的值,意思就是输入数据的来源 + image = tf.placeholder(tf.float32, (1, 448, 448, 3)) + predicts = net.inference(image) + sess = tf.Session() -np_img = np_img.astype(np.float32) + # 转化数据格式 + np_img = cv2.imread('cat.jpg') + resized_img = cv2.resize(np_img, (448, 448)) + np_img = cv2.cvtColor(resized_img, cv2.COLOR_BGR2RGB) -np_img = np_img / 255.0 * 2 - 1 -np_img = np.reshape(np_img, (1, 448, 448, 3)) + np_img = np_img.astype(np.float32) + #白化输入的数据 + np_img = np_img / 255.0 * 2 - 1 + np_img = np.reshape(np_img, (1, 448, 448, 3)) -saver = tf.train.Saver(net.trainable_collection) + saver = tf.train.Saver(net.trainable_collection) -saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt') + saver.restore(sess, 'models/pretrain/yolo_tiny.ckpt') + # The optional feed_dict argument allows the caller to override + # the value of tensors in the graph. + np_predict = sess.run(predicts, feed_dict={image: np_img}) -np_predict = sess.run(predicts, feed_dict={image: np_img}) + xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict) + class_name = classes_name[class_num] + # 绘制预测框, 输出预测类型 + cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255)) + cv2.putText(resized_img, + class_name, (int(xmin), int(ymin)), 2, 1.5, (0, 0, 255)) + cv2.imwrite('cat_out.jpg', resized_img) + sess.close() -xmin, ymin, xmax, ymax, class_num = process_predicts(np_predict) -class_name = classes_name[class_num] -cv2.rectangle(resized_img, (int(xmin), int(ymin)), (int(xmax), int(ymax)), (0, 0, 255)) -cv2.putText(resized_img, class_name, (int(xmin), int(ymin)), 2, 1.5, (0, 0, 255)) -cv2.imwrite('cat_out.jpg', resized_img) -sess.close() +if __name__ == '__main__': + main() \ No newline at end of file diff --git a/tools/preprocess_pascal_voc.py b/tools/preprocess_pascal_voc.py index 76d43fc..fee497f 100755 --- a/tools/preprocess_pascal_voc.py +++ b/tools/preprocess_pascal_voc.py @@ -1,3 +1,5 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- """preprocess pascal_voc data """ import os diff --git a/tools/train.py b/tools/train.py index 5b399ba..572c67b 100644 --- a/tools/train.py +++ b/tools/train.py @@ -1,3 +1,5 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- import sys from optparse import OptionParser diff --git a/yolo/__init__.pyc b/yolo/__init__.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6eebee04818f9289749078a326b82d3013973776 GIT binary patch literal 203 zcmZSn%*!<^;%G!N0~9a;X$K%K)&LSIK*Y$9!@v*)XEQQHF~Y=|7^0X!3W7C2x-$O* zfu9C5kXgbG(wtb5Se#l?0usmr(riGwI6tQ>wFqQ11A+isG9=*?K7(tK&#Doqg2bK=xE<6D|0$j%~cWq3WZptMv1IQjS&I%KL30eI8+<)GC`oofOH8-&|&r1Yi;5VM6;0Nm_33K z>$Wa|5E=jA!E$37CLln72ax*1RQYAo=*A{RyC14p;(%CNrnpO$ZIh5HV8I;IY&=XNRqF&* zWpuMF^!?#l7do-fu1wnZQN6|;S|jDETA`Fa%?-B<-HcPdj~{o2(_GPcEmF*GzO)zd zO513}a(|52B?;@m+p)>#gaQ%3NnsMof;lyU!%Q-m!>e p?42= self.max_objects: break return [image, labels, object_num] @@ -124,9 +135,9 @@ def record_customer(self): def batch(self): """get batch Returns: - images: 4-D ndarray [batch_size, height, width, 3] - labels: 3-D ndarray [batch_size, max_objects, 5] - objects_num: 1-D ndarray [batch_size] + images: 4-D ndarray [batch_size, height, width, 3] 一个batch中所有图片数据 + labels: 3-D ndarray [batch_size, max_objects, 5] 一个batch中的所有图片的中的所有物体的标签 + objects_num: 1-D ndarray [batch_size] 一个batch中每个图片中object的个数 """ images = [] labels = [] diff --git a/yolo/dataset/text_dataset.pyc b/yolo/dataset/text_dataset.pyc new file mode 100644 index 0000000000000000000000000000000000000000..7f55d47836bcf79401a8d925707657bed9d1e989 GIT binary patch literal 4740 zcmb_f-EtdQ5$+lN%l|)d>=-svOBGRSy(wbv0_6=XNlfil0bZDFk*qdmb}WsoaU_jA za}>*zbrtTZ;*u)fAy>c)aKq(30T(;~RXhOueSJo@6PBALmUYxW=bY)&r~B)k!oTP0 ze|++{CoReU3i$oW7mj3q2GPd9BR!G6BTYvf@~-l(G+oOVlrKoLVELl-iqb6VTF@nF zmZo%Bn&l~7k!EE|SEX4~x+J~2^y|{B+r7%to00ykG-oYek=~s2=cPFhzADkYbR22d zvb&y!w>7=MB@O6=j)c@N6$03`fT6 z#J$LC!>%kc9;T>Dg`Fhrhh`Tt&x`xvQ4|bA)Ah#vcyQM{({bDn$D}#7dSRB`RrR*2 zw?T(v(i~O2jm=sNAG)9ZddS1R#IcRYxgK-lg(K%8fg{G1bDZCmsi^SZqT*CkxR_N^ z5%wxxh|1Ghr3+C-OjX)&S_NU@@PW0dq&k(<vCR^V4dUf290#a z_r%=LNovU6!8z2 z0Kt-m{cP8}@3rHW(OKR!(7sM-ir9=1W;8c7;IjC)k+O%OX?24O)@WV@gHa#n1@wgD zAUQmaS|$sIQHlgcv>GNHG+Y!f-eS7Dh%=^Gs-qW^RuC}E7Zavfwcq#_X$EO#Qd7e( zvqNKFrO154&^?XYu$2qDQGC=jKBxM$UDNxDO+Gh61mVF^D@ohI2~ol)Q)h7dF`tY8YZ-skMCiJxy!BLxx*+m zE0BN;tb6IYXqdFRKCePCF}DP`(H^z1fQnC37|(OJ!w46h8y2S4oHQK3i;RwZZEKX7 zq#vaUxw=6X^*UaLUA<}3 zzy#4Ddk@sZ-KRbd`qV^&EJ@+O(|6C3Uh-6j+yp4cCdipP!!y7IGB1b+u?d1T(AhQy zsjN5`pTe@U?A8j)?wm90EI7BERksGohP&+4oVvT_@V7;?_~orA{GoQrI3fb3N1BwH zpEr>~_-EOv}2XPNx_7>zqLU z=5CG62fcLS9YV3lJo9~Q0 zul^sQBG~(_$|}$qPXY8|#-t|Wf5VE0sbvlCO0qVSUM}$X)$WzCJM~z1T_d-%w3U|Aw~7UR&`m|IZK>?Y3nFgs}qzU zEW!+kLC*_Lf{KFra1UR`7*0U@Dxj*A(-Bp9R+99k{PN_b;Qukbq>GLihzzUj?-*<# zP6Lo_NM2IBT%!^{GNW{GQD+o8cfFl=_q;(nOw;fzFER?Js65-y5JSDsi8yzzAo7;? z{(;vRw*ZP!y5*fsiPJ5wyJcGy7MI)khPJj)vWo^OF=5ZMA`c?qDEgy;HcudM8k;rf z9r*XJ4`W7V%yf^Lc`*ajf+1zC=i8b}ai6WCqDNdRDxY$E(yoE1OzLi_J3NjS(kIyD{|!CSL+Uwad3J;b>9GD zXj!=|4QzjZ1_Kmn#vpKrE`Q{|hYluXA`GLxbii;_4N|Ch98~Cjet;{?9__qm|-eC1d&uAb0^E zEo&4o#MLcdf!kebfKMCP2`8m*C`me%ZtVX0&JA@-*?byq$yBuTX#k7?`+#%fpM0@8d~wpe?XqC?X*n?a4MYmKca>)g9H}2sO!W zV2}!vyNrfmohk1u;ddF|bw^Fk`@2EdkAgr!E(rSQdV6FESwYZFT0!8m?b9~IW*xJ^ z{|v*W)k=Ny7KebVZ&z$_%B8Yfd#6^f-L6#|ixA|dbw;R#FwKWZeBPf=4AE;!GR=Fu zym$Q_YQ9emeGIP;&wQ@2?0的数据采用直接激活的方式,对小于0的数据采用leaky激活方式 + # 此处实现值得学习和借鉴 bool_mask = (x > 0) mask = tf.cast(bool_mask, dtype=dtype) return 1.0 * mask * x + alpha * (1 - mask) * x @@ -160,4 +167,13 @@ def loss(self, predicts, labels, objects_num): labels : 3-D tensor of [batch_size, max_objects, 5] objects_num: 1-D tensor [batch_size] """ - raise NotImplementedError \ No newline at end of file + raise NotImplementedError + +''' +## weight decay: +在机器学习或者模式识别中,会出现overfitting,而当网络逐渐overfitting时网络 +权值逐渐变大,因此,为了避免出现overfitting,会给误差函数添加一个惩罚项,常用 +的惩罚项是所有权重的平方乘以一个衰减常量之和。其用来惩罚大的权值。 +权值衰减惩罚项使得权值收敛到较小的绝对值,而惩罚大的权值。因为大的权值会使得 +系统出现过拟合,降低其泛化性能。 +''' \ No newline at end of file diff --git a/yolo/net/yolo_net.py b/yolo/net/yolo_net.py index 0dfa034..71abb2c 100644 --- a/yolo/net/yolo_net.py +++ b/yolo/net/yolo_net.py @@ -1,3 +1,5 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -123,6 +125,7 @@ def iou(self, boxes1, boxes2): Return: iou: 3-D tensor [CELL_SIZE, CELL_SIZE, BOXES_PER_CELL] """ + # 计算左上角和右下角的位置信息 boxes1 = tf.pack([boxes1[:, :, :, 0] - boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] - boxes1[:, :, :, 3] / 2, boxes1[:, :, :, 0] + boxes1[:, :, :, 2] / 2, boxes1[:, :, :, 1] + boxes1[:, :, :, 3] / 2]) boxes1 = tf.transpose(boxes1, [1, 2, 3, 0]) @@ -134,6 +137,16 @@ def iou(self, boxes1, boxes2): rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:]) #intersection + ''' + 0, 0------------------------> + | ————————————| + | | ——————|—————— + | | | | | + | |—————|—————| | + | |____________| + | + v + ''' intersection = rd - lu inter_square = intersection[:, :, :, 0] * intersection[:, :, :, 1] @@ -285,7 +298,17 @@ def loss(self, predicts, labels, objects_num): label = labels[i, :, :] object_num = objects_num[i] nilboy = tf.ones([7,7,2]) - tuple_results = tf.while_loop(self.cond1, self.body1, [tf.constant(0), object_num, [class_loss, object_loss, noobject_loss, coord_loss], predict, label, nilboy]) + tuple_results = tf.while_loop( + self.cond1, + self.body1, + [ + tf.constant(0), + object_num, + [class_loss, object_loss, noobject_loss, coord_loss], + predict, + label, + nilboy + ]) for j in range(4): loss[j] = loss[j] + tuple_results[2][j] nilboy = tuple_results[5] diff --git a/yolo/net/yolo_tiny_net.py b/yolo/net/yolo_tiny_net.py index 6f3c48e..11d24ea 100644 --- a/yolo/net/yolo_tiny_net.py +++ b/yolo/net/yolo_tiny_net.py @@ -1,3 +1,5 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -117,7 +119,10 @@ def iou(self, boxes1, boxes2): boxes2[0] + boxes2[2] / 2, boxes2[1] + boxes2[3] / 2]) #calculate the left up point + # boxes相当于grandtruth的box,只有一个 + # 但是boxes1相当于ROI,是有很多的 lu = tf.maximum(boxes1[:, :, :, 0:2], boxes2[0:2]) + # calculate the right down point rd = tf.minimum(boxes1[:, :, :, 2:], boxes2[2:]) #intersection @@ -135,13 +140,14 @@ def iou(self, boxes1, boxes2): return inter_square/(square1 + square2 - inter_square + 1e-6) + # loop停止函数和后面的运行函数体的输入参数是一致的,输入相同的参数数据 def cond1(self, num, object_num, loss, predict, label, nilboy): """ if num < object_num """ return num < object_num - + # 运行的函数体的定义 def body1(self, num, object_num, loss, predict, labels, nilboy): """ calculate loss @@ -202,10 +208,10 @@ def body1(self, num, object_num, loss, predict, labels, nilboy): p_C = predict[:, :, self.num_classes:self.num_classes + self.boxes_per_cell] - #calculate truth x,y,sqrt_w,sqrt_h 0-D + #calculate truth x, y, sqrt_w, sqrt_h 0-D x = label[0] y = label[1] - + # TODO:为啥要使用sqrt的宽度和高度来 sqrt_w = tf.sqrt(tf.abs(label[2])) sqrt_h = tf.sqrt(tf.abs(label[3])) #sqrt_w = tf.abs(label[2]) @@ -223,6 +229,7 @@ def body1(self, num, object_num, loss, predict, labels, nilboy): #p_sqrt_h = predict_boxes[:, :, :, 3] p_sqrt_w = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 2]))) p_sqrt_h = tf.sqrt(tf.minimum(self.image_size * 1.0, tf.maximum(0.0, predict_boxes[:, :, :, 3]))) + #calculate truth p 1-D tensor [NUM_CLASSES] P = tf.one_hot(tf.cast(label[4], tf.int32), self.num_classes, dtype=tf.float32) @@ -262,17 +269,27 @@ def loss(self, predicts, labels, objects_num): labels : 3-D tensor of [batch_size, max_objects, 5] objects_num: 1-D tensor [batch_size] """ - class_loss = tf.constant(0, tf.float32) - object_loss = tf.constant(0, tf.float32) - noobject_loss = tf.constant(0, tf.float32) - coord_loss = tf.constant(0, tf.float32) + # 定义不同的loss变量 + class_loss = tf.constant(0, tf.float32) # 分类损失 + object_loss = tf.constant(0, tf.float32) # 有对象的时候与ground truth的损失 + noobject_loss = tf.constant(0, tf.float32) # 预测框中没有对象的时候的损失 + coord_loss = tf.constant(0, tf.float32) # + loss = [0, 0, 0, 0] for i in range(self.batch_size): predict = predicts[i, :, :, :] label = labels[i, :, :] object_num = objects_num[i] nilboy = tf.ones([7,7,2]) - tuple_results = tf.while_loop(self.cond1, self.body1, [tf.constant(0), object_num, [class_loss, object_loss, noobject_loss, coord_loss], predict, label, nilboy]) + tuple_results = tf.while_loop(self.cond1,# 其输入参数就是后面list中的变量 + self.body1,# 输入参数就是其后的list变量,详细见其定义 + [ tf.constant(0), + object_num, + [class_loss, object_loss, noobject_loss, coord_loss], + predict, + label, + nilboy + ]) for j in range(4): loss[j] = loss[j] + tuple_results[2][j] nilboy = tuple_results[5] diff --git a/yolo/solver/solver.py b/yolo/solver/solver.py index 50bb8d4..8fd65b3 100644 --- a/yolo/solver/solver.py +++ b/yolo/solver/solver.py @@ -1,3 +1,6 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- + """Solver Abstract class """ class Solver(object): diff --git a/yolo/solver/yolo_solver.py b/yolo/solver/yolo_solver.py index 0d9b4f3..6888480 100644 --- a/yolo/solver/yolo_solver.py +++ b/yolo/solver/yolo_solver.py @@ -1,3 +1,5 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -54,6 +56,7 @@ def _train(self): def construct_graph(self): # construct graph self.global_step = tf.Variable(0, trainable=False) + # 搭建神经网络模型的输入和输出结构 self.images = tf.placeholder(tf.float32, (self.batch_size, self.height, self.width, 3)) self.labels = tf.placeholder(tf.float32, (self.batch_size, self.max_objects, 5)) self.objects_num = tf.placeholder(tf.int32, (self.batch_size)) @@ -71,6 +74,7 @@ def solve(self): init = tf.global_variables_initializer() + # Merges all summaries collected in the default graph. summary_op = tf.summary.merge_all() sess = tf.Session() @@ -83,12 +87,15 @@ def solve(self): for step in xrange(self.max_iterators): start_time = time.time() + # 获取train data np_images, np_labels, np_objects_num = self.dataset.batch() - - _, loss_value, nilboy = sess.run([self.train_op, self.total_loss, self.nilboy], feed_dict={self.images: np_images, self.labels: np_labels, self.objects_num: np_objects_num}) + # 训练模型一个batch + _, loss_value, nilboy = sess.run([self.train_op, self.total_loss, self.nilboy], + feed_dict= { self.images: np_images, + self.labels: np_labels, + self.objects_num: np_objects_num + }) #loss_value, nilboy = sess.run([self.total_loss, self.nilboy], feed_dict={self.images: np_images, self.labels: np_labels, self.objects_num: np_objects_num}) - - duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' @@ -105,7 +112,11 @@ def solve(self): sys.stdout.flush() if step % 100 == 0: - summary_str = sess.run(summary_op, feed_dict={self.images: np_images, self.labels: np_labels, self.objects_num: np_objects_num}) + summary_str = sess.run(summary_op, feed_dict={ + self.images: np_images, + self.labels: np_labels, + self.objects_num: np_objects_num + }) summary_writer.add_summary(summary_str, step) if step % 5000 == 0: saver2.save(sess, self.train_dir + '/model.ckpt', global_step=step) diff --git a/yolo/utils/process_config.py b/yolo/utils/process_config.py index 93a783f..a8fbbb0 100644 --- a/yolo/utils/process_config.py +++ b/yolo/utils/process_config.py @@ -1,3 +1,9 @@ +#!/usr/bin/python +# -*- coding: utf-8 -*- +""" +从配置文件中读取相应的配置参数设置 +这个方法是可以重用的 +""" import ConfigParser def process_config(conf_file): From 6e543b128d77f9f4f6391fe242fc4bd612d6460b Mon Sep 17 00:00:00 2001 From: wenlong Date: Mon, 13 Mar 2017 16:23:46 +0800 Subject: [PATCH 2/4] update a .gitignore --- .gitignore | 2 ++ yolo/__init__.pyc | Bin 203 -> 0 bytes 2 files changed, 2 insertions(+) delete mode 100644 yolo/__init__.pyc diff --git a/.gitignore b/.gitignore index 0d20b64..1287439 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ *.pyc +*/*.pyc +*/*/*/.pyc diff --git a/yolo/__init__.pyc b/yolo/__init__.pyc deleted file mode 100644 index 6eebee04818f9289749078a326b82d3013973776..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 203 zcmZSn%*!<^;%G!N0~9a;X$K%K)&LSIK*Y$9!@v*)XEQQHF~Y=|7^0X!3W7C2x-$O* zfu9C5kXgbG(wtb5Se#l?0usmr(riGwI6tQ>wFqQ11A+is Date: Fri, 17 Mar 2017 09:01:47 +0800 Subject: [PATCH 3/4] 1. add some reading comment for review --- yolo/net/net.py | 4 ++-- yolo/net/yolo_tiny_net.py | 17 ++++++++++++++--- yolo/solver/yolo_solver.py | 14 +++++++++----- 3 files changed, 25 insertions(+), 10 deletions(-) diff --git a/yolo/net/net.py b/yolo/net/net.py index ac6b7ef..d731004 100644 --- a/yolo/net/net.py +++ b/yolo/net/net.py @@ -103,7 +103,7 @@ def max_pool(self, input, kernel_size, stride): return tf.nn.max_pool(input, ksize=[1, kernel_size[0], kernel_size[1], 1], strides=[1, stride, stride, 1], padding='SAME') - def local(self, scope, input, in_dimension, out_dimension, leaky=True, pretrain=True, train=True): + def local(self, scope, _input, in_dimension, out_dimension, leaky=True, pretrain=True, train=True): """Fully connection layer Args: @@ -114,7 +114,7 @@ def local(self, scope, input, in_dimension, out_dimension, leaky=True, pretrain= output: 2-D tensor [batch_size, out_dimension] """ with tf.variable_scope(scope) as scope: - reshape = tf.reshape(input, [tf.shape(input)[0], -1]) + reshape = tf.reshape(_input, [tf.shape(_input)[0], -1]) weights = self._variable_with_weight_decay('weights', shape=[in_dimension, out_dimension], stddev=0.04, wd=self.weight_decay, pretrain=pretrain, train=train) diff --git a/yolo/net/yolo_tiny_net.py b/yolo/net/yolo_tiny_net.py index 11d24ea..696790a 100644 --- a/yolo/net/yolo_tiny_net.py +++ b/yolo/net/yolo_tiny_net.py @@ -41,7 +41,16 @@ def inference(self, images): predicts: 4-D tensor [batch_size, cell_size, cell_size, num_classes + 5 * boxes_per_cell] """ conv_num = 1 - + """ + conv2d(self, scope, input, kernel_size, stride=1, pretrain=True, train=True) + Args: + input: 4-D tensor [batch_size, height, width, depth] + scope: variable_scope name + kernel_size: [k_height, k_width, in_channel, out_channel] + stride: int32 + Return: + output: 4-D tensor [batch_size, height/stride, width/stride, out_channels] + """ temp_conv = self.conv2d('conv' + str(conv_num), images, [3, 3, 3, 16], stride=1) conv_num += 1 @@ -98,7 +107,7 @@ def inference(self, images): scales = tf.reshape(local3[:, n1:n2], (-1, self.cell_size, self.cell_size, self.boxes_per_cell)) boxes = tf.reshape(local3[:, n2:], (-1, self.cell_size, self.cell_size, self.boxes_per_cell * 4)) - local3 = tf.concat([class_probs, scales, boxes], 3) + local3 = tf.concat(3, [class_probs, scales, boxes]) predicts = local3 @@ -273,7 +282,7 @@ def loss(self, predicts, labels, objects_num): class_loss = tf.constant(0, tf.float32) # 分类损失 object_loss = tf.constant(0, tf.float32) # 有对象的时候与ground truth的损失 noobject_loss = tf.constant(0, tf.float32) # 预测框中没有对象的时候的损失 - coord_loss = tf.constant(0, tf.float32) # + coord_loss = tf.constant(0, tf.float32) # 预测框位置信息损失 loss = [0, 0, 0, 0] for i in range(self.batch_size): @@ -281,6 +290,7 @@ def loss(self, predicts, labels, objects_num): label = labels[i, :, :] object_num = objects_num[i] nilboy = tf.ones([7,7,2]) + # 返回值就是被调用函数题的输入数据 tuple_results = tf.while_loop(self.cond1,# 其输入参数就是后面list中的变量 self.body1,# 输入参数就是其后的list变量,详细见其定义 [ tf.constant(0), @@ -290,6 +300,7 @@ def loss(self, predicts, labels, objects_num): label, nilboy ]) + #累加各类loss值 for j in range(4): loss[j] = loss[j] + tuple_results[2][j] nilboy = tuple_results[5] diff --git a/yolo/solver/yolo_solver.py b/yolo/solver/yolo_solver.py index 6888480..9a747e7 100644 --- a/yolo/solver/yolo_solver.py +++ b/yolo/solver/yolo_solver.py @@ -91,15 +91,17 @@ def solve(self): np_images, np_labels, np_objects_num = self.dataset.batch() # 训练模型一个batch _, loss_value, nilboy = sess.run([self.train_op, self.total_loss, self.nilboy], - feed_dict= { self.images: np_images, - self.labels: np_labels, - self.objects_num: np_objects_num - }) + feed_dict= { + self.images: np_images, + self.labels: np_labels, + self.objects_num: np_objects_num + }) #loss_value, nilboy = sess.run([self.total_loss, self.nilboy], feed_dict={self.images: np_images, self.labels: np_labels, self.objects_num: np_objects_num}) - duration = time.time() - start_time + duration = time.time() - start_time assert not np.isnan(loss_value), 'Model diverged with loss = NaN' + #10次迭代输入计算信息 if step % 10 == 0: num_examples_per_step = self.dataset.batch_size examples_per_sec = num_examples_per_step / duration @@ -111,6 +113,7 @@ def solve(self): examples_per_sec, sec_per_batch)) sys.stdout.flush() + # 100次迭代更新后将预测结果写入文件 if step % 100 == 0: summary_str = sess.run(summary_op, feed_dict={ self.images: np_images, @@ -118,6 +121,7 @@ def solve(self): self.objects_num: np_objects_num }) summary_writer.add_summary(summary_str, step) + # 5000次迭代保存一个模型 if step % 5000 == 0: saver2.save(sess, self.train_dir + '/model.ckpt', global_step=step) sess.close() From 069f567488051782dea0eb6bf093d719abba471d Mon Sep 17 00:00:00 2001 From: cwlseu Date: Fri, 24 Mar 2017 11:45:31 +0800 Subject: [PATCH 4/4] =?UTF-8?q?=20=E9=80=9A=E8=BF=87=E9=98=85?= =?UTF-8?q?=E8=AF=BByolo=E4=BB=A3=E7=A0=81=EF=BC=8C=E6=B7=BB=E5=8A=A0?= =?UTF-8?q?=E4=BA=86=E6=B3=A8=E9=87=8A=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- yolo/dataset/__init__.pyc | Bin 195 -> 0 bytes yolo/dataset/dataset.pyc | Bin 886 -> 0 bytes yolo/dataset/text_dataset.pyc | Bin 4740 -> 0 bytes 3 files changed, 0 insertions(+), 0 deletions(-) delete mode 100644 yolo/dataset/__init__.pyc delete mode 100644 yolo/dataset/dataset.pyc delete mode 100644 yolo/dataset/text_dataset.pyc diff --git a/yolo/dataset/__init__.pyc b/yolo/dataset/__init__.pyc deleted file mode 100644 index 7dec5bd23b663c719b65ec04b66790e46ea832bb..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 195 zcmZSn%*!<^;%G!N0~9a;X$K%KmH`qeK*Y$9!@v*)XEQQHF@gkxH9#se{{w*^m|4OO z(vVn^Se#l?!UJTNq*j!~Lj^Q|dG9=*?K7(tK&#Doqg2bK=xE<6D|0$j%~cWq3WZptMv1IQjS&I%KL30eI8+<)GC`oofOH8-&|&r1Yi;5VM6;0Nm_33K z>$Wa|5E=jA!E$37CLln72ax*1RQYAo=*A{RyC14p;(%CNrnpO$ZIh5HV8I;IY&=XNRqF&* zWpuMF^!?#l7do-fu1wnZQN6|;S|jDETA`Fa%?-B<-HcPdj~{o2(_GPcEmF*GzO)zd zO513}a(|52B?;@m+p)>#gaQ%3NnsMof;lyU!%Q-m!>e p?42=-svOBGRSy(wbv0_6=XNlfil0bZDFk*qdmb}WsoaU_jA za}>*zbrtTZ;*u)fAy>c)aKq(30T(;~RXhOueSJo@6PBALmUYxW=bY)&r~B)k!oTP0 ze|++{CoReU3i$oW7mj3q2GPd9BR!G6BTYvf@~-l(G+oOVlrKoLVELl-iqb6VTF@nF zmZo%Bn&l~7k!EE|SEX4~x+J~2^y|{B+r7%to00ykG-oYek=~s2=cPFhzADkYbR22d zvb&y!w>7=MB@O6=j)c@N6$03`fT6 z#J$LC!>%kc9;T>Dg`Fhrhh`Tt&x`xvQ4|bA)Ah#vcyQM{({bDn$D}#7dSRB`RrR*2 zw?T(v(i~O2jm=sNAG)9ZddS1R#IcRYxgK-lg(K%8fg{G1bDZCmsi^SZqT*CkxR_N^ z5%wxxh|1Ghr3+C-OjX)&S_NU@@PW0dq&k(<vCR^V4dUf290#a z_r%=LNovU6!8z2 z0Kt-m{cP8}@3rHW(OKR!(7sM-ir9=1W;8c7;IjC)k+O%OX?24O)@WV@gHa#n1@wgD zAUQmaS|$sIQHlgcv>GNHG+Y!f-eS7Dh%=^Gs-qW^RuC}E7Zavfwcq#_X$EO#Qd7e( zvqNKFrO154&^?XYu$2qDQGC=jKBxM$UDNxDO+Gh61mVF^D@ohI2~ol)Q)h7dF`tY8YZ-skMCiJxy!BLxx*+m zE0BN;tb6IYXqdFRKCePCF}DP`(H^z1fQnC37|(OJ!w46h8y2S4oHQK3i;RwZZEKX7 zq#vaUxw=6X^*UaLUA<}3 zzy#4Ddk@sZ-KRbd`qV^&EJ@+O(|6C3Uh-6j+yp4cCdipP!!y7IGB1b+u?d1T(AhQy zsjN5`pTe@U?A8j)?wm90EI7BERksGohP&+4oVvT_@V7;?_~orA{GoQrI3fb3N1BwH zpEr>~_-EOv}2XPNx_7>zqLU z=5CG62fcLS9YV3lJo9~Q0 zul^sQBG~(_$|}$qPXY8|#-t|Wf5VE0sbvlCO0qVSUM}$X)$WzCJM~z1T_d-%w3U|Aw~7UR&`m|IZK>?Y3nFgs}qzU zEW!+kLC*_Lf{KFra1UR`7*0U@Dxj*A(-Bp9R+99k{PN_b;Qukbq>GLihzzUj?-*<# zP6Lo_NM2IBT%!^{GNW{GQD+o8cfFl=_q;(nOw;fzFER?Js65-y5JSDsi8yzzAo7;? z{(;vRw*ZP!y5*fsiPJ5wyJcGy7MI)khPJj)vWo^OF=5ZMA`c?qDEgy;HcudM8k;rf z9r*XJ4`W7V%yf^Lc`*ajf+1zC=i8b}ai6WCqDNdRDxY$E(yoE1OzLi_J3NjS(kIyD{|!CSL+Uwad3J;b>9GD zXj!=|4QzjZ1_Kmn#vpKrE`Q{|hYluXA`GLxbii;_4N|Ch98~Cjet;{?9__qm|-eC1d&uAb0^E zEo&4o#MLcdf!kebfKMCP2`8m*C`me%ZtVX0&JA@-*?byq$yBuTX#k7?`+#%fpM0@8d~wpe?XqC?X*n?a4MYmKca>)g9H}2sO!W zV2}!vyNrfmohk1u;ddF|bw^Fk`@2EdkAgr!E(rSQdV6FESwYZFT0!8m?b9~IW*xJ^ z{|v*W)k=Ny7KebVZ&z$_%B8Yfd#6^f-L6#|ixA|dbw;R#FwKWZeBPf=4AE;!GR=Fu zym$Q_YQ9emeGIP;&wQ@2?