diff --git a/CoreMLConverter_ReadMe.md b/CoreMLConverter_ReadMe.md
new file mode 100644
index 00000000..a8868a8c
--- /dev/null
+++ b/CoreMLConverter_ReadMe.md
@@ -0,0 +1,12 @@
+# Steps to run this
+
+1) Create a virtual environment
+2) Activate the environment
+3) pip install -r requirements.txt
+4) Run save_model.py. After it finishes, **ppe_yolov4-tiny-608-24thAug** should be generated under the checkpoints folder.
+ This is the Keras checkpoint which we'll use in our Jupyter notebook.
+5) After running save_model.py, launch **PortExistingModelToCoreML.ipynb**. After running the final cell of the notebook,
+ predictions should be visible.
+ 
+
+
\ No newline at end of file
diff --git a/PortExistingModelToCoreML.ipynb b/PortExistingModelToCoreML.ipynb
new file mode 100644
index 00000000..cbce78e9
--- /dev/null
+++ b/PortExistingModelToCoreML.ipynb
@@ -0,0 +1,918 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "ece314a9",
+ "metadata": {},
+ "source": [
+ "# imports"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "id": "eb767bd9",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "TensorFlow version is: 2.3.1\n",
+ "Eager execution is: True\n",
+ "Keras version is: 2.4.0\n"
+ ]
+ }
+ ],
+ "source": [
+ "import tensorflow as tf\n",
+ "from tensorflow.keras.models import load_model, Model\n",
+ "import numpy as np\n",
+ "\n",
+ "from tensorflow.keras.layers import Lambda\n",
+ "from tensorflow import slice\n",
+ "\n",
+ "print(\"TensorFlow version is: {}\".format(tf.__version__))\n",
+ "print(\"Eager execution is: {}\".format(tf.executing_eagerly()))\n",
+ "print(\"Keras version is: {}\".format(tf.keras.__version__))"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "26cf5203",
+ "metadata": {},
+ "source": [
+ "# load model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "id": "71edebdd",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "2022-08-24 19:12:00.027384: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN)to use the following CPU instructions in performance-critical operations: AVX2 FMA\n",
+ "To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.\n",
+ "2022-08-24 19:12:00.043642: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fa8f5b424d0 initialized for platform Host (this does not guarantee that XLA will be used). Devices:\n",
+ "2022-08-24 19:12:00.043657: I tensorflow/compiler/xla/service/service.cc:176] StreamExecutor device (0): Host, Default Version\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "WARNING:tensorflow:No training configuration found in save file, so the model was *not* compiled. Compile it manually.\n"
+ ]
+ }
+ ],
+ "source": [
+ "yolo_tiny_model = load_model('checkpoints/ppe_yolov4-tiny-608-24thAug')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "id": "2455137b",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: \"functional_1\"\n",
+ "__________________________________________________________________________________________________\n",
+ "Layer (type) Output Shape Param # Connected to \n",
+ "==================================================================================================\n",
+ "input_1 (InputLayer) [(None, 608, 608, 3) 0 \n",
+ "__________________________________________________________________________________________________\n",
+ "zero_padding2d (ZeroPadding2D) (None, 609, 609, 3) 0 input_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d (Conv2D) (None, 304, 304, 32) 864 zero_padding2d[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization (BatchNorma (None, 304, 304, 32) 128 conv2d[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu (TensorFl (None, 304, 304, 32) 0 batch_normalization[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "zero_padding2d_1 (ZeroPadding2D (None, 305, 305, 32) 0 tf_op_layer_LeakyRelu[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_1 (Conv2D) (None, 152, 152, 64) 18432 zero_padding2d_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_1 (BatchNor (None, 152, 152, 64) 256 conv2d_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_1 (Tensor (None, 152, 152, 64) 0 batch_normalization_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_2 (Conv2D) (None, 152, 152, 64) 36864 tf_op_layer_LeakyRelu_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_2 (BatchNor (None, 152, 152, 64) 256 conv2d_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_2 (Tensor (None, 152, 152, 64) 0 batch_normalization_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_split (TensorFlowOp [(None, 152, 152, 32 0 tf_op_layer_LeakyRelu_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_3 (Conv2D) (None, 152, 152, 32) 9216 tf_op_layer_split[0][1] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_3 (BatchNor (None, 152, 152, 32) 128 conv2d_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_3 (Tensor (None, 152, 152, 32) 0 batch_normalization_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_4 (Conv2D) (None, 152, 152, 32) 9216 tf_op_layer_LeakyRelu_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_4 (BatchNor (None, 152, 152, 32) 128 conv2d_4[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_4 (Tensor (None, 152, 152, 32) 0 batch_normalization_4[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat (TensorFlowO (None, 152, 152, 64) 0 tf_op_layer_LeakyRelu_4[0][0] \n",
+ " tf_op_layer_LeakyRelu_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_5 (Conv2D) (None, 152, 152, 64) 4096 tf_op_layer_concat[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_5 (BatchNor (None, 152, 152, 64) 256 conv2d_5[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_5 (Tensor (None, 152, 152, 64) 0 batch_normalization_5[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_1 (TensorFlo (None, 152, 152, 128 0 tf_op_layer_LeakyRelu_2[0][0] \n",
+ " tf_op_layer_LeakyRelu_5[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "max_pooling2d (MaxPooling2D) (None, 76, 76, 128) 0 tf_op_layer_concat_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_6 (Conv2D) (None, 76, 76, 128) 147456 max_pooling2d[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_6 (BatchNor (None, 76, 76, 128) 512 conv2d_6[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_6 (Tensor (None, 76, 76, 128) 0 batch_normalization_6[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_split_1 (TensorFlow [(None, 76, 76, 64), 0 tf_op_layer_LeakyRelu_6[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_7 (Conv2D) (None, 76, 76, 64) 36864 tf_op_layer_split_1[0][1] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_7 (BatchNor (None, 76, 76, 64) 256 conv2d_7[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_7 (Tensor (None, 76, 76, 64) 0 batch_normalization_7[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_8 (Conv2D) (None, 76, 76, 64) 36864 tf_op_layer_LeakyRelu_7[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_8 (BatchNor (None, 76, 76, 64) 256 conv2d_8[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_8 (Tensor (None, 76, 76, 64) 0 batch_normalization_8[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_2 (TensorFlo (None, 76, 76, 128) 0 tf_op_layer_LeakyRelu_8[0][0] \n",
+ " tf_op_layer_LeakyRelu_7[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_9 (Conv2D) (None, 76, 76, 128) 16384 tf_op_layer_concat_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_9 (BatchNor (None, 76, 76, 128) 512 conv2d_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_9 (Tensor (None, 76, 76, 128) 0 batch_normalization_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_3 (TensorFlo (None, 76, 76, 256) 0 tf_op_layer_LeakyRelu_6[0][0] \n",
+ " tf_op_layer_LeakyRelu_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "max_pooling2d_1 (MaxPooling2D) (None, 38, 38, 256) 0 tf_op_layer_concat_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_10 (Conv2D) (None, 38, 38, 256) 589824 max_pooling2d_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_10 (BatchNo (None, 38, 38, 256) 1024 conv2d_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_10 (Tenso (None, 38, 38, 256) 0 batch_normalization_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_split_2 (TensorFlow [(None, 38, 38, 128) 0 tf_op_layer_LeakyRelu_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_11 (Conv2D) (None, 38, 38, 128) 147456 tf_op_layer_split_2[0][1] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_11 (BatchNo (None, 38, 38, 128) 512 conv2d_11[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_11 (Tenso (None, 38, 38, 128) 0 batch_normalization_11[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_12 (Conv2D) (None, 38, 38, 128) 147456 tf_op_layer_LeakyRelu_11[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_12 (BatchNo (None, 38, 38, 128) 512 conv2d_12[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_12 (Tenso (None, 38, 38, 128) 0 batch_normalization_12[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_4 (TensorFlo (None, 38, 38, 256) 0 tf_op_layer_LeakyRelu_12[0][0] \n",
+ " tf_op_layer_LeakyRelu_11[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_13 (Conv2D) (None, 38, 38, 256) 65536 tf_op_layer_concat_4[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_13 (BatchNo (None, 38, 38, 256) 1024 conv2d_13[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_13 (Tenso (None, 38, 38, 256) 0 batch_normalization_13[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_5 (TensorFlo (None, 38, 38, 512) 0 tf_op_layer_LeakyRelu_10[0][0] \n",
+ " tf_op_layer_LeakyRelu_13[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "max_pooling2d_2 (MaxPooling2D) (None, 19, 19, 512) 0 tf_op_layer_concat_5[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_14 (Conv2D) (None, 19, 19, 512) 2359296 max_pooling2d_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_14 (BatchNo (None, 19, 19, 512) 2048 conv2d_14[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_14 (Tenso (None, 19, 19, 512) 0 batch_normalization_14[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_15 (Conv2D) (None, 19, 19, 256) 131072 tf_op_layer_LeakyRelu_14[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_15 (BatchNo (None, 19, 19, 256) 1024 conv2d_15[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_15 (Tenso (None, 19, 19, 256) 0 batch_normalization_15[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_18 (Conv2D) (None, 19, 19, 128) 32768 tf_op_layer_LeakyRelu_15[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_17 (BatchNo (None, 19, 19, 128) 512 conv2d_18[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_17 (Tenso (None, 19, 19, 128) 0 batch_normalization_17[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_ResizeBilinear (Ten (None, 38, 38, 128) 0 tf_op_layer_LeakyRelu_17[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_6 (TensorFlo (None, 38, 38, 384) 0 tf_op_layer_ResizeBilinear[0][0] \n",
+ " tf_op_layer_LeakyRelu_13[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_19 (Conv2D) (None, 38, 38, 256) 884736 tf_op_layer_concat_6[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_16 (Conv2D) (None, 19, 19, 512) 1179648 tf_op_layer_LeakyRelu_15[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_18 (BatchNo (None, 38, 38, 256) 1024 conv2d_19[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "batch_normalization_16 (BatchNo (None, 19, 19, 512) 2048 conv2d_16[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_18 (Tenso (None, 38, 38, 256) 0 batch_normalization_18[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_LeakyRelu_16 (Tenso (None, 19, 19, 512) 0 batch_normalization_16[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_20 (Conv2D) (None, 38, 38, 24) 6168 tf_op_layer_LeakyRelu_18[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "conv2d_17 (Conv2D) (None, 19, 19, 24) 12312 tf_op_layer_LeakyRelu_16[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape (TensorFlowOp (4,) 0 conv2d_20[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_1 (TensorFlow (4,) 0 conv2d_17[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice (Tens () 0 tf_op_layer_Shape[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_1 (Te () 0 tf_op_layer_Shape_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape/shape (Tens (5,) 0 tf_op_layer_strided_slice[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_3/shape (Te (5,) 0 tf_op_layer_strided_slice_1[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape (TensorFlow (None, 38, 38, 3, 8) 0 conv2d_20[0][0] \n",
+ " tf_op_layer_Reshape/shape[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_3 (TensorFl (None, 19, 19, 3, 8) 0 conv2d_17[0][0] \n",
+ " tf_op_layer_Reshape_3/shape[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_split_3 (TensorFlow [(None, 38, 38, 3, 2 0 tf_op_layer_Reshape[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_split_4 (TensorFlow [(None, 19, 19, 3, 2 0 tf_op_layer_Reshape_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sigmoid (TensorFlow (None, 38, 38, 3, 2) 0 tf_op_layer_split_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Tile/multiples (Ten (5,) 0 tf_op_layer_strided_slice[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sigmoid_3 (TensorFl (None, 19, 19, 3, 2) 0 tf_op_layer_split_4[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Tile_1/multiples (T (5,) 0 tf_op_layer_strided_slice_1[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul (TensorFlowOpLa (None, 38, 38, 3, 2) 0 tf_op_layer_Sigmoid[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Tile (TensorFlowOpL (None, 38, 38, 3, 2) 0 tf_op_layer_Tile/multiples[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul_4 (TensorFlowOp (None, 19, 19, 3, 2) 0 tf_op_layer_Sigmoid_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Tile_1 (TensorFlowO (None, 19, 19, 3, 2) 0 tf_op_layer_Tile_1/multiples[0][0\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sub (TensorFlowOpLa (None, 38, 38, 3, 2) 0 tf_op_layer_Mul[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Cast (TensorFlowOpL (None, 38, 38, 3, 2) 0 tf_op_layer_Tile[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sub_1 (TensorFlowOp (None, 19, 19, 3, 2) 0 tf_op_layer_Mul_4[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Cast_1 (TensorFlowO (None, 19, 19, 3, 2) 0 tf_op_layer_Tile_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_AddV2 (TensorFlowOp (None, 38, 38, 3, 2) 0 tf_op_layer_Sub[0][0] \n",
+ " tf_op_layer_Cast[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Exp (TensorFlowOpLa (None, 38, 38, 3, 2) 0 tf_op_layer_split_3[0][1] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_AddV2_1 (TensorFlow (None, 19, 19, 3, 2) 0 tf_op_layer_Sub_1[0][0] \n",
+ " tf_op_layer_Cast_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Exp_1 (TensorFlowOp (None, 19, 19, 3, 2) 0 tf_op_layer_split_4[0][1] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul_1 (TensorFlowOp (None, 38, 38, 3, 2) 0 tf_op_layer_AddV2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul_2 (TensorFlowOp (None, 38, 38, 3, 2) 0 tf_op_layer_Exp[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul_5 (TensorFlowOp (None, 19, 19, 3, 2) 0 tf_op_layer_AddV2_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul_6 (TensorFlowOp (None, 19, 19, 3, 2) 0 tf_op_layer_Exp_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_7 (TensorFlo (None, 38, 38, 3, 4) 0 tf_op_layer_Mul_1[0][0] \n",
+ " tf_op_layer_Mul_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_2/shape (Te (3,) 0 tf_op_layer_strided_slice[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_8 (TensorFlo (None, 19, 19, 3, 4) 0 tf_op_layer_Mul_5[0][0] \n",
+ " tf_op_layer_Mul_6[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_5/shape (Te (3,) 0 tf_op_layer_strided_slice_1[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sigmoid_1 (TensorFl (None, 38, 38, 3, 1) 0 tf_op_layer_split_3[0][2] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sigmoid_2 (TensorFl (None, 38, 38, 3, 3) 0 tf_op_layer_split_3[0][3] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sigmoid_4 (TensorFl (None, 19, 19, 3, 1) 0 tf_op_layer_split_4[0][2] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Sigmoid_5 (TensorFl (None, 19, 19, 3, 3) 0 tf_op_layer_split_4[0][3] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_2 (TensorFl (None, None, 4) 0 tf_op_layer_concat_7[0][0] \n",
+ " tf_op_layer_Reshape_2/shape[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_5 (TensorFl (None, None, 4) 0 tf_op_layer_concat_8[0][0] \n",
+ " tf_op_layer_Reshape_5/shape[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul_3 (TensorFlowOp (None, 38, 38, 3, 3) 0 tf_op_layer_Sigmoid_1[0][0] \n",
+ " tf_op_layer_Sigmoid_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_1/shape (Te (3,) 0 tf_op_layer_strided_slice[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Mul_7 (TensorFlowOp (None, 19, 19, 3, 3) 0 tf_op_layer_Sigmoid_4[0][0] \n",
+ " tf_op_layer_Sigmoid_5[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_4/shape (Te (3,) 0 tf_op_layer_strided_slice_1[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_9 (TensorFlo (None, None, 4) 0 tf_op_layer_Reshape_2[0][0] \n",
+ " tf_op_layer_Reshape_5[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_1 (TensorFl (None, None, 3) 0 tf_op_layer_Mul_3[0][0] \n",
+ " tf_op_layer_Reshape_1/shape[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_4 (TensorFl (None, None, 3) 0 tf_op_layer_Mul_7[0][0] \n",
+ " tf_op_layer_Reshape_4/shape[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_2 (TensorFlow (3,) 0 tf_op_layer_concat_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_10 (TensorFl (None, None, 3) 0 tf_op_layer_Reshape_1[0][0] \n",
+ " tf_op_layer_Reshape_4[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_2 (Te (2,) 0 tf_op_layer_Shape_2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Max (TensorFlowOpLa (None, None) 0 tf_op_layer_concat_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_5 (TensorFlow (3,) 0 tf_op_layer_concat_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_3 (TensorFlow (3,) 0 tf_op_layer_concat_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Prod (TensorFlowOpL () 0 tf_op_layer_strided_slice_2[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_4 (TensorFlow (3,) 0 tf_op_layer_concat_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_GreaterEqual (Tenso (None, None) 0 tf_op_layer_Max[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_5 (Te (2,) 0 tf_op_layer_Shape_5[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_3 (Te (0,) 0 tf_op_layer_Shape_3[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_11/values_1 (1,) 0 tf_op_layer_Prod[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_4 (Te (1,) 0 tf_op_layer_Shape_4[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_7 (TensorFl (None,) 0 tf_op_layer_GreaterEqual[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_6 (TensorFlow (3,) 0 tf_op_layer_concat_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Prod_1 (TensorFlowO () 0 tf_op_layer_strided_slice_5[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_7 (TensorFlow (3,) 0 tf_op_layer_concat_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_11 (TensorFl (2,) 0 tf_op_layer_strided_slice_3[0][0]\n",
+ " tf_op_layer_concat_11/values_1[0]\n",
+ " tf_op_layer_strided_slice_4[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Where (TensorFlowOp (None, 1) 0 tf_op_layer_Reshape_7[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_6 (Te (0,) 0 tf_op_layer_Shape_6[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_12/values_1 (1,) 0 tf_op_layer_Prod_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_7 (Te (1,) 0 tf_op_layer_Shape_7[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_9 (TensorFl (None,) 0 tf_op_layer_GreaterEqual[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_6 (TensorFl (None, 4) 0 tf_op_layer_concat_9[0][0] \n",
+ " tf_op_layer_concat_11[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Squeeze (TensorFlow (None,) 0 tf_op_layer_Where[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_12 (TensorFl (2,) 0 tf_op_layer_strided_slice_6[0][0]\n",
+ " tf_op_layer_concat_12/values_1[0]\n",
+ " tf_op_layer_strided_slice_7[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Where_1 (TensorFlow (None, 1) 0 tf_op_layer_Reshape_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_GatherV2 (TensorFlo (None, 4) 0 tf_op_layer_Reshape_6[0][0] \n",
+ " tf_op_layer_Squeeze[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_8 (TensorFl (None, 3) 0 tf_op_layer_concat_10[0][0] \n",
+ " tf_op_layer_concat_12[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Squeeze_1 (TensorFl (None,) 0 tf_op_layer_Where_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_8 (TensorFlow (3,) 0 tf_op_layer_concat_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_9 (TensorFlow (2,) 0 tf_op_layer_GatherV2[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_GatherV2_1 (TensorF (None, 3) 0 tf_op_layer_Reshape_8[0][0] \n",
+ " tf_op_layer_Squeeze_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_8 (Te () 0 tf_op_layer_Shape_8[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_9 (Te () 0 tf_op_layer_Shape_9[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_10 (TensorFlo (3,) 0 tf_op_layer_concat_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Shape_11 (TensorFlo (2,) 0 tf_op_layer_GatherV2_1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_10/shape (T (3,) 0 tf_op_layer_strided_slice_8[0][0]\n",
+ " tf_op_layer_strided_slice_9[0][0]\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_10 (T () 0 tf_op_layer_Shape_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_strided_slice_11 (T () 0 tf_op_layer_Shape_11[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_10 (TensorF (None, None, None) 0 tf_op_layer_GatherV2[0][0] \n",
+ " tf_op_layer_Reshape_10/shape[0][0\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_11/shape (T (3,) 0 tf_op_layer_strided_slice_10[0][0\n",
+ " tf_op_layer_strided_slice_11[0][0\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_RealDiv (TensorFlow (None, None, None) 0 tf_op_layer_Reshape_10[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_Reshape_11 (TensorF (None, None, None) 0 tf_op_layer_GatherV2_1[0][0] \n",
+ " tf_op_layer_Reshape_11/shape[0][0\n",
+ "__________________________________________________________________________________________________\n",
+ "tf_op_layer_concat_14 (TensorFl (None, None, None) 0 tf_op_layer_RealDiv[0][0] \n",
+ " tf_op_layer_Reshape_11[0][0] \n",
+ "==================================================================================================\n",
+ "Total params: 5,884,944\n",
+ "Trainable params: 5,878,736\n",
+ "Non-trainable params: 6,208\n",
+ "__________________________________________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "yolo_tiny_model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "id": "c5bbaf48",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 4,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "yolo_tiny_model.inputs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "id": "26564464",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "[]"
+ ]
+ },
+ "execution_count": 5,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "yolo_tiny_model.outputs"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "53d99a56",
+ "metadata": {},
+ "source": [
+ "# read labels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "id": "ce781052",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def read_labels(labels_path):\n",
+ " with open(labels_path) as f:\n",
+ " labels = f.readlines()\n",
+ " labels = [c.strip() for c in labels]\n",
+ " return labels"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "id": "f4d7366b",
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "['Person', 'Hat', 'Vest']"
+ ]
+ },
+ "execution_count": 7,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "labels = read_labels('data/classes/ppe_classes.names.txt')\n",
+ "labels"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "1456c0b9",
+ "metadata": {},
+ "source": [
+ "# decoder model"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "id": "3b00ce1c",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "num_classes = len(labels)\n",
+ "total_len = 4 + num_classes"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "id": "2ead3a26",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "inputs = tf.keras.layers.Input(shape=(None, total_len), name='model_input')\n",
+ "slice_x0 = tf.keras.layers.Lambda( lambda x: x[..., 0], name = 'all_x0')(inputs)\n",
+ "slice_y0 = tf.keras.layers.Lambda(lambda x: x[..., 1], name = 'all_y0')(inputs)\n",
+ "\n",
+ "\n",
+ "slice_width = tf.keras.layers.Lambda(lambda x: x[..., 2], name = 'all_widths')(inputs)\n",
+ "\n",
+ "\n",
+ "slice_height = tf.keras.layers.Lambda( lambda x: x[..., 3], name = 'all_heights')(inputs)\n",
+ "\n",
+ "# slice_height = tf.keras.layers.Subtract()([slice_y1, slice_y0])\n",
+ "# slice_width = tf.keras.layers.Subtract()([slice_x1, slice_x0])\n",
+ "\n",
+ "# concatenate_x0_y0 = tf.keras.layers.Concatenate(axis = 0, name='concatenate_x0_y0')([slice_x0, slice_y0, slice_x1, slice_y1])\n",
+ "# transpose_dims = tf.keras.layers.Lambda(lambda x: tf.transpose(x), name ='all_boxes')(concatenate_x0_y0)\n",
+ "\n",
+ "concatenate_x0_y0_x1_y1 = tf.keras.layers.Concatenate(axis = 0, name='concatenate_x0_y0_x1_y1')([slice_x0, slice_y0, slice_width, slice_height])\n",
+ "x_y_w_h = tf.keras.layers.Lambda(lambda x: tf.transpose(x), name ='x_y_w_h')(concatenate_x0_y0_x1_y1)\n",
+ "# normalized_x_y_w_h = tf.keras.layers.Lambda(lambda x: x/608.0, name ='normalized_x_y_w_h')(x_y_w_h)\n",
+ "\n",
+ "slice_confidences = tf.keras.layers.Lambda(lambda x: x[..., 4:], name = 'slice_confidences')(inputs)\n",
+ "scores = tf.keras.layers.Lambda(lambda x: tf.squeeze(x, axis=0), name = 'all_scores')(slice_confidences)\n",
+ "\n",
+ "\n",
+ "decoder_model = tf.keras.Model(inputs = [inputs], outputs = [x_y_w_h, scores ], name = 'yolo_decoder')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "id": "d05f9402",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: \"yolo_decoder\"\n",
+ "__________________________________________________________________________________________________\n",
+ "Layer (type) Output Shape Param # Connected to \n",
+ "==================================================================================================\n",
+ "model_input (InputLayer) [(None, None, 7)] 0 \n",
+ "__________________________________________________________________________________________________\n",
+ "all_x0 (Lambda) (None, None) 0 model_input[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "all_y0 (Lambda) (None, None) 0 model_input[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "all_widths (Lambda) (None, None) 0 model_input[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "all_heights (Lambda) (None, None) 0 model_input[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "concatenate_x0_y0_x1_y1 (Concat (None, None) 0 all_x0[0][0] \n",
+ " all_y0[0][0] \n",
+ " all_widths[0][0] \n",
+ " all_heights[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "slice_confidences (Lambda) (None, None, 3) 0 model_input[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "x_y_w_h (Lambda) (None, None) 0 concatenate_x0_y0_x1_y1[0][0] \n",
+ "__________________________________________________________________________________________________\n",
+ "all_scores (Lambda) (None, 3) 0 slice_confidences[0][0] \n",
+ "==================================================================================================\n",
+ "Total params: 0\n",
+ "Trainable params: 0\n",
+ "Non-trainable params: 0\n",
+ "__________________________________________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "decoder_model.summary()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ebed1561",
+ "metadata": {},
+ "source": [
+ "# merge yolo and decoder"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 11,
+ "id": "f39d69ca",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Model: \"functional_1\"\n",
+ "_________________________________________________________________\n",
+ "Layer (type) Output Shape Param # \n",
+ "=================================================================\n",
+ "input_1 (InputLayer) [(None, 608, 608, 3)] 0 \n",
+ "_________________________________________________________________\n",
+ "functional_1 (Functional) (None, None, None) 5884944 \n",
+ "_________________________________________________________________\n",
+ "yolo_decoder (Functional) [(None, None), (None, 3)] 0 \n",
+ "=================================================================\n",
+ "Total params: 5,884,944\n",
+ "Trainable params: 5,878,736\n",
+ "Non-trainable params: 6,208\n",
+ "_________________________________________________________________\n"
+ ]
+ }
+ ],
+ "source": [
+ "x = yolo_tiny_model.outputs[0]\n",
+ "x.shape\n",
+ "decoder_model.input.shape\n",
+ "\n",
+ "inputs = tf.keras.layers.Input(shape=(608, 608, 3))\n",
+ "x = yolo_tiny_model(inputs)\n",
+ "predictions = decoder_model(x)\n",
+ "combined_model = tf.keras.Model(inputs=inputs, outputs=predictions)\n",
+ "combined_model.summary()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 12,
+ "id": "9fbafdff",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#save combined model\n",
+ "combined_model.save('yolo_decoded.h5')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "fc197e32",
+ "metadata": {},
+ "source": [
+ "# check predictions"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 13,
+ "id": "f95140b4",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import colorsys\n",
+ "import random\n",
+ "\n",
+ "def generate_colors(class_names):\n",
+ " hsv_tuples = [(x / len(class_names), 1., 1.) for x in range(len(class_names))]\n",
+ " colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))\n",
+ " colors = list(map(lambda x: (int(x[0] ), int(x[1] ), int(x[2] )), colors))\n",
+ " random.seed(10101) # Fixed seed for consistent colors across runs.\n",
+ " random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes.\n",
+ " random.seed(None) # Reset seed to default.\n",
+ " return colors\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 14,
+ "id": "c6c3a4d8",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from numpy import expand_dims\n",
+ "from tensorflow.keras.preprocessing.image import load_img\n",
+ "from tensorflow.keras.preprocessing.image import img_to_array\n",
+ "\n",
+ "# load and prepare an image\n",
+ "def load_image_pixels(filename, shape):\n",
+ " # load the image to get its shape\n",
+ " image = load_img(filename)\n",
+ " width, height = image.size\n",
+ " # load the image with the required size\n",
+ " image = load_img(filename, interpolation = 'bilinear', target_size=shape)\n",
+ " # convert to numpy array\n",
+ " image = img_to_array(image)\n",
+ " # scale pixel values to [0, 1]\n",
+ " image = image.astype('float32')\n",
+ " image /= 255.0\n",
+ "\n",
+ " # add a dimension so that we have one sample\n",
+ " image = expand_dims(image, 0)\n",
+ " \n",
+ " return image, width, height"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 15,
+ "id": "6ef44649",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from matplotlib import pyplot\n",
+ "from matplotlib.patches import Rectangle\n",
+ "\n",
+ "def draw_preds_bbs(file_name, preds_pipeline, is_coreml = False):\n",
+ " print(preds_pipeline[0].shape)\n",
+ " print(preds_pipeline[1].shape)\n",
+ " boxes = preds_pipeline[0]\n",
+ " scores = preds_pipeline[1]\n",
+ "# boxes = preds_pipeline[2]\n",
+ "\n",
+ " print('start drawing boxes')\n",
+ " data = pyplot.imread(file_name)\n",
+ " ax = pyplot.gca()\n",
+ " colors = generate_colors(labels)\n",
+ " for i in range(len(boxes)):\n",
+ " index = i\n",
+ " box = boxes[index]\n",
+ " class_id = np.argmax(scores[i])\n",
+ " score = scores[i][class_id]\n",
+ " if is_coreml == False :\n",
+ " x1, y1, height, width = box[0], box[1], box[2], box[3]\n",
+ " else:\n",
+ " x1, y1, width, height = box[0], box[1], box[2], box[3]\n",
+ " \n",
+ " x1, y1, width, height = box[0], box[1], box[2], box[3]\n",
+ " \n",
+ " x1 = x1- width/2.0\n",
+ " y1 = y1- height/2.0\n",
+ " \n",
+ " rect = Rectangle(\n",
+ " (x1 * image_w, y1 * image_h),\n",
+ " width * image_w,\n",
+ " height * image_h,\n",
+ " fill=False,\n",
+ " color=colors[class_id]\n",
+ " )\n",
+ " ax.add_patch(rect)\n",
+ " label = \"%s (%.3f)\" % (labels[class_id], score*100)\n",
+ " pyplot.text(\n",
+ " x1* image_w,\n",
+ " y1* image_h,\n",
+ " label, \n",
+ " color= 'white'\n",
+ " )\n",
+ " pyplot.imshow(data)\n",
+ " pyplot.show()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "id": "59e0c487",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "image initial size: 1280 720\n",
+ "input image (1, 608, 608, 3)\n"
+ ]
+ }
+ ],
+ "source": [
+ "# Pre-process the image\n",
+ "input_w, input_h = 608, 608\n",
+ "photo_filename = 'sample_test_images/0.jpg'\n",
+ "# photo_filename = 'empty_image.jpg'\n",
+ "image, image_w, image_h = load_image_pixels(photo_filename, (input_w, input_h))\n",
+ "print(\"image initial size: \", image_w, image_h)\n",
+ "print(\"input image\",image.shape)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "id": "4f222aa7",
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "(21, 4)\n",
+ "(21, 3)\n",
+ "start drawing boxes\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "\n",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {
+ "needs_background": "light"
+ },
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "ytinyhat = combined_model.predict(image)\n",
+ "# print(f\"ytinyhat shape : {ytinyhat.shape}\")\n",
+ "draw_preds_bbs(photo_filename, ytinyhat)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "bfb8b1f9",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.8.13"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/README.md b/README.md
index 8b6da877..e6ee5e1d 100644
--- a/README.md
+++ b/README.md
@@ -3,62 +3,142 @@
YOLOv4, YOLOv4-tiny Implemented in Tensorflow 2.0.
Convert YOLO v4, YOLOv3, YOLO tiny .weights to .pb, .tflite and trt format for tensorflow, tensorflow lite, tensorRT.
+
-Download yolov4.weights file: https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT
+## Getting Started
+### Conda (Recommended)
+```bash
+# Tensorflow CPU
+conda env create -f conda-cpu.yml
+conda activate yolov4-cpu
+
+# Tensorflow GPU
+conda env create -f conda-gpu.yml
+conda activate yolov4-gpu
+```
-### Prerequisites
-* Tensorflow 2.3.0rc0
+### Pip
+```bash
+# TensorFlow CPU
+pip install -r requirements.txt
+
+# TensorFlow GPU
+pip install -r requirements-gpu.txt
+```
+### Nvidia Driver (For GPU, if you are not using Conda Environment and haven't set up CUDA yet)
+Make sure to use CUDA Toolkit version 10.1 as it is the proper version for the TensorFlow version used in this repository.
+https://developer.nvidia.com/cuda-10.1-download-archive-update2
### Performance
-
+Check out how YOLOv4 compares to other object detection systems.
+
+
+
+## Downloading Official Pre-trained Weights
+YOLOv4 comes pre-trained and able to detect 80 classes. For easy demo purposes we will use the pre-trained weights.
+Download pre-trained yolov4.weights file: https://drive.google.com/open?id=1cewMfusmPjYWbrnuJRuKhPMwRe_b9PaT
+
+Copy and paste yolov4.weights from your downloads folder into the 'data' folder of this repository.
+
+If you want to use yolov4-tiny.weights, a smaller model that is faster at running detections but less accurate, download file here: https://github.com/AlexeyAB/darknet/releases/download/darknet_yolo_v4_pre/yolov4-tiny.weights
+
+## Using Custom Trained YOLOv4 Weights
+Learn How To Train Custom YOLOv4 Weights here: https://www.youtube.com/watch?v=mmj3nxGT2YQ
+
+USE MY LICENSE PLATE TRAINED CUSTOM WEIGHTS: https://drive.google.com/file/d/1EUPtbtdF0bjRtNjGv436vDY28EN5DXDH/view?usp=sharing
+
+Copy and paste your custom .weights file into the 'data' folder and copy and paste your custom .names into the 'data/classes/' folder.
+
+The only change within the code you need to make in order for your custom model to work is on line 14 of 'core/config.py' file.
+Update the code to point at your custom .names file as seen below. (my custom .names file is called custom.names but yours might be named differently)
+
-### Demo
+Note: If you are using the pre-trained yolov4 then make sure that line 14 still points to coco.names.
+## YOLOv4 Using Tensorflow (tf, .pb model)
+To implement YOLOv4 using TensorFlow, first we convert the .weights into the corresponding TensorFlow model files and then run the model.
```bash
# Convert darknet weights to tensorflow
## yolov4
python save_model.py --weights ./data/yolov4.weights --output ./checkpoints/yolov4-416 --input_size 416 --model yolov4
-## yolov4-tiny
+# yolov4-tiny
python save_model.py --weights ./data/yolov4-tiny.weights --output ./checkpoints/yolov4-tiny-416 --input_size 416 --model yolov4 --tiny
-# Run demo tensorflow
-python detect.py --weights ./checkpoints/yolov4-416 --size 416 --model yolov4 --image ./data/kite.jpg
+# custom yolov4
+python save_model.py --weights ./data/custom.weights --output ./checkpoints/custom-416 --input_size 416 --model yolov4
-python detect.py --weights ./checkpoints/yolov4-tiny-416 --size 416 --model yolov4 --image ./data/kite.jpg --tiny
+# Run yolov4 tensorflow model
+python detect.py --weights ./checkpoints/yolov4-416 --size 416 --model yolov4 --images ./data/images/kite.jpg
+# Run yolov4-tiny tensorflow model
+python detect.py --weights ./checkpoints/yolov4-tiny-416 --size 416 --model yolov4 --images ./data/images/kite.jpg --tiny
+
+# Run custom yolov4 tensorflow model
+python detect.py --weights ./checkpoints/custom-416 --size 416 --model yolov4 --images ./data/images/car.jpg
+
+# Run yolov4 on video
+python detect_video.py --weights ./checkpoints/yolov4-416 --size 416 --model yolov4 --video ./data/video/video.mp4 --output ./detections/results.avi
+
+# Run custom yolov4 model on video
+python detect_video.py --weights ./checkpoints/custom-416 --size 416 --model yolov4 --video ./data/video/cars.mp4 --output ./detections/results.avi
+
+# Run yolov4 on webcam
+python detect_video.py --weights ./checkpoints/yolov4-416 --size 416 --model yolov4 --video 0 --output ./detections/results.avi
```
-If you want to run yolov3 or yolov3-tiny change ``--model yolov3`` in command
+If you want to run yolov3 or yolov3-tiny change ``--model yolov3`` and .weights file in above commands.
-#### Output
+Note: You can also run the detector on multiple images at once by passing a comma-separated list to the --images flag, for example ``--images "./data/images/kite.jpg, ./data/images/dog.jpg"``
-##### Yolov4 original weight
-
+### Result Image(s) (Regular TensorFlow)
+You can find the outputted image(s) showing the detections saved within the 'detections' folder.
+#### Pre-trained YOLOv4 Model Example
+
-##### Yolov4 tflite int8
-
+#### Custom YOLOv4 Model Example (see video link above to train this model)
+
-### Convert to tflite
+### Result Video
+The video is saved to the path given by the --output flag. If you don't set the flag, the video with detections drawn on it will not be saved.
+
+## YOLOv4 Using TensorFlow Lite (.tflite model)
+YOLOv4 can also be run with TensorFlow Lite. A TensorFlow Lite model is much smaller, which makes it well suited to mobile or edge devices (Raspberry Pi, etc.).
```bash
# Save tf model for tflite converting
python save_model.py --weights ./data/yolov4.weights --output ./checkpoints/yolov4-416 --input_size 416 --model yolov4 --framework tflite
+# Save custom yolov4 tf model for tflite converting
+python save_model.py --weights ./data/custom.weights --output ./checkpoints/custom-416 --input_size 416 --model yolov4 --framework tflite
+
# yolov4
python convert_tflite.py --weights ./checkpoints/yolov4-416 --output ./checkpoints/yolov4-416.tflite
+# convert custom yolov4 tflite model
+python convert_tflite.py --weights ./checkpoints/custom-416 --output ./checkpoints/custom-416.tflite
+
# yolov4 quantize float16
python convert_tflite.py --weights ./checkpoints/yolov4-416 --output ./checkpoints/yolov4-416-fp16.tflite --quantize_mode float16
# yolov4 quantize int8
python convert_tflite.py --weights ./checkpoints/yolov4-416 --output ./checkpoints/yolov4-416-int8.tflite --quantize_mode int8 --dataset ./coco_dataset/coco/val207.txt
-# Run demo tflite model
-python detect.py --weights ./checkpoints/yolov4-416.tflite --size 416 --model yolov4 --image ./data/kite.jpg --framework tflite
+# Run tflite model
+python detect.py --weights ./checkpoints/yolov4-416.tflite --size 416 --model yolov4 --images ./data/images/kite.jpg --framework tflite
+
+# Run custom tflite model
+python detect.py --weights ./checkpoints/custom-416.tflite --size 416 --model yolov4 --images ./data/images/car.jpg --framework tflite
```
+### Result Image (TensorFlow Lite)
+You can find the outputted image(s) showing the detections saved within the 'detections' folder.
+#### TensorFlow Lite int8 Example
+
+
Yolov4 and Yolov4-tiny int8 quantization have some issues. I will try to fix that. You can try Yolov3 and Yolov3-tiny int8 quantization
-### Convert to TensorRT
+
+## YOLOv4 Using TensorRT
+YOLOv4 can also be run using TensorFlow's TensorRT integration. TensorRT is a high-performance inference optimizer and runtime that can be used to perform inference in lower precision (FP16 and INT8) on GPUs. TensorRT can allow up to 8x higher performance than regular TensorFlow.
```bash# yolov3
python save_model.py --weights ./data/yolov3.weights --output ./checkpoints/yolov3.tf --input_size 416 --model yolov3
python convert_trt.py --weights ./checkpoints/yolov3.tf --quantize_mode float16 --output ./checkpoints/yolov3-trt-fp16-416
@@ -70,9 +150,70 @@ python convert_trt.py --weights ./checkpoints/yolov3-tiny.tf --quantize_mode flo
# yolov4
python save_model.py --weights ./data/yolov4.weights --output ./checkpoints/yolov4.tf --input_size 416 --model yolov4
python convert_trt.py --weights ./checkpoints/yolov4.tf --quantize_mode float16 --output ./checkpoints/yolov4-trt-fp16-416
+python detect.py --weights ./checkpoints/yolov4-trt-fp16-416 --model yolov4 --images ./data/images/kite.jpg --framework trt
```
-### Evaluate on COCO 2017 Dataset
+## Command Line Args Reference
+
+```bash
+save_model.py:
+ --weights: path to weights file
+ (default: './data/yolov4.weights')
+ --output: path to output
+ (default: './checkpoints/yolov4-416')
+ --[no]tiny: yolov4 or yolov4-tiny
+ (default: 'False')
+ --input_size: define input size of export model
+ (default: 416)
+ --framework: what framework to use (tf, trt, tflite)
+ (default: tf)
+ --model: yolov3 or yolov4
+ (default: yolov4)
+
+detect.py:
+ --images: path to input images as a string with images separated by ","
+ (default: './data/images/kite.jpg')
+ --output: path to output folder
+ (default: './detections/')
+ --[no]tiny: yolov4 or yolov4-tiny
+ (default: 'False')
+ --weights: path to weights file
+ (default: './checkpoints/yolov4-416')
+ --framework: what framework to use (tf, trt, tflite)
+ (default: tf)
+ --model: yolov3 or yolov4
+ (default: yolov4)
+ --size: resize images to
+ (default: 416)
+ --iou: iou threshold
+ (default: 0.45)
+ --score: confidence threshold
+ (default: 0.25)
+
+detect_video.py:
+ --video: path to input video (use 0 for webcam)
+ (default: './data/video/video.mp4')
+ --output: path to output video (remember to set right codec for given format. e.g. XVID for .avi)
+ (default: None)
+ --output_format: codec used in VideoWriter when saving video to file
+ (default: 'XVID')
+ --[no]tiny: yolov4 or yolov4-tiny
+ (default: 'false')
+ --weights: path to weights file
+ (default: './checkpoints/yolov4-416')
+ --framework: what framework to use (tf, trt, tflite)
+ (default: tf)
+ --model: yolov3 or yolov4
+ (default: yolov4)
+ --size: resize images to
+ (default: 416)
+ --iou: iou threshold
+ (default: 0.45)
+ --score: confidence threshold
+ (default: 0.25)
+```
+
+## Evaluate on COCO 2017 Dataset
```bash
# run script in /script/get_coco_dataset_2017.sh to download COCO 2017 Dataset
# preprocess coco dataset
@@ -98,7 +239,7 @@ python main.py --output results_yolov4_tf
| YoloV3 | 55.43 | 52.32 | |
| YoloV4 | 61.96 | 57.33 | |
-### Benchmark
+## Benchmark
```bash
python benchmarks.py --size 416 --model yolov4 --weights ./data/yolov4.weights
```
@@ -144,7 +285,7 @@ python benchmarks.py --size 416 --model yolov4 --weights ./data/yolov4.weights
| YoloV3 FPS | | | |
| YoloV4 FPS | | | |
-### Traning your own model
+## Training your own model in TensorFlow
```bash
# Prepare your dataset
# If you want to train from scratch:
@@ -157,7 +298,7 @@ python train.py --weights ./data/yolov4.weights
```
The training performance is not fully reproduced yet, so I recommended to use Alex's [Darknet](https://github.com/AlexeyAB/darknet) to train your own data, then convert the .weights to tensorflow or tflite.
-
+Use this video to train your own model easily in Google Colab: https://www.youtube.com/watch?v=mmj3nxGT2YQ
### TODO
* [x] Convert YOLOv4 to TensorRT
diff --git a/benchmarks.py b/benchmarks.py
index c8b2d736..a8405924 100644
--- a/benchmarks.py
+++ b/benchmarks.py
@@ -15,7 +15,7 @@
flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt')
flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
flags.DEFINE_string('weights', './data/yolov4.weights', 'path to weights file')
-flags.DEFINE_string('image', './data/kite.jpg', 'path to input image')
+flags.DEFINE_string('images', './data/images/kite.jpg', 'path to input image')
flags.DEFINE_integer('size', 416, 'resize images to')
diff --git a/conda-cpu.yml b/conda-cpu.yml
new file mode 100644
index 00000000..396e0820
--- /dev/null
+++ b/conda-cpu.yml
@@ -0,0 +1,15 @@
+name: yolov4-cpu
+
+dependencies:
+ - python==3.7
+ - pip
+ - matplotlib
+ - opencv
+ - pip:
+ - opencv-python==4.1.1.26
+ - lxml
+ - tqdm
+ - tensorflow==2.3.0rc0
+ - absl-py
+ - easydict
+ - pillow
\ No newline at end of file
diff --git a/conda-gpu.yml b/conda-gpu.yml
new file mode 100644
index 00000000..77ce7113
--- /dev/null
+++ b/conda-gpu.yml
@@ -0,0 +1,17 @@
+name: yolov4-gpu
+
+dependencies:
+ - python==3.7
+ - pip
+ - matplotlib
+ - opencv
+ - cudnn
+ - cudatoolkit==10.1.243
+ - pip:
+ - tensorflow-gpu==2.3.0rc0
+ - opencv-python==4.1.1.26
+ - lxml
+ - tqdm
+ - absl-py
+ - easydict
+ - pillow
diff --git a/core/__pycache__/backbone.cpython-37.pyc b/core/__pycache__/backbone.cpython-37.pyc
new file mode 100644
index 00000000..839dd1f1
Binary files /dev/null and b/core/__pycache__/backbone.cpython-37.pyc differ
diff --git a/core/__pycache__/backbone.cpython-38.pyc b/core/__pycache__/backbone.cpython-38.pyc
new file mode 100644
index 00000000..d8aa4f2d
Binary files /dev/null and b/core/__pycache__/backbone.cpython-38.pyc differ
diff --git a/core/__pycache__/common.cpython-37.pyc b/core/__pycache__/common.cpython-37.pyc
new file mode 100644
index 00000000..b28006d8
Binary files /dev/null and b/core/__pycache__/common.cpython-37.pyc differ
diff --git a/core/__pycache__/common.cpython-38.pyc b/core/__pycache__/common.cpython-38.pyc
new file mode 100644
index 00000000..0b60fe2c
Binary files /dev/null and b/core/__pycache__/common.cpython-38.pyc differ
diff --git a/core/__pycache__/config.cpython-37.pyc b/core/__pycache__/config.cpython-37.pyc
new file mode 100644
index 00000000..1cff50dc
Binary files /dev/null and b/core/__pycache__/config.cpython-37.pyc differ
diff --git a/core/__pycache__/utils.cpython-36.pyc b/core/__pycache__/utils.cpython-36.pyc
new file mode 100644
index 00000000..28ed8af8
Binary files /dev/null and b/core/__pycache__/utils.cpython-36.pyc differ
diff --git a/core/__pycache__/utils.cpython-37.pyc b/core/__pycache__/utils.cpython-37.pyc
new file mode 100644
index 00000000..c9ed088b
Binary files /dev/null and b/core/__pycache__/utils.cpython-37.pyc differ
diff --git a/core/__pycache__/utils.cpython-38.pyc b/core/__pycache__/utils.cpython-38.pyc
new file mode 100644
index 00000000..a424aa42
Binary files /dev/null and b/core/__pycache__/utils.cpython-38.pyc differ
diff --git a/core/__pycache__/yolov4.cpython-36.pyc b/core/__pycache__/yolov4.cpython-36.pyc
new file mode 100644
index 00000000..bf13aa58
Binary files /dev/null and b/core/__pycache__/yolov4.cpython-36.pyc differ
diff --git a/core/__pycache__/yolov4.cpython-37.pyc b/core/__pycache__/yolov4.cpython-37.pyc
new file mode 100644
index 00000000..0a0efaa0
Binary files /dev/null and b/core/__pycache__/yolov4.cpython-37.pyc differ
diff --git a/core/__pycache__/yolov4.cpython-38.pyc b/core/__pycache__/yolov4.cpython-38.pyc
new file mode 100644
index 00000000..e6fca0d2
Binary files /dev/null and b/core/__pycache__/yolov4.cpython-38.pyc differ
diff --git a/core/config.py b/core/config.py
index 7ea25ff6..704fbef0 100644
--- a/core/config.py
+++ b/core/config.py
@@ -11,7 +11,7 @@
# YOLO options
__C.YOLO = edict()
-__C.YOLO.CLASSES = "./data/classes/coco.names"
+__C.YOLO.CLASSES = "./data/classes/ppe_classes.names.txt"
__C.YOLO.ANCHORS = [12,16, 19,36, 40,28, 36,75, 76,55, 72,146, 142,110, 192,243, 459,401]
__C.YOLO.ANCHORS_V3 = [10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326]
__C.YOLO.ANCHORS_TINY = [23,27, 37,58, 81,82, 81,82, 135,169, 344,319]
diff --git a/core/utils.py b/core/utils.py
index 87f7d158..641e798b 100644
--- a/core/utils.py
+++ b/core/utils.py
@@ -124,7 +124,7 @@ def image_preprocess(image, target_size, gt_boxes=None):
gt_boxes[:, [1, 3]] = gt_boxes[:, [1, 3]] * scale + dh
return image_paded, gt_boxes
-def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_label=True):
+def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), allowed_classes=list(read_class_names(cfg.YOLO.CLASSES).values()), show_label=True):
num_classes = len(classes)
image_h, image_w, _ = image.shape
hsv_tuples = [(1.0 * x / num_classes, 1., 1.) for x in range(num_classes)]
@@ -147,19 +147,25 @@ def draw_bbox(image, bboxes, classes=read_class_names(cfg.YOLO.CLASSES), show_la
fontScale = 0.5
score = out_scores[0][i]
class_ind = int(out_classes[0][i])
- bbox_color = colors[class_ind]
- bbox_thick = int(0.6 * (image_h + image_w) / 600)
- c1, c2 = (coor[1], coor[0]), (coor[3], coor[2])
- cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
-
- if show_label:
- bbox_mess = '%s: %.2f' % (classes[class_ind], score)
- t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
- c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
- cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) #filled
-
- cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
- fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
+ class_name = classes[class_ind]
+
+ # check if class is in allowed classes
+ if class_name not in allowed_classes:
+ continue
+ else:
+ bbox_color = colors[class_ind]
+ bbox_thick = int(0.6 * (image_h + image_w) / 600)
+ c1, c2 = (coor[1], coor[0]), (coor[3], coor[2])
+ cv2.rectangle(image, c1, c2, bbox_color, bbox_thick)
+
+ if show_label:
+ bbox_mess = '%s: %.2f' % (classes[class_ind], score)
+ t_size = cv2.getTextSize(bbox_mess, 0, fontScale, thickness=bbox_thick // 2)[0]
+ c3 = (c1[0] + t_size[0], c1[1] - t_size[1] - 3)
+ cv2.rectangle(image, c1, (np.float32(c3[0]), np.float32(c3[1])), bbox_color, -1) #filled
+
+ cv2.putText(image, bbox_mess, (c1[0], np.float32(c1[1] - 2)), cv2.FONT_HERSHEY_SIMPLEX,
+ fontScale, (0, 0, 0), bbox_thick // 2, lineType=cv2.LINE_AA)
return image
def bbox_iou(bboxes1, bboxes2):
diff --git a/core/yolov4.py b/core/yolov4.py
index 1edd656d..ac2a1cb5 100644
--- a/core/yolov4.py
+++ b/core/yolov4.py
@@ -296,6 +296,7 @@ def filter_boxes(box_xywh, scores, score_threshold=0.4, input_shape = tf.constan
class_boxes = tf.boolean_mask(box_xywh, mask)
pred_conf = tf.boolean_mask(scores, mask)
class_boxes = tf.reshape(class_boxes, [tf.shape(scores)[0], -1, tf.shape(class_boxes)[-1]])
+ class_boxes = class_boxes/float(input_shape[0])
pred_conf = tf.reshape(pred_conf, [tf.shape(scores)[0], -1, tf.shape(pred_conf)[-1]])
box_xy, box_wh = tf.split(class_boxes, (2, 2), axis=-1)
@@ -305,16 +306,30 @@ def filter_boxes(box_xywh, scores, score_threshold=0.4, input_shape = tf.constan
box_yx = box_xy[..., ::-1]
box_hw = box_wh[..., ::-1]
- box_mins = (box_yx - (box_hw / 2.)) / input_shape
- box_maxes = (box_yx + (box_hw / 2.)) / input_shape
+ # box_mins = (box_yx - (box_hw / 2.)) / input_shape
+ # box_maxes = (box_yx + (box_hw / 2.)) / input_shape
+
+ box_mins = (box_yx) / input_shape
+ box_maxes = (box_hw) / input_shape
+
+
+ # boxes = tf.concat([
+ # box_mins[..., 0:1], # y_min
+ # box_mins[..., 1:2], # x_min
+ # box_maxes[..., 0:1], # y_max
+ # box_maxes[..., 1:2] # x_max
+ # ], axis=-1)
+
boxes = tf.concat([
- box_mins[..., 0:1], # y_min
box_mins[..., 1:2], # x_min
- box_maxes[..., 0:1], # y_max
- box_maxes[..., 1:2] # x_max
+ box_mins[..., 0:1], # y_min
+ box_maxes[..., 1:2], # width
+ box_maxes[..., 0:1], # height
+
], axis=-1)
+
# return tf.concat([boxes, pred_conf], axis=-1)
- return (boxes, pred_conf)
+ return (class_boxes, pred_conf)
def compute_loss(pred, conv, label, bboxes, STRIDES, NUM_CLASS, IOU_LOSS_THRESH, i=0):
diff --git a/data/classes/ppe_classes.names.txt b/data/classes/ppe_classes.names.txt
new file mode 100644
index 00000000..f0e5eae0
--- /dev/null
+++ b/data/classes/ppe_classes.names.txt
@@ -0,0 +1,3 @@
+Person
+Hat
+Vest
diff --git a/data/girl.png b/data/girl.png
deleted file mode 100644
index 0a5fce32..00000000
Binary files a/data/girl.png and /dev/null differ
diff --git a/data/helpers/custom_config.png b/data/helpers/custom_config.png
new file mode 100644
index 00000000..12397c08
Binary files /dev/null and b/data/helpers/custom_config.png differ
diff --git a/data/helpers/custom_result.png b/data/helpers/custom_result.png
new file mode 100644
index 00000000..c0f95c34
Binary files /dev/null and b/data/helpers/custom_result.png differ
diff --git a/data/helpers/demo.gif b/data/helpers/demo.gif
new file mode 100644
index 00000000..078bd02d
Binary files /dev/null and b/data/helpers/demo.gif differ
diff --git a/data/performance.png b/data/helpers/performance.png
similarity index 100%
rename from data/performance.png
rename to data/helpers/performance.png
diff --git a/data/helpers/result-int8.png b/data/helpers/result-int8.png
new file mode 100644
index 00000000..13be85cf
Binary files /dev/null and b/data/helpers/result-int8.png differ
diff --git a/data/helpers/result.png b/data/helpers/result.png
new file mode 100644
index 00000000..3f3bec1b
Binary files /dev/null and b/data/helpers/result.png differ
diff --git a/data/kite.jpg b/data/kite.jpg
deleted file mode 100644
index 9eb325ac..00000000
Binary files a/data/kite.jpg and /dev/null differ
diff --git a/data/road.mp4 b/data/road.mp4
deleted file mode 100755
index f65149db..00000000
Binary files a/data/road.mp4 and /dev/null differ
diff --git a/detect.py b/detect.py
index 5b94027c..db237e0f 100644
--- a/detect.py
+++ b/detect.py
@@ -1,92 +1,109 @@
-import tensorflow as tf
-physical_devices = tf.config.experimental.list_physical_devices('GPU')
-if len(physical_devices) > 0:
- tf.config.experimental.set_memory_growth(physical_devices[0], True)
-from absl import app, flags, logging
-from absl.flags import FLAGS
-import core.utils as utils
-from core.yolov4 import filter_boxes
-from tensorflow.python.saved_model import tag_constants
-from PIL import Image
-import cv2
-import numpy as np
-from tensorflow.compat.v1 import ConfigProto
-from tensorflow.compat.v1 import InteractiveSession
-
-flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt')
-flags.DEFINE_string('weights', './checkpoints/yolov4-416',
- 'path to weights file')
-flags.DEFINE_integer('size', 416, 'resize images to')
-flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
-flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
-flags.DEFINE_string('image', './data/kite.jpg', 'path to input image')
-flags.DEFINE_string('output', 'result.png', 'path to output image')
-flags.DEFINE_float('iou', 0.45, 'iou threshold')
-flags.DEFINE_float('score', 0.25, 'score threshold')
-
-def main(_argv):
- config = ConfigProto()
- config.gpu_options.allow_growth = True
- session = InteractiveSession(config=config)
- STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
- input_size = FLAGS.size
- image_path = FLAGS.image
-
- original_image = cv2.imread(image_path)
- original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
-
- # image_data = utils.image_preprocess(np.copy(original_image), [input_size, input_size])
- image_data = cv2.resize(original_image, (input_size, input_size))
- image_data = image_data / 255.
- # image_data = image_data[np.newaxis, ...].astype(np.float32)
-
- images_data = []
- for i in range(1):
- images_data.append(image_data)
- images_data = np.asarray(images_data).astype(np.float32)
-
- if FLAGS.framework == 'tflite':
- interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
- interpreter.allocate_tensors()
- input_details = interpreter.get_input_details()
- output_details = interpreter.get_output_details()
- print(input_details)
- print(output_details)
- interpreter.set_tensor(input_details[0]['index'], images_data)
- interpreter.invoke()
- pred = [interpreter.get_tensor(output_details[i]['index']) for i in range(len(output_details))]
- if FLAGS.model == 'yolov3' and FLAGS.tiny == True:
- boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
- else:
- boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
- else:
- saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
- infer = saved_model_loaded.signatures['serving_default']
- batch_data = tf.constant(images_data)
- pred_bbox = infer(batch_data)
- for key, value in pred_bbox.items():
- boxes = value[:, :, 0:4]
- pred_conf = value[:, :, 4:]
-
- boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
- boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
- scores=tf.reshape(
- pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
- max_output_size_per_class=50,
- max_total_size=50,
- iou_threshold=FLAGS.iou,
- score_threshold=FLAGS.score
- )
- pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
- image = utils.draw_bbox(original_image, pred_bbox)
- # image = utils.draw_bbox(image_data*255, pred_bbox)
- image = Image.fromarray(image.astype(np.uint8))
- image.show()
- image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
- cv2.imwrite(FLAGS.output, image)
-
-if __name__ == '__main__':
- try:
- app.run(main)
- except SystemExit:
- pass
+import tensorflow as tf
+physical_devices = tf.config.experimental.list_physical_devices('GPU')
+if len(physical_devices) > 0:
+ tf.config.experimental.set_memory_growth(physical_devices[0], True)
+from absl import app, flags, logging
+from absl.flags import FLAGS
+import core.utils as utils
+from core.config import cfg
+from core.yolov4 import filter_boxes
+from tensorflow.python.saved_model import tag_constants
+from PIL import Image
+import cv2
+import numpy as np
+from tensorflow.compat.v1 import ConfigProto
+from tensorflow.compat.v1 import InteractiveSession
+
+flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt)')
+flags.DEFINE_string('weights', './checkpoints/yolov4-416',
+                    'path to weights file')
+flags.DEFINE_integer('size', 416, 'resize images to')
+flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
+flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
+flags.DEFINE_list('images', './data/images/kite.jpg', 'comma-separated paths to input images')
+flags.DEFINE_string('output', './detections/', 'path to output folder (must end with a path separator)')  # concatenated with the file name in main
+flags.DEFINE_float('iou', 0.45, 'iou threshold')
+flags.DEFINE_float('score', 0.25, 'score threshold')
+flags.DEFINE_boolean('dont_show', False, 'dont show image output')
+
+def main(_argv):
+    """Run detection on every image in --images and save annotated copies."""
+    config = ConfigProto()
+    config.gpu_options.allow_growth = True
+    # NOTE(review): session is never read below; presumably kept for its side effects.
+    session = InteractiveSession(config=config)
+    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
+    input_size = FLAGS.size
+    images = FLAGS.images
+
+    # Load the model once; this setup does not depend on the image.
+    if FLAGS.framework == 'tflite':
+        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
+        interpreter.allocate_tensors()
+        input_details = interpreter.get_input_details()
+        output_details = interpreter.get_output_details()
+        print(input_details)
+        print(output_details)
+    else:
+        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
+        infer = saved_model_loaded.signatures['serving_default']
+
+    # Class names and the allowed-class filter are the same for every image.
+    class_names = utils.read_class_names(cfg.YOLO.CLASSES)
+    # By default every class in the .names file is drawn. To restrict drawing, assign a
+    # list of names spelled exactly as in that file (the match is case-sensitive).
+    allowed_classes = list(class_names.values())
+
+    # Run the model on each image; output files are numbered from 1.
+    for count, image_path in enumerate(images, 1):
+        original_image = cv2.imread(image_path)
+        if original_image is None:
+            # cv2.imread returns None instead of raising when the file cannot be read.
+            print('Could not read image, skipping: ' + str(image_path))
+            continue
+        original_image = cv2.cvtColor(original_image, cv2.COLOR_BGR2RGB)
+
+        image_data = cv2.resize(original_image, (input_size, input_size))
+        image_data = image_data / 255.
+        # Add a leading batch axis of size one.
+        images_data = image_data[np.newaxis, ...].astype(np.float32)
+
+        if FLAGS.framework == 'tflite':
+            interpreter.set_tensor(input_details[0]['index'], images_data)
+            interpreter.invoke()
+            pred = [interpreter.get_tensor(detail['index']) for detail in output_details]
+            # For tiny yolov3 the two outputs are passed in swapped order.
+            if FLAGS.model == 'yolov3' and FLAGS.tiny:
+                boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
+            else:
+                boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25, input_shape=tf.constant([input_size, input_size]))
+        else:
+            batch_data = tf.constant(images_data)
+            pred_bbox = infer(batch_data)
+            for value in pred_bbox.values():
+                boxes = value[:, :, 0:4]
+                pred_conf = value[:, :, 4:]
+
+        boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
+            boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
+            scores=tf.reshape(
+                pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
+            max_output_size_per_class=50,
+            max_total_size=50,
+            iou_threshold=FLAGS.iou,
+            score_threshold=FLAGS.score
+        )
+        pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
+
+        image = utils.draw_bbox(original_image, pred_bbox, allowed_classes=allowed_classes)
+        image = Image.fromarray(image.astype(np.uint8))
+        if not FLAGS.dont_show:
+            image.show()
+        image = cv2.cvtColor(np.array(image), cv2.COLOR_BGR2RGB)
+        cv2.imwrite(FLAGS.output + 'detection' + str(count) + '.png', image)
+
+if __name__ == '__main__':
+ try:
+ app.run(main)
+ except SystemExit:  # deliberately ignore a SystemExit raised out of app.run(main)
+ pass
diff --git a/detect_video.py b/detect_video.py
new file mode 100644
index 00000000..601284b7
--- /dev/null
+++ b/detect_video.py
@@ -0,0 +1,126 @@
+import time
+import tensorflow as tf
+physical_devices = tf.config.experimental.list_physical_devices('GPU')
+if len(physical_devices) > 0:
+ tf.config.experimental.set_memory_growth(physical_devices[0], True)
+from absl import app, flags, logging
+from absl.flags import FLAGS
+import core.utils as utils
+from core.yolov4 import filter_boxes
+from tensorflow.python.saved_model import tag_constants
+from PIL import Image
+import cv2
+import numpy as np
+from tensorflow.compat.v1 import ConfigProto
+from tensorflow.compat.v1 import InteractiveSession
+
+flags.DEFINE_string('framework', 'tf', '(tf, tflite, trt)')
+flags.DEFINE_string('weights', './checkpoints/yolov4-416',
+                    'path to weights file')
+flags.DEFINE_integer('size', 416, 'resize images to')
+flags.DEFINE_boolean('tiny', False, 'yolo or yolo-tiny')
+flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
+flags.DEFINE_string('video', './data/video/video.mp4', 'path to input video or set to 0 for webcam')  # main tries int() on this value first
+flags.DEFINE_string('output', None, 'path to output video')
+flags.DEFINE_string('output_format', 'XVID', 'codec used in VideoWriter when saving video to file')
+flags.DEFINE_float('iou', 0.45, 'iou threshold')
+flags.DEFINE_float('score', 0.25, 'score threshold')
+flags.DEFINE_boolean('dont_show', False, 'dont show video output')
+
+def main(_argv):
+    """Run detection on a video file or camera stream, one frame at a time."""
+    config = ConfigProto()
+    config.gpu_options.allow_growth = True
+    session = InteractiveSession(config=config)
+    STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
+    input_size = FLAGS.size
+    video_path = FLAGS.video
+
+    if FLAGS.framework == 'tflite':
+        interpreter = tf.lite.Interpreter(model_path=FLAGS.weights)
+        interpreter.allocate_tensors()
+        input_details = interpreter.get_input_details()
+        output_details = interpreter.get_output_details()
+        print(input_details)
+        print(output_details)
+    else:
+        saved_model_loaded = tf.saved_model.load(FLAGS.weights, tags=[tag_constants.SERVING])
+        infer = saved_model_loaded.signatures['serving_default']
+
+    # A purely numeric --video value is used as a camera index; anything else as a path.
+    try:
+        video_source = int(video_path)
+    except ValueError:
+        video_source = video_path
+    vid = cv2.VideoCapture(video_source)
+
+    out = None
+    if FLAGS.output:
+        # by default VideoCapture returns float instead of int
+        width = int(vid.get(cv2.CAP_PROP_FRAME_WIDTH))
+        height = int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))
+        fps = int(vid.get(cv2.CAP_PROP_FPS))
+        codec = cv2.VideoWriter_fourcc(*FLAGS.output_format)
+        out = cv2.VideoWriter(FLAGS.output, codec, fps, (width, height))
+
+    # Release the capture and the writer even when a frame fails or the user interrupts.
+    try:
+        while True:
+            return_value, frame = vid.read()
+            if not return_value:
+                print('Video has ended or failed, try a different video format!')
+                break
+            frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+            image_data = cv2.resize(frame, (input_size, input_size))
+            image_data = image_data / 255.
+            image_data = image_data[np.newaxis, ...].astype(np.float32)
+            start_time = time.time()
+
+            if FLAGS.framework == 'tflite':
+                interpreter.set_tensor(input_details[0]['index'], image_data)
+                interpreter.invoke()
+                pred = [interpreter.get_tensor(detail['index']) for detail in output_details]
+                if FLAGS.model == 'yolov3' and FLAGS.tiny:
+                    boxes, pred_conf = filter_boxes(pred[1], pred[0], score_threshold=0.25,
+                                                    input_shape=tf.constant([input_size, input_size]))
+                else:
+                    boxes, pred_conf = filter_boxes(pred[0], pred[1], score_threshold=0.25,
+                                                    input_shape=tf.constant([input_size, input_size]))
+            else:
+                batch_data = tf.constant(image_data)
+                pred_bbox = infer(batch_data)
+                for value in pred_bbox.values():
+                    boxes = value[:, :, 0:4]
+                    pred_conf = value[:, :, 4:]
+
+            boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
+                boxes=tf.reshape(boxes, (tf.shape(boxes)[0], -1, 1, 4)),
+                scores=tf.reshape(
+                    pred_conf, (tf.shape(pred_conf)[0], -1, tf.shape(pred_conf)[-1])),
+                max_output_size_per_class=50,
+                max_total_size=50,
+                iou_threshold=FLAGS.iou,
+                score_threshold=FLAGS.score
+            )
+            pred_bbox = [boxes.numpy(), scores.numpy(), classes.numpy(), valid_detections.numpy()]
+            image = utils.draw_bbox(frame, pred_bbox)
+            print("FPS: %.2f" % (1.0 / (time.time() - start_time)))
+            result = cv2.cvtColor(image, cv2.COLOR_RGB2BGR)
+            if FLAGS.output:
+                out.write(result)
+            if not FLAGS.dont_show:
+                cv2.namedWindow("result", cv2.WINDOW_AUTOSIZE)
+                cv2.imshow("result", result)
+                if cv2.waitKey(1) & 0xFF == ord('q'):
+                    break
+    finally:
+        vid.release()
+        if out is not None:
+            out.release()
+        cv2.destroyAllWindows()
+
+if __name__ == '__main__':
+ try:
+ app.run(main)
+ except SystemExit:  # deliberately ignore a SystemExit raised out of app.run(main)
+ pass
diff --git a/detections/detection1.png b/detections/detection1.png
new file mode 100644
index 00000000..22042207
Binary files /dev/null and b/detections/detection1.png differ
diff --git a/requirements.txt b/requirements.txt
index 5c68a547..b3bd0114 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,8 +1,56 @@
-opencv-python==4.1.1.26
-lxml
-tqdm
-tensorflow==2.3.0rc0
-absl-py
-easydict
-matplotlib
-pillow
+absl-py==0.15.0
+astunparse==1.6.3
+cached-property==1.5.2
+cachetools==5.0.0
+charset-normalizer==2.0.12
+coremltools==5.1.0
+cycler==0.11.0
+easydict==1.9
+fake-bpy-module-2.83==20210701
+flatbuffers==1.12
+fonttools==4.37.0
+gast==0.3.3
+google-auth==2.6.0
+google-auth-oauthlib==0.4.6
+google-pasta==0.2.0
+grpcio==1.34.1
+h5py==2.10.0
+idna==3.3
+importlib-metadata==4.11.3
+Keras==2.2.4
+Keras-Applications==1.0.8
+keras-nightly==2.5.0.dev2021032900
+Keras-Preprocessing==1.1.2
+kiwisolver==1.4.4
+lxml==4.9.1
+Markdown==3.3.6
+matplotlib==3.5.3
+mpmath==1.2.1
+numpy==1.18.5
+oauthlib==3.2.0
+opencv-python==4.1.2.30
+opt-einsum==3.3.0
+Pillow==9.2.0
+protobuf==3.19.4
+pyasn1==0.4.8
+pyasn1-modules==0.2.8
+pydot==1.4.2
+pyparsing==3.0.7
+PyYAML==6.0
+requests==2.27.1
+requests-oauthlib==1.3.1
+rsa==4.8
+scipy==1.7.3
+sympy==1.10
+tensorboard==2.8.0
+tensorboard-data-server==0.6.1
+tensorboard-plugin-wit==1.8.1
+tensorflow==2.3.1
+tensorflow-estimator==2.3.0
+termcolor==1.1.0
+tqdm==4.63.0
+urllib3==1.26.8
+webencodings==0.5.1
+Werkzeug==2.0.3
+wrapt==1.12.1
+yolov4==3.2.0
diff --git a/sample_test_images/0.jpg b/sample_test_images/0.jpg
new file mode 100644
index 00000000..ade46488
Binary files /dev/null and b/sample_test_images/0.jpg differ
diff --git a/save_model.py b/save_model.py
index 5946c554..cbdf699c 100644
--- a/save_model.py
+++ b/save_model.py
@@ -5,14 +5,27 @@
import core.utils as utils
from core.config import cfg
-flags.DEFINE_string('weights', './data/yolov4.weights', 'path to weights file')
-flags.DEFINE_string('output', './checkpoints/yolov4-416', 'path to output')
-flags.DEFINE_boolean('tiny', False, 'is yolo-tiny or not')
-flags.DEFINE_integer('input_size', 416, 'define input size of export model')
-flags.DEFINE_float('score_thres', 0.2, 'define score threshold')
+# # full
+# # --weights ./data/yolov4-tiny.weights --output ./checkpoints/yolov4-tiny-23rdMay-608 --input_size 608 --model yolov4 --tiny
+# flags.DEFINE_string('weights', './data/yolov4.weights', 'path to weights file')
+# flags.DEFINE_string('output', './checkpoints/yolov4-608_1stJun', 'path to output')
+# flags.DEFINE_boolean('tiny', False, 'is yolo-tiny or not')
+# flags.DEFINE_integer('input_size', 608, 'define input size of export model')
+# flags.DEFINE_float('score_thres', 0.2, 'define score threshold')
+# flags.DEFINE_string('framework', 'tf', 'define what framework do you want to convert (tf, trt, tflite)')
+# flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
+
+# tiny
+
+flags.DEFINE_string('weights', './data/ppe_yolov4-tiny_best.weights', 'path to weights file')
+flags.DEFINE_string('output', './checkpoints/ppe_yolov4-tiny-608-24thAug', 'path to output')
+flags.DEFINE_boolean('tiny', True, 'is yolo-tiny or not')
+flags.DEFINE_integer('input_size', 608, 'define input size of export model')
+flags.DEFINE_float('score_thres', 0.5, 'define score threshold')
flags.DEFINE_string('framework', 'tf', 'define what framework do you want to convert (tf, trt, tflite)')
flags.DEFINE_string('model', 'yolov4', 'yolov3 or yolov4')
+
def save_tf():
STRIDES, ANCHORS, NUM_CLASS, XYSCALE = utils.load_config(FLAGS)
@@ -45,10 +58,13 @@ def save_tf():
else:
boxes, pred_conf = filter_boxes(pred_bbox, pred_prob, score_threshold=FLAGS.score_thres, input_shape=tf.constant([FLAGS.input_size, FLAGS.input_size]))
pred = tf.concat([boxes, pred_conf], axis=-1)
+
+
model = tf.keras.Model(input_layer, pred)
utils.load_weights(model, FLAGS.weights, FLAGS.model, FLAGS.tiny)
model.summary()
model.save(FLAGS.output)
+ model.save('yolo_tiny_1stJune.h5')
def main(_argv):
save_tf()
diff --git a/screenshots/Screenshot 2022-08-24 at 7.23.04 PM.png b/screenshots/Screenshot 2022-08-24 at 7.23.04 PM.png
new file mode 100644
index 00000000..332e543c
Binary files /dev/null and b/screenshots/Screenshot 2022-08-24 at 7.23.04 PM.png differ