Skip to content
This repository was archived by the owner on Aug 28, 2024. It is now read-only.

Commit e709095

Browse files
authored
Merge pull request #71 from jeffxtang/yolov5_custom
transfer learning for YOLOv5 object detection
2 parents 294fa7b + 18af379 commit e709095

File tree

12 files changed

+127
-8
lines changed

12 files changed

+127
-8
lines changed

ObjectDetection/ObjectDetection.xcodeproj/project.pbxproj

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77
objects = {
88

99
/* Begin PBXBuildFile section */
10+
2669BE16270FA65200806A63 /* aicook2.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE11270FA61000806A63 /* aicook2.jpg */; };
11+
2669BE18270FA65200806A63 /* aicook.txt in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE13270FA62F00806A63 /* aicook.txt */; };
12+
2669BE19270FA65200806A63 /* aicook1.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE14270FA63D00806A63 /* aicook1.jpg */; };
13+
2669BE1A270FA65200806A63 /* aicook3.jpg in Resources */ = {isa = PBXBuildFile; fileRef = 2669BE15270FA65200806A63 /* aicook3.jpg */; };
1014
266E87232563120D00CF5151 /* classes.txt in Resources */ = {isa = PBXBuildFile; fileRef = 266E87222563120D00CF5151 /* classes.txt */; };
1115
266E8746256350C000CF5151 /* CameraController.swift in Sources */ = {isa = PBXBuildFile; fileRef = 266E8742256350C000CF5151 /* CameraController.swift */; };
1216
266E8747256350C000CF5151 /* CVPixelBuffer+Helper.swift in Sources */ = {isa = PBXBuildFile; fileRef = 266E8743256350C000CF5151 /* CVPixelBuffer+Helper.swift */; };
@@ -29,6 +33,10 @@
2933
/* End PBXBuildFile section */
3034

3135
/* Begin PBXFileReference section */
36+
2669BE11270FA61000806A63 /* aicook2.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = aicook2.jpg; sourceTree = "<group>"; };
37+
2669BE13270FA62F00806A63 /* aicook.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = aicook.txt; sourceTree = "<group>"; };
38+
2669BE14270FA63D00806A63 /* aicook1.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = aicook1.jpg; sourceTree = "<group>"; };
39+
2669BE15270FA65200806A63 /* aicook3.jpg */ = {isa = PBXFileReference; lastKnownFileType = image.jpeg; path = aicook3.jpg; sourceTree = "<group>"; };
3240
266E87222563120D00CF5151 /* classes.txt */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = classes.txt; sourceTree = "<group>"; };
3341
266E8742256350C000CF5151 /* CameraController.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = CameraController.swift; sourceTree = "<group>"; };
3442
266E8743256350C000CF5151 /* CVPixelBuffer+Helper.swift */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.swift; path = "CVPixelBuffer+Helper.swift"; sourceTree = "<group>"; };
@@ -109,6 +117,10 @@
109117
269E7487255CC69400B1D6CA /* test1.png */,
110118
269E748B255CC6D100B1D6CA /* test2.jpg */,
111119
269E748A255CC6D100B1D6CA /* test3.png */,
120+
2669BE13270FA62F00806A63 /* aicook.txt */,
121+
2669BE14270FA63D00806A63 /* aicook1.jpg */,
122+
2669BE11270FA61000806A63 /* aicook2.jpg */,
123+
2669BE15270FA65200806A63 /* aicook3.jpg */,
112124
266E87222563120D00CF5151 /* classes.txt */,
113125
26A8C11326E17F8100F4A58D /* yolov5s.torchscript.ptl */,
114126
);
@@ -201,12 +213,16 @@
201213
buildActionMask = 2147483647;
202214
files = (
203215
269E747E255CC56400B1D6CA /* LaunchScreen.storyboard in Resources */,
216+
2669BE18270FA65200806A63 /* aicook.txt in Resources */,
204217
26A8C11426E17F8100F4A58D /* yolov5s.torchscript.ptl in Resources */,
205218
266E87232563120D00CF5151 /* classes.txt in Resources */,
206219
269E747B255CC56400B1D6CA /* Assets.xcassets in Resources */,
220+
2669BE19270FA65200806A63 /* aicook1.jpg in Resources */,
207221
269E748D255CC6D100B1D6CA /* test2.jpg in Resources */,
222+
2669BE16270FA65200806A63 /* aicook2.jpg in Resources */,
208223
269E7488255CC69400B1D6CA /* test1.png in Resources */,
209224
269E7479255CC56200B1D6CA /* Main.storyboard in Resources */,
225+
2669BE1A270FA65200806A63 /* aicook3.jpg in Resources */,
210226
269E748C255CC6D100B1D6CA /* test3.png in Resources */,
211227
);
212228
runOnlyForDeploymentPostprocessing = 0;

ObjectDetection/ObjectDetection/Utils/PrePostProcessor.swift

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class PrePostProcessor : NSObject {
1717
static let inputWidth = 640
1818
static let inputHeight = 640
1919

20-
// model output is of size 25200*85
20+
// model output is of size 25200*(num_of_class+5)
2121
static let outputRow = 25200 // as decided by the YOLOv5 model for input image of size 640*640
2222
static let outputColumn = 85 // left, top, right, bottom, score and 80 class probability
2323
static let threshold : Float = 0.35 // score above which a detection is generated
@@ -111,7 +111,7 @@ class PrePostProcessor : NSObject {
111111

112112
let rect = CGRect(x: startX+ivScaleX*left, y: startY+top*ivScaleY, width: ivScaleX*(right-left), height: ivScaleY*(bottom-top))
113113

114-
let prediction = Prediction(classIndex: cls, score: Float(truncating: outputs[i*85+4]), rect: rect)
114+
let prediction = Prediction(classIndex: cls, score: Float(truncating: outputs[i*outputColumn+4]), rect: rect)
115115
predictions.append(prediction)
116116
}
117117
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
apple
2+
banana
3+
beef
4+
blueberries
5+
bread
6+
butter
7+
carrot
8+
cheese
9+
chicken
10+
chicken_breast
11+
chocolate
12+
corn
13+
eggs
14+
flour
15+
goat_cheese
16+
green_beans
17+
ground_beef
18+
ham
19+
heavy_cream
20+
lime
21+
milk
22+
mushrooms
23+
onion
24+
potato
25+
shrimp
26+
spinach
27+
strawberries
28+
sugar
29+
sweet_potato
30+
tomato
182 KB
Loading
187 KB
Loading
180 KB
Loading

ObjectDetection/README.md

Lines changed: 79 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@
44

55
[YOLO](https://pjreddie.com/darknet/yolo/) (You Only Look Once) is one of the fastest and most popular object detection models. [YOLOv5](https://github.com/ultralytics/yolov5) is an open-source implementation of the latest version of YOLO (for a quick test of loading YOLOv5 from PyTorch hub for inference, see [here](https://pytorch.org/hub/ultralytics_yolov5/#load-from-pytorch-hub)). This Object Detection with YOLOv5 iOS sample app uses the PyTorch scripted YOLOv5 model to detect objects of the [80 classes](https://github.com/ultralytics/yolov5/blob/master/data/coco.yaml) trained with the model.
66

7+
**Update 10-07-2021**: A new section of using a custom dataset to fine-tune the YOLOv5 model (aka transfer learning) with steps to change the iOS demo app to use the custom model was added.
8+
79
## Prerequisites
810

911
* PyTorch 1.9 and torchvision 0.10 (Optional)
@@ -15,24 +17,32 @@
1517

1618
To Test Run the Object Detection iOS App, follow the steps below:
1719

18-
### 1. Prepare the Model
20+
### 1. Prepare the model
1921

2022
If you don't have the PyTorch environment set up to run the script, you can download the model file [here](https://pytorch-mobile-demo-apps.s3.us-east-2.amazonaws.com/yolov5s.torchscript.ptl) to the `ios-demo-app/ObjectDetection/ObjectDetection` folder, then skip the rest of this step and go to step 2 directly.
2123

2224
The Python script `export.py` in the `models` folder of the [YOLOv5 repo](https://github.com/ultralytics/yolov5) is used to generate a TorchScript-formatted YOLOv5 model named `yolov5s.torchscript.ptl` for mobile apps.
2325

24-
Open a Mac/Linux/Windows Terminal, run the following commands (note that we use the fork of the original YOLOv5 repo to make sure the code changes work, but feel free to use the original repo):
26+
Open a Mac/Linux/Windows Terminal, run the following commands:
2527

2628
```
27-
git clone https://github.com/jeffxtang/yolov5
29+
git clone https://github.com/ultralytics/yolov5
2830
cd yolov5
29-
pip install -r requirements.txt
31+
pip install -r requirements.txt wandb
3032
```
3133

32-
Finally, run the script below to generate the optimized TorchScript Lite Interpreter model and copy the generated model file `yolov5s.torchscript.ptl` to the `ios-demo-app/ObjectDetection/ObjectDetection` folder:
34+
Note the steps below have been tested with the commit `cd35a009ba964331abccd30f6fa0614224105d39` and if there's any issue with running the script or using the model, try `git reset --hard cd35a009ba964331abccd30f6fa0614224105d39`.
35+
36+
Edit `export.py` to make the following two changes:
37+
38+
* After `f = file.with_suffix('.torchscript.pt')`, add a line `fl = file.with_suffix('.torchscript.ptl')`
39+
40+
* After `(optimize_for_mobile(ts) if optimize else ts).save(f)`, add `(optimize_for_mobile(ts) if optimize else ts)._save_for_lite_interpreter(str(fl))`
41+
42+
Finally, run the script below to generate the optimized TorchScript Lite Interpreter model and copy the generated model file `yolov5s.torchscript.ptl` to the `ios-demo-app/ObjectDetection/ObjectDetection` folder (the original full JIT model `yolov5s.torchscript.pt` was also generated for comparison):
3343

3444
```
35-
python models/export.py
45+
python export.py --weights yolov5s.pt --include torchscript
3646
```
3747

3848
Note that the small version of the YOLOv5 model, which runs faster but with less accuracy, is generated by default when running `export.py`. You can also change the value of the `weights` parameter in the `export.py` command to generate the medium, large, or extra-large version of the model.
@@ -56,3 +66,66 @@ Some example images and the detection results are as follows:
5666

5767
![](screenshot3.png)
5868
![](screenshot4.png)
69+
70+
## Transfer Learning
71+
72+
In this section, you'll see how to use an example dataset called [aicook](https://universe.roboflow.com/karel-cornelis-q2qqg/aicook-lcv4d/4), used to detect ingredients in your fridge, to fine-tune the YOLOv5 model. For more info on the YOLOv5 transfer learning, see [here](https://github.com/ultralytics/yolov5/issues/1314). If you use the default YOLOv5 model to do object detection on what's inside your fridge, you'll likely not get good results. That's why you need to have a custom model trained with a dataset like aicook.
73+
74+
### 1. Download the custom dataset
75+
76+
Simply go to [here](https://universe.roboflow.com/karel-cornelis-q2qqg/aicook-lcv4d/4) to download the aicook dataset in a zip file. Unzip the file to your `yolov5` repo directory, then run `cd yolov5; mv train ..; mv valid ..;` as the aicook `data.yaml` specifies the `train` and `val` folders to be up one level.
77+
78+
### 2. Retrain the YOLOv5 with the custom dataset
79+
80+
Run the script below to train a custom model; the best weights are saved as `best.pt` in `runs/train/exp/weights`:
81+
82+
```
83+
python train.py --img 640 --batch 16 --epochs 3 --data data.yaml --weights yolov5s.pt
84+
```
85+
86+
The precision of the model with the epochs set as 3 is very low - less than 0.01 actually; with a tool such as [Weights and Biases](https://wandb.ai), which can be set up in a few minutes and has been integrated with YOLOv5, you can find that with `--epochs` set as 80, the precision gets to be 0.95. But on a CPU machine, you can quickly train a custom model using the command above, then test it in the iOS demo app. Below are sample wandb metrics from 3, 30, and 100 epochs of training:
87+
88+
![](metrics.png)
89+
90+
### 3. Convert the custom model to lite version
91+
92+
With the `export.py` modified as in step 1 `Prepare the model` of the section `Quick Start`, you can convert the new custom model to its TorchScript lite version:
93+
94+
```
95+
python export.py --weights runs/train/exp/weights/best.pt --include torchscript
96+
```
97+
98+
The resulting `best.torchscript.ptl` is located in `runs/train/exp/weights`, which needs to be added to the iOS ObjectDetection demo app project.
99+
100+
### 4. Update the demo app
101+
102+
In Xcode, first in `ViewController.swift`, change line `private let testImages = ["test1.png", "test2.jpg", "test3.png"]` to `private let testImages = ["aicook1.jpg", "aicook2.jpg", "aicook3.jpg", "test1.png", "test2.jpg", "test3.png"]`
103+
(The three aicook test images have been added to the repo.)
104+
105+
Then change lines in `ObjectDetector.swift`:
106+
```
107+
if let filePath = Bundle.main.path(forResource: "yolov5s.torchscript", ofType: "ptl"),
108+
```
109+
to:
110+
```
111+
if let filePath = Bundle.main.path(forResource: "best.torchscript", ofType: "ptl"),
112+
```
113+
and
114+
```
115+
if let filePath = Bundle.main.path(forResource: "classes", ofType: "txt"),
116+
```
117+
to:
118+
```
119+
if let filePath = Bundle.main.path(forResource: "aicook", ofType: "txt"),
120+
```
121+
(aicook.txt defines the 30 custom class names, copied from `data.yaml` in the custom dataset downloaded in step 1 of this section.)
122+
123+
Finally in `PrePostProcessor.swift`, change line `static let outputColumn = 85` to `static let outputColumn = 35`, which is 5 (left, top, right, bottom, score) + 30 (number of custom classes).
124+
125+
Run the app in Xcode and you should see the custom model working on the first three aicook test images:
126+
127+
![](aicook1.png)
128+
![](aicook2.png)
129+
130+
![](aicook3.png)
131+
![](aicook4.png)

ObjectDetection/aicook1.png

122 KB
Loading

ObjectDetection/aicook2.png

114 KB
Loading

ObjectDetection/aicook3.png

120 KB
Loading

0 commit comments

Comments
 (0)