|
20 | 20 | "cell_type": "markdown", |
21 | 21 | "metadata": {}, |
22 | 22 | "source": [ |
23 | | - "## Initialization" |
| 23 | + "## 00 Initialization" |
24 | 24 | ] |
25 | 25 | }, |
26 | 26 | { |
|
45 | 45 | "sys.path.append(\"../../\")\n", |
46 | 46 | "\n", |
47 | 47 | "import os\n", |
| 48 | + "import os.path as osp\n", |
48 | 49 | "import time\n", |
| 50 | + "from ipywidgets import Video\n", |
49 | 51 | "import matplotlib.pyplot as plt\n", |
50 | | - "from typing import Iterator\n", |
51 | | - "from pathlib import Path\n", |
52 | | - "from PIL import Image\n", |
53 | | - "from random import randrange\n", |
54 | | - "from typing import Tuple\n", |
55 | 52 | "import torch\n", |
56 | 53 | "import torchvision\n", |
57 | | - "from torchvision import transforms\n", |
58 | | - "import scrapbook as sb\n", |
59 | 54 | "\n", |
60 | | - "from ipywidgets import Video\n", |
| 55 | + "from utils_cv.tracking.data import Urls\n", |
61 | 56 | "from utils_cv.tracking.dataset import TrackingDataset\n", |
62 | | - "from utils_cv.tracking.model import TrackingLearner\n", |
| 57 | + "from utils_cv.tracking.model import TrackingLearner, write_video\n", |
63 | 58 | "\n", |
| 59 | + "from utils_cv.common.data import data_path, download, unzip_url\n", |
64 | 60 | "from utils_cv.common.gpu import which_processor, is_windows\n", |
65 | 61 | "\n", |
66 | 62 | "# Change matplotlib backend so that plots are shown for windows\n", |
|
115 | 111 | } |
116 | 112 | ], |
117 | 113 | "source": [ |
118 | | - "EPOCHS = 2\n", |
| 114 | + "EPOCHS = 1\n", |
119 | 115 | "LEARNING_RATE = 0.0001\n", |
120 | 116 | "BATCH_SIZE = 1\n", |
| 117 | + "\n", |
121 | 118 | "SAVE_MODEL = True\n", |
| 119 | + "FRAME_RATE = 30\n", |
| 120 | + "\n", |
| 121 | + "CONF_THRES = 0.3\n", |
| 122 | + "TRACK_BUFFER = 300\n", |
| 123 | + "IM_SIZE = (1080, 1920)\n", |
| 124 | + "\n", |
| 125 | + "TRAIN_DATA_PATH = unzip_url(Urls.fridge_objects_path, exist_ok=True)\n", |
| 126 | + "EVAL_DATA_PATH = unzip_url(Urls.carcans_annotations_path, exist_ok=True)\n", |
| 127 | + "\n", |
| 128 | + "BASELINE_MODEL = \"./models/all_dla34_new.pth\"\n", |
| 129 | + "FT_MODEL = \"./models/model_30.pth\"\n", |
122 | 130 | "\n", |
123 | 131 | "# train on the GPU or on the CPU, if a GPU is not available\n", |
124 | 132 | "device = torch.device(\"cuda\") if torch.cuda.is_available() else torch.device(\"cpu\")\n", |
|
129 | 137 | "cell_type": "markdown", |
130 | 138 | "metadata": {}, |
131 | 139 | "source": [ |
132 | | - "## Prepare Training Dataset" |
133 | | - ] |
134 | | - }, |
135 | | - { |
136 | | - "cell_type": "code", |
137 | | - "execution_count": 4, |
138 | | - "metadata": {}, |
139 | | - "outputs": [ |
140 | | - { |
141 | | - "data": { |
142 | | - "text/plain": [ |
143 | | - "['labels_with_ids', '.ipynb_checkpoints', 'images']" |
144 | | - ] |
145 | | - }, |
146 | | - "execution_count": 4, |
147 | | - "metadata": {}, |
148 | | - "output_type": "execute_result" |
149 | | - } |
150 | | - ], |
151 | | - "source": [ |
152 | | - "DATA_PATH_TRAIN = \"./data/odFridgeObjects_FairMOTformat/\"\n", |
153 | | - "os.listdir(DATA_PATH_TRAIN)" |
| 140 | + "## 01 Finetune a Pretrained Model" |
154 | 141 | ] |
155 | 142 | }, |
156 | 143 | { |
157 | 144 | "cell_type": "markdown", |
158 | 145 | "metadata": {}, |
159 | 146 | "source": [ |
160 | | - "## Load Training Dataset" |
| 147 | + "Initialize the training dataset." |
161 | 148 | ] |
162 | 149 | }, |
163 | 150 | { |
164 | 151 | "cell_type": "code", |
165 | | - "execution_count": 5, |
166 | | - "metadata": { |
167 | | - "scrolled": true |
168 | | - }, |
| 152 | + "execution_count": 4, |
| 153 | + "metadata": {}, |
169 | 154 | "outputs": [ |
170 | 155 | { |
171 | 156 | "name": "stdout", |
|
183 | 168 | ], |
184 | 169 | "source": [ |
185 | 170 | "data_train = TrackingDataset(\n", |
186 | | - " DATA_PATH_TRAIN,\n", |
| 171 | + " TRAIN_DATA_PATH,\n", |
187 | 172 | " batch_size=BATCH_SIZE\n", |
188 | 173 | ")" |
189 | 174 | ] |
|
192 | 177 | "cell_type": "markdown", |
193 | 178 | "metadata": {}, |
194 | 179 | "source": [ |
195 | | - "## Finetune a Pretrained Model" |
| 180 | + "Initialize and load the model. We use the baseline FairMOT model, which can be downloaded [here](https://drive.google.com/file/d/1udpOPum8fJdoEQm6n0jsIgMMViOMFinu/view)." |
196 | 181 | ] |
197 | 182 | }, |
198 | 183 | { |
199 | 184 | "cell_type": "code", |
200 | | - "execution_count": 6, |
| 185 | + "execution_count": 5, |
201 | 186 | "metadata": {}, |
202 | 187 | "outputs": [ |
203 | 188 | { |
|
209 | 194 | } |
210 | 195 | ], |
211 | 196 | "source": [ |
212 | | - "tracker = TrackingLearner(data_train) \n", |
| 197 | + "tracker = TrackingLearner(data_train, \"./models/fairmot_ft.pth\")\n", |
213 | 198 | "print(f\"Model: {type(tracker.model)}\")" |
214 | 199 | ] |
215 | 200 | }, |
216 | 201 | { |
217 | 202 | "cell_type": "code", |
218 | | - "execution_count": 7, |
| 203 | + "execution_count": 6, |
219 | 204 | "metadata": { |
220 | 205 | "scrolled": true |
221 | 206 | }, |
222 | 207 | "outputs": [ |
| 208 | + { |
| 209 | + "name": "stdout", |
| 210 | + "output_type": "stream", |
| 211 | + "text": [ |
| 212 | + "Loading /home/jihon/computervision-recipes/scenarios/tracking/models/all_dla34.pth\n", |
| 213 | + "loaded /home/jihon/computervision-recipes/scenarios/tracking/models/all_dla34.pth, epoch 10\n", |
| 214 | + "Resumed optimizer with start lr 0.0001\n", |
| 215 | + "===== Epoch: 11/11 =====\n" |
| 216 | + ] |
| 217 | + }, |
223 | 218 | { |
224 | 219 | "name": "stderr", |
225 | 220 | "output_type": "stream", |
226 | 221 | "text": [ |
227 | 222 | "/anaconda/envs/cv/lib/python3.7/site-packages/torch/nn/_reduction.py:43: UserWarning: size_average and reduce args will be deprecated, please use reduction='sum' instead.\n", |
228 | 223 | " warnings.warn(warning.format(ret))\n" |
229 | 224 | ] |
| 225 | + }, |
| 226 | + { |
| 227 | + "name": "stdout", |
| 228 | + "output_type": "stream", |
| 229 | + "text": [ |
| 230 | + "loss: 1.1128346400433464\n", |
| 231 | + "hm_loss: 0.06353224289612051\n", |
| 232 | + "wh_loss: 1.57920023114543\n", |
| 233 | + "off_loss: 0.18636367223715702\n", |
| 234 | + "id_loss: 0.8860541224528692\n", |
| 235 | + "time: 44.016666666666666\n", |
| 236 | + "Model saved to ./models/fairmot_ft.pth\n" |
| 237 | + ] |
| 238 | + } |
| 239 | + ], |
| 240 | + "source": [ |
| 241 | + "tracker.fit(num_epochs=EPOCHS, lr=LEARNING_RATE, resume=True)" |
| 242 | + ] |
| 243 | + }, |
| 244 | + { |
| 245 | + "cell_type": "code", |
| 246 | + "execution_count": 7, |
| 247 | + "metadata": { |
| 248 | + "scrolled": true |
| 249 | + }, |
| 250 | + "outputs": [ |
| 251 | + { |
| 252 | + "name": "stdout", |
| 253 | + "output_type": "stream", |
| 254 | + "text": [ |
| 255 | + "Model saved to ./models/model_01.pth\n" |
| 256 | + ] |
230 | 257 | } |
231 | 258 | ], |
232 | 259 | "source": [ |
233 | | - "tracker.fit(num_epochs=EPOCHS, lr=LEARNING_RATE)" |
| 260 | + "if SAVE_MODEL:\n", |
| 261 | + " tracker.save(f\"./models/model_{EPOCHS:02d}.pth\")" |
| 262 | + ] |
| 263 | + }, |
| 264 | + { |
| 265 | + "cell_type": "markdown", |
| 266 | + "metadata": {}, |
| 267 | + "source": [ |
| 268 | + "## 02 Evaluate" |
| 269 | + ] |
| 270 | + }, |
| 271 | + { |
| 272 | + "cell_type": "markdown", |
| 273 | + "metadata": {}, |
| 274 | + "source": [ |
| 275 | + "Note that `EVAL_DATA_PATH` follows the FairMOT input format." |
| 276 | + ] |
| 277 | + }, |
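| | + { |
| | + "cell_type": "code", |
| | + "execution_count": null, |
| | + "metadata": {}, |
| | + "outputs": [], |
| | + "source": [ |
| | + "# Sanity check: list the unzipped evaluation folder; it should contain\n", |
| | + "# the 'images' and 'labels_with_ids' directories\n", |
| | + "os.listdir(EVAL_DATA_PATH)" |
| | + ] |
| | + }, |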
| 278 | + { |
| 279 | + "cell_type": "code", |
| 280 | + "execution_count": 10, |
| 281 | + "metadata": { |
| 282 | + "scrolled": true |
| 283 | + }, |
| 284 | + "outputs": [ |
| 285 | + { |
| 286 | + "name": "stdout", |
| 287 | + "output_type": "stream", |
| 288 | + "text": [ |
| 289 | + "Creating model...\n", |
| 290 | + "loaded ./models/fairmot_ft.pth, epoch 11\n" |
| 291 | + ] |
| 292 | + } |
| 293 | + ], |
| 294 | + "source": [ |
| 295 | + "eval_results = tracker.predict(\n", |
| 296 | + " EVAL_DATA_PATH,\n", |
| 297 | + " conf_thres=CONF_THRES,\n", |
| 298 | + " track_buffer=TRACK_BUFFER,\n", |
| 299 | + " im_size=IM_SIZE,\n", |
| 300 | + " frame_rate=FRAME_RATE\n", |
| 301 | + ")" |
| 302 | + ] |
| 303 | + }, |
| 304 | + { |
| 305 | + "cell_type": "code", |
| 306 | + "execution_count": 11, |
| 307 | + "metadata": { |
| 308 | + "scrolled": true |
| 309 | + }, |
| 310 | + "outputs": [], |
| 311 | + "source": [ |
| 312 | + "eval_metrics = tracker.evaluate(eval_results, EVAL_DATA_PATH) " |
| 313 | + ] |
| 314 | + }, |
| 315 | + { |
| 316 | + "cell_type": "markdown", |
| 317 | + "metadata": {}, |
| 318 | + "source": [ |
| 319 | + "## 03 Predict" |
| 320 | + ] |
| 321 | + }, |
| 322 | + { |
| 323 | + "cell_type": "code", |
| 324 | + "execution_count": 12, |
| 325 | + "metadata": {}, |
| 326 | + "outputs": [], |
| 327 | + "source": [ |
| 328 | + "input_video = download(\n", |
| 329 | + " Urls.carcans_video_path, osp.join(data_path(), \"carcans.mp4\")\n", |
| 330 | + ")" |
| 331 | + ] |
| 332 | + }, |
| 333 | + { |
| 334 | + "cell_type": "code", |
| 335 | + "execution_count": 15, |
| 336 | + "metadata": { |
| 337 | + "scrolled": false |
| 338 | + }, |
| 339 | + "outputs": [ |
| 340 | + { |
| 341 | + "name": "stdout", |
| 342 | + "output_type": "stream", |
| 343 | + "text": [ |
| 344 | + "Creating model...\n", |
| 345 | + "loaded ./models/fairmot_ft.pth, epoch 11\n", |
| 346 | + "Lenth of the video: 251 frames\n" |
| 347 | + ] |
| 348 | + } |
| 349 | + ], |
| 350 | + "source": [ |
| 351 | + "test_results = tracker.predict(\n", |
| 352 | + " input_video,\n", |
| 353 | + " conf_thres=CONF_THRES,\n", |
| 354 | + " track_buffer=TRACK_BUFFER,\n", |
| 355 | + " im_size=IM_SIZE,\n", |
| 356 | + ")" |
| 357 | + ] |
| 358 | + }, |
| 359 | + { |
| 360 | + "cell_type": "code", |
| 361 | + "execution_count": 16, |
| 362 | + "metadata": {}, |
| 363 | + "outputs": [], |
| 364 | + "source": [ |
| 365 | + "output_video = osp.join(data_path(), \"carcans_output.mp4\")" |
| 366 | + ] |
| 367 | + }, |
| 368 | + { |
| 369 | + "cell_type": "code", |
| 370 | + "execution_count": null, |
| 371 | + "metadata": {}, |
| 372 | + "outputs": [], |
| 373 | + "source": [ |
| 374 | + "write_video(test_results, input_video, output_video)" |
| 375 | + ] |
| 376 | + }, |
| 377 | + { |
| 378 | + "cell_type": "code", |
| 379 | + "execution_count": null, |
| 380 | + "metadata": {}, |
| 381 | + "outputs": [], |
| 382 | + "source": [ |
| 383 | + "Video.from_file(output_video)" |
234 | 384 | ] |
235 | 385 | } |
236 | 386 | ], |
|