Skip to content

Commit 9675f0d

Browse files
add imbalanced classes video code and kaggle cat vs dog
1 parent e066718 commit 9675f0d

File tree

66 files changed

+13018
-12
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

66 files changed

+13018
-12
lines changed

.github/FUNDING.yml

Lines changed: 0 additions & 12 deletions
This file was deleted.
Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
{
2+
"cells": [
3+
{
4+
"cell_type": "code",
5+
"execution_count": 1,
6+
"id": "51c78b68",
7+
"metadata": {},
8+
"outputs": [],
9+
"source": [
10+
"import sklearn\n",
11+
"import pandas as pd\n",
12+
"import numpy as np\n",
13+
"from sklearn.linear_model import LogisticRegression\n",
14+
"from sklearn.model_selection import train_test_split\n",
15+
"from sklearn.metrics import log_loss"
16+
]
17+
},
18+
{
19+
"cell_type": "code",
20+
"execution_count": 2,
21+
"id": "4421a043",
22+
"metadata": {},
23+
"outputs": [
24+
{
25+
"name": "stdout",
26+
"output_type": "stream",
27+
"text": [
28+
"Training data shape: (25000, 2560), labels shape: (25000,)\n"
29+
]
30+
},
31+
{
32+
"data": {
33+
"text/plain": [
34+
"LogisticRegression(max_iter=2000)"
35+
]
36+
},
37+
"execution_count": 2,
38+
"metadata": {},
39+
"output_type": "execute_result"
40+
}
41+
],
42+
"source": [
43+
"X = np.load(f'data_features/X_train_b7.npy')\n",
44+
"y = np.load(f'data_features/y_train_b7.npy')\n",
45+
"\n",
46+
"# Split data and train classifier\n",
47+
"print(f\"Training data shape: {X.shape}, labels shape: {y.shape}\")\n",
48+
"X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.001, random_state=1337)\n",
49+
"clf = LogisticRegression(max_iter=2000)\n",
50+
"clf.fit(X_train, y_train)"
51+
]
52+
},
53+
{
54+
"cell_type": "code",
55+
"execution_count": 3,
56+
"id": "d5cfc5b0",
57+
"metadata": {},
58+
"outputs": [
59+
{
60+
"name": "stdout",
61+
"output_type": "stream",
62+
"text": [
63+
"On validation set:\n",
64+
"Accuracy: 1.0\n",
65+
"LOG LOSS: 7.980845755748817e-05 \n",
66+
"%--------------------------------------------------%\n",
67+
"Getting predictions for test set\n",
68+
"Done getting predictions!\n"
69+
]
70+
}
71+
],
72+
"source": [
73+
"# Check on validation\n",
74+
"val_preds= clf.predict_proba(X_val)[:,1]\n",
75+
"print(f\"On validation set:\")\n",
76+
"print(f\"Accuracy: {clf.score(X_val, y_val)}\")\n",
77+
"print(f\"LOG LOSS: {log_loss(y_val, val_preds)} \")\n",
78+
"print(\"%--------------------------------------------------%\")\n",
79+
"\n",
80+
"# Get predictions on test set\n",
81+
"print(\"Getting predictions for test set\")\n",
82+
"X_test = np.load(f'data_features/X_test_b7.npy')\n",
83+
"X_test_preds = clf.predict_proba(X_test)[:,1]\n",
84+
"df = pd.DataFrame({'id': np.arange(1, 12501), 'label': np.clip(X_test_preds, 0.005, 0.995)})\n",
85+
"df.to_csv(f\"submissions/mysubmission.csv\", index=False)\n",
86+
"print(\"Done getting predictions!\")"
87+
]
88+
},
89+
{
90+
"cell_type": "code",
91+
"execution_count": null,
92+
"id": "a9cce7af",
93+
"metadata": {},
94+
"outputs": [],
95+
"source": []
96+
}
97+
],
98+
"metadata": {
99+
"kernelspec": {
100+
"display_name": "Python 3",
101+
"language": "python",
102+
"name": "python3"
103+
},
104+
"language_info": {
105+
"codemirror_mode": {
106+
"name": "ipython",
107+
"version": 3
108+
},
109+
"file_extension": ".py",
110+
"mimetype": "text/x-python",
111+
"name": "python",
112+
"nbconvert_exporter": "python",
113+
"pygments_lexer": "ipython3",
114+
"version": "3.9.2"
115+
}
116+
},
117+
"nbformat": 4,
118+
"nbformat_minor": 5
119+
}
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
import torch
2+
import albumentations as A
3+
from albumentations.pytorch import ToTensorV2
4+
5+
# --- Runtime / data-loading settings ---------------------------------------
# Run on the GPU when one is visible to torch, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
NUM_WORKERS = 4
BATCH_SIZE = 20
PIN_MEMORY = True

# --- Checkpointing ---------------------------------------------------------
LOAD_MODEL = True
SAVE_MODEL = True
CHECKPOINT_FILE = "b7.pth.tar"

# --- Optimisation hyper-parameters -----------------------------------------
WEIGHT_DECAY = 1e-4
LEARNING_RATE = 1e-4
NUM_EPOCHS = 1

# --- Image preprocessing ---------------------------------------------------
# Per-channel statistics handed to A.Normalize below.
_NORMALIZE_MEAN = [0.485, 0.456, 0.406]
_NORMALIZE_STD = [0.229, 0.224, 0.225]

# Pipeline: resize to 448x448, normalize per channel, then convert to a tensor.
_resize_step = A.Resize(height=448, width=448)
_normalize_step = A.Normalize(
    mean=_NORMALIZE_MEAN,
    std=_NORMALIZE_STD,
    max_pixel_value=255.0,
)
basic_transform = A.Compose([_resize_step, _normalize_step, ToTensorV2()])
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
import os
2+
import re
3+
import numpy as np
4+
from torch.utils.data import Dataset
5+
from PIL import Image
6+
7+
8+
class CatDog(Dataset):
    """Dataset of cat/dog images stored as individual files in one directory.

    Samples are ordered by the first integer found in each file name, with
    the file name itself as a tie-breaker. The label is derived from the
    file name: 1 if it contains "dog", 0 if it contains "cat", and -1
    otherwise.
    """

    def __init__(self, root, transform=None):
        """Index the files in ``root``.

        Args:
            root: Directory whose entries are the image files.
            transform: Optional callable invoked as ``transform(image=array)``
                that returns a mapping with an ``"image"`` entry.

        Raises:
            IndexError: If a file name in ``root`` contains no digits.
        """
        self.root = root
        self.transform = transform
        self.images = sorted(os.listdir(root), key=self._sort_key)

    @staticmethod
    def _sort_key(name):
        """Return ``(first integer in name, name)`` as the ordering key."""
        # The name is a tie-breaker: files such as "cat.0.jpg" and "dog.0.jpg"
        # share the same number, and os.listdir returns entries in arbitrary
        # order, so sorting on the number alone is not reproducible.
        return int(re.findall(r"\d+", name)[0]), name

    def __len__(self):
        """Return the number of indexed files."""
        return len(self.images)

    def __getitem__(self, index):
        """Load the sample at ``index``.

        Returns:
            A ``(img, label)`` tuple. ``img`` is the RGB image as a NumPy
            array, or the transform's ``"image"`` output when a transform is
            set. ``label`` is 1 for "dog", 0 for "cat", -1 otherwise.
        """
        file = self.images[index]

        # The context manager closes the file handle deterministically.
        # convert("RGB") guarantees a 3-channel array even for grayscale,
        # palette or RGBA files.
        with Image.open(os.path.join(self.root, file)) as image:
            img = np.array(image.convert("RGB"))

        if self.transform is not None:
            img = self.transform(image=img)["image"]

        if "dog" in file:
            label = 1
        elif "cat" in file:
            label = 0
        else:
            # File name carries no class information.
            label = -1

        return img, label

0 commit comments

Comments
 (0)