Add python server-side batching example

deliahu · deliahu · commit f4c0951c6586 · 2021-02-15T20:32:03.000-08:00
diff --git a/test/apis/pytorch/iris-classifier/model.py b/test/apis/pytorch/iris-classifier/model.py
@@ -2,9 +2,6 @@
 import torch.nn as nn
 import torch.nn.functional as F
 from torch.autograd import Variable
-from sklearn.datasets import load_iris
-from sklearn.model_selection import train_test_split
-from sklearn.metrics import accuracy_score
 
 
 class IrisNet(nn.Module):
@@ -24,6 +21,10 @@ def forward(self, X):
 
 
 if __name__ == "__main__":
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+
     iris = load_iris()
     X, y = iris.data, iris.target
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)
diff --git a/test/apis/pytorch/iris-classifier/requirements.txt b/test/apis/pytorch/iris-classifier/requirements.txt
@@ -1,2 +1 @@
 torch
-scikit-learn
diff --git a/test/apis/pytorch/server-side-batching/cortex.yaml b/test/apis/pytorch/server-side-batching/cortex.yaml
@@ -0,0 +1,11 @@
+# API definition for the server-side batching example; the predictor lives in predictor.py
+- name: iris-classifier
+  kind: RealtimeAPI
+  predictor:
+    type: python
+    path: predictor.py
+    config:
+      model: s3://cortex-examples/pytorch/iris-classifier/weights.pth  # read by predictor.py as config["model"]
+    server_side_batching:
+      max_batch_size: 8  # presumably the most requests grouped into one predict() call; confirm against Cortex docs
+      batch_interval: 0.1s  # presumably how long to wait for more requests before running a partial batch; confirm
+    threads_per_process: 8  # NOTE(review): matches max_batch_size, presumably so a full batch can be in flight; confirm
diff --git a/test/apis/pytorch/server-side-batching/model.py b/test/apis/pytorch/server-side-batching/model.py
@@ -0,0 +1,58 @@
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch.autograd import Variable
+
+
+class IrisNet(nn.Module):
+    """Three-layer fully connected network: 4 input features in, 3 softmax scores out."""
+
+    def __init__(self):
+        super().__init__()
+        # Attribute names become the state_dict keys, so they must stay stable
+        # for previously saved weights to keep loading.
+        self.fc1 = nn.Linear(4, 100)
+        self.fc2 = nn.Linear(100, 100)
+        self.fc3 = nn.Linear(100, 3)
+        self.softmax = nn.Softmax(dim=1)
+
+    def forward(self, X):
+        """Run X through fc1 -> ReLU -> fc2 -> fc3 and return softmax over dim 1."""
+        hidden = F.relu(self.fc1(X))
+        # fc2 feeds fc3 directly; there is no activation between them
+        scores = self.fc3(self.fc2(hidden))
+        return self.softmax(scores)
+
+
+if __name__ == "__main__":
+    # Training entry point: fits IrisNet on the iris dataset and writes weights.pth.
+    # scikit-learn is imported here so that importing this module (as predictor.py does)
+    # only requires torch.
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+
+    iris = load_iris()
+    X, y = iris.data, iris.target
+    # test_size=0.8 leaves 20% of the samples for training
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.8, random_state=42)
+
+    # Plain tensors are enough: the Variable wrapper is deprecated and no longer
+    # needed for autograd.
+    train_X = torch.Tensor(X_train).float()
+    test_X = torch.Tensor(X_test).float()
+    train_y = torch.Tensor(y_train).long()
+    test_y = torch.Tensor(y_test).long()
+
+    model = IrisNet()
+
+    # NOTE(review): IrisNet.forward already applies softmax, while nn.CrossEntropyLoss
+    # is documented to expect unnormalized logits - confirm the double softmax is intended.
+    criterion = nn.CrossEntropyLoss()
+
+    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)
+
+    for epoch in range(1000):
+        optimizer.zero_grad()
+        out = model(train_X)
+        loss = criterion(out, train_y)
+        loss.backward()
+        optimizer.step()
+
+        if epoch % 100 == 0:
+            print("number of epoch {} loss {}".format(epoch, loss))
+
+    # Evaluation needs no gradients, so skip building the autograd graph.
+    with torch.no_grad():
+        predict_out = model(test_X)
+    _, predict_y = torch.max(predict_out, 1)
+
+    print("prediction accuracy {}".format(accuracy_score(test_y.numpy(), predict_y.numpy())))
+
+    # Only the state_dict is saved; loading it requires the IrisNet class definition.
+    torch.save(model.state_dict(), "weights.pth")
diff --git a/test/apis/pytorch/server-side-batching/predictor.py b/test/apis/pytorch/server-side-batching/predictor.py
@@ -0,0 +1,54 @@
+import re
+import torch
+import os
+import boto3
+from botocore import UNSIGNED
+from botocore.client import Config
+from model import IrisNet
+
+labels = ["setosa", "versicolor", "virginica"]
+
+
+class PythonPredictor:
+    """Predictor that loads IrisNet weights from S3 and labels batches of iris samples."""
+
+    def __init__(self, config):
+        """Download the weights named by config["model"] and load them into IrisNet.
+
+        Args:
+            config: mapping with a "model" entry of the form s3://<bucket>/<key>.
+
+        Raises:
+            ValueError: if config["model"] does not look like s3://<bucket>/<key>.
+        """
+        # download the model
+        match = re.match("s3://(.+?)/(.+)", config["model"])
+        if match is None:
+            # fail with a clear message instead of AttributeError on None.groups()
+            raise ValueError("invalid S3 model path: {}".format(config["model"]))
+        bucket, key = match.groups()
+
+        if os.environ.get("AWS_ACCESS_KEY_ID"):
+            s3 = boto3.client("s3")  # client will use your credentials if available
+        else:
+            s3 = boto3.client("s3", config=Config(signature_version=UNSIGNED))  # anonymous client
+
+        s3.download_file(bucket, key, "/tmp/model.pth")
+
+        # initialize the model
+        # NOTE: torch.load unpickles the downloaded file, so config["model"] must point
+        # at a trusted object.
+        model = IrisNet()
+        model.load_state_dict(torch.load("/tmp/model.pth"))
+        model.eval()
+
+        self.model = model
+
+    def predict(self, payload):
+        """Return one label per sample in payload, in the same order.
+
+        Args:
+            payload: iterable of mappings, each with "sepal_length", "sepal_width",
+                "petal_length" and "petal_width" entries.
+
+        Returns:
+            List of label strings taken from the module-level ``labels`` list;
+            empty when payload is empty.
+        """
+        responses = []
+
+        # note: this is not the most efficient way, it's just to test server-side batching
+        # Inference only: disable autograd so no graph is built per forward pass.
+        with torch.no_grad():
+            for sample in payload:
+                # Convert the request to a tensor and pass it into the model
+                input_tensor = torch.FloatTensor(
+                    [
+                        [
+                            sample["sepal_length"],
+                            sample["sepal_width"],
+                            sample["petal_length"],
+                            sample["petal_width"],
+                        ]
+                    ]
+                )
+
+                # Run the prediction
+                output = self.model(input_tensor)
+
+                # Translate the model output to the corresponding label string
+                responses.append(labels[int(torch.argmax(output[0]))])
+
+        return responses
diff --git a/test/apis/pytorch/server-side-batching/requirements.txt b/test/apis/pytorch/server-side-batching/requirements.txt
@@ -0,0 +1 @@
+torch
diff --git a/test/apis/pytorch/server-side-batching/sample.json b/test/apis/pytorch/server-side-batching/sample.json
@@ -0,0 +1,6 @@
+{
+    "sepal_length": 2.2,
+    "sepal_width": 3.6,
+    "petal_length": 1.4,
+    "petal_width": 3.3
+}