Commit d0da741

Added replicationcontroller permissions to permissions-fix.yaml. Uploaded new examples with datasets from tensorflow_datasets.
1 parent 15dfb6b commit d0da741

9 files changed: +240 -1 lines changed
Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
# EUROSAT and VGG16

The following VGG16 TensorFlow deep learning model has been used in Kafka-ML for this example with the EUROSAT dataset:

```
base_model = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=(64,64,3))
x = tf.keras.layers.Flatten()(base_model.output)
x = tf.keras.layers.Dense(1000, activation='relu')(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
predictions = tf.keras.layers.Dense(10, activation='softmax')(x)

model = tf.keras.Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer=tf.keras.optimizers.SGD(0.001),
              # the output layer already applies softmax, so probabilities (not logits) reach the loss
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
```

The batch_size used is 256 and the training configuration is (epochs=50, shuffle=True).
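Outside Kafka-ML, a minimal sketch of the equivalent standalone training call with these settings (the tf.data pipeline below is an illustrative assumption, not part of this commit):

```
import tensorflow as tf
import tensorflow_datasets as tfds

# Stand-in for the Kafka-ML training configuration: shuffle the EuroSAT
# train split, batch at 256, and train for 50 epochs.
train = tfds.load('eurosat', split='train', as_supervised=True)
train = train.shuffle(1000).batch(256).prefetch(tf.data.AUTOTUNE)
model.fit(train, epochs=50)
```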
In the PyTorch case, the following VGG16 deep learning model has been used in Kafka-ML for the EUROSAT dataset example:

```
import torch
from torch import nn
from torchvision import models
from ignite.metrics import Accuracy, Loss  # metrics assumed to come from pytorch-ignite

class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.pretrained = models.vgg16(pretrained=True)
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(1000, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 10),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.pretrained(x)
        x = self.flatten(x)
        output = self.linear_relu_stack(x)
        return output

    def loss_fn(self):
        return nn.CrossEntropyLoss()

    def optimizer(self):
        return torch.optim.SGD(self.parameters(), lr=1e-3)

    def metrics(self):
        val_metrics = {
            "accuracy": Accuracy(),
            "loss": Loss(self.loss_fn())
        }
        return val_metrics

model = VGG16()
```

The batch_size used is 256 and the training configuration is (max_epochs=50, shuffle=True).
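Continuing from the block above, a quick shape check on a dummy batch can confirm that the pretrained torchvision VGG16 feeds its 1000 ImageNet logits into the classifier head as intended; a minimal sketch, not part of the commit:

```
dummy = torch.randn(2, 3, 64, 64)   # EuroSAT patches are 64x64 RGB
print(model(dummy).shape)           # expected: torch.Size([2, 10])
```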
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
```
import tensorflow as tf
import tensorflow_datasets as tfds
import logging
from kafka import KafkaProducer, KafkaConsumer

logging.basicConfig(level=logging.INFO)

INPUT_TOPIC = 'eurosat-in'
OUTPUT_TOPIC = 'eurosat-out'
BOOTSTRAP_SERVERS = '127.0.0.1:9094'
ITEMS_TO_PREDICT = 10

eurosat = tfds.load('eurosat', as_supervised=True, shuffle_files=True,
                    split=[f"train[:{ITEMS_TO_PREDICT}]"], data_dir='datasets/eurosat')

# Create a producer to send the values to predict
producer = KafkaProducer(bootstrap_servers=BOOTSTRAP_SERVERS)

# Send each value to predict to Kafka as raw bytes
for image, _ in eurosat[0]:
    producer.send(INPUT_TOPIC, image.numpy().tobytes())
producer.flush()
producer.close()

# Create an output consumer to receive the predictions
output_consumer = KafkaConsumer(OUTPUT_TOPIC, bootstrap_servers=BOOTSTRAP_SERVERS, group_id="output_group")

print('\n')
print('Output consumer: ')
for msg in output_consumer:
    print(msg.value.decode())
```
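The exact payload on the output topic depends on how the deployed Kafka-ML inference service serializes its results. Assuming it returns the 10-element class distribution as a JSON-encoded list (an assumption, not confirmed by this commit), picking the winning class inside the consumer loop could look like:

```
import json
import numpy as np

probs = np.array(json.loads(msg.value.decode()))  # assumed: JSON list of 10 class scores
print('predicted class:', int(probs.argmax()))
```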
Lines changed: 21 additions & 0 deletions
@@ -0,0 +1,21 @@
```
import sys
sys.path.append(sys.path[0] + "/../..")   # to allow importing datasources

from datasources.raw_sink import RawSink
import tensorflow as tf
import tensorflow_datasets as tfds
import logging

logging.basicConfig(level=logging.INFO)

# 'boostrap_servers' (sic) is the keyword expected by RawSink
eurosat = RawSink(boostrap_servers='localhost:9094', topic='automl', deployment_id=1,
                  description='eurosat dataset', validation_rate=0.1, test_rate=0.1)

ds = tfds.load('eurosat', as_supervised=True, shuffle_files=True, data_dir='datasets/eurosat')
ds['train'] = ds['train'].shuffle(buffer_size=1000)

for image, label in ds['train']:
    eurosat.send(data=image.numpy(), label=label.numpy())

eurosat.close()
```
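Before streaming, it can help to confirm what the sink will receive; a minimal sketch using the tfds metadata (counts correspond to the standard EuroSAT RGB configuration):

```
import tensorflow_datasets as tfds

ds, info = tfds.load('eurosat', as_supervised=True, with_info=True,
                     data_dir='datasets/eurosat')
print(info.splits['train'].num_examples)  # 27000 examples in the train split
print(ds['train'].element_spec)           # 64x64x3 uint8 images, int64 labels
```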
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
```
tensorflow==2.7.0
tensorflow-datasets==4.5.2
kafka-python==2.0.2
```
Lines changed: 61 additions & 0 deletions
@@ -0,0 +1,61 @@
# SO2SAT and VGG16

The following VGG16 TensorFlow deep learning model has been used in Kafka-ML for this example with the SO2SAT dataset:

```
base_model = tf.keras.applications.VGG16(include_top=False, weights='imagenet', input_shape=(32,32,3))
x = tf.keras.layers.Flatten()(base_model.output)
x = tf.keras.layers.Dense(1000, activation='relu')(x)
x = tf.keras.layers.Dense(512, activation='relu')(x)
x = tf.keras.layers.Dense(128, activation='relu')(x)
predictions = tf.keras.layers.Dense(17, activation='softmax')(x)

model = tf.keras.Model(inputs=base_model.input, outputs=predictions)

model.compile(optimizer=tf.keras.optimizers.SGD(0.001),
              # the output layer already applies softmax, so probabilities (not logits) reach the loss
              loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=False),
              metrics=[tf.keras.metrics.SparseCategoricalAccuracy()])
```

The batch_size used is 256 and the training configuration is (epochs=50, shuffle=True).
In the PyTorch case, the following VGG16 deep learning model has been used in Kafka-ML for the SO2SAT dataset example:

```
import torch
from torch import nn
from torchvision import models
from ignite.metrics import Accuracy, Loss  # metrics assumed to come from pytorch-ignite

class VGG16(nn.Module):
    def __init__(self):
        super(VGG16, self).__init__()
        self.pretrained = models.vgg16(pretrained=True)
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(1000, 512),
            nn.ReLU(),
            nn.Linear(512, 256),
            nn.ReLU(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, 17),
            nn.Softmax(dim=1)
        )

    def forward(self, x):
        x = self.pretrained(x)
        x = self.flatten(x)
        output = self.linear_relu_stack(x)
        return output

    def loss_fn(self):
        return nn.CrossEntropyLoss()

    def optimizer(self):
        return torch.optim.SGD(self.parameters(), lr=1e-3)

    def metrics(self):
        val_metrics = {
            "accuracy": Accuracy(),
            "loss": Loss(self.loss_fn())
        }
        return val_metrics

model = VGG16()
```

The batch_size used is 256 and the training configuration is (max_epochs=50, shuffle=True).
Lines changed: 3 additions & 0 deletions
@@ -0,0 +1,3 @@
```
tensorflow==2.7.0
tensorflow-datasets==4.5.2
kafka-python==2.0.2
```
Lines changed: 32 additions & 0 deletions
@@ -0,0 +1,32 @@
```
import tensorflow as tf
import tensorflow_datasets as tfds
import logging
from kafka import KafkaProducer, KafkaConsumer

logging.basicConfig(level=logging.INFO)

INPUT_TOPIC = 'so2sat-in'
OUTPUT_TOPIC = 'so2sat-out'
BOOTSTRAP_SERVERS = '127.0.0.1:9094'
ITEMS_TO_PREDICT = 10

so2sat = tfds.load('so2sat', as_supervised=True, shuffle_files=True,
                   split=[f"validation[:{ITEMS_TO_PREDICT}]"], data_dir='datasets/so2sat')

# Create a producer to send the values to predict
producer = KafkaProducer(bootstrap_servers=BOOTSTRAP_SERVERS)

# Send each value to predict to Kafka as raw bytes
for image, _ in so2sat[0]:
    producer.send(INPUT_TOPIC, image.numpy().tobytes())
producer.flush()
producer.close()

# Create an output consumer to receive the predictions
output_consumer = KafkaConsumer(OUTPUT_TOPIC, bootstrap_servers=BOOTSTRAP_SERVERS, group_id="output_group")

print('\n')
print('Output consumer: ')
for msg in output_consumer:
    print(msg.value.decode())
```
Lines changed: 26 additions & 0 deletions
@@ -0,0 +1,26 @@
```
import sys
sys.path.append(sys.path[0] + "/../..")   # to allow importing datasources

from datasources.raw_sink import RawSink
import tensorflow as tf
import tensorflow_datasets as tfds
import logging

logging.basicConfig(level=logging.INFO)

# 'boostrap_servers' (sic) is the keyword expected by RawSink
so2sat = RawSink(boostrap_servers='localhost:9094', topic='automl', deployment_id=1,
                 description='so2sat dataset', validation_rate=0.1, test_rate=0.1)

ds = tfds.load('so2sat', as_supervised=True, shuffle_files=True, data_dir='datasets/so2sat')

ds['train'] = ds['train'].shuffle(buffer_size=1000)
ds['validation'] = ds['validation'].shuffle(buffer_size=1000)

for image, label in ds['train']:
    so2sat.send(data=image.numpy(), label=label.numpy())

for image, label in ds['validation']:
    so2sat.send(data=image.numpy(), label=label.numpy())

so2sat.close()
```
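Because this sink pushes two splits through the same topic, a compact variant of the two loops that also counts what goes out may be easier to keep consistent; a sketch continuing from the script above, with the counter added for illustration only:

```
sent = 0
for split in ('train', 'validation'):
    for image, label in ds[split]:
        so2sat.send(data=image.numpy(), label=label.numpy())
        sent += 1
logging.info('sent %d records to topic automl', sent)
```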

permissions-fix.yaml

Lines changed: 1 addition & 1 deletion
```
@@ -6,7 +6,7 @@ metadata:
   namespace: kafkaml
 rules:
 - apiGroups: ["", "apps", "batch"]
-  resources: [ "deployments", "jobs", pods", "replicasets", services" ]
+  resources: [ "deployments", "jobs", "pods", "replicasets", "services", "replicationcontrollers"]
   verbs: [ "create", "get", "list", "delete", "watch"]
 ---
 apiVersion: rbac.authorization.k8s.io/v1
```
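After this change, the rule grants the Kafka-ML role access to replication controllers in addition to the existing resources; the resulting block in permissions-fix.yaml reads:

```
rules:
- apiGroups: ["", "apps", "batch"]
  resources: [ "deployments", "jobs", "pods", "replicasets", "services", "replicationcontrollers"]
  verbs: [ "create", "get", "list", "delete", "watch"]
```

Re-applying the manifest (e.g. `kubectl apply -f permissions-fix.yaml`) makes the updated Role take effect.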
