graphcore
diff --git a/‎applications/popart/faster-rcnn/.gitignore‎
Lines changed: 3 additions & 1 deletion b/‎applications/popart/faster-rcnn/.gitignore‎
Lines changed: 3 additions & 1 deletion
diff --git a/‎applications/popart/faster-rcnn/IPU/_globals.py‎
Lines changed: 14 additions & 1 deletion b/‎applications/popart/faster-rcnn/IPU/_globals.py‎
Lines changed: 14 additions & 1 deletion
diff --git a/‎applications/popart/faster-rcnn/IPU/basic_func.py‎
Lines changed: 12 additions & 10 deletions b/‎applications/popart/faster-rcnn/IPU/basic_func.py‎
Lines changed: 12 additions & 10 deletions
diff --git a/‎applications/popart/faster-rcnn/IPU/combined_func.py‎
Lines changed: 10 additions & 2 deletions b/‎applications/popart/faster-rcnn/IPU/combined_func.py‎
Lines changed: 10 additions & 2 deletions
diff --git a/‎applications/popart/faster-rcnn/IPU/custom_ops/Makefile‎
Lines changed: 20 additions & 0 deletions b/‎applications/popart/faster-rcnn/IPU/custom_ops/Makefile‎
Lines changed: 20 additions & 0 deletions
diff --git a/‎applications/popart/faster-rcnn/IPU/custom_ops/include/customop.h‎
Lines changed: 2 additions & 0 deletions b/‎applications/popart/faster-rcnn/IPU/custom_ops/include/customop.h‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎applications/popart/faster-rcnn/IPU/custom_ops/nms/nms_codelet.cpp‎
Lines changed: 44 additions & 19 deletions b/‎applications/popart/faster-rcnn/IPU/custom_ops/nms/nms_codelet.cpp‎
Lines changed: 44 additions & 19 deletions
@@ -17,4 +17,6 @@ build
 /debug_tmp_data/
 /wandb/
 /notebooks/
-/engine_cache/
+/engine_cache/
+/deprecated_yamls/
+/IPU/custom_ops/roi_align/report/
@@ -16,16 +16,29 @@
     'deviceType': 'ipu',
     'options': popart.SessionOptions(),
     'seed': int(time.time()),
-    'float_precision': 'FLOAT',
+    'weight_fp16': None,
     'available_memory_proportion': None,
     'global_initializer': {},
     'exclude_weights': [],
     'all_weights': [],
     'all_trainable_weights': [],
     'load_strict': False,
+    'all_tensors_info': [],
 }
 
 
+def get_all_tensors_info():  # accessor for the shared 'all_tensors_info' list in GLOBAL_V
+    return GLOBAL_V['all_tensors_info']  # the list object itself (not a copy), so callers can append to it
+
+
+def set_weight_fp16(_state):  # store the global weight-fp16 setting
+    GLOBAL_V['weight_fp16'] = _state  # the entry starts out as None in GLOBAL_V
+
+
+def get_weight_fp16():  # read back the global weight-fp16 setting
+    return GLOBAL_V['weight_fp16']  # None unless set_weight_fp16() has stored something else
+
+
 def set_exclude_weights(exclude_weights):
     if isinstance(exclude_weights, str):
         exclude_weights = [exclude_weights]
 
@@ -7,7 +7,7 @@
 import string
 import numpy as np
 import popart
-from _globals import GLOBAL_V, set_batch, get_batch_size, get_anchor_return_type, train_mode_on, train_mode, safe_mode, safe_mode_on, safe_mode_off, get_builder, set_builder, set_seed, get_seed, set_options, get_options, set_device, get_device_type, get_ai_onnx_version, set_memory_proportion, get_memory_proportion, enable_global_initializer, get_global_initializer, get_exclude_weights, set_exclude_weights, get_all_trainable_weights, load_model, set_load_strict, load_strict
+from _globals import GLOBAL_V, set_batch, get_batch_size, get_anchor_return_type, train_mode_on, train_mode, safe_mode, safe_mode_on, safe_mode_off, get_builder, set_builder, set_seed, get_seed, set_options, get_options, set_device, get_device_type, get_ai_onnx_version, set_memory_proportion, get_memory_proportion, enable_global_initializer, get_global_initializer, get_exclude_weights, set_exclude_weights, get_all_trainable_weights, load_model, set_load_strict, load_strict, set_weight_fp16, get_weight_fp16, get_all_tensors_info
 
 CONSTANT_COUNTER = [0]
 TENSOR_NAMES = []
@@ -820,7 +820,7 @@ def align_tensor(tensors):
     return tensors
 
 
-def int32toint64(tensor):
+def int32toint64(t):
     return t.cast('INT64') if t.type == 'int32' else t
 
 
@@ -839,6 +839,7 @@ def __init__(self, name, nodata=False):
         if safe_mode() and not nodata:
             assert isinstance(self.pureShape, (list, tuple))
             assert isinstance(self.dtype, str)
+        get_all_tensors_info().append(str(self))
 
     def copy_from_tensor(self, tensor):
         assert self.__class__.__name__ == tensor.__class__.__name__
@@ -1067,6 +1068,13 @@ def getIpuIndex(self, ):
         assert name is not None
         return name
 
+    def __repr__(self, ):  # one-line summary: class name, tensor name, shape, dtype, object id
+        string = self.__class__.__name__ + ': ' + self.__name + ', shape: ' + str(
+            self.pureShape) + ', dtype: ' + self.dtype
+        string = string + ', constant'  # marker text only; the data values are not included
+        string += ', ID: ' + str(id(self))  # id() of this Python object
+        return string
+
     @property
     def pureShape(self):
         return self.data.shape
@@ -1081,13 +1089,6 @@ def dtype(self):
     def as_list(self, ):
         return self.data.tolist()
 
-    def __repr__(self, ):
-        string = self.__class__.__name__ + ': ' + self.__name + ', shape: ' + str(
-            self.pureShape) + ', dtype: ' + self.dtype
-        string = string + ', constant: ' + str(self.data)
-        string += ', ID: ' + str(id(self))
-        return string
-
     def __getitem__(self, index):
         if isinstance(index, int):
             return constant(self.data[index])
@@ -1174,7 +1175,8 @@ def nllloss(prob,
             label,
             reductionType=popart.ReductionType.Mean,
             debugPrefix=''):
-    #
+    # prob: scaled probabilities, [batch, classes], float
+    # label: labels, [batch,], int32
     with name_scope(debugPrefix):
         loss = get_builder().aiGraphcore.nllloss(
             [prob.getIpuIndex(), label.getIpuIndex()],
 
@@ -53,7 +53,6 @@ def conv2d(input,
            bias=True,
            train=True,
            strides=[1, 1],
-           pads=[1, 1, 1, 1],
            dilations=[1, 1],
            group=1,
            filters_data=None,
@@ -75,12 +74,21 @@ def conv2d(input,
             np.asarray(filters_data.shape) == np.asarray(weights_shape))
     else:
         filters_data = np.ones(weights_shape, bF.mappin_gc2npy[input.dtype])
-    local_weights_fp16_on = fp16_on if weights_fp16_on is None else weights_fp16_on
+    local_weights_fp16_on = fp16_on
+    if bF.get_weight_fp16() is not None:
+        local_weights_fp16_on = bF.get_weight_fp16()
+    if weights_fp16_on is not None:
+        local_weights_fp16_on = weights_fp16_on
+    if input.dtype.upper() in ['FLOAT', 'FLOAT32'] and local_weights_fp16_on:
+        raise RuntimeError('weights cannnot be fp16 while input is fp32')
     weights = temporary_init_weights(filters_data,
                                      debugContext + "weight",
                                      fp16_on=local_weights_fp16_on,
                                      train=train)
     if fp16_on and local_weights_fp16_on is False:
+        if isinstance(weights, bF.ConstantTensor):
+            # casting 32 to 16 might be different between IPU and numpy
+            weights = bF.TTensor(weights.getIpuIndex())
         weights = weights.cast('FLOAT16')
 
     # init bias
 
@@ -0,0 +1,20 @@
+SUBDIRS = nms roi_align
+
+cur_makefile_path := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))
+# all: rewrite include/customop.h with this directory's path, then build each subdir; a failing sub-make aborts the loop so its status reaches the caller.
+.PHONY: all
+all:
+	@echo "// Copyright (c) 2021 Graphcore Ltd. All rights reserved." > include/customop.h
+	@echo "#define CUSTOM_OPS_PATH \"$(cur_makefile_path)\"" >> include/customop.h
+	@list='$(SUBDIRS)'; for subdir in $$list; do \
+		echo "make in $$subdir";\
+		$(MAKE) -C $$subdir || exit $$?;\
+	done
+# clean: run the clean target of each subdir.
+.PHONY: clean
+clean:
+	@echo Making clean
+	@list='$(SUBDIRS)'; for subdir in $$list; do \
+		echo "make in $$subdir";\
+		$(MAKE) -C $$subdir clean;\
+	done
@@ -0,0 +1,2 @@
+// Copyright (c) 2021 Graphcore Ltd. All rights reserved.
+#define CUSTOM_OPS_PATH "/localdata/hudi/test/room1/public_examples/applications/popart/faster-rcnn/IPU/custom_ops"  // NOTE(review): absolute path from one build machine; the custom_ops Makefile `all` target rewrites this header
@@ -210,8 +210,7 @@ class NmsCoreVertex : public Vertex
         Input<float> nms_thresh;
 
         Input<unsigned int>   idx;
-        Vector<Input<T>> box_i;    //vector of 4 elements filled per vertice
-        Input<int>   finish_r;
+        Vector<Input<T>> box_i;  // vector of 4 elements filled per vertice
 
         // The compute method performs core computation
         bool compute()
@@ -227,20 +226,20 @@ class NmsCoreVertex : public Vertex
             float box_b[4] = {box_i[0], box_i[1], box_i[2], box_i[3]};
             float box_s[4] = {box_r[0], box_r[1], box_r[2], box_r[3]};
 
-            if(keep_r[0]==1.0f and finish_r != 1)
+            if(keep_r[0]==1.0f)
             {   
                 float xy1_0 = (box_r[0] > box_i[0])? box_r[0] : box_i[0]; 
                 float xy1_1 = (box_r[1] > box_i[1])? box_r[1] : box_i[1];
 
                 float xy2_0 = (box_r[2] < box_i[2])? box_r[2] : box_i[2];
                 float xy2_1 = (box_r[3] < box_i[3])? box_r[3] : box_i[3];
 
-                float tmp0 = xy2_0 - xy1_0; //+ 1.0f;
-                float tmp1 = xy2_1 - xy1_1; //+ 1.0f;
+                float tmp0 = xy2_0 - xy1_0;  // + 1.0f;
+                float tmp1 = xy2_1 - xy1_1;  // + 1.0f;
                 if(tmp0 < 0.0f)
-                    tmp0 = 0.0f;//-tmp0;
+                    tmp0 = 0.0f;
                 if(tmp1 < 0.0f)
-                    tmp1 = 0.0f;//-tmp1;
+                    tmp1 = 0.0f;
 
 
                 float inter = tmp0 * tmp1;
@@ -394,13 +393,11 @@ class PartialFetchBoxVertex : public Vertex
     public:
         Input<int> in_row_start;
         Input<int> in_row_end;
-        Vector<Input<Vector<T>>> in_tensor; // Per Vertex sees subtensor of shape [bs, (5*Top_n)*4]
-        // Vector<Input<int>> j_tensor;            // Per Vertex sees subtensor of shape [bs], value within [0, 5*top_n) 
-        Input<int> batch_size;                  // bs
-        Input<int> length;                      // Suppose to be 5*top_n
+        Vector<Input<Vector<T>>> in_tensor;  // Per Vertex sees subtensor of shape [bs, (5*Top_n)*4]
+        Input<int> batch_size;  // bs
 
-        Input<Vector<int>>       sorted_index;
-        Vector<Output<Vector<T>>>         out_val;  // Per Vertex fill sub-tensor of shape [bs, 4]
+        Input<Vector<int>> sorted_index;
+        Vector<Output<Vector<T>>> out_val;  // Per Vertex fill sub-tensor of shape [bs, 4]
 
 
     bool compute()
@@ -584,25 +581,28 @@ template <typename T>
 class UpdateStateVertex : public Vertex
 {
     public:        
-        Input<Vector<int>> num_nonzeros_in_scores; // [bs] shaped
-        Input<int> batch_size;                     //  bs
-        InOut<Vector<int>> iTensor;                // [bs] shaped
-        Output<Vector<int>> finish;                // [bs] shaped
+        Input<Vector<int>> num_nonzeros_in_scores;  // [bs] shaped
+        Input<int> batch_size;  //  bs
+        InOut<Vector<int>> iTensor;  // [bs] shaped
+        Output<Vector<int>> finish;  // [bs] shaped
+        Output<Vector<int>> flag_test;
 
         // The compute method performs core computation
         bool compute()
-        {   
+        {
             for(int sample = 0; sample < batch_size; sample++)
             {
                 int i = iTensor[sample];
                 int scores_num_nonzeros = num_nonzeros_in_scores[sample];
-
+                
                 if(i >= scores_num_nonzeros)
                 {
+                    flag_test[sample] = 1;
                     break;
                 }
                 else
                     iTensor[sample] = (i + 1);
+                    flag_test[sample] = 2;
             }
             return true;
         }
@@ -615,3 +615,28 @@ class UpdateStateVertex : public Vertex
 template class UpdateStateVertex<float>;
 template class UpdateStateVertex<half>;
 
+// Vertex that rewrites `res` in place: when the last element is 0, the
+// trailing run of equal values (ending at the last element) is set to -1.
+class setResultVertex : public poplar::Vertex {
+public:
+  setResultVertex();
+  Vector<InOut<int>> res;  // {L}
+
+  bool compute() {
+    int L = res.size();
+    // Test the length first: indexing res[L - 1] with L == 0 reads out of bounds.
+    if (L <= 1 || res[L - 1] != 0) {
+        return true;
+    }
+    // Length of the run of identical values that ends at the last element.
+    int count = 1;
+    for (int i = L - 2; i >= 0 && res[i + 1] == res[i]; i--) {
+        count++;
+    }
+    // Overwrite the whole trailing run with -1.
+    for (int i = L - 1; i >= L - count; i--) {
+        res[i] = -1;
+    }
+    return true;
+  }
+};
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+// Copyright (c) 2021 Graphcore Ltd. All rights reserved.`
	`2`	`+#define CUSTOM_OPS_PATH "/localdata/hudi/test/room1/public_examples/applications/popart/faster-rcnn/IPU/custom_ops"`