Skip to content

Commit b24e2a8

Browse files
committed
Add Faster-RCNN model
1 parent 32fc7bd commit b24e2a8

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+1149
-452
lines changed

applications/popart/faster-rcnn/.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,4 +17,6 @@ build
1717
/debug_tmp_data/
1818
/wandb/
1919
/notebooks/
20-
/engine_cache/
20+
/engine_cache/
21+
/deprecated_yamls/
22+
/IPU/custom_ops/roi_align/report/

applications/popart/faster-rcnn/IPU/_globals.py

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,29 @@
1616
'deviceType': 'ipu',
1717
'options': popart.SessionOptions(),
1818
'seed': int(time.time()),
19-
'float_precision': 'FLOAT',
19+
'weight_fp16': None,
2020
'available_memory_proportion': None,
2121
'global_initializer': {},
2222
'exclude_weights': [],
2323
'all_weights': [],
2424
'all_trainable_weights': [],
2525
'load_strict': False,
26+
'all_tensors_info': [],
2627
}
2728

2829

30+
def get_all_tensors_info():
    """Return the object stored under GLOBAL_V['all_tensors_info'].

    The stored object itself is returned (not a copy), so callers that
    append to it modify the global record.
    """
    tensors_info = GLOBAL_V['all_tensors_info']
    return tensors_info
32+
33+
34+
def set_weight_fp16(_state):
    """Store ``_state`` as the global 'weight_fp16' setting in GLOBAL_V.

    The value is stored as-is, without validation.
    """
    GLOBAL_V.update(weight_fp16=_state)
36+
37+
38+
def get_weight_fp16():
    """Return the global 'weight_fp16' setting currently held in GLOBAL_V."""
    fp16_state = GLOBAL_V['weight_fp16']
    return fp16_state
40+
41+
2942
def set_exclude_weights(exclude_weights):
3043
if isinstance(exclude_weights, str):
3144
exclude_weights = [exclude_weights]

applications/popart/faster-rcnn/IPU/basic_func.py

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
import string
88
import numpy as np
99
import popart
10-
from _globals import GLOBAL_V, set_batch, get_batch_size, get_anchor_return_type, train_mode_on, train_mode, safe_mode, safe_mode_on, safe_mode_off, get_builder, set_builder, set_seed, get_seed, set_options, get_options, set_device, get_device_type, get_ai_onnx_version, set_memory_proportion, get_memory_proportion, enable_global_initializer, get_global_initializer, get_exclude_weights, set_exclude_weights, get_all_trainable_weights, load_model, set_load_strict, load_strict
10+
from _globals import GLOBAL_V, set_batch, get_batch_size, get_anchor_return_type, train_mode_on, train_mode, safe_mode, safe_mode_on, safe_mode_off, get_builder, set_builder, set_seed, get_seed, set_options, get_options, set_device, get_device_type, get_ai_onnx_version, set_memory_proportion, get_memory_proportion, enable_global_initializer, get_global_initializer, get_exclude_weights, set_exclude_weights, get_all_trainable_weights, load_model, set_load_strict, load_strict, set_weight_fp16, get_weight_fp16, get_all_tensors_info
1111

1212
CONSTANT_COUNTER = [0]
1313
TENSOR_NAMES = []
@@ -820,7 +820,7 @@ def align_tensor(tensors):
820820
return tensors
821821

822822

823-
def int32toint64(tensor):
823+
def int32toint64(t):
    """Return ``t`` cast to 'INT64' when ``t.type`` is 'int32'; otherwise
    return ``t`` itself unchanged."""
    if t.type == 'int32':
        return t.cast('INT64')
    return t
825825

826826

@@ -839,6 +839,7 @@ def __init__(self, name, nodata=False):
839839
if safe_mode() and not nodata:
840840
assert isinstance(self.pureShape, (list, tuple))
841841
assert isinstance(self.dtype, str)
842+
get_all_tensors_info().append(str(self))
842843

843844
def copy_from_tensor(self, tensor):
844845
assert self.__class__.__name__ == tensor.__class__.__name__
@@ -1067,6 +1068,13 @@ def getIpuIndex(self, ):
10671068
assert name is not None
10681069
return name
10691070

1071+
def __repr__(self, ):
1072+
string = self.__class__.__name__ + ': ' + self.__name + ', shape: ' + str(
1073+
self.pureShape) + ', dtype: ' + self.dtype
1074+
string = string + ', constant'
1075+
string += ', ID: ' + str(id(self))
1076+
return string
1077+
10701078
@property
10711079
def pureShape(self):
10721080
return self.data.shape
@@ -1081,13 +1089,6 @@ def dtype(self):
10811089
def as_list(self, ):
10821090
return self.data.tolist()
10831091

1084-
def __repr__(self, ):
1085-
string = self.__class__.__name__ + ': ' + self.__name + ', shape: ' + str(
1086-
self.pureShape) + ', dtype: ' + self.dtype
1087-
string = string + ', constant: ' + str(self.data)
1088-
string += ', ID: ' + str(id(self))
1089-
return string
1090-
10911092
def __getitem__(self, index):
10921093
if isinstance(index, int):
10931094
return constant(self.data[index])
@@ -1174,7 +1175,8 @@ def nllloss(prob,
11741175
label,
11751176
reductionType=popart.ReductionType.Mean,
11761177
debugPrefix=''):
1177-
#
1178+
# prob: scaled probabilities, [batch, classes], float
1179+
# label: labels, [batch,], int32
11781180
with name_scope(debugPrefix):
11791181
loss = get_builder().aiGraphcore.nllloss(
11801182
[prob.getIpuIndex(), label.getIpuIndex()],

applications/popart/faster-rcnn/IPU/combined_func.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ def conv2d(input,
5353
bias=True,
5454
train=True,
5555
strides=[1, 1],
56-
pads=[1, 1, 1, 1],
5756
dilations=[1, 1],
5857
group=1,
5958
filters_data=None,
@@ -75,12 +74,21 @@ def conv2d(input,
7574
np.asarray(filters_data.shape) == np.asarray(weights_shape))
7675
else:
7776
filters_data = np.ones(weights_shape, bF.mappin_gc2npy[input.dtype])
78-
local_weights_fp16_on = fp16_on if weights_fp16_on is None else weights_fp16_on
77+
local_weights_fp16_on = fp16_on
78+
if bF.get_weight_fp16() is not None:
79+
local_weights_fp16_on = bF.get_weight_fp16()
80+
if weights_fp16_on is not None:
81+
local_weights_fp16_on = weights_fp16_on
82+
if input.dtype.upper() in ['FLOAT', 'FLOAT32'] and local_weights_fp16_on:
83+
raise RuntimeError('weights cannnot be fp16 while input is fp32')
7984
weights = temporary_init_weights(filters_data,
8085
debugContext + "weight",
8186
fp16_on=local_weights_fp16_on,
8287
train=train)
8388
if fp16_on and local_weights_fp16_on is False:
89+
if isinstance(weights, bF.ConstantTensor):
90+
# casting 32 to 16 might be different between IPU and numpy
91+
weights = bF.TTensor(weights.getIpuIndex())
8492
weights = weights.cast('FLOAT16')
8593

8694
# init bias
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Top-level Makefile for the custom ops: generates include/customop.h and then
# runs make (or make clean) in every directory listed in SUBDIRS.
SUBDIRS = nms roi_align

# Directory containing this Makefile; written into the generated header as
# CUSTOM_OPS_PATH.
cur_makefile_path := $(shell dirname $(realpath $(firstword $(MAKEFILE_LIST))))

# The sub-make loops stop at the first failing directory and propagate its
# exit status; without the explicit "|| exit" only the status of the last
# sub-make would reach the caller.
.PHONY: all
all:
	@echo "// Copyright (c) 2021 Graphcore Ltd. All rights reserved." > include/customop.h
	@echo "#define CUSTOM_OPS_PATH \"$(cur_makefile_path)\"" >> include/customop.h
	@list='$(SUBDIRS)'; for subdir in $$list; do \
		echo "make in $$subdir";\
		$(MAKE) -C $$subdir || exit $$?;\
	done

.PHONY: clean
clean:
	@echo Making clean
	@list='$(SUBDIRS)'; for subdir in $$list; do \
		echo "make in $$subdir";\
		$(MAKE) -C $$subdir clean || exit $$?;\
	done
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// Copyright (c) 2021 Graphcore Ltd. All rights reserved.
2+
#define CUSTOM_OPS_PATH "/localdata/hudi/test/room1/public_examples/applications/popart/faster-rcnn/IPU/custom_ops"

applications/popart/faster-rcnn/IPU/custom_ops/nms/nms_codelet.cpp

Lines changed: 44 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -210,8 +210,7 @@ class NmsCoreVertex : public Vertex
210210
Input<float> nms_thresh;
211211

212212
Input<unsigned int> idx;
213-
Vector<Input<T>> box_i; //vector of 4 elements filled per vertice
214-
Input<int> finish_r;
213+
Vector<Input<T>> box_i; // vector of 4 elements filled per vertice
215214

216215
// The compute method performs core computation
217216
bool compute()
@@ -227,20 +226,20 @@ class NmsCoreVertex : public Vertex
227226
float box_b[4] = {box_i[0], box_i[1], box_i[2], box_i[3]};
228227
float box_s[4] = {box_r[0], box_r[1], box_r[2], box_r[3]};
229228

230-
if(keep_r[0]==1.0f and finish_r != 1)
229+
if(keep_r[0]==1.0f)
231230
{
232231
float xy1_0 = (box_r[0] > box_i[0])? box_r[0] : box_i[0];
233232
float xy1_1 = (box_r[1] > box_i[1])? box_r[1] : box_i[1];
234233

235234
float xy2_0 = (box_r[2] < box_i[2])? box_r[2] : box_i[2];
236235
float xy2_1 = (box_r[3] < box_i[3])? box_r[3] : box_i[3];
237236

238-
float tmp0 = xy2_0 - xy1_0; //+ 1.0f;
239-
float tmp1 = xy2_1 - xy1_1; //+ 1.0f;
237+
float tmp0 = xy2_0 - xy1_0; // + 1.0f;
238+
float tmp1 = xy2_1 - xy1_1; // + 1.0f;
240239
if(tmp0 < 0.0f)
241-
tmp0 = 0.0f;//-tmp0;
240+
tmp0 = 0.0f;
242241
if(tmp1 < 0.0f)
243-
tmp1 = 0.0f;//-tmp1;
242+
tmp1 = 0.0f;
244243

245244

246245
float inter = tmp0 * tmp1;
@@ -394,13 +393,11 @@ class PartialFetchBoxVertex : public Vertex
394393
public:
395394
Input<int> in_row_start;
396395
Input<int> in_row_end;
397-
Vector<Input<Vector<T>>> in_tensor; // Per Vertex sees subtensor of shape [bs, (5*Top_n)*4]
398-
// Vector<Input<int>> j_tensor; // Per Vertex sees subtensor of shape [bs], value within [0, 5*top_n)
399-
Input<int> batch_size; // bs
400-
Input<int> length; // Suppose to be 5*top_n
396+
Vector<Input<Vector<T>>> in_tensor; // Per Vertex sees subtensor of shape [bs, (5*Top_n)*4]
397+
Input<int> batch_size; // bs
401398

402-
Input<Vector<int>> sorted_index;
403-
Vector<Output<Vector<T>>> out_val; // Per Vertex fill sub-tensor of shape [bs, 4]
399+
Input<Vector<int>> sorted_index;
400+
Vector<Output<Vector<T>>> out_val; // Per Vertex fill sub-tensor of shape [bs, 4]
404401

405402

406403
bool compute()
@@ -584,25 +581,28 @@ template <typename T>
584581
class UpdateStateVertex : public Vertex
585582
{
586583
public:
587-
Input<Vector<int>> num_nonzeros_in_scores; // [bs] shaped
588-
Input<int> batch_size; // bs
589-
InOut<Vector<int>> iTensor; // [bs] shaped
590-
Output<Vector<int>> finish; // [bs] shaped
584+
Input<Vector<int>> num_nonzeros_in_scores; // [bs] shaped
585+
Input<int> batch_size; // bs
586+
InOut<Vector<int>> iTensor; // [bs] shaped
587+
Output<Vector<int>> finish; // [bs] shaped
588+
Output<Vector<int>> flag_test;
591589

592590
// The compute method performs core computation
593591
bool compute()
594-
{
592+
{
595593
for(int sample = 0; sample < batch_size; sample++)
596594
{
597595
int i = iTensor[sample];
598596
int scores_num_nonzeros = num_nonzeros_in_scores[sample];
599-
597+
600598
if(i >= scores_num_nonzeros)
601599
{
600+
flag_test[sample] = 1;
602601
break;
603602
}
604603
else
605604
iTensor[sample] = (i + 1);
605+
flag_test[sample] = 2;
606606
}
607607
return true;
608608
}
@@ -615,3 +615,28 @@ class UpdateStateVertex : public Vertex
615615
template class UpdateStateVertex<float>;
616616
template class UpdateStateVertex<half>;
617617

618+
class setResultVertex : public poplar::Vertex {
619+
public:
620+
setResultVertex();
621+
622+
Vector<InOut<int>> res; // {L}
623+
624+
bool compute() {
625+
int L = res.size();
626+
if (res[L - 1] != 0 || L <= 1) {
627+
return true;
628+
}
629+
int count = 1;
630+
for (int i = L - 2; i >= 0; i--) {
631+
if (res[i + 1] == res[i]) {
632+
count++;
633+
} else {
634+
break;
635+
}
636+
}
637+
for (int i = L - 1; i >= L - count; i--) {
638+
res[i] = -1;
639+
}
640+
return true;
641+
}
642+
};

0 commit comments

Comments
 (0)