Skip to content

Commit 5e15d3e

Browse files
committed
SubstreamMultiCycle works, but its handling of reduction operators still needs to be improved.
1 parent 25c9b1f commit 5e15d3e

File tree

4 files changed

+323
-11
lines changed

4 files changed

+323
-11
lines changed
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
# Python scripts in this directory, excluding tests and the generated
# parsetab.py.
TARGET=$(shell ls *.py | grep -v test | grep -v parsetab.py)
ARGS=

PYTHON=python3
#PYTHON=python

# Optional interpreter flags; uncomment one to debug or profile.
#OPT=-m pdb
#OPT=-m cProfile -s time
#OPT=-m cProfile -o profile.rslt

.PHONY: all run test check clean

# Default goal: run the test suite.
all: test

# Execute the example script(s) directly.
run:
	$(PYTHON) $(OPT) $(TARGET) $(ARGS)

test:
	$(PYTHON) -m pytest -vv

# Emit the script output to tmp.v and let iverilog check it with the null
# target (no simulation output is produced), then remove the temporary file.
check:
	$(PYTHON) $(OPT) $(TARGET) $(ARGS) > tmp.v
	iverilog -tnull -Wall tmp.v
	rm -f tmp.v

# Remove bytecode, caches and generated simulation artifacts.
clean:
	rm -rf *.pyc __pycache__ parsetab.py .cache *.out *.png *.dot tmp.v uut.vcd
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
from __future__ import absolute_import
2+
from __future__ import print_function
3+
4+
import os
5+
import veriloggen
6+
import thread_stream_substream_multicycle
7+
8+
9+
def test(request):
    """Simulate the substream-multicycle example and check its verdict.

    Args:
        request: pytest ``request`` fixture; the simulator backend is read
            from its ``--sim`` option (presumably registered in a conftest
            elsewhere -- confirm).
    """
    veriloggen.reset()

    sim_backend = request.config.getoption('--sim')

    # Name the simulator log after this test module.
    log_name = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    output = thread_stream_substream_multicycle.run(
        filename=None, simtype=sim_backend, outputfile=log_name)

    # Only the final line of the simulation output is inspected.
    last_line = output.splitlines()[-1]
    assert last_line == '# verify: PASSED'
Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,198 @@
1+
from __future__ import absolute_import
2+
from __future__ import print_function
3+
import sys
4+
import os
5+
6+
# the next line can be removed after installation
7+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(
8+
os.path.dirname(os.path.dirname(os.path.abspath(__file__)))))))
9+
10+
from veriloggen import *
11+
import veriloggen.thread as vthread
12+
import veriloggen.types.axi as axi
13+
14+
15+
def mkLed():
    """Build the 'blinkled' module exercising ``substream_multicycle``.

    The module contains an AXI master, three RAMs, a multiply stream
    ('mul_stream'), a wrapper stream ('wrap_stream') that embeds the multiply
    stream through ``substream_multicycle``, and a control thread that runs
    each stream, recomputes the same values sequentially and compares the
    two results.

    Returns:
        The constructed Module.
    """
    m = Module('blinkled')
    clk = m.Input('CLK')
    rst = m.Input('RST')

    datawidth = 32
    addrwidth = 10
    myaxi = vthread.AXIM(m, 'myaxi', clk, rst, datawidth)
    ram_a = vthread.RAM(m, 'ram_a', clk, rst, datawidth, addrwidth)
    ram_b = vthread.RAM(m, 'ram_b', clk, rst, datawidth, addrwidth)
    ram_c = vthread.RAM(m, 'ram_c', clk, rst, datawidth, addrwidth)

    # Inner stream: z = x * y.  The trailing "+ x + y - x - y" terms cancel
    # arithmetically; presumably they only add operators to the stream so
    # that it spans several cycles -- confirm.
    mulstrm = vthread.Stream(m, 'mul_stream', clk, rst)
    mulx = mulstrm.source('x')
    muly = mulstrm.source('y')
    mulz = mulx * muly
    mulz = mulz + mulx + muly - mulx - muly
    mulstrm.sink(mulz, 'z')

    # Outer stream: c = (a + 1) * (b + 1) + 100, where the multiplication is
    # delegated to mul_stream via substream_multicycle.
    wrapstrm = vthread.Stream(m, 'wrap_stream', clk, rst)
    a = wrapstrm.source('a')
    b = wrapstrm.source('b')
    a = a + 1
    b = b + 1
    sub = wrapstrm.substream_multicycle(mulstrm)
    sub.to_source('x', a)
    sub.to_source('y', b)
    c = sub.from_sink('z')
    c = c + 100
    wrapstrm.sink(c, 'c')

    # Verification flag shared by the thread functions below.
    all_ok = m.TmpReg(initval=0)

    # NOTE: the nested functions below are handed to vthread.Thread (through
    # comp); their statements and local names are kept exactly as written.

    def comp_stream_mul(size, offset):
        # Run mul_stream alone: ram_a, ram_b -> ram_c.
        mulstrm.set_source('x', ram_a, offset, size)
        mulstrm.set_source('y', ram_b, offset, size)
        mulstrm.set_sink('z', ram_c, offset, size)
        mulstrm.run()
        mulstrm.join()

    def comp_stream_wrap(size, offset):
        # Run wrap_stream (which drives mul_stream as a substream).
        wrapstrm.set_source('a', ram_a, offset, size)
        wrapstrm.set_source('b', ram_b, offset, size)
        wrapstrm.set_sink('c', ram_c, offset, size)
        wrapstrm.run()
        wrapstrm.join()

    def comp_sequential_mul(size, offset):
        # Sequential reference for mul_stream.
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset)
            b = ram_b.read(i + offset)
            sum = a * b
            ram_c.write(i + offset, sum)

    def comp_sequential_wrap(size, offset):
        # Sequential reference for wrap_stream.
        sum = 0
        for i in range(size):
            a = ram_a.read(i + offset) + 1
            b = ram_b.read(i + offset) + 1
            sum = a * b
            sum += 100
            ram_c.write(i + offset, sum)

    def check(size, offset_stream, offset_seq):
        # Compare the two result regions of ram_c element by element and
        # print the verdict line.
        for i in range(size):
            st = ram_c.read(i + offset_stream)
            sq = ram_c.read(i + offset_seq)
            if vthread.verilog.NotEql(st, sq):
                all_ok.value = False
        if all_ok:
            print('# verify: PASSED')
        else:
            print('# verify: FAILED')

    def comp(size):
        all_ok.value = True

        # mul
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_mul(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# MUL')
        myaxi.dma_read(ram_c, 0, 1024, size)
        myaxi.dma_read(ram_c, offset, 1024 * 2, size)
        check(size, 0, offset)

        # wrap
        # stream
        offset = 0
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_stream_wrap(size, offset)
        myaxi.dma_write(ram_c, offset, 1024, size)

        # sequential
        offset = size
        myaxi.dma_read(ram_a, offset, 0, size)
        myaxi.dma_read(ram_b, offset, 512, size)
        comp_sequential_wrap(size, offset)
        myaxi.dma_write(ram_c, offset, 1024 * 2, size)

        # verification
        print('# WRAP')
        myaxi.dma_read(ram_c, 0, 1024, size)
        myaxi.dma_read(ram_c, offset, 1024 * 2, size)
        check(size, 0, offset)

        vthread.finish()

    th = vthread.Thread(m, 'th_comp', clk, rst, comp)
    # th.start(32)
    th.start(8)

    return m
142+
143+
144+
def mkTest(memimg_name=None):
    """Build the 'test' testbench module around the design from mkLed().

    Args:
        memimg_name: forwarded unchanged to ``axi.AxiMemoryModel`` as its
            ``memimg_name`` keyword argument.

    Returns:
        The testbench Module.
    """
    m = Module('test')

    # target instance
    led = mkLed()

    # copy params and ports
    m.copy_params(led)
    ports = m.copy_sim_ports(led)
    clk, rst = ports['CLK'], ports['RST']

    # Memory model attached to the design's 'myaxi' interface.
    memory = axi.AxiMemoryModel(m, 'memory', clk, rst, memimg_name=memimg_name)
    memory.connect(ports, 'myaxi')

    uut = m.Instance(
        led, 'uut',
        params=m.connect_params(led),
        ports=m.connect_ports(led))

    # simulation.setup_waveform(m, uut)
    simulation.setup_clock(m, clk, hperiod=5)
    init = simulation.setup_reset(m, rst, m.make_reset(), period=100)

    # Stop the simulation with $finish after a fixed delay.
    init.add(Delay(1000000), Systask('finish'))

    return m
174+
175+
176+
def run(filename='tmp.v', simtype='iverilog', outputfile=None):
    """Generate the testbench, simulate it and return the simulator output.

    Args:
        filename: path the generated Verilog is written to; ``None`` skips
            writing the file.
        simtype: simulator backend name passed to ``simulation.Simulator``.
        outputfile: name passed to ``Simulator.run``; defaults to this
            script's base name with an '.out' suffix.  The memory image name
            is derived from it by prefixing 'memimg_'.

    Returns:
        The simulator output as a string.  For the 'verilator' backend a
        final line starting with '-' is removed.
    """
    if outputfile is None:
        outputfile = os.path.splitext(os.path.basename(__file__))[0] + '.out'

    memimg_name = 'memimg_' + outputfile

    test = mkTest(memimg_name=memimg_name)

    if filename is not None:
        test.to_verilog(filename)

    sim = simulation.Simulator(test, sim=simtype)
    rslt = sim.run(outputfile=outputfile)

    lines = rslt.splitlines()
    # Guard against empty output: indexing lines[-1] on an empty list would
    # raise IndexError and hide the real problem (no simulator output).
    if simtype == 'verilator' and lines and lines[-1].startswith('-'):
        rslt = '\n'.join(lines[:-1])
    return rslt


if __name__ == '__main__':
    print(run(filename='tmp.v'))

veriloggen/thread/stream.py

Lines changed: 78 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,7 @@ def parameter(self, name=None, datawidth=None, point=0, signed=True):
404404
self.var_name_id_map[name] = _id
405405

406406
var.next_parameter_data = self.module.Reg('_%s_next_parameter_data' % prefix,
407-
datawidth, initval=0)
407+
datawidth, initval=0)
408408
var.next_parameter_data.no_write_check = True
409409
var.has_parameter_data = False
410410

@@ -425,6 +425,21 @@ def substream(self, substrm):
425425
self.substreams.append(sub)
426426
return sub
427427

428+
def substream_multicycle(self, substrm):
    """Embed ``substrm`` in this stream as a multi-cycle substream.

    Args:
        substrm: the stream to embed.

    Returns:
        The new SubstreamMultiCycle object, which is also appended to
        ``self.substreams``.

    Raises:
        ValueError: if the generated name is already in ``self.var_name_map``.
    """
    name = 'substream_multicycle_%d' % self.var_id_count

    if name in self.var_name_map:
        raise ValueError("'%s' is already defined in stream '%s'" %
                         (name, self.name))

    # The prefix is not used here; the call is retained as in the original.
    self._prefix(name)
    self.var_id_count += 1

    sub = SubstreamMultiCycle(self.module, self.clock, self.reset,
                              substrm, self)
    self.substreams.append(sub)
    return sub
442+
428443
def read_RAM(self, name, addr, when=None,
429444
datawidth=None, point=0, signed=True):
430445

@@ -1386,8 +1401,6 @@ def _synthesize_run(self):
13861401
self.dump_enable(1)
13871402
)
13881403

1389-
self.fsm.If(start_cond, self.stream_oready).goto_next()
1390-
13911404
substreams = self._collect_substreams()
13921405
for sub in substreams:
13931406
start_stage = sub.start_stage
@@ -1420,9 +1433,20 @@ def _synthesize_run(self):
14201433
cond(1)
14211434
)
14221435

1436+
self.fsm.If(start_cond, self.stream_oready).goto_next()
1437+
14231438
# compute (at this cycle, source_idle <- 0)
14241439
self.fsm.If(self.stream_oready).goto_next()
14251440

1441+
# NOT OK
1442+
# substreams = self._collect_substreams()
1443+
# for sub in substreams:
1444+
# if isinstance(sub, SubstreamMultiCycle):
1445+
# sub_fsm = sub.substrm.fsm
1446+
# sub_fsm.seq.If(self.fsm.here)(
1447+
# sub.substrm.stream_ivalid(sub.ii_count == 0)
1448+
# )
1449+
14261450
# compute and join
14271451
done_cond = None
14281452
for key, source_idle in sorted(self.source_idle_map.items(),
@@ -3200,21 +3224,21 @@ def func(*args, **kwargs):
32003224

32013225
class Substream(BaseSubstream):
32023226

3203-
def __init__(self, module, clock, reset, substrm, strm=None):
3227+
def __init__(self, module, clock, reset, substrm, strm):
32043228
self.module = module
32053229
self.clock = clock
32063230
self.reset = reset
32073231
self.reset_delay = 0
32083232

3209-
if strm is not None:
3210-
util.add_enable_cond(substrm.is_root, strm.busy, 0)
3211-
# parent to child
3212-
util.add_disable_cond(substrm.stream_oready, strm.busy, strm.stream_oready)
3213-
# child to parent
3214-
util.add_disable_cond(strm.stream_internal_oready, strm.busy, substrm.stream_internal_oready)
3215-
32163233
BaseSubstream.__init__(self, substrm, strm)
32173234

3235+
util.add_enable_cond(substrm.is_root, strm.busy, 0)
3236+
# parent to child
3237+
util.add_disable_cond(substrm.stream_oready, strm.busy, strm.stream_oready)
3238+
# child to parent
3239+
util.add_disable_cond(strm.stream_internal_oready, strm.busy,
3240+
substrm.stream_internal_oready)
3241+
32183242
def to_source(self, name, data):
32193243
source_name = self.substrm._dataname(name)
32203244
cond = self.module.Reg(compiler._tmp_name(self.name('%s_cond' % source_name)),
@@ -3239,3 +3263,46 @@ def _collect_substreams(self):
32393263
for s in ret:
32403264
s.reset_delay += 1 + self.start_stage
32413265
return ret
3266+
3267+
3268+
class SubstreamMultiCycle(Substream):
    """Substream whose child stream is fed through an iteration-interval counter.

    A counter (``ii_count``) is started when the parent stream passes a
    valid input to the child; while the counter is non-zero the parent's
    internal ready signal is disabled, and the child keeps its own ready
    signal based on its internal ready.
    """

    def __init__(self, module, clock, reset, substrm, strm):
        """Wire ``substrm`` (child) into ``strm`` (parent).

        ``Substream.__init__`` is not called; ``BaseSubstream.__init__`` is
        invoked directly and the ready wiring is set up below.
        """
        self.module = module
        self.clock = clock
        self.reset = reset
        self.reset_delay = 0

        BaseSubstream.__init__(self, substrm, strm)

        util.add_enable_cond(substrm.is_root, strm.busy, 0)

        # self.latency is read before it is overwritten on the next line
        # (presumably assigned by BaseSubstream.__init__ -- confirm).
        self.iteration_interval = self.latency - 1
        self.latency = 1 + 1

        parent = self.strm
        enable_cond = vtypes.Ands(parent.busy, parent.stream_oready,
                                  substrm.stream_ivalid)

        # Counter register sized from the iteration interval.
        count_name = self.name('ii_count')
        count_width = int(math.ceil(math.log(self.iteration_interval, 2))) + 1
        self.ii_count = self.module.Reg(count_name, count_width, initval=0)

        # Stall flag: high whenever the counter is non-zero.
        self.ii_stall_cond = self.module.Wire(self.name('ii_stall_cond'))
        self.ii_stall_cond.assign(self.ii_count > 0)
        util.add_disable_cond(self.strm.stream_internal_oready,
                              self.ii_stall_cond, vtypes.Int(0))

        # Counter updates are registered in this order: start, advance, wrap.
        seq = self.strm.seq
        seq.If(enable_cond)(
            self.ii_count.inc()
        )
        seq.If(self.ii_count > 0)(
            self.ii_count.inc()
        )
        seq.If(self.ii_count == self.iteration_interval - 1)(
            self.ii_count(0)
        )

        # parent to child
        stalled_child_ready = vtypes.Ands(self.substrm.stream_internal_oready,
                                          self.ii_stall_cond)
        child_oready = vtypes.Ors(self.strm.stream_oready, stalled_child_ready)
        util.add_disable_cond(self.substrm.stream_oready, self.strm.busy,
                              child_oready)

        # child to parent
        parent_internal_oready = vtypes.Not(self.ii_stall_cond)
        util.add_disable_cond(self.strm.stream_internal_oready, self.strm.busy,
                              parent_internal_oready)

0 commit comments

Comments
 (0)