Skip to content

Commit dc918fc

Browse files
committed
csr.bus: add CSRElement and CSRMultiplexer.
1 parent 1f20170 commit dc918fc

File tree

5 files changed

+507
-0
lines changed

5 files changed

+507
-0
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,7 @@
22
*.pyc
33
/*.egg-info
44
/.eggs
5+
6+
# tests
7+
*.vcd
8+
*.gtkw

nmigen_soc/csr/__init__.py

Whitespace-only changes.

nmigen_soc/csr/bus.py

Lines changed: 255 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,255 @@
1+
from functools import reduce
2+
from nmigen import *
3+
from nmigen import tracer
4+
5+
6+
# Public names exported by a star-import of this module.
__all__ = ["CSRElement", "CSRMultiplexer"]
7+
8+
9+
class CSRElement(Record):
    """Peripheral-side CSR interface.

    A low-level interface to a single atomically readable and writable register in a peripheral.
    This interface supports any register width and semantics, provided that both reads and writes
    always succeed and complete in one cycle.

    Parameters
    ----------
    width : int
        Width of the register. Must be a non-negative integer.
    access : str
        Access mode; one of ``"r"``, ``"w"``, or ``"rw"``. Determines which of the fields listed
        under "Attributes" are present in the record.
    name : str
        Name of the underlying record.
    src_loc_at : int
        Additional source location depth, forwarded to ``Record`` on top of the one level that
        accounts for this constructor itself.

    Attributes
    ----------
    r_data : Signal(width)
        Read data. Must be always valid, and is sampled when ``r_stb`` is asserted.
        Present only if ``access`` includes ``"r"``.
    r_stb : Signal()
        Read strobe. Registers with read side effects should perform the read side effect when this
        strobe is asserted. Present only if ``access`` includes ``"r"``.
    w_data : Signal(width)
        Write data. Valid only when ``w_stb`` is asserted.
        Present only if ``access`` includes ``"w"``.
    w_stb : Signal()
        Write strobe. Registers should update their value or perform the write side effect when
        this strobe is asserted. Present only if ``access`` includes ``"w"``.

    Raises
    ------
    ValueError
        If ``width`` is not a non-negative integer, or ``access`` is not a valid access mode.
    """
    def __init__(self, width, access, *, name=None, src_loc_at=0):
        if not isinstance(width, int) or width < 0:
            raise ValueError("Width must be a non-negative integer, not {!r}"
                             .format(width))
        if access not in ("r", "w", "rw"):
            raise ValueError("Access mode must be one of \"r\", \"w\", or \"rw\", not {!r}"
                             .format(access))

        self.width = int(width)
        self.access = access

        # Only the fields relevant to the requested access mode are created.
        layout = []
        if "r" in self.access:
            layout += [
                ("r_data", width),
                ("r_stb", 1),
            ]
        if "w" in self.access:
            layout += [
                ("w_data", width),
                ("w_stb", 1),
            ]
        # Honor the caller-supplied `src_loc_at` instead of silently discarding it; with
        # the default of 0 this is the same value (1) that was passed before.
        super().__init__(layout, name=name, src_loc_at=1 + src_loc_at)
59+
60+
61+
class CSRMultiplexer(Elaboratable):
    """CPU-side CSR interface.

    A low-level interface to a set of peripheral CSR registers that implements address-based
    multiplexing and atomic updates of wide registers.

    Operation
    ---------

    The CSR multiplexer splits each CSR register into chunks according to its data width. Each
    chunk is assigned an address, and the first chunk of each register always has the provided
    minimum alignment. This allows accessing CSRs of any size using any datapath width.

    When the first chunk of a register is read, the value of a register is captured, and reads
    from subsequent chunks of the same register return the captured values. When any chunk except
    the last chunk of a register is written, the written value is captured; a write to the last
    chunk writes the captured value to the register. This allows atomically accessing CSRs larger
    than datapath width.

    Reads to padding bytes return zeroes, and writes to padding bytes are ignored.

    Writes are registered, and add 1 cycle of latency.

    Wide registers
    --------------

    Because the CSR bus conserves logic and routing resources, it is common to e.g. access
    a CSR bus with an *n*-bit data path from a CPU with a *k*-bit datapath in cases where CSR
    access latency is less important than resource usage. In this case, two strategies are
    possible for connecting the CSR bus to the CPU:
        * The CPU could access the CSR bus directly (with no intervening logic other than simple
          translation of control signals). In this case, the register alignment should be set
          to 1, and each *w*-bit register would occupy *ceil(w/n)* addresses from the CPU
          perspective, requiring the same amount of memory instructions to access.
        * The CPU could also access the CSR bus through a width down-converter, which would issue
          *k/n* CSR accesses for each CPU access. In this case, the register alignment should be
          set to *k/n*, and each *w*-bit register would occupy *ceil(w/k)* addresses from the CPU
          perspective, requiring the same amount of memory instructions to access.

    If alignment is greater than 1, it affects which CSR bus write is considered a write to
    the last register chunk. For example, if a 24-bit register is used with a 8-bit CSR bus and
    a CPU with a 32-bit datapath, a write to this register requires 4 CSR bus writes to complete
    and the 4th write is the one that actually writes the value to the register. This allows
    determining write latency solely from the amount of addresses the register occupies in
    the CPU address space, and the width of the CSR bus.

    Parameters
    ----------
    addr_width : int
        Address width. At most ``(2 ** addr_width) * data_width`` register bits will be available.
    data_width : int
        Data width. Registers are accessed in ``data_width`` sized chunks.
    alignment : int
        Register alignment. The address assigned to each register will be a multiple of
        ``2 ** alignment``.

    Attributes
    ----------
    addr : Signal(addr_width)
        Address for reads and writes.
    r_data : Signal(data_width)
        Read data. Valid on the next cycle after ``r_stb`` is asserted.
    r_stb : Signal()
        Read strobe. If ``addr`` points to the first chunk of a register, captures register value
        and causes read side effects to be performed (if any). If ``addr`` points to any chunk
        of a register, latches the captured value to ``r_data``. Otherwise, latches zero
        to ``r_data``.
    w_data : Signal(data_width)
        Write data. Must be valid when ``w_stb`` is asserted.
    w_stb : Signal()
        Write strobe. If ``addr`` points to the last chunk of a register, writes captured value
        to the register and causes write side effects to be performed (if any). If ``addr`` points
        to any chunk of a register, latches ``w_data`` to the captured value. Otherwise, does
        nothing.
    """
    def __init__(self, *, addr_width, data_width, alignment=0):
        if not isinstance(addr_width, int) or addr_width <= 0:
            raise ValueError("Address width must be a positive integer, not {!r}"
                             .format(addr_width))
        if not isinstance(data_width, int) or data_width <= 0:
            raise ValueError("Data width must be a positive integer, not {!r}"
                             .format(data_width))
        if not isinstance(alignment, int) or alignment < 0:
            raise ValueError("Alignment must be a non-negative integer, not {!r}"
                             .format(alignment))

        self.addr_width = int(addr_width)
        self.data_width = int(data_width)
        self.alignment = alignment

        # Next free chunk address, and the registers added so far, keyed by the address of
        # their first chunk; each value is an ``(element, size_in_chunks)`` pair.
        self._next_addr = 0
        self._elements = dict()

        self.addr = Signal(addr_width)
        self.r_data = Signal(data_width)
        self.r_stb = Signal()
        self.w_data = Signal(data_width)
        self.w_stb = Signal()

    def add(self, element):
        """Add a register.

        Arguments
        ---------
        element : CSRElement
            Interface of the register.

        Return value
        ------------
        An ``(addr, size)`` tuple, where ``addr`` is the address assigned to the first chunk of
        the register, and ``size`` is the amount of chunks it takes, which may be greater than
        ``ceil(element.width / self.data_width)`` due to alignment.

        Raises
        ------
        TypeError
            If ``element`` is not a ``CSRElement``.
        """
        if not isinstance(element, CSRElement):
            raise TypeError("Element must be an instance of CSRElement, not {!r}"
                            .format(element))

        addr = self.align_to(self.alignment)
        # Number of chunks needed to hold the register, rounded up.
        self._next_addr += (element.width + self.data_width - 1) // self.data_width
        # Re-align so that the padding after the register is counted as part of its size.
        size = self.align_to(self.alignment) - addr
        self._elements[addr] = element, size
        return addr, size

    def align_to(self, alignment):
        """Align the next register explicitly.

        Arguments
        ---------
        alignment : int
            Register alignment. The address assigned to the next register will be a multiple of
            ``2 ** alignment`` or ``2 ** self.alignment``, whichever is greater.

        Return value
        ------------
        Address of the next register.

        Raises
        ------
        ValueError
            If ``alignment`` is not a non-negative integer.
        """
        if not isinstance(alignment, int) or alignment < 0:
            raise ValueError("Alignment must be a non-negative integer, not {!r}"
                             .format(alignment))

        align_chunks = 1 << alignment
        if self._next_addr % align_chunks != 0:
            self._next_addr += align_chunks - (self._next_addr % align_chunks)
        return self._next_addr

    def elaborate(self, platform):
        """Build the multiplexer logic for every register added so far."""
        m = Module()

        # Instead of a straightforward multiplexer for reads, use a per-element address comparator,
        # clear the shadow register when it does not match, and OR every selected shadow register
        # part to form the output. This can save a significant amount of logic; the size of
        # a complete k-OR or k-MUX gate tree for n inputs is `s = ceil((n - 1) / (k - 1))`,
        # and its logic depth is `ceil(log_k(s))`, but a 4-LUT can implement either a 4-OR or
        # a 2-MUX gate.
        r_data_fanin = 0

        for elem_addr, (elem, elem_size) in self._elements.items():
            shadow = Signal(elem.width, name="{}__shadow".format(elem.name))

            # Per-chunk read strobes are created up front so that they can be given a default
            # value outside of the address decoder. They are registered, and without a default
            # they would hold a stale 1 after the address moves to a different chunk, ORing
            # the wrong shadow slice into `r_data`.
            chunk_r_stbs = []
            if "r" in elem.access:
                for chunk_offset in range(elem_size):
                    # Only used as a Mux select, so a single bit is enough.
                    chunk_r_stb = Signal(name="{}__r_stb_{}".format(elem.name, chunk_offset))
                    m.d.sync += chunk_r_stb.eq(0)
                    chunk_r_stbs.append(chunk_r_stb)

            if "w" in elem.access:
                m.d.comb += elem.w_data.eq(shadow)
                # Same reasoning as above: deassert the registered write strobe by default so
                # that it is a one-cycle pulse rather than staying high until the last chunk
                # is addressed again.
                m.d.sync += elem.w_stb.eq(0)

            # Enumerate every address used by the register explicitly, rather than using
            # arithmetic comparisons, since some toolchains (e.g. Yosys) are too eager to infer
            # carry chains for comparisons, even with a constant. (Register sizes don't have
            # to be powers of 2.)
            with m.Switch(self.addr):
                for chunk_offset in range(elem_size):
                    chunk_slice = slice(chunk_offset * self.data_width,
                                        (chunk_offset + 1) * self.data_width)
                    with m.Case(elem_addr + chunk_offset):
                        if "r" in elem.access:
                            chunk_r_stb = chunk_r_stbs[chunk_offset]
                            r_data_fanin |= Mux(chunk_r_stb, shadow[chunk_slice], 0)
                            if chunk_offset == 0:
                                m.d.comb += elem.r_stb.eq(self.r_stb)
                                with m.If(self.r_stb):
                                    m.d.sync += shadow.eq(elem.r_data)
                            # Delay by 1 cycle, allowing reads to be pipelined.
                            m.d.sync += chunk_r_stb.eq(self.r_stb)

                        if "w" in elem.access:
                            if chunk_offset == elem_size - 1:
                                # Delay by 1 cycle, avoiding combinatorial paths through
                                # the CSR bus and into CSR registers.
                                m.d.sync += elem.w_stb.eq(self.w_stb)
                            with m.If(self.w_stb):
                                m.d.sync += shadow[chunk_slice].eq(self.w_data)

                with m.Default():
                    m.d.sync += shadow.eq(0)

        m.d.comb += self.r_data.eq(r_data_fanin)

        return m

nmigen_soc/test/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)