@@ -85,9 +85,9 @@ class CSRMultiplexer(Elaboratable):
8585 --------------
8686
8787 Because the CSR bus conserves logic and routing resources, it is common to e.g. access
88- a CSR bus with an *n*-bit data path from a CPU with a *k*-bit datapath in cases where CSR
89- access latency is less important than resource usage. In this case, two strategies are
90- possible for connecting the CSR bus to the CPU:
88+ a CSR bus with an *n*-bit data path from a CPU with a *k*-bit data path (*k>n*) in cases
89+ where CSR access latency is less important than resource usage. In this case, two strategies
90+ are possible for connecting the CSR bus to the CPU:
9191 * The CPU could access the CSR bus directly (with no intervening logic other than simple
9292 translation of control signals). In this case, the register alignment should be set
9393 to 1, and each *w*-bit register would occupy *ceil(w/n)* addresses from the CPU
@@ -207,11 +207,10 @@ def elaborate(self, platform):
207207 m = Module ()
208208
209209 # Instead of a straightforward multiplexer for reads, use a per-element address comparator,
210- # clear the shadow register when it does not match, and OR every selected shadow register
211- # part to form the output. This can save a significant amount of logic; the size of
212- # a complete k-OR or k-MUX gate tree for n inputs is `s = ceil((n - 1) / (k - 1))`,
213- # and its logic depth is `ceil(log_k(s))`, but a 4-LUT can implement either a 4-OR or
214- # a 2-MUX gate.
210+ # AND the shadow register chunk with the comparator output, and OR all of those together.
211+ # If the toolchain doesn't already synthesize multiplexer trees this way, this trick can
212+ # save a significant amount of logic, since e.g. one 4-LUT can pack only one 2-MUX, but two
213+ # 2-AND or 2-OR gates.
215214 r_data_fanin = 0
216215
217216 for elem_addr , (elem , elem_size ) in self ._elements .items ():
0 commit comments