|
| 1 | +#ifndef HEADER_fd_src_vinyl_cq_fd_vinyl_cq_h |
| 2 | +#define HEADER_fd_src_vinyl_cq_fd_vinyl_cq_h |
| 3 | + |
| 4 | +/* A fd_vinyl_comp_t provides details about vinyl request completions. */ |
| 5 | + |
| 6 | +#include "../fd_vinyl_base.h" |
| 7 | + |
| 8 | +/* FD_VINYL_COMP_{ALIGN,FOOTPRINT} give the byte alignment and footprint |
| 9 | + of a fd_vinyl_comp_t. ALIGN is a reasonable power-of-2. FOOTPRINT |
| 10 | + is a multiple of ALIGN. */ |
| 11 | + |
| 12 | +#define FD_VINYL_COMP_ALIGN (32UL) |
| 13 | +#define FD_VINYL_COMP_FOOTPRINT (32UL) |
| 14 | + |
| 15 | +/* FD_VINYL_COMP_QUOTA_MAX gives the maximum client acquire quota that |
| 16 | + can be returned by a completion. */ |
| 17 | + |
| 18 | +#define FD_VINYL_COMP_QUOTA_MAX (65535UL) |
| 19 | + |
| 20 | +/* FIXME: consider eching back the val_gaddr and err_gaddr array back |
| 21 | + too? Maybe helpful in the case of read pipelining (e.g. request made |
| 22 | + on one thread, completion received on a different thread). But the |
| 23 | + req_id can probably encode this for a case like this without needing |
| 24 | + to bloat the completion footprint. */ |
| 25 | + |
| 26 | +struct __attribute__((aligned(FD_VINYL_COMP_ALIGN))) fd_vinyl_comp { |
| 27 | + ulong seq; /* Completion sequence number */ |
| 28 | + ulong req_id; /* Echoed from corresponding request */ |
| 29 | + ulong link_id; /* Echoed from corresponding request */ |
| 30 | + short err; /* FD_VINYL_SUCCESS (0) if the request was processed (see request err array for individual failure details) |
| 31 | + FD_VINYL_ERR_* (negative) otherwise (no items in the request were processed) */ |
| 32 | + ushort batch_cnt; /* Num items requested */ |
| 33 | + ushort fail_cnt; /* If a successful completion, num items that failed processing, in [0,batch_cnt]. |
| 34 | + If a failed completion, 0 (no items were processed). */ |
| 35 | + ushort quota_rem; /* Client quota remaining when request completed processing. |
| 36 | + 0<=quota_rem<=client quota_max<=FD_VINYL_COMP_QUOTA_MAX */ |
| 37 | +}; |
| 38 | + |
| 39 | +typedef struct fd_vinyl_comp fd_vinyl_comp_t; |
| 40 | + |
| 41 | +/* A fd_vinyl_cq_t is an interprocess sharable persistent SPMC |
| 42 | + queue used to communicate fd_vinyl_comp_t completions from a vinyl |
| 43 | + tile to clients. It is implemented as a hybrid direct mapped cache / |
| 44 | + queue with similar lockfree properties to a tango mcache. |
| 45 | + Specifically, when publishing a completion into a cq, a producer: |
| 46 | +
|
| 47 | + - Assigns the completion the cq's next sequence number and determines |
| 48 | + the corresponding cache line. |
| 49 | +
|
| 50 | + - Sets the cache line seq to seq-1. Because the cq's cache has at |
| 51 | + least 4 lines (2 is enough for this particular case), this is a seq |
| 52 | + that will never be held in that line. This atomically indicates to |
| 53 | + a concurrent consumer that seq-comp_cnt is no longer available in |
| 54 | + cache and that seq is in the process of being published into the |
| 55 | + line. |
| 56 | +
|
| 57 | + - Set the rest of the completion fields. |
| 58 | +
|
| 59 | + - Sets the cache line seq to seq. This atomically indicates that |
| 60 | + completion seq is ready. |
| 61 | +
|
| 62 | + - Advances the cq's seq cursor. From a consumer's point of view, |
| 63 | + this cursor has the property that that [0,seq) have been published |
| 64 | + and (seq,ULONG_MAX] have not been published. This cursor should |
| 65 | + only be used by consumers to synchronize their local cursors at |
| 66 | + initialization. |
| 67 | +
|
| 68 | + When reading completions from a cq, a consumer: |
| 69 | +
|
| 70 | + - Determine the cache line for consumer's seq. |
| 71 | +
|
| 72 | + - Reads the cache line seq. |
| 73 | +
|
| 74 | + - Reads the rest of the completion fields. |
| 75 | +
|
| 76 | + - Reads the cache line seq again. |
| 77 | +
|
| 78 | + If the first and second reads do not match, the vinyl tile is in the |
| 79 | + process of updating that cache line (most likely the consumer is |
| 80 | + caught up and producer is about to produce seq ... the consumer |
| 81 | + should try again soon). |
| 82 | +
|
| 83 | + Otherwise, if the read sequence numbers match the consumer's seq, the |
| 84 | + consumer received completion seq and should advance their local seq. |
| 85 | +
|
| 86 | + If they are behind the consumer's seq, the producer has not written |
| 87 | + seq yet and the consumer should try again later. |
| 88 | +
|
| 89 | + If they are ahead of the consumer's seq, the producer has overrun the |
| 90 | + consumer (the amount ahead gives a ballpark how far the consumer fell |
| 91 | + behind) and the consumer should recover / reinit. As such, flow |
| 92 | + control is managed by the application (e.g. if the application |
| 93 | + ensures that there are at most comp_cnt completion generating vinyl |
| 94 | + requests pending at any given time on this cq, no overruns are |
| 95 | + possible). |
| 96 | +
|
| 97 | + Note that because fd_vinyl_comp_t are AVX-2 friendly, it is possible |
| 98 | + to SIMD accelerate producers and consumers (also similar to |
| 99 | + fd_tango). */ |
| 100 | + |
| 101 | +/* FIXME: consider making comp_cnt a compile time constant? */ |
| 102 | + |
| 103 | +#define FD_VINYL_CQ_MAGIC (0xfd3a7352dc03a6c0UL) /* fd warm snd cq magc version 0 */ |
| 104 | + |
| 105 | +struct __attribute__((aligned(128))) fd_vinyl_cq_private { |
| 106 | + |
| 107 | + ulong magic; /* ==FD_VINYL_CQ_MAGIC */ |
| 108 | + ulong comp_cnt; /* Number of completions that can be in flight on this cq at any given time, power of 2 of least 4 */ |
| 109 | + uchar _[ 112 ]; /* Put seq on a separate cache line pair */ |
| 110 | + ulong seq; /* Completion sequence number to publish next */ |
| 111 | + /* padding to 128 alignment */ |
| 112 | + /* fd_vinyl_comp_t comp[ comp_cnt ] here, seq number at idx = seq & (comp_cnt-1UL) when available */ |
| 113 | + /* padding to 128 alignment */ |
| 114 | + |
| 115 | +}; |
| 116 | + |
| 117 | +typedef struct fd_vinyl_cq_private fd_vinyl_cq_t; |
| 118 | + |
| 119 | +FD_PROTOTYPES_BEGIN |
| 120 | + |
| 121 | +/* fd_vinyl_cq_{align,footprint,new,join,leave_delete} have the usual |
| 122 | + interprocess shared persistent memory object semantics. comp_cnt is |
| 123 | + a power-of-2 of at least 4 that gives the number completions that can |
| 124 | + be in flight on this cq. */ |
| 125 | + |
| 126 | +FD_FN_CONST ulong fd_vinyl_cq_align ( void ); |
| 127 | +FD_FN_CONST ulong fd_vinyl_cq_footprint( ulong comp_cnt ); |
| 128 | +void * fd_vinyl_cq_new ( void * shmem, ulong comp_cnt ); |
| 129 | +fd_vinyl_cq_t * fd_vinyl_cq_join ( void * shcq ); |
| 130 | +void * fd_vinyl_cq_leave ( fd_vinyl_cq_t * cq ); |
| 131 | +void * fd_vinyl_cq_delete ( void * shcq ); |
| 132 | + |
| 133 | +/* fd_vinyl_cq_comp returns the location in the caller's address space |
| 134 | + of the cq's completion array. fd_vinyl_cq_comp_const is a const |
| 135 | + correct version. fd_vinyl_cq_comp_cnt is the size of this array. |
| 136 | + The lifetime of the returned array is the lifetime of the local join. |
| 137 | + These assume cq is a current local join. |
| 138 | +
|
| 139 | + fd_vinyl_cq_comp_idx gives the array index that will cache completion |
| 140 | + seq in a cq completion array with comp_cnt elements. */ |
| 141 | + |
| 142 | +FD_FN_CONST static inline fd_vinyl_comp_t * |
| 143 | +fd_vinyl_cq_comp( fd_vinyl_cq_t * cq ) { |
| 144 | + return (fd_vinyl_comp_t *)(cq+1); |
| 145 | +} |
| 146 | + |
| 147 | +FD_FN_CONST static inline fd_vinyl_comp_t const * |
| 148 | +fd_vinyl_cq_comp_const( fd_vinyl_cq_t const * cq ) { |
| 149 | + return (fd_vinyl_comp_t const *)(cq+1); |
| 150 | +} |
| 151 | + |
| 152 | +FD_FN_PURE static inline ulong fd_vinyl_cq_comp_cnt( fd_vinyl_cq_t const * cq ) { return cq->comp_cnt; } |
| 153 | + |
| 154 | +FD_FN_CONST static inline ulong fd_vinyl_cq_comp_idx( ulong seq, ulong comp_cnt ) { return seq & (comp_cnt-1UL); } |
| 155 | + |
| 156 | +/* fd_vinyl_cq_seq returns the position of the producer's sequence |
| 157 | + number cursor. Specifically, at some point during the call, |
| 158 | + completions [0,seq) were published, completions (seq,ULONG_MAX] were |
| 159 | + not been published, and completion seq was either published, being |
| 160 | + published or not published. This is used for initial synchronization |
| 161 | + between producer and consumers. This is a compiler fence. */ |
| 162 | + |
| 163 | +static inline ulong |
| 164 | +fd_vinyl_cq_seq( fd_vinyl_cq_t const * cq ) { |
| 165 | + FD_COMPILER_MFENCE(); |
| 166 | + ulong seq = cq->seq; |
| 167 | + FD_COMPILER_MFENCE(); |
| 168 | + return seq; |
| 169 | +} |
| 170 | + |
| 171 | +/* fd_vinyl_cq_send sends a completion. If comp is non-NULL, the |
| 172 | + completion will be written out-of-band to the location comp (assumes |
| 173 | + comp->seq is not 1 on entry and will set to 1 once the send it done). |
| 174 | + Otherwise, if cq is non-NULL, the completion will be enquened into |
| 175 | + the given cq (assumes cq is a current local join). If both cq and |
| 176 | + comp are NULL, this is a no-op. This is a compiler fence. */ |
| 177 | + |
| 178 | +/* FIXME: consider SIMD accelerating */ |
| 179 | + |
| 180 | +static inline void |
| 181 | +fd_vinyl_cq_send( fd_vinyl_cq_t * cq, |
| 182 | + fd_vinyl_comp_t * comp, |
| 183 | + ulong req_id, |
| 184 | + ulong link_id, |
| 185 | + int err, /* In [-2^15,2^15) */ |
| 186 | + ulong batch_cnt, /* In [0,2^16) */ |
| 187 | + ulong fail_cnt, /* In [0,2^16) */ |
| 188 | + ulong quota_rem ) { /* In [0,2^16) */ |
| 189 | + |
| 190 | + ulong stack_seq[1]; |
| 191 | + |
| 192 | + ulong seq; |
| 193 | + ulong * _seq; |
| 194 | + |
| 195 | + if( FD_UNLIKELY( comp ) ) { /* Send directly */ |
| 196 | + |
| 197 | + seq = 1UL; /* For direct sends, comp->seq should have already been set to something != 1 */ |
| 198 | + _seq = stack_seq; |
| 199 | + |
| 200 | + } else if( FD_LIKELY( cq ) ) { /* Send via cq */ |
| 201 | + |
| 202 | + seq = cq->seq; |
| 203 | + _seq = &cq->seq; |
| 204 | + comp = fd_vinyl_cq_comp( cq ) + fd_vinyl_cq_comp_idx( seq, cq->comp_cnt ); |
| 205 | + |
| 206 | + FD_COMPILER_MFENCE(); |
| 207 | + comp->seq = seq - 1UL; /* Mark completion seq being written */ |
| 208 | + |
| 209 | + } else { /* No place to send completion */ |
| 210 | + |
| 211 | + FD_COMPILER_MFENCE(); /* Consistent semantics in all cases */ |
| 212 | + return; |
| 213 | + |
| 214 | + } |
| 215 | + |
| 216 | + FD_COMPILER_MFENCE(); |
| 217 | + comp->req_id = req_id; |
| 218 | + comp->link_id = link_id; |
| 219 | + comp->err = (short)err; |
| 220 | + comp->batch_cnt = (ushort)batch_cnt; |
| 221 | + comp->fail_cnt = (ushort)fail_cnt; |
| 222 | + comp->quota_rem = (ushort)quota_rem; |
| 223 | + FD_COMPILER_MFENCE(); |
| 224 | + comp->seq = seq; /* Mark completion seq as written */ |
| 225 | + FD_COMPILER_MFENCE(); |
| 226 | + *_seq = seq + 1UL; /* Record that completions [0,seq) are published */ |
| 227 | + FD_COMPILER_MFENCE(); |
| 228 | + |
| 229 | +} |
| 230 | + |
| 231 | +/* fd_vinyl_cq_recv receives completion seq from the given cq. Returns |
| 232 | + 0 on success, positive if completion seq has not been published yet |
| 233 | + and negative if the consumer has been overrun by the producer. On |
| 234 | + return, *dst contains the desired completion on success and is |
| 235 | + clobbered otherwise. Assumes cq is a current local join and dst is |
| 236 | + valid. This is a compiler fence. */ |
| 237 | + |
| 238 | +/* FIXME: consider SIMD accelerating */ |
| 239 | + |
| 240 | +static inline long |
| 241 | +fd_vinyl_cq_recv( fd_vinyl_cq_t const * cq, |
| 242 | + ulong seq, |
| 243 | + fd_vinyl_comp_t * dst ) { |
| 244 | + fd_vinyl_comp_t const * src = fd_vinyl_cq_comp_const( cq ) + fd_vinyl_cq_comp_idx( seq, cq->comp_cnt ); |
| 245 | + |
| 246 | + FD_COMPILER_MFENCE(); |
| 247 | + ulong seq0 = src->seq; |
| 248 | + FD_COMPILER_MFENCE(); |
| 249 | + *dst = *src; |
| 250 | + FD_COMPILER_MFENCE(); |
| 251 | + ulong seq1 = src->seq; |
| 252 | + FD_COMPILER_MFENCE(); |
| 253 | + |
| 254 | + long diff0 = (long)(seq-seq0); |
| 255 | + long diff1 = (long)(seq-seq1); |
| 256 | + return fd_long_if( !diff0, diff1, diff0 ); |
| 257 | +} |
| 258 | + |
| 259 | +FD_PROTOTYPES_END |
| 260 | + |
| 261 | +#endif /* HEADER_fd_src_vinyl_cq_fd_vinyl_cq_h */ |
0 commit comments