ibmveth: Add multi buffers rx replenishment hcall support

jira LE-4649
Rebuild_History Non-Buildable kernel-5.14.0-570.60.1.el9_6
commit-author Mingming Cao <mmc@linux.ibm.com>
commit 2094200b5f77e6710f9594571889f64f31966de1
Empty-Commit: Cherry-Pick Conflicts during history rebuild.
Will be included in final tarball splat. Ref for failed cherry-pick at:
ciq/ciq_backports/kernel-5.14.0-570.60.1.el9_6/2094200b.failed

This patch enables batched RX buffer replenishment in ibmveth by
using the new firmware-supported h_add_logical_lan_buffers() hcall
to submit up to 8 RX buffers in a single call, instead of repeatedly
calling the single-buffer h_add_logical_lan_buffer() hcall.

With this patch, the driver queries the ILLAN attributes during probe
to detect the IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT bit. If the attribute
is present, rx_buffers_per_hcall is set to 8, enabling batched
replenishment. Otherwise, it defaults to 1, preserving the original
upstream behavior with no change in code flow on unsupported systems.
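
As a rough sketch of that probe-time detection (the probe hunk is not
part of the conflicted diff below, so the exact code may differ;
h_illan_attributes() is the driver's existing ILLAN query helper, and
the attribute bit name is taken from this description):

    unsigned long ret_attr;

    /* Query the firmware ILLAN attributes without modifying any bits */
    if (h_illan_attributes(adapter->vdev->unit_address, 0, 0,
                           &ret_attr) == H_SUCCESS &&
        (ret_attr & IBMVETH_ILLAN_RX_MULTI_BUFF_SUPPORT))
        adapter->rx_buffers_per_hcall = 8; /* batched replenishment */
    else
        adapter->rx_buffers_per_hcall = 1; /* single-buffer behavior */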

The core RX replenish logic remains the same, but when batching is
enabled the driver aggregates up to 8 fully prepared descriptors into
a single h_add_logical_lan_buffers() hypercall. If any allocation or
DMA mapping fails while preparing a batch, only the successfully
prepared buffers are submitted, and the remaining ones are deferred
to the next replenish cycle.
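
In outline, the batched replenish loop (shown in full, with conflict
markers, in the ibmveth.c hunk below) works as follows; the elided
body fills one descriptor per iteration:

    while (remaining > 0) {
        /* Stage up to min(remaining, batch) descriptors; an skb
         * allocation or DMA mapping failure ends the batch early. */
        for (filled = 0; filled < min(remaining, batch); filled++) {
            /* ... allocate or reuse the skb, DMA-map it, and fill
             * descs[filled] and correlators[filled] ... */
        }

        if (!filled)
            break;

        /* One hcall covers the whole batch; a batch of one uses the
         * existing single-buffer hcall. */
        if (filled == 1)
            lpar_rc = h_add_logical_lan_buffer(vdev->unit_address,
                                               descs[0].desc);
        else
            lpar_rc = h_add_logical_lan_buffers(vdev->unit_address,
                                                descs[0].desc,
                                                /* ... */
                                                descs[7].desc);

        /* Pool indices advance only after the hcall succeeds. */
    }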

If at runtime the firmware stops accepting the batched hcall, e.g.
after a Live Partition Migration (LPM) to a host that does not
support h_add_logical_lan_buffers(), the hypercall returns H_FUNCTION.
In that case, the driver transparently disables batching, resets
rx_buffers_per_hcall to 1, and falls back to the single-buffer hcall
on all subsequent replenish cycles.
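
The fallback itself is a one-time state change in the hcall error
path, condensed here from the ibmveth.c hunk below:

    if (batch > 1 && lpar_rc == H_FUNCTION) {
        /* The multi-buffer hcall was withdrawn by firmware, e.g.
         * after LPM; every later replenish cycle now takes the
         * single-buffer path. */
        adapter->rx_buffers_per_hcall = 1;
    }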

Tests were run on systems whose firmware supports the new
h_add_logical_lan_buffers hcall and on systems whose firmware does not.

On supported firmware, this reduces hypercall overhead significantly
when replenishing multiple buffers. SAR measurements showed about a
15% improvement in packet processing rate under moderate RX load,
with heavier traffic seeing gains of more than 30%.

Signed-off-by: Mingming Cao <mmc@linux.ibm.com>
Reviewed-by: Brian King <bjking1@linux.ibm.com>
Reviewed-by: Haren Myneni <haren@linux.ibm.com>
Reviewed-by: Dave Marquardt <davemarq@linux.ibm.com>
Reviewed-by: Simon Horman <horms@kernel.org>
Link: https://patch.msgid.link/20250719091356.57252-1-mmc@linux.ibm.com
Signed-off-by: Paolo Abeni <pabeni@redhat.com>

(cherry picked from commit 2094200b5f77e6710f9594571889f64f31966de1)
Signed-off-by: Jonathan Maple <jmaple@ciq.com>

# Conflicts:
#	drivers/net/ethernet/ibm/ibmveth.c
#	drivers/net/ethernet/ibm/ibmveth.h
diff --cc drivers/net/ethernet/ibm/ibmveth.c
index 04192190beba,6f0821f1e798..000000000000
--- a/drivers/net/ethernet/ibm/ibmveth.c
+++ b/drivers/net/ethernet/ibm/ibmveth.c
@@@ -224,86 -227,156 +229,177 @@@ static void ibmveth_replenish_buffer_po
 
 mb();
 
- for (i = 0; i < count; ++i) {
- union ibmveth_buf_desc desc;
+ batch = adapter->rx_buffers_per_hcall;
 
- free_index = pool->consumer_index;
- index = pool->free_map[free_index];
- skb = NULL;
+ while (remaining > 0) {
+ unsigned int free_index = pool->consumer_index;
 
++<<<<<<< HEAD
 + BUG_ON(index == IBM_VETH_INVALID_MAP);
++=======
+ /* Fill a batch of descriptors */
+ for (filled = 0; filled < min(remaining, batch); filled++) {
+ index = pool->free_map[free_index];
+ if (WARN_ON(index == IBM_VETH_INVALID_MAP)) {
+ adapter->replenish_add_buff_failure++;
+ netdev_info(adapter->netdev,
+ "Invalid map index %u, reset\n",
+ index);
+ schedule_work(&adapter->work);
+ break;
+ }
 
- /* are we allocating a new buffer or recycling an old one */
- if (pool->skbuff[index])
- goto reuse;
+ if (!pool->skbuff[index]) {
+ struct sk_buff *skb = NULL;
+
+ skb = netdev_alloc_skb(adapter->netdev,
+ pool->buff_size);
+ if (!skb) {
+ adapter->replenish_no_mem++;
+ adapter->replenish_add_buff_failure++;
+ break;
+ }
+
+ dma_addr = dma_map_single(dev, skb->data,
+ pool->buff_size,
+ DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, dma_addr)) {
+ dev_kfree_skb_any(skb);
+ adapter->replenish_add_buff_failure++;
+ break;
+ }
+
+ pool->dma_addr[index] = dma_addr;
+ pool->skbuff[index] = skb;
+ } else {
+ /* re-use case */
+ dma_addr = pool->dma_addr[index];
+ }
 
- skb = netdev_alloc_skb(adapter->netdev, pool->buff_size);
+ if (rx_flush) {
+ unsigned int len;
 
- if (!skb) {
- netdev_dbg(adapter->netdev,
- "replenish: unable to allocate skb\n");
- adapter->replenish_no_mem++;
- break;
+ len = adapter->netdev->mtu + IBMVETH_BUFF_OH;
+ len = min(pool->buff_size, len);
+ ibmveth_flush_buffer(pool->skbuff[index]->data,
+ len);
+ }
+
+ descs[filled].fields.flags_len = IBMVETH_BUF_VALID |
+ pool->buff_size;
+ descs[filled].fields.address = dma_addr;
+
+ correlators[filled] = ((u64)pool->index << 32) | index;
+ *(u64 *)pool->skbuff[index]->data = correlators[filled];
+
+ free_index++;
+ if (free_index >= pool->size)
+ free_index = 0;
 }
++>>>>>>> 2094200b5f77 (ibmveth: Add multi buffers rx replenishment hcall support)
 
- dma_addr = dma_map_single(&adapter->vdev->dev, skb->data,
- pool->buff_size, DMA_FROM_DEVICE);
+ if (!filled)
+ break;
 
- if (dma_mapping_error(&adapter->vdev->dev, dma_addr))
- goto failure;
+ /* single buffer case*/
+ if (filled == 1)
+ lpar_rc = h_add_logical_lan_buffer(vdev->unit_address,
+ descs[0].desc);
+ else
+ /* Multi-buffer hcall */
+ lpar_rc = h_add_logical_lan_buffers(vdev->unit_address,
+ descs[0].desc,
+ descs[1].desc,
+ descs[2].desc,
+ descs[3].desc,
+ descs[4].desc,
+ descs[5].desc,
+ descs[6].desc,
+ descs[7].desc);
+ if (lpar_rc != H_SUCCESS) {
+ dev_warn_ratelimited(dev,
+ "RX h_add_logical_lan failed: filled=%u, rc=%lu, batch=%u\n",
+ filled, lpar_rc, batch);
+ goto hcall_failure;
+ }
 
- pool->dma_addr[index] = dma_addr;
- pool->skbuff[index] = skb;
+ /* Only update pool state after hcall succeeds */
+ for (i = 0; i < filled; i++) {
+ free_index = pool->consumer_index;
+ pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
 
- if (rx_flush) {
- unsigned int len = min(pool->buff_size,
- adapter->netdev->mtu +
- IBMVETH_BUFF_OH);
- ibmveth_flush_buffer(skb->data, len);
+ pool->consumer_index++;
+ if (pool->consumer_index >= pool->size)
+ pool->consumer_index = 0;
 }
- reuse:
- dma_addr = pool->dma_addr[index];
- desc.fields.flags_len = IBMVETH_BUF_VALID | pool->buff_size;
- desc.fields.address = dma_addr;
 
- correlator = ((u64)pool->index << 32) | index;
- *(u64 *)pool->skbuff[index]->data = correlator;
+ buffers_added += filled;
+ adapter->replenish_add_buff_success += filled;
+ remaining -= filled;
 
- lpar_rc = h_add_logical_lan_buffer(adapter->vdev->unit_address,
- desc.desc);
+ memset(&descs, 0, sizeof(descs));
+ memset(&correlators, 0, sizeof(correlators));
+ continue;
 
- if (lpar_rc != H_SUCCESS) {
- netdev_warn(adapter->netdev,
- "%sadd_logical_lan failed %lu\n",
- skb ? "" : "When recycling: ", lpar_rc);
- goto failure;
- }
+ hcall_failure:
+ for (i = 0; i < filled; i++) {
+ index = correlators[i] & 0xffffffffUL;
+ dma_addr = pool->dma_addr[index];
 
- pool->free_map[free_index] = IBM_VETH_INVALID_MAP;
- pool->consumer_index++;
- if (pool->consumer_index >= pool->size)
- pool->consumer_index = 0;
+ if (pool->skbuff[index]) {
+ if (dma_addr &&
+ !dma_mapping_error(dev, dma_addr))
+ dma_unmap_single(dev, dma_addr,
+ pool->buff_size,
+ DMA_FROM_DEVICE);
 
- buffers_added++;
- adapter->replenish_add_buff_success++;
+ dev_kfree_skb_any(pool->skbuff[index]);
+ pool->skbuff[index] = NULL;
+ }
+ }
+ adapter->replenish_add_buff_failure += filled;
+
+ /*
+ * If multi rx buffers hcall is no longer supported by FW
+ * e.g. in the case of Live Parttion Migration
+ */
+ if (batch > 1 && lpar_rc == H_FUNCTION) {
+ /*
+ * Instead of retry submit single buffer individually
+ * here just set the max rx buffer per hcall to 1
+ * buffers will be respleshed next time
+ * when ibmveth_replenish_buffer_pool() is called again
+ * with single-buffer case
+ */
+ netdev_info(adapter->netdev,
+ "RX Multi buffers not supported by FW, rc=%lu\n",
+ lpar_rc);
+ adapter->rx_buffers_per_hcall = 1;
+ netdev_info(adapter->netdev,
+ "Next rx replesh will fall back to single-buffer hcall\n");
+ }
+ break;
 }
 
 mb();
 atomic_add(buffers_added, &(pool->available));
++<<<<<<< HEAD
 + return;
 +
 +failure:
 +
 + if (dma_addr && !dma_mapping_error(&adapter->vdev->dev, dma_addr))
 + dma_unmap_single(&adapter->vdev->dev,
 + pool->dma_addr[index], pool->buff_size,
 + DMA_FROM_DEVICE);
 + dev_kfree_skb_any(pool->skbuff[index]);
 + pool->skbuff[index] = NULL;
 + adapter->replenish_add_buff_failure++;
 +
 + mb();
 + atomic_add(buffers_added, &(pool->available));
++=======
++>>>>>>> 2094200b5f77 (ibmveth: Add multi buffers rx replenishment hcall support)
 }
 
 /*
diff --cc drivers/net/ethernet/ibm/ibmveth.h
index 8468e2c59d7a,068f99df133e..000000000000
--- a/drivers/net/ethernet/ibm/ibmveth.h
+++ b/drivers/net/ethernet/ibm/ibmveth.h
@@@ -134,38 -154,40 +154,59 @@@ struct ibmveth_rx_q
 };
 
 struct ibmveth_adapter {
++<<<<<<< HEAD
 + struct vio_dev *vdev;
 + struct net_device *netdev;
 + struct napi_struct napi;
 + unsigned int mcastFilterSize;
 + void * buffer_list_addr;
 + void * filter_list_addr;
 + void *tx_ltb_ptr[IBMVETH_MAX_QUEUES];
 + unsigned int tx_ltb_size;
 + dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES];
 + dma_addr_t buffer_list_dma;
 + dma_addr_t filter_list_dma;
 + struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS];
 + struct ibmveth_rx_q rx_queue;
 + int rx_csum;
 + int large_send;
 + bool is_active_trunk;
++=======
+ struct vio_dev *vdev;
+ struct net_device *netdev;
+ struct napi_struct napi;
+ struct work_struct work;
+ unsigned int mcastFilterSize;
+ void *buffer_list_addr;
+ void *filter_list_addr;
+ void *tx_ltb_ptr[IBMVETH_MAX_QUEUES];
+ unsigned int tx_ltb_size;
+ dma_addr_t tx_ltb_dma[IBMVETH_MAX_QUEUES];
+ dma_addr_t buffer_list_dma;
+ dma_addr_t filter_list_dma;
+ struct ibmveth_buff_pool rx_buff_pool[IBMVETH_NUM_BUFF_POOLS];
+ struct ibmveth_rx_q rx_queue;
+ int rx_csum;
+ int large_send;
+ bool is_active_trunk;
+ unsigned int rx_buffers_per_hcall;
 -
 - u64 fw_ipv6_csum_support;
 - u64 fw_ipv4_csum_support;
 - u64 fw_large_send_support;
 - /* adapter specific stats */
 - u64 replenish_task_cycles;
 - u64 replenish_no_mem;
 - u64 replenish_add_buff_failure;
 - u64 replenish_add_buff_success;
 - u64 rx_invalid_buffer;
 - u64 rx_no_buffer;
 - u64 tx_map_failed;
 - u64 tx_send_failed;
 - u64 tx_large_packets;
 - u64 rx_large_packets;
 - /* Ethtool settings */
++>>>>>>> 2094200b5f77 (ibmveth: Add multi buffers rx replenishment hcall support)
 +
 + u64 fw_ipv6_csum_support;
 + u64 fw_ipv4_csum_support;
 + u64 fw_large_send_support;
 + /* adapter specific stats */
 + u64 replenish_task_cycles;
 + u64 replenish_no_mem;
 + u64 replenish_add_buff_failure;
 + u64 replenish_add_buff_success;
 + u64 rx_invalid_buffer;
 + u64 rx_no_buffer;
 + u64 tx_map_failed;
 + u64 tx_send_failed;
 + u64 tx_large_packets;
 + u64 rx_large_packets;
 + /* Ethtool settings */
 u8 duplex;
 u32 speed;
 };
diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h
index 56e0c8767a7f..4693a00f7128 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -258,6 +258,7 @@
 #define H_QUERY_INT_STATE 0x1E4
 #define H_POLL_PENDING 0x1D8
 #define H_ILLAN_ATTRIBUTES 0x244
+#define H_ADD_LOGICAL_LAN_BUFFERS 0x248
 #define H_MODIFY_HEA_QP 0x250
 #define H_QUERY_HEA_QP 0x254
 #define H_QUERY_HEA 0x258
* Unmerged path drivers/net/ethernet/ibm/ibmveth.c
* Unmerged path drivers/net/ethernet/ibm/ibmveth.h