1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2015 6WIND S.A. 5 * Copyright 2015 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #include <stddef.h> 35 #include <assert.h> 36 #include <stdint.h> 37 #include <string.h> 38 #include <inttypes.h> 39 #include <errno.h> 40 #include <netinet/in.h> 41 #include <linux/if.h> 42 #include <sys/ioctl.h> 43 #include <arpa/inet.h> 44 45 /* Verbs header. */ 46 /* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */ 47 #ifdef PEDANTIC 48 #pragma GCC diagnostic ignored "-pedantic" 49 #endif 50 #include <infiniband/verbs.h> 51 #ifdef PEDANTIC 52 #pragma GCC diagnostic error "-pedantic" 53 #endif 54 55 /* DPDK headers don't like -pedantic. */ 56 #ifdef PEDANTIC 57 #pragma GCC diagnostic ignored "-pedantic" 58 #endif 59 #include <rte_ether.h> 60 #include <rte_ethdev.h> 61 #include <rte_common.h> 62 #ifdef PEDANTIC 63 #pragma GCC diagnostic error "-pedantic" 64 #endif 65 66 #include "mlx5.h" 67 #include "mlx5_utils.h" 68 #include "mlx5_rxtx.h" 69 #include "mlx5_defs.h" 70 71 /** 72 * Get MAC address by querying netdevice. 73 * 74 * @param[in] priv 75 * struct priv for the requested device. 76 * @param[out] mac 77 * MAC address output buffer. 78 * 79 * @return 80 * 0 on success, -1 on failure and errno is set. 81 */ 82 int 83 priv_get_mac(struct priv *priv, uint8_t (*mac)[ETHER_ADDR_LEN]) 84 { 85 struct ifreq request; 86 87 if (priv_ifreq(priv, SIOCGIFHWADDR, &request)) 88 return -1; 89 memcpy(mac, request.ifr_hwaddr.sa_data, ETHER_ADDR_LEN); 90 return 0; 91 } 92 93 /** 94 * Delete MAC flow steering rule. 95 * 96 * @param hash_rxq 97 * Pointer to hash RX queue structure. 98 * @param mac_index 99 * MAC address index. 100 * @param vlan_index 101 * VLAN index to use. 102 */ 103 static void 104 hash_rxq_del_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index, 105 unsigned int vlan_index) 106 { 107 #ifndef NDEBUG 108 const uint8_t (*mac)[ETHER_ADDR_LEN] = 109 (const uint8_t (*)[ETHER_ADDR_LEN]) 110 hash_rxq->priv->mac[mac_index].addr_bytes; 111 #endif 112 113 assert(mac_index < RTE_DIM(hash_rxq->mac_flow)); 114 assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index])); 115 if (hash_rxq->mac_flow[mac_index][vlan_index] == NULL) 116 return; 117 DEBUG("%p: removing MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u" 118 " VLAN index %u", 119 (void *)hash_rxq, 120 (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5], 121 mac_index, 122 vlan_index); 123 claim_zero(ibv_exp_destroy_flow(hash_rxq->mac_flow 124 [mac_index][vlan_index])); 125 hash_rxq->mac_flow[mac_index][vlan_index] = NULL; 126 } 127 128 /** 129 * Unregister a MAC address from a hash RX queue. 130 * 131 * @param hash_rxq 132 * Pointer to hash RX queue structure. 133 * @param mac_index 134 * MAC address index. 135 */ 136 static void 137 hash_rxq_mac_addr_del(struct hash_rxq *hash_rxq, unsigned int mac_index) 138 { 139 unsigned int i; 140 141 assert(mac_index < RTE_DIM(hash_rxq->mac_flow)); 142 for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow[mac_index])); ++i) 143 hash_rxq_del_mac_flow(hash_rxq, mac_index, i); 144 } 145 146 /** 147 * Unregister all MAC addresses from a hash RX queue. 148 * 149 * @param hash_rxq 150 * Pointer to hash RX queue structure. 151 */ 152 void 153 hash_rxq_mac_addrs_del(struct hash_rxq *hash_rxq) 154 { 155 unsigned int i; 156 157 for (i = 0; (i != RTE_DIM(hash_rxq->mac_flow)); ++i) 158 hash_rxq_mac_addr_del(hash_rxq, i); 159 } 160 161 /** 162 * Unregister a MAC address. 163 * 164 * This is done for each hash RX queue. 165 * 166 * @param priv 167 * Pointer to private structure. 168 * @param mac_index 169 * MAC address index. 170 */ 171 static void 172 priv_mac_addr_del(struct priv *priv, unsigned int mac_index) 173 { 174 unsigned int i; 175 176 assert(mac_index < RTE_DIM(priv->mac)); 177 if (!BITFIELD_ISSET(priv->mac_configured, mac_index)) 178 return; 179 for (i = 0; (i != priv->hash_rxqs_n); ++i) 180 hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[i], mac_index); 181 BITFIELD_RESET(priv->mac_configured, mac_index); 182 } 183 184 /** 185 * Unregister all MAC addresses from all hash RX queues. 186 * 187 * @param priv 188 * Pointer to private structure. 189 */ 190 void 191 priv_mac_addrs_disable(struct priv *priv) 192 { 193 unsigned int i; 194 195 for (i = 0; (i != priv->hash_rxqs_n); ++i) 196 hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[i]); 197 } 198 199 /** 200 * DPDK callback to remove a MAC address. 201 * 202 * @param dev 203 * Pointer to Ethernet device structure. 204 * @param index 205 * MAC address index. 206 */ 207 void 208 mlx5_mac_addr_remove(struct rte_eth_dev *dev, uint32_t index) 209 { 210 struct priv *priv = dev->data->dev_private; 211 212 if (mlx5_is_secondary()) 213 return; 214 215 priv_lock(priv); 216 DEBUG("%p: removing MAC address from index %" PRIu32, 217 (void *)dev, index); 218 if (index >= RTE_DIM(priv->mac)) 219 goto end; 220 priv_mac_addr_del(priv, index); 221 end: 222 priv_unlock(priv); 223 } 224 225 /** 226 * Add MAC flow steering rule. 227 * 228 * @param hash_rxq 229 * Pointer to hash RX queue structure. 230 * @param mac_index 231 * MAC address index to register. 232 * @param vlan_index 233 * VLAN index to use. 234 * 235 * @return 236 * 0 on success, errno value on failure. 237 */ 238 static int 239 hash_rxq_add_mac_flow(struct hash_rxq *hash_rxq, unsigned int mac_index, 240 unsigned int vlan_index) 241 { 242 struct ibv_exp_flow *flow; 243 struct priv *priv = hash_rxq->priv; 244 const uint8_t (*mac)[ETHER_ADDR_LEN] = 245 (const uint8_t (*)[ETHER_ADDR_LEN]) 246 priv->mac[mac_index].addr_bytes; 247 FLOW_ATTR_SPEC_ETH(data, priv_flow_attr(priv, NULL, 0, hash_rxq->type)); 248 struct ibv_exp_flow_attr *attr = &data->attr; 249 struct ibv_exp_flow_spec_eth *spec = &data->spec; 250 unsigned int vlan_enabled = !!priv->vlan_filter_n; 251 unsigned int vlan_id = priv->vlan_filter[vlan_index]; 252 253 assert(mac_index < RTE_DIM(hash_rxq->mac_flow)); 254 assert(vlan_index < RTE_DIM(hash_rxq->mac_flow[mac_index])); 255 if (hash_rxq->mac_flow[mac_index][vlan_index] != NULL) 256 return 0; 257 /* 258 * No padding must be inserted by the compiler between attr and spec. 259 * This layout is expected by libibverbs. 260 */ 261 assert(((uint8_t *)attr + sizeof(*attr)) == (uint8_t *)spec); 262 priv_flow_attr(priv, attr, sizeof(data), hash_rxq->type); 263 /* The first specification must be Ethernet. */ 264 assert(spec->type == IBV_EXP_FLOW_SPEC_ETH); 265 assert(spec->size == sizeof(*spec)); 266 *spec = (struct ibv_exp_flow_spec_eth){ 267 .type = IBV_EXP_FLOW_SPEC_ETH, 268 .size = sizeof(*spec), 269 .val = { 270 .dst_mac = { 271 (*mac)[0], (*mac)[1], (*mac)[2], 272 (*mac)[3], (*mac)[4], (*mac)[5] 273 }, 274 .vlan_tag = (vlan_enabled ? htons(vlan_id) : 0), 275 }, 276 .mask = { 277 .dst_mac = "\xff\xff\xff\xff\xff\xff", 278 .vlan_tag = (vlan_enabled ? htons(0xfff) : 0), 279 }, 280 }; 281 DEBUG("%p: adding MAC address %02x:%02x:%02x:%02x:%02x:%02x index %u" 282 " VLAN index %u filtering %s, ID %u", 283 (void *)hash_rxq, 284 (*mac)[0], (*mac)[1], (*mac)[2], (*mac)[3], (*mac)[4], (*mac)[5], 285 mac_index, 286 vlan_index, 287 (vlan_enabled ? "enabled" : "disabled"), 288 vlan_id); 289 /* Create related flow. */ 290 errno = 0; 291 flow = ibv_exp_create_flow(hash_rxq->qp, attr); 292 if (flow == NULL) { 293 /* It's not clear whether errno is always set in this case. */ 294 ERROR("%p: flow configuration failed, errno=%d: %s", 295 (void *)hash_rxq, errno, 296 (errno ? strerror(errno) : "Unknown error")); 297 if (errno) 298 return errno; 299 return EINVAL; 300 } 301 hash_rxq->mac_flow[mac_index][vlan_index] = flow; 302 return 0; 303 } 304 305 /** 306 * Register a MAC address in a hash RX queue. 307 * 308 * @param hash_rxq 309 * Pointer to hash RX queue structure. 310 * @param mac_index 311 * MAC address index to register. 312 * 313 * @return 314 * 0 on success, errno value on failure. 315 */ 316 static int 317 hash_rxq_mac_addr_add(struct hash_rxq *hash_rxq, unsigned int mac_index) 318 { 319 struct priv *priv = hash_rxq->priv; 320 unsigned int i = 0; 321 int ret; 322 323 assert(mac_index < RTE_DIM(hash_rxq->mac_flow)); 324 assert(RTE_DIM(hash_rxq->mac_flow[mac_index]) == 325 RTE_DIM(priv->vlan_filter)); 326 /* Add a MAC address for each VLAN filter, or at least once. */ 327 do { 328 ret = hash_rxq_add_mac_flow(hash_rxq, mac_index, i); 329 if (ret) { 330 /* Failure, rollback. */ 331 while (i != 0) 332 hash_rxq_del_mac_flow(hash_rxq, mac_index, 333 --i); 334 return ret; 335 } 336 } while (++i < priv->vlan_filter_n); 337 return 0; 338 } 339 340 /** 341 * Register all MAC addresses in a hash RX queue. 342 * 343 * @param hash_rxq 344 * Pointer to hash RX queue structure. 345 * 346 * @return 347 * 0 on success, errno value on failure. 348 */ 349 int 350 hash_rxq_mac_addrs_add(struct hash_rxq *hash_rxq) 351 { 352 struct priv *priv = hash_rxq->priv; 353 unsigned int i; 354 int ret; 355 356 assert(RTE_DIM(priv->mac) == RTE_DIM(hash_rxq->mac_flow)); 357 for (i = 0; (i != RTE_DIM(priv->mac)); ++i) { 358 if (!BITFIELD_ISSET(priv->mac_configured, i)) 359 continue; 360 ret = hash_rxq_mac_addr_add(hash_rxq, i); 361 if (!ret) 362 continue; 363 /* Failure, rollback. */ 364 while (i != 0) 365 hash_rxq_mac_addr_del(hash_rxq, --i); 366 assert(ret > 0); 367 return ret; 368 } 369 return 0; 370 } 371 372 /** 373 * Register a MAC address. 374 * 375 * This is done for each hash RX queue. 376 * 377 * @param priv 378 * Pointer to private structure. 379 * @param mac_index 380 * MAC address index to use. 381 * @param mac 382 * MAC address to register. 383 * 384 * @return 385 * 0 on success, errno value on failure. 386 */ 387 int 388 priv_mac_addr_add(struct priv *priv, unsigned int mac_index, 389 const uint8_t (*mac)[ETHER_ADDR_LEN]) 390 { 391 unsigned int i; 392 int ret; 393 394 assert(mac_index < RTE_DIM(priv->mac)); 395 /* First, make sure this address isn't already configured. */ 396 for (i = 0; (i != RTE_DIM(priv->mac)); ++i) { 397 /* Skip this index, it's going to be reconfigured. */ 398 if (i == mac_index) 399 continue; 400 if (!BITFIELD_ISSET(priv->mac_configured, i)) 401 continue; 402 if (memcmp(priv->mac[i].addr_bytes, *mac, sizeof(*mac))) 403 continue; 404 /* Address already configured elsewhere, return with error. */ 405 return EADDRINUSE; 406 } 407 if (BITFIELD_ISSET(priv->mac_configured, mac_index)) 408 priv_mac_addr_del(priv, mac_index); 409 priv->mac[mac_index] = (struct ether_addr){ 410 { 411 (*mac)[0], (*mac)[1], (*mac)[2], 412 (*mac)[3], (*mac)[4], (*mac)[5] 413 } 414 }; 415 if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC)) 416 goto end; 417 for (i = 0; (i != priv->hash_rxqs_n); ++i) { 418 ret = hash_rxq_mac_addr_add(&(*priv->hash_rxqs)[i], mac_index); 419 if (!ret) 420 continue; 421 /* Failure, rollback. */ 422 while (i != 0) 423 hash_rxq_mac_addr_del(&(*priv->hash_rxqs)[--i], 424 mac_index); 425 return ret; 426 } 427 end: 428 BITFIELD_SET(priv->mac_configured, mac_index); 429 return 0; 430 } 431 432 /** 433 * Register all MAC addresses in all hash RX queues. 434 * 435 * @param priv 436 * Pointer to private structure. 437 * 438 * @return 439 * 0 on success, errno value on failure. 440 */ 441 int 442 priv_mac_addrs_enable(struct priv *priv) 443 { 444 unsigned int i; 445 int ret; 446 447 if (!priv_allow_flow_type(priv, HASH_RXQ_FLOW_TYPE_MAC)) 448 return 0; 449 for (i = 0; (i != priv->hash_rxqs_n); ++i) { 450 ret = hash_rxq_mac_addrs_add(&(*priv->hash_rxqs)[i]); 451 if (!ret) 452 continue; 453 /* Failure, rollback. */ 454 while (i != 0) 455 hash_rxq_mac_addrs_del(&(*priv->hash_rxqs)[--i]); 456 assert(ret > 0); 457 return ret; 458 } 459 return 0; 460 } 461 462 /** 463 * DPDK callback to add a MAC address. 464 * 465 * @param dev 466 * Pointer to Ethernet device structure. 467 * @param mac_addr 468 * MAC address to register. 469 * @param index 470 * MAC address index. 471 * @param vmdq 472 * VMDq pool index to associate address with (ignored). 473 */ 474 void 475 mlx5_mac_addr_add(struct rte_eth_dev *dev, struct ether_addr *mac_addr, 476 uint32_t index, uint32_t vmdq) 477 { 478 struct priv *priv = dev->data->dev_private; 479 480 if (mlx5_is_secondary()) 481 return; 482 483 (void)vmdq; 484 priv_lock(priv); 485 DEBUG("%p: adding MAC address at index %" PRIu32, 486 (void *)dev, index); 487 if (index >= RTE_DIM(priv->mac)) 488 goto end; 489 priv_mac_addr_add(priv, index, 490 (const uint8_t (*)[ETHER_ADDR_LEN]) 491 mac_addr->addr_bytes); 492 end: 493 priv_unlock(priv); 494 } 495 496 /** 497 * DPDK callback to set primary MAC address. 498 * 499 * @param dev 500 * Pointer to Ethernet device structure. 501 * @param mac_addr 502 * MAC address to register. 503 */ 504 void 505 mlx5_mac_addr_set(struct rte_eth_dev *dev, struct ether_addr *mac_addr) 506 { 507 DEBUG("%p: setting primary MAC address", (void *)dev); 508 mlx5_mac_addr_remove(dev, 0); 509 mlx5_mac_addr_add(dev, mac_addr, 0, 0); 510 } 511