/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

#include <stdint.h>
#include <sys/queue.h>
#include <pthread.h>

#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev_driver.h>
#include <rte_devargs.h>
#include <rte_flow.h>
#include <rte_interrupts.h>

#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
#define FAILSAFE_OWNER_NAME "Fail-safe"

#define PMD_FAILSAFE_MAC_KVARG "mac"
#define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING	\
	"dev(<ifc>),"			\
	"exec(<shell command>),"	\
	"fd(<fd number>),"		\
	"mac=mac_addr,"			\
	"hotplug_poll=u64"		\
	""

#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128

#define DEVARGS_MAXLEN 4096
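/*
 * Illustrative use of the parameter string above (an example, not part of
 * the original header): a fail-safe port is typically instantiated from
 * the EAL command line, with each dev()/exec()/fd() entry declaring one
 * sub-device, e.g.:
 *
 *	--vdev 'net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_tap0)'
 *
 * "mac" and "hotplug_poll" correspond to PMD_FAILSAFE_MAC_KVARG and
 * PMD_FAILSAFE_HOTPLUG_POLL_KVARG; hotplug_poll is a polling period in
 * milliseconds, defaulting to FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS.
 */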
enum rxp_service_state {
	SS_NO_SERVICE = 0,
	SS_REGISTERED,
	SS_READY,
	SS_RUNNING,
};

/* TYPES */

struct rx_proxy {
	/* epoll file descriptor */
	int efd;
	/* event vector to be used by epoll */
	struct rte_epoll_event *evec;
	/* rte service id */
	uint32_t sid;
	/* service core id */
	uint32_t scid;
	enum rxp_service_state sstate;
};

struct rxq {
	struct fs_priv *priv;
	uint16_t qid;
	/* next sub_device to poll */
	struct sub_device *sdev;
	unsigned int socket_id;
	int event_fd;
	unsigned int enable_events:1;
	struct rte_eth_rxq_info info;
	rte_atomic64_t refcnt[];
};

struct txq {
	struct fs_priv *priv;
	uint16_t qid;
	unsigned int socket_id;
	struct rte_eth_txq_info info;
	rte_atomic64_t refcnt[];
};
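/*
 * The refcnt[] flexible array members above hold one reference counter
 * per sub-device, indexed by sub_device sid. A minimal allocation sketch
 * (illustrative only; the real queue setup lives in the driver's .c files
 * and would need <rte_malloc.h>):
 *
 *	struct rxq *rxq = rte_zmalloc(NULL,
 *			sizeof(*rxq) +
 *			sizeof(rte_atomic64_t) * FAILSAFE_MAX_ETHPORTS,
 *			RTE_CACHE_LINE_SIZE);
 *
 * The FS_ATOMIC_P()/FS_ATOMIC_V() guards defined below mark a slot
 * busy/idle while a burst function is using the sub-device queue.
 */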
struct rte_flow {
	TAILQ_ENTRY(rte_flow) next;
	/* sub_flows */
	struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
	/* flow description for synchronization */
	struct rte_flow_conv_rule rule;
	uint8_t rule_data[];
};

enum dev_state {
	DEV_UNDEFINED,
	DEV_PARSED,
	DEV_PROBED,
	DEV_ACTIVE,
	DEV_STARTED,
};

struct fs_stats {
	struct rte_eth_stats stats;
	uint64_t timestamp;
};

/*
 * Allocated in shared memory.
 */
struct sub_device {
	/* Exhaustive DPDK device description */
	struct sub_device *next;
	struct rte_devargs devargs;
	struct rte_bus *bus; /* for primary process only. */
	struct rte_device *dev; /* for primary process only. */
	uint8_t sid;
	/* Device state machine */
	enum dev_state state;
	/* Last stats snapshot passed to user */
	struct fs_stats stats_snapshot;
	/* Some devices are defined by a command line */
	char *cmdline;
	/* Others are retrieved through a file descriptor */
	char *fd_str;
	/* fail-safe device backreference */
	uint16_t fs_port_id; /* shared between processes */
	/* sub-device port id */
	uint16_t sdev_port_id; /* shared between processes */
	/* flag calling for recollection */
	volatile unsigned int remove:1;
	/* flow isolation state */
	int flow_isolated:1;
	/* RMV callback registration state */
	unsigned int rmv_callback:1;
	/* LSC callback registration state */
	unsigned int lsc_callback:1;
};

/*
 * This is referenced by eth_dev->data->dev_private.
 * It is shared between processes.
 */
struct fs_priv {
	struct rte_eth_dev_data *data; /* backreference to shared data. */
	/*
	 * Set of sub_devices.
	 * subs[0] is the preferred device,
	 * any other is just another slave.
	 */
	struct sub_device *subs; /* shared between processes */
	uint8_t subs_head; /* if head == tail, no subs */
	uint8_t subs_tail; /* first invalid */
	uint8_t subs_tx; /* current emitting device */
	uint8_t current_probed;
	/* flow mapping */
	TAILQ_HEAD(sub_flows, rte_flow) flow_list;
	/* current number of mac_addr slots allocated. */
	uint32_t nb_mac_addr;
	struct rte_ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
	uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
	uint32_t nb_mcast_addr;
	struct rte_ether_addr *mcast_addrs;
	/* current capabilities */
	struct rte_eth_dev_owner my_owner; /* Unique owner. */
	struct rte_intr_handle intr_handle; /* Port interrupt handle. */
	/*
	 * Fail-safe state machine.
	 * This level tracks the state of the EAL and the eth layer
	 * at large, as defined by the user application.
	 * It then steers the sub_devices toward the same
	 * synchronized state.
	 */
	enum dev_state state;
	struct rte_eth_stats stats_accumulator;
	/*
	 * Rx interrupts/events proxy.
	 * The PMD issues Rx events to the EAL on behalf of its subdevices;
	 * it does so by registering an event-fd for each of its queues with
	 * the EAL. A PMD service thread listens to all the Rx events from
	 * the subdevices. When a subdevice issues an Rx event, it is caught
	 * by this service, which then triggers an Rx event in the
	 * appropriate failsafe Rx queue.
	 */
	struct rx_proxy rxp;
	pthread_mutex_t hotplug_mutex;
	/* Hot-plug mutex is locked by the alarm mechanism. */
	volatile unsigned int alarm_lock:1;
	unsigned int pending_alarm:1; /* An alarm is pending */
	/* flow isolation state */
	int flow_isolated:1;
};

/* FAILSAFE_INTR */

int failsafe_rx_intr_install(struct rte_eth_dev *dev);
void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev);
int failsafe_rx_intr_install_subdevice(struct sub_device *sdev);
void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev);

/* MISC */

int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

void failsafe_set_burst_fn(struct rte_eth_dev *dev, int force_safe);

uint16_t failsafe_rx_burst(void *rxq,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

uint16_t failsafe_rx_burst_fast(void *rxq,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst_fast(void *txq,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */

int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_stats_increment(struct rte_eth_stats *to,
		struct rte_eth_stats *from);
int failsafe_eth_rmv_event_callback(uint16_t port_id,
				    enum rte_eth_event_type type,
				    void *arg, void *out);
int failsafe_eth_lsc_event_callback(uint16_t port_id,
				    enum rte_eth_event_type event,
				    void *cb_arg, void *out);
int failsafe_eth_new_event_callback(uint16_t port_id,
				    enum rte_eth_event_type event,
				    void *cb_arg, void *out);

/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t failsafe_hotplug_poll;
extern int failsafe_mac_from_arg;

/* HELPERS */

/* dev: (struct rte_eth_dev *) fail-safe device */
#define PRIV(dev) \
	((struct fs_priv *)(dev)->data->dev_private)

/* sdev: (struct sub_device *) */
#define ETH(sdev) \
	((sdev)->sdev_port_id == RTE_MAX_ETHPORTS ? \
	 NULL : &rte_eth_devices[(sdev)->sdev_port_id])

/* sdev: (struct sub_device *) */
#define PORT_ID(sdev) \
	((sdev)->sdev_port_id)

/* sdev: (struct sub_device *) */
#define SUB_ID(sdev) \
	((sdev)->sid)

/**
 * Stateful iterator construct over fail-safe sub-devices:
 * s:     (struct sub_device *), iterator
 * i:     (uint8_t), increment
 * dev:   (struct rte_eth_dev *), fail-safe ethdev
 * state: (enum dev_state), minimum acceptable device state
 */
#define FOREACH_SUBDEV_STATE(s, i, dev, state) \
	for (s = fs_find_next((dev), 0, state, &i); \
	     s != NULL; \
	     s = fs_find_next((dev), i + 1, state, &i))

/**
 * Iterator construct over fail-safe sub-devices:
 * s:   (struct sub_device *), iterator
 * i:   (uint8_t), increment
 * dev: (struct rte_eth_dev *), fail-safe ethdev
 */
#define FOREACH_SUBDEV(s, i, dev) \
	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
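/*
 * Illustrative use of the iterators above (an example, not part of the
 * original header): count the sub-devices that are at least probed.
 *
 *	struct sub_device *sdev;
 *	uint8_t i;
 *	unsigned int n = 0;
 *
 *	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED)
 *		n++;
 *
 * The iterator relies on fs_find_next() (defined at the end of this
 * header) and skips any slot whose state is below the requested minimum.
 */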
/* dev: (struct rte_eth_dev *) fail-safe device */
#define PREFERRED_SUBDEV(dev) \
	(&PRIV(dev)->subs[0])

/* dev: (struct rte_eth_dev *) fail-safe device */
#define TX_SUBDEV(dev) \
	(PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
	 : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
	 : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))

/**
 * s:   (struct sub_device *)
 * ops: (struct eth_dev_ops) member
 */
#define SUBOPS(s, ops) \
	(ETH(s)->dev_ops->ops)

/**
 * Atomic guard
 */

/**
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_P(a) \
	rte_atomic64_set(&(a), 1)

/**
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_V(a) \
	rte_atomic64_set(&(a), 0)

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_RX(s, i) \
	rte_atomic64_read( \
		&((struct rxq *) \
		(fs_dev(s)->data->rx_queues[i]))->refcnt[(s)->sid])

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_TX(s, i) \
	rte_atomic64_read( \
		&((struct txq *) \
		(fs_dev(s)->data->tx_queues[i]))->refcnt[(s)->sid])

#ifdef RTE_EXEC_ENV_FREEBSD
#define FS_THREADID_TYPE void *
#define FS_THREADID_FMT "p"
#else
#define FS_THREADID_TYPE unsigned long
#define FS_THREADID_FMT "lu"
#endif

extern int failsafe_logtype;

#define LOG__(l, m, ...) \
	rte_log(RTE_LOG_ ## l, failsafe_logtype, \
		"net_failsafe: " m "%c", __VA_ARGS__)

#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
#define DEBUG(...) LOG_(DEBUG, __VA_ARGS__)
#define INFO(...) LOG_(INFO, __VA_ARGS__)
#define WARN(...) LOG_(WARNING, __VA_ARGS__)
#define ERROR(...) LOG_(ERR, __VA_ARGS__)

/* inlined functions */

static inline struct sub_device *
fs_find_next(struct rte_eth_dev *dev,
	     uint8_t sid,
	     enum dev_state min_state,
	     uint8_t *sid_out)
{
	struct sub_device *subs;
	uint8_t tail;

	subs = PRIV(dev)->subs;
	tail = PRIV(dev)->subs_tail;
	while (sid < tail) {
		if (subs[sid].state >= min_state)
			break;
		sid++;
	}
	*sid_out = sid;
	if (sid >= tail)
		return NULL;
	return &subs[sid];
}

static inline struct rte_eth_dev *
fs_dev(struct sub_device *sdev)
{
	return &rte_eth_devices[sdev->fs_port_id];
}

/*
 * Lock the hot-plug mutex.
 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
 */
static inline int
fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
	int ret;

	if (is_alarm) {
		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
		if (ret) {
			DEBUG("Failed to acquire hot-plug mutex (%s), will"
			      " retry later...", strerror(ret));
			return ret;
		}
		PRIV(dev)->alarm_lock = 1;
	} else {
		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
		if (ret) {
			ERROR("Cannot lock mutex (%s)", strerror(ret));
			return ret;
		}
	}
	return ret;
}

/*
 * Unlock the hot-plug mutex.
 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
 */
static inline void
fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
	int ret;

	if (is_alarm) {
		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
		PRIV(dev)->alarm_lock = 0;
	}
	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
	if (ret)
		ERROR("Cannot unlock hot-plug mutex (%s)", strerror(ret));
}

/*
 * Switch the emitting device.
 * If banned is set, it must not be considered for
 * the role of emitting device.
 */
static inline void
fs_switch_dev(struct rte_eth_dev *dev,
	      struct sub_device *banned)
{
	struct sub_device *txd;
	enum dev_state req_state;

	req_state = PRIV(dev)->state;
	txd = TX_SUBDEV(dev);
	if (PREFERRED_SUBDEV(dev)->state >= req_state &&
	    PREFERRED_SUBDEV(dev) != banned) {
		if (txd != PREFERRED_SUBDEV(dev) &&
		    (txd == NULL ||
		     (req_state == DEV_STARTED) ||
		     (txd && txd->state < DEV_STARTED))) {
			DEBUG("Switching tx_dev to preferred sub_device");
			PRIV(dev)->subs_tx = 0;
		}
	} else if ((txd && txd->state < req_state) ||
		   txd == NULL ||
		   txd == banned) {
		struct sub_device *sdev = NULL;
		uint8_t i;

		/* Using an acceptable device */
		FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
			if (sdev == banned)
				continue;
			DEBUG("Switching tx_dev to sub_device %d", i);
			PRIV(dev)->subs_tx = i;
			break;
		}
		if (i >= PRIV(dev)->subs_tail || sdev == NULL) {
			DEBUG("No device ready, deactivating tx_dev");
			PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
		}
	} else {
		return;
	}
	failsafe_set_burst_fn(dev, 0);
	rte_wmb();
}

/*
 * Adjust an error value and rte_errno to the fail-safe actual error value.
 */
static inline int
fs_err(struct sub_device *sdev, int err)
{
	/* A device removal shouldn't be reported as an error. */
	if (sdev->remove == 1 || err == -EIO)
		return rte_errno = 0;
	return err;
}
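/*
 * Illustrative use of fs_err() (an example, not part of the original
 * header): sub-device operations typically filter their return value
 * through fs_err() so that failures caused by an in-progress hot-unplug
 * are silenced rather than reported, e.g.:
 *
 *	ret = rte_eth_dev_set_mtu(PORT_ID(sdev), mtu);
 *	ret = fs_err(sdev, ret);
 *	if (ret)
 *		ERROR("Operation failed on sub_device %d", SUB_ID(sdev));
 *
 * A removed sub-device (sdev->remove set, or an -EIO return) is handled
 * by the hot-plug mechanism instead of the application.
 */

#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */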