/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>
#include <pthread.h>

#include <rte_atomic.h>
#include <rte_debug.h>
#include <rte_dev.h>
#include <rte_errno.h>
#include <rte_ethdev_driver.h>
#include <rte_devargs.h>
#include <rte_interrupts.h>

#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
#define FAILSAFE_OWNER_NAME "Fail-safe"

#define PMD_FAILSAFE_MAC_KVARG "mac"
#define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING \
	"dev(<ifc>)," \
	"exec(<shell command>)," \
	"fd(<fd number>)," \
	"mac=mac_addr," \
	"hotplug_poll=u64" \
	""

#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128

#define DEVARGS_MAXLEN 4096
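/*
 * Example devargs (illustrative only: the MAC, PCI address and sub-device
 * names below are placeholders, not driver defaults):
 *
 *   --vdev 'net_failsafe0,mac=de:ad:be:ef:01:02,dev(84:00.0),dev(net_ring0)'
 *
 * Each dev()/exec()/fd() declaration adds one sub-device, while mac= and
 * hotplug_poll= tune the fail-safe port itself (hotplug_poll overrides the
 * 2000 ms default above).
 */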
enum rxp_service_state {
	SS_NO_SERVICE = 0,
	SS_REGISTERED,
	SS_READY,
	SS_RUNNING,
};

/* TYPES */

struct rx_proxy {
	/* epoll file descriptor */
	int efd;
	/* event vector to be used by epoll */
	struct rte_epoll_event *evec;
	/* rte service id */
	uint32_t sid;
	/* service core id */
	uint32_t scid;
	enum rxp_service_state sstate;
};

struct rxq {
	struct fs_priv *priv;
	uint16_t qid;
	/* next sub_device to poll */
	struct sub_device *sdev;
	unsigned int socket_id;
	int event_fd;
	unsigned int enable_events:1;
	struct rte_eth_rxq_info info;
	rte_atomic64_t refcnt[];
};

struct txq {
	struct fs_priv *priv;
	uint16_t qid;
	unsigned int socket_id;
	struct rte_eth_txq_info info;
	rte_atomic64_t refcnt[];
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next;
	/* sub_flows */
	struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
	/* flow description for synchronization */
	struct rte_flow_desc *fd;
};

enum dev_state {
	DEV_UNDEFINED,
	DEV_PARSED,
	DEV_PROBED,
	DEV_ACTIVE,
	DEV_STARTED,
};

struct fs_stats {
	struct rte_eth_stats stats;
	uint64_t timestamp;
};

struct sub_device {
	/* Exhaustive DPDK device description */
	struct sub_device *next;
	struct rte_devargs devargs;
	struct rte_bus *bus;
	struct rte_device *dev;
	struct rte_eth_dev *edev;
	uint8_t sid;
	/* Device state machine */
	enum dev_state state;
	/* Last stats snapshot passed to user */
	struct fs_stats stats_snapshot;
	/* Some devices are defined by a command line */
	char *cmdline;
	/* Others are retrieved through a file descriptor */
	char *fd_str;
	/* fail-safe device backreference */
	struct rte_eth_dev *fs_dev;
	/* flag requesting sub-device collection after removal */
	volatile unsigned int remove:1;
	/* flow isolation state */
	unsigned int flow_isolated:1;
	/* RMV callback registration state */
	unsigned int rmv_callback:1;
	/* LSC callback registration state */
	unsigned int lsc_callback:1;
};

struct fs_priv {
	struct rte_eth_dev *dev;
	/*
	 * Set of sub_devices.
	 * subs[0] is the preferred device;
	 * any other is just another slave.
	 */
	struct sub_device *subs;
	uint8_t subs_head; /* if head == tail, no subs */
	uint8_t subs_tail; /* first invalid */
	uint8_t subs_tx; /* current emitting device */
	uint8_t current_probed;
	/* flow mapping */
	TAILQ_HEAD(sub_flows, rte_flow) flow_list;
	/* current number of mac_addr slots allocated */
	uint32_t nb_mac_addr;
	struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
	uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
	uint32_t nb_mcast_addr;
	struct ether_addr *mcast_addrs;
	/* current capabilities */
	struct rte_eth_dev_info infos;
	struct rte_eth_dev_owner my_owner; /* Unique owner. */
	struct rte_intr_handle intr_handle; /* Port interrupt handle. */
	/*
	 * Fail-safe state machine.
	 * This level tracks the state of the EAL and ethdev layer at large,
	 * as defined by the user application.
	 * It then steers the sub_devices toward the same synchronized state.
	 */
	enum dev_state state;
	struct rte_eth_stats stats_accumulator;
	/*
	 * Rx interrupts/events proxy.
	 * The PMD issues Rx events to the EAL on behalf of its subdevices.
	 * It does so by registering an event-fd for each of its queues with
	 * the EAL. A PMD service thread listens to all the Rx events from
	 * the subdevices; when a subdevice issues an Rx event, the service
	 * catches it and triggers an Rx event on the appropriate fail-safe
	 * Rx queue.
	 */
	struct rx_proxy rxp;
	pthread_mutex_t hotplug_mutex;
	/* Hot-plug mutex is locked by the alarm mechanism. */
	volatile unsigned int alarm_lock:1;
	unsigned int pending_alarm:1; /* An alarm is pending */
	/* flow isolation state */
	unsigned int flow_isolated:1;
};

/* FAILSAFE_INTR */

int failsafe_rx_intr_install(struct rte_eth_dev *dev);
void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev);
int failsafe_rx_intr_install_subdevice(struct sub_device *sdev);
void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev);

/* MISC */

int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

void set_burst_fn(struct rte_eth_dev *dev, int force_safe);

uint16_t failsafe_rx_burst(void *rxq,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

uint16_t failsafe_rx_burst_fast(void *rxq,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst_fast(void *txq,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */

int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_stats_increment(struct rte_eth_stats *to,
		struct rte_eth_stats *from);
int failsafe_eth_rmv_event_callback(uint16_t port_id,
		enum rte_eth_event_type type,
		void *arg, void *out);
int failsafe_eth_lsc_event_callback(uint16_t port_id,
		enum rte_eth_event_type event,
		void *cb_arg, void *out);
int failsafe_eth_new_event_callback(uint16_t port_id,
		enum rte_eth_event_type event,
		void *cb_arg, void *out);

/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t hotplug_poll;
extern int mac_from_arg;

/* HELPERS */

/* dev: (struct rte_eth_dev *) fail-safe device */
#define PRIV(dev) \
	((struct fs_priv *)(dev)->data->dev_private)

/* sdev: (struct sub_device *) */
#define ETH(sdev) \
	((sdev)->edev)

/* sdev: (struct sub_device *) */
#define PORT_ID(sdev) \
	(ETH(sdev)->data->port_id)

/* sdev: (struct sub_device *) */
#define SUB_ID(sdev) \
	((sdev)->sid)

/**
 * Stateful iterator construct over fail-safe sub-devices:
 * s:     (struct sub_device *), iterator
 * i:     (uint8_t), current sub-device index, updated by the iterator
 * dev:   (struct rte_eth_dev *), fail-safe ethdev
 * state: (enum dev_state), minimum acceptable device state
 */
#define FOREACH_SUBDEV_STATE(s, i, dev, state) \
	for (s = fs_find_next((dev), 0, state, &i); \
	     s != NULL; \
	     s = fs_find_next((dev), i + 1, state, &i))

/**
 * Iterator construct over fail-safe sub-devices:
 * s:   (struct sub_device *), iterator
 * i:   (uint8_t), current sub-device index, updated by the iterator
 * dev: (struct rte_eth_dev *), fail-safe ethdev
 */
#define FOREACH_SUBDEV(s, i, dev) \
	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
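/*
 * Usage sketch for the iterators above (illustrative, not part of the
 * driver): control-path code walks every sub-device that reached a given
 * minimum state, e.g. to stop all started sub-devices:
 *
 *	struct sub_device *sdev;
 *	uint8_t i;
 *
 *	FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_STARTED)
 *		rte_eth_dev_stop(PORT_ID(sdev));
 */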
/* dev: (struct rte_eth_dev *) fail-safe device */
#define PREFERRED_SUBDEV(dev) \
	(&PRIV(dev)->subs[0])

/* dev: (struct rte_eth_dev *) fail-safe device */
#define TX_SUBDEV(dev) \
	(PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \
	 : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
	 : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))

/**
 * s:   (struct sub_device *)
 * ops: (struct eth_dev_ops) member
 */
#define SUBOPS(s, ops) \
	(ETH(s)->dev_ops->ops)

/**
 * Atomic guard: flags a per-sub-device queue slot as in use (P) or idle (V).
 */

/**
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_P(a) \
	rte_atomic64_set(&(a), 1)

/**
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_V(a) \
	rte_atomic64_set(&(a), 0)

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_RX(s, i) \
	rte_atomic64_read( \
	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid])

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_TX(s, i) \
	rte_atomic64_read( \
	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid])
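/*
 * Datapath sketch of the guard above (illustrative and simplified: the
 * real burst functions first check that the Tx sub-device is usable).
 * The refcnt slot is raised around the call into the sub-device so that
 * the hot-plug path can detect a queue still in use:
 *
 *	struct txq *txq = queue;
 *	struct sub_device *sdev = TX_SUBDEV(txq->priv->dev);
 *	uint16_t nb_tx;
 *
 *	FS_ATOMIC_P(txq->refcnt[sdev->sid]);
 *	nb_tx = ETH(sdev)->tx_pkt_burst(ETH(sdev)->data->tx_queues[txq->qid],
 *					tx_pkts, nb_pkts);
 *	FS_ATOMIC_V(txq->refcnt[sdev->sid]);
 */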
/* pthread_t is an opaque pointer on FreeBSD, an integer type on Linux. */
#ifdef RTE_EXEC_ENV_BSDAPP
#define FS_THREADID_TYPE void*
#define FS_THREADID_FMT "p"
#else
#define FS_THREADID_TYPE unsigned long
#define FS_THREADID_FMT "lu"
#endif

extern int failsafe_logtype;

/* LOG_ appends '\n' as the trailing argument consumed by "%c" below. */
#define LOG__(l, m, ...) \
	rte_log(RTE_LOG_ ## l, failsafe_logtype, \
		"net_failsafe: " m "%c", __VA_ARGS__)

#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
#define DEBUG(...) LOG_(DEBUG, __VA_ARGS__)
#define INFO(...) LOG_(INFO, __VA_ARGS__)
#define WARN(...) LOG_(WARNING, __VA_ARGS__)
#define ERROR(...) LOG_(ERR, __VA_ARGS__)

/* inlined functions */

static inline struct sub_device *
fs_find_next(struct rte_eth_dev *dev,
	     uint8_t sid,
	     enum dev_state min_state,
	     uint8_t *sid_out)
{
	struct sub_device *subs;
	uint8_t tail;

	subs = PRIV(dev)->subs;
	tail = PRIV(dev)->subs_tail;
	while (sid < tail) {
		if (subs[sid].state >= min_state)
			break;
		sid++;
	}
	*sid_out = sid;
	if (sid >= tail)
		return NULL;
	return &subs[sid];
}

/*
 * Lock hot-plug mutex.
 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
 */
static inline int
fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
	int ret;

	if (is_alarm) {
		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
		if (ret) {
			DEBUG("Failed to try-lock the hot-plug mutex (%s),"
			      " will retry later...", strerror(ret));
			return ret;
		}
		PRIV(dev)->alarm_lock = 1;
	} else {
		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
		if (ret) {
			ERROR("Cannot lock mutex (%s)", strerror(ret));
			return ret;
		}
	}
	DEBUG("Hot-plug mutex was locked by thread %" FS_THREADID_FMT "%s",
	      (FS_THREADID_TYPE)pthread_self(),
	      PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
	return ret;
}

/*
 * Unlock hot-plug mutex.
 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
 */
static inline void
fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
	int ret;
	unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;

	if (is_alarm) {
		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
		PRIV(dev)->alarm_lock = 0;
	}
	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
	if (ret)
		ERROR("Cannot unlock hot-plug mutex (%s)", strerror(ret));
	else
		DEBUG("Hot-plug mutex was unlocked by thread %" FS_THREADID_FMT "%s",
		      (FS_THREADID_TYPE)pthread_self(),
		      prev_alarm_lock ? " by the hot-plug alarm" : "");
}
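/*
 * Locking sketch (illustrative): regular control operations take the
 * mutex unconditionally, while the hot-plug alarm passes is_alarm == 1
 * so that it backs off (trylock) instead of deadlocking against an
 * operation already in progress:
 *
 *	fs_lock(dev, 0);
 *	... reconfigure or poll the sub-devices ...
 *	fs_unlock(dev, 0);
 */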
/*
 * Switch the emitting device.
 * If banned is set, the banned device must not be considered for
 * the role of emitting device.
 */
static inline void
fs_switch_dev(struct rte_eth_dev *dev,
	      struct sub_device *banned)
{
	struct sub_device *txd;
	enum dev_state req_state;

	req_state = PRIV(dev)->state;
	txd = TX_SUBDEV(dev);
	if (PREFERRED_SUBDEV(dev)->state >= req_state &&
	    PREFERRED_SUBDEV(dev) != banned) {
		if (txd != PREFERRED_SUBDEV(dev) &&
		    (txd == NULL ||
		     (req_state == DEV_STARTED) ||
		     (txd && txd->state < DEV_STARTED))) {
			DEBUG("Switching tx_dev to preferred sub_device");
			PRIV(dev)->subs_tx = 0;
		}
	} else if ((txd && txd->state < req_state) ||
		   txd == NULL ||
		   txd == banned) {
		struct sub_device *sdev = NULL;
		uint8_t i;

		/* Using an acceptable device */
		FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
			if (sdev == banned)
				continue;
			DEBUG("Switching tx_dev to sub_device %d", i);
			PRIV(dev)->subs_tx = i;
			break;
		}
		if (i >= PRIV(dev)->subs_tail || sdev == NULL) {
			DEBUG("No device ready, deactivating tx_dev");
			PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
		}
	} else {
		return;
	}
	set_burst_fn(dev, 0);
	rte_wmb();
}

/*
 * Adjust the error value and rte_errno to the fail-safe actual error value.
 */
static inline int
fs_err(struct sub_device *sdev, int err)
{
	/* A device removal shouldn't be reported as an error. */
	if (sdev->remove == 1 || err == -EIO)
		return rte_errno = 0;
	return err;
}
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */