/* SPDX-License-Identifier: BSD-3-Clause
 * Copyright 2017 6WIND S.A.
 * Copyright 2017 Mellanox Technologies, Ltd
 */

#ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_
#define _RTE_ETH_FAILSAFE_PRIVATE_H_

#include <errno.h>
#include <stdint.h>
#include <string.h>
#include <sys/queue.h>
#include <pthread.h>

#include <rte_atomic.h>
#include <rte_dev.h>
#include <rte_ethdev_driver.h>
#include <rte_devargs.h>
#include <rte_errno.h>
#include <rte_flow.h>
#include <rte_interrupts.h>

#define FAILSAFE_DRIVER_NAME "Fail-safe PMD"
#define FAILSAFE_OWNER_NAME "Fail-safe"

#define PMD_FAILSAFE_MAC_KVARG "mac"
#define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll"
#define PMD_FAILSAFE_PARAM_STRING	\
	"dev(<ifc>),"			\
	"exec(<shell command>),"	\
	"fd(<fd number>),"		\
	"mac=mac_addr,"			\
	"hotplug_poll=u64"		\
	""

#define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000

#define FAILSAFE_MAX_ETHPORTS 2
#define FAILSAFE_MAX_ETHADDR 128

#define DEVARGS_MAXLEN 4096
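/*
 * Illustrative devargs built from the parameters above (not part of the
 * driver; the sub-device strings below are examples only). A fail-safe
 * port is typically created from the EAL command line, e.g.:
 *
 *   --vdev 'net_failsafe0,dev(0000:02:00.0),dev(net_tap0,iface=fsrx),mac=de:ad:be:ef:01:02'
 *
 * "hotplug_poll" overrides the default period, in milliseconds, between
 * two hot-plug checks (FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS).
 */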
enum rxp_service_state {
	SS_NO_SERVICE = 0,
	SS_REGISTERED,
	SS_READY,
	SS_RUNNING,
};

/* TYPES */

struct rx_proxy {
	/* epoll file descriptor */
	int efd;
	/* event vector to be used by epoll */
	struct rte_epoll_event *evec;
	/* rte service id */
	uint32_t sid;
	/* service core id */
	uint32_t scid;
	enum rxp_service_state sstate;
};

struct rxq {
	struct fs_priv *priv;
	uint16_t qid;
	/* next sub_device to poll */
	struct sub_device *sdev;
	unsigned int socket_id;
	int event_fd;
	unsigned int enable_events:1;
	struct rte_eth_rxq_info info;
	rte_atomic64_t refcnt[];
};

struct txq {
	struct fs_priv *priv;
	uint16_t qid;
	unsigned int socket_id;
	struct rte_eth_txq_info info;
	rte_atomic64_t refcnt[];
};

struct rte_flow {
	TAILQ_ENTRY(rte_flow) next;
	/* sub_flows */
	struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS];
	/* flow description for synchronization */
	struct rte_flow_conv_rule rule;
	uint8_t rule_data[];
};

enum dev_state {
	DEV_UNDEFINED,
	DEV_PARSED,
	DEV_PROBED,
	DEV_ACTIVE,
	DEV_STARTED,
};

struct fs_stats {
	struct rte_eth_stats stats;
	uint64_t timestamp;
};

struct sub_device {
	/* Exhaustive DPDK device description */
	struct sub_device *next;
	struct rte_devargs devargs;
	struct rte_bus *bus;
	struct rte_device *dev;
	struct rte_eth_dev *edev;
	uint8_t sid;
	/* Device state machine */
	enum dev_state state;
	/* Last stats snapshot passed to user */
	struct fs_stats stats_snapshot;
	/* Some devices are defined as a command line */
	char *cmdline;
	/* Others are retrieved through a file descriptor */
	char *fd_str;
	/* fail-safe device backreference */
	struct rte_eth_dev *fs_dev;
	/* flag calling for recollection */
	volatile unsigned int remove:1;
	/* flow isolation state */
	int flow_isolated:1;
	/* RMV callback registration state */
	unsigned int rmv_callback:1;
	/* LSC callback registration state */
	unsigned int lsc_callback:1;
};

struct fs_priv {
	struct rte_eth_dev *dev;
	/*
	 * Set of sub_devices.
	 * subs[0] is the preferred device,
	 * any other is just another slave.
	 */
	struct sub_device *subs;
	uint8_t subs_head; /* if head == tail, no subs */
	uint8_t subs_tail; /* first invalid */
	uint8_t subs_tx; /* current emitting device */
	uint8_t current_probed;
	/* flow mapping */
	TAILQ_HEAD(sub_flows, rte_flow) flow_list;
	/* current number of mac_addr slots allocated. */
	uint32_t nb_mac_addr;
	struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR];
	uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR];
	uint32_t nb_mcast_addr;
	struct ether_addr *mcast_addrs;
	/* current capabilities */
	struct rte_eth_dev_info infos;
	struct rte_eth_dev_owner my_owner; /* Unique owner. */
	struct rte_intr_handle intr_handle; /* Port interrupt handle. */
	/*
	 * Fail-safe state machine.
	 * This level tracks the state of the EAL and the ethdev layer
	 * at large, as defined by the user application.
	 * It then steers the sub_devices toward the same synchronized
	 * state.
	 */
	enum dev_state state;
	struct rte_eth_stats stats_accumulator;
	/*
	 * Rx interrupts/events proxy.
	 * The PMD issues Rx events to the EAL on behalf of its subdevices.
	 * It does so by registering an event-fd for each of its queues with
	 * the EAL. A PMD service thread listens to all the Rx events from
	 * the subdevices; when a subdevice issues an Rx event, it is caught
	 * by this service, which in turn triggers an Rx event on the
	 * appropriate fail-safe Rx queue.
	 */
	struct rx_proxy rxp;
	pthread_mutex_t hotplug_mutex;
	/* Hot-plug mutex is locked by the alarm mechanism. */
	volatile unsigned int alarm_lock:1;
	unsigned int pending_alarm:1; /* An alarm is pending */
	/* flow isolation state */
	int flow_isolated:1;
};

/* FAILSAFE_INTR */

int failsafe_rx_intr_install(struct rte_eth_dev *dev);
void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev);
int failsafe_rx_intr_install_subdevice(struct sub_device *sdev);
void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev);

/* MISC */

int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev);
int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev);

/* RX / TX */

void set_burst_fn(struct rte_eth_dev *dev, int force_safe);

uint16_t failsafe_rx_burst(void *rxq,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst(void *txq,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

uint16_t failsafe_rx_burst_fast(void *rxq,
		struct rte_mbuf **rx_pkts, uint16_t nb_pkts);
uint16_t failsafe_tx_burst_fast(void *txq,
		struct rte_mbuf **tx_pkts, uint16_t nb_pkts);

/* ARGS */

int failsafe_args_parse(struct rte_eth_dev *dev, const char *params);
void failsafe_args_free(struct rte_eth_dev *dev);
int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params);
int failsafe_args_parse_subs(struct rte_eth_dev *dev);

/* EAL */

int failsafe_eal_init(struct rte_eth_dev *dev);
int failsafe_eal_uninit(struct rte_eth_dev *dev);

/* ETH_DEV */

int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev);
void failsafe_eth_dev_unregister_callbacks(struct sub_device *sdev);
void failsafe_dev_remove(struct rte_eth_dev *dev);
void failsafe_stats_increment(struct rte_eth_stats *to,
		struct rte_eth_stats *from);
int failsafe_eth_rmv_event_callback(uint16_t port_id,
				    enum rte_eth_event_type type,
				    void *arg, void *out);
int failsafe_eth_lsc_event_callback(uint16_t port_id,
				    enum rte_eth_event_type event,
				    void *cb_arg, void *out);
int failsafe_eth_new_event_callback(uint16_t port_id,
				    enum rte_eth_event_type event,
				    void *cb_arg, void *out);

/* GLOBALS */

extern const char pmd_failsafe_driver_name[];
extern const struct eth_dev_ops failsafe_ops;
extern const struct rte_flow_ops fs_flow_ops;
extern uint64_t hotplug_poll;
extern int mac_from_arg;

/* HELPERS */

/* dev: (struct rte_eth_dev *) fail-safe device */
#define PRIV(dev) \
	((struct fs_priv *)(dev)->data->dev_private)

/* sdev: (struct sub_device *) */
#define ETH(sdev) \
	((sdev)->edev)

/* sdev: (struct sub_device *) */
#define PORT_ID(sdev) \
	(ETH(sdev)->data->port_id)

/* sdev: (struct sub_device *) */
#define SUB_ID(sdev) \
	((sdev)->sid)

/**
 * Stateful iterator construct over fail-safe sub-devices:
 * s:     (struct sub_device *), iterator
 * i:     (uint8_t), increment
 * dev:   (struct rte_eth_dev *), fail-safe ethdev
 * state: (enum dev_state), minimum acceptable device state
 */
#define FOREACH_SUBDEV_STATE(s, i, dev, state)		\
	for (s = fs_find_next((dev), 0, state, &i);	\
	     s != NULL;					\
	     s = fs_find_next((dev), i + 1, state, &i))

/**
 * Iterator construct over fail-safe sub-devices:
 * s:   (struct sub_device *), iterator
 * i:   (uint8_t), increment
 * dev: (struct rte_eth_dev *), fail-safe ethdev
 */
#define FOREACH_SUBDEV(s, i, dev)			\
	FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED)
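/*
 * Usage sketch for the iterators above (illustrative only, not part of
 * the driver; the helper name is hypothetical). It counts the
 * sub-devices that are at least probed:
 *
 *	static uint8_t
 *	fs_count_probed(struct rte_eth_dev *dev)
 *	{
 *		struct sub_device *sdev;
 *		uint8_t i;
 *		uint8_t n = 0;
 *
 *		FOREACH_SUBDEV_STATE(sdev, i, dev, DEV_PROBED)
 *			n++;
 *		return n;
 *	}
 *
 * FOREACH_SUBDEV() is the same construct with no state requirement.
 */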
/* dev: (struct rte_eth_dev *) fail-safe device */
#define PREFERRED_SUBDEV(dev) \
	(&PRIV(dev)->subs[0])

/* dev: (struct rte_eth_dev *) fail-safe device */
#define TX_SUBDEV(dev)							\
	(PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL		\
	 : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \
	 : &PRIV(dev)->subs[PRIV(dev)->subs_tx]))

/**
 * s:   (struct sub_device *)
 * ops: (struct eth_dev_ops) member
 */
#define SUBOPS(s, ops) \
	(ETH(s)->dev_ops->ops)

/**
 * Atomic guard
 */

/**
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_P(a) \
	rte_atomic64_set(&(a), 1)

/**
 * a: (rte_atomic64_t)
 */
#define FS_ATOMIC_V(a) \
	rte_atomic64_set(&(a), 0)

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_RX(s, i) \
	rte_atomic64_read( \
	 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \
	)

/**
 * s: (struct sub_device *)
 * i: uint16_t qid
 */
#define FS_ATOMIC_TX(s, i) \
	rte_atomic64_read( \
	 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \
	)
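/*
 * Sketch of the guard protocol around a sub-device burst (illustrative
 * only; the actual datapath lives in failsafe_rxtx.c). Given the queue,
 * rx_pkts and nb_pkts arguments of failsafe_rx_burst(), the per-sub-device
 * refcnt slot is raised before calling into the sub-device burst function
 * and dropped right after, so that FS_ATOMIC_RX()/FS_ATOMIC_TX() readers
 * on the removal path can tell whether a queue is still in flight:
 *
 *	struct rxq *rxq = queue;
 *	struct sub_device *sdev = rxq->sdev;
 *	uint16_t nb_rx;
 *
 *	FS_ATOMIC_P(rxq->refcnt[sdev->sid]);
 *	nb_rx = ETH(sdev)->rx_pkt_burst(
 *			ETH(sdev)->data->rx_queues[rxq->qid],
 *			rx_pkts, nb_pkts);
 *	FS_ATOMIC_V(rxq->refcnt[sdev->sid]);
 */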
#ifdef RTE_EXEC_ENV_BSDAPP
#define FS_THREADID_TYPE void*
#define FS_THREADID_FMT  "p"
#else
#define FS_THREADID_TYPE unsigned long
#define FS_THREADID_FMT  "lu"
#endif

extern int failsafe_logtype;

#define LOG__(l, m, ...) \
	rte_log(RTE_LOG_ ## l, failsafe_logtype, \
		"net_failsafe: " m "%c", __VA_ARGS__)

#define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n')
#define DEBUG(...) LOG_(DEBUG, __VA_ARGS__)
#define INFO(...) LOG_(INFO, __VA_ARGS__)
#define WARN(...) LOG_(WARNING, __VA_ARGS__)
#define ERROR(...) LOG_(ERR, __VA_ARGS__)

/* inlined functions */

static inline struct sub_device *
fs_find_next(struct rte_eth_dev *dev,
	     uint8_t sid,
	     enum dev_state min_state,
	     uint8_t *sid_out)
{
	struct sub_device *subs;
	uint8_t tail;

	subs = PRIV(dev)->subs;
	tail = PRIV(dev)->subs_tail;
	while (sid < tail) {
		if (subs[sid].state >= min_state)
			break;
		sid++;
	}
	*sid_out = sid;
	if (sid >= tail)
		return NULL;
	return &subs[sid];
}

/*
 * Lock hot-plug mutex.
 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
 */
static inline int
fs_lock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
	int ret;

	if (is_alarm) {
		ret = pthread_mutex_trylock(&PRIV(dev)->hotplug_mutex);
		if (ret) {
			DEBUG("Failed to acquire hot-plug mutex (%s),"
			      " will try again later...", strerror(ret));
			return ret;
		}
		PRIV(dev)->alarm_lock = 1;
	} else {
		ret = pthread_mutex_lock(&PRIV(dev)->hotplug_mutex);
		if (ret) {
			ERROR("Cannot lock mutex (%s)", strerror(ret));
			return ret;
		}
	}
	DEBUG("Hot-plug mutex was locked by thread %" FS_THREADID_FMT "%s",
	      (FS_THREADID_TYPE)pthread_self(),
	      PRIV(dev)->alarm_lock ? " by the hot-plug alarm" : "");
	return ret;
}

/*
 * Unlock hot-plug mutex.
 * is_alarm means that the caller is, for sure, the hot-plug alarm mechanism.
 */
static inline void
fs_unlock(struct rte_eth_dev *dev, unsigned int is_alarm)
{
	int ret;
	unsigned int prev_alarm_lock = PRIV(dev)->alarm_lock;

	if (is_alarm) {
		RTE_ASSERT(PRIV(dev)->alarm_lock == 1);
		PRIV(dev)->alarm_lock = 0;
	}
	ret = pthread_mutex_unlock(&PRIV(dev)->hotplug_mutex);
	if (ret)
		ERROR("Cannot unlock hot-plug mutex (%s)", strerror(ret));
	else
		DEBUG("Hot-plug mutex was unlocked by thread %" FS_THREADID_FMT "%s",
		      (FS_THREADID_TYPE)pthread_self(),
		      prev_alarm_lock ? " by the hot-plug alarm" : "");
}
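/*
 * Typical pairing of the helpers above (illustrative only). Control-path
 * callers pass is_alarm == 0 and block until the mutex is acquired; the
 * hot-plug alarm callback passes is_alarm == 1 and backs off when the
 * mutex is already held:
 *
 *	int ret;
 *
 *	ret = fs_lock(dev, 0);
 *	if (ret != 0)
 *		return ret;
 *	(... mutate sub-device state under the mutex ...)
 *	fs_unlock(dev, 0);
 */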
/*
 * Switch emitting device.
 * If banned is set, banned must not be considered for
 * the role of emitting device.
 */
static inline void
fs_switch_dev(struct rte_eth_dev *dev,
	      struct sub_device *banned)
{
	struct sub_device *txd;
	enum dev_state req_state;

	req_state = PRIV(dev)->state;
	txd = TX_SUBDEV(dev);
	if (PREFERRED_SUBDEV(dev)->state >= req_state &&
	    PREFERRED_SUBDEV(dev) != banned) {
		if (txd != PREFERRED_SUBDEV(dev) &&
		    (txd == NULL ||
		     (req_state == DEV_STARTED) ||
		     (txd && txd->state < DEV_STARTED))) {
			DEBUG("Switching tx_dev to preferred sub_device");
			PRIV(dev)->subs_tx = 0;
		}
	} else if ((txd && txd->state < req_state) ||
		   txd == NULL ||
		   txd == banned) {
		struct sub_device *sdev = NULL;
		uint8_t i;

		/* Using an acceptable device */
		FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) {
			if (sdev == banned)
				continue;
			DEBUG("Switching tx_dev to sub_device %d", i);
			PRIV(dev)->subs_tx = i;
			break;
		}
		if (i >= PRIV(dev)->subs_tail || sdev == NULL) {
			DEBUG("No device ready, deactivating tx_dev");
			PRIV(dev)->subs_tx = PRIV(dev)->subs_tail;
		}
	} else {
		return;
	}
	set_burst_fn(dev, 0);
	rte_wmb();
}

/*
 * Adjust the error value and rte_errno to the fail-safe actual error value.
 */
static inline int
fs_err(struct sub_device *sdev, int err)
{
	/* A device removal shouldn't be reported as an error. */
	if (sdev->remove == 1 || err == -EIO)
		return rte_errno = 0;
	return err;
}
#endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */