1 /*- 2 * BSD LICENSE 3 * 4 * Copyright 2017 6WIND S.A. 5 * Copyright 2017 Mellanox. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * * Neither the name of 6WIND S.A. nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 24 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 25 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 26 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 27 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 28 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 29 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 30 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 31 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 32 */ 33 34 #ifndef _RTE_ETH_FAILSAFE_PRIVATE_H_ 35 #define _RTE_ETH_FAILSAFE_PRIVATE_H_ 36 37 #include <sys/queue.h> 38 39 #include <rte_atomic.h> 40 #include <rte_dev.h> 41 #include <rte_ethdev_driver.h> 42 #include <rte_devargs.h> 43 #include <rte_interrupts.h> 44 45 #define FAILSAFE_DRIVER_NAME "Fail-safe PMD" 46 47 #define PMD_FAILSAFE_MAC_KVARG "mac" 48 #define PMD_FAILSAFE_HOTPLUG_POLL_KVARG "hotplug_poll" 49 #define PMD_FAILSAFE_PARAM_STRING \ 50 "dev(<ifc>)," \ 51 "exec(<shell command>)," \ 52 "fd(<fd number>)," \ 53 "mac=mac_addr," \ 54 "hotplug_poll=u64" \ 55 "" 56 57 #define FAILSAFE_HOTPLUG_DEFAULT_TIMEOUT_MS 2000 58 59 #define FAILSAFE_MAX_ETHPORTS 2 60 #define FAILSAFE_MAX_ETHADDR 128 61 62 #define DEVARGS_MAXLEN 4096 63 64 enum rxp_service_state { 65 SS_NO_SERVICE = 0, 66 SS_REGISTERED, 67 SS_READY, 68 SS_RUNNING, 69 }; 70 71 /* TYPES */ 72 73 struct rx_proxy { 74 /* epoll file descriptor */ 75 int efd; 76 /* event vector to be used by epoll */ 77 struct rte_epoll_event *evec; 78 /* rte service id */ 79 uint32_t sid; 80 /* service core id */ 81 uint32_t scid; 82 enum rxp_service_state sstate; 83 }; 84 85 struct rxq { 86 struct fs_priv *priv; 87 uint16_t qid; 88 /* next sub_device to poll */ 89 struct sub_device *sdev; 90 unsigned int socket_id; 91 int event_fd; 92 unsigned int enable_events:1; 93 struct rte_eth_rxq_info info; 94 rte_atomic64_t refcnt[]; 95 }; 96 97 struct txq { 98 struct fs_priv *priv; 99 uint16_t qid; 100 unsigned int socket_id; 101 struct rte_eth_txq_info info; 102 rte_atomic64_t refcnt[]; 103 }; 104 105 struct rte_flow { 106 TAILQ_ENTRY(rte_flow) next; 107 /* sub_flows */ 108 struct rte_flow *flows[FAILSAFE_MAX_ETHPORTS]; 109 /* flow description for synchronization */ 110 struct rte_flow_desc *fd; 111 }; 112 113 enum dev_state { 114 DEV_UNDEFINED, 115 DEV_PARSED, 116 DEV_PROBED, 117 DEV_ACTIVE, 118 DEV_STARTED, 119 }; 120 121 struct fs_stats { 122 struct rte_eth_stats stats; 123 uint64_t timestamp; 124 }; 125 126 struct sub_device { 127 /* Exhaustive DPDK device description */ 128 struct sub_device *next; 129 struct rte_devargs devargs; 130 struct rte_bus *bus; 131 struct rte_device *dev; 132 struct rte_eth_dev *edev; 133 uint8_t sid; 134 /* Device state machine */ 135 enum dev_state state; 136 /* Last stats snapshot passed to user */ 137 struct fs_stats stats_snapshot; 138 /* Some device are defined as a command line */ 139 char *cmdline; 140 /* Others are retrieved through a file descriptor */ 141 char *fd_str; 142 /* fail-safe device backreference */ 143 struct rte_eth_dev *fs_dev; 144 /* flag calling for recollection */ 145 volatile unsigned int remove:1; 146 /* flow isolation state */ 147 int flow_isolated:1; 148 }; 149 150 struct fs_priv { 151 struct rte_eth_dev *dev; 152 /* 153 * Set of sub_devices. 154 * subs[0] is the preferred device 155 * any other is just another slave 156 */ 157 struct sub_device *subs; 158 uint8_t subs_head; /* if head == tail, no subs */ 159 uint8_t subs_tail; /* first invalid */ 160 uint8_t subs_tx; /* current emitting device */ 161 uint8_t current_probed; 162 /* flow mapping */ 163 TAILQ_HEAD(sub_flows, rte_flow) flow_list; 164 /* current number of mac_addr slots allocated. */ 165 uint32_t nb_mac_addr; 166 struct ether_addr mac_addrs[FAILSAFE_MAX_ETHADDR]; 167 uint32_t mac_addr_pool[FAILSAFE_MAX_ETHADDR]; 168 /* current capabilities */ 169 struct rte_eth_dev_info infos; 170 struct rte_intr_handle intr_handle; /* Port interrupt handle. */ 171 /* 172 * Fail-safe state machine. 173 * This level will be tracking state of the EAL and eth 174 * layer at large as defined by the user application. 175 * It will then steer the sub_devices toward the same 176 * synchronized state. 177 */ 178 enum dev_state state; 179 struct rte_eth_stats stats_accumulator; 180 /* 181 * Rx interrupts/events proxy. 182 * The PMD issues Rx events to the EAL on behalf of its subdevices, 183 * it does that by registering an event-fd for each of its queues with 184 * the EAL. A PMD service thread listens to all the Rx events from the 185 * subdevices, when an Rx event is issued by a subdevice it will be 186 * caught by this service with will trigger an Rx event in the 187 * appropriate failsafe Rx queue. 188 */ 189 struct rx_proxy rxp; 190 unsigned int pending_alarm:1; /* An alarm is pending */ 191 /* flow isolation state */ 192 int flow_isolated:1; 193 }; 194 195 /* FAILSAFE_INTR */ 196 197 int failsafe_rx_intr_install(struct rte_eth_dev *dev); 198 void failsafe_rx_intr_uninstall(struct rte_eth_dev *dev); 199 int failsafe_rx_intr_install_subdevice(struct sub_device *sdev); 200 void failsafe_rx_intr_uninstall_subdevice(struct sub_device *sdev); 201 202 /* MISC */ 203 204 int failsafe_hotplug_alarm_install(struct rte_eth_dev *dev); 205 int failsafe_hotplug_alarm_cancel(struct rte_eth_dev *dev); 206 207 /* RX / TX */ 208 209 void set_burst_fn(struct rte_eth_dev *dev, int force_safe); 210 211 uint16_t failsafe_rx_burst(void *rxq, 212 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 213 uint16_t failsafe_tx_burst(void *txq, 214 struct rte_mbuf **tx_pkts, uint16_t nb_pkts); 215 216 uint16_t failsafe_rx_burst_fast(void *rxq, 217 struct rte_mbuf **rx_pkts, uint16_t nb_pkts); 218 uint16_t failsafe_tx_burst_fast(void *txq, 219 struct rte_mbuf **tx_pkts, uint16_t nb_pkts); 220 221 /* ARGS */ 222 223 int failsafe_args_parse(struct rte_eth_dev *dev, const char *params); 224 void failsafe_args_free(struct rte_eth_dev *dev); 225 int failsafe_args_count_subdevice(struct rte_eth_dev *dev, const char *params); 226 int failsafe_args_parse_subs(struct rte_eth_dev *dev); 227 228 /* EAL */ 229 230 int failsafe_eal_init(struct rte_eth_dev *dev); 231 int failsafe_eal_uninit(struct rte_eth_dev *dev); 232 233 /* ETH_DEV */ 234 235 int failsafe_eth_dev_state_sync(struct rte_eth_dev *dev); 236 void failsafe_dev_remove(struct rte_eth_dev *dev); 237 void failsafe_stats_increment(struct rte_eth_stats *to, 238 struct rte_eth_stats *from); 239 int failsafe_eth_rmv_event_callback(uint16_t port_id, 240 enum rte_eth_event_type type, 241 void *arg, void *out); 242 int failsafe_eth_lsc_event_callback(uint16_t port_id, 243 enum rte_eth_event_type event, 244 void *cb_arg, void *out); 245 246 /* GLOBALS */ 247 248 extern const char pmd_failsafe_driver_name[]; 249 extern const struct eth_dev_ops failsafe_ops; 250 extern const struct rte_flow_ops fs_flow_ops; 251 extern uint64_t hotplug_poll; 252 extern int mac_from_arg; 253 254 /* HELPERS */ 255 256 /* dev: (struct rte_eth_dev *) fail-safe device */ 257 #define PRIV(dev) \ 258 ((struct fs_priv *)(dev)->data->dev_private) 259 260 /* sdev: (struct sub_device *) */ 261 #define ETH(sdev) \ 262 ((sdev)->edev) 263 264 /* sdev: (struct sub_device *) */ 265 #define PORT_ID(sdev) \ 266 (ETH(sdev)->data->port_id) 267 268 /* sdev: (struct sub_device *) */ 269 #define SUB_ID(sdev) \ 270 ((sdev)->sid) 271 272 /** 273 * Stateful iterator construct over fail-safe sub-devices: 274 * s: (struct sub_device *), iterator 275 * i: (uint8_t), increment 276 * dev: (struct rte_eth_dev *), fail-safe ethdev 277 * state: (enum dev_state), minimum acceptable device state 278 */ 279 #define FOREACH_SUBDEV_STATE(s, i, dev, state) \ 280 for (s = fs_find_next((dev), 0, state, &i); \ 281 s != NULL; \ 282 s = fs_find_next((dev), i + 1, state, &i)) 283 284 /** 285 * Iterator construct over fail-safe sub-devices: 286 * s: (struct sub_device *), iterator 287 * i: (uint8_t), increment 288 * dev: (struct rte_eth_dev *), fail-safe ethdev 289 */ 290 #define FOREACH_SUBDEV(s, i, dev) \ 291 FOREACH_SUBDEV_STATE(s, i, dev, DEV_UNDEFINED) 292 293 /* dev: (struct rte_eth_dev *) fail-safe device */ 294 #define PREFERRED_SUBDEV(dev) \ 295 (&PRIV(dev)->subs[0]) 296 297 /* dev: (struct rte_eth_dev *) fail-safe device */ 298 #define TX_SUBDEV(dev) \ 299 (PRIV(dev)->subs_tx >= PRIV(dev)->subs_tail ? NULL \ 300 : (PRIV(dev)->subs[PRIV(dev)->subs_tx].state < DEV_PROBED ? NULL \ 301 : &PRIV(dev)->subs[PRIV(dev)->subs_tx])) 302 303 /** 304 * s: (struct sub_device *) 305 * ops: (struct eth_dev_ops) member 306 */ 307 #define SUBOPS(s, ops) \ 308 (ETH(s)->dev_ops->ops) 309 310 /** 311 * Atomic guard 312 */ 313 314 /** 315 * a: (rte_atomic64_t) 316 */ 317 #define FS_ATOMIC_P(a) \ 318 rte_atomic64_set(&(a), 1) 319 320 /** 321 * a: (rte_atomic64_t) 322 */ 323 #define FS_ATOMIC_V(a) \ 324 rte_atomic64_set(&(a), 0) 325 326 /** 327 * s: (struct sub_device *) 328 * i: uint16_t qid 329 */ 330 #define FS_ATOMIC_RX(s, i) \ 331 rte_atomic64_read( \ 332 &((struct rxq *)((s)->fs_dev->data->rx_queues[i]))->refcnt[(s)->sid] \ 333 ) 334 /** 335 * s: (struct sub_device *) 336 * i: uint16_t qid 337 */ 338 #define FS_ATOMIC_TX(s, i) \ 339 rte_atomic64_read( \ 340 &((struct txq *)((s)->fs_dev->data->tx_queues[i]))->refcnt[(s)->sid] \ 341 ) 342 343 #define LOG__(level, m, ...) \ 344 RTE_LOG(level, PMD, "net_failsafe: " m "%c", __VA_ARGS__) 345 #define LOG_(level, ...) LOG__(level, __VA_ARGS__, '\n') 346 #define DEBUG(...) LOG_(DEBUG, __VA_ARGS__) 347 #define INFO(...) LOG_(INFO, __VA_ARGS__) 348 #define WARN(...) LOG_(WARNING, __VA_ARGS__) 349 #define ERROR(...) LOG_(ERR, __VA_ARGS__) 350 351 /* inlined functions */ 352 353 static inline struct sub_device * 354 fs_find_next(struct rte_eth_dev *dev, 355 uint8_t sid, 356 enum dev_state min_state, 357 uint8_t *sid_out) 358 { 359 struct sub_device *subs; 360 uint8_t tail; 361 362 subs = PRIV(dev)->subs; 363 tail = PRIV(dev)->subs_tail; 364 while (sid < tail) { 365 if (subs[sid].state >= min_state) 366 break; 367 sid++; 368 } 369 *sid_out = sid; 370 if (sid >= tail) 371 return NULL; 372 return &subs[sid]; 373 } 374 375 /* 376 * Switch emitting device. 377 * If banned is set, banned must not be considered for 378 * the role of emitting device. 379 */ 380 static inline void 381 fs_switch_dev(struct rte_eth_dev *dev, 382 struct sub_device *banned) 383 { 384 struct sub_device *txd; 385 enum dev_state req_state; 386 387 req_state = PRIV(dev)->state; 388 txd = TX_SUBDEV(dev); 389 if (PREFERRED_SUBDEV(dev)->state >= req_state && 390 PREFERRED_SUBDEV(dev) != banned) { 391 if (txd != PREFERRED_SUBDEV(dev) && 392 (txd == NULL || 393 (req_state == DEV_STARTED) || 394 (txd && txd->state < DEV_STARTED))) { 395 DEBUG("Switching tx_dev to preferred sub_device"); 396 PRIV(dev)->subs_tx = 0; 397 } 398 } else if ((txd && txd->state < req_state) || 399 txd == NULL || 400 txd == banned) { 401 struct sub_device *sdev = NULL; 402 uint8_t i; 403 404 /* Using acceptable device */ 405 FOREACH_SUBDEV_STATE(sdev, i, dev, req_state) { 406 if (sdev == banned) 407 continue; 408 DEBUG("Switching tx_dev to sub_device %d", 409 i); 410 PRIV(dev)->subs_tx = i; 411 break; 412 } 413 if (i >= PRIV(dev)->subs_tail || sdev == NULL) { 414 DEBUG("No device ready, deactivating tx_dev"); 415 PRIV(dev)->subs_tx = PRIV(dev)->subs_tail; 416 } 417 } else { 418 return; 419 } 420 set_burst_fn(dev, 0); 421 rte_wmb(); 422 } 423 424 /* 425 * Adjust error value and rte_errno to the fail-safe actual error value. 426 */ 427 static inline int 428 fs_err(struct sub_device *sdev, int err) 429 { 430 /* A device removal shouldn't be reported as an error. */ 431 if (sdev->remove == 1 || err == -EIO) 432 return rte_errno = 0; 433 return err; 434 } 435 #endif /* _RTE_ETH_FAILSAFE_PRIVATE_H_ */ 436