xref: /dpdk/drivers/common/cnxk/roc_dev.c (revision e9fd1ebf981f361844aea9ec94e17f4bda5e1479)
1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2021 Marvell.
3  */
4 
5 #include <fcntl.h>
6 #include <inttypes.h>
7 #include <string.h>
8 #include <sys/mman.h>
9 #include <unistd.h>
10 
11 #include "roc_api.h"
12 #include "roc_priv.h"
13 
14 /* PCI Extended capability ID */
15 #define ROC_PCI_EXT_CAP_ID_SRIOV 0x10 /* SRIOV cap */
16 
17 /* Single Root I/O Virtualization */
18 #define ROC_PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
19 
20 /* VF Mbox handler thread name */
21 #define MBOX_HANDLER_NAME_MAX_LEN RTE_THREAD_INTERNAL_NAME_SIZE
22 
23 /* VF interrupt message pending bits - mbox or flr */
24 #define ROC_DEV_MBOX_PEND BIT_ULL(0)
25 #define ROC_DEV_FLR_PEND  BIT_ULL(1)
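
/*
 * These bits are set in dev->sync.msg_avail (under dev->sync.mutex) by the
 * PF's mbox and FLR interrupt handlers and consumed by the PF<->VF message
 * handler thread (pf_vf_mbox_thread_main()).
 */
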
26 static void *
27 mbox_mem_map(off_t off, size_t size)
28 {
29 	void *va = MAP_FAILED;
30 	int mem_fd;
31 
32 	if (size <= 0 || !off) {
33 		plt_err("Invalid mbox area off 0x%jx size %zu", (uintmax_t)off, size);
34 		goto error;
35 	}
36 
37 	mem_fd = open("/dev/mem", O_RDWR);
38 	if (mem_fd < 0)
39 		goto error;
40 
41 	va = plt_mmap(NULL, size, PLT_PROT_READ | PLT_PROT_WRITE,
42 		      PLT_MAP_SHARED, mem_fd, off);
43 	close(mem_fd);
44 
45 	if (va == MAP_FAILED)
46 		plt_err("Failed to mmap sz=0x%zx, fd=%d, off=%jd", size, mem_fd,
47 			(intmax_t)off);
48 error:
49 	return va;
50 }
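
/*
 * On CN9K, mbox_mem_map() is used to map the VF mailbox region whose
 * physical address is advertised via RVU_PF_VF_BAR4_ADDR (see
 * dev_vf_mbase_get() below).
 */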
51 
52 static void
53 mbox_mem_unmap(void *va, size_t size)
54 {
55 	if (va)
56 		munmap(va, size);
57 }
58 
59 static int
60 pf_af_sync_msg(struct dev *dev, struct mbox_msghdr **rsp)
61 {
62 	uint32_t timeout = 0, sleep = 1;
63 	struct mbox *mbox = dev->mbox;
64 	struct mbox_dev *mdev = &mbox->dev[0];
65 
66 	volatile uint64_t int_status = 0;
67 	struct mbox_msghdr *msghdr;
68 	uint64_t off;
69 	int rc = 0;
70 
71 	/* We need to disable PF interrupts. We are in timer interrupt */
72 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
73 
74 	/* Send message */
75 	mbox_msg_send(mbox, 0);
76 
77 	do {
78 		plt_delay_ms(sleep);
79 		timeout += sleep;
80 		if (timeout >= mbox->rsp_tmo) {
81 			plt_err("Message timeout: %dms", mbox->rsp_tmo);
82 			rc = -EIO;
83 			break;
84 		}
85 		int_status = plt_read64(dev->bar2 + RVU_PF_INT);
86 	} while ((int_status & 0x1) != 0x1);
87 
88 	/* Clear */
89 	plt_write64(int_status, dev->bar2 + RVU_PF_INT);
90 
91 	/* Enable interrupts */
92 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1S);
93 
94 	if (rc == 0) {
95 		/* Get message */
96 		off = mbox->rx_start +
97 		      PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
98 		msghdr = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + off);
99 		if (rsp)
100 			*rsp = msghdr;
101 		rc = msghdr->rc;
102 	}
103 
104 	return rc;
105 }
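
/*
 * pf_af_sync_msg() is a polled variant of the AF mailbox exchange: the AF
 * completion interrupt is masked and RVU_PF_INT is polled instead, so it is
 * safe to call from interrupt/thread context (see vf_flr_send_msg() below).
 */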
106 
107 /* PF sends the messages to AF, waits for the responses and forwards
108  * them to the VF.
109  */
110 static int
111 af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg)
112 {
113 	uint32_t timeout = 0, sleep = 1;
114 	struct mbox *mbox = dev->mbox;
115 	struct mbox_dev *mdev = &mbox->dev[0];
116 	volatile uint64_t int_status;
117 	struct mbox_hdr *req_hdr;
118 	struct mbox_msghdr *msg;
119 	struct mbox_msghdr *rsp;
120 	uint64_t offset;
121 	size_t size;
122 	int i;
123 
124 	/* We need to disable PF interrupts. We are in timer interrupt */
125 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
126 
127 	/* Send message to AF */
128 	mbox_msg_send(mbox, 0);
129 
130 	/* Wait for AF response */
131 	do {
132 		plt_delay_ms(sleep);
133 		timeout++;
134 		if (timeout >= mbox->rsp_tmo) {
135 			plt_err("Routed messages %d timeout: %dms", num_msg,
136 				mbox->rsp_tmo);
137 			break;
138 		}
139 		int_status = plt_read64(dev->bar2 + RVU_PF_INT);
140 	} while ((int_status & 0x1) != 0x1);
141 
142 	/* Clear */
143 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT);
144 
145 	/* Enable interrupts */
146 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1S);
147 
148 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
149 	if (req_hdr->num_msgs != num_msg)
150 		plt_err("Routed messages: %d received: %d", num_msg,
151 			req_hdr->num_msgs);
152 
153 	/* Get messages from mbox */
154 	offset = mbox->rx_start +
155 		 PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
156 	for (i = 0; i < req_hdr->num_msgs; i++) {
157 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
158 		size = mbox->rx_start + msg->next_msgoff - offset;
159 
160 		/* Reserve PF/VF mbox message */
161 		size = PLT_ALIGN(size, MBOX_MSG_ALIGN);
162 		rsp = mbox_alloc_msg(&dev->mbox_vfpf, vf, size);
163 		if (!rsp) {
164 			plt_err("Failed to reserve VF%d message", vf);
165 			continue;
166 		}
167 
168 		mbox_rsp_init(msg->id, rsp);
169 
170 		/* Copy message from AF<->PF mbox to PF<->VF mbox */
171 		mbox_memcpy((uint8_t *)rsp + sizeof(struct mbox_msghdr),
172 			    (uint8_t *)msg + sizeof(struct mbox_msghdr),
173 			    size - sizeof(struct mbox_msghdr));
174 
175 		/* Set status and sender pf_func data */
176 		rsp->rc = msg->rc;
177 		rsp->pcifunc = msg->pcifunc;
178 
179 		/* Whenever a PF comes up, AF sends the link status to it, but
180 		 * when a VF comes up no such event is sent to that VF.
181 		 * Use the MBOX_MSG_NIX_LF_START_RX response from AF for this
182 		 * purpose and send the PF's link status to the VF.
183 		 */
184 		if (msg->id == MBOX_MSG_NIX_LF_START_RX) {
185 			/* Send link status to VF */
186 			struct cgx_link_user_info linfo;
187 			struct mbox_msghdr *vf_msg;
188 			size_t sz;
189 
190 			/* Get the link status */
191 			memset(&linfo, 0, sizeof(struct cgx_link_user_info));
192 			if (dev->ops && dev->ops->link_status_get)
193 				dev->ops->link_status_get(dev->roc_nix, &linfo);
194 
195 			sz = PLT_ALIGN(mbox_id2size(MBOX_MSG_CGX_LINK_EVENT),
196 				       MBOX_MSG_ALIGN);
197 			/* Prepare the message to be sent */
198 			vf_msg = mbox_alloc_msg(&dev->mbox_vfpf_up, vf, sz);
199 			if (vf_msg) {
200 				mbox_req_init(MBOX_MSG_CGX_LINK_EVENT, vf_msg);
201 				mbox_memcpy((uint8_t *)vf_msg + sizeof(struct mbox_msghdr), &linfo,
202 					    sizeof(struct cgx_link_user_info));
203 
204 				vf_msg->rc = msg->rc;
205 				vf_msg->pcifunc = msg->pcifunc;
206 				/* Send to VF */
207 				mbox_msg_send_up(&dev->mbox_vfpf_up, vf);
208 				mbox_wait_for_zero(&dev->mbox_vfpf_up, vf);
209 			}
210 		}
211 
212 		offset = mbox->rx_start + msg->next_msgoff;
213 	}
214 
215 	return req_hdr->num_msgs;
216 }
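
/*
 * af_pf_wait_msg() only stages each AF response in the requesting VF's
 * PF<->VF mailbox; the staged replies are sent to the VF by the caller,
 * vf_pf_process_msgs(). The CGX link-status UP message above is the one
 * exception and is pushed to the VF immediately.
 */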
217 
218 /* PF receives mbox DOWN messages from VF and forwards to AF */
219 static int
220 vf_pf_process_msgs(struct dev *dev, uint16_t vf)
221 {
222 	struct mbox *mbox = &dev->mbox_vfpf;
223 	struct mbox_dev *mdev = &mbox->dev[vf];
224 	struct mbox_hdr *req_hdr;
225 	struct mbox_msghdr *msg;
226 	int offset, routed = 0;
227 	size_t size;
228 	uint16_t i;
229 
230 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
231 	if (!req_hdr->num_msgs)
232 		return 0;
233 
234 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
235 
236 	mbox_get(dev->mbox);
237 	for (i = 0; i < req_hdr->num_msgs; i++) {
238 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
239 		size = mbox->rx_start + msg->next_msgoff - offset;
240 
241 		/* RVU_PF_FUNC_S */
242 		msg->pcifunc = dev_pf_func(dev->pf, vf);
243 
244 		if (msg->id == MBOX_MSG_READY) {
245 			struct ready_msg_rsp *rsp;
246 			uint16_t max_bits = sizeof(dev->active_vfs[0]) * 8;
247 
248 			/* Handle READY message in PF */
249 			dev->active_vfs[vf / max_bits] |=
250 				BIT_ULL(vf % max_bits);
251 			rsp = (struct ready_msg_rsp *)mbox_alloc_msg(
252 				mbox, vf, sizeof(*rsp));
253 			if (!rsp) {
254 				plt_err("Failed to alloc VF%d READY message",
255 					vf);
256 				continue;
257 			}
258 
259 			mbox_rsp_init(msg->id, rsp);
260 
261 			/* PF/VF function ID */
262 			rsp->hdr.pcifunc = msg->pcifunc;
263 			rsp->hdr.rc = 0;
264 		} else {
265 			struct mbox_msghdr *af_req;
266 			/* Reserve AF/PF mbox message */
267 			size = PLT_ALIGN(size, MBOX_MSG_ALIGN);
268 			af_req = mbox_alloc_msg(dev->mbox, 0, size);
269 			if (af_req == NULL)
270 				return -ENOSPC;
271 			mbox_req_init(msg->id, af_req);
272 
273 			/* Copy message from VF<->PF mbox to PF<->AF mbox */
274 			mbox_memcpy((uint8_t *)af_req +
275 					    sizeof(struct mbox_msghdr),
276 				    (uint8_t *)msg + sizeof(struct mbox_msghdr),
277 				    size - sizeof(struct mbox_msghdr));
278 			af_req->pcifunc = msg->pcifunc;
279 			routed++;
280 		}
281 		offset = mbox->rx_start + msg->next_msgoff;
282 	}
283 
284 	if (routed > 0) {
285 		plt_base_dbg("pf:%d routed %d messages from vf:%d to AF",
286 			     dev->pf, routed, vf);
287 		/* PF will send the messages to AF and wait for responses */
288 		af_pf_wait_msg(dev, vf, routed);
289 		mbox_reset(dev->mbox, 0);
290 	}
291 	mbox_put(dev->mbox);
292 
293 	/* Send mbox responses to VF */
294 	if (mdev->num_msgs) {
295 		plt_base_dbg("pf:%d reply %d messages to vf:%d", dev->pf,
296 			     mdev->num_msgs, vf);
297 		mbox_msg_send(mbox, vf);
298 	}
299 
300 	return i;
301 }
302 
303 /* PF processes the Acks sent by VFs to PF's UP messages */
304 static int
305 vf_pf_process_up_msgs(struct dev *dev, uint16_t vf)
306 {
307 	struct mbox *mbox = &dev->mbox_vfpf_up;
308 	struct mbox_dev *mdev = &mbox->dev[vf];
309 	struct mbox_hdr *req_hdr;
310 	struct mbox_msghdr *msg;
311 	int msgs_acked = 0;
312 	int offset;
313 	uint16_t i;
314 
315 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
316 	if (req_hdr->num_msgs == 0)
317 		return 0;
318 
319 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
320 
321 	for (i = 0; i < req_hdr->num_msgs; i++) {
322 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
323 
324 		msgs_acked++;
325 		/* RVU_PF_FUNC_S */
326 		msg->pcifunc = dev_pf_func(dev->pf, vf);
327 
328 		switch (msg->id) {
329 		case MBOX_MSG_CGX_LINK_EVENT:
330 			plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
331 				     msg->id, mbox_id2name(msg->id),
332 				     msg->pcifunc, dev_get_pf(msg->pcifunc),
333 				     dev_get_vf(msg->pcifunc));
334 			break;
335 		case MBOX_MSG_CGX_PTP_RX_INFO:
336 			plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
337 				     msg->id, mbox_id2name(msg->id),
338 				     msg->pcifunc, dev_get_pf(msg->pcifunc),
339 				     dev_get_vf(msg->pcifunc));
340 			break;
341 		default:
342 			plt_err("Not handled UP msg 0x%x (%s) func:0x%x",
343 				msg->id, mbox_id2name(msg->id), msg->pcifunc);
344 		}
345 		offset = mbox->rx_start + msg->next_msgoff;
346 	}
347 	mbox_reset(mbox, vf);
348 	mdev->msgs_acked = msgs_acked;
349 	plt_wmb();
350 
351 	return i;
352 }
353 
354 /* PF handling messages from VF */
355 static void
356 roc_vf_pf_mbox_handle_msg(void *param, dev_intr_t *intr)
357 {
358 	uint16_t vf, max_vf, max_bits;
359 	struct dev *dev = param;
360 
361 	max_bits = sizeof(dev->intr.bits[0]) * 8; /* bits per byte */
362 	max_vf = max_bits * MAX_VFPF_DWORD_BITS;
363 
364 	for (vf = 0; vf < max_vf; vf++) {
365 		if (intr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) {
366 			plt_base_dbg("Process vf:%d request (pf:%d, vf:%d)", vf,
367 				     dev->pf, dev->vf);
368 			/* VF initiated down messages */
369 			vf_pf_process_msgs(dev, vf);
370 			/* VF replies to PF's UP messages */
371 			vf_pf_process_up_msgs(dev, vf);
372 			intr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits));
373 		}
374 	}
375 }
376 
377 /* IRQ to PF from VF - PF context (interrupt thread) */
378 static void
379 roc_vf_pf_mbox_irq(void *param)
380 {
381 	bool signal_thread = false;
382 	struct dev *dev = param;
383 	dev_intr_t intrb;
384 	uint64_t intr;
385 	int vfpf, sz;
386 
387 	sz = sizeof(intrb.bits[0]) * MAX_VFPF_DWORD_BITS;
388 	memset(intrb.bits, 0, sz);
389 	for (vfpf = 0; vfpf < MAX_VFPF_DWORD_BITS; ++vfpf) {
390 		intr = plt_read64(dev->bar2 + RVU_PF_VFPF_MBOX_INTX(vfpf));
391 		if (!intr)
392 			continue;
393 
394 		plt_base_dbg("vfpf: %d intr: 0x%" PRIx64 " (pf:%d, vf:%d)",
395 			     vfpf, intr, dev->pf, dev->vf);
396 
397 		/* Save and clear intr bits */
398 		intrb.bits[vfpf] |= intr;
399 		plt_write64(intr, dev->bar2 + RVU_PF_VFPF_MBOX_INTX(vfpf));
400 		signal_thread = true;
401 	}
402 
403 	if (signal_thread) {
404 		pthread_mutex_lock(&dev->sync.mutex);
405 		/* Interrupt state was saved in a local variable first, as dev->intr.bits
406 		 * is a shared resource between the VF msg handler and interrupt threads.
407 		 */
408 		memcpy(dev->intr.bits, intrb.bits, sz);
409 		/* MBOX message received from VF */
410 		dev->sync.msg_avail |= ROC_DEV_MBOX_PEND;
411 		/* Signal vf message handler thread */
412 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
413 		pthread_mutex_unlock(&dev->sync.mutex);
414 	}
415 }
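
/*
 * The IRQ handler above only latches the per-VF interrupt bits and wakes the
 * handler thread; the actual mailbox processing happens outside of interrupt
 * context in pf_vf_mbox_thread_main(), which calls roc_vf_pf_mbox_handle_msg().
 */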
416 
417 /* Received response from AF (PF context) / PF (VF context) */
418 static void
419 process_msgs(struct dev *dev, struct mbox *mbox)
420 {
421 	struct mbox_dev *mdev = &mbox->dev[0];
422 	struct mbox_hdr *req_hdr;
423 	struct mbox_msghdr *msg;
424 	int msgs_acked = 0;
425 	int offset;
426 	uint16_t i;
427 
428 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
429 	if (req_hdr->num_msgs == 0)
430 		return;
431 
432 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
433 	for (i = 0; i < req_hdr->num_msgs; i++) {
434 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
435 
436 		msgs_acked++;
437 		plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
438 			     mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
439 			     dev_get_vf(msg->pcifunc));
440 
441 		switch (msg->id) {
442 			/* Add message IDs that are handled here */
443 		case MBOX_MSG_READY:
444 			/* Get our identity */
445 			dev->pf_func = msg->pcifunc;
446 			break;
447 		case MBOX_MSG_CGX_PRIO_FLOW_CTRL_CFG:
448 		case MBOX_MSG_CGX_CFG_PAUSE_FRM:
449 			/* Handle the case where one VF tries to disable PFC
450 			 * while PFC is already configured on other VFs. This is
451 			 * not an error but a warning which can be ignored.
452 			 */
453 			if (msg->rc) {
454 				if (msg->rc == LMAC_AF_ERR_PERM_DENIED) {
455 					plt_mbox_dbg(
456 						"Receive Flow control disable not permitted "
457 						"as it is used by other PFVFs");
458 					msg->rc = 0;
459 				} else {
460 					plt_err("Message (%s) response has err=%d",
461 						mbox_id2name(msg->id), msg->rc);
462 				}
463 			}
464 			break;
465 		case MBOX_MSG_CGX_PROMISC_DISABLE:
466 		case MBOX_MSG_CGX_PROMISC_ENABLE:
467 			if (msg->rc) {
468 				if (msg->rc == LMAC_AF_ERR_INVALID_PARAM) {
469 					plt_mbox_dbg("Already in same promisc state");
470 					msg->rc = 0;
471 				} else {
472 					plt_err("Message (%s) response has err=%d",
473 						mbox_id2name(msg->id), msg->rc);
474 				}
475 			}
476 			break;
477 
478 		default:
479 			if (msg->rc)
480 				plt_err("Message (%s) response has err=%d (%s)",
481 					mbox_id2name(msg->id), msg->rc, roc_error_msg_get(msg->rc));
482 			break;
483 		}
484 		offset = mbox->rx_start + msg->next_msgoff;
485 	}
486 
487 	mbox_reset(mbox, 0);
488 	/* Update acked if someone is waiting for a message - mbox_wait is waiting */
489 	mdev->msgs_acked = msgs_acked;
490 	plt_wmb();
491 }
492 
493 /* Copies the message received from AF and sends it to VF */
494 static void
495 pf_vf_mbox_send_up_msg(struct dev *dev, void *rec_msg)
496 {
497 	uint16_t max_bits = sizeof(dev->active_vfs[0]) * 8; /* bits per byte */
498 	struct mbox *vf_mbox = &dev->mbox_vfpf_up;
499 	struct msg_req *msg = rec_msg;
500 	struct mbox_msghdr *vf_msg;
501 	uint16_t vf;
502 	size_t size;
503 
504 	size = PLT_ALIGN(mbox_id2size(msg->hdr.id), MBOX_MSG_ALIGN);
505 	if (size < sizeof(struct mbox_msghdr))
506 		return;
507 	/* Send UP message to all VFs */
508 	for (vf = 0; vf < vf_mbox->ndevs; vf++) {
509 		/* VF active */
510 		if (!(dev->active_vfs[vf / max_bits] & BIT_ULL(vf % max_bits)))
511 			continue;
512 
513 		plt_base_dbg("(%s) size: %zx to VF: %d",
514 			     mbox_id2name(msg->hdr.id), size, vf);
515 
516 		/* Reserve PF/VF mbox message */
517 		vf_msg = mbox_alloc_msg(vf_mbox, vf, size);
518 		if (!vf_msg) {
519 			plt_err("Failed to alloc VF%d UP message", vf);
520 			continue;
521 		}
522 		mbox_req_init(msg->hdr.id, vf_msg);
523 
524 		/*
525 		 * Copy message from AF<->PF UP mbox
526 		 * to PF<->VF UP mbox
527 		 */
528 		mbox_memcpy((uint8_t *)vf_msg + sizeof(struct mbox_msghdr),
529 			    (uint8_t *)msg + sizeof(struct mbox_msghdr),
530 			    size - sizeof(struct mbox_msghdr));
531 
532 		vf_msg->rc = msg->hdr.rc;
533 		/* Set PF to be a sender */
534 		vf_msg->pcifunc = dev->pf_func;
535 
536 		/* Send to VF */
537 		mbox_msg_send(vf_mbox, vf);
538 		mbox_wait_for_zero(&dev->mbox_vfpf_up, vf);
539 	}
540 }
541 
542 static int
543 mbox_up_handler_rep_repte_notify(struct dev *dev, struct rep_repte_req *req, struct msg_rsp *rsp)
544 {
545 	struct roc_eswitch_repte_notify_msg *notify_msg;
546 	int rc = 0;
547 
548 	plt_base_dbg("pf:%d/vf:%d msg id 0x%x (%s) from: pf:%d/vf:%d", dev_get_pf(dev->pf_func),
549 		     dev_get_vf(dev->pf_func), req->hdr.id, mbox_id2name(req->hdr.id),
550 		     dev_get_pf(req->hdr.pcifunc), dev_get_vf(req->hdr.pcifunc));
551 
552 	plt_base_dbg("repte pcifunc %x, enable %d", req->repte_pcifunc, req->enable);
553 	if (dev->ops && dev->ops->repte_notify) {
554 		notify_msg = plt_zmalloc(sizeof(struct roc_eswitch_repte_notify_msg), 0);
555 		if (!notify_msg) {
556 			plt_err("Failed to allocate memory");
557 			rc = -ENOMEM;
558 			goto fail;
559 		}
560 		notify_msg->type = ROC_ESWITCH_REPTE_STATE;
561 		notify_msg->state.hw_func = req->repte_pcifunc;
562 		notify_msg->state.enable = req->enable;
563 
564 		rc = dev->ops->repte_notify(dev->roc_nix, (void *)notify_msg);
565 		if (rc < 0)
566 			plt_err("Failed to send new representee %x %s notification",
567 				req->repte_pcifunc, (req->enable == true) ? "enable" : "disable");
568 
569 		plt_free(notify_msg);
570 	}
571 fail:
572 	rsp->hdr.rc = rc;
573 	return rc;
574 }
575 
576 static int
577 mbox_up_handler_rep_set_mtu(struct dev *dev, struct rep_mtu *req, struct msg_rsp *rsp)
578 {
579 	struct roc_eswitch_repte_notify_msg *notify_msg;
580 	int rc = 0;
581 
582 	plt_base_dbg("pf:%d/vf:%d msg id 0x%x (%s) from: pf:%d/vf:%d", dev_get_pf(dev->pf_func),
583 		     dev_get_vf(dev->pf_func), req->hdr.id, mbox_id2name(req->hdr.id),
584 		     dev_get_pf(req->hdr.pcifunc), dev_get_vf(req->hdr.pcifunc));
585 
586 	plt_base_dbg("rep pcifunc %x, rep id %d mtu %d", req->rep_pcifunc, req->rep_id, req->mtu);
587 	if (dev->ops && dev->ops->repte_notify) {
588 		notify_msg = plt_zmalloc(sizeof(struct roc_eswitch_repte_notify_msg), 0);
589 		if (!notify_msg) {
590 			plt_err("Failed to allocate memory");
591 			rc = -ENOMEM;
592 			goto fail;
593 		}
594 		notify_msg->type = ROC_ESWITCH_REPTE_MTU;
595 		notify_msg->mtu.hw_func = req->rep_pcifunc;
596 		notify_msg->mtu.rep_id = req->rep_id;
597 		notify_msg->mtu.mtu = req->mtu;
598 
599 		rc = dev->ops->repte_notify(dev->roc_nix, (void *)notify_msg);
600 		if (rc < 0)
601 			plt_err("Failed to send new MTU notification for representee %x",
602 				req->rep_pcifunc);
603 
604 		plt_free(notify_msg);
605 	}
606 fail:
607 	rsp->hdr.rc = rc;
608 	return rc;
609 }
610 
611 static int
612 mbox_up_handler_mcs_intr_notify(struct dev *dev, struct mcs_intr_info *info, struct msg_rsp *rsp)
613 {
614 	struct roc_mcs_event_desc desc = {0};
615 	struct roc_mcs *mcs;
616 
617 	plt_base_dbg("pf:%d/vf:%d msg id 0x%x (%s) from: pf:%d/vf:%d", dev_get_pf(dev->pf_func),
618 		     dev_get_vf(dev->pf_func), info->hdr.id, mbox_id2name(info->hdr.id),
619 		     dev_get_pf(info->hdr.pcifunc), dev_get_vf(info->hdr.pcifunc));
620 
621 	mcs = roc_idev_mcs_get(info->mcs_id);
622 	if (!mcs)
623 		goto exit;
624 
625 	if (info->intr_mask) {
626 		switch (info->intr_mask) {
627 		case MCS_CPM_RX_SECTAG_V_EQ1_INT:
628 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
629 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_V_EQ1;
630 			break;
631 		case MCS_CPM_RX_SECTAG_E_EQ0_C_EQ1_INT:
632 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
633 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_E_EQ0_C_EQ1;
634 			break;
635 		case MCS_CPM_RX_SECTAG_SL_GTE48_INT:
636 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
637 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_SL_GTE48;
638 			break;
639 		case MCS_CPM_RX_SECTAG_ES_EQ1_SC_EQ1_INT:
640 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
641 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_ES_EQ1_SC_EQ1;
642 			break;
643 		case MCS_CPM_RX_SECTAG_SC_EQ1_SCB_EQ1_INT:
644 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
645 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_SC_EQ1_SCB_EQ1;
646 			break;
647 		case MCS_CPM_RX_PACKET_XPN_EQ0_INT:
648 			desc.type = ROC_MCS_EVENT_RX_SA_PN_HARD_EXP;
649 			desc.metadata.sa_idx = info->sa_id;
650 			break;
651 		case MCS_CPM_RX_PN_THRESH_REACHED_INT:
652 			desc.type = ROC_MCS_EVENT_RX_SA_PN_SOFT_EXP;
653 			desc.metadata.sa_idx = info->sa_id;
654 			break;
655 		case MCS_CPM_TX_PACKET_XPN_EQ0_INT:
656 			desc.type = ROC_MCS_EVENT_TX_SA_PN_HARD_EXP;
657 			desc.metadata.sa_idx = info->sa_id;
658 			break;
659 		case MCS_CPM_TX_PN_THRESH_REACHED_INT:
660 			desc.type = ROC_MCS_EVENT_TX_SA_PN_SOFT_EXP;
661 			desc.metadata.sa_idx = info->sa_id;
662 			break;
663 		case MCS_CPM_TX_SA_NOT_VALID_INT:
664 			desc.type = ROC_MCS_EVENT_SA_NOT_VALID;
665 			break;
666 		case MCS_BBE_RX_DFIFO_OVERFLOW_INT:
667 		case MCS_BBE_TX_DFIFO_OVERFLOW_INT:
668 			desc.type = ROC_MCS_EVENT_FIFO_OVERFLOW;
669 			desc.subtype = ROC_MCS_EVENT_DATA_FIFO_OVERFLOW;
670 			desc.metadata.lmac_id = info->lmac_id;
671 			break;
672 		case MCS_BBE_RX_PLFIFO_OVERFLOW_INT:
673 		case MCS_BBE_TX_PLFIFO_OVERFLOW_INT:
674 			desc.type = ROC_MCS_EVENT_FIFO_OVERFLOW;
675 			desc.subtype = ROC_MCS_EVENT_POLICY_FIFO_OVERFLOW;
676 			desc.metadata.lmac_id = info->lmac_id;
677 			break;
678 		case MCS_PAB_RX_CHAN_OVERFLOW_INT:
679 		case MCS_PAB_TX_CHAN_OVERFLOW_INT:
680 			desc.type = ROC_MCS_EVENT_FIFO_OVERFLOW;
681 			desc.subtype = ROC_MCS_EVENT_PKT_ASSM_FIFO_OVERFLOW;
682 			desc.metadata.lmac_id = info->lmac_id;
683 			break;
684 		default:
685 			goto exit;
686 		}
687 
688 		mcs_event_cb_process(mcs, &desc);
689 	}
690 
691 exit:
692 	rsp->hdr.rc = 0;
693 	return 0;
694 }
695 
696 static int
697 mbox_up_handler_cgx_link_event(struct dev *dev, struct cgx_link_info_msg *msg,
698 			       struct msg_rsp *rsp)
699 {
700 	struct cgx_link_user_info *linfo = &msg->link_info;
701 	void *roc_nix = dev->roc_nix;
702 
703 	plt_base_dbg("pf:%d/vf:%d NIC Link %s --> 0x%x (%s) from: pf:%d/vf:%d",
704 		     dev_get_pf(dev->pf_func), dev_get_vf(dev->pf_func),
705 		     linfo->link_up ? "UP" : "DOWN", msg->hdr.id,
706 		     mbox_id2name(msg->hdr.id), dev_get_pf(msg->hdr.pcifunc),
707 		     dev_get_vf(msg->hdr.pcifunc));
708 
709 	/* PF gets link notification from AF */
710 	if (dev_get_pf(msg->hdr.pcifunc) == 0) {
711 		if (dev->ops && dev->ops->link_status_update)
712 			dev->ops->link_status_update(roc_nix, linfo);
713 
714 		/* Forward the same message as received from AF to VF */
715 		pf_vf_mbox_send_up_msg(dev, msg);
716 	} else {
717 		/* VF gets link up notification */
718 		if (dev->ops && dev->ops->link_status_update)
719 			dev->ops->link_status_update(roc_nix, linfo);
720 	}
721 
722 	rsp->hdr.rc = 0;
723 	return 0;
724 }
725 
726 static int
727 mbox_up_handler_cgx_ptp_rx_info(struct dev *dev,
728 				struct cgx_ptp_rx_info_msg *msg,
729 				struct msg_rsp *rsp)
730 {
731 	void *roc_nix = dev->roc_nix;
732 
733 	plt_base_dbg("pf:%d/vf:%d PTP mode %s --> 0x%x (%s) from: pf:%d/vf:%d",
734 		     dev_get_pf(dev->pf_func), dev_get_vf(dev->pf_func),
735 		     msg->ptp_en ? "ENABLED" : "DISABLED", msg->hdr.id,
736 		     mbox_id2name(msg->hdr.id), dev_get_pf(msg->hdr.pcifunc),
737 		     dev_get_vf(msg->hdr.pcifunc));
738 
739 	/* PF gets PTP notification from AF */
740 	if (dev_get_pf(msg->hdr.pcifunc) == 0) {
741 		if (dev->ops && dev->ops->ptp_info_update)
742 			dev->ops->ptp_info_update(roc_nix, msg->ptp_en);
743 
744 		/* Forward the same message as received from AF to VF */
745 		pf_vf_mbox_send_up_msg(dev, msg);
746 	} else {
747 		/* VF gets PTP notification */
748 		if (dev->ops && dev->ops->ptp_info_update)
749 			dev->ops->ptp_info_update(roc_nix, msg->ptp_en);
750 	}
751 
752 	rsp->hdr.rc = 0;
753 	return 0;
754 }
755 
756 static int
757 mbox_process_msgs_up(struct dev *dev, struct mbox_msghdr *req)
758 {
759 	/* Check if valid, if not reply with an invalid msg */
760 	if (req->sig != MBOX_REQ_SIG)
761 		return -EIO;
762 
763 	switch (req->id) {
764 	default:
765 		reply_invalid_msg(&dev->mbox_up, 0, 0, req->id);
766 		break;
767 #define M(_name, _id, _fn_name, _req_type, _rsp_type)                          \
768 	case _id: {                                                            \
769 		struct _rsp_type *rsp;                                         \
770 		int err;                                                       \
771 		rsp = (struct _rsp_type *)mbox_alloc_msg(                      \
772 			&dev->mbox_up, 0, sizeof(struct _rsp_type));           \
773 		if (!rsp)                                                      \
774 			return -ENOMEM;                                        \
775 		rsp->hdr.id = _id;                                             \
776 		rsp->hdr.sig = MBOX_RSP_SIG;                                   \
777 		rsp->hdr.pcifunc = dev->pf_func;                               \
778 		rsp->hdr.rc = 0;                                               \
779 		err = mbox_up_handler_##_fn_name(dev, (struct _req_type *)req, \
780 						 rsp);                         \
781 		return err;                                                    \
782 	}
783 		MBOX_UP_CGX_MESSAGES
784 		MBOX_UP_MCS_MESSAGES
785 		MBOX_UP_REP_MESSAGES
786 #undef M
787 	}
788 
789 	return -ENODEV;
790 }
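
/*
 * Illustration (not generated code): for the CGX link event entry of
 * MBOX_UP_CGX_MESSAGES, the M() macro above expands to roughly:
 *
 *	case MBOX_MSG_CGX_LINK_EVENT: {
 *		struct msg_rsp *rsp;
 *		int err;
 *		rsp = (struct msg_rsp *)mbox_alloc_msg(&dev->mbox_up, 0,
 *						       sizeof(struct msg_rsp));
 *		if (!rsp)
 *			return -ENOMEM;
 *		rsp->hdr.id = MBOX_MSG_CGX_LINK_EVENT;
 *		rsp->hdr.sig = MBOX_RSP_SIG;
 *		rsp->hdr.pcifunc = dev->pf_func;
 *		rsp->hdr.rc = 0;
 *		err = mbox_up_handler_cgx_link_event(dev,
 *				(struct cgx_link_info_msg *)req, rsp);
 *		return err;
 *	}
 */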
791 
792 /* Received up messages from AF (PF context) / PF (VF context) */
793 static void
794 process_msgs_up(struct dev *dev, struct mbox *mbox)
795 {
796 	struct mbox_dev *mdev = &mbox->dev[0];
797 	struct mbox_hdr *req_hdr;
798 	struct mbox_msghdr *msg;
799 	int i, err, offset;
800 
801 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
802 	if (req_hdr->num_msgs == 0)
803 		return;
804 
805 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
806 	for (i = 0; i < req_hdr->num_msgs; i++) {
807 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
808 
809 		plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
810 			     mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
811 			     dev_get_vf(msg->pcifunc));
812 		err = mbox_process_msgs_up(dev, msg);
813 		if (err)
814 			plt_err("Error %d handling 0x%x (%s)", err, msg->id,
815 				mbox_id2name(msg->id));
816 		offset = mbox->rx_start + msg->next_msgoff;
817 	}
818 	/* Send mbox responses */
819 	if (mdev->num_msgs) {
820 		plt_base_dbg("Reply num_msgs:%d", mdev->num_msgs);
821 		mbox_msg_send(mbox, 0);
822 	}
823 }
824 
825 /* IRQ to VF from PF - VF context (interrupt thread) */
826 static void
827 roc_pf_vf_mbox_irq(void *param)
828 {
829 	struct dev *dev = param;
830 	uint64_t mbox_data;
831 	uint64_t intr;
832 
833 	intr = plt_read64(dev->bar2 + RVU_VF_INT);
834 	if (intr == 0)
835 		plt_base_dbg("Proceeding to check mbox UP messages if any");
836 
837 	plt_write64(intr, dev->bar2 + RVU_VF_INT);
838 	plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
839 
840 	/* Read the UP/DOWN message; the next message send will be delayed
841 	 * by 1 ms until this region is zeroed by mbox_wait_for_zero()
842 	 */
843 	mbox_data = plt_read64(dev->bar2 + RVU_VF_VFPF_MBOX0);
844 	/* If interrupt occurred for down message */
845 	if (mbox_data & MBOX_DOWN_MSG) {
846 		mbox_data &= ~MBOX_DOWN_MSG;
847 		plt_write64(mbox_data, dev->bar2 + RVU_VF_VFPF_MBOX0);
848 
849 		/* First process all configuration messages */
850 		process_msgs(dev, dev->mbox);
851 	}
852 	/* If interrupt occurred for UP message */
853 	if (mbox_data & MBOX_UP_MSG) {
854 		mbox_data &= ~MBOX_UP_MSG;
855 		plt_write64(mbox_data, dev->bar2 + RVU_VF_VFPF_MBOX0);
856 
857 		/* Process Uplink messages */
858 		process_msgs_up(dev, &dev->mbox_up);
859 	}
860 }
861 
862 /* IRQ to PF from AF - PF context (interrupt thread) */
863 static void
864 roc_af_pf_mbox_irq(void *param)
865 {
866 	struct dev *dev = param;
867 	uint64_t mbox_data;
868 	uint64_t intr;
869 
870 	intr = plt_read64(dev->bar2 + RVU_PF_INT);
871 	if (intr == 0)
872 		plt_base_dbg("Proceeding to check mbox UP messages if any");
873 
874 	plt_write64(intr, dev->bar2 + RVU_PF_INT);
875 	plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
876 
877 	/* Read the UP/DOWN message; the next message send will be delayed
878 	 * by 1 ms until this region is zeroed by mbox_wait_for_zero()
879 	 */
880 	mbox_data = plt_read64(dev->bar2 + RVU_PF_PFAF_MBOX0);
881 	/* If interrupt occurred for down message */
882 	if (mbox_data & MBOX_DOWN_MSG) {
883 		mbox_data &= ~MBOX_DOWN_MSG;
884 		plt_write64(mbox_data, dev->bar2 + RVU_PF_PFAF_MBOX0);
885 
886 		/* First process all configuration messages */
887 		process_msgs(dev, dev->mbox);
888 	}
889 	/* If interrupt occurred for up message */
890 	if (mbox_data & MBOX_UP_MSG) {
891 		mbox_data &= ~MBOX_UP_MSG;
892 		plt_write64(mbox_data, dev->bar2 + RVU_PF_PFAF_MBOX0);
893 
894 		/* Process Uplink messages */
895 		process_msgs_up(dev, &dev->mbox_up);
896 	}
897 }
898 
899 static int
900 mbox_register_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
901 {
902 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
903 	int i, rc;
904 
905 	/* HW clear irq */
906 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i)
907 		plt_write64(~0ull,
908 			    dev->bar2 + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(i));
909 
910 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
911 
912 	/* MBOX interrupt for VF(0...63) <-> PF */
913 	rc = dev_irq_register(intr_handle, roc_vf_pf_mbox_irq, dev,
914 			      RVU_PF_INT_VEC_VFPF_MBOX0);
915 
916 	if (rc) {
917 		plt_err("Failed to register PF(VF0-63) mbox irq");
918 		return rc;
919 	}
920 	/* MBOX interrupt for VF(64...127) <-> PF */
921 	rc = dev_irq_register(intr_handle, roc_vf_pf_mbox_irq, dev,
922 			      RVU_PF_INT_VEC_VFPF_MBOX1);
923 
924 	if (rc) {
925 		plt_err("Failed to register PF(VF64-127) mbox irq");
926 		return rc;
927 	}
928 	/* MBOX interrupt AF <-> PF */
929 	rc = dev_irq_register(intr_handle, roc_af_pf_mbox_irq, dev,
930 			      RVU_PF_INT_VEC_AFPF_MBOX);
931 	if (rc) {
932 		plt_err("Failed to register AF<->PF mbox irq");
933 		return rc;
934 	}
935 
936 	/* HW enable intr */
937 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i)
938 		plt_write64(~0ull,
939 			    dev->bar2 + RVU_PF_VFPF_MBOX_INT_ENA_W1SX(i));
940 
941 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT);
942 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1S);
943 
944 	return rc;
945 }
946 
947 static int
948 mbox_register_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
949 {
950 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
951 	int rc;
952 
953 	/* Clear irq */
954 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT_ENA_W1C);
955 
956 	/* MBOX interrupt PF <-> VF */
957 	rc = dev_irq_register(intr_handle, roc_pf_vf_mbox_irq, dev,
958 			      RVU_VF_INT_VEC_MBOX);
959 	if (rc) {
960 		plt_err("Failed to register PF<->VF mbox irq");
961 		return rc;
962 	}
963 
964 	/* HW enable intr */
965 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT);
966 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT_ENA_W1S);
967 
968 	return rc;
969 }
970 
971 int
972 dev_mbox_register_irq(struct plt_pci_device *pci_dev, struct dev *dev)
973 {
974 	if (dev_is_vf(dev))
975 		return mbox_register_vf_irq(pci_dev, dev);
976 	else
977 		return mbox_register_pf_irq(pci_dev, dev);
978 }
979 
980 static void
981 mbox_unregister_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
982 {
983 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
984 	int i;
985 
986 	/* HW clear irq */
987 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i)
988 		plt_write64(~0ull,
989 			    dev->bar2 + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(i));
990 
991 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
992 
993 	/* Unregister the interrupt handler for each vectors */
994 	/* MBOX interrupt for VF(0...63) <-> PF */
995 	dev_irq_unregister(intr_handle, roc_vf_pf_mbox_irq, dev,
996 			   RVU_PF_INT_VEC_VFPF_MBOX0);
997 
998 	/* MBOX interrupt for VF(64...127) <-> PF */
999 	dev_irq_unregister(intr_handle, roc_vf_pf_mbox_irq, dev,
1000 			   RVU_PF_INT_VEC_VFPF_MBOX1);
1001 
1002 	/* MBOX interrupt AF <-> PF */
1003 	dev_irq_unregister(intr_handle, roc_af_pf_mbox_irq, dev,
1004 			   RVU_PF_INT_VEC_AFPF_MBOX);
1005 }
1006 
1007 static void
1008 mbox_unregister_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
1009 {
1010 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
1011 
1012 	/* Clear irq */
1013 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT_ENA_W1C);
1014 
1015 	/* Unregister the interrupt handler */
1016 	dev_irq_unregister(intr_handle, roc_pf_vf_mbox_irq, dev,
1017 			   RVU_VF_INT_VEC_MBOX);
1018 }
1019 
1020 static void
1021 mbox_unregister_irq(struct plt_pci_device *pci_dev, struct dev *dev)
1022 {
1023 	if (dev_is_vf(dev))
1024 		mbox_unregister_vf_irq(pci_dev, dev);
1025 	else
1026 		mbox_unregister_pf_irq(pci_dev, dev);
1027 }
1028 
1029 static int
1030 vf_flr_send_msg(struct dev *dev, uint16_t vf)
1031 {
1032 	struct mbox *mbox = dev->mbox;
1033 	struct msg_req *req;
1034 	int rc;
1035 
1036 	req = mbox_alloc_msg_vf_flr(mbox_get(mbox));
1037 	if (req == NULL)
1038 		return -ENOSPC;
1039 	/* Overwrite pcifunc to indicate VF */
1040 	req->hdr.pcifunc = dev_pf_func(dev->pf, vf);
1041 
1042 	/* Sync message in interrupt context */
1043 	rc = pf_af_sync_msg(dev, NULL);
1044 	if (rc)
1045 		plt_err("Failed to send VF FLR mbox msg, rc=%d", rc);
1046 
1047 	mbox_put(mbox);
1048 
1049 	return rc;
1050 }
1051 
1052 static void
1053 roc_pf_vf_flr_irq(void *param)
1054 {
1055 	struct dev *dev = (struct dev *)param;
1056 	bool signal_thread = false;
1057 	dev_intr_t flr;
1058 	uintptr_t bar2;
1059 	uint64_t intr;
1060 	int i, sz;
1061 
1062 	bar2 = dev->bar2;
1063 
1064 	sz = sizeof(flr.bits[0]) * MAX_VFPF_DWORD_BITS;
1065 	memset(flr.bits, 0, sz);
1066 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i) {
1067 		intr = plt_read64(bar2 + RVU_PF_VFFLR_INTX(i));
1068 		if (!intr)
1069 			continue;
1070 
1071 		/* Clear interrupt */
1072 		plt_write64(intr, bar2 + RVU_PF_VFFLR_INTX(i));
1073 		/* Disable the interrupt */
1074 		plt_write64(intr,
1075 			    bar2 + RVU_PF_VFFLR_INT_ENA_W1CX(i));
1076 
1077 		/* Save FLR interrupts per VF as bits */
1078 		flr.bits[i] |= intr;
1079 		/* Enable interrupt */
1080 		plt_write64(~0ull,
1081 			    bar2 + RVU_PF_VFFLR_INT_ENA_W1SX(i));
1082 		signal_thread = true;
1083 	}
1084 
1085 	if (signal_thread) {
1086 		pthread_mutex_lock(&dev->sync.mutex);
1087 		/* Interrupt state was saved in a local variable first, as dev->flr.bits
1088 		 * is a shared resource between the VF msg handler and interrupt threads.
1089 		 */
1090 		memcpy(dev->flr.bits, flr.bits, sz);
1091 		/* FLR message received from VF */
1092 		dev->sync.msg_avail |= ROC_DEV_FLR_PEND;
1093 		/* Signal vf message handler thread */
1094 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
1095 		pthread_mutex_unlock(&dev->sync.mutex);
1096 	}
1097 }
1098 
1099 static int
1100 vf_flr_unregister_irqs(struct plt_pci_device *pci_dev, struct dev *dev)
1101 {
1102 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
1103 	int i;
1104 
1105 	plt_base_dbg("Unregister VF FLR interrupts for %s", pci_dev->name);
1106 
1107 	/* HW clear irq */
1108 	for (i = 0; i < MAX_VFPF_DWORD_BITS; i++)
1109 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFFLR_INT_ENA_W1CX(i));
1110 
1111 	dev_irq_unregister(intr_handle, roc_pf_vf_flr_irq, dev,
1112 			   RVU_PF_INT_VEC_VFFLR0);
1113 
1114 	dev_irq_unregister(intr_handle, roc_pf_vf_flr_irq, dev,
1115 			   RVU_PF_INT_VEC_VFFLR1);
1116 
1117 	return 0;
1118 }
1119 
1120 int
1121 dev_vf_flr_register_irqs(struct plt_pci_device *pci_dev, struct dev *dev)
1122 {
1123 	struct plt_intr_handle *handle = pci_dev->intr_handle;
1124 	int i, rc;
1125 
1126 	plt_base_dbg("Register VF FLR interrupts for %s", pci_dev->name);
1127 
1128 	rc = dev_irq_register(handle, roc_pf_vf_flr_irq, dev,
1129 			      RVU_PF_INT_VEC_VFFLR0);
1130 	if (rc)
1131 		plt_err("Failed to init RVU_PF_INT_VEC_VFFLR0 rc=%d", rc);
1132 
1133 	rc = dev_irq_register(handle, roc_pf_vf_flr_irq, dev,
1134 			      RVU_PF_INT_VEC_VFFLR1);
1135 	if (rc)
1136 		plt_err("Failed to init RVU_PF_INT_VEC_VFFLR1 rc=%d", rc);
1137 
1138 	/* Enable HW interrupt */
1139 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i) {
1140 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFFLR_INTX(i));
1141 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFTRPENDX(i));
1142 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFFLR_INT_ENA_W1SX(i));
1143 	}
1144 	return 0;
1145 }
1146 
1147 static void
1148 vf_flr_handle_msg(void *param, dev_intr_t *flr)
1149 {
1150 	uint16_t vf, max_vf, max_bits;
1151 	struct dev *dev = param;
1152 
1153 	max_bits = sizeof(flr->bits[0]) * 8; /* bits per byte */
1154 	max_vf = max_bits * MAX_VFPF_DWORD_BITS;
1155 
1156 	for (vf = 0; vf < max_vf; vf++) {
1157 		if (flr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) {
1158 			plt_base_dbg("Process FLR vf:%d request (pf:%d, vf:%d)",
1159 				     vf, dev->pf, dev->vf);
1160 			/* Inform AF about VF reset */
1161 			vf_flr_send_msg(dev, vf);
1162 			flr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits));
1163 
1164 			/* Signal FLR finish */
1165 			plt_write64(BIT_ULL(vf % max_bits),
1166 				    dev->bar2 + RVU_PF_VFTRPENDX(vf / max_bits));
1167 		}
1168 	}
1169 }
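
/*
 * VF FLR flow: roc_pf_vf_flr_irq() latches the per-VF FLR bits and wakes the
 * handler thread, which calls vf_flr_handle_msg() to notify AF through
 * vf_flr_send_msg() and then writes RVU_PF_VFTRPENDX to signal FLR completion
 * to hardware.
 */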
1170 
1171 static uint32_t
1172 pf_vf_mbox_thread_main(void *arg)
1173 {
1174 	struct dev *dev = arg;
1175 	bool is_flr, is_mbox;
1176 	dev_intr_t flr, intr;
1177 	int sz, rc;
1178 
1179 	sz = sizeof(intr.bits[0]) * MAX_VFPF_DWORD_BITS;
1180 	pthread_mutex_lock(&dev->sync.mutex);
1181 	while (dev->sync.start_thread) {
1182 		do {
1183 			rc = pthread_cond_wait(&dev->sync.pfvf_msg_cond, &dev->sync.mutex);
1184 		} while (rc != 0);
1185 
1186 		if (!dev->sync.msg_avail) {
1187 			continue;
1188 		} else {
1189 			while (dev->sync.msg_avail) {
1190 				/* Check which VF msg received */
1191 				is_mbox = dev->sync.msg_avail & ROC_DEV_MBOX_PEND;
1192 				is_flr = dev->sync.msg_avail & ROC_DEV_FLR_PEND;
1193 				memcpy(intr.bits, dev->intr.bits, sz);
1194 				memcpy(flr.bits, dev->flr.bits, sz);
1195 				memset(dev->flr.bits, 0, sz);
1196 				memset(dev->intr.bits, 0, sz);
1197 				dev->sync.msg_avail = 0;
1198 				/* Unlocking for interrupt thread to grab lock
1199 				 * and update msg_avail field.
1200 				 */
1201 				pthread_mutex_unlock(&dev->sync.mutex);
1202 				/* Calling respective message handlers */
1203 				if (is_mbox)
1204 					roc_vf_pf_mbox_handle_msg(dev, &intr);
1205 				if (is_flr)
1206 					vf_flr_handle_msg(dev, &flr);
1207 				/* Locking as cond wait will unlock before wait */
1208 				pthread_mutex_lock(&dev->sync.mutex);
1209 			}
1210 		}
1211 	}
1212 
1213 	pthread_mutex_unlock(&dev->sync.mutex);
1214 
1215 	return 0;
1216 }
1217 
1218 static void
1219 clear_rvum_interrupts(struct dev *dev)
1220 {
1221 	uint64_t intr;
1222 	int i;
1223 
1224 	if (dev_is_vf(dev)) {
1225 		/* Clear VF mbox interrupt */
1226 		intr = plt_read64(dev->bar2 + RVU_VF_INT);
1227 		if (intr)
1228 			plt_write64(intr, dev->bar2 + RVU_VF_INT);
1229 	} else {
1230 		/* Clear AF PF interrupt line */
1231 		intr = plt_read64(dev->bar2 + RVU_PF_INT);
1232 		if (intr)
1233 			plt_write64(intr, dev->bar2 + RVU_PF_INT);
1234 		for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i) {
1235 			/* Clear MBOX interrupts */
1236 			intr = plt_read64(dev->bar2 + RVU_PF_VFPF_MBOX_INTX(i));
1237 			if (intr)
1238 				plt_write64(intr,
1239 					    dev->bar2 +
1240 						    RVU_PF_VFPF_MBOX_INTX(i));
1241 			/* Clear VF FLR interrupts */
1242 			intr = plt_read64(dev->bar2 + RVU_PF_VFFLR_INTX(i));
1243 			if (intr)
1244 				plt_write64(intr,
1245 					    dev->bar2 + RVU_PF_VFFLR_INTX(i));
1246 		}
1247 	}
1248 }
1249 
1250 int
1251 dev_active_vfs(struct dev *dev)
1252 {
1253 	int i, count = 0;
1254 
1255 	for (i = 0; i < MAX_VFPF_DWORD_BITS; i++)
1256 		count += plt_popcount32(dev->active_vfs[i]);
1257 
1258 	return count;
1259 }
1260 
1261 static void
1262 dev_vf_hwcap_update(struct plt_pci_device *pci_dev, struct dev *dev)
1263 {
1264 	switch (pci_dev->id.device_id) {
1265 	case PCI_DEVID_CNXK_RVU_PF:
1266 		break;
1267 	case PCI_DEVID_CNXK_RVU_SSO_TIM_VF:
1268 	case PCI_DEVID_CNXK_RVU_NPA_VF:
1269 	case PCI_DEVID_CN10K_RVU_CPT_VF:
1270 	case PCI_DEVID_CN9K_RVU_CPT_VF:
1271 	case PCI_DEVID_CNXK_RVU_AF_VF:
1272 	case PCI_DEVID_CNXK_RVU_VF:
1273 	case PCI_DEVID_CNXK_RVU_SDP_VF:
1274 	case PCI_DEVID_CNXK_RVU_NIX_INL_VF:
1275 	case PCI_DEVID_CNXK_RVU_ESWITCH_VF:
1276 		dev->hwcap |= DEV_HWCAP_F_VF;
1277 		break;
1278 	}
1279 }
1280 
1281 static uintptr_t
1282 dev_vf_mbase_get(struct plt_pci_device *pci_dev, struct dev *dev)
1283 {
1284 	void *vf_mbase = NULL;
1285 	uintptr_t pa;
1286 
1287 	if (dev_is_vf(dev))
1288 		return 0;
1289 
1290 	/* For CN10K onwards, it is just after PF MBOX */
1291 	if (!roc_model_is_cn9k())
1292 		return dev->bar4 + MBOX_SIZE;
1293 
1294 	pa = plt_read64(dev->bar2 + RVU_PF_VF_BAR4_ADDR);
1295 	if (!pa) {
1296 		plt_err("Invalid VF mbox base pa");
1297 		return pa;
1298 	}
1299 
1300 	vf_mbase = mbox_mem_map(pa, MBOX_SIZE * pci_dev->max_vfs);
1301 	if (vf_mbase == MAP_FAILED) {
1302 		plt_err("Failed to mmap vf mbase at pa 0x%lx, errno=%d", pa,
1303 			errno);
1304 		return 0;
1305 	}
1306 	return (uintptr_t)vf_mbase;
1307 }
1308 
1309 static void
1310 dev_vf_mbase_put(struct plt_pci_device *pci_dev, uintptr_t vf_mbase)
1311 {
1312 	if (!vf_mbase || !pci_dev->max_vfs || !roc_model_is_cn9k())
1313 		return;
1314 
1315 	mbox_mem_unmap((void *)vf_mbase, MBOX_SIZE * pci_dev->max_vfs);
1316 }
1317 
1318 static int
1319 dev_setup_shared_lmt_region(struct mbox *mbox, bool valid_iova, uint64_t iova)
1320 {
1321 	struct lmtst_tbl_setup_req *req;
1322 	int rc;
1323 
1324 	req = mbox_alloc_msg_lmtst_tbl_setup(mbox_get(mbox));
1325 	if (!req) {
1326 		rc = -ENOSPC;
1327 		goto exit;
1328 	}
1329 
1330 	/* This pcifunc is set to the primary pcifunc whose LMT address
1331 	 * will be shared. If the call contains a valid IOVA, the pcifunc
1332 	 * field below is unused.
1333 	 */
1334 	req->pcifunc = valid_iova ? 0 : idev_lmt_pffunc_get();
1335 	req->use_local_lmt_region = valid_iova;
1336 	req->lmt_iova = iova;
1337 
1338 	rc = mbox_process(mbox);
1339 exit:
1340 	mbox_put(mbox);
1341 	return rc;
1342 }
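
/*
 * dev_setup_shared_lmt_region() is used below in two ways: with
 * valid_iova == false to attach this pf_func to the primary pf_func's LMT
 * region, or with valid_iova == true to register a locally reserved LMT
 * region with the kernel (see dev_lmt_setup()).
 */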
1343 
1344 /* Total no of lines * size of each lmtline */
1345 #define LMT_REGION_SIZE (ROC_NUM_LMT_LINES * ROC_LMT_LINE_SZ)
1346 static int
1347 dev_lmt_setup(struct dev *dev)
1348 {
1349 	char name[PLT_MEMZONE_NAMESIZE];
1350 	const struct plt_memzone *mz;
1351 	struct idev_cfg *idev;
1352 	int rc;
1353 
1354 	if (roc_model_is_cn9k()) {
1355 		dev->lmt_base = dev->bar2 + (RVU_BLOCK_ADDR_LMT << 20);
1356 		return 0;
1357 	}
1358 
1359 	/* [CN10K, .) */
1360 
1361 	/* Set common lmt region from second pf_func onwards. */
1362 	if (!dev->disable_shared_lmt && idev_lmt_pffunc_get() &&
1363 	    dev->pf_func != idev_lmt_pffunc_get()) {
1364 		rc = dev_setup_shared_lmt_region(dev->mbox, false, 0);
1365 		if (!rc) {
1366 			/* On success, update the LMT base of this secondary
1367 			 * pf_func with the primary pf_func's LMT base.
1368 			 */
1369 			dev->lmt_base = roc_idev_lmt_base_addr_get();
1370 			return rc;
1371 		}
1372 		plt_err("Failed to setup shared lmt region, pf_func %d err %d. "
1373 			"Using a separate LMT region per pf_func",
1374 			dev->pf_func, rc);
1375 	}
1376 
1377 	/* Allocating memory for LMT region */
1378 	sprintf(name, "LMT_MAP%x", dev->pf_func);
1379 
1380 	/* Set the alignment to ensure correct masking when resetting to the
1381 	 * LMT base of a core after all LMT lines under that core are used.
1382 	 * The alignment value LMT_REGION_SIZE handles the case where all
1383 	 * lines are used by one core.
1384 	 */
1385 	mz = plt_lmt_region_reserve_aligned(name, LMT_REGION_SIZE,
1386 					    LMT_REGION_SIZE);
1387 	if (!mz) {
1388 		plt_err("Memory alloc failed: %s", strerror(errno));
1389 		goto fail;
1390 	}
1391 
1392 	/* Share the IOVA address with Kernel */
1393 	rc = dev_setup_shared_lmt_region(dev->mbox, true, mz->iova);
1394 	if (rc) {
1395 		errno = rc;
1396 		goto free;
1397 	}
1398 
1399 	dev->lmt_base = mz->iova;
1400 	dev->lmt_mz = mz;
1401 	/* Base LMT address should be chosen from only those pci funcs which
1402 	 * participate in LMT shared mode.
1403 	 */
1404 	if (!dev->disable_shared_lmt) {
1405 		idev = idev_get_cfg();
1406 		if (!idev) {
1407 			errno = EFAULT;
1408 			goto free;
1409 		}
1410 
1411 		if (!__atomic_load_n(&idev->lmt_pf_func, __ATOMIC_ACQUIRE)) {
1412 			idev->lmt_base_addr = dev->lmt_base;
1413 			idev->lmt_pf_func = dev->pf_func;
1414 			idev->num_lmtlines = RVU_LMT_LINE_MAX;
1415 		}
1416 	}
1417 
1418 	return 0;
1419 free:
1420 	plt_memzone_free(mz);
1421 fail:
1422 	return -errno;
1423 }
1424 
1425 static bool
1426 dev_cache_line_size_valid(void)
1427 {
1428 	if (roc_model_is_cn9k()) {
1429 		if (PLT_CACHE_LINE_SIZE != 128) {
1430 			plt_err("Cache line size of %d is wrong for CN9K",
1431 				PLT_CACHE_LINE_SIZE);
1432 			return false;
1433 		}
1434 	} else if (roc_model_is_cn10k()) {
1435 		if (PLT_CACHE_LINE_SIZE == 128) {
1436 			plt_warn("Cache line size of %d might affect performance",
1437 				 PLT_CACHE_LINE_SIZE);
1438 		} else if (PLT_CACHE_LINE_SIZE != 64) {
1439 			plt_err("Cache line size of %d is wrong for CN10K",
1440 				PLT_CACHE_LINE_SIZE);
1441 			return false;
1442 		}
1443 	}
1444 
1445 	return true;
1446 }
1447 
1448 int
1449 dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
1450 {
1451 	char name[MBOX_HANDLER_NAME_MAX_LEN];
1452 	int direction, up_direction, rc;
1453 	uintptr_t bar2, bar4, mbox;
1454 	uintptr_t vf_mbase = 0;
1455 	uint64_t intr_offset;
1456 
1457 	if (!dev_cache_line_size_valid())
1458 		return -EFAULT;
1459 
1460 	if (!roc_plt_lmt_validate()) {
1461 		plt_err("Failed to validate LMT line");
1462 		return -EFAULT;
1463 	}
1464 
1465 	bar2 = (uintptr_t)pci_dev->mem_resource[2].addr;
1466 	bar4 = (uintptr_t)pci_dev->mem_resource[4].addr;
1467 	if (bar2 == 0 || bar4 == 0) {
1468 		plt_err("Failed to get PCI bars");
1469 		rc = -ENODEV;
1470 		goto error;
1471 	}
1472 
1473 	/* Trigger fault on bar2 and bar4 regions
1474 	 * to avoid BUG_ON in remap_pfn_range()
1475 	 * in latest kernel.
1476 	 */
1477 	*(volatile uint64_t *)bar2;
1478 	*(volatile uint64_t *)bar4;
1479 
1480 	/* Check ROC model supported */
1481 	if (roc_model->flag == 0) {
1482 		rc = UTIL_ERR_INVALID_MODEL;
1483 		goto error;
1484 	}
1485 
1486 	dev->maxvf = pci_dev->max_vfs;
1487 	dev->bar2 = bar2;
1488 	dev->bar4 = bar4;
1489 	dev_vf_hwcap_update(pci_dev, dev);
1490 
1491 	if (dev_is_vf(dev)) {
1492 		mbox = (roc_model_is_cn9k() ?
1493 			bar4 : (bar2 + RVU_VF_MBOX_REGION));
1494 		direction = MBOX_DIR_VFPF;
1495 		up_direction = MBOX_DIR_VFPF_UP;
1496 		intr_offset = RVU_VF_INT;
1497 	} else {
1498 		mbox = bar4;
1499 		direction = MBOX_DIR_PFAF;
1500 		up_direction = MBOX_DIR_PFAF_UP;
1501 		intr_offset = RVU_PF_INT;
1502 	}
1503 
1504 	/* Clear all RVUM interrupts */
1505 	clear_rvum_interrupts(dev);
1506 
1507 	/* Initialize the local mbox */
1508 	rc = mbox_init(&dev->mbox_local, mbox, bar2, direction, 1, intr_offset);
1509 	if (rc)
1510 		goto error;
1511 	dev->mbox = &dev->mbox_local;
1512 
1513 	rc = mbox_init(&dev->mbox_up, mbox, bar2, up_direction, 1, intr_offset);
1514 	if (rc)
1515 		goto mbox_fini;
1516 
1517 	/* Register mbox interrupts */
1518 	rc = dev_mbox_register_irq(pci_dev, dev);
1519 	if (rc)
1520 		goto mbox_fini;
1521 
1522 	/* Check the readiness of PF/VF */
1523 	rc = send_ready_msg(dev->mbox, &dev->pf_func);
1524 	if (rc)
1525 		goto mbox_unregister;
1526 
1527 	dev->pf = dev_get_pf(dev->pf_func);
1528 	dev->vf = dev_get_vf(dev->pf_func);
1529 	memset(&dev->active_vfs, 0, sizeof(dev->active_vfs));
1530 
1531 	/* Allocate memory for device ops */
1532 	dev->ops = plt_zmalloc(sizeof(struct dev_ops), 0);
1533 	if (dev->ops == NULL) {
1534 		rc = -ENOMEM;
1535 		goto mbox_unregister;
1536 	}
1537 
1538 	/* VF devices found under this PF device */
1539 	if (pci_dev->max_vfs > 0) {
1540 		/* Remap mbox area for all vf's */
1541 		vf_mbase = dev_vf_mbase_get(pci_dev, dev);
1542 		if (!vf_mbase) {
1543 			rc = -ENODEV;
1544 			goto mbox_unregister;
1545 		}
1546 		/* Init mbox object */
1547 		rc = mbox_init(&dev->mbox_vfpf, vf_mbase, bar2, MBOX_DIR_PFVF,
1548 			       pci_dev->max_vfs, intr_offset);
1549 		if (rc)
1550 			goto iounmap;
1551 
1552 		/* PF -> VF UP messages */
1553 		rc = mbox_init(&dev->mbox_vfpf_up, vf_mbase, bar2,
1554 			       MBOX_DIR_PFVF_UP, pci_dev->max_vfs, intr_offset);
1555 		if (rc)
1556 			goto iounmap;
1557 
1558 		/* Create a thread for handling msgs from VFs */
1559 		pthread_cond_init(&dev->sync.pfvf_msg_cond, NULL);
1560 		pthread_mutex_init(&dev->sync.mutex, NULL);
1561 
1562 		snprintf(name, MBOX_HANDLER_NAME_MAX_LEN, "mbox_pf%d", dev->pf);
1563 		dev->sync.start_thread = true;
1564 		rc = plt_thread_create_control(&dev->sync.pfvf_msg_thread, name,
1565 				pf_vf_mbox_thread_main, dev);
1566 		if (rc != 0) {
1567 			plt_err("Failed to create thread for VF mbox handling");
1568 			goto thread_fail;
1569 		}
1570 	}
1571 
1572 	/* Register VF-FLR irq handlers */
1573 	if (!dev_is_vf(dev)) {
1574 		rc = dev_vf_flr_register_irqs(pci_dev, dev);
1575 		if (rc)
1576 			goto stop_msg_thrd;
1577 	}
1578 	dev->mbox_active = 1;
1579 
1580 	rc = npa_lf_init(dev, pci_dev);
1581 	if (rc)
1582 		goto stop_msg_thrd;
1583 
1584 	/* Setup LMT line base */
1585 	rc = dev_lmt_setup(dev);
1586 	if (rc)
1587 		goto stop_msg_thrd;
1588 
1589 	return rc;
1590 stop_msg_thrd:
1591 	/* Exiting the mbox sync thread */
1592 	if (dev->sync.start_thread) {
1593 		dev->sync.start_thread = false;
1594 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
1595 		plt_thread_join(dev->sync.pfvf_msg_thread, NULL);
1596 	}
1597 thread_fail:
1598 	pthread_mutex_destroy(&dev->sync.mutex);
1599 	pthread_cond_destroy(&dev->sync.pfvf_msg_cond);
1600 iounmap:
1601 	dev_vf_mbase_put(pci_dev, vf_mbase);
1602 mbox_unregister:
1603 	mbox_unregister_irq(pci_dev, dev);
1604 	if (dev->ops)
1605 		plt_free(dev->ops);
1606 mbox_fini:
1607 	mbox_fini(dev->mbox);
1608 	mbox_fini(&dev->mbox_up);
1609 error:
1610 	return rc;
1611 }
1612 
1613 int
1614 dev_fini(struct dev *dev, struct plt_pci_device *pci_dev)
1615 {
1616 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
1617 	struct mbox *mbox;
1618 
1619 	/* Check if this dev hosts npalf and has 1+ refs */
1620 	if (idev_npa_lf_active(dev) > 1)
1621 		return -EAGAIN;
1622 
1623 	/* Exiting the mbox sync thread */
1624 	if (dev->sync.start_thread) {
1625 		dev->sync.start_thread = false;
1626 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
1627 		plt_thread_join(dev->sync.pfvf_msg_thread, NULL);
1628 		pthread_mutex_destroy(&dev->sync.mutex);
1629 		pthread_cond_destroy(&dev->sync.pfvf_msg_cond);
1630 	}
1631 
1632 	/* Clear references to this pci dev */
1633 	npa_lf_fini();
1634 
1635 	/* Releasing memory allocated for lmt region */
1636 	if (dev->lmt_mz)
1637 		plt_memzone_free(dev->lmt_mz);
1638 
1639 	mbox_unregister_irq(pci_dev, dev);
1640 
1641 	if (!dev_is_vf(dev))
1642 		vf_flr_unregister_irqs(pci_dev, dev);
1643 	/* Release PF - VF */
1644 	mbox = &dev->mbox_vfpf;
1645 	if (mbox->hwbase && mbox->dev)
1646 		dev_vf_mbase_put(pci_dev, mbox->hwbase);
1647 
1648 	if (dev->ops)
1649 		plt_free(dev->ops);
1650 
1651 	mbox_fini(mbox);
1652 	mbox = &dev->mbox_vfpf_up;
1653 	mbox_fini(mbox);
1654 
1655 	/* Release PF - AF */
1656 	mbox = dev->mbox;
1657 	mbox_fini(mbox);
1658 	mbox = &dev->mbox_up;
1659 	mbox_fini(mbox);
1660 	dev->mbox_active = 0;
1661 
1662 	/* Disable MSIX vectors */
1663 	dev_irqs_disable(intr_handle);
1664 	return 0;
1665 }
1666
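
/*
 * Illustrative usage (not part of this file): a ROC block driver's probe path
 * is expected to call dev_init() once for its PCI device and dev_fini() on
 * teardown, for example:
 *
 *	rc = dev_init(&priv->dev, pci_dev);	// 'priv' stands for the block's private struct
 *	if (rc)
 *		return rc;
 *	...
 *	rc = dev_fini(&priv->dev, pci_dev);
 */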