1 /* SPDX-License-Identifier: BSD-3-Clause
2  * Copyright(C) 2021 Marvell.
3  */
4 
5 #include <fcntl.h>
6 #include <inttypes.h>
7 #include <string.h>
8 #include <sys/mman.h>
9 #include <unistd.h>
10 
11 #include "roc_api.h"
12 #include "roc_priv.h"
13 
14 /* PCI Extended capability ID */
15 #define ROC_PCI_EXT_CAP_ID_SRIOV 0x10 /* SRIOV cap */
16 
17 /* Single Root I/O Virtualization */
18 #define ROC_PCI_SRIOV_TOTAL_VF 0x0e /* Total VFs */
19 
20 /* VF Mbox handler thread name */
21 #define MBOX_HANDLER_NAME_MAX_LEN RTE_THREAD_INTERNAL_NAME_SIZE
22 
23 /* VF interrupt message pending bits - mbox or flr */
24 #define ROC_DEV_MBOX_PEND BIT_ULL(0)
25 #define ROC_DEV_FLR_PEND  BIT_ULL(1)
26 static void *
27 mbox_mem_map(off_t off, size_t size)
28 {
29 	void *va = MAP_FAILED;
30 	int mem_fd;
31 
32 	if (size == 0 || off == 0) {
33 		plt_err("Invalid mbox area off 0x%jx size %zu", (intmax_t)off, size);
34 		goto error;
35 	}
36 
37 	mem_fd = open("/dev/mem", O_RDWR);
38 	if (mem_fd < 0)
39 		goto error;
40 
41 	va = plt_mmap(NULL, size, PLT_PROT_READ | PLT_PROT_WRITE,
42 		      PLT_MAP_SHARED, mem_fd, off);
43 	close(mem_fd);
44 
45 	if (va == MAP_FAILED)
46 		plt_err("Failed to mmap sz=0x%zx, fd=%d, off=%jd", size, mem_fd,
47 			(intmax_t)off);
48 error:
49 	return va;
50 }
51 
52 static void
53 mbox_mem_unmap(void *va, size_t size)
54 {
55 	if (va)
56 		munmap(va, size);
57 }
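
/*
 * Usage sketch (illustrative only): dev_vf_mbase_get()/dev_vf_mbase_put()
 * below pair these helpers to map the per-VF mbox region on CN9K:
 *
 *	pa = plt_read64(dev->bar2 + RVU_PF_VF_BAR4_ADDR);
 *	va = mbox_mem_map(pa, MBOX_SIZE * pci_dev->max_vfs);
 *	...
 *	mbox_mem_unmap(va, MBOX_SIZE * pci_dev->max_vfs);
 */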
58 
59 static int
60 pf_af_sync_msg(struct dev *dev, struct mbox_msghdr **rsp)
61 {
62 	uint32_t timeout = 0, sleep = 1;
63 	struct mbox *mbox = dev->mbox;
64 	struct mbox_dev *mdev = &mbox->dev[0];
65 
66 	volatile uint64_t int_status = 0;
67 	struct mbox_msghdr *msghdr;
68 	uint64_t off;
69 	int rc = 0;
70 
71 	/* Disable PF interrupts; we are called from interrupt context */
72 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
73 
74 	/* Send message */
75 	mbox_msg_send(mbox, 0);
76 
77 	do {
78 		plt_delay_ms(sleep);
79 		timeout += sleep;
80 		if (timeout >= mbox->rsp_tmo) {
81 			plt_err("Message timeout: %dms", mbox->rsp_tmo);
82 			rc = -EIO;
83 			break;
84 		}
85 		int_status = plt_read64(dev->bar2 + RVU_PF_INT);
86 	} while ((int_status & 0x1) != 0x1);
87 
88 	/* Clear */
89 	plt_write64(int_status, dev->bar2 + RVU_PF_INT);
90 
91 	/* Enable interrupts */
92 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1S);
93 
94 	if (rc == 0) {
95 		/* Get message */
96 		off = mbox->rx_start +
97 		      PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
98 		msghdr = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + off);
99 		if (rsp)
100 			*rsp = msghdr;
101 		rc = msghdr->rc;
102 	}
103 
104 	return rc;
105 }
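
/*
 * Usage sketch (illustrative): vf_flr_send_msg() below is the typical
 * caller; it stages a request on dev->mbox and waits synchronously for
 * the AF response from interrupt context:
 *
 *	req = mbox_alloc_msg_vf_flr(mbox_get(mbox));
 *	req->hdr.pcifunc = dev_pf_func(dev->pf, vf);
 *	rc = pf_af_sync_msg(dev, NULL);
 *	mbox_put(mbox);
 */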
106 
107 /* PF sends the messages to AF, waits for the responses and then forwards
108  * them to the VF.
109  */
110 static int
111 af_pf_wait_msg(struct dev *dev, uint16_t vf, int num_msg)
112 {
113 	uint32_t timeout = 0, sleep = 1;
114 	struct mbox *mbox = dev->mbox;
115 	struct mbox_dev *mdev = &mbox->dev[0];
116 	volatile uint64_t int_status;
117 	struct mbox_hdr *req_hdr;
118 	struct mbox_msghdr *msg;
119 	struct mbox_msghdr *rsp;
120 	uint64_t offset;
121 	size_t size;
122 	int i;
123 
124 	/* Disable PF interrupts; we are called from interrupt context */
125 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
126 
127 	/* Send message to AF */
128 	mbox_msg_send(mbox, 0);
129 
130 	/* Wait for AF response */
131 	do {
132 		plt_delay_ms(sleep);
133 		timeout += sleep;
134 		if (timeout >= mbox->rsp_tmo) {
135 			plt_err("Routed messages %d timeout: %dms", num_msg,
136 				mbox->rsp_tmo);
137 			break;
138 		}
139 		int_status = plt_read64(dev->bar2 + RVU_PF_INT);
140 	} while ((int_status & 0x1) != 0x1);
141 
142 	/* Clear */
143 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT);
144 
145 	/* Enable interrupts */
146 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1S);
147 
148 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
149 	if (req_hdr->num_msgs != num_msg)
150 		plt_err("Routed messages: %d received: %d", num_msg,
151 			req_hdr->num_msgs);
152 
153 	/* Get messages from mbox */
154 	offset = mbox->rx_start +
155 		 PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
156 	for (i = 0; i < req_hdr->num_msgs; i++) {
157 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
158 		size = mbox->rx_start + msg->next_msgoff - offset;
159 
160 		/* Reserve PF/VF mbox message */
161 		size = PLT_ALIGN(size, MBOX_MSG_ALIGN);
162 		rsp = mbox_alloc_msg(&dev->mbox_vfpf, vf, size);
163 		if (!rsp) {
164 			plt_err("Failed to reserve VF%d message", vf);
165 			continue;
166 		}
167 
168 		mbox_rsp_init(msg->id, rsp);
169 
170 		/* Copy message from AF<->PF mbox to PF<->VF mbox */
171 		mbox_memcpy((uint8_t *)rsp + sizeof(struct mbox_msghdr),
172 			    (uint8_t *)msg + sizeof(struct mbox_msghdr),
173 			    size - sizeof(struct mbox_msghdr));
174 
175 		/* Set status and sender pf_func data */
176 		rsp->rc = msg->rc;
177 		rsp->pcifunc = msg->pcifunc;
178 
179 		/* Whenever a PF comes up, AF sends the link status to it, but
180 		 * when a VF comes up no such event is sent to it. Use the
181 		 * MBOX_MSG_NIX_LF_START_RX response from AF as the trigger to
182 		 * send the PF's link status to the VF.
183 		 */
184 		if (msg->id == MBOX_MSG_NIX_LF_START_RX) {
185 			/* Send link status to VF */
186 			struct cgx_link_user_info linfo;
187 			struct mbox_msghdr *vf_msg;
188 			size_t sz;
189 
190 			/* Get the link status */
191 			memset(&linfo, 0, sizeof(struct cgx_link_user_info));
192 			if (dev->ops && dev->ops->link_status_get)
193 				dev->ops->link_status_get(dev->roc_nix, &linfo);
194 
195 			sz = PLT_ALIGN(mbox_id2size(MBOX_MSG_CGX_LINK_EVENT),
196 				       MBOX_MSG_ALIGN);
197 			/* Prepare the message to be sent */
198 			vf_msg = mbox_alloc_msg(&dev->mbox_vfpf_up, vf, sz);
199 			if (vf_msg) {
200 				mbox_req_init(MBOX_MSG_CGX_LINK_EVENT, vf_msg);
201 				memcpy((uint8_t *)vf_msg +
202 				       sizeof(struct mbox_msghdr), &linfo,
203 				       sizeof(struct cgx_link_user_info));
204 
205 				vf_msg->rc = msg->rc;
206 				vf_msg->pcifunc = msg->pcifunc;
207 				/* Send to VF */
208 				mbox_msg_send_up(&dev->mbox_vfpf_up, vf);
209 				mbox_wait_for_zero(&dev->mbox_vfpf_up, vf);
210 			}
211 		}
212 
213 		offset = mbox->rx_start + msg->next_msgoff;
214 	}
215 
216 	return req_hdr->num_msgs;
217 }
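
/*
 * Note on the iteration used above (and in the handlers below): each
 * mbox_msghdr carries the offset of the message that follows it, so every
 * walk over num_msgs entries takes the form:
 *
 *	offset = mbox->rx_start + PLT_ALIGN(sizeof(struct mbox_hdr), MBOX_MSG_ALIGN);
 *	for (i = 0; i < req_hdr->num_msgs; i++) {
 *		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
 *		... handle msg ...
 *		offset = mbox->rx_start + msg->next_msgoff;
 *	}
 */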
218 
219 /* PF receives mbox DOWN messages from VF and forwards to AF */
220 static int
221 vf_pf_process_msgs(struct dev *dev, uint16_t vf)
222 {
223 	struct mbox *mbox = &dev->mbox_vfpf;
224 	struct mbox_dev *mdev = &mbox->dev[vf];
225 	struct mbox_hdr *req_hdr;
226 	struct mbox_msghdr *msg;
227 	int offset, routed = 0;
228 	size_t size;
229 	uint16_t i;
230 
231 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
232 	if (!req_hdr->num_msgs)
233 		return 0;
234 
235 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
236 
237 	mbox_get(dev->mbox);
238 	for (i = 0; i < req_hdr->num_msgs; i++) {
239 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
240 		size = mbox->rx_start + msg->next_msgoff - offset;
241 
242 		/* RVU_PF_FUNC_S */
243 		msg->pcifunc = dev_pf_func(dev->pf, vf);
244 
245 		if (msg->id == MBOX_MSG_READY) {
246 			struct ready_msg_rsp *rsp;
247 			uint16_t max_bits = sizeof(dev->active_vfs[0]) * 8;
248 
249 			/* Handle READY message in PF */
250 			dev->active_vfs[vf / max_bits] |=
251 				BIT_ULL(vf % max_bits);
252 			rsp = (struct ready_msg_rsp *)mbox_alloc_msg(
253 				mbox, vf, sizeof(*rsp));
254 			if (!rsp) {
255 				plt_err("Failed to alloc VF%d READY message",
256 					vf);
257 				continue;
258 			}
259 
260 			mbox_rsp_init(msg->id, rsp);
261 
262 			/* PF/VF function ID */
263 			rsp->hdr.pcifunc = msg->pcifunc;
264 			rsp->hdr.rc = 0;
265 		} else {
266 			struct mbox_msghdr *af_req;
267 			/* Reserve AF/PF mbox message */
268 			size = PLT_ALIGN(size, MBOX_MSG_ALIGN);
269 			af_req = mbox_alloc_msg(dev->mbox, 0, size);
270 			if (af_req == NULL) {
				mbox_put(dev->mbox);
				return -ENOSPC;
			}
272 			mbox_req_init(msg->id, af_req);
273 
274 			/* Copy message from VF<->PF mbox to PF<->AF mbox */
275 			mbox_memcpy((uint8_t *)af_req +
276 					    sizeof(struct mbox_msghdr),
277 				    (uint8_t *)msg + sizeof(struct mbox_msghdr),
278 				    size - sizeof(struct mbox_msghdr));
279 			af_req->pcifunc = msg->pcifunc;
280 			routed++;
281 		}
282 		offset = mbox->rx_start + msg->next_msgoff;
283 	}
284 
285 	if (routed > 0) {
286 		plt_base_dbg("pf:%d routed %d messages from vf:%d to AF",
287 			     dev->pf, routed, vf);
288 		/* PF will send the messages to AF and wait for responses */
289 		af_pf_wait_msg(dev, vf, routed);
290 		mbox_reset(dev->mbox, 0);
291 	}
292 	mbox_put(dev->mbox);
293 
294 	/* Send mbox responses to VF */
295 	if (mdev->num_msgs) {
296 		plt_base_dbg("pf:%d reply %d messages to vf:%d", dev->pf,
297 			     mdev->num_msgs, vf);
298 		mbox_msg_send(mbox, vf);
299 	}
300 
301 	return i;
302 }
303 
304 /* VF sends Ack to PF's UP messages */
305 static int
306 vf_pf_process_up_msgs(struct dev *dev, uint16_t vf)
307 {
308 	struct mbox *mbox = &dev->mbox_vfpf_up;
309 	struct mbox_dev *mdev = &mbox->dev[vf];
310 	struct mbox_hdr *req_hdr;
311 	struct mbox_msghdr *msg;
312 	int msgs_acked = 0;
313 	int offset;
314 	uint16_t i;
315 
316 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
317 	if (req_hdr->num_msgs == 0)
318 		return 0;
319 
320 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
321 
322 	for (i = 0; i < req_hdr->num_msgs; i++) {
323 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
324 
325 		msgs_acked++;
326 		/* RVU_PF_FUNC_S */
327 		msg->pcifunc = dev_pf_func(dev->pf, vf);
328 
329 		switch (msg->id) {
330 		case MBOX_MSG_CGX_LINK_EVENT:
331 			plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
332 				     msg->id, mbox_id2name(msg->id),
333 				     msg->pcifunc, dev_get_pf(msg->pcifunc),
334 				     dev_get_vf(msg->pcifunc));
335 			break;
336 		case MBOX_MSG_CGX_PTP_RX_INFO:
337 			plt_base_dbg("PF: Msg 0x%x (%s) fn:0x%x (pf:%d,vf:%d)",
338 				     msg->id, mbox_id2name(msg->id),
339 				     msg->pcifunc, dev_get_pf(msg->pcifunc),
340 				     dev_get_vf(msg->pcifunc));
341 			break;
342 		default:
343 			plt_err("Not handled UP msg 0x%x (%s) func:0x%x",
344 				msg->id, mbox_id2name(msg->id), msg->pcifunc);
345 		}
346 		offset = mbox->rx_start + msg->next_msgoff;
347 	}
348 	mbox_reset(mbox, vf);
349 	mdev->msgs_acked = msgs_acked;
350 	plt_wmb();
351 
352 	return i;
353 }
354 
355 /* PF handling messages from VF */
356 static void
357 roc_vf_pf_mbox_handle_msg(void *param, dev_intr_t *intr)
358 {
359 	uint16_t vf, max_vf, max_bits;
360 	struct dev *dev = param;
361 
362 	max_bits = sizeof(dev->intr.bits[0]) * 8;
363 	max_vf = max_bits * MAX_VFPF_DWORD_BITS;
364 
365 	for (vf = 0; vf < max_vf; vf++) {
366 		if (intr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) {
367 			plt_base_dbg("Process vf:%d request (pf:%d, vf:%d)", vf,
368 				     dev->pf, dev->vf);
369 			/* VF initiated down messages */
370 			vf_pf_process_msgs(dev, vf);
371 			/* VF replies to PF's UP messages */
372 			vf_pf_process_up_msgs(dev, vf);
373 			intr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits));
374 		}
375 	}
376 }
377 
378 /* IRQ to PF from VF - PF context (interrupt thread) */
379 static void
380 roc_vf_pf_mbox_irq(void *param)
381 {
382 	bool signal_thread = false;
383 	struct dev *dev = param;
384 	dev_intr_t intrb;
385 	uint64_t intr;
386 	int vfpf, sz;
387 
388 	sz = sizeof(intrb.bits[0]) * MAX_VFPF_DWORD_BITS;
389 	memset(intrb.bits, 0, sz);
390 	for (vfpf = 0; vfpf < MAX_VFPF_DWORD_BITS; ++vfpf) {
391 		intr = plt_read64(dev->bar2 + RVU_PF_VFPF_MBOX_INTX(vfpf));
392 		if (!intr)
393 			continue;
394 
395 		plt_base_dbg("vfpf: %d intr: 0x%" PRIx64 " (pf:%d, vf:%d)",
396 			     vfpf, intr, dev->pf, dev->vf);
397 
398 		/* Save and clear intr bits */
399 		intrb.bits[vfpf] |= intr;
400 		plt_write64(intr, dev->bar2 + RVU_PF_VFPF_MBOX_INTX(vfpf));
401 		signal_thread = true;
402 	}
403 
404 	if (signal_thread) {
405 		pthread_mutex_lock(&dev->sync.mutex);
406 		/* Interrupt state was saved in a local variable first, as dev->intr.bits
407 		 * is a shared resource between the VF msg handler and interrupt threads.
408 		 */
409 		memcpy(dev->intr.bits, intrb.bits, sz);
410 		/* MBOX message received from VF */
411 		dev->sync.msg_avail |= ROC_DEV_MBOX_PEND;
412 		/* Signal vf message handler thread */
413 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
414 		pthread_mutex_unlock(&dev->sync.mutex);
415 	}
416 }
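
/*
 * The IRQ handler above is the producer half of a mutex/condvar handshake;
 * the consumer half in pf_vf_mbox_thread_main() below drains the latched
 * bits under the same lock, roughly:
 *
 *	pthread_mutex_lock(&dev->sync.mutex);
 *	while (dev->sync.msg_avail) {
 *		... copy then clear dev->intr.bits / dev->flr.bits ...
 *	}
 */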
417 
418 /* Received response from AF (PF context) / PF (VF context) */
419 static void
420 process_msgs(struct dev *dev, struct mbox *mbox)
421 {
422 	struct mbox_dev *mdev = &mbox->dev[0];
423 	struct mbox_hdr *req_hdr;
424 	struct mbox_msghdr *msg;
425 	int msgs_acked = 0;
426 	int offset;
427 	uint16_t i;
428 
429 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
430 	if (req_hdr->num_msgs == 0)
431 		return;
432 
433 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
434 	for (i = 0; i < req_hdr->num_msgs; i++) {
435 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
436 
437 		msgs_acked++;
438 		plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
439 			     mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
440 			     dev_get_vf(msg->pcifunc));
441 
442 		switch (msg->id) {
443 			/* Add message IDs that are handled here */
444 		case MBOX_MSG_READY:
445 			/* Get our identity */
446 			dev->pf_func = msg->pcifunc;
447 			break;
448 		case MBOX_MSG_CGX_PRIO_FLOW_CTRL_CFG:
449 		case MBOX_MSG_CGX_CFG_PAUSE_FRM:
450 			/* Handle the case where one VF tries to disable PFC
451 			 * while PFC is already configured on other VFs. This is
452 			 * not an error but a warning that can be ignored.
453 			 */
454 			if (msg->rc) {
455 				if (msg->rc == LMAC_AF_ERR_PERM_DENIED) {
456 					plt_mbox_dbg(
457 						"Rx flow control disable not permitted "
458 						"as it is used by other PF/VFs");
459 					msg->rc = 0;
460 				} else {
461 					plt_err("Message (%s) response has err=%d",
462 						mbox_id2name(msg->id), msg->rc);
463 				}
464 			}
465 			break;
466 		case MBOX_MSG_CGX_PROMISC_DISABLE:
467 		case MBOX_MSG_CGX_PROMISC_ENABLE:
468 			if (msg->rc) {
469 				if (msg->rc == LMAC_AF_ERR_INVALID_PARAM) {
470 					plt_mbox_dbg("Already in same promisc state");
471 					msg->rc = 0;
472 				} else {
473 					plt_err("Message (%s) response has err=%d",
474 						mbox_id2name(msg->id), msg->rc);
475 				}
476 			}
477 			break;
478 
479 		default:
480 			if (msg->rc)
481 				plt_err("Message (%s) response has err=%d (%s)",
482 					mbox_id2name(msg->id), msg->rc, roc_error_msg_get(msg->rc));
483 			break;
484 		}
485 		offset = mbox->rx_start + msg->next_msgoff;
486 	}
487 
488 	mbox_reset(mbox, 0);
489 	/* Update msgs_acked in case someone is waiting for a message in mbox_wait() */
490 	mdev->msgs_acked = msgs_acked;
491 	plt_wmb();
492 }
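
/*
 * Sketch of the waiter side (illustrative; the actual loop lives in the
 * common mbox code): a sender that issued num_msgs requests spins until
 * the interrupt path above acknowledges them all:
 *
 *	while (mdev->msgs_acked != mdev->num_msgs)
 *		plt_delay_ms(1);
 */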
493 
494 /* Copies the message received from AF and sends it to VF */
495 static void
496 pf_vf_mbox_send_up_msg(struct dev *dev, void *rec_msg)
497 {
498 	uint16_t max_bits = sizeof(dev->active_vfs[0]) * 8;
499 	struct mbox *vf_mbox = &dev->mbox_vfpf_up;
500 	struct msg_req *msg = rec_msg;
501 	struct mbox_msghdr *vf_msg;
502 	uint16_t vf;
503 	size_t size;
504 
505 	size = PLT_ALIGN(mbox_id2size(msg->hdr.id), MBOX_MSG_ALIGN);
506 	/* Send UP message to all VFs */
507 	for (vf = 0; vf < vf_mbox->ndevs; vf++) {
508 		/* VF active */
509 		if (!(dev->active_vfs[vf / max_bits] & BIT_ULL(vf % max_bits)))
510 			continue;
511 
512 		plt_base_dbg("(%s) size: %zx to VF: %d",
513 			     mbox_id2name(msg->hdr.id), size, vf);
514 
515 		/* Reserve PF/VF mbox message */
516 		vf_msg = mbox_alloc_msg(vf_mbox, vf, size);
517 		if (!vf_msg) {
518 			plt_err("Failed to alloc VF%d UP message", vf);
519 			continue;
520 		}
521 		mbox_req_init(msg->hdr.id, vf_msg);
522 
523 		/*
524 		 * Copy message from AF<->PF UP mbox
525 		 * to PF<->VF UP mbox
526 		 */
527 		mbox_memcpy((uint8_t *)vf_msg + sizeof(struct mbox_msghdr),
528 			    (uint8_t *)msg + sizeof(struct mbox_msghdr),
529 			    size - sizeof(struct mbox_msghdr));
530 
531 		vf_msg->rc = msg->hdr.rc;
532 		/* Set PF to be a sender */
533 		vf_msg->pcifunc = dev->pf_func;
534 
535 		/* Send to VF */
536 		mbox_msg_send(vf_mbox, vf);
537 		mbox_wait_for_zero(&dev->mbox_vfpf_up, vf);
538 	}
539 }
540 
541 static int
542 mbox_up_handler_mcs_intr_notify(struct dev *dev, struct mcs_intr_info *info, struct msg_rsp *rsp)
543 {
544 	struct roc_mcs_event_desc desc = {0};
545 	struct roc_mcs *mcs;
546 
547 	plt_base_dbg("pf:%d/vf:%d msg id 0x%x (%s) from: pf:%d/vf:%d", dev_get_pf(dev->pf_func),
548 		     dev_get_vf(dev->pf_func), info->hdr.id, mbox_id2name(info->hdr.id),
549 		     dev_get_pf(info->hdr.pcifunc), dev_get_vf(info->hdr.pcifunc));
550 
551 	mcs = roc_idev_mcs_get(info->mcs_id);
552 	if (!mcs)
553 		goto exit;
554 
555 	if (info->intr_mask) {
556 		switch (info->intr_mask) {
557 		case MCS_CPM_RX_SECTAG_V_EQ1_INT:
558 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
559 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_V_EQ1;
560 			break;
561 		case MCS_CPM_RX_SECTAG_E_EQ0_C_EQ1_INT:
562 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
563 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_E_EQ0_C_EQ1;
564 			break;
565 		case MCS_CPM_RX_SECTAG_SL_GTE48_INT:
566 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
567 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_SL_GTE48;
568 			break;
569 		case MCS_CPM_RX_SECTAG_ES_EQ1_SC_EQ1_INT:
570 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
571 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_ES_EQ1_SC_EQ1;
572 			break;
573 		case MCS_CPM_RX_SECTAG_SC_EQ1_SCB_EQ1_INT:
574 			desc.type = ROC_MCS_EVENT_SECTAG_VAL_ERR;
575 			desc.subtype = ROC_MCS_EVENT_RX_SECTAG_SC_EQ1_SCB_EQ1;
576 			break;
577 		case MCS_CPM_RX_PACKET_XPN_EQ0_INT:
578 			desc.type = ROC_MCS_EVENT_RX_SA_PN_HARD_EXP;
579 			desc.metadata.sa_idx = info->sa_id;
580 			break;
581 		case MCS_CPM_RX_PN_THRESH_REACHED_INT:
582 			desc.type = ROC_MCS_EVENT_RX_SA_PN_SOFT_EXP;
583 			desc.metadata.sa_idx = info->sa_id;
584 			break;
585 		case MCS_CPM_TX_PACKET_XPN_EQ0_INT:
586 			desc.type = ROC_MCS_EVENT_TX_SA_PN_HARD_EXP;
587 			desc.metadata.sa_idx = info->sa_id;
588 			break;
589 		case MCS_CPM_TX_PN_THRESH_REACHED_INT:
590 			desc.type = ROC_MCS_EVENT_TX_SA_PN_SOFT_EXP;
591 			desc.metadata.sa_idx = info->sa_id;
592 			break;
593 		case MCS_CPM_TX_SA_NOT_VALID_INT:
594 			desc.type = ROC_MCS_EVENT_SA_NOT_VALID;
595 			break;
596 		case MCS_BBE_RX_DFIFO_OVERFLOW_INT:
597 		case MCS_BBE_TX_DFIFO_OVERFLOW_INT:
598 			desc.type = ROC_MCS_EVENT_FIFO_OVERFLOW;
599 			desc.subtype = ROC_MCS_EVENT_DATA_FIFO_OVERFLOW;
600 			desc.metadata.lmac_id = info->lmac_id;
601 			break;
602 		case MCS_BBE_RX_PLFIFO_OVERFLOW_INT:
603 		case MCS_BBE_TX_PLFIFO_OVERFLOW_INT:
604 			desc.type = ROC_MCS_EVENT_FIFO_OVERFLOW;
605 			desc.subtype = ROC_MCS_EVENT_POLICY_FIFO_OVERFLOW;
606 			desc.metadata.lmac_id = info->lmac_id;
607 			break;
608 		case MCS_PAB_RX_CHAN_OVERFLOW_INT:
609 		case MCS_PAB_TX_CHAN_OVERFLOW_INT:
610 			desc.type = ROC_MCS_EVENT_FIFO_OVERFLOW;
611 			desc.subtype = ROC_MCS_EVENT_PKT_ASSM_FIFO_OVERFLOW;
612 			desc.metadata.lmac_id = info->lmac_id;
613 			break;
614 		default:
615 			goto exit;
616 		}
617 
618 		mcs_event_cb_process(mcs, &desc);
619 	}
620 
621 exit:
622 	rsp->hdr.rc = 0;
623 	return 0;
624 }
625 
626 static int
627 mbox_up_handler_cgx_link_event(struct dev *dev, struct cgx_link_info_msg *msg,
628 			       struct msg_rsp *rsp)
629 {
630 	struct cgx_link_user_info *linfo = &msg->link_info;
631 	void *roc_nix = dev->roc_nix;
632 
633 	plt_base_dbg("pf:%d/vf:%d NIC Link %s --> 0x%x (%s) from: pf:%d/vf:%d",
634 		     dev_get_pf(dev->pf_func), dev_get_vf(dev->pf_func),
635 		     linfo->link_up ? "UP" : "DOWN", msg->hdr.id,
636 		     mbox_id2name(msg->hdr.id), dev_get_pf(msg->hdr.pcifunc),
637 		     dev_get_vf(msg->hdr.pcifunc));
638 
639 	/* PF gets link notification from AF */
640 	if (dev_get_pf(msg->hdr.pcifunc) == 0) {
641 		if (dev->ops && dev->ops->link_status_update)
642 			dev->ops->link_status_update(roc_nix, linfo);
643 
644 		/* Forward the same message as received from AF to VF */
645 		pf_vf_mbox_send_up_msg(dev, msg);
646 	} else {
647 		/* VF gets link up notification */
648 		if (dev->ops && dev->ops->link_status_update)
649 			dev->ops->link_status_update(roc_nix, linfo);
650 	}
651 
652 	rsp->hdr.rc = 0;
653 	return 0;
654 }
655 
656 static int
657 mbox_up_handler_cgx_ptp_rx_info(struct dev *dev,
658 				struct cgx_ptp_rx_info_msg *msg,
659 				struct msg_rsp *rsp)
660 {
661 	void *roc_nix = dev->roc_nix;
662 
663 	plt_base_dbg("pf:%d/vf:%d PTP mode %s --> 0x%x (%s) from: pf:%d/vf:%d",
664 		     dev_get_pf(dev->pf_func), dev_get_vf(dev->pf_func),
665 		     msg->ptp_en ? "ENABLED" : "DISABLED", msg->hdr.id,
666 		     mbox_id2name(msg->hdr.id), dev_get_pf(msg->hdr.pcifunc),
667 		     dev_get_vf(msg->hdr.pcifunc));
668 
669 	/* PF gets PTP notification from AF */
670 	if (dev_get_pf(msg->hdr.pcifunc) == 0) {
671 		if (dev->ops && dev->ops->ptp_info_update)
672 			dev->ops->ptp_info_update(roc_nix, msg->ptp_en);
673 
674 		/* Forward the same message as received from AF to VF */
675 		pf_vf_mbox_send_up_msg(dev, msg);
676 	} else {
677 		/* VF gets PTP notification */
678 		if (dev->ops && dev->ops->ptp_info_update)
679 			dev->ops->ptp_info_update(roc_nix, msg->ptp_en);
680 	}
681 
682 	rsp->hdr.rc = 0;
683 	return 0;
684 }
685 
686 static int
687 mbox_process_msgs_up(struct dev *dev, struct mbox_msghdr *req)
688 {
689 	/* Check if valid; if not, reply with an invalid msg */
690 	if (req->sig != MBOX_REQ_SIG)
691 		return -EIO;
692 
693 	switch (req->id) {
694 	default:
695 		reply_invalid_msg(&dev->mbox_up, 0, 0, req->id);
696 		break;
697 #define M(_name, _id, _fn_name, _req_type, _rsp_type)                          \
698 	case _id: {                                                            \
699 		struct _rsp_type *rsp;                                         \
700 		int err;                                                       \
701 		rsp = (struct _rsp_type *)mbox_alloc_msg(                      \
702 			&dev->mbox_up, 0, sizeof(struct _rsp_type));           \
703 		if (!rsp)                                                      \
704 			return -ENOMEM;                                        \
705 		rsp->hdr.id = _id;                                             \
706 		rsp->hdr.sig = MBOX_RSP_SIG;                                   \
707 		rsp->hdr.pcifunc = dev->pf_func;                               \
708 		rsp->hdr.rc = 0;                                               \
709 		err = mbox_up_handler_##_fn_name(dev, (struct _req_type *)req, \
710 						 rsp);                         \
711 		return err;                                                    \
712 	}
713 		MBOX_UP_CGX_MESSAGES
714 		MBOX_UP_MCS_MESSAGES
715 #undef M
716 	}
717 
718 	return -ENODEV;
719 }
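
/*
 * For reference, each M() entry above expands to one case label. Taking
 * the CGX link event as an example (handler and message types are the
 * ones defined in this file), the expansion is roughly:
 *
 *	case MBOX_MSG_CGX_LINK_EVENT: {
 *		struct msg_rsp *rsp;
 *		int err;
 *		rsp = (struct msg_rsp *)mbox_alloc_msg(
 *			&dev->mbox_up, 0, sizeof(struct msg_rsp));
 *		if (!rsp)
 *			return -ENOMEM;
 *		... fill rsp->hdr ...
 *		err = mbox_up_handler_cgx_link_event(
 *			dev, (struct cgx_link_info_msg *)req, rsp);
 *		return err;
 *	}
 */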
720 
721 /* Received UP messages from AF (PF context) / PF (VF context) */
722 static void
723 process_msgs_up(struct dev *dev, struct mbox *mbox)
724 {
725 	struct mbox_dev *mdev = &mbox->dev[0];
726 	struct mbox_hdr *req_hdr;
727 	struct mbox_msghdr *msg;
728 	int i, err, offset;
729 
730 	req_hdr = (struct mbox_hdr *)((uintptr_t)mdev->mbase + mbox->rx_start);
731 	if (req_hdr->num_msgs == 0)
732 		return;
733 
734 	offset = mbox->rx_start + PLT_ALIGN(sizeof(*req_hdr), MBOX_MSG_ALIGN);
735 	for (i = 0; i < req_hdr->num_msgs; i++) {
736 		msg = (struct mbox_msghdr *)((uintptr_t)mdev->mbase + offset);
737 
738 		plt_base_dbg("Message 0x%x (%s) pf:%d/vf:%d", msg->id,
739 			     mbox_id2name(msg->id), dev_get_pf(msg->pcifunc),
740 			     dev_get_vf(msg->pcifunc));
741 		err = mbox_process_msgs_up(dev, msg);
742 		if (err)
743 			plt_err("Error %d handling 0x%x (%s)", err, msg->id,
744 				mbox_id2name(msg->id));
745 		offset = mbox->rx_start + msg->next_msgoff;
746 	}
747 	/* Send mbox responses */
748 	if (mdev->num_msgs) {
749 		plt_base_dbg("Reply num_msgs:%d", mdev->num_msgs);
750 		mbox_msg_send(mbox, 0);
751 	}
752 }
753 
754 /* IRQ to VF from PF - VF context (interrupt thread) */
755 static void
756 roc_pf_vf_mbox_irq(void *param)
757 {
758 	struct dev *dev = param;
759 	uint64_t mbox_data;
760 	uint64_t intr;
761 
762 	intr = plt_read64(dev->bar2 + RVU_VF_INT);
763 	if (intr == 0)
764 		plt_base_dbg("Proceeding to check mbox UP messages if any");
765 
766 	plt_write64(intr, dev->bar2 + RVU_VF_INT);
767 	plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
768 
769 	/* Read the UP/DOWN message; the next message send is delayed
770 	 * by 1 ms until this region is zeroed by mbox_wait_for_zero()
771 	 */
772 	mbox_data = plt_read64(dev->bar2 + RVU_VF_VFPF_MBOX0);
773 	if (mbox_data)
774 		plt_write64(!mbox_data, dev->bar2 + RVU_VF_VFPF_MBOX0);
775 
776 	/* First process all configuration messages */
777 	process_msgs(dev, dev->mbox);
778 
779 	/* Process Uplink messages */
780 	process_msgs_up(dev, &dev->mbox_up);
781 }
782 
783 /* IRQ to PF from AF - PF context (interrupt thread) */
784 static void
785 roc_af_pf_mbox_irq(void *param)
786 {
787 	struct dev *dev = param;
788 	uint64_t mbox_data;
789 	uint64_t intr;
790 
791 	intr = plt_read64(dev->bar2 + RVU_PF_INT);
792 	if (intr == 0)
793 		plt_base_dbg("Proceeding to check mbox UP messages if any");
794 
795 	plt_write64(intr, dev->bar2 + RVU_PF_INT);
796 	plt_base_dbg("Irq 0x%" PRIx64 "(pf:%d,vf:%d)", intr, dev->pf, dev->vf);
797 
798 	/* Read the UP/DOWN message; the next message send is delayed
799 	 * by 1 ms until this region is zeroed by mbox_wait_for_zero()
800 	 */
801 	mbox_data = plt_read64(dev->bar2 + RVU_PF_PFAF_MBOX0);
802 	if (mbox_data)
803 		plt_write64(!mbox_data, dev->bar2 + RVU_PF_PFAF_MBOX0);
804 
805 	/* First process all configuration messages */
806 	process_msgs(dev, dev->mbox);
807 
808 	/* Process Uplink messages */
809 	process_msgs_up(dev, &dev->mbox_up);
810 }
811 
812 static int
813 mbox_register_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
814 {
815 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
816 	int i, rc;
817 
818 	/* HW clear irq */
819 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i)
820 		plt_write64(~0ull,
821 			    dev->bar2 + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(i));
822 
823 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
824 
825 	/* MBOX interrupt for VF(0...63) <-> PF */
826 	rc = dev_irq_register(intr_handle, roc_vf_pf_mbox_irq, dev,
827 			      RVU_PF_INT_VEC_VFPF_MBOX0);
828 
829 	if (rc) {
830 		plt_err("Failed to register PF(VF0-63) mbox irq");
831 		return rc;
832 	}
833 	/* MBOX interrupt for VF(64...127) <-> PF */
834 	rc = dev_irq_register(intr_handle, roc_vf_pf_mbox_irq, dev,
835 			      RVU_PF_INT_VEC_VFPF_MBOX1);
836 
837 	if (rc) {
838 		plt_err("Failed to register PF(VF64-127) mbox irq");
839 		return rc;
840 	}
841 	/* MBOX interrupt AF <-> PF */
842 	rc = dev_irq_register(intr_handle, roc_af_pf_mbox_irq, dev,
843 			      RVU_PF_INT_VEC_AFPF_MBOX);
844 	if (rc) {
845 		plt_err("Failed to register AF<->PF mbox irq");
846 		return rc;
847 	}
848 
849 	/* HW enable intr */
850 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i)
851 		plt_write64(~0ull,
852 			    dev->bar2 + RVU_PF_VFPF_MBOX_INT_ENA_W1SX(i));
853 
854 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT);
855 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1S);
856 
857 	return rc;
858 }
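
/*
 * Register naming note: the *_ENA_W1S/*_ENA_W1C pairs used above are
 * write-1-to-set/write-1-to-clear interrupt enable registers, so a single
 * write of ~0ull enables or disables every source at once:
 *
 *	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1S);	enable all
 *	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);	disable all
 */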
859 
860 static int
861 mbox_register_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
862 {
863 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
864 	int rc;
865 
866 	/* Clear irq */
867 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT_ENA_W1C);
868 
869 	/* MBOX interrupt PF <-> VF */
870 	rc = dev_irq_register(intr_handle, roc_pf_vf_mbox_irq, dev,
871 			      RVU_VF_INT_VEC_MBOX);
872 	if (rc) {
873 		plt_err("Failed to register PF<->VF mbox irq");
874 		return rc;
875 	}
876 
877 	/* HW enable intr */
878 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT);
879 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT_ENA_W1S);
880 
881 	return rc;
882 }
883 
884 int
885 dev_mbox_register_irq(struct plt_pci_device *pci_dev, struct dev *dev)
886 {
887 	if (dev_is_vf(dev))
888 		return mbox_register_vf_irq(pci_dev, dev);
889 	else
890 		return mbox_register_pf_irq(pci_dev, dev);
891 }
892 
893 static void
894 mbox_unregister_pf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
895 {
896 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
897 	int i;
898 
899 	/* HW clear irq */
900 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i)
901 		plt_write64(~0ull,
902 			    dev->bar2 + RVU_PF_VFPF_MBOX_INT_ENA_W1CX(i));
903 
904 	plt_write64(~0ull, dev->bar2 + RVU_PF_INT_ENA_W1C);
905 
906 	/* Unregister the interrupt handler for each vector */
907 	/* MBOX interrupt for VF(0...63) <-> PF */
908 	dev_irq_unregister(intr_handle, roc_vf_pf_mbox_irq, dev,
909 			   RVU_PF_INT_VEC_VFPF_MBOX0);
910 
911 	/* MBOX interrupt for VF(64...127) <-> PF */
912 	dev_irq_unregister(intr_handle, roc_vf_pf_mbox_irq, dev,
913 			   RVU_PF_INT_VEC_VFPF_MBOX1);
914 
915 	/* MBOX interrupt AF <-> PF */
916 	dev_irq_unregister(intr_handle, roc_af_pf_mbox_irq, dev,
917 			   RVU_PF_INT_VEC_AFPF_MBOX);
918 }
919 
920 static void
921 mbox_unregister_vf_irq(struct plt_pci_device *pci_dev, struct dev *dev)
922 {
923 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
924 
925 	/* Clear irq */
926 	plt_write64(~0ull, dev->bar2 + RVU_VF_INT_ENA_W1C);
927 
928 	/* Unregister the interrupt handler */
929 	dev_irq_unregister(intr_handle, roc_pf_vf_mbox_irq, dev,
930 			   RVU_VF_INT_VEC_MBOX);
931 }
932 
933 static void
934 mbox_unregister_irq(struct plt_pci_device *pci_dev, struct dev *dev)
935 {
936 	if (dev_is_vf(dev))
937 		mbox_unregister_vf_irq(pci_dev, dev);
938 	else
939 		mbox_unregister_pf_irq(pci_dev, dev);
940 }
941 
942 static int
943 vf_flr_send_msg(struct dev *dev, uint16_t vf)
944 {
945 	struct mbox *mbox = dev->mbox;
946 	struct msg_req *req;
947 	int rc;
948 
949 	req = mbox_alloc_msg_vf_flr(mbox_get(mbox));
950 	if (req == NULL)
951 		return -ENOSPC;
952 	/* Overwrite pcifunc to indicate VF */
953 	req->hdr.pcifunc = dev_pf_func(dev->pf, vf);
954 
955 	/* Sync message in interrupt context */
956 	rc = pf_af_sync_msg(dev, NULL);
957 	if (rc)
958 		plt_err("Failed to send VF FLR mbox msg, rc=%d", rc);
959 
960 	mbox_put(mbox);
961 
962 	return rc;
963 }
964 
965 static void
966 roc_pf_vf_flr_irq(void *param)
967 {
968 	struct dev *dev = (struct dev *)param;
969 	bool signal_thread = false;
970 	dev_intr_t flr;
971 	uintptr_t bar2;
972 	uint64_t intr;
973 	int i, sz;
974 
975 	bar2 = dev->bar2;
976 
977 	sz = sizeof(flr.bits[0]) * MAX_VFPF_DWORD_BITS;
978 	memset(flr.bits, 0, sz);
979 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i) {
980 		intr = plt_read64(bar2 + RVU_PF_VFFLR_INTX(i));
981 		if (!intr)
982 			continue;
983 
984 		/* Clear interrupt */
985 		plt_write64(intr, bar2 + RVU_PF_VFFLR_INTX(i));
986 		/* Disable the interrupt */
987 		plt_write64(intr,
988 			    bar2 + RVU_PF_VFFLR_INT_ENA_W1CX(i));
989 
990 		/* Save FLR interrupts per VF as bits */
991 		flr.bits[i] |= intr;
992 		/* Enable interrupt */
993 		plt_write64(~0ull,
994 			    bar2 + RVU_PF_VFFLR_INT_ENA_W1SX(i));
995 		signal_thread = true;
996 	}
997 
998 	if (signal_thread) {
999 		pthread_mutex_lock(&dev->sync.mutex);
1000 		/* Interrupt state was saved in a local variable first, as dev->flr.bits
1001 		 * is a shared resource between the VF msg handler and interrupt threads.
1002 		 */
1003 		memcpy(dev->flr.bits, flr.bits, sz);
1004 		/* FLR message received from VF */
1005 		dev->sync.msg_avail |= ROC_DEV_FLR_PEND;
1006 		/* Signal vf message handler thread */
1007 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
1008 		pthread_mutex_unlock(&dev->sync.mutex);
1009 	}
1010 }
1011 
1012 static int
1013 vf_flr_unregister_irqs(struct plt_pci_device *pci_dev, struct dev *dev)
1014 {
1015 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
1016 	int i;
1017 
1018 	plt_base_dbg("Unregister VF FLR interrupts for %s", pci_dev->name);
1019 
1020 	/* HW clear irq */
1021 	for (i = 0; i < MAX_VFPF_DWORD_BITS; i++)
1022 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFFLR_INT_ENA_W1CX(i));
1023 
1024 	dev_irq_unregister(intr_handle, roc_pf_vf_flr_irq, dev,
1025 			   RVU_PF_INT_VEC_VFFLR0);
1026 
1027 	dev_irq_unregister(intr_handle, roc_pf_vf_flr_irq, dev,
1028 			   RVU_PF_INT_VEC_VFFLR1);
1029 
1030 	return 0;
1031 }
1032 
1033 int
1034 dev_vf_flr_register_irqs(struct plt_pci_device *pci_dev, struct dev *dev)
1035 {
1036 	struct plt_intr_handle *handle = pci_dev->intr_handle;
1037 	int i, rc;
1038 
1039 	plt_base_dbg("Register VF FLR interrupts for %s", pci_dev->name);
1040 
1041 	rc = dev_irq_register(handle, roc_pf_vf_flr_irq, dev,
1042 			      RVU_PF_INT_VEC_VFFLR0);
1043 	if (rc)
1044 		plt_err("Failed to init RVU_PF_INT_VEC_VFFLR0 rc=%d", rc);
1045 
1046 	rc = dev_irq_register(handle, roc_pf_vf_flr_irq, dev,
1047 			      RVU_PF_INT_VEC_VFFLR1);
1048 	if (rc)
1049 		plt_err("Failed to init RVU_PF_INT_VEC_VFFLR1 rc=%d", rc);
1050 
1051 	/* Enable HW interrupt */
1052 	for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i) {
1053 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFFLR_INTX(i));
1054 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFTRPENDX(i));
1055 		plt_write64(~0ull, dev->bar2 + RVU_PF_VFFLR_INT_ENA_W1SX(i));
1056 	}
1057 	return 0;
1058 }
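
/*
 * FLR flow (for reference): roc_pf_vf_flr_irq() latches the per-VF FLR
 * bits and wakes the handler thread; vf_flr_handle_msg() below informs AF
 * via vf_flr_send_msg() and then signals FLR completion by writing the
 * VF's bit to the transaction-pending register:
 *
 *	plt_write64(BIT_ULL(vf % max_bits),
 *		    dev->bar2 + RVU_PF_VFTRPENDX(vf / max_bits));
 */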
1059 
1060 static void
1061 vf_flr_handle_msg(void *param, dev_intr_t *flr)
1062 {
1063 	uint16_t vf, max_vf, max_bits;
1064 	struct dev *dev = param;
1065 
1066 	max_bits = sizeof(flr->bits[0]) * 8;
1067 	max_vf = max_bits * MAX_VFPF_DWORD_BITS;
1068 
1069 	for (vf = 0; vf < max_vf; vf++) {
1070 		if (flr->bits[vf / max_bits] & BIT_ULL(vf % max_bits)) {
1071 			plt_base_dbg("Process FLR vf:%d request (pf:%d, vf:%d)",
1072 				     vf, dev->pf, dev->vf);
1073 			/* Inform AF about VF reset */
1074 			vf_flr_send_msg(dev, vf);
1075 			flr->bits[vf / max_bits] &= ~(BIT_ULL(vf % max_bits));
1076 
1077 			/* Signal FLR finish */
1078 			plt_write64(BIT_ULL(vf % max_bits),
1079 				    dev->bar2 + RVU_PF_VFTRPENDX(vf / max_bits));
1080 		}
1081 	}
1082 }
1083 
1084 static uint32_t
1085 pf_vf_mbox_thread_main(void *arg)
1086 {
1087 	struct dev *dev = arg;
1088 	bool is_flr, is_mbox;
1089 	dev_intr_t flr, intr;
1090 	int sz, rc;
1091 
1092 	sz = sizeof(intr.bits[0]) * MAX_VFPF_DWORD_BITS;
1093 	pthread_mutex_lock(&dev->sync.mutex);
1094 	while (dev->sync.start_thread) {
1095 		do {
1096 			rc = pthread_cond_wait(&dev->sync.pfvf_msg_cond, &dev->sync.mutex);
1097 		} while (rc != 0);
1098 
1099 		if (!dev->sync.msg_avail) {
1100 			continue;
1101 		} else {
1102 			while (dev->sync.msg_avail) {
1103 				/* Check which VF msg received */
1104 				is_mbox = dev->sync.msg_avail & ROC_DEV_MBOX_PEND;
1105 				is_flr = dev->sync.msg_avail & ROC_DEV_FLR_PEND;
1106 				memcpy(intr.bits, dev->intr.bits, sz);
1107 				memcpy(flr.bits, dev->flr.bits, sz);
1108 				memset(dev->flr.bits, 0, sz);
1109 				memset(dev->intr.bits, 0, sz);
1110 				dev->sync.msg_avail = 0;
1111 				/* Unlocking for interrupt thread to grab lock
1112 				 * and update msg_avail field.
1113 				 */
1114 				pthread_mutex_unlock(&dev->sync.mutex);
1115 				/* Calling respective message handlers */
1116 				if (is_mbox)
1117 					roc_vf_pf_mbox_handle_msg(dev, &intr);
1118 				if (is_flr)
1119 					vf_flr_handle_msg(dev, &flr);
1120 				/* Re-lock; pthread_cond_wait() unlocks the mutex while waiting */
1121 				pthread_mutex_lock(&dev->sync.mutex);
1122 			}
1123 		}
1124 	}
1125 
1126 	pthread_mutex_unlock(&dev->sync.mutex);
1127 
1128 	return 0;
1129 }
1130 
1131 static void
1132 clear_rvum_interrupts(struct dev *dev)
1133 {
1134 	uint64_t intr;
1135 	int i;
1136 
1137 	if (dev_is_vf(dev)) {
1138 		/* Clear VF mbox interrupt */
1139 		intr = plt_read64(dev->bar2 + RVU_VF_INT);
1140 		if (intr)
1141 			plt_write64(intr, dev->bar2 + RVU_VF_INT);
1142 	} else {
1143 		/* Clear AF PF interrupt line */
1144 		intr = plt_read64(dev->bar2 + RVU_PF_INT);
1145 		if (intr)
1146 			plt_write64(intr, dev->bar2 + RVU_PF_INT);
1147 		for (i = 0; i < MAX_VFPF_DWORD_BITS; ++i) {
1148 			/* Clear MBOX interrupts */
1149 			intr = plt_read64(dev->bar2 + RVU_PF_VFPF_MBOX_INTX(i));
1150 			if (intr)
1151 				plt_write64(intr,
1152 					    dev->bar2 +
1153 						    RVU_PF_VFPF_MBOX_INTX(i));
1154 			/* Clear VF FLR interrupts */
1155 			intr = plt_read64(dev->bar2 + RVU_PF_VFFLR_INTX(i));
1156 			if (intr)
1157 				plt_write64(intr,
1158 					    dev->bar2 + RVU_PF_VFFLR_INTX(i));
1159 		}
1160 	}
1161 }
1162 
1163 int
1164 dev_active_vfs(struct dev *dev)
1165 {
1166 	int i, count = 0;
1167 
1168 	for (i = 0; i < MAX_VFPF_DWORD_BITS; i++)
1169 		count += rte_popcount64(dev->active_vfs[i]);
1170 
1171 	return count;
1172 }
1173 
1174 static void
1175 dev_vf_hwcap_update(struct plt_pci_device *pci_dev, struct dev *dev)
1176 {
1177 	switch (pci_dev->id.device_id) {
1178 	case PCI_DEVID_CNXK_RVU_PF:
1179 		break;
1180 	case PCI_DEVID_CNXK_RVU_SSO_TIM_VF:
1181 	case PCI_DEVID_CNXK_RVU_NPA_VF:
1182 	case PCI_DEVID_CN10K_RVU_CPT_VF:
1183 	case PCI_DEVID_CN9K_RVU_CPT_VF:
1184 	case PCI_DEVID_CNXK_RVU_AF_VF:
1185 	case PCI_DEVID_CNXK_RVU_VF:
1186 	case PCI_DEVID_CNXK_RVU_SDP_VF:
1187 	case PCI_DEVID_CNXK_RVU_NIX_INL_VF:
1188 		dev->hwcap |= DEV_HWCAP_F_VF;
1189 		break;
1190 	}
1191 }
1192 
1193 static uintptr_t
1194 dev_vf_mbase_get(struct plt_pci_device *pci_dev, struct dev *dev)
1195 {
1196 	void *vf_mbase = NULL;
1197 	uintptr_t pa;
1198 
1199 	if (dev_is_vf(dev))
1200 		return 0;
1201 
1202 	/* For CN10K onwards, the VF mbox region is just after the PF mbox */
1203 	if (!roc_model_is_cn9k())
1204 		return dev->bar4 + MBOX_SIZE;
1205 
1206 	pa = plt_read64(dev->bar2 + RVU_PF_VF_BAR4_ADDR);
1207 	if (!pa) {
1208 		plt_err("Invalid VF mbox base pa");
1209 		return pa;
1210 	}
1211 
1212 	vf_mbase = mbox_mem_map(pa, MBOX_SIZE * pci_dev->max_vfs);
1213 	if (vf_mbase == MAP_FAILED) {
1214 		plt_err("Failed to mmap VF mbase at pa 0x%" PRIxPTR ", errno=%d",
1215 			pa, errno);
1216 		return 0;
1217 	}
1218 	return (uintptr_t)vf_mbase;
1219 }
1220 
1221 static void
1222 dev_vf_mbase_put(struct plt_pci_device *pci_dev, uintptr_t vf_mbase)
1223 {
1224 	if (!vf_mbase || !pci_dev->max_vfs || !roc_model_is_cn9k())
1225 		return;
1226 
1227 	mbox_mem_unmap((void *)vf_mbase, MBOX_SIZE * pci_dev->max_vfs);
1228 }
1229 
1230 static int
1231 dev_setup_shared_lmt_region(struct mbox *mbox, bool valid_iova, uint64_t iova)
1232 {
1233 	struct lmtst_tbl_setup_req *req;
1234 	int rc;
1235 
1236 	req = mbox_alloc_msg_lmtst_tbl_setup(mbox_get(mbox));
1237 	if (!req) {
1238 		rc = -ENOSPC;
1239 		goto exit;
1240 	}
1241 
1242 	/* The pcifunc is set to the primary pcifunc whose LMT address
1243 	 * will be shared. If the call contains a valid IOVA, the pcifunc
1244 	 * field is unused.
1245 	 */
1246 	req->pcifunc = valid_iova ? 0 : idev_lmt_pffunc_get();
1247 	req->use_local_lmt_region = valid_iova;
1248 	req->lmt_iova = iova;
1249 
1250 	rc = mbox_process(mbox);
1251 exit:
1252 	mbox_put(mbox);
1253 	return rc;
1254 }
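
/*
 * Usage sketch (illustrative): dev_lmt_setup() below calls this helper in
 * two ways, either joining the primary pf_func's shared LMT region or
 * publishing this pf_func's own IOVA:
 *
 *	rc = dev_setup_shared_lmt_region(dev->mbox, false, 0);
 *	rc = dev_setup_shared_lmt_region(dev->mbox, true, mz->iova);
 */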
1255 
1256 /* Total no of lines * size of each lmtline */
1257 #define LMT_REGION_SIZE (ROC_NUM_LMT_LINES * ROC_LMT_LINE_SZ)
1258 static int
1259 dev_lmt_setup(struct dev *dev)
1260 {
1261 	char name[PLT_MEMZONE_NAMESIZE];
1262 	const struct plt_memzone *mz;
1263 	struct idev_cfg *idev;
1264 	int rc;
1265 
1266 	if (roc_model_is_cn9k()) {
1267 		dev->lmt_base = dev->bar2 + (RVU_BLOCK_ADDR_LMT << 20);
1268 		return 0;
1269 	}
1270 
1271 	/* CN10K onwards */
1272 
1273 	/* Set common lmt region from second pf_func onwards. */
1274 	if (!dev->disable_shared_lmt && idev_lmt_pffunc_get() &&
1275 	    dev->pf_func != idev_lmt_pffunc_get()) {
1276 		rc = dev_setup_shared_lmt_region(dev->mbox, false, 0);
1277 		if (!rc) {
1278 			/* On success, update the LMT base of the secondary
1279 			 * pf_func with the primary pf_func's LMT base.
1280 			 */
1281 			dev->lmt_base = roc_idev_lmt_base_addr_get();
1282 			return rc;
1283 		}
1284 		plt_err("Failed to setup shared lmt region, pf_func %d err %d; "
1285 			"using a separate LMT region per pf_func",
1286 			dev->pf_func, rc);
1287 	}
1288 
1289 	/* Allocating memory for LMT region */
1290 	sprintf(name, "LMT_MAP%x", dev->pf_func);
1291 
1292 	/* Set the alignment to ensure correct masking when resetting to the
1293 	 * LMT base of a core after all LMT lines under that core are used.
1294 	 * The alignment value LMT_REGION_SIZE handles the case where all
1295 	 * lines are used by one core.
1296 	 */
1297 	mz = plt_lmt_region_reserve_aligned(name, LMT_REGION_SIZE,
1298 					    LMT_REGION_SIZE);
1299 	if (!mz) {
1300 		plt_err("Memory alloc failed: %s", strerror(errno));
1301 		goto fail;
1302 	}
1303 
1304 	/* Share the IOVA address with Kernel */
1305 	rc = dev_setup_shared_lmt_region(dev->mbox, true, mz->iova);
1306 	if (rc) {
1307 		errno = rc;
1308 		goto free;
1309 	}
1310 
1311 	dev->lmt_base = mz->iova;
1312 	dev->lmt_mz = mz;
1313 	/* Base LMT address should be chosen from only those pci funcs which
1314 	 * participate in LMT shared mode.
1315 	 */
1316 	if (!dev->disable_shared_lmt) {
1317 		idev = idev_get_cfg();
1318 		if (!idev) {
1319 			errno = EFAULT;
1320 			goto free;
1321 		}
1322 
1323 		if (!__atomic_load_n(&idev->lmt_pf_func, __ATOMIC_ACQUIRE)) {
1324 			idev->lmt_base_addr = dev->lmt_base;
1325 			idev->lmt_pf_func = dev->pf_func;
1326 			idev->num_lmtlines = RVU_LMT_LINE_MAX;
1327 		}
1328 	}
1329 
1330 	return 0;
1331 free:
1332 	plt_memzone_free(mz);
1333 fail:
1334 	return -errno;
1335 }
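
/*
 * Why the LMT_REGION_SIZE alignment above matters (a sketch, assuming the
 * usual power-of-two masking scheme implied by the comment): with the
 * region aligned to its own size, the base can be recovered from any line
 * address with a single mask:
 *
 *	base = lmt_addr & ~(uint64_t)(LMT_REGION_SIZE - 1);
 */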
1336 
1337 static bool
1338 dev_cache_line_size_valid(void)
1339 {
1340 	if (roc_model_is_cn9k()) {
1341 		if (PLT_CACHE_LINE_SIZE != 128) {
1342 			plt_err("Cache line size of %d is wrong for CN9K",
1343 				PLT_CACHE_LINE_SIZE);
1344 			return false;
1345 		}
1346 	} else if (roc_model_is_cn10k()) {
1347 		if (PLT_CACHE_LINE_SIZE == 128) {
1348 			plt_warn("Cache line size of %d might affect performance",
1349 				 PLT_CACHE_LINE_SIZE);
1350 		} else if (PLT_CACHE_LINE_SIZE != 64) {
1351 			plt_err("Cache line size of %d is wrong for CN10K",
1352 				PLT_CACHE_LINE_SIZE);
1353 			return false;
1354 		}
1355 	}
1356 
1357 	return true;
1358 }
1359 
1360 int
1361 dev_init(struct dev *dev, struct plt_pci_device *pci_dev)
1362 {
1363 	char name[MBOX_HANDLER_NAME_MAX_LEN];
1364 	int direction, up_direction, rc;
1365 	uintptr_t bar2, bar4, mbox;
1366 	uintptr_t vf_mbase = 0;
1367 	uint64_t intr_offset;
1368 
1369 	if (!dev_cache_line_size_valid())
1370 		return -EFAULT;
1371 
1372 	bar2 = (uintptr_t)pci_dev->mem_resource[2].addr;
1373 	bar4 = (uintptr_t)pci_dev->mem_resource[4].addr;
1374 	if (bar2 == 0 || bar4 == 0) {
1375 		plt_err("Failed to get PCI bars");
1376 		rc = -ENODEV;
1377 		goto error;
1378 	}
1379 
1380 	/* Trigger a fault on the bar2 and bar4 regions
1381 	 * to avoid a BUG_ON in remap_pfn_range()
1382 	 * in recent kernels.
1383 	 */
1384 	*(volatile uint64_t *)bar2;
1385 	*(volatile uint64_t *)bar4;
1386 
1387 	/* Check ROC model supported */
1388 	if (roc_model->flag == 0) {
1389 		rc = UTIL_ERR_INVALID_MODEL;
1390 		goto error;
1391 	}
1392 
1393 	dev->maxvf = pci_dev->max_vfs;
1394 	dev->bar2 = bar2;
1395 	dev->bar4 = bar4;
1396 	dev_vf_hwcap_update(pci_dev, dev);
1397 
1398 	if (dev_is_vf(dev)) {
1399 		mbox = (roc_model_is_cn9k() ?
1400 			bar4 : (bar2 + RVU_VF_MBOX_REGION));
1401 		direction = MBOX_DIR_VFPF;
1402 		up_direction = MBOX_DIR_VFPF_UP;
1403 		intr_offset = RVU_VF_INT;
1404 	} else {
1405 		mbox = bar4;
1406 		direction = MBOX_DIR_PFAF;
1407 		up_direction = MBOX_DIR_PFAF_UP;
1408 		intr_offset = RVU_PF_INT;
1409 	}
1410 
1411 	/* Clear all RVUM interrupts */
1412 	clear_rvum_interrupts(dev);
1413 
1414 	/* Initialize the local mbox */
1415 	rc = mbox_init(&dev->mbox_local, mbox, bar2, direction, 1, intr_offset);
1416 	if (rc)
1417 		goto error;
1418 	dev->mbox = &dev->mbox_local;
1419 
1420 	rc = mbox_init(&dev->mbox_up, mbox, bar2, up_direction, 1, intr_offset);
1421 	if (rc)
1422 		goto mbox_fini;
1423 
1424 	/* Register mbox interrupts */
1425 	rc = dev_mbox_register_irq(pci_dev, dev);
1426 	if (rc)
1427 		goto mbox_fini;
1428 
1429 	/* Check the readiness of PF/VF */
1430 	rc = send_ready_msg(dev->mbox, &dev->pf_func);
1431 	if (rc)
1432 		goto mbox_unregister;
1433 
1434 	dev->pf = dev_get_pf(dev->pf_func);
1435 	dev->vf = dev_get_vf(dev->pf_func);
1436 	memset(&dev->active_vfs, 0, sizeof(dev->active_vfs));
1437 
1438 	/* Allocate memory for device ops */
1439 	dev->ops = plt_zmalloc(sizeof(struct dev_ops), 0);
1440 	if (dev->ops == NULL) {
1441 		rc = -ENOMEM;
1442 		goto mbox_unregister;
1443 	}
1444 
1445 	/* VF devices are present under this PF device */
1446 	if (pci_dev->max_vfs > 0) {
1447 		/* Remap mbox area for all VFs */
1448 		vf_mbase = dev_vf_mbase_get(pci_dev, dev);
1449 		if (!vf_mbase) {
1450 			rc = -ENODEV;
1451 			goto mbox_unregister;
1452 		}
1453 		/* Init mbox object */
1454 		rc = mbox_init(&dev->mbox_vfpf, vf_mbase, bar2, MBOX_DIR_PFVF,
1455 			       pci_dev->max_vfs, intr_offset);
1456 		if (rc)
1457 			goto iounmap;
1458 
1459 		/* PF -> VF UP messages */
1460 		rc = mbox_init(&dev->mbox_vfpf_up, vf_mbase, bar2,
1461 			       MBOX_DIR_PFVF_UP, pci_dev->max_vfs, intr_offset);
1462 		if (rc)
1463 			goto iounmap;
1464 
1465 		/* Create a thread for handling msgs from VFs */
1466 		pthread_cond_init(&dev->sync.pfvf_msg_cond, NULL);
1467 		pthread_mutex_init(&dev->sync.mutex, NULL);
1468 
1469 		snprintf(name, MBOX_HANDLER_NAME_MAX_LEN, "mbox_pf%d", dev->pf);
1470 		dev->sync.start_thread = true;
1471 		rc = plt_thread_create_control(&dev->sync.pfvf_msg_thread, name,
1472 				pf_vf_mbox_thread_main, dev);
1473 		if (rc != 0) {
1474 			plt_err("Failed to create thread for VF mbox handling");
1475 			goto thread_fail;
1476 		}
1477 	}
1478 
1479 	/* Register VF-FLR irq handlers */
1480 	if (!dev_is_vf(dev)) {
1481 		rc = dev_vf_flr_register_irqs(pci_dev, dev);
1482 		if (rc)
1483 			goto stop_msg_thrd;
1484 	}
1485 	dev->mbox_active = 1;
1486 
1487 	rc = npa_lf_init(dev, pci_dev);
1488 	if (rc)
1489 		goto stop_msg_thrd;
1490 
1491 	/* Setup LMT line base */
1492 	rc = dev_lmt_setup(dev);
1493 	if (rc)
1494 		goto stop_msg_thrd;
1495 
1496 	return rc;
1497 stop_msg_thrd:
1498 	/* Exiting the mbox sync thread */
1499 	if (dev->sync.start_thread) {
1500 		dev->sync.start_thread = false;
1501 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
1502 		plt_thread_join(dev->sync.pfvf_msg_thread, NULL);
1503 	}
1504 thread_fail:
1505 	pthread_mutex_destroy(&dev->sync.mutex);
1506 	pthread_cond_destroy(&dev->sync.pfvf_msg_cond);
1507 iounmap:
1508 	dev_vf_mbase_put(pci_dev, vf_mbase);
1509 mbox_unregister:
1510 	mbox_unregister_irq(pci_dev, dev);
1511 	if (dev->ops)
1512 		plt_free(dev->ops);
1513 mbox_fini:
1514 	mbox_fini(dev->mbox);
1515 	mbox_fini(&dev->mbox_up);
1516 error:
1517 	return rc;
1518 }
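
/*
 * Lifecycle sketch (illustrative): a device probe/remove path is expected
 * to bracket these calls:
 *
 *	rc = dev_init(&dev, pci_dev);
 *	if (rc)
 *		return rc;
 *	...
 *	rc = dev_fini(&dev, pci_dev);
 *
 * where dev_fini() may return -EAGAIN while the NPA LF still has
 * references (see idev_npa_lf_active() below).
 */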
1519 
1520 int
1521 dev_fini(struct dev *dev, struct plt_pci_device *pci_dev)
1522 {
1523 	struct plt_intr_handle *intr_handle = pci_dev->intr_handle;
1524 	struct mbox *mbox;
1525 
1526 	/* Check if this dev hosts npalf and has more than one ref */
1527 	if (idev_npa_lf_active(dev) > 1)
1528 		return -EAGAIN;
1529 
1530 	/* Exiting the mbox sync thread */
1531 	if (dev->sync.start_thread) {
1532 		dev->sync.start_thread = false;
1533 		pthread_cond_signal(&dev->sync.pfvf_msg_cond);
1534 		plt_thread_join(dev->sync.pfvf_msg_thread, NULL);
1535 		pthread_mutex_destroy(&dev->sync.mutex);
1536 		pthread_cond_destroy(&dev->sync.pfvf_msg_cond);
1537 	}
1538 
1539 	/* Clear references to this pci dev */
1540 	npa_lf_fini();
1541 
1542 	/* Releasing memory allocated for lmt region */
1543 	if (dev->lmt_mz)
1544 		plt_memzone_free(dev->lmt_mz);
1545 
1546 	mbox_unregister_irq(pci_dev, dev);
1547 
1548 	if (!dev_is_vf(dev))
1549 		vf_flr_unregister_irqs(pci_dev, dev);
1550 	/* Release PF - VF */
1551 	mbox = &dev->mbox_vfpf;
1552 	if (mbox->hwbase && mbox->dev)
1553 		dev_vf_mbase_put(pci_dev, mbox->hwbase);
1554 
1555 	if (dev->ops)
1556 		plt_free(dev->ops);
1557 
1558 	mbox_fini(mbox);
1559 	mbox = &dev->mbox_vfpf_up;
1560 	mbox_fini(mbox);
1561 
1562 	/* Release PF - AF */
1563 	mbox = dev->mbox;
1564 	mbox_fini(mbox);
1565 	mbox = &dev->mbox_up;
1566 	mbox_fini(mbox);
1567 	dev->mbox_active = 0;
1568 
1569 	/* Disable MSIX vectors */
1570 	dev_irqs_disable(intr_handle);
1571 	return 0;
1572 }
1573