1 /*	$NetBSD: msipic.c,v 1.23 2020/05/04 15:55:56 jdolecek Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Internet Initiative Japan Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26  * POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __KERNEL_RCSID(0, "$NetBSD: msipic.c,v 1.23 2020/05/04 15:55:56 jdolecek Exp $");
31 
32 #include "opt_intrdebug.h"
33 
34 #include <sys/types.h>
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/errno.h>
38 #include <sys/kmem.h>
39 #include <sys/mutex.h>
40 #include <sys/bitops.h>
41 
42 #include <dev/pci/pcivar.h>
43 
44 #include <machine/i82489reg.h>
45 #include <machine/i82489var.h>
46 #include <machine/i82093reg.h>
47 #include <machine/i82093var.h>
48 #include <machine/pic.h>
49 #include <machine/lock.h>
50 
51 #include <x86/pci/msipic.h>
52 
53 #ifdef INTRDEBUG
54 #define MSIPICDEBUG
55 #endif
56 
57 #ifdef MSIPICDEBUG
58 #define DPRINTF(msg) printf msg
59 #else
60 #define DPRINTF(msg)
61 #endif
62 
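/*
 * Flush posted bus_space writes: reading the mapping back forces any
 * preceding writes to the MSI-X table to reach the device.
 */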
63 #define BUS_SPACE_WRITE_FLUSH(pc, tag) (void)bus_space_read_4(pc, tag, 0)
64 
65 #define MSIPICNAMEBUF 16
66 
67 /*
68  * A pseudo pic for a single MSI/MSI-X device.
69  * Each pic and its MSI/MSI-X device are distinguished by "devid", which
70  * is managed by "dev_seqs" below.
71  */
72 struct msipic {
73 	struct msipic_pci_info mp_i;
74 
75 	int mp_devid; /* The device id for the MSI/MSI-X device. */
76 
77 	char mp_pic_name[MSIPICNAMEBUF]; /* The MSI/MSI-X device's name. */
78 
79 	struct pci_attach_args mp_pa;
80 	bus_space_tag_t mp_bstag;
81 	bus_space_handle_t mp_bshandle;
82 	bus_size_t mp_bssize;
83 	struct pic *mp_pic;
84 
85 	LIST_ENTRY(msipic) mp_list;
86 };
87 
88 static kmutex_t msipic_list_lock;
89 
90 static LIST_HEAD(, msipic) msipic_list =
91 	LIST_HEAD_INITIALIZER(msipic_list);
92 
93 /*
94  * This structure manages "devid" so that a re-attached device gets the
95  * same "devid" as before. If a device's bus, device, and function numbers
96  * match a recorded entry, it is assumed to be the same device re-attached.
97  */
98 struct dev_last_used_seq {
99 	bool ds_using;
100 	int ds_bus;
101 	int ds_dev;
102 	int ds_fun;
103 };
104 /* The maximum number of MSI/MSI-X devices supported by the system. */
105 #define NUM_MSI_DEVS 256
106 /* Record devids so that a re-attached device reuses the same devid. */
107 static struct dev_last_used_seq dev_seqs[NUM_MSI_DEVS];
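
/*
 * Example (hypothetical bus/dev/fun numbers): if the device at bus 3,
 * device 0, function 0 is attached, detached, and attached again, both
 * attaches get the same devid, because its dev_seqs entry is only marked
 * unused on detach and is never cleared.
 */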
108 
109 static int msipic_allocate_common_msi_devid(const struct pci_attach_args *);
110 static void msipic_release_common_msi_devid(int);
111 
112 static struct pic *msipic_find_msi_pic_locked(int);
113 static struct pic *msipic_construct_common_msi_pic(const struct pci_attach_args *,
114 						   const struct pic *);
115 static void msipic_destruct_common_msi_pic(struct pic *);
116 
117 static void msi_set_msictl_enablebit(struct pic *, int, int);
118 static void msi_hwmask(struct pic *, int);
119 static void msi_hwunmask(struct pic *, int);
120 static void msi_addroute(struct pic *, struct cpu_info *, int, int, int);
121 static void msi_delroute(struct pic *, struct cpu_info *, int, int, int);
122 
123 static void msix_set_vecctl_mask(struct pic *, int, int);
124 static void msix_hwmask(struct pic *, int);
125 static void msix_hwunmask(struct pic *, int);
126 static void msix_addroute(struct pic *, struct cpu_info *, int, int, int);
127 static void msix_delroute(struct pic *, struct cpu_info *, int, int, int);
128 
129 /*
130  * Return a new "devid" for a device attached for the first time.
131  * Return the same "devid" for a device re-attached after being detached.
132  * Return -1 if the number of attached MSI/MSI-X devices exceeds NUM_MSI_DEVS.
133  */
134 static int
135 msipic_allocate_common_msi_devid(const struct pci_attach_args *pa)
136 {
137 	pci_chipset_tag_t pc;
138 	pcitag_t tag;
139 	int bus, dev, fun, i;
140 
141 	KASSERT(mutex_owned(&msipic_list_lock));
142 
143 	pc = pa->pa_pc;
144 	tag = pa->pa_tag;
145 	pci_decompose_tag(pc, tag, &bus, &dev, &fun);
146 
147 	/* If the device was attached before, reuse the same devid. */
148 	for (i = 0; i < NUM_MSI_DEVS; i++) {
149 		/* An all-zero entry (the host bridge's BDF) ends the recorded list. */
150 		if (dev_seqs[i].ds_bus == 0
151 		    && dev_seqs[i].ds_dev == 0
152 		    && dev_seqs[i].ds_fun == 0)
153 			break;
154 
155 		if (dev_seqs[i].ds_bus == bus
156 		    && dev_seqs[i].ds_dev == dev
157 		    && dev_seqs[i].ds_fun == fun) {
158 			dev_seqs[i].ds_using = true;
159 			return i;
160 		}
161 	}
162 
163 	for (i = 0; i < NUM_MSI_DEVS; i++) {
164 		if (dev_seqs[i].ds_using == 0) {
165 			dev_seqs[i].ds_using = true;
166 			dev_seqs[i].ds_bus = bus;
167 			dev_seqs[i].ds_dev = dev;
168 			dev_seqs[i].ds_fun = fun;
169 			return i;
170 		}
171 	}
172 
173 	DPRINTF(("too many MSI devices.\n"));
174 	return -1;
175 }
176 
177 /*
178  * Mark the "devid" unused, but keep it reserved so that it can be reused
179  * when the device is re-attached.
180  */
181 static void
182 msipic_release_common_msi_devid(int devid)
183 {
184 
185 	KASSERT(mutex_owned(&msipic_list_lock));
186 
187 	if (devid < 0 || NUM_MSI_DEVS <= devid) {
188 		DPRINTF(("%s: invalid devid.\n", __func__));
189 		return;
190 	}
191 
192 	dev_seqs[devid].ds_using = false;
193 	/* Keep ds_* to reuse the same devid for the same device. */
194 }
195 
196 static struct pic *
197 msipic_find_msi_pic_locked(int devid)
198 {
199 	struct msipic *mpp;
200 
201 	KASSERT(mutex_owned(&msipic_list_lock));
202 
203 	LIST_FOREACH(mpp, &msipic_list, mp_list) {
204 		if (mpp->mp_devid == devid)
205 			return mpp->mp_pic;
206 	}
207 	return NULL;
208 }
209 
210 /*
211  * Return the msi_pic of an already registered device.
212  * If the device is not registered yet, return NULL.
213  */
214 struct pic *
215 msipic_find_msi_pic(int devid)
216 {
217 	struct pic *msipic;
218 
219 	mutex_enter(&msipic_list_lock);
220 	msipic = msipic_find_msi_pic_locked(devid);
221 	mutex_exit(&msipic_list_lock);
222 
223 	return msipic;
224 }
225 
226 /*
227  * Construction code common to MSI and MSI-X.
228  */
229 static struct pic *
230 msipic_construct_common_msi_pic(const struct pci_attach_args *pa,
231     const struct pic *pic_tmpl)
232 {
233 	struct pic *pic;
234 	struct msipic *msipic;
235 	int devid;
236 
237 	pic = kmem_alloc(sizeof(*pic), KM_SLEEP);
238 	msipic = kmem_zalloc(sizeof(*msipic), KM_SLEEP);
239 
240 	mutex_enter(&msipic_list_lock);
241 
242 	devid = msipic_allocate_common_msi_devid(pa);
243 	if (devid == -1) {
244 		mutex_exit(&msipic_list_lock);
245 		kmem_free(pic, sizeof(*pic));
246 		kmem_free(msipic, sizeof(*msipic));
247 		return NULL;
248 	}
249 
250 	memcpy(pic, pic_tmpl, sizeof(*pic));
251 	pic->pic_edge_stubs
252 	    = x2apic_mode ? x2apic_edge_stubs : ioapic_edge_stubs;
253 	pic->pic_msipic = msipic;
254 	msipic->mp_pic = pic;
255 	pci_decompose_tag(pa->pa_pc, pa->pa_tag,
256 	    &msipic->mp_i.mp_bus, &msipic->mp_i.mp_dev, &msipic->mp_i.mp_fun);
257 	memcpy(&msipic->mp_pa, pa, sizeof(msipic->mp_pa));
258 	msipic->mp_devid = devid;
259 	/*
260 	 * pci_msi{,x}_alloc() must be called only once in the device driver.
261 	 */
262 	KASSERT(msipic_find_msi_pic_locked(msipic->mp_devid) == NULL);
263 
264 	LIST_INSERT_HEAD(&msipic_list, msipic, mp_list);
265 
266 	mutex_exit(&msipic_list_lock);
267 
268 	return pic;
269 }
270 
271 static void
272 msipic_destruct_common_msi_pic(struct pic *msi_pic)
273 {
274 	struct msipic *msipic;
275 
276 	if (msi_pic == NULL)
277 		return;
278 
279 	msipic = msi_pic->pic_msipic;
280 	mutex_enter(&msipic_list_lock);
281 	LIST_REMOVE(msipic, mp_list);
282 	msipic_release_common_msi_devid(msipic->mp_devid);
283 	mutex_exit(&msipic_list_lock);
284 
285 	kmem_free(msipic, sizeof(*msipic));
286 	kmem_free(msi_pic, sizeof(*msi_pic));
287 }
288 
289 /*
290  * Return whether the pic is an MSI/MSI-X pic or not.
291  */
292 bool
293 msipic_is_msi_pic(struct pic *pic)
294 {
295 
296 	return (pic->pic_msipic != NULL);
297 }
298 
299 /*
300  * Return the MSI/MSI-X devid, which is unique for each device.
301  */
302 int
303 msipic_get_devid(struct pic *pic)
304 {
305 
306 	KASSERT(msipic_is_msi_pic(pic));
307 
308 	return pic->pic_msipic->mp_devid;
309 }
310 
311 /*
312  * Return the PCI bus/dev/func info for the device.
313  */
314 const struct msipic_pci_info *
315 msipic_get_pci_info(struct pic *pic)
316 {
317 	KASSERT(msipic_is_msi_pic(pic));
318 
319 	return &pic->pic_msipic->mp_i;
320 }
321 
322 #define MSI_MSICTL_ENABLE 1
323 #define MSI_MSICTL_DISABLE 0
324 static void
325 msi_set_msictl_enablebit(struct pic *pic, int msi_vec, int flag)
326 {
327 	pci_chipset_tag_t pc;
328 	struct pci_attach_args *pa;
329 	pcitag_t tag;
330 	pcireg_t ctl;
331 	int off, err __diagused;
332 
333 	pc = NULL;
334 	pa = &pic->pic_msipic->mp_pa;
335 	tag = pa->pa_tag;
336 	err = pci_get_capability(pc, tag, PCI_CAP_MSI, &off, NULL);
337 	KASSERT(err != 0);
338 
339 	/*
340 	 * MSI can establish only one vector at a time, so mask the whole
341 	 * device via the MSI enable bit instead of a per-vector mask bit.
342 	 */
343 	ctl = pci_conf_read(pc, tag, off + PCI_MSI_CTL);
344 	if (flag == MSI_MSICTL_ENABLE)
345 		ctl |= PCI_MSI_CTL_MSI_ENABLE;
346 	else
347 		ctl &= ~PCI_MSI_CTL_MSI_ENABLE;
348 
349 	pci_conf_write(pc, tag, off + PCI_MSI_CTL, ctl);
350 }
351 
352 static void
353 msi_hwmask(struct pic *pic, int msi_vec)
354 {
355 
356 	msi_set_msictl_enablebit(pic, msi_vec, MSI_MSICTL_DISABLE);
357 }
358 
359 /*
360  * Do not use pic->hwunmask() immediately after pic->delroute();
361  * pic->addroute() must be called before pic->hwunmask().
362  */
363 static void
364 msi_hwunmask(struct pic *pic, int msi_vec)
365 {
366 
367 	msi_set_msictl_enablebit(pic, msi_vec, MSI_MSICTL_ENABLE);
368 }
369 
370 static void
371 msi_addroute(struct pic *pic, struct cpu_info *ci,
372 	     int unused, int idt_vec, int type)
373 {
374 	pci_chipset_tag_t pc;
375 	struct pci_attach_args *pa;
376 	pcitag_t tag;
377 #ifndef XENPV
378 	pcireg_t addr, data;
379 #endif
380 	pcireg_t ctl;
381 	int off, err __diagused;
382 
383 	pc = NULL;
384 	pa = &pic->pic_msipic->mp_pa;
385 	tag = pa->pa_tag;
386 	err = pci_get_capability(pc, tag, PCI_CAP_MSI, &off, NULL);
387 	KASSERT(err != 0);
388 
389 	ctl = pci_conf_read(pc, tag, off + PCI_MSI_CTL);
390 #ifndef XENPV
391 	/*
392 	 * See Intel 64 and IA-32 Architectures Software Developer's Manual
393 	 * Volume 3 10.11 Message Signalled Interrupts.
394 	 */
395 	/*
396 	 * The "cpuid" in the MSI address is the local APIC ID. In NetBSD,
397 	 * that ID is the same as ci->ci_cpuid.
398 	 */
399 	addr = LAPIC_MSIADDR_BASE | __SHIFTIN(ci->ci_cpuid,
400 	    LAPIC_MSIADDR_DSTID_MASK);
401 	/* With edge trigger mode, the trigger level bit is ignored. */
402 	data = __SHIFTIN(idt_vec, LAPIC_VECTOR_MASK)
403 		| LAPIC_TRIGMODE_EDGE | LAPIC_DLMODE_FIXED;
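	/*
	 * Roughly, in xAPIC mode this yields:
	 *   addr = 0xfee00000 | (local APIC ID << 12)
	 *   data = fixed delivery mode, edge trigger, vector = idt_vec
	 */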
404 
405 	/*
406 	 * The message data register is 16 bits wide if extended message data
407 	 * is not implemented. If it is 16 bits wide and per-vector masking is
408 	 * not supported, the location of the upper 16 bits lies outside the
409 	 * MSI capability structure's range. The PCI spec says the upper
410 	 * 16 bits are driven to 0 if the message data register is 16 bits
411 	 * wide, so it is OK to write the full 32 bits regardless of the value
412 	 * of the upper 16 bits.
413 	 */
414 	if (ctl & PCI_MSI_CTL_64BIT_ADDR) {
415 		pci_conf_write(pc, tag, off + PCI_MSI_MADDR64_LO, addr);
416 		pci_conf_write(pc, tag, off + PCI_MSI_MADDR64_HI, 0);
417 		pci_conf_write(pc, tag, off + PCI_MSI_MDATA64, data);
418 	} else {
419 		pci_conf_write(pc, tag, off + PCI_MSI_MADDR, addr);
420 		pci_conf_write(pc, tag, off + PCI_MSI_MDATA, data);
421 	}
422 #endif /* !XENPV */
423 	ctl |= PCI_MSI_CTL_MSI_ENABLE;
424 	pci_conf_write(pc, tag, off + PCI_MSI_CTL, ctl);
425 }
426 
427 /*
428  * Do not use pic->hwunmask() immediately after pic->delroute();
429  * pic->addroute() must be called before pic->hwunmask().
430  */
431 static void
432 msi_delroute(struct pic *pic, struct cpu_info *ci,
433 	     int msi_vec, int idt_vec, int type)
434 {
435 
436 	msi_hwmask(pic, msi_vec);
437 }
438 
439 /*
440  * Template for the MSI pic.
441  * .pic_msipic is set later in msipic_construct_common_msi_pic().
442  */
443 static const struct pic msi_pic_tmpl = {
444 	.pic_type = PIC_MSI,
445 	.pic_vecbase = 0,
446 	.pic_apicid = 0,
447 	.pic_lock = __SIMPLELOCK_UNLOCKED, /* not used for msi_pic */
448 	.pic_hwmask = msi_hwmask,
449 	.pic_hwunmask = msi_hwunmask,
450 	.pic_addroute = msi_addroute,
451 	.pic_delroute = msi_delroute,
452 	.pic_intr_get_devname = x86_intr_get_devname,
453 	.pic_intr_get_assigned = x86_intr_get_assigned,
454 	.pic_intr_get_count = x86_intr_get_count,
455 };
456 
457 /*
458  * Create a pseudo pic for an MSI device.
459  */
460 struct pic *
461 msipic_construct_msi_pic(const struct pci_attach_args *pa)
462 {
463 	struct pic *msi_pic;
464 	char pic_name_buf[MSIPICNAMEBUF];
465 
466 	msi_pic = msipic_construct_common_msi_pic(pa, &msi_pic_tmpl);
467 	if (msi_pic == NULL) {
468 		DPRINTF(("cannot allocate MSI pic.\n"));
469 		return NULL;
470 	}
471 
472 	memset(pic_name_buf, 0, MSIPICNAMEBUF);
473 	snprintf(pic_name_buf, MSIPICNAMEBUF, "msi%d",
474 	    msi_pic->pic_msipic->mp_devid);
475 	strncpy(msi_pic->pic_msipic->mp_pic_name, pic_name_buf,
476 	    MSIPICNAMEBUF - 1);
477 	msi_pic->pic_name = msi_pic->pic_msipic->mp_pic_name;
478 
479 	return msi_pic;
480 }
481 
482 /*
483  * Delete the pseudo pic for an MSI device.
484  */
485 void
486 msipic_destruct_msi_pic(struct pic *msi_pic)
487 {
488 
489 	msipic_destruct_common_msi_pic(msi_pic);
490 }
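
/*
 * Minimal usage sketch (not compiled), assuming a valid pci_attach_args
 * "pa" obtained by the x86 MSI interrupt code; the function name below is
 * illustrative only.
 */
#if 0
static void
msipic_usage_example(const struct pci_attach_args *pa)
{
	struct pic *pic;

	pic = msipic_construct_msi_pic(pa);
	if (pic == NULL)
		return;
	/* The new pic is registered and can be looked up by its devid. */
	KASSERT(msipic_find_msi_pic(msipic_get_devid(pic)) == pic);
	msipic_destruct_msi_pic(pic);
}
#endif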
491 
492 #define MSIX_VECCTL_HWMASK 1
493 #define MSIX_VECCTL_HWUNMASK 0
494 static void
495 msix_set_vecctl_mask(struct pic *pic, int msix_vec, int flag)
496 {
497 	bus_space_tag_t bstag;
498 	bus_space_handle_t bshandle;
499 	uint64_t entry_base;
500 	uint32_t vecctl;
501 
502 	if (msix_vec < 0) {
503 		DPRINTF(("%s: invalid MSI-X table index, devid=%d vecid=%d",
504 			__func__, msipic_get_devid(pic), msix_vec));
505 		return;
506 	}
507 
508 	entry_base = PCI_MSIX_TABLE_ENTRY_SIZE * msix_vec;
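	/*
	 * Each MSI-X table entry is 16 bytes, so e.g. msix_vec 2 starts at
	 * byte offset 32 within the table.
	 */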
509 
510 	bstag = pic->pic_msipic->mp_bstag;
511 	bshandle = pic->pic_msipic->mp_bshandle;
512 	vecctl = bus_space_read_4(bstag, bshandle,
513 	    entry_base + PCI_MSIX_TABLE_ENTRY_VECTCTL);
514 	if (flag == MSIX_VECCTL_HWMASK)
515 		vecctl |= PCI_MSIX_VECTCTL_MASK;
516 	else
517 		vecctl &= ~PCI_MSIX_VECTCTL_MASK;
518 
519 	bus_space_write_4(bstag, bshandle,
520 	    entry_base + PCI_MSIX_TABLE_ENTRY_VECTCTL, vecctl);
521 	BUS_SPACE_WRITE_FLUSH(bstag, bshandle);
522 }
523 
524 static void
525 msix_hwmask(struct pic *pic, int msix_vec)
526 {
527 
528 	msix_set_vecctl_mask(pic, msix_vec, MSIX_VECCTL_HWMASK);
529 }
530 
531 /*
532  * Do not use pic->hwunmask() immediately after pic->delroute();
533  * pic->addroute() must be called before pic->hwunmask().
534  */
535 static void
536 msix_hwunmask(struct pic *pic, int msix_vec)
537 {
538 
539 	msix_set_vecctl_mask(pic, msix_vec, MSIX_VECCTL_HWUNMASK);
540 }
541 
542 static void
543 msix_addroute(struct pic *pic, struct cpu_info *ci,
544 	     int msix_vec, int idt_vec, int type)
545 {
546 	pci_chipset_tag_t pc;
547 	struct pci_attach_args *pa;
548 	pcitag_t tag;
549 	bus_space_tag_t bstag;
550 	bus_space_handle_t bshandle;
551 	uint64_t entry_base;
552 #ifndef XENPV
553 	pcireg_t addr, data;
554 #endif
555 	pcireg_t ctl;
556 	int off, err __diagused;
557 
558 	if (msix_vec < 0) {
559 		DPRINTF(("%s: invalid MSI-X table index, devid=%d vecid=%d",
560 			__func__, msipic_get_devid(pic), msix_vec));
561 		return;
562 	}
563 
564 	pa = &pic->pic_msipic->mp_pa;
565 	pc = pa->pa_pc;
566 	tag = pa->pa_tag;
567 	err = pci_get_capability(pc, tag, PCI_CAP_MSIX, &off, NULL);
568 	KASSERT(err != 0);
569 
570 	/* Disable MSI-X before writing MSI-X table */
571 	ctl = pci_conf_read(pc, tag, off + PCI_MSIX_CTL);
572 	ctl &= ~PCI_MSIX_CTL_ENABLE;
573 	pci_conf_write(pc, tag, off + PCI_MSIX_CTL, ctl);
574 
575 	bstag = pic->pic_msipic->mp_bstag;
576 	bshandle = pic->pic_msipic->mp_bshandle;
577 	entry_base = PCI_MSIX_TABLE_ENTRY_SIZE * msix_vec;
578 
579 #ifndef XENPV
580 	/*
581 	 * See Intel 64 and IA-32 Architectures Software Developer's Manual
582 	 * Volume 3 10.11 Message Signalled Interrupts.
583 	 */
584 	/*
585 	 * The "cpuid" in the MSI-X address is the local APIC ID. In NetBSD,
586 	 * that ID is the same as ci->ci_cpuid.
587 	 */
588 	addr = LAPIC_MSIADDR_BASE | __SHIFTIN(ci->ci_cpuid,
589 	    LAPIC_MSIADDR_DSTID_MASK);
590 	/* With edge trigger mode, the trigger level bit is ignored. */
591 	data = __SHIFTIN(idt_vec, LAPIC_VECTOR_MASK)
592 		| LAPIC_TRIGMODE_EDGE | LAPIC_DLMODE_FIXED;
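	/*
	 * As with MSI above: roughly, in xAPIC mode this yields
	 * addr = 0xfee00000 | (local APIC ID << 12) and an edge-triggered,
	 * fixed-delivery data word carrying idt_vec.
	 */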
593 
594 	bus_space_write_4(bstag, bshandle,
595 	    entry_base + PCI_MSIX_TABLE_ENTRY_ADDR_LO, addr);
596 	bus_space_write_4(bstag, bshandle,
597 	    entry_base + PCI_MSIX_TABLE_ENTRY_ADDR_HI, 0);
598 	bus_space_write_4(bstag, bshandle,
599 	    entry_base + PCI_MSIX_TABLE_ENTRY_DATA, data);
600 #endif /* !XENPV */
601 	bus_space_write_4(bstag, bshandle,
602 	    entry_base + PCI_MSIX_TABLE_ENTRY_VECTCTL, 0);
603 	BUS_SPACE_WRITE_FLUSH(bstag, bshandle);
604 
605 	ctl = pci_conf_read(pc, tag, off + PCI_MSIX_CTL);
606 	ctl |= PCI_MSIX_CTL_ENABLE;
607 	pci_conf_write(pc, tag, off + PCI_MSIX_CTL, ctl);
608 }
609 
610 /*
611  * Do not use pic->hwunmask() immediately after pic->delroute();
612  * pic->addroute() must be called before pic->hwunmask().
613  */
614 static void
615 msix_delroute(struct pic *pic, struct cpu_info *ci,
616 	     int msix_vec, int vec, int type)
617 {
618 
619 	msix_hwmask(pic, msix_vec);
620 }
621 
622 /*
623  * Template for the MSI-X pic.
624  * .pic_msipic is set later in msipic_construct_common_msi_pic().
625  */
626 static const struct pic msix_pic_tmpl = {
627 	.pic_type = PIC_MSIX,
628 	.pic_vecbase = 0,
629 	.pic_apicid = 0,
630 	.pic_lock = __SIMPLELOCK_UNLOCKED, /* not used for msix_pic */
631 	.pic_hwmask = msix_hwmask,
632 	.pic_hwunmask = msix_hwunmask,
633 	.pic_addroute = msix_addroute,
634 	.pic_delroute = msix_delroute,
635 	.pic_intr_get_devname = x86_intr_get_devname,
636 	.pic_intr_get_assigned = x86_intr_get_assigned,
637 	.pic_intr_get_count = x86_intr_get_count,
638 };
639 
640 struct pic *
641 msipic_construct_msix_pic(const struct pci_attach_args *pa)
642 {
643 	struct pic *msix_pic;
644 	pci_chipset_tag_t pc;
645 	pcitag_t tag;
646 	pcireg_t tbl;
647 	bus_space_tag_t bstag;
648 	bus_space_handle_t bshandle;
649 	bus_size_t bssize;
650 	size_t table_size;
651 	uint32_t table_offset;
652 	u_int memtype;
653 	bus_addr_t memaddr;
654 	int flags;
655 	int bir, bar, err, off, table_nentry;
656 	char pic_name_buf[MSIPICNAMEBUF];
657 
658 	table_nentry = pci_msix_count(pa->pa_pc, pa->pa_tag);
659 	if (table_nentry == 0) {
660 		DPRINTF(("MSI-X table has no entries.\n"));
661 		return NULL;
662 	}
663 
664 	pc = pa->pa_pc;
665 	tag = pa->pa_tag;
666 	if (pci_get_capability(pc, tag, PCI_CAP_MSIX, &off, NULL) == 0) {
667 		DPRINTF(("%s: no msix capability", __func__));
668 		return NULL;
669 	}
670 
671 	msix_pic = msipic_construct_common_msi_pic(pa, &msix_pic_tmpl);
672 	if (msix_pic == NULL) {
673 		DPRINTF(("cannot allocate MSI-X pic.\n"));
674 		return NULL;
675 	}
676 
677 	memset(pic_name_buf, 0, MSIPICNAMEBUF);
678 	snprintf(pic_name_buf, MSIPICNAMEBUF, "msix%d",
679 	    msix_pic->pic_msipic->mp_devid);
680 	strncpy(msix_pic->pic_msipic->mp_pic_name, pic_name_buf,
681 	    MSIPICNAMEBUF - 1);
682 	msix_pic->pic_name = msix_pic->pic_msipic->mp_pic_name;
683 
684 	tbl = pci_conf_read(pc, tag, off + PCI_MSIX_TBLOFFSET);
685 	table_offset = tbl & PCI_MSIX_TBLOFFSET_MASK;
686 	bir = tbl & PCI_MSIX_TBLBIR_MASK;
687 	switch (bir) {
688 	case 0:
689 		bar = PCI_BAR0;
690 		break;
691 	case 1:
692 		bar = PCI_BAR1;
693 		break;
694 	case 2:
695 		bar = PCI_BAR2;
696 		break;
697 	case 3:
698 		bar = PCI_BAR3;
699 		break;
700 	case 4:
701 		bar = PCI_BAR4;
702 		break;
703 	case 5:
704 		bar = PCI_BAR5;
705 		break;
706 	default:
707 		aprint_error("detected an invalid device! "
708 		    "The device uses a reserved BIR value.\n");
709 		msipic_destruct_common_msi_pic(msix_pic);
710 		return NULL;
711 	}
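	/* The selected "bar" is the BAR at config offset 0x10 + 4 * bir. */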
712 	memtype = pci_mapreg_type(pc, tag, bar);
713 	/*
714 	 * PCI_MSIX_TABLE_ENTRY_SIZE consists of the following fields:
715 	 *     - Vector Control (32bit)
716 	 *     - Message Data (32bit)
717 	 *     - Message Upper Address (32bit)
718 	 *     - Message Lower Address (32bit)
719 	 */
720 	table_size = table_nentry * PCI_MSIX_TABLE_ENTRY_SIZE;
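	/* E.g. a device with 8 MSI-X entries needs an 8 * 16 = 128 byte table. */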
721 #if 0
722 	err = pci_mapreg_submap(pa, bar, memtype, BUS_SPACE_MAP_LINEAR,
723 	    roundup(table_size, PAGE_SIZE), table_offset,
724 	    &bstag, &bshandle, NULL, &bssize);
725 #else
726 	/*
727 	 * Workaround for the PCI prefetchable bit. Some chips (e.g. Intel
728 	 * 82599) report SERR and MSI-X doesn't work. This might not be a bug
729 	 * in this driver but in our common PCI code or in VMs. Until the real
730 	 * cause is found, ignore the prefetchable bit.
731 	 */
732 	if (pci_mapreg_info(pa->pa_pc, pa->pa_tag, bar, memtype,
733 		&memaddr, NULL, &flags) != 0) {
734 		DPRINTF(("cannot get a map info.\n"));
735 		msipic_destruct_common_msi_pic(msix_pic);
736 		return NULL;
737 	}
738 	if ((flags & BUS_SPACE_MAP_PREFETCHABLE) != 0) {
739 		DPRINTF(("clear prefetchable bit\n"));
740 		flags &= ~BUS_SPACE_MAP_PREFETCHABLE;
741 	}
742 	bssize = roundup(table_size, PAGE_SIZE);
743 	err = _x86_memio_map(pa->pa_memt, memaddr + table_offset, bssize, flags,
744 	    &bshandle);
745 	bstag = pa->pa_memt;
746 #endif
747 	if (err) {
748 		DPRINTF(("cannot map msix table.\n"));
749 		msipic_destruct_common_msi_pic(msix_pic);
750 		return NULL;
751 	}
752 	msix_pic->pic_msipic->mp_bstag = bstag;
753 	msix_pic->pic_msipic->mp_bshandle = bshandle;
754 	msix_pic->pic_msipic->mp_bssize = bssize;
755 	msix_pic->pic_msipic->mp_i.mp_table_base = memaddr + table_offset;
756 
757 	return msix_pic;
758 }
759 
760 /*
761  * Delete the pseudo pic for an MSI-X device.
762  */
763 void
764 msipic_destruct_msix_pic(struct pic *msix_pic)
765 {
766 	struct msipic *msipic;
767 
768 	KASSERT(msipic_is_msi_pic(msix_pic));
769 	KASSERT(msix_pic->pic_type == PIC_MSIX);
770 
771 	msipic = msix_pic->pic_msipic;
772 	_x86_memio_unmap(msipic->mp_bstag, msipic->mp_bshandle,
773 	    msipic->mp_bssize, NULL);
774 
775 	msipic_destruct_common_msi_pic(msix_pic);
776 }
777 
778 /*
779  * Set the number of MSI vectors for the pseudo MSI pic.
780  */
781 int
782 msipic_set_msi_vectors(struct pic *msi_pic, pci_intr_handle_t *pihs,
783     int count)
784 {
785 
786 	KASSERT(msipic_is_msi_pic(msi_pic));
787 
788 	if (msi_pic->pic_type == PIC_MSI) {
789 		pci_chipset_tag_t pc;
790 		struct pci_attach_args *pa;
791 		pcitag_t tag;
792 		int off, err __diagused;
793 		pcireg_t ctl;
794 
795 		pc = NULL;
796 		pa = &msi_pic->pic_msipic->mp_pa;
797 		tag = pa->pa_tag;
798 		err = pci_get_capability(pc, tag, PCI_CAP_MSI, &off, NULL);
799 		KASSERT(err != 0);
800 
801 		ctl = pci_conf_read(pc, tag, off + PCI_MSI_CTL);
802 		ctl &= ~PCI_MSI_CTL_MME_MASK;
803 		ctl |= __SHIFTIN(ilog2(count), PCI_MSI_CTL_MME_MASK);
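		/*
		 * The MME field set above holds log2 of the vector count,
		 * so "count" is expected to be a power of two; e.g.
		 * count == 4 encodes MME = 2.
		 */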
804 		pci_conf_write(pc, tag, off + PCI_MSI_CTL, ctl);
805 	}
806 
807 	msi_pic->pic_msipic->mp_i.mp_veccnt = count;
808 	return 0;
809 }
810 
811 /*
812  * Initialize the system to use MSI/MSI-X.
813  */
814 void
815 msipic_init(void)
816 {
817 
818 	mutex_init(&msipic_list_lock, MUTEX_DEFAULT, IPL_NONE);
819 }
820