1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_acpi.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/malloc.h>
36 #include <sys/module.h>
37 #include <sys/linker.h>
38 #include <sys/fcntl.h>
39 #include <sys/conf.h>
40 #include <sys/kernel.h>
41 #include <sys/queue.h>
42 #include <sys/sysctl.h>
43 #include <sys/endian.h>
44 #include <sys/machintr.h>
45 
46 #include <machine/msi_machdep.h>
47 
48 #include <vm/vm.h>
49 #include <vm/pmap.h>
50 #include <vm/vm_extern.h>
51 
52 #include <sys/bus.h>
53 #include <sys/rman.h>
54 #include <sys/device.h>
55 
56 #include <sys/pciio.h>
57 #include <bus/pci/pcireg.h>
58 #include <bus/pci/pcivar.h>
59 #include <bus/pci/pci_private.h>
60 
61 #include <bus/u4b/controller/xhcireg.h>
62 #include <bus/u4b/controller/ehcireg.h>
63 #include <bus/u4b/controller/ohcireg.h>
64 #include <bus/u4b/controller/uhcireg.h>
65 
66 #include "pcib_if.h"
67 #include "pci_if.h"
68 
69 #ifdef __HAVE_ACPI
70 #include <contrib/dev/acpica/acpi.h>
71 #include "acpi_if.h"
72 #else
73 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
74 #endif
75 
76 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
77 
78 static uint32_t		pci_mapbase(unsigned mapreg);
79 static const char	*pci_maptype(unsigned mapreg);
80 static int		pci_mapsize(unsigned testval);
81 static int		pci_maprange(unsigned mapreg);
82 static void		pci_fixancient(pcicfgregs *cfg);
83 
84 static int		pci_porten(device_t pcib, int b, int s, int f);
85 static int		pci_memen(device_t pcib, int b, int s, int f);
86 static void		pci_assign_interrupt(device_t bus, device_t dev,
87 			    int force_route);
88 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
89 			    int b, int s, int f, int reg,
90 			    struct resource_list *rl, int force, int prefetch);
91 static int		pci_probe(device_t dev);
92 static int		pci_attach(device_t dev);
93 static void		pci_child_detached(device_t, device_t);
94 static void		pci_load_vendor_data(void);
95 static int		pci_describe_parse_line(char **ptr, int *vendor,
96 			    int *device, char **desc);
97 static char		*pci_describe_device(device_t dev);
98 static int		pci_modevent(module_t mod, int what, void *arg);
99 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
100 			    pcicfgregs *cfg);
101 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
102 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
103 			    int reg, uint32_t *data);
104 #if 0
105 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
106 			    int reg, uint32_t data);
107 #endif
108 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
109 static void		pci_disable_msi(device_t dev);
110 static void		pci_enable_msi(device_t dev, uint64_t address,
111 			    uint16_t data);
112 static void		pci_setup_msix_vector(device_t dev, u_int index,
113 			    uint64_t address, uint32_t data);
114 static void		pci_mask_msix_vector(device_t dev, u_int index);
115 static void		pci_unmask_msix_vector(device_t dev, u_int index);
116 static void		pci_mask_msix_allvectors(device_t dev);
117 static struct msix_vector *pci_find_msix_vector(device_t dev, int rid);
118 static int		pci_msi_blacklisted(void);
119 static void		pci_resume_msi(device_t dev);
120 static void		pci_resume_msix(device_t dev);
121 static int		pcie_slotimpl(const pcicfgregs *);
122 static void		pci_print_verbose_expr(const pcicfgregs *);
123 
124 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
127 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
128 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_subvendor(device_t, int, int,
130 			    pcicfgregs *);
131 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
132 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
133 
134 static device_method_t pci_methods[] = {
135 	/* Device interface */
136 	DEVMETHOD(device_probe,		pci_probe),
137 	DEVMETHOD(device_attach,	pci_attach),
138 	DEVMETHOD(device_detach,	bus_generic_detach),
139 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
140 	DEVMETHOD(device_suspend,	pci_suspend),
141 	DEVMETHOD(device_resume,	pci_resume),
142 
143 	/* Bus interface */
144 	DEVMETHOD(bus_print_child,	pci_print_child),
145 	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
146 	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
147 	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
148 	DEVMETHOD(bus_driver_added,	pci_driver_added),
149 	DEVMETHOD(bus_child_detached,	pci_child_detached),
150 	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
151 	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),
152 
153 	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
154 	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
155 	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
156 	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
157 	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
158 	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
159 	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
160 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
161 	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
162 	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),
163 
164 	/* PCI interface */
165 	DEVMETHOD(pci_read_config,	pci_read_config_method),
166 	DEVMETHOD(pci_write_config,	pci_write_config_method),
167 	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
168 	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
169 	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
170 	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
171 	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
172 	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
173 	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
174 	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
175 	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
176 	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
177 	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
178 	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
179 	DEVMETHOD(pci_alloc_msix_vector, pci_alloc_msix_vector_method),
180 	DEVMETHOD(pci_release_msix_vector, pci_release_msix_vector_method),
181 	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
182 	DEVMETHOD(pci_msix_count,	pci_msix_count_method),
183 
184 	DEVMETHOD_END
185 };
186 
187 DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);
188 
189 static devclass_t pci_devclass;
190 DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
191 MODULE_VERSION(pci, 1);
192 
193 static char	*pci_vendordata;
194 static size_t	pci_vendordata_size;
195 
196 
197 static const struct pci_read_cap {
198 	int		cap;
199 	pci_read_cap_t	read_cap;
200 } pci_read_caps[] = {
201 	{ PCIY_PMG,		pci_read_cap_pmgt },
202 	{ PCIY_HT,		pci_read_cap_ht },
203 	{ PCIY_MSI,		pci_read_cap_msi },
204 	{ PCIY_MSIX,		pci_read_cap_msix },
205 	{ PCIY_VPD,		pci_read_cap_vpd },
206 	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
207 	{ PCIY_PCIX,		pci_read_cap_pcix },
208 	{ PCIY_EXPRESS,		pci_read_cap_express },
209 	{ 0, NULL } /* required last entry */
210 };
211 
212 struct pci_quirk {
213 	uint32_t devid;	/* Vendor/device of the card */
214 	int	type;
215 #define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
216 #define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
217 #define	PCI_QUIRK_MSI_INTX_BUG	6 /* PCIM_CMD_INTxDIS disables MSI */
218 	int	arg1;
219 	int	arg2;
220 };
221 
222 struct pci_quirk pci_quirks[] = {
223 	/* The Intel 82371AB and 82443MX have a map register at offset 0x90. */
224 	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
225 	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
226 	/* As does the Serverworks OSB4 (the SMBus mapping register) */
227 	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },
228 
229 	/*
230 	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
231 	 * or the CMIC-SL (AKA ServerWorks GC_LE).
232 	 */
233 	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
234 	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
235 
236 	/*
237 	 * MSI doesn't work on earlier Intel chipsets including
238 	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
239 	 */
240 	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
241 	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
242 	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
243 	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
244 	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
245 	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
246 	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
247 
248 	/*
249 	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
250 	 * bridge.
251 	 */
252 	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },
253 
254 	/*
255 	 * Atheros AR8161/AR8162/E2200/E2400/E2500 Ethernet controllers have
256 	 * a bug where MSI interrupts are not asserted when the
257 	 * PCIM_CMD_INTxDIS bit of the command register is set.
258 	 */
259 	{ 0x10901969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
260 	{ 0x10911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
261 	{ 0xE0911969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
262 	{ 0xE0A11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
263 	{ 0xE0B11969, PCI_QUIRK_MSI_INTX_BUG,	0,	0 },
264 
265 	{ 0 }
266 };
267 
268 /* map register information */
269 #define	PCI_MAPMEM	0x01	/* memory map */
270 #define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
271 #define	PCI_MAPPORT	0x04	/* port map */
272 
273 #define PCI_MSIX_RID2VEC(rid)	((rid) - 1)	/* rid -> MSI-X vector # */
274 #define PCI_MSIX_VEC2RID(vec)	((vec) + 1)	/* MSI-X vector # -> rid */
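
/*
 * For example, SYS_RES_IRQ rid 1 maps to MSI-X vector 0 and rid 2 to
 * vector 1; rid 0 is left for the legacy INTx interrupt resource.
 */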
275 
276 struct devlist pci_devq;
277 uint32_t pci_generation;
278 uint32_t pci_numdevs = 0;
279 static int pcie_chipset, pcix_chipset;
280 
281 /* sysctl vars */
282 SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");
283 
284 static int pci_enable_io_modes = 1;
285 TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
286 SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
287     &pci_enable_io_modes, 1,
288     "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
289 enable these bits correctly.  We'd like to do this all the time, but there\n\
290 are some peripherals for which it causes problems.");
291 
292 static int pci_do_power_nodriver = 0;
293 TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
294 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
295     &pci_do_power_nodriver, 0,
296   "Place a function into D3 state when no driver attaches to it.  0 means\n\
297 disable.  1 means conservatively place devices into D3 state.  2 means\n\
298 aggressively place devices into D3 state.  3 means put absolutely everything\n\
299 in D3 state.");
300 
301 static int pci_do_power_resume = 1;
302 TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
303 SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
304     &pci_do_power_resume, 1,
305   "Transition from D3 -> D0 on resume.");
306 
307 static int pci_do_msi = 1;
308 TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
309 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
310     "Enable support for MSI interrupts");
311 
312 static int pci_do_msix = 1;
313 TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
314 SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
315     "Enable support for MSI-X interrupts");
316 
317 static int pci_honor_msi_blacklist = 1;
318 TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
319 SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
320     &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
321 
322 #if defined(__x86_64__)
323 static int pci_usb_takeover = 1;
324 TUNABLE_INT("hw.pci.usb_early_takeover", &pci_usb_takeover);
325 SYSCTL_INT(_hw_pci, OID_AUTO, usb_early_takeover, CTLFLAG_RD,
326     &pci_usb_takeover, 1, "Enable early takeover of USB controllers.\n\
327 Disable this if you depend on BIOS emulation of USB devices, that is,\n\
328 you use USB devices (such as a keyboard or mouse) but do not load USB drivers");
329 #endif
330 
331 static int pci_msi_cpuid;
332 
333 static int
334 pci_has_quirk(uint32_t devid, int quirk)
335 {
336 	const struct pci_quirk *q;
337 
338 	for (q = &pci_quirks[0]; q->devid; q++) {
339 		if (q->devid == devid && q->type == quirk)
340 			return (1);
341 	}
342 	return (0);
343 }
344 
345 /* Find a device_t by bus/slot/function in domain 0 */
346 
347 device_t
348 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
349 {
350 
351 	return (pci_find_dbsf(0, bus, slot, func));
352 }
353 
354 /* Find a device_t by domain/bus/slot/function */
355 
356 device_t
357 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
358 {
359 	struct pci_devinfo *dinfo;
360 
361 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
362 		if ((dinfo->cfg.domain == domain) &&
363 		    (dinfo->cfg.bus == bus) &&
364 		    (dinfo->cfg.slot == slot) &&
365 		    (dinfo->cfg.func == func)) {
366 			return (dinfo->cfg.dev);
367 		}
368 	}
369 
370 	return (NULL);
371 }
372 
373 /* Find a device_t by vendor/device ID */
374 
375 device_t
376 pci_find_device(uint16_t vendor, uint16_t device)
377 {
378 	struct pci_devinfo *dinfo;
379 
380 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
381 		if ((dinfo->cfg.vendor == vendor) &&
382 		    (dinfo->cfg.device == device)) {
383 			return (dinfo->cfg.dev);
384 		}
385 	}
386 
387 	return (NULL);
388 }
389 
390 device_t
391 pci_find_class(uint8_t class, uint8_t subclass)
392 {
393 	struct pci_devinfo *dinfo;
394 
395 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
396 		if (dinfo->cfg.baseclass == class &&
397 		    dinfo->cfg.subclass == subclass) {
398 			return (dinfo->cfg.dev);
399 		}
400 	}
401 
402 	return (NULL);
403 }
404 
405 device_t
406 pci_iterate_class(struct pci_devinfo **dinfop, uint8_t class, uint8_t subclass)
407 {
408 	struct pci_devinfo *dinfo;
409 
410 	if (*dinfop)
411 		dinfo = STAILQ_NEXT(*dinfop, pci_links);
412 	else
413 		dinfo = STAILQ_FIRST(&pci_devq);
414 
415 	while (dinfo) {
416 		if (dinfo->cfg.baseclass == class &&
417 		    dinfo->cfg.subclass == subclass) {
418 			*dinfop = dinfo;
419 			return (dinfo->cfg.dev);
420 		}
421 		dinfo = STAILQ_NEXT(dinfo, pci_links);
422 	}
423 	*dinfop = NULL;
424 	return (NULL);
425 }
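
/*
 * A sketch of a typical caller (hypothetical; the class/subclass pair
 * is just an example): pass a cursor initialized to NULL and keep
 * calling until NULL is returned, e.g. to visit every USB function:
 *
 *	struct pci_devinfo *dinfo = NULL;
 *	device_t dev;
 *
 *	while ((dev = pci_iterate_class(&dinfo, PCIC_SERIALBUS,
 *	    PCIS_SERIALBUS_USB)) != NULL) {
 *		... inspect dev ...
 *	}
 */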
426 
427 /* return base address of memory or port map */
428 
429 static uint32_t
430 pci_mapbase(uint32_t mapreg)
431 {
432 
433 	if (PCI_BAR_MEM(mapreg))
434 		return (mapreg & PCIM_BAR_MEM_BASE);
435 	else
436 		return (mapreg & PCIM_BAR_IO_BASE);
437 }
438 
439 /* return map type of memory or port map */
440 
441 static const char *
442 pci_maptype(unsigned mapreg)
443 {
444 
445 	if (PCI_BAR_IO(mapreg))
446 		return ("I/O Port");
447 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
448 		return ("Prefetchable Memory");
449 	return ("Memory");
450 }
451 
452 /* return log2 of map size decoded for memory or port map */
453 
454 static int
455 pci_mapsize(uint32_t testval)
456 {
457 	int ln2size;
458 
459 	testval = pci_mapbase(testval);
460 	ln2size = 0;
461 	if (testval != 0) {
462 		while ((testval & 1) == 0) {
464 			ln2size++;
465 			testval >>= 1;
466 		}
467 	}
468 	return (ln2size);
469 }
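
/*
 * Worked example of the standard BAR sizing procedure: after writing
 * all-ones to a BAR, a 4KB memory BAR reads back as 0xfffff000.
 * pci_mapbase() masks off the low type bits, leaving 0xfffff000,
 * whose lowest set bit is bit 12, so pci_mapsize() returns 12
 * (i.e. log2(4096)).
 */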
470 
471 /* return log2 of address range supported by map register */
472 
473 static int
474 pci_maprange(unsigned mapreg)
475 {
476 	int ln2range = 0;
477 
478 	if (PCI_BAR_IO(mapreg))
479 		ln2range = 32;
480 	else
481 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
482 		case PCIM_BAR_MEM_32:
483 			ln2range = 32;
484 			break;
485 		case PCIM_BAR_MEM_1MB:
486 			ln2range = 20;
487 			break;
488 		case PCIM_BAR_MEM_64:
489 			ln2range = 64;
490 			break;
491 		}
492 	return (ln2range);
493 }
494 
495 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
496 
497 static void
498 pci_fixancient(pcicfgregs *cfg)
499 {
500 	if (cfg->hdrtype != 0)
501 		return;
502 
503 	/* PCI to PCI bridges use header type 1 */
504 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
505 		cfg->hdrtype = 1;
506 }
507 
508 /* extract header type specific config data */
509 
510 static void
511 pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
512 {
513 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
514 	switch (cfg->hdrtype) {
515 	case 0:
516 		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
517 		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
518 		cfg->nummaps	    = PCI_MAXMAPS_0;
519 		break;
520 	case 1:
521 		cfg->nummaps	    = PCI_MAXMAPS_1;
522 		break;
523 	case 2:
524 		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
525 		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
526 		cfg->nummaps	    = PCI_MAXMAPS_2;
527 		break;
528 	}
529 #undef REG
530 }
531 
532 /* read configuration header into pcicfgregs structure */
533 struct pci_devinfo *
534 pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
535 {
536 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
537 	pcicfgregs *cfg = NULL;
538 	struct pci_devinfo *devlist_entry;
539 	struct devlist *devlist_head;
540 
541 	devlist_head = &pci_devq;
542 
543 	devlist_entry = NULL;
544 
545 	if (REG(PCIR_DEVVENDOR, 4) != -1) {
546 		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);
547 
548 		cfg = &devlist_entry->cfg;
549 
550 		cfg->domain		= d;
551 		cfg->bus		= b;
552 		cfg->slot		= s;
553 		cfg->func		= f;
554 		cfg->vendor		= REG(PCIR_VENDOR, 2);
555 		cfg->device		= REG(PCIR_DEVICE, 2);
556 		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
557 		cfg->statreg		= REG(PCIR_STATUS, 2);
558 		cfg->baseclass		= REG(PCIR_CLASS, 1);
559 		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
560 		cfg->progif		= REG(PCIR_PROGIF, 1);
561 		cfg->revid		= REG(PCIR_REVID, 1);
562 		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
563 		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
564 		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
565 		cfg->intpin		= REG(PCIR_INTPIN, 1);
566 		cfg->intline		= REG(PCIR_INTLINE, 1);
567 
568 		cfg->mingnt		= REG(PCIR_MINGNT, 1);
569 		cfg->maxlat		= REG(PCIR_MAXLAT, 1);
570 
571 		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
572 		cfg->hdrtype		&= ~PCIM_MFDEV;
573 
574 		pci_fixancient(cfg);
575 		pci_hdrtypedata(pcib, b, s, f, cfg);
576 
577 		pci_read_capabilities(pcib, cfg);
578 
579 		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);
580 
581 		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
582 		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
583 		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
584 		devlist_entry->conf.pc_sel.pc_func = cfg->func;
585 		devlist_entry->conf.pc_hdr = cfg->hdrtype;
586 
587 		devlist_entry->conf.pc_subvendor = cfg->subvendor;
588 		devlist_entry->conf.pc_subdevice = cfg->subdevice;
589 		devlist_entry->conf.pc_vendor = cfg->vendor;
590 		devlist_entry->conf.pc_device = cfg->device;
591 
592 		devlist_entry->conf.pc_class = cfg->baseclass;
593 		devlist_entry->conf.pc_subclass = cfg->subclass;
594 		devlist_entry->conf.pc_progif = cfg->progif;
595 		devlist_entry->conf.pc_revid = cfg->revid;
596 
597 		pci_numdevs++;
598 		pci_generation++;
599 	}
600 	return (devlist_entry);
601 #undef REG
602 }
603 
604 static int
605 pci_fixup_nextptr(int *nextptr0)
606 {
607 	int nextptr = *nextptr0;
608 
609 	/* "Next pointer" is only one byte */
610 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d", nextptr));
611 
612 	if (nextptr & 0x3) {
613 		/*
614 		 * PCI local bus spec 3.0:
615 		 *
616 		 * "... The bottom two bits of all pointers are reserved
617 		 *  and must be implemented as 00b although software must
618 		 *  mask them to allow for future uses of these bits ..."
619 		 */
620 		if (bootverbose) {
621 			kprintf("Illegal PCI extended capability "
622 				"offset, fixup 0x%02x -> 0x%02x\n",
623 				nextptr, nextptr & ~0x3);
624 		}
625 		nextptr &= ~0x3;
626 	}
627 	*nextptr0 = nextptr;
628 
629 	if (nextptr < 0x40) {
630 		if (nextptr != 0) {
631 			kprintf("Illegal PCI extended capability "
632 				"offset 0x%02x\n", nextptr);
633 		}
634 		return 0;
635 	}
636 	return 1;
637 }
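
/*
 * For example, a "next pointer" of 0x41 violates the spec quoted
 * above; it is masked to 0x40, which is still a valid capability
 * offset, so this returns 1 and the capability walk continues there.
 */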
638 
639 static void
640 pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
641 {
642 #define REG(n, w)	\
643 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
644 
645 	struct pcicfg_pp *pp = &cfg->pp;
646 
647 	if (pp->pp_cap)
648 		return;
649 
650 	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
651 	pp->pp_status = ptr + PCIR_POWER_STATUS;
652 	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;
653 
654 	if ((nextptr - ptr) > PCIR_POWER_DATA) {
655 		/*
656 		 * XXX
657 		 * We should write to data_select and read back from
658 		 * data_scale to determine whether data register is
659 		 * implemented.
660 		 */
661 #ifdef foo
662 		pp->pp_data = ptr + PCIR_POWER_DATA;
663 #else
664 		pp->pp_data = 0;
665 #endif
666 	}
667 
668 #undef REG
669 }
670 
671 static void
672 pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
673 {
674 #if defined(__x86_64__)
675 
676 #define REG(n, w)	\
677 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
678 
679 	struct pcicfg_ht *ht = &cfg->ht;
680 	uint64_t addr;
681 	uint32_t val;
682 
683 	/* Determine HT-specific capability type. */
684 	val = REG(ptr + PCIR_HT_COMMAND, 2);
685 
686 	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
687 		cfg->ht.ht_slave = ptr;
688 
689 	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
690 		return;
691 
692 	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
693 		/* Sanity check the mapping window. */
694 		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
695 		addr <<= 32;
696 		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
697 		if (addr != MSI_X86_ADDR_BASE) {
698 			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
699 				"has non-default MSI window 0x%llx\n",
700 				cfg->domain, cfg->bus, cfg->slot, cfg->func,
701 				(long long)addr);
702 		}
703 	} else {
704 		addr = MSI_X86_ADDR_BASE;
705 	}
706 
707 	ht->ht_msimap = ptr;
708 	ht->ht_msictrl = val;
709 	ht->ht_msiaddr = addr;
710 
711 #undef REG
712 
713 #endif	/* __x86_64__ */
714 }
715 
716 static void
717 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
718 {
719 #define REG(n, w)	\
720 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
721 
722 	struct pcicfg_msi *msi = &cfg->msi;
723 
724 	msi->msi_location = ptr;
725 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
726 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
727 
728 #undef REG
729 }
730 
731 static void
732 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
733 {
734 #define REG(n, w)	\
735 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
736 
737 	struct pcicfg_msix *msix = &cfg->msix;
738 	uint32_t val;
739 
740 	msix->msix_location = ptr;
741 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
742 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
743 
744 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
745 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
746 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
747 
748 	val = REG(ptr + PCIR_MSIX_PBA, 4);
749 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
750 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
751 
752 	TAILQ_INIT(&msix->msix_vectors);
753 
754 #undef REG
755 }
756 
757 static void
758 pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
759 {
760 	cfg->vpd.vpd_reg = ptr;
761 }
762 
763 static void
764 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
765 {
766 #define REG(n, w)	\
767 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
768 
769 	/* Should always be true. */
770 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
771 		uint32_t val;
772 
773 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
774 		cfg->subvendor = val & 0xffff;
775 		cfg->subdevice = val >> 16;
776 	}
777 
778 #undef REG
779 }
780 
781 static void
782 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
783 {
784 	/*
785 	 * Assume we have a PCI-X chipset if we have
786 	 * at least one PCI-PCI bridge with a PCI-X
787 	 * capability.  Note that some systems with
788 	 * PCI-express or HT chipsets might match on
789 	 * this check as well.
790 	 */
791 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
792 		pcix_chipset = 1;
793 
794 	cfg->pcix.pcix_ptr = ptr;
795 }
796 
797 static int
798 pcie_slotimpl(const pcicfgregs *cfg)
799 {
800 	const struct pcicfg_expr *expr = &cfg->expr;
801 	uint16_t port_type;
802 
803 	/*
804 	 * - The slot implemented bit is meaningful iff the current port
805 	 *   is a root port or a downstream port.
806 	 * - Testing for root port or downstream port is meaningful iff
807 	 *   the PCI configuration has a type 1 header.
808 	 */
809 
810 	if (cfg->hdrtype != 1)
811 		return 0;
812 
813 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
814 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
815 		return 0;
816 
817 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
818 		return 0;
819 
820 	return 1;
821 }
822 
823 static void
824 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
825 {
826 #define REG(n, w)	\
827 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
828 
829 	struct pcicfg_expr *expr = &cfg->expr;
830 
831 	/*
832 	 * Assume we have a PCI-express chipset if we have
833 	 * at least one PCI-express device.
834 	 */
835 	pcie_chipset = 1;
836 
837 	expr->expr_ptr = ptr;
838 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
839 
840 	/*
841 	 * Read slot capabilities.  Slot capabilities exist iff the
842 	 * current port's slot is implemented.
843 	 */
844 	if (pcie_slotimpl(cfg))
845 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
846 
847 #undef REG
848 }
849 
850 static void
851 pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
852 {
853 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
854 #define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)
855 
856 	uint32_t val;
857 	int nextptr, ptrptr;
858 
859 	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
860 		/* No capabilities */
861 		return;
862 	}
863 
864 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
865 	case 0:
866 	case 1:
867 		ptrptr = PCIR_CAP_PTR;
868 		break;
869 	case 2:
870 		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
871 		break;
872 	default:
873 		return;				/* no capabilities support */
874 	}
875 	nextptr = REG(ptrptr, 1);	/* sanity check? */
876 
877 	/*
878 	 * Read capability entries.
879 	 */
880 	while (pci_fixup_nextptr(&nextptr)) {
881 		const struct pci_read_cap *rc;
882 		int ptr = nextptr;
883 
884 		/* Find the next entry */
885 		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);
886 
887 		/* Process this entry */
888 		val = REG(ptr + PCICAP_ID, 1);
889 		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
890 			if (rc->cap == val) {
891 				rc->read_cap(pcib, ptr, nextptr, cfg);
892 				break;
893 			}
894 		}
895 	}
896 
897 #if defined(__x86_64__)
898 	/*
899 	 * Enable the MSI mapping window for all HyperTransport
900 	 * slaves.  PCI-PCI bridges have their windows enabled via
901 	 * PCIB_MAP_MSI().
902 	 */
903 	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
904 	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
905 		device_printf(pcib,
906 	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
907 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
908 		cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
909 		WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
910 		    2);
911 	}
912 #endif
913 
914 /* REG and WREG stay defined; they carry through to the functions below */
915 }
916 
917 /*
918  * PCI Vital Product Data
919  */
920 
921 #define	PCI_VPD_TIMEOUT		1000000
922 
923 static int
924 pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
925 {
926 	int count = PCI_VPD_TIMEOUT;
927 
928 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
929 
930 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);
931 
932 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
933 		if (--count < 0)
934 			return (ENXIO);
935 		DELAY(1);	/* limit looping */
936 	}
937 	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));
938 
939 	return (0);
940 }
941 
942 #if 0
943 static int
944 pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
945 {
946 	int count = PCI_VPD_TIMEOUT;
947 
948 	KASSERT((reg & 3) == 0, ("VPD register must be 4 byte aligned"));
949 
950 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
951 	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
952 	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
953 		if (--count < 0)
954 			return (ENXIO);
955 		DELAY(1);	/* limit looping */
956 	}
957 
958 	return (0);
959 }
960 #endif
961 
962 #undef PCI_VPD_TIMEOUT
963 
964 struct vpd_readstate {
965 	device_t	pcib;
966 	pcicfgregs	*cfg;
967 	uint32_t	val;
968 	int		bytesinval;
969 	int		off;
970 	uint8_t		cksum;
971 };
972 
973 static int
974 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
975 {
976 	uint32_t reg;
977 	uint8_t byte;
978 
979 	if (vrs->bytesinval == 0) {
980 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
981 			return (ENXIO);
982 		vrs->val = le32toh(reg);
983 		vrs->off += 4;
984 		byte = vrs->val & 0xff;
985 		vrs->bytesinval = 3;
986 	} else {
987 		vrs->val = vrs->val >> 8;
988 		byte = vrs->val & 0xff;
989 		vrs->bytesinval--;
990 	}
991 
992 	vrs->cksum += byte;
993 	*data = byte;
994 	return (0);
995 }
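
/*
 * Note that vpd_nextbyte() streams the VPD one byte at a time on top
 * of 32-bit reads: the first call fetches the dword at vrs->off and
 * returns its least significant byte, and the next three calls drain
 * the remaining bytes before another config space access is issued.
 */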
996 
997 int
998 pcie_slot_implemented(device_t dev)
999 {
1000 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1001 
1002 	return pcie_slotimpl(&dinfo->cfg);
1003 }
1004 
1005 void
1006 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
1007 {
1008 	uint8_t expr_ptr;
1009 	uint16_t val;
1010 
1011 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
1012 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
1013 		panic("%s: invalid max read request size 0x%02x",
1014 		      device_get_nameunit(dev), rqsize);
1015 	}
1016 
1017 	expr_ptr = pci_get_pciecap_ptr(dev);
1018 	if (!expr_ptr)
1019 		panic("%s: not PCIe device", device_get_nameunit(dev));
1020 
1021 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1022 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
1023 		if (bootverbose)
1024 			device_printf(dev, "adjust device control 0x%04x", val);
1025 
1026 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
1027 		val |= rqsize;
1028 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
1029 
1030 		if (bootverbose)
1031 			kprintf(" -> 0x%04x\n", val);
1032 	}
1033 }
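
/*
 * For example, a driver that wants 4KB read requests would call
 * (a sketch; the device must be PCIe, otherwise this panics):
 *
 *	pcie_set_max_readrq(dev, PCIEM_DEVCTL_MAX_READRQ_4096);
 */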
1034 
1035 uint16_t
1036 pcie_get_max_readrq(device_t dev)
1037 {
1038 	uint8_t expr_ptr;
1039 	uint16_t val;
1040 
1041 	expr_ptr = pci_get_pciecap_ptr(dev);
1042 	if (!expr_ptr)
1043 		panic("%s: not PCIe device", device_get_nameunit(dev));
1044 
1045 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
1046 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
1047 }
1048 
1049 static void
1050 pci_read_vpd(device_t pcib, pcicfgregs *cfg)
1051 {
1052 	struct vpd_readstate vrs;
1053 	int state;
1054 	int name;
1055 	int remain;
1056 	int i;
1057 	int alloc, off;		/* alloc/off for RO/W arrays */
1058 	int cksumvalid;
1059 	int dflen;
1060 	uint8_t byte;
1061 	uint8_t byte2;
1062 
1063 	/* init vpd reader */
1064 	vrs.bytesinval = 0;
1065 	vrs.off = 0;
1066 	vrs.pcib = pcib;
1067 	vrs.cfg = cfg;
1068 	vrs.cksum = 0;
1069 
1070 	state = 0;
1071 	name = remain = i = 0;	/* shut up stupid gcc */
1072 	alloc = off = 0;	/* shut up stupid gcc */
1073 	dflen = 0;		/* shut up stupid gcc */
1074 	cksumvalid = -1;
1075 	while (state >= 0) {
1076 		if (vpd_nextbyte(&vrs, &byte)) {
1077 			state = -2;
1078 			break;
1079 		}
1080 #if 0
1081 		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
1082 		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
1083 		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
1084 #endif
1085 		switch (state) {
1086 		case 0:		/* item name */
1087 			if (byte & 0x80) {
1088 				if (vpd_nextbyte(&vrs, &byte2)) {
1089 					state = -2;
1090 					break;
1091 				}
1092 				remain = byte2;
1093 				if (vpd_nextbyte(&vrs, &byte2)) {
1094 					state = -2;
1095 					break;
1096 				}
1097 				remain |= byte2 << 8;
1098 				if (remain > (0x7f*4 - vrs.off)) {
1099 					state = -1;
1100 					kprintf(
1101 			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
1102 					    cfg->domain, cfg->bus, cfg->slot,
1103 					    cfg->func, remain);
1104 				}
1105 				name = byte & 0x7f;
1106 			} else {
1107 				remain = byte & 0x7;
1108 				name = (byte >> 3) & 0xf;
1109 			}
1110 			switch (name) {
1111 			case 0x2:	/* String */
1112 				cfg->vpd.vpd_ident = kmalloc(remain + 1,
1113 				    M_DEVBUF, M_WAITOK);
1114 				i = 0;
1115 				state = 1;
1116 				break;
1117 			case 0xf:	/* End */
1118 				state = -1;
1119 				break;
1120 			case 0x10:	/* VPD-R */
1121 				alloc = 8;
1122 				off = 0;
1123 				cfg->vpd.vpd_ros = kmalloc(alloc *
1124 				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
1125 				    M_WAITOK | M_ZERO);
1126 				state = 2;
1127 				break;
1128 			case 0x11:	/* VPD-W */
1129 				alloc = 8;
1130 				off = 0;
1131 				cfg->vpd.vpd_w = kmalloc(alloc *
1132 				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
1133 				    M_WAITOK | M_ZERO);
1134 				state = 5;
1135 				break;
1136 			default:	/* Invalid data, abort */
1137 				state = -1;
1138 				break;
1139 			}
1140 			break;
1141 
1142 		case 1:	/* Identifier String */
1143 			cfg->vpd.vpd_ident[i++] = byte;
1144 			remain--;
1145 			if (remain == 0)  {
1146 				cfg->vpd.vpd_ident[i] = '\0';
1147 				state = 0;
1148 			}
1149 			break;
1150 
1151 		case 2:	/* VPD-R Keyword Header */
1152 			if (off == alloc) {
1153 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1154 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
1155 				    M_DEVBUF, M_WAITOK | M_ZERO);
1156 			}
1157 			cfg->vpd.vpd_ros[off].keyword[0] = byte;
1158 			if (vpd_nextbyte(&vrs, &byte2)) {
1159 				state = -2;
1160 				break;
1161 			}
1162 			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
1163 			if (vpd_nextbyte(&vrs, &byte2)) {
1164 				state = -2;
1165 				break;
1166 			}
1167 			dflen = byte2;
1168 			if (dflen == 0 &&
1169 			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
1170 			    2) == 0) {
1171 				/*
1172 				 * if this happens, we can't trust the rest
1173 				 * of the VPD.
1174 				 */
1175 				kprintf(
1176 				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
1177 				    cfg->domain, cfg->bus, cfg->slot,
1178 				    cfg->func, dflen);
1179 				cksumvalid = 0;
1180 				state = -1;
1181 				break;
1182 			} else if (dflen == 0) {
1183 				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
1184 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1185 				    M_DEVBUF, M_WAITOK);
1186 				cfg->vpd.vpd_ros[off].value[0] = '\x00';
1187 			} else
1188 				cfg->vpd.vpd_ros[off].value = kmalloc(
1189 				    (dflen + 1) *
1190 				    sizeof(*cfg->vpd.vpd_ros[off].value),
1191 				    M_DEVBUF, M_WAITOK);
1192 			remain -= 3;
1193 			i = 0;
1194 			/* keep in sync w/ state 3's transitions */
1195 			if (dflen == 0 && remain == 0)
1196 				state = 0;
1197 			else if (dflen == 0)
1198 				state = 2;
1199 			else
1200 				state = 3;
1201 			break;
1202 
1203 		case 3:	/* VPD-R Keyword Value */
1204 			cfg->vpd.vpd_ros[off].value[i++] = byte;
1205 			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
1206 			    "RV", 2) == 0 && cksumvalid == -1) {
1207 				if (vrs.cksum == 0)
1208 					cksumvalid = 1;
1209 				else {
1210 					if (bootverbose)
1211 						kprintf(
1212 				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
1213 						    cfg->domain, cfg->bus,
1214 						    cfg->slot, cfg->func,
1215 						    vrs.cksum);
1216 					cksumvalid = 0;
1217 					state = -1;
1218 					break;
1219 				}
1220 			}
1221 			dflen--;
1222 			remain--;
1223 			/* keep in sync w/ state 2's transitions */
1224 			if (dflen == 0)
1225 				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
1226 			if (dflen == 0 && remain == 0) {
1227 				cfg->vpd.vpd_rocnt = off;
1228 				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
1229 				    off * sizeof(*cfg->vpd.vpd_ros),
1230 				    M_DEVBUF, M_WAITOK | M_ZERO);
1231 				state = 0;
1232 			} else if (dflen == 0)
1233 				state = 2;
1234 			break;
1235 
1236 		case 4:
1237 			remain--;
1238 			if (remain == 0)
1239 				state = 0;
1240 			break;
1241 
1242 		case 5:	/* VPD-W Keyword Header */
1243 			if (off == alloc) {
1244 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1245 				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
1246 				    M_DEVBUF, M_WAITOK | M_ZERO);
1247 			}
1248 			cfg->vpd.vpd_w[off].keyword[0] = byte;
1249 			if (vpd_nextbyte(&vrs, &byte2)) {
1250 				state = -2;
1251 				break;
1252 			}
1253 			cfg->vpd.vpd_w[off].keyword[1] = byte2;
1254 			if (vpd_nextbyte(&vrs, &byte2)) {
1255 				state = -2;
1256 				break;
1257 			}
1258 			cfg->vpd.vpd_w[off].len = dflen = byte2;
1259 			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
1260 			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
1261 			    sizeof(*cfg->vpd.vpd_w[off].value),
1262 			    M_DEVBUF, M_WAITOK);
1263 			remain -= 3;
1264 			i = 0;
1265 			/* keep in sync w/ state 6's transitions */
1266 			if (dflen == 0 && remain == 0)
1267 				state = 0;
1268 			else if (dflen == 0)
1269 				state = 5;
1270 			else
1271 				state = 6;
1272 			break;
1273 
1274 		case 6:	/* VPD-W Keyword Value */
1275 			cfg->vpd.vpd_w[off].value[i++] = byte;
1276 			dflen--;
1277 			remain--;
1278 			/* keep in sync w/ state 5's transitions */
1279 			if (dflen == 0)
1280 				cfg->vpd.vpd_w[off++].value[i++] = '\0';
1281 			if (dflen == 0 && remain == 0) {
1282 				cfg->vpd.vpd_wcnt = off;
1283 				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
1284 				    off * sizeof(*cfg->vpd.vpd_w),
1285 				    M_DEVBUF, M_WAITOK | M_ZERO);
1286 				state = 0;
1287 			} else if (dflen == 0)
1288 				state = 5;
1289 			break;
1290 
1291 		default:
1292 			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
1293 			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
1294 			    state);
1295 			state = -1;
1296 			break;
1297 		}
1298 	}
1299 
1300 	if (cksumvalid == 0 || state < -1) {
1301 		/* read-only data bad, clean up */
1302 		if (cfg->vpd.vpd_ros != NULL) {
1303 			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
1304 				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
1305 			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
1306 			cfg->vpd.vpd_ros = NULL;
1307 		}
1308 	}
1309 	if (state < -1) {
1310 		/* I/O error, clean up */
1311 		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
1312 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
1313 		if (cfg->vpd.vpd_ident != NULL) {
1314 			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
1315 			cfg->vpd.vpd_ident = NULL;
1316 		}
1317 		if (cfg->vpd.vpd_w != NULL) {
1318 			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
1319 				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
1320 			kfree(cfg->vpd.vpd_w, M_DEVBUF);
1321 			cfg->vpd.vpd_w = NULL;
1322 		}
1323 	}
1324 	cfg->vpd.vpd_cached = 1;
1325 #undef REG
1326 #undef WREG
1327 }
1328 
1329 int
1330 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1331 {
1332 	struct pci_devinfo *dinfo = device_get_ivars(child);
1333 	pcicfgregs *cfg = &dinfo->cfg;
1334 
1335 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1336 		pci_read_vpd(device_get_parent(dev), cfg);
1337 
1338 	*identptr = cfg->vpd.vpd_ident;
1339 
1340 	if (*identptr == NULL)
1341 		return (ENXIO);
1342 
1343 	return (0);
1344 }
1345 
1346 int
1347 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1348 	const char **vptr)
1349 {
1350 	struct pci_devinfo *dinfo = device_get_ivars(child);
1351 	pcicfgregs *cfg = &dinfo->cfg;
1352 	int i;
1353 
1354 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1355 		pci_read_vpd(device_get_parent(dev), cfg);
1356 
1357 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++) {
1358 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1359 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1360 			*vptr = cfg->vpd.vpd_ros[i].value;
1361 			/* Stop at the first matching keyword. */
1362 			return (0);
1363 		}
1364 	}
1365 
1366 	*vptr = NULL;
1367 	return (ENXIO);
1368 }
1369 
1370 /*
1371  * Find the requested extended capability entry; on success return 0
1372  * and store its config space offset in *capreg, else return an error.
1373  */
1374 int
1375 pci_find_extcap_method(device_t dev, device_t child, int capability,
1376     int *capreg)
1377 {
1378 	struct pci_devinfo *dinfo = device_get_ivars(child);
1379 	pcicfgregs *cfg = &dinfo->cfg;
1380 	u_int32_t status;
1381 	u_int8_t ptr;
1382 
1383 	/*
1384 	 * Check the CAP_LIST bit of the PCI status register first.
1385 	 */
1386 	status = pci_read_config(child, PCIR_STATUS, 2);
1387 	if (!(status & PCIM_STATUS_CAPPRESENT))
1388 		return (ENXIO);
1389 
1390 	/*
1391 	 * Determine the start pointer of the capabilities list.
1392 	 */
1393 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1394 	case 0:
1395 	case 1:
1396 		ptr = PCIR_CAP_PTR;
1397 		break;
1398 	case 2:
1399 		ptr = PCIR_CAP_PTR_2;
1400 		break;
1401 	default:
1402 		/* XXX: panic? */
1403 		return (ENXIO);		/* no extended capabilities support */
1404 	}
1405 	ptr = pci_read_config(child, ptr, 1);
1406 
1407 	/*
1408 	 * Traverse the capabilities list.
1409 	 */
1410 	while (ptr != 0) {
1411 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1412 			if (capreg != NULL)
1413 				*capreg = ptr;
1414 			return (0);
1415 		}
1416 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1417 	}
1418 
1419 	return (ENOENT);
1420 }
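
/*
 * A sketch of typical usage through the pci_find_extcap() wrapper,
 * here probing for the PCI Express capability:
 *
 *	int ptr;
 *
 *	if (pci_find_extcap(dev, PCIY_EXPRESS, &ptr) == 0) {
 *		... capability registers start at config offset ptr ...
 *	}
 */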
1421 
1422 /*
1423  * Support for MSI-X message interrupts.
1424  */
1425 static void
1426 pci_setup_msix_vector(device_t dev, u_int index, uint64_t address,
1427     uint32_t data)
1428 {
1429 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1430 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1431 	uint32_t offset;
1432 
1433 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1434 	offset = msix->msix_table_offset + index * 16;
1435 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1436 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1437 	bus_write_4(msix->msix_table_res, offset + 8, data);
1438 
1439 	/* Enable MSI -> HT mapping. */
1440 	pci_ht_map_msi(dev, address);
1441 }
1442 
1443 static void
1444 pci_mask_msix_vector(device_t dev, u_int index)
1445 {
1446 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1447 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1448 	uint32_t offset, val;
1449 
1450 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1451 	offset = msix->msix_table_offset + index * 16 + 12;
1452 	val = bus_read_4(msix->msix_table_res, offset);
1453 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1454 		val |= PCIM_MSIX_VCTRL_MASK;
1455 		bus_write_4(msix->msix_table_res, offset, val);
1456 	}
1457 }
1458 
1459 static void
1460 pci_unmask_msix_vector(device_t dev, u_int index)
1461 {
1462 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1463 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1464 	uint32_t offset, val;
1465 
1466 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1467 	offset = msix->msix_table_offset + index * 16 + 12;
1468 	val = bus_read_4(msix->msix_table_res, offset);
1469 	if (val & PCIM_MSIX_VCTRL_MASK) {
1470 		val &= ~PCIM_MSIX_VCTRL_MASK;
1471 		bus_write_4(msix->msix_table_res, offset, val);
1472 	}
1473 }
1474 
1475 int
1476 pci_pending_msix_vector(device_t dev, u_int index)
1477 {
1478 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1479 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1480 	uint32_t offset, bit;
1481 
1482 	KASSERT(msix->msix_table_res != NULL && msix->msix_pba_res != NULL,
1483 	    ("MSI-X is not setup yet"));
1484 
1485 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1486 	offset = msix->msix_pba_offset + (index / 32) * 4;
1487 	bit = 1 << index % 32;
1488 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1489 }
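
/*
 * For example, with index 35 the pending bit lives in the second
 * 32-bit PBA word (msix_pba_offset + 4) at bit position 3 (35 % 32),
 * matching the MSI-X Pending Bit Array layout.
 */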
1490 
1491 /*
1492  * Restore MSI-X registers and table during resume.  If MSI-X is
1493  * enabled then walk the virtual table to restore the actual MSI-X
1494  * table.
1495  */
1496 static void
1497 pci_resume_msix(device_t dev)
1498 {
1499 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1500 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1501 
1502 	if (msix->msix_table_res != NULL) {
1503 		const struct msix_vector *mv;
1504 
1505 		pci_mask_msix_allvectors(dev);
1506 
1507 		TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1508 			u_int vector;
1509 
1510 			if (mv->mv_address == 0)
1511 				continue;
1512 
1513 			vector = PCI_MSIX_RID2VEC(mv->mv_rid);
1514 			pci_setup_msix_vector(dev, vector,
1515 			    mv->mv_address, mv->mv_data);
1516 			pci_unmask_msix_vector(dev, vector);
1517 		}
1518 	}
1519 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1520 	    msix->msix_ctrl, 2);
1521 }
1522 
1523 /*
1524  * Attempt to allocate one MSI-X message at the specified vector on cpuid.
1525  *
1526  * Upon success, the allocated MSI-X vector's rid is returned in *rid0.
1527  */
1528 int
1529 pci_alloc_msix_vector_method(device_t dev, device_t child, u_int vector,
1530     int *rid0, int cpuid)
1531 {
1532 	struct pci_devinfo *dinfo = device_get_ivars(child);
1533 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1534 	struct msix_vector *mv;
1535 	struct resource_list_entry *rle;
1536 	int error, irq, rid;
1537 
1538 	KASSERT(msix->msix_table_res != NULL &&
1539 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1540 	KASSERT(cpuid >= 0 && cpuid < ncpus, ("invalid cpuid %d", cpuid));
1541 	KASSERT(vector < msix->msix_msgnum,
1542 	    ("invalid MSI-X vector %u, total %d", vector, msix->msix_msgnum));
1543 
1544 	if (bootverbose) {
1545 		device_printf(child,
1546 		    "attempting to allocate MSI-X vector #%u (%d supported)\n",
1547 		    vector, msix->msix_msgnum);
1548 	}
1549 
1550 	/* Set rid according to vector number */
1551 	rid = PCI_MSIX_VEC2RID(vector);
1552 
1553 	/* Vector has already been allocated */
1554 	mv = pci_find_msix_vector(child, rid);
1555 	if (mv != NULL)
1556 		return EBUSY;
1557 
1558 	/* Allocate a message. */
1559 	error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq, cpuid);
1560 	if (error)
1561 		return error;
1562 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, rid,
1563 	    irq, irq, 1, cpuid);
1564 
1565 	if (bootverbose) {
1566 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1567 		device_printf(child, "using IRQ %lu for MSI-X on cpu%d\n",
1568 		    rle->start, cpuid);
1569 	}
1570 
1571 	/* Update counts of alloc'd messages. */
1572 	msix->msix_alloc++;
1573 
1574 	mv = kmalloc(sizeof(*mv), M_DEVBUF, M_WAITOK | M_ZERO);
1575 	mv->mv_rid = rid;
1576 	TAILQ_INSERT_TAIL(&msix->msix_vectors, mv, mv_link);
1577 
1578 	*rid0 = rid;
1579 	return 0;
1580 }
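
/*
 * A sketch of a driver-side caller (hypothetical; error handling
 * elided), allocating vector 0 on cpu0 and then the matching IRQ
 * resource:
 *
 *	int rid;
 *	struct resource *irq_res;
 *
 *	if (pci_alloc_msix_vector(dev, 0, &rid, 0) == 0) {
 *		irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 *		    &rid, RF_ACTIVE);
 *		...
 *	}
 */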
1581 
1582 int
1583 pci_release_msix_vector_method(device_t dev, device_t child, int rid)
1584 {
1585 	struct pci_devinfo *dinfo = device_get_ivars(child);
1586 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1587 	struct resource_list_entry *rle;
1588 	struct msix_vector *mv;
1589 	int irq, cpuid;
1590 
1591 	KASSERT(msix->msix_table_res != NULL &&
1592 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1593 	KASSERT(msix->msix_alloc > 0, ("No MSI-X allocated"));
1594 	KASSERT(rid > 0, ("invalid rid %d", rid));
1595 
1596 	mv = pci_find_msix_vector(child, rid);
1597 	KASSERT(mv != NULL, ("MSI-X rid %d is not allocated", rid));
1598 	KASSERT(mv->mv_address == 0, ("MSI-X rid %d not torn down", rid));
1599 
1600 	/* Make sure resource is no longer allocated. */
1601 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
1602 	KASSERT(rle != NULL, ("missing MSI-X resource, rid %d", rid));
1603 	KASSERT(rle->res == NULL,
1604 	    ("MSI-X resource is still allocated, rid %d", rid));
1605 
1606 	irq = rle->start;
1607 	cpuid = rle->cpuid;
1608 
1609 	/* Free the resource list entries. */
1610 	resource_list_delete(&dinfo->resources, SYS_RES_IRQ, rid);
1611 
1612 	/* Release the IRQ. */
1613 	PCIB_RELEASE_MSIX(device_get_parent(dev), child, irq, cpuid);
1614 
1615 	TAILQ_REMOVE(&msix->msix_vectors, mv, mv_link);
1616 	kfree(mv, M_DEVBUF);
1617 
1618 	msix->msix_alloc--;
1619 	return (0);
1620 }
1621 
1622 /*
1623  * Return the maximum number of MSI-X messages this device supports.
1624  * Basically, assuming the MD code can alloc messages, this function
1625  * should return the maximum value that pci_alloc_msix() can return.
1626  * Thus, it is subject to the tunables, etc.
1627  */
1628 int
1629 pci_msix_count_method(device_t dev, device_t child)
1630 {
1631 	struct pci_devinfo *dinfo = device_get_ivars(child);
1632 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1633 
1634 	if (pci_do_msix && msix->msix_location != 0)
1635 		return (msix->msix_msgnum);
1636 	return (0);
1637 }
1638 
1639 int
1640 pci_setup_msix(device_t dev)
1641 {
1642 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1643 	pcicfgregs *cfg = &dinfo->cfg;
1644 	struct resource_list_entry *rle;
1645 	struct resource *table_res, *pba_res;
1646 
1647 	KASSERT(cfg->msix.msix_table_res == NULL &&
1648 	    cfg->msix.msix_pba_res == NULL, ("MSI-X has already been setup"));
1649 
1650 	/* If rid 0 is allocated, then fail. */
1651 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1652 	if (rle != NULL && rle->res != NULL)
1653 		return (ENXIO);
1654 
1655 	/* Already have allocated MSIs? */
1656 	if (cfg->msi.msi_alloc != 0)
1657 		return (ENXIO);
1658 
1659 	/* If MSI is blacklisted for this system, fail. */
1660 	if (pci_msi_blacklisted())
1661 		return (ENXIO);
1662 
1663 	/* MSI-X capability present? */
1664 	if (cfg->msix.msix_location == 0 || cfg->msix.msix_msgnum == 0 ||
1665 	    !pci_do_msix)
1666 		return (ENODEV);
1667 
1668 	KASSERT(cfg->msix.msix_alloc == 0 &&
1669 	    TAILQ_EMPTY(&cfg->msix.msix_vectors),
1670 	    ("MSI-X vector has been allocated"));
1671 
1672 	/* Make sure the appropriate BARs are mapped. */
1673 	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1674 	    cfg->msix.msix_table_bar);
1675 	if (rle == NULL || rle->res == NULL ||
1676 	    !(rman_get_flags(rle->res) & RF_ACTIVE))
1677 		return (ENXIO);
1678 	table_res = rle->res;
1679 	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
1680 		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
1681 		    cfg->msix.msix_pba_bar);
1682 		if (rle == NULL || rle->res == NULL ||
1683 		    !(rman_get_flags(rle->res) & RF_ACTIVE))
1684 			return (ENXIO);
1685 	}
1686 	pba_res = rle->res;
1687 
1688 	cfg->msix.msix_table_res = table_res;
1689 	cfg->msix.msix_pba_res = pba_res;
1690 
1691 	pci_mask_msix_allvectors(dev);
1692 
1693 	return 0;
1694 }
1695 
1696 void
1697 pci_teardown_msix(device_t dev)
1698 {
1699 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1700 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1701 
1702 	KASSERT(msix->msix_table_res != NULL &&
1703 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1704 	KASSERT(msix->msix_alloc == 0 && TAILQ_EMPTY(&msix->msix_vectors),
1705 	    ("MSI-X vector is still allocated"));
1706 
1707 	pci_mask_msix_allvectors(dev);
1708 
1709 	msix->msix_table_res = NULL;
1710 	msix->msix_pba_res = NULL;
1711 }
1712 
1713 void
1714 pci_enable_msix(device_t dev)
1715 {
1716 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1717 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1718 
1719 	KASSERT(msix->msix_table_res != NULL &&
1720 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1721 
1722 	/* Update control register to enable MSI-X. */
1723 	msix->msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
1724 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1725 	    msix->msix_ctrl, 2);
1726 }
1727 
1728 void
1729 pci_disable_msix(device_t dev)
1730 {
1731 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1732 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1733 
1734 	KASSERT(msix->msix_table_res != NULL &&
1735 	    msix->msix_pba_res != NULL, ("MSI-X is not setup yet"));
1736 
1737 	/* Disable MSI -> HT mapping. */
1738 	pci_ht_map_msi(dev, 0);
1739 
1740 	/* Update control register to disable MSI-X. */
1741 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1742 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1743 	    msix->msix_ctrl, 2);
1744 }
1745 
1746 static void
1747 pci_mask_msix_allvectors(device_t dev)
1748 {
1749 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1750 	u_int i;
1751 
1752 	for (i = 0; i < dinfo->cfg.msix.msix_msgnum; ++i)
1753 		pci_mask_msix_vector(dev, i);
1754 }
1755 
1756 static struct msix_vector *
1757 pci_find_msix_vector(device_t dev, int rid)
1758 {
1759 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1760 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1761 	struct msix_vector *mv;
1762 
1763 	TAILQ_FOREACH(mv, &msix->msix_vectors, mv_link) {
1764 		if (mv->mv_rid == rid)
1765 			return mv;
1766 	}
1767 	return NULL;
1768 }
1769 
1770 /*
1771  * HyperTransport MSI mapping control
1772  */
1773 void
1774 pci_ht_map_msi(device_t dev, uint64_t addr)
1775 {
1776 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1777 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1778 
1779 	if (!ht->ht_msimap)
1780 		return;
1781 
1782 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1783 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1784 		/* Enable MSI -> HT mapping. */
1785 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1786 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1787 		    ht->ht_msictrl, 2);
1788 	}
1789 
1790 	if (!addr && (ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
1791 		/* Disable MSI -> HT mapping. */
1792 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1793 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1794 		    ht->ht_msictrl, 2);
1795 	}
1796 }
1797 
1798 /*
1799  * Support for MSI (Message Signaled Interrupts).
1800  */
1801 static void
1802 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1803 {
1804 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1805 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1806 
1807 	/* Write data and address values. */
1808 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1809 	    address & 0xffffffff, 4);
1810 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1811 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1812 		    address >> 32, 4);
1813 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1814 		    data, 2);
1815 	} else
1816 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1817 		    2);
1818 
1819 	/* Enable MSI in the control register. */
1820 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1821 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1822 	    2);
1823 
1824 	/* Enable MSI -> HT mapping. */
1825 	pci_ht_map_msi(dev, address);
1826 }
1827 
1828 static void
1829 pci_disable_msi(device_t dev)
1830 {
1831 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1832 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1833 
1834 	/* Disable MSI -> HT mapping. */
1835 	pci_ht_map_msi(dev, 0);
1836 
1837 	/* Disable MSI in the control register. */
1838 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1839 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1840 	    2);
1841 }
1842 
1843 /*
1844  * Restore MSI registers during resume.  If MSI is enabled then
1845  * restore the data and address registers in addition to the control
1846  * register.
1847  */
1848 static void
1849 pci_resume_msi(device_t dev)
1850 {
1851 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1852 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1853 	uint64_t address;
1854 	uint16_t data;
1855 
1856 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1857 		address = msi->msi_addr;
1858 		data = msi->msi_data;
1859 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1860 		    address & 0xffffffff, 4);
1861 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1862 			pci_write_config(dev, msi->msi_location +
1863 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1864 			pci_write_config(dev, msi->msi_location +
1865 			    PCIR_MSI_DATA_64BIT, data, 2);
1866 		} else
1867 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1868 			    data, 2);
1869 	}
1870 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1871 	    2);
1872 }
1873 
1874 /*
1875  * Returns true if the specified device is blacklisted because MSI
1876  * doesn't work.
1877  */
1878 int
1879 pci_msi_device_blacklisted(device_t dev)
1880 {
1881 	struct pci_quirk *q;
1882 
1883 	if (!pci_honor_msi_blacklist)
1884 		return (0);
1885 
1886 	for (q = &pci_quirks[0]; q->devid; q++) {
1887 		if (q->devid == pci_get_devid(dev) &&
1888 		    q->type == PCI_QUIRK_DISABLE_MSI)
1889 			return (1);
1890 	}
1891 	return (0);
1892 }
1893 
1894 /*
1895  * Determine if MSI is blacklisted globally on this system.  Currently,
1896  * we just check for blacklisted chipsets as represented by the
1897  * host-PCI bridge at device 0:0:0.  In the future, it may become
1898  * necessary to check other system attributes, such as the kenv values
1899  * that give the motherboard manufacturer and model number.
1900  */
1901 static int
1902 pci_msi_blacklisted(void)
1903 {
1904 	device_t dev;
1905 
1906 	if (!pci_honor_msi_blacklist)
1907 		return (0);
1908 
1909 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
1910 	if (!(pcie_chipset || pcix_chipset))
1911 		return (1);
1912 
1913 	dev = pci_find_bsf(0, 0, 0);
1914 	if (dev != NULL)
1915 		return (pci_msi_device_blacklisted(dev));
1916 	return (0);
1917 }
1918 
1919 /*
1920  * Attempt to allocate count MSI messages on start_cpuid.
1921  *
1922  * If start_cpuid < 0, then the MSI messages' target CPU will be
1923  * selected automatically.
1924  *
1925  * If the caller explicitly specified the MSI messages' target CPU,
1926  * i.e. start_cpuid >= 0, then we will try to allocate the count MSI
1927  * messages on the specified CPU; if the allocation fails because the
1928  * MD code does not have enough vectors (EMSGSIZE), then we will try
1929  * the next available CPU, until the allocation has failed on all CPUs.
1930  *
1931  * EMSGSIZE will be returned if none of the available CPUs has enough
1932  * vectors for the requested number of MSI messages.  The caller
1933  * should either reduce the number of MSI messages to be requested,
1934  * or simply give up on using MSI.
1935  *
1936  * The rids of the allocated SYS_RES_IRQ resources, which are >= 1,
1937  * are returned in the 'rid' array if the allocation succeeds.
1938  */
1939 int
1940 pci_alloc_msi_method(device_t dev, device_t child, int *rid, int count,
1941     int start_cpuid)
1942 {
1943 	struct pci_devinfo *dinfo = device_get_ivars(child);
1944 	pcicfgregs *cfg = &dinfo->cfg;
1945 	struct resource_list_entry *rle;
1946 	int error, i, irqs[32], cpuid = 0;
1947 	uint16_t ctrl;
1948 
1949 	KASSERT(count != 0 && count <= 32 && powerof2(count),
1950 	    ("invalid MSI count %d", count));
1951 	KASSERT(start_cpuid < ncpus, ("invalid cpuid %d", start_cpuid));
1952 
1953 	/* If rid 0 is allocated, then fail. */
1954 	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
1955 	if (rle != NULL && rle->res != NULL)
1956 		return (ENXIO);
1957 
1958 	/* Already have allocated messages? */
1959 	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_table_res != NULL)
1960 		return (ENXIO);
1961 
1962 	/* If MSI is blacklisted for this system, fail. */
1963 	if (pci_msi_blacklisted())
1964 		return (ENXIO);
1965 
1966 	/* MSI capability present? */
1967 	if (cfg->msi.msi_location == 0 || cfg->msi.msi_msgnum == 0 ||
1968 	    !pci_do_msi)
1969 		return (ENODEV);
1970 
1971 	KASSERT(count <= cfg->msi.msi_msgnum, ("large MSI count %d, max %d",
1972 	    count, cfg->msi.msi_msgnum));
1973 
1974 	if (bootverbose) {
1975 		device_printf(child,
1976 		    "attempting to allocate %d MSI vector%s (%d supported)\n",
1977 		    count, count > 1 ? "s" : "", cfg->msi.msi_msgnum);
1978 	}
1979 
1980 	if (start_cpuid < 0)
1981 		start_cpuid = atomic_fetchadd_int(&pci_msi_cpuid, 1) % ncpus;
1982 
1983 	error = EINVAL;
1984 	for (i = 0; i < ncpus; ++i) {
1985 		cpuid = (start_cpuid + i) % ncpus;
1986 
1987 		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, count,
1988 		    cfg->msi.msi_msgnum, irqs, cpuid);
1989 		if (error == 0)
1990 			break;
1991 		else if (error != EMSGSIZE)
1992 			return error;
1993 	}
1994 	if (error)
1995 		return error;
1996 
1997 	/*
1998 	 * We now have N messages mapped onto SYS_RES_IRQ resources in
1999 	 * the irqs[] array, so add new resources starting at rid 1.
2000 	 */
2001 	for (i = 0; i < count; i++) {
2002 		rid[i] = i + 1;
2003 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
2004 		    irqs[i], irqs[i], 1, cpuid);
2005 	}
2006 
2007 	if (bootverbose) {
2008 		if (count == 1) {
2009 			device_printf(child, "using IRQ %d on cpu%d for MSI\n",
2010 			    irqs[0], cpuid);
2011 		} else {
2012 			int run;
2013 
2014 			/*
2015 			 * Be fancy and try to print contiguous runs
2016 			 * of IRQ values as ranges.  'run' is true if
2017 			 * we are in a range.
2018 			 */
2019 			device_printf(child, "using IRQs %d", irqs[0]);
2020 			run = 0;
2021 			for (i = 1; i < count; i++) {
2022 
2023 				/* Still in a run? */
2024 				if (irqs[i] == irqs[i - 1] + 1) {
2025 					run = 1;
2026 					continue;
2027 				}
2028 
2029 				/* Finish previous range. */
2030 				if (run) {
2031 					kprintf("-%d", irqs[i - 1]);
2032 					run = 0;
2033 				}
2034 
2035 				/* Start new range. */
2036 				kprintf(",%d", irqs[i]);
2037 			}
2038 
2039 			/* Unfinished range? */
2040 			if (run)
2041 				kprintf("-%d", irqs[count - 1]);
2042 			kprintf(" for MSI on cpu%d\n", cpuid);
2043 		}
2044 	}
2045 
2046 	/* Update control register with count. */
2047 	ctrl = cfg->msi.msi_ctrl;
2048 	ctrl &= ~PCIM_MSICTRL_MME_MASK;
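	/*
	 * count is asserted to be a power of 2 between 1 and 32, so
	 * ffs(count) - 1 yields log2(count), the 0-5 value the MME
	 * (Multiple Message Enable) field expects.
	 */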
2049 	ctrl |= (ffs(count) - 1) << 4;
2050 	cfg->msi.msi_ctrl = ctrl;
2051 	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);
2052 
2053 	/* Update counts of alloc'd messages. */
2054 	cfg->msi.msi_alloc = count;
2055 	cfg->msi.msi_handlers = 0;
2056 	return (0);
2057 }
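
/*
 * Example (editor's sketch, not part of this file): allocating a single
 * MSI message from a driver attach routine.  This assumes a
 * pci_alloc_msi() wrapper that mirrors the (rid, count, start_cpuid)
 * signature of the method above; the foo_attach() name is hypothetical.
 */
#if 0
static int
foo_attach(device_t dev)
{
	struct resource *irq;
	int rid[1], irq_rid = 0;	/* rid 0 is the legacy INTx line */

	/* One message; start_cpuid < 0 selects the target CPU for us. */
	if (pci_alloc_msi(dev, rid, 1, -1) == 0)
		irq_rid = rid[0];	/* MSI rids start at 1 */

	irq = bus_alloc_resource_any(dev, SYS_RES_IRQ, &irq_rid,
	    RF_ACTIVE | (irq_rid == 0 ? RF_SHAREABLE : 0));
	return (irq != NULL ? 0 : ENXIO);
}
#endif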
2058 
2059 /* Release the MSI messages associated with this device. */
2060 int
2061 pci_release_msi_method(device_t dev, device_t child)
2062 {
2063 	struct pci_devinfo *dinfo = device_get_ivars(child);
2064 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2065 	struct resource_list_entry *rle;
2066 	int i, irqs[32], cpuid = -1;
2067 
2068 	/* Do we have any messages to release? */
2069 	if (msi->msi_alloc == 0)
2070 		return (ENODEV);
2071 	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));
2072 
2073 	/* Make sure none of the resources are allocated. */
2074 	if (msi->msi_handlers > 0)
2075 		return (EBUSY);
2076 	for (i = 0; i < msi->msi_alloc; i++) {
2077 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
2078 		KASSERT(rle != NULL, ("missing MSI resource"));
2079 		if (rle->res != NULL)
2080 			return (EBUSY);
2081 		if (i == 0) {
2082 			cpuid = rle->cpuid;
2083 			KASSERT(cpuid >= 0 && cpuid < ncpus,
2084 			    ("invalid MSI target cpuid %d", cpuid));
2085 		} else {
2086 			KASSERT(rle->cpuid == cpuid,
2087 			    ("MSI targets different cpus, "
2088 			     "was cpu%d, now cpu%d", cpuid, rle->cpuid));
2089 		}
2090 		irqs[i] = rle->start;
2091 	}
2092 
2093 	/* Update control register with 0 count. */
2094 	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
2095 	    ("%s: MSI still enabled", __func__));
2096 	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
2097 	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
2098 	    msi->msi_ctrl, 2);
2099 
2100 	/* Release the messages. */
2101 	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs,
2102 	    cpuid);
2103 	for (i = 0; i < msi->msi_alloc; i++)
2104 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
2105 
2106 	/* Update alloc count. */
2107 	msi->msi_alloc = 0;
2108 	msi->msi_addr = 0;
2109 	msi->msi_data = 0;
2110 	return (0);
2111 }
2112 
2113 /*
2114  * Return the maximum number of MSI messages this device supports.
2115  * Basically, assuming the MD code can alloc messages, this function
2116  * should return the maximum value that pci_alloc_msi() can return.
2117  * Thus, it is subject to the tunables, etc.
2118  */
2119 int
2120 pci_msi_count_method(device_t dev, device_t child)
2121 {
2122 	struct pci_devinfo *dinfo = device_get_ivars(child);
2123 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2124 
2125 	if (pci_do_msi && msi->msi_location != 0)
2126 		return (msi->msi_msgnum);
2127 	return (0);
2128 }
2129 
2130 /* kfree the pcicfgregs structure and all dependent data structures */
2131 
2132 int
2133 pci_freecfg(struct pci_devinfo *dinfo)
2134 {
2135 	struct devlist *devlist_head;
2136 	int i;
2137 
2138 	devlist_head = &pci_devq;
2139 
2140 	if (dinfo->cfg.vpd.vpd_reg) {
2141 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2142 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2143 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2144 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2145 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2146 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2147 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2148 	}
2149 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2150 	kfree(dinfo, M_DEVBUF);
2151 
2152 	/* increment the generation count */
2153 	pci_generation++;
2154 
2155 	/* we're losing one device */
2156 	pci_numdevs--;
2157 	return (0);
2158 }
2159 
2160 /*
2161  * PCI power management
2162  */
2163 int
2164 pci_set_powerstate_method(device_t dev, device_t child, int state)
2165 {
2166 	struct pci_devinfo *dinfo = device_get_ivars(child);
2167 	pcicfgregs *cfg = &dinfo->cfg;
2168 	uint16_t status;
2169 	int oldstate, highest, delay;
2170 
2171 	if (cfg->pp.pp_cap == 0)
2172 		return (EOPNOTSUPP);
2173 
2174 	/*
2175 	 * Optimize away a request for no state change.  While it would be OK to
2176 	 * write to the hardware in theory, some devices have shown odd
2177 	 * behavior when going from D3 -> D3.
2178 	 */
2179 	oldstate = pci_get_powerstate(child);
2180 	if (oldstate == state)
2181 		return (0);
2182 
2183 	/*
2184 	 * The PCI power management specification states that after a state
2185 	 * transition between PCI power states, system software must
2186 	 * guarantee a minimal delay before the function accesses the device.
2187 	 * Compute the worst case delay that we need to guarantee before we
2188 	 * access the device.  Many devices will be responsive much more
2189 	 * quickly than this delay, but there are some that don't respond
2190 	 * instantly to state changes.  Transitions to/from D3 state require
2191 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2192 	 * is done below with DELAY rather than a sleeper function because
2193 	 * this function can be called from contexts where we cannot sleep.
2194 	 */
2195 	highest = (oldstate > state) ? oldstate : state;
2196 	if (highest == PCI_POWERSTATE_D3)
2197 	    delay = 10000;
2198 	else if (highest == PCI_POWERSTATE_D2)
2199 	    delay = 200;
2200 	else
2201 	    delay = 0;
2202 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2203 	    & ~PCIM_PSTAT_DMASK;
2204 	switch (state) {
2205 	case PCI_POWERSTATE_D0:
2206 		status |= PCIM_PSTAT_D0;
2207 		break;
2208 	case PCI_POWERSTATE_D1:
2209 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2210 			return (EOPNOTSUPP);
2211 		status |= PCIM_PSTAT_D1;
2212 		break;
2213 	case PCI_POWERSTATE_D2:
2214 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2215 			return (EOPNOTSUPP);
2216 		status |= PCIM_PSTAT_D2;
2217 		break;
2218 	case PCI_POWERSTATE_D3:
2219 		status |= PCIM_PSTAT_D3;
2220 		break;
2221 	default:
2222 		return (EINVAL);
2223 	}
2224 
2225 	if (bootverbose)
2226 		kprintf(
2227 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2228 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2229 		    dinfo->cfg.func, oldstate, state);
2230 
2231 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2232 	if (delay)
2233 		DELAY(delay);
2234 	return (0);
2235 }
2236 
2237 int
2238 pci_get_powerstate_method(device_t dev, device_t child)
2239 {
2240 	struct pci_devinfo *dinfo = device_get_ivars(child);
2241 	pcicfgregs *cfg = &dinfo->cfg;
2242 	uint16_t status;
2243 	int result;
2244 
2245 	if (cfg->pp.pp_cap != 0) {
2246 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2247 		switch (status & PCIM_PSTAT_DMASK) {
2248 		case PCIM_PSTAT_D0:
2249 			result = PCI_POWERSTATE_D0;
2250 			break;
2251 		case PCIM_PSTAT_D1:
2252 			result = PCI_POWERSTATE_D1;
2253 			break;
2254 		case PCIM_PSTAT_D2:
2255 			result = PCI_POWERSTATE_D2;
2256 			break;
2257 		case PCIM_PSTAT_D3:
2258 			result = PCI_POWERSTATE_D3;
2259 			break;
2260 		default:
2261 			result = PCI_POWERSTATE_UNKNOWN;
2262 			break;
2263 		}
2264 	} else {
2265 		/* No support, device is always at D0 */
2266 		result = PCI_POWERSTATE_D0;
2267 	}
2268 	return (result);
2269 }
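
/*
 * Example (editor's sketch): a minimal suspend/resume pair driving the
 * power management methods above through the pci_set_powerstate()
 * wrapper.  The foo_* names are hypothetical.
 */
#if 0
static int
foo_suspend(device_t dev)
{
	/* Fails with EOPNOTSUPP if the device has no PM capability. */
	if (pci_set_powerstate(dev, PCI_POWERSTATE_D3) != 0)
		device_printf(dev, "cannot enter D3, staying put\n");
	return (0);
}

static int
foo_resume(device_t dev)
{
	/* The mandated D3 -> D0 settle delay (10ms) is applied above. */
	pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	return (0);
}
#endif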
2270 
2271 /*
2272  * Some convenience functions for PCI device drivers.
2273  */
2274 
2275 static __inline void
2276 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2277 {
2278 	uint16_t	command;
2279 
2280 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2281 	command |= bit;
2282 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2283 }
2284 
2285 static __inline void
2286 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2287 {
2288 	uint16_t	command;
2289 
2290 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2291 	command &= ~bit;
2292 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2293 }
2294 
2295 int
2296 pci_enable_busmaster_method(device_t dev, device_t child)
2297 {
2298 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2299 	return (0);
2300 }
2301 
2302 int
2303 pci_disable_busmaster_method(device_t dev, device_t child)
2304 {
2305 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2306 	return (0);
2307 }
2308 
2309 int
2310 pci_enable_io_method(device_t dev, device_t child, int space)
2311 {
2312 	uint16_t command;
2313 	uint16_t bit;
2314 	char *error;
2315 
2316 	bit = 0;
2317 	error = NULL;
2318 
2319 	switch(space) {
2320 	case SYS_RES_IOPORT:
2321 		bit = PCIM_CMD_PORTEN;
2322 		error = "port";
2323 		break;
2324 	case SYS_RES_MEMORY:
2325 		bit = PCIM_CMD_MEMEN;
2326 		error = "memory";
2327 		break;
2328 	default:
2329 		return (EINVAL);
2330 	}
2331 	pci_set_command_bit(dev, child, bit);
2332 	/* Some devices seem to need a brief stall here; what to do? */
2333 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2334 	if (command & bit)
2335 		return (0);
2336 	device_printf(child, "failed to enable %s mapping!\n", error);
2337 	return (ENXIO);
2338 }
2339 
2340 int
2341 pci_disable_io_method(device_t dev, device_t child, int space)
2342 {
2343 	uint16_t command;
2344 	uint16_t bit;
2345 	char *error;
2346 
2347 	bit = 0;
2348 	error = NULL;
2349 
2350 	switch(space) {
2351 	case SYS_RES_IOPORT:
2352 		bit = PCIM_CMD_PORTEN;
2353 		error = "port";
2354 		break;
2355 	case SYS_RES_MEMORY:
2356 		bit = PCIM_CMD_MEMEN;
2357 		error = "memory";
2358 		break;
2359 	default:
2360 		return (EINVAL);
2361 	}
2362 	pci_clear_command_bit(dev, child, bit);
2363 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2364 	if (command & bit) {
2365 		device_printf(child, "failed to disable %s mapping!\n", error);
2366 		return (ENXIO);
2367 	}
2368 	return (0);
2369 }
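
/*
 * Example (editor's sketch): typical decoding setup early in a driver
 * attach routine, reaching the convenience methods above through the
 * pci_enable_busmaster()/pci_enable_io() wrappers.
 */
#if 0
	pci_enable_busmaster(dev);	/* sets PCIM_CMD_BUSMASTEREN */
	if (pci_enable_io(dev, SYS_RES_MEMORY) != 0)
		device_printf(dev, "cannot enable memory decoding\n");
#endif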
2370 
2371 /*
2372  * New style pci driver.  Parent device is either a pci-host-bridge or a
2373  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2374  */
2375 
2376 void
2377 pci_print_verbose(struct pci_devinfo *dinfo)
2378 {
2379 
2380 	if (bootverbose) {
2381 		pcicfgregs *cfg = &dinfo->cfg;
2382 
2383 		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
2384 		    cfg->vendor, cfg->device, cfg->revid);
2385 		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
2386 		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
2387 		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
2388 		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
2389 		    cfg->mfdev);
2390 		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
2391 		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
2392 		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
2393 		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
2394 		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
2395 		if (cfg->intpin > 0)
2396 			kprintf("\tintpin=%c, irq=%d\n",
2397 			    cfg->intpin + 'a' - 1, cfg->intline);
2398 		if (cfg->pp.pp_cap) {
2399 			uint16_t status;
2400 
2401 			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
2402 			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
2403 			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
2404 			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
2405 			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
2406 			    status & PCIM_PSTAT_DMASK);
2407 		}
2408 		if (cfg->msi.msi_location) {
2409 			int ctrl;
2410 
2411 			ctrl = cfg->msi.msi_ctrl;
2412 			kprintf("\tMSI supports %d message%s%s%s\n",
2413 			    cfg->msi.msi_msgnum,
2414 			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
2415 			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
2416 			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
2417 		}
2418 		if (cfg->msix.msix_location) {
2419 			kprintf("\tMSI-X supports %d message%s ",
2420 			    cfg->msix.msix_msgnum,
2421 			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
2422 			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
2423 				kprintf("in map 0x%x\n",
2424 				    cfg->msix.msix_table_bar);
2425 			else
2426 				kprintf("in maps 0x%x and 0x%x\n",
2427 				    cfg->msix.msix_table_bar,
2428 				    cfg->msix.msix_pba_bar);
2429 		}
2430 		pci_print_verbose_expr(cfg);
2431 	}
2432 }
2433 
2434 static void
2435 pci_print_verbose_expr(const pcicfgregs *cfg)
2436 {
2437 	const struct pcicfg_expr *expr = &cfg->expr;
2438 	const char *port_name;
2439 	uint16_t port_type;
2440 
2441 	if (!bootverbose)
2442 		return;
2443 
2444 	if (expr->expr_ptr == 0) /* No PCI Express capability */
2445 		return;
2446 
2447 	kprintf("\tPCI Express ver.%d cap=0x%04x",
2448 		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
2449 
2450 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
2451 
2452 	switch (port_type) {
2453 	case PCIE_END_POINT:
2454 		port_name = "DEVICE";
2455 		break;
2456 	case PCIE_LEG_END_POINT:
2457 		port_name = "LEGDEV";
2458 		break;
2459 	case PCIE_ROOT_PORT:
2460 		port_name = "ROOT";
2461 		break;
2462 	case PCIE_UP_STREAM_PORT:
2463 		port_name = "UPSTREAM";
2464 		break;
2465 	case PCIE_DOWN_STREAM_PORT:
2466 		port_name = "DOWNSTRM";
2467 		break;
2468 	case PCIE_PCIE2PCI_BRIDGE:
2469 		port_name = "PCIE2PCI";
2470 		break;
2471 	case PCIE_PCI2PCIE_BRIDGE:
2472 		port_name = "PCI2PCIE";
2473 		break;
2474 	case PCIE_ROOT_END_POINT:
2475 		port_name = "ROOTDEV";
2476 		break;
2477 	case PCIE_ROOT_EVT_COLL:
2478 		port_name = "ROOTEVTC";
2479 		break;
2480 	default:
2481 		port_name = NULL;
2482 		break;
2483 	}
2484 	if ((port_type == PCIE_ROOT_PORT ||
2485 	     port_type == PCIE_DOWN_STREAM_PORT) &&
2486 	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
2487 		port_name = NULL;
2488 	if (port_name != NULL)
2489 		kprintf("[%s]", port_name);
2490 
2491 	if (pcie_slotimpl(cfg)) {
2492 		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
2493 		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
2494 			kprintf("[HOTPLUG]");
2495 	}
2496 	kprintf("\n");
2497 }
2498 
2499 static int
2500 pci_porten(device_t pcib, int b, int s, int f)
2501 {
2502 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2503 		& PCIM_CMD_PORTEN) != 0;
2504 }
2505 
2506 static int
2507 pci_memen(device_t pcib, int b, int s, int f)
2508 {
2509 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2510 		& PCIM_CMD_MEMEN) != 0;
2511 }
2512 
2513 /*
2514  * Add a resource based on a PCI map register.  Return 1 if the map
2515  * register is a 32-bit map register or 2 if it is a 64-bit register.
2516  */
2517 static int
2518 pci_add_map(device_t pcib, device_t bus, device_t dev,
2519     int b, int s, int f, int reg, struct resource_list *rl, int force,
2520     int prefetch)
2521 {
2522 	uint32_t map;
2523 	uint16_t old_cmd;
2524 	pci_addr_t base;
2525 	pci_addr_t start, end, count;
2526 	uint8_t ln2size;
2527 	uint8_t ln2range;
2528 	uint32_t testval;
2529 	uint16_t cmd;
2530 	int type;
2531 	int barlen;
2532 	struct resource *res;
2533 
2534 	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2535 
2536 	/* Disable access to device memory */
2537 	old_cmd = 0;
2538 	if (PCI_BAR_MEM(map)) {
2539 		old_cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2540 		cmd = old_cmd & ~PCIM_CMD_MEMEN;
2541 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2542 	}
2543 
2544 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
2545 	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
2546 	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);
2547 
2548 	/* Restore memory access mode */
2549 	if (PCI_BAR_MEM(map)) {
2550 		PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, old_cmd, 2);
2551 	}
2552 
2553 	if (PCI_BAR_MEM(map)) {
2554 		type = SYS_RES_MEMORY;
2555 		if (map & PCIM_BAR_MEM_PREFETCH)
2556 			prefetch = 1;
2557 	} else
2558 		type = SYS_RES_IOPORT;
2559 	ln2size = pci_mapsize(testval);
2560 	ln2range = pci_maprange(testval);
2561 	base = pci_mapbase(map);
2562 	barlen = ln2range == 64 ? 2 : 1;
2563 
2564 	/*
2565 	 * For I/O registers, if bottom bit is set, and the next bit up
2566 	 * isn't clear, we know we have a BAR that doesn't conform to the
2567 	 * spec, so ignore it.  Also, sanity check the size of the data
2568 	 * areas to the type of memory involved.  Memory must be at least
2569 	 * 16 bytes in size, while I/O ranges must be at least 4.
2570 	 */
2571 	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
2572 		return (barlen);
2573 	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
2574 	    (type == SYS_RES_IOPORT && ln2size < 2))
2575 		return (barlen);
2576 
2577 	if (ln2range == 64)
2578 		/* Read the other half of a 64bit map register */
2579 		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
2580 	if (bootverbose) {
2581 		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
2582 		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
2583 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2584 			kprintf(", port disabled\n");
2585 		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2586 			kprintf(", memory disabled\n");
2587 		else
2588 			kprintf(", enabled\n");
2589 	}
2590 
2591 	/*
2592 	 * If base is 0, then we have problems.  It is best to ignore
2593 	 * such entries for the moment.  These will be allocated later if
2594 	 * the driver specifically requests them.  However, some
2595 	 * removable busses look better when all resources are allocated,
2596 	 * so allow '0' to be overridden.
2597 	 *
2598 	 * Similarly treat maps whose value is the same as the test value
2599 	 * read back.  These maps have had all f's written to them by the
2600 	 * BIOS in an attempt to disable the resources.
2601 	 */
2602 	if (!force && (base == 0 || map == testval))
2603 		return (barlen);
2604 	if ((u_long)base != base) {
2605 		device_printf(bus,
2606 		    "pci%d:%d:%d:%d bar %#x too many address bits\n",
2607 		    pci_get_domain(dev), b, s, f, reg);
2608 		return (barlen);
2609 	}
2610 
2611 	/*
2612 	 * This code theoretically does the right thing, but has
2613 	 * undesirable side effects in some cases where peripherals
2614 	 * respond oddly to having these bits enabled.  Let the user
2615 	 * be able to turn them off (since pci_enable_io_modes is 1 by
2616 	 * default).
2617 	 */
2618 	if (pci_enable_io_modes) {
2619 		/* Turn on resources that have been left off by a lazy BIOS */
2620 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
2621 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2622 			cmd |= PCIM_CMD_PORTEN;
2623 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2624 		}
2625 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
2626 			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
2627 			cmd |= PCIM_CMD_MEMEN;
2628 			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
2629 		}
2630 	} else {
2631 		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
2632 			return (barlen);
2633 		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
2634 			return (barlen);
2635 	}
2636 
2637 	count = 1 << ln2size;
2638 	if (base == 0 || base == pci_mapbase(testval)) {
2639 		start = 0;	/* Let the parent decide. */
2640 		end = ~0ULL;
2641 	} else {
2642 		start = base;
2643 		end = base + (1 << ln2size) - 1;
2644 	}
2645 	resource_list_add(rl, type, reg, start, end, count, -1);
2646 
2647 	/*
2648 	 * Try to allocate the resource for this BAR from our parent
2649 	 * so that this resource range is already reserved.  The
2650 	 * driver for this device will later inherit this resource in
2651 	 * pci_alloc_resource().
2652 	 */
2653 	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
2654 	    prefetch ? RF_PREFETCHABLE : 0, -1);
2655 	if (res == NULL) {
2656 		/*
2657 		 * If the allocation fails, delete the resource list
2658 		 * entry to force pci_alloc_resource() to allocate
2659 		 * resources from the parent.
2660 		 */
2661 		resource_list_delete(rl, type, reg);
2662 #ifdef PCI_BAR_CLEAR
2663 		/* Clear the BAR */
2664 		start = 0;
2665 #else	/* !PCI_BAR_CLEAR */
2666 		/*
2667 		 * Don't clear the BAR here.  Some BIOSes list the HPET
2668 		 * as a PCI function; clearing the BAR causes the HPET
2669 		 * timer to stop ticking.
2670 		 */
2671 		if (bootverbose) {
2672 			kprintf("pci:%d:%d:%d: resource reservation failed "
2673 				"%#jx - %#jx\n", b, s, f,
2674 				(intmax_t)start, (intmax_t)end);
2675 		}
2676 		return (barlen);
2677 #endif	/* PCI_BAR_CLEAR */
2678 	} else {
2679 		start = rman_get_start(res);
2680 	}
2681 	pci_write_config(dev, reg, start, 4);
2682 	if (ln2range == 64)
2683 		pci_write_config(dev, reg + 4, start >> 32, 4);
2684 	return (barlen);
2685 }
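
/*
 * Example (editor's sketch): how the all-ones probe above becomes a BAR
 * size.  A 32-bit memory BAR that reads back 0xfffff000 has its lowest
 * settable bit at bit 12, so ln2size = 12, i.e. a 4KB window.
 */
#if 0
	uint32_t testval = 0xfffff000;			/* probe read-back */
	int ln2size = ffs(testval & PCIM_BAR_MEM_BASE) - 1;
	pci_addr_t size = (pci_addr_t)1 << ln2size;	/* 0x1000 = 4KB */
#endif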
2686 
2687 /*
2688  * For ATA devices we need to decide early what addressing mode to use.
2689  * Legacy demands that the primary and secondary ATA ports sit at the
2690  * same addresses that old ISA hardware did.  This dictates that we use
2691  * those addresses and ignore the BARs if we cannot set PCI native
2692  * addressing mode.
2693  */
2694 static void
2695 pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
2696     int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
2697 {
2698 	int rid, type, progif;
2699 #if 0
2700 	/* if this device supports PCI native addressing use it */
2701 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2702 	if ((progif & 0x8a) == 0x8a) {
2703 		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
2704 		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
2705 			kprintf("Trying ATA native PCI addressing mode\n");
2706 			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
2707 		}
2708 	}
2709 #endif
2710 	progif = pci_read_config(dev, PCIR_PROGIF, 1);
2711 	type = SYS_RES_IOPORT;
2712 	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
2713 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
2714 		    prefetchmask & (1 << 0));
2715 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
2716 		    prefetchmask & (1 << 1));
2717 	} else {
2718 		rid = PCIR_BAR(0);
2719 		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
2720 		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
2721 		    0, -1);
2722 		rid = PCIR_BAR(1);
2723 		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
2724 		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
2725 		    0, -1);
2726 	}
2727 	if (progif & PCIP_STORAGE_IDE_MODESEC) {
2728 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
2729 		    prefetchmask & (1 << 2));
2730 		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
2731 		    prefetchmask & (1 << 3));
2732 	} else {
2733 		rid = PCIR_BAR(2);
2734 		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
2735 		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
2736 		    0, -1);
2737 		rid = PCIR_BAR(3);
2738 		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
2739 		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
2740 		    0, -1);
2741 	}
2742 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
2743 	    prefetchmask & (1 << 4));
2744 	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
2745 	    prefetchmask & (1 << 5));
2746 }
2747 
2748 static void
2749 pci_assign_interrupt(device_t bus, device_t dev, int force_route)
2750 {
2751 	struct pci_devinfo *dinfo = device_get_ivars(dev);
2752 	pcicfgregs *cfg = &dinfo->cfg;
2753 	char tunable_name[64];
2754 	int irq;
2755 
2756 	/* Has to have an intpin to have an interrupt. */
2757 	if (cfg->intpin == 0)
2758 		return;
2759 
2760 	/* Let the user override the IRQ with a tunable. */
2761 	irq = PCI_INVALID_IRQ;
2762 	ksnprintf(tunable_name, sizeof(tunable_name),
2763 	    "hw.pci%d.%d.%d.%d.INT%c.irq",
2764 	    cfg->domain, cfg->bus, cfg->slot, cfg->func, cfg->intpin + 'A' - 1);
2765 	if (TUNABLE_INT_FETCH(tunable_name, &irq)) {
2766 		if (irq >= 255 || irq <= 0) {
2767 			irq = PCI_INVALID_IRQ;
2768 		} else {
2769 			if (machintr_legacy_intr_find(irq,
2770 			    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW) < 0) {
2771 				device_printf(dev,
2772 				    "hw.pci%d.%d.%d.%d.INT%c.irq=%d, invalid\n",
2773 				    cfg->domain, cfg->bus, cfg->slot, cfg->func,
2774 				    cfg->intpin + 'A' - 1, irq);
2775 				irq = PCI_INVALID_IRQ;
2776 			} else {
2777 				BUS_CONFIG_INTR(bus, dev, irq,
2778 				    INTR_TRIGGER_LEVEL, INTR_POLARITY_LOW);
2779 			}
2780 		}
2781 	}
2782 
2783 	/*
2784 	 * If we didn't get an IRQ via the tunable, then we either use the
2785 	 * IRQ value in the intline register or we ask the bus to route an
2786 	 * interrupt for us.  If force_route is true, then we only use the
2787 	 * value in the intline register if the bus was unable to assign an
2788 	 * IRQ.
2789 	 */
2790 	if (!PCI_INTERRUPT_VALID(irq)) {
2791 		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
2792 			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
2793 		if (!PCI_INTERRUPT_VALID(irq))
2794 			irq = cfg->intline;
2795 	}
2796 
2797 	/* If after all that we don't have an IRQ, just bail. */
2798 	if (!PCI_INTERRUPT_VALID(irq))
2799 		return;
2800 
2801 	/* Update the config register if it changed. */
2802 	if (irq != cfg->intline) {
2803 		cfg->intline = irq;
2804 		pci_write_config(dev, PCIR_INTLINE, irq, 1);
2805 	}
2806 
2807 	/* Add this IRQ as rid 0 interrupt resource. */
2808 	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
2809 	    machintr_legacy_intr_cpuid(irq));
2810 }
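
/*
 * Example: the tunable consumed above can be set from the loader to
 * override interrupt routing, e.g. in /boot/loader.conf (all values
 * here are illustrative only):
 *
 *	hw.pci0.0.31.2.INTB.irq="17"
 *
 * Values <= 0 or >= 255 are rejected and the tunable is ignored.
 */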
2811 
2812 /* Perform early OHCI takeover from SMM. */
2813 static void
2814 ohci_early_takeover(device_t self)
2815 {
2816 	struct resource *res;
2817 	uint32_t ctl;
2818 	int rid;
2819 	int i;
2820 
2821 	rid = PCIR_BAR(0);
2822 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2823 	if (res == NULL)
2824 		return;
2825 
2826 	ctl = bus_read_4(res, OHCI_CONTROL);
2827 	if (ctl & OHCI_IR) {
2828 		if (bootverbose)
2829 			kprintf("ohci early: "
2830 			    "SMM active, request owner change\n");
2831 		bus_write_4(res, OHCI_COMMAND_STATUS, OHCI_OCR);
2832 		for (i = 0; (i < 100) && (ctl & OHCI_IR); i++) {
2833 			DELAY(1000);
2834 			ctl = bus_read_4(res, OHCI_CONTROL);
2835 		}
2836 		if (ctl & OHCI_IR) {
2837 			if (bootverbose)
2838 				kprintf("ohci early: "
2839 				    "SMM does not respond, resetting\n");
2840 			bus_write_4(res, OHCI_CONTROL, OHCI_HCFS_RESET);
2841 		}
2842 		/* Disable interrupts */
2843 		bus_write_4(res, OHCI_INTERRUPT_DISABLE, OHCI_ALL_INTRS);
2844 	}
2845 
2846 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2847 }
2848 
2849 /* Perform early UHCI takeover from SMM. */
2850 static void
2851 uhci_early_takeover(device_t self)
2852 {
2853 	struct resource *res;
2854 	int rid;
2855 
2856 	/*
2857 	 * Set the PIRQD enable bit and switch off all the others.  We don't
2858 	 * want legacy support to interfere with us.  XXX Does this also mean
2859 	 * that the BIOS won't touch the keyboard anymore if it is connected
2860 	 * to the ports of the root hub?
2861 	 */
2862 	pci_write_config(self, PCI_LEGSUP, PCI_LEGSUP_USBPIRQDEN, 2);
2863 
2864 	/* Disable interrupts */
2865 	rid = PCI_UHCI_BASE_REG;
2866 	res = bus_alloc_resource_any(self, SYS_RES_IOPORT, &rid, RF_ACTIVE);
2867 	if (res != NULL) {
2868 		bus_write_2(res, UHCI_INTR, 0);
2869 		bus_release_resource(self, SYS_RES_IOPORT, rid, res);
2870 	}
2871 }
2872 
2873 /* Perform early EHCI takeover from SMM. */
2874 static void
2875 ehci_early_takeover(device_t self)
2876 {
2877 	struct resource *res;
2878 	uint32_t cparams;
2879 	uint32_t eec;
2880 	uint32_t eecp;
2881 	uint32_t bios_sem;
2882 	uint32_t offs;
2883 	int rid;
2884 	int i;
2885 
2886 	rid = PCIR_BAR(0);
2887 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2888 	if (res == NULL)
2889 		return;
2890 
2891 	cparams = bus_read_4(res, EHCI_HCCPARAMS);
2892 
2893 	/* Synchronise with the BIOS if it owns the controller. */
2894 	for (eecp = EHCI_HCC_EECP(cparams); eecp != 0;
2895 	    eecp = EHCI_EECP_NEXT(eec)) {
2896 		eec = pci_read_config(self, eecp, 4);
2897 		if (EHCI_EECP_ID(eec) != EHCI_EC_LEGSUP) {
2898 			continue;
2899 		}
2900 		bios_sem = pci_read_config(self, eecp +
2901 		    EHCI_LEGSUP_BIOS_SEM, 1);
2902 		if (bios_sem == 0) {
2903 			continue;
2904 		}
2905 		if (bootverbose)
2906 			kprintf("ehci early: "
2907 			    "SMM active, request owner change\n");
2908 
2909 		pci_write_config(self, eecp + EHCI_LEGSUP_OS_SEM, 1, 1);
2910 
2911 		for (i = 0; (i < 100) && (bios_sem != 0); i++) {
2912 			DELAY(1000);
2913 			bios_sem = pci_read_config(self, eecp +
2914 			    EHCI_LEGSUP_BIOS_SEM, 1);
2915 		}
2916 
2917 		if (bios_sem != 0) {
2918 			if (bootverbose)
2919 				kprintf("ehci early: "
2920 				    "SMM does not respond\n");
2921 		}
2922 		/* Disable interrupts */
2923 		offs = EHCI_CAPLENGTH(bus_read_4(res, EHCI_CAPLEN_HCIVERSION));
2924 		bus_write_4(res, offs + EHCI_USBINTR, 0);
2925 	}
2926 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
2927 }
2928 
2929 /* Perform early XHCI takeover from SMM. */
2930 static void
2931 xhci_early_takeover(device_t self)
2932 {
2933 	struct resource *res;
2934 	uint32_t cparams;
2935 	uint32_t eec;
2936 	uint32_t eecp;
2937 	uint32_t bios_sem;
2938 	uint32_t offs;
2939 	int rid;
2940 	int i;
2941 
2942 	rid = PCIR_BAR(0);
2943 	res = bus_alloc_resource_any(self, SYS_RES_MEMORY, &rid, RF_ACTIVE);
2944 	if (res == NULL)
2945 		return;
2946 
2947 	cparams = bus_read_4(res, XHCI_HCSPARAMS0);
2948 
2949 	eec = -1;
2950 
2951 	/* Synchronise with the BIOS if it owns the controller. */
2952 	for (eecp = XHCI_HCS0_XECP(cparams) << 2; eecp != 0 && XHCI_XECP_NEXT(eec);
2953 	    eecp += XHCI_XECP_NEXT(eec) << 2) {
2954 		eec = bus_read_4(res, eecp);
2955 
2956 		if (XHCI_XECP_ID(eec) != XHCI_ID_USB_LEGACY)
2957 			continue;
2958 
2959 		bios_sem = bus_read_1(res, eecp + XHCI_XECP_BIOS_SEM);
2960 
2961 		if (bios_sem == 0) {
2962 			if (bootverbose)
2963 				kprintf("xhci early: xhci is not owned by SMM\n");
2964 
2965 			continue;
2966 		}
2967 
2968 		if (bootverbose)
2969 			kprintf("xhci early: "
2970 			    "SMM active, request owner change\n");
2971 
2972 		bus_write_1(res, eecp + XHCI_XECP_OS_SEM, 1);
2973 
2974 		/* wait a maximum of 5 seconds */
2975 
2976 		for (i = 0; (i < 5000) && (bios_sem != 0); i++) {
2977 			DELAY(1000);
2978 
2979 			bios_sem = bus_read_1(res, eecp +
2980 			    XHCI_XECP_BIOS_SEM);
2981 		}
2982 
2983 		if (bios_sem != 0) {
2984 			if (bootverbose) {
2985 				kprintf("xhci early: "
2986 				    "SMM does not respond\n");
2987 				kprintf("xhci early: "
2988 				    "taking xhci by force\n");
2989 			}
2990 			bus_write_1(res, eecp + XHCI_XECP_BIOS_SEM, 0x00);
2991 		} else {
2992 			if (bootverbose)
2993 				kprintf("xhci early: "
2994 				    "handover successful\n");
2995 		}
2996 
2997 		/* Disable interrupts */
2998 		offs = bus_read_1(res, XHCI_CAPLENGTH);
2999 		bus_write_4(res, offs + XHCI_USBCMD, 0);
3000 		bus_read_4(res, offs + XHCI_USBSTS);
3001 	}
3002 	bus_release_resource(self, SYS_RES_MEMORY, rid, res);
3003 }
3004 
3005 void
3006 pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
3007 {
3008 	struct pci_devinfo *dinfo = device_get_ivars(dev);
3009 	pcicfgregs *cfg = &dinfo->cfg;
3010 	struct resource_list *rl = &dinfo->resources;
3011 	struct pci_quirk *q;
3012 	int b, i, f, s;
3013 
3014 	b = cfg->bus;
3015 	s = cfg->slot;
3016 	f = cfg->func;
3017 
3018 	/* ATA devices need special map treatment */
3019 	if ((pci_get_class(dev) == PCIC_STORAGE) &&
3020 	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
3021 	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
3022 	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
3023 	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
3024 		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
3025 	else
3026 		for (i = 0; i < cfg->nummaps;)
3027 			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
3028 			    rl, force, prefetchmask & (1 << i));
3029 
3030 	/*
3031 	 * Add additional, quirked resources.
3032 	 */
3033 	for (q = &pci_quirks[0]; q->devid; q++) {
3034 		if (q->devid == ((cfg->device << 16) | cfg->vendor)
3035 		    && q->type == PCI_QUIRK_MAP_REG)
3036 			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
3037 			  force, 0);
3038 	}
3039 
3040 	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
3041 		/*
3042 		 * Try to re-route interrupts. Sometimes the BIOS or
3043 		 * firmware may leave bogus values in these registers.
3044 		 * If the re-route fails, then just stick with what we
3045 		 * have.
3046 		 */
3047 		pci_assign_interrupt(bus, dev, 1);
3048 	}
3049 
3050 	if (pci_usb_takeover && pci_get_class(dev) == PCIC_SERIALBUS &&
3051 	    pci_get_subclass(dev) == PCIS_SERIALBUS_USB) {
3052 		if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_XHCI)
3053 			xhci_early_takeover(dev);
3054 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_EHCI)
3055 			ehci_early_takeover(dev);
3056 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_OHCI)
3057 			ohci_early_takeover(dev);
3058 		else if (pci_get_progif(dev) == PCIP_SERIALBUS_USB_UHCI)
3059 			uhci_early_takeover(dev);
3060 	}
3061 }
3062 
3063 void
3064 pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
3065 {
3066 #define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
3067 	device_t pcib = device_get_parent(dev);
3068 	struct pci_devinfo *dinfo;
3069 	int maxslots;
3070 	int s, f, pcifunchigh;
3071 	uint8_t hdrtype;
3072 
3073 	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
3074 	    ("dinfo_size too small"));
3075 	maxslots = PCIB_MAXSLOTS(pcib);
3076 	for (s = 0; s <= maxslots; s++) {
3077 		pcifunchigh = 0;
3078 		f = 0;
3079 		DELAY(1);
3080 		hdrtype = REG(PCIR_HDRTYPE, 1);
3081 		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
3082 			continue;
3083 		if (hdrtype & PCIM_MFDEV)
3084 			pcifunchigh = PCI_FUNCMAX;
3085 		for (f = 0; f <= pcifunchigh; f++) {
3086 			dinfo = pci_read_device(pcib, domain, busno, s, f,
3087 			    dinfo_size);
3088 			if (dinfo != NULL) {
3089 				pci_add_child(dev, dinfo);
3090 			}
3091 		}
3092 	}
3093 #undef REG
3094 }
3095 
3096 void
3097 pci_add_child(device_t bus, struct pci_devinfo *dinfo)
3098 {
3099 	device_t pcib;
3100 
3101 	pcib = device_get_parent(bus);
3102 	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
3103 	device_set_ivars(dinfo->cfg.dev, dinfo);
3104 	resource_list_init(&dinfo->resources);
3105 	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
3106 	pci_cfg_restore(dinfo->cfg.dev, dinfo);
3107 	pci_print_verbose(dinfo);
3108 	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
3109 }
3110 
3111 static int
3112 pci_probe(device_t dev)
3113 {
3114 	device_set_desc(dev, "PCI bus");
3115 
3116 	/* Allow other subclasses to override this driver. */
3117 	return (-1000);
3118 }
3119 
3120 static int
3121 pci_attach(device_t dev)
3122 {
3123 	int busno, domain;
3124 
3125 	/*
3126 	 * Since there can be multiple independently numbered PCI
3127 	 * busses on systems with multiple PCI domains, we can't use
3128 	 * the unit number to decide which bus we are probing. We ask
3129 	 * the parent pcib what our domain and bus numbers are.
3130 	 */
3131 	domain = pcib_get_domain(dev);
3132 	busno = pcib_get_bus(dev);
3133 	if (bootverbose)
3134 		device_printf(dev, "domain=%d, physical bus=%d\n",
3135 		    domain, busno);
3136 
3137 	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));
3138 
3139 	return (bus_generic_attach(dev));
3140 }
3141 
3142 int
3143 pci_suspend(device_t dev)
3144 {
3145 	int dstate, error, i, numdevs;
3146 	device_t acpi_dev, child, *devlist;
3147 	struct pci_devinfo *dinfo;
3148 
3149 	/*
3150 	 * Save the PCI configuration space for each child and set the
3151 	 * device in the appropriate power state for this sleep state.
3152 	 */
3153 	acpi_dev = NULL;
3154 	if (pci_do_power_resume)
3155 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3156 	device_get_children(dev, &devlist, &numdevs);
3157 	for (i = 0; i < numdevs; i++) {
3158 		child = devlist[i];
3159 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3160 		pci_cfg_save(child, dinfo, 0);
3161 	}
3162 
3163 	/* Suspend devices before potentially powering them down. */
3164 	error = bus_generic_suspend(dev);
3165 	if (error) {
3166 		kfree(devlist, M_TEMP);
3167 		return (error);
3168 	}
3169 
3170 	/*
3171 	 * Always set the device to D3.  If ACPI suggests a different
3172 	 * power state, use it instead.  If ACPI is not present, the
3173 	 * firmware is responsible for managing device power.  Skip
3174 	 * children who aren't attached since they are powered down
3175 	 * separately.  Only manage type 0 devices for now.
3176 	 */
3177 	for (i = 0; acpi_dev && i < numdevs; i++) {
3178 		child = devlist[i];
3179 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3180 		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
3181 			dstate = PCI_POWERSTATE_D3;
3182 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
3183 			pci_set_powerstate(child, dstate);
3184 		}
3185 	}
3186 	kfree(devlist, M_TEMP);
3187 	return (0);
3188 }
3189 
3190 int
3191 pci_resume(device_t dev)
3192 {
3193 	int i, numdevs;
3194 	device_t acpi_dev, child, *devlist;
3195 	struct pci_devinfo *dinfo;
3196 
3197 	/*
3198 	 * Set each child to D0 and restore its PCI configuration space.
3199 	 */
3200 	acpi_dev = NULL;
3201 	if (pci_do_power_resume)
3202 		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
3203 	device_get_children(dev, &devlist, &numdevs);
3204 	for (i = 0; i < numdevs; i++) {
3205 		/*
3206 		 * Notify ACPI we're going to D0 but ignore the result.  If
3207 		 * ACPI is not present, the firmware is responsible for
3208 		 * managing device power.  Only manage type 0 devices for now.
3209 		 */
3210 		child = devlist[i];
3211 		dinfo = (struct pci_devinfo *) device_get_ivars(child);
3212 		if (acpi_dev && device_is_attached(child) &&
3213 		    dinfo->cfg.hdrtype == 0) {
3214 			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
3215 			pci_set_powerstate(child, PCI_POWERSTATE_D0);
3216 		}
3217 
3218 		/* Now the device is powered up, restore its config space. */
3219 		pci_cfg_restore(child, dinfo);
3220 	}
3221 	kfree(devlist, M_TEMP);
3222 	return (bus_generic_resume(dev));
3223 }
3224 
3225 static void
3226 pci_load_vendor_data(void)
3227 {
3228 	caddr_t vendordata, info;
3229 
3230 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3231 		info = preload_search_info(vendordata, MODINFO_ADDR);
3232 		pci_vendordata = *(char **)info;
3233 		info = preload_search_info(vendordata, MODINFO_SIZE);
3234 		pci_vendordata_size = *(size_t *)info;
3235 		/* terminate the database */
3236 		pci_vendordata[pci_vendordata_size] = '\n';
3237 	}
3238 }
3239 
3240 void
3241 pci_driver_added(device_t dev, driver_t *driver)
3242 {
3243 	int numdevs;
3244 	device_t *devlist;
3245 	device_t child;
3246 	struct pci_devinfo *dinfo;
3247 	int i;
3248 
3249 	if (bootverbose)
3250 		device_printf(dev, "driver added\n");
3251 	DEVICE_IDENTIFY(driver, dev);
3252 	device_get_children(dev, &devlist, &numdevs);
3253 	for (i = 0; i < numdevs; i++) {
3254 		child = devlist[i];
3255 		if (device_get_state(child) != DS_NOTPRESENT)
3256 			continue;
3257 		dinfo = device_get_ivars(child);
3258 		pci_print_verbose(dinfo);
3259 		if (bootverbose)
3260 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3261 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3262 			    dinfo->cfg.func);
3263 		pci_cfg_restore(child, dinfo);
3264 		if (device_probe_and_attach(child) != 0)
3265 			pci_cfg_save(child, dinfo, 1);
3266 	}
3267 	kfree(devlist, M_TEMP);
3268 }
3269 
3270 static void
3271 pci_child_detached(device_t parent __unused, device_t child)
3272 {
3273 	/* Turn child's power off */
3274 	pci_cfg_save(child, device_get_ivars(child), 1);
3275 }
3276 
3277 int
3278 pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
3279     driver_intr_t *intr, void *arg, void **cookiep,
3280     lwkt_serialize_t serializer, const char *desc)
3281 {
3282 	int rid, error;
3283 	void *cookie;
3284 
3285 	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
3286 	    arg, &cookie, serializer, desc);
3287 	if (error)
3288 		return (error);
3289 
3290 	/* If this is not a direct child, just bail out. */
3291 	if (device_get_parent(child) != dev) {
3292 		*cookiep = cookie;
3293 		return(0);
3294 	}
3295 
3296 	rid = rman_get_rid(irq);
3297 	if (rid == 0) {
3298 		/* Make sure that INTx is enabled */
3299 		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3300 	} else {
3301 		struct pci_devinfo *dinfo = device_get_ivars(child);
3302 		uint64_t addr;
3303 		uint32_t data;
3304 
3305 		/*
3306 		 * Check to see if the interrupt is MSI or MSI-X.
3307 		 * Ask our parent to map the MSI and give
3308 		 * us the address and data register values.
3309 		 * If we fail for some reason, teardown the
3310 		 * interrupt handler.
3311 		 */
3312 		if (dinfo->cfg.msi.msi_alloc > 0) {
3313 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3314 
3315 			if (msi->msi_addr == 0) {
3316 				KASSERT(msi->msi_handlers == 0,
3317 				    ("MSI has handlers, but vectors not mapped"));
3318 				error = PCIB_MAP_MSI(device_get_parent(dev),
3319 				    child, rman_get_start(irq), &addr, &data,
3320 				    rman_get_cpuid(irq));
3321 				if (error)
3322 					goto bad;
3323 				msi->msi_addr = addr;
3324 				msi->msi_data = data;
3325 				pci_enable_msi(child, addr, data);
3326 			}
3327 			msi->msi_handlers++;
3328 		} else {
3329 			struct msix_vector *mv;
3330 			u_int vector;
3331 
3332 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3333 			    ("No MSI-X or MSI rid %d allocated", rid));
3334 
3335 			mv = pci_find_msix_vector(child, rid);
3336 			KASSERT(mv != NULL,
3337 			    ("MSI-X rid %d is not allocated", rid));
3338 			KASSERT(mv->mv_address == 0,
3339 			    ("MSI-X rid %d has been setup", rid));
3340 
3341 			error = PCIB_MAP_MSI(device_get_parent(dev),
3342 			    child, rman_get_start(irq), &addr, &data,
3343 			    rman_get_cpuid(irq));
3344 			if (error)
3345 				goto bad;
3346 			mv->mv_address = addr;
3347 			mv->mv_data = data;
3348 
3349 			vector = PCI_MSIX_RID2VEC(rid);
3350 			pci_setup_msix_vector(child, vector,
3351 			    mv->mv_address, mv->mv_data);
3352 			pci_unmask_msix_vector(child, vector);
3353 		}
3354 
3355 		/*
3356 		 * Make sure that INTx is disabled if we are using MSI/MSI-X,
3357 		 * unless the device is affected by PCI_QUIRK_MSI_INTX_BUG,
3358 		 * in which case we "enable" INTx so MSI/MSI-X actually works.
3359 		 */
3360 		if (!pci_has_quirk(pci_get_devid(child),
3361 		    PCI_QUIRK_MSI_INTX_BUG))
3362 			pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3363 		else
3364 			pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
3365 	bad:
3366 		if (error) {
3367 			(void)bus_generic_teardown_intr(dev, child, irq,
3368 			    cookie);
3369 			return (error);
3370 		}
3371 	}
3372 	*cookiep = cookie;
3373 	return (0);
3374 }
3375 
3376 int
3377 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3378     void *cookie)
3379 {
3380 	int rid, error;
3381 
3382 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3383 		return (EINVAL);
3384 
3385 	/* If this isn't a direct child, just bail out */
3386 	if (device_get_parent(child) != dev)
3387 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3388 
3389 	rid = rman_get_rid(irq);
3390 	if (rid == 0) {
3391 		/* Mask INTx */
3392 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3393 	} else {
3394 		struct pci_devinfo *dinfo = device_get_ivars(child);
3395 
3396 		/*
3397 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3398 		 * decrement the appropriate handlers count and mask the
3399 		 * MSI-X message, or disable MSI messages if the count
3400 		 * drops to 0.
3401 		 */
3402 		if (dinfo->cfg.msi.msi_alloc > 0) {
3403 			struct pcicfg_msi *msi = &dinfo->cfg.msi;
3404 
3405 			KASSERT(rid <= msi->msi_alloc,
3406 			    ("MSI rid %d too high", rid));
3407 			KASSERT(msi->msi_handlers > 0,
3408 			    ("MSI rid %d is not setup", rid));
3409 
3410 			msi->msi_handlers--;
3411 			if (msi->msi_handlers == 0)
3412 				pci_disable_msi(child);
3413 		} else {
3414 			struct msix_vector *mv;
3415 
3416 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3417 			    ("No MSI or MSI-X rid %d allocated", rid));
3418 
3419 			mv = pci_find_msix_vector(child, rid);
3420 			KASSERT(mv != NULL,
3421 			    ("MSI-X rid %d is not allocated", rid));
3422 			KASSERT(mv->mv_address != 0,
3423 			    ("MSI-X rid %d has not been setup", rid));
3424 
3425 			pci_mask_msix_vector(child, PCI_MSIX_RID2VEC(rid));
3426 			mv->mv_address = 0;
3427 			mv->mv_data = 0;
3428 		}
3429 	}
3430 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3431 	if (rid > 0)
3432 		KASSERT(error == 0,
3433 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3434 	return (error);
3435 }
3436 
3437 int
3438 pci_print_child(device_t dev, device_t child)
3439 {
3440 	struct pci_devinfo *dinfo;
3441 	struct resource_list *rl;
3442 	int retval = 0;
3443 
3444 	dinfo = device_get_ivars(child);
3445 	rl = &dinfo->resources;
3446 
3447 	retval += bus_print_child_header(dev, child);
3448 
3449 	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
3450 	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
3451 	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
3452 	if (device_get_flags(dev))
3453 		retval += kprintf(" flags %#x", device_get_flags(dev));
3454 
3455 	retval += kprintf(" at device %d.%d", pci_get_slot(child),
3456 	    pci_get_function(child));
3457 
3458 	retval += bus_print_child_footer(dev, child);
3459 
3460 	return (retval);
3461 }
3462 
3463 static struct
3464 {
3465 	int	class;
3466 	int	subclass;
3467 	char	*desc;
3468 } pci_nomatch_tab[] = {
3469 	{PCIC_OLD,		-1,			"old"},
3470 	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
3471 	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
3472 	{PCIC_STORAGE,		-1,			"mass storage"},
3473 	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
3474 	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
3475 	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
3476 	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
3477 	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
3478 	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
3479 	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
3480 	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
3481 	{PCIC_NETWORK,		-1,			"network"},
3482 	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
3483 	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
3484 	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
3485 	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
3486 	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
3487 	{PCIC_DISPLAY,		-1,			"display"},
3488 	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
3489 	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
3490 	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
3491 	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
3492 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
3493 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
3494 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
3495 	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
3496 	{PCIC_MEMORY,		-1,			"memory"},
3497 	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
3498 	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
3499 	{PCIC_BRIDGE,		-1,			"bridge"},
3500 	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
3501 	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
3502 	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
3503 	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
3504 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
3505 	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
3506 	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
3507 	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
3508 	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
3509 	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
3510 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
3511 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
3512 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
3513 	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
3514 	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
3515 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
3516 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
3517 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
3518 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
3519 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
3520 	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
3521 	{PCIC_INPUTDEV,		-1,			"input device"},
3522 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
3523 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
3524 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
3525 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
3526 	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
3527 	{PCIC_DOCKING,		-1,			"docking station"},
3528 	{PCIC_PROCESSOR,	-1,			"processor"},
3529 	{PCIC_SERIALBUS,	-1,			"serial bus"},
3530 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
3531 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
3532 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
3533 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
3534 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
3535 	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
3536 	{PCIC_WIRELESS,		-1,			"wireless controller"},
3537 	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
3538 	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
3539 	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
3540 	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
3541 	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
3542 	{PCIC_SATCOM,		-1,			"satellite communication"},
3543 	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
3544 	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
3545 	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
3546 	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
3547 	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
3548 	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
3549 	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
3550 	{PCIC_DASP,		-1,			"dasp"},
3551 	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
3552 	{0, 0,		NULL}
3553 };
3554 
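/*
 * Report a device for which no driver probe matched.  The description
 * is taken from the vendor database when one is loaded; otherwise
 * pci_nomatch_tab above is scanned, with subclass -1 entries serving
 * as class-wide fallbacks.
 */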
3555 void
3556 pci_probe_nomatch(device_t dev, device_t child)
3557 {
3558 	int	i;
3559 	char	*cp, *scp, *device;
3560 
3561 	/*
3562 	 * Look for a listing for this device in a loaded device database.
3563 	 */
3564 	if ((device = pci_describe_device(child)) != NULL) {
3565 		device_printf(dev, "<%s>", device);
3566 		kfree(device, M_DEVBUF);
3567 	} else {
3568 		/*
3569 		 * Scan the class/subclass descriptions for a general
3570 		 * description.
3571 		 */
3572 		cp = "unknown";
3573 		scp = NULL;
3574 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3575 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3576 				if (pci_nomatch_tab[i].subclass == -1) {
3577 					cp = pci_nomatch_tab[i].desc;
3578 				} else if (pci_nomatch_tab[i].subclass ==
3579 				    pci_get_subclass(child)) {
3580 					scp = pci_nomatch_tab[i].desc;
3581 				}
3582 			}
3583 		}
3584 		device_printf(dev, "<%s%s%s>",
3585 		    cp ? cp : "",
3586 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3587 		    scp ? scp : "");
3588 	}
3589 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3590 		pci_get_vendor(child), pci_get_device(child),
3591 		pci_get_slot(child), pci_get_function(child));
3592 	if (pci_get_intpin(child) > 0) {
3593 		int irq;
3594 
3595 		irq = pci_get_irq(child);
3596 		if (PCI_INTERRUPT_VALID(irq))
3597 			kprintf(" irq %d", irq);
3598 	}
3599 	kprintf("\n");
3600 
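	/*
	 * No driver claimed the device; save its configuration state
	 * and, depending on pci_do_power_nodriver, power it down.
	 */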
3601 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3602 }
3603 
3604 /*
3605  * Parse the PCI device database, if loaded, and return a pointer to a
3606  * description of the device.
3607  *
3608  * The database is flat text formatted as follows:
3609  *
3610  * Any line not in a valid format is ignored.
3611  * Lines are terminated with newline '\n' characters.
3612  *
3613  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3614  * the vendor name.
3615  *
3616  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
 * A DEVICE line is entered immediately below its corresponding VENDOR
 * line; devices cannot be listed without a preceding VENDOR line.
3619  * another TAB, then the device name.
3620  */
3621 
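
/*
 * For example (fields are separated by literal TAB characters):
 *
 *	8086	Intel Corporation
 *		1229	82557/8/9 EtherExpress Pro/100(B) Ethernet
 */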
3622 /*
3623  * Assuming (ptr) points to the beginning of a line in the database,
3624  * return the vendor or device and description of the next entry.
3625  * The value of (vendor) or (device) inappropriate for the entry type
 * Whichever of (vendor) or (device) is inappropriate for the entry
 * type is set to -1.  Returns nonzero at the end of the database.
 *
 * Note that this is not entirely robust in the face of corrupt data;
3630  * database with a newline when we initialise.
3631  */
3632 static int
3633 pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
3634 {
3635 	char	*cp = *ptr;
3636 	int	left;
3637 
3638 	*device = -1;
3639 	*vendor = -1;
3640 	**desc = '\0';
3641 	for (;;) {
3642 		left = pci_vendordata_size - (cp - pci_vendordata);
3643 		if (left <= 0) {
3644 			*ptr = cp;
3645 			return(1);
3646 		}
3647 
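		/*
		 * A leading TAB distinguishes a device entry from a
		 * vendor entry; both share the "%x\t%79[^\n]" layout,
		 * whose 79 chars plus NUL fit the 80-byte description
		 * buffer supplied by the caller.
		 */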
3648 		/* vendor entry? */
3649 		if (*cp != '\t' &&
		    ksscanf(cp, "%x\t%79[^\n]", vendor, *desc) == 2)
3651 			break;
3652 		/* device entry? */
3653 		if (*cp == '\t' &&
		    ksscanf(cp, "%x\t%79[^\n]", device, *desc) == 2)
3655 			break;
3656 
3657 		/* skip to next line */
		while (left > 0 && *cp != '\n') {
			cp++;
			left--;
		}
		if (left > 0 && *cp == '\n') {
			cp++;
			left--;
		}
3666 	}
3667 	/* skip to next line */
	while (left > 0 && *cp != '\n') {
		cp++;
		left--;
	}
	if (left > 0 && *cp == '\n')
3673 		cp++;
3674 	*ptr = cp;
3675 	return(0);
3676 }
3677 
3678 static char *
3679 pci_describe_device(device_t dev)
3680 {
3681 	int	vendor, device;
3682 	char	*desc, *vp, *dp, *line;
3683 
3684 	desc = vp = dp = NULL;
3685 
3686 	/*
3687 	 * If we have no vendor data, we can't do anything.
3688 	 */
3689 	if (pci_vendordata == NULL)
3690 		goto out;
3691 
3692 	/*
3693 	 * Scan the vendor data looking for this device
3694 	 */
3695 	line = pci_vendordata;
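	/*
	 * 80 bytes: room for the at most 79 description characters that
	 * pci_describe_parse_line() stores, plus the terminating NUL.
	 */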
3696 	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3697 		goto out;
3698 	for (;;) {
3699 		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
3700 			goto out;
3701 		if (vendor == pci_get_vendor(dev))
3702 			break;
3703 	}
3704 	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
3705 		goto out;
3706 	for (;;) {
3707 		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
3708 			*dp = 0;
3709 			break;
3710 		}
3711 		if (vendor != -1) {
3712 			*dp = 0;
3713 			break;
3714 		}
3715 		if (device == pci_get_device(dev))
3716 			break;
3717 	}
3718 	if (dp[0] == '\0')
3719 		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
3720 	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
3721 	    NULL)
3722 		ksprintf(desc, "%s, %s", vp, dp);
3723  out:
3724 	if (vp != NULL)
3725 		kfree(vp, M_DEVBUF);
3726 	if (dp != NULL)
3727 		kfree(dp, M_DEVBUF);
3728 	return(desc);
3729 }
3730 
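/*
 * Back end for the pci_get_*() accessor macros in pcivar.h: return the
 * cached configuration value for the requested instance variable.
 */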
3731 int
3732 pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
3733 {
3734 	struct pci_devinfo *dinfo;
3735 	pcicfgregs *cfg;
3736 
3737 	dinfo = device_get_ivars(child);
3738 	cfg = &dinfo->cfg;
3739 
3740 	switch (which) {
3741 	case PCI_IVAR_ETHADDR:
3742 		/*
3743 		 * The generic accessor doesn't deal with failure, so
3744 		 * we set the return value, then return an error.
3745 		 */
3746 		*((uint8_t **) result) = NULL;
3747 		return (EINVAL);
3748 	case PCI_IVAR_SUBVENDOR:
3749 		*result = cfg->subvendor;
3750 		break;
3751 	case PCI_IVAR_SUBDEVICE:
3752 		*result = cfg->subdevice;
3753 		break;
3754 	case PCI_IVAR_VENDOR:
3755 		*result = cfg->vendor;
3756 		break;
3757 	case PCI_IVAR_DEVICE:
3758 		*result = cfg->device;
3759 		break;
3760 	case PCI_IVAR_DEVID:
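		/* The device ID goes in the high word, the vendor ID low. */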
3761 		*result = (cfg->device << 16) | cfg->vendor;
3762 		break;
3763 	case PCI_IVAR_CLASS:
3764 		*result = cfg->baseclass;
3765 		break;
3766 	case PCI_IVAR_SUBCLASS:
3767 		*result = cfg->subclass;
3768 		break;
3769 	case PCI_IVAR_PROGIF:
3770 		*result = cfg->progif;
3771 		break;
3772 	case PCI_IVAR_REVID:
3773 		*result = cfg->revid;
3774 		break;
3775 	case PCI_IVAR_INTPIN:
3776 		*result = cfg->intpin;
3777 		break;
3778 	case PCI_IVAR_IRQ:
3779 		*result = cfg->intline;
3780 		break;
3781 	case PCI_IVAR_DOMAIN:
3782 		*result = cfg->domain;
3783 		break;
3784 	case PCI_IVAR_BUS:
3785 		*result = cfg->bus;
3786 		break;
3787 	case PCI_IVAR_SLOT:
3788 		*result = cfg->slot;
3789 		break;
3790 	case PCI_IVAR_FUNCTION:
3791 		*result = cfg->func;
3792 		break;
3793 	case PCI_IVAR_CMDREG:
3794 		*result = cfg->cmdreg;
3795 		break;
3796 	case PCI_IVAR_CACHELNSZ:
3797 		*result = cfg->cachelnsz;
3798 		break;
3799 	case PCI_IVAR_MINGNT:
3800 		*result = cfg->mingnt;
3801 		break;
3802 	case PCI_IVAR_MAXLAT:
3803 		*result = cfg->maxlat;
3804 		break;
3805 	case PCI_IVAR_LATTIMER:
3806 		*result = cfg->lattimer;
3807 		break;
3808 	case PCI_IVAR_PCIXCAP_PTR:
3809 		*result = cfg->pcix.pcix_ptr;
3810 		break;
3811 	case PCI_IVAR_PCIECAP_PTR:
3812 		*result = cfg->expr.expr_ptr;
3813 		break;
3814 	case PCI_IVAR_VPDCAP_PTR:
3815 		*result = cfg->vpd.vpd_reg;
3816 		break;
3817 	default:
3818 		return (ENOENT);
3819 	}
3820 	return (0);
3821 }
3822 
3823 int
3824 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3825 {
3826 	struct pci_devinfo *dinfo;
3827 
3828 	dinfo = device_get_ivars(child);
3829 
3830 	switch (which) {
3831 	case PCI_IVAR_INTPIN:
3832 		dinfo->cfg.intpin = value;
3833 		return (0);
3834 	case PCI_IVAR_ETHADDR:
3835 	case PCI_IVAR_SUBVENDOR:
3836 	case PCI_IVAR_SUBDEVICE:
3837 	case PCI_IVAR_VENDOR:
3838 	case PCI_IVAR_DEVICE:
3839 	case PCI_IVAR_DEVID:
3840 	case PCI_IVAR_CLASS:
3841 	case PCI_IVAR_SUBCLASS:
3842 	case PCI_IVAR_PROGIF:
3843 	case PCI_IVAR_REVID:
3844 	case PCI_IVAR_IRQ:
3845 	case PCI_IVAR_DOMAIN:
3846 	case PCI_IVAR_BUS:
3847 	case PCI_IVAR_SLOT:
3848 	case PCI_IVAR_FUNCTION:
3849 		return (EINVAL);	/* disallow for now */
3850 
3851 	default:
3852 		return (ENOENT);
3853 	}
3854 }
3855 #ifdef notyet
3856 #include "opt_ddb.h"
3857 #ifdef DDB
3858 #include <ddb/ddb.h>
3859 #include <sys/cons.h>
3860 
3861 /*
 * Dump PCI device identification and class registers, for use from
 * within DDB ("show pciregs").
3863  */
3864 
3865 DB_SHOW_COMMAND(pciregs, db_pci_dump)
3866 {
3867 	struct pci_devinfo *dinfo;
3868 	struct devlist *devlist_head;
3869 	struct pci_conf *p;
3870 	const char *name;
3871 	int i, error, none_count;
3872 
3873 	none_count = 0;
3874 	/* get the head of the device queue */
3875 	devlist_head = &pci_devq;
3876 
3877 	/*
	 * Go through the list of devices and print each one out.
3879 	 */
3880 	for (error = 0, i = 0,
3881 	     dinfo = STAILQ_FIRST(devlist_head);
3882 	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
3883 	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
3884 
		/* Determine the attached driver's name, if any. */
3886 		name = NULL;
3887 		if (dinfo->cfg.dev)
3888 			name = device_get_name(dinfo->cfg.dev);
3889 
3890 		p = &dinfo->conf;
3891 		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
3892 			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
3893 			(name && *name) ? name : "none",
3894 			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
3895 			none_count++,
3896 			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
3897 			p->pc_sel.pc_func, (p->pc_class << 16) |
3898 			(p->pc_subclass << 8) | p->pc_progif,
3899 			(p->pc_subdevice << 16) | p->pc_subvendor,
3900 			(p->pc_device << 16) | p->pc_vendor,
3901 			p->pc_revid, p->pc_hdr);
3902 	}
3903 }
3904 #endif /* DDB */
3905 #endif
3906 
3907 static struct resource *
3908 pci_alloc_map(device_t dev, device_t child, int type, int *rid,
3909     u_long start, u_long end, u_long count, u_int flags)
3910 {
3911 	struct pci_devinfo *dinfo = device_get_ivars(child);
3912 	struct resource_list *rl = &dinfo->resources;
3913 	struct resource_list_entry *rle;
3914 	struct resource *res;
3915 	pci_addr_t map, testval;
3916 	int mapsize;
3917 
3918 	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  BARs that read back 0 here are bogus and unimplemented.
	 * Note: atapci devices in legacy mode are special and handled
	 * elsewhere in the code.  If you have an atapci device in legacy
	 * mode and it fails here, that other code is broken.
3924 	 */
3925 	res = NULL;
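	/*
	 * Standard BAR sizing probe: write all 1s, read the value back,
	 * and let the hardwired zero bits reveal the size of the decode
	 * window (decoded by pci_mapsize()).
	 */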
3926 	map = pci_read_config(child, *rid, 4);
3927 	pci_write_config(child, *rid, 0xffffffff, 4);
3928 	testval = pci_read_config(child, *rid, 4);
3929 	if (pci_maprange(testval) == 64)
3930 		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
3931 	if (pci_mapbase(testval) == 0)
3932 		goto out;
3933 
3934 	/*
3935 	 * Restore the original value of the BAR.  We may have reprogrammed
3936 	 * the BAR of the low-level console device and when booting verbose,
3937 	 * we need the console device addressable.
3938 	 */
3939 	pci_write_config(child, *rid, map, 4);
3940 
3941 	if (PCI_BAR_MEM(testval)) {
3942 		if (type != SYS_RES_MEMORY) {
3943 			if (bootverbose)
3944 				device_printf(dev,
3945 				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is a memio\n",
3947 				    device_get_nameunit(child), type, *rid);
3948 			goto out;
3949 		}
3950 	} else {
3951 		if (type != SYS_RES_IOPORT) {
3952 			if (bootverbose)
3953 				device_printf(dev,
3954 				    "child %s requested type %d for rid %#x,"
3955 				    " but the BAR says it is an ioport\n",
3956 				    device_get_nameunit(child), type, *rid);
3957 			goto out;
3958 		}
3959 	}
3960 	/*
3961 	 * For real BARs, we need to override the size that
3962 	 * the driver requests, because that's what the BAR
3963 	 * actually uses and we would otherwise have a
3964 	 * situation where we might allocate the excess to
3965 	 * another driver, which won't work.
3966 	 */
3967 	mapsize = pci_mapsize(testval);
3968 	count = 1UL << mapsize;
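	/* A BAR decodes a naturally aligned, power-of-two sized window. */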
3969 	if (RF_ALIGNMENT(flags) < mapsize)
3970 		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
3971 	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
3972 		flags |= RF_PREFETCHABLE;
3973 
3974 	/*
	 * Allocate the resource, then write the start of the allocated
	 * range back to the appropriate BAR.
3977 	 */
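	/* The trailing -1 cpuid requests no specific CPU binding. */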
3978 	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
3979 	    start, end, count, flags, -1);
3980 	if (res == NULL) {
3981 		device_printf(child,
3982 		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
3983 		    count, *rid, type, start, end);
3984 		goto out;
3985 	}
3986 	resource_list_add(rl, type, *rid, start, end, count, -1);
3987 	rle = resource_list_find(rl, type, *rid);
3988 	if (rle == NULL)
3989 		panic("pci_alloc_map: unexpectedly can't find resource.");
3990 	rle->res = res;
3991 	rle->start = rman_get_start(res);
3992 	rle->end = rman_get_end(res);
3993 	rle->count = count;
3994 	if (bootverbose)
3995 		device_printf(child,
3996 		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
3997 		    count, *rid, type, rman_get_start(res));
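	/* The newly allocated base is written back to the BAR at out:. */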
3998 	map = rman_get_start(res);
out:
4000 	pci_write_config(child, *rid, map, 4);
4001 	if (pci_maprange(testval) == 64)
4002 		pci_write_config(child, *rid + 4, map >> 32, 4);
4003 	return (res);
4004 }
4005 
4007 struct resource *
4008 pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
4009     u_long start, u_long end, u_long count, u_int flags, int cpuid)
4010 {
4011 	struct pci_devinfo *dinfo = device_get_ivars(child);
4012 	struct resource_list *rl = &dinfo->resources;
4013 	struct resource_list_entry *rle;
4014 	pcicfgregs *cfg = &dinfo->cfg;
4015 
4016 	/*
4017 	 * Perform lazy resource allocation
4018 	 */
4019 	if (device_get_parent(child) == dev) {
4020 		switch (type) {
4021 		case SYS_RES_IRQ:
4022 			/*
4023 			 * Can't alloc legacy interrupt once MSI messages
4024 			 * have been allocated.
4025 			 */
4026 			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
4027 			    cfg->msix.msix_alloc > 0))
4028 				return (NULL);
4029 			/*
4030 			 * If the child device doesn't have an
4031 			 * interrupt routed and is deserving of an
4032 			 * interrupt, try to assign it one.
4033 			 */
4034 			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
4035 			    (cfg->intpin != 0))
4036 				pci_assign_interrupt(dev, child, 0);
4037 			break;
4038 		case SYS_RES_IOPORT:
4039 		case SYS_RES_MEMORY:
4040 			if (*rid < PCIR_BAR(cfg->nummaps)) {
4041 				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources when
				 * none are present; resource_list_alloc()
				 * handles this only partially.
4047 				 */
4048 				if (PCI_ENABLE_IO(dev, child, type))
4049 					return (NULL);
4050 			}
4051 			rle = resource_list_find(rl, type, *rid);
4052 			if (rle == NULL)
4053 				return (pci_alloc_map(dev, child, type, rid,
4054 				    start, end, count, flags));
4055 			break;
4056 		}
4057 		/*
4058 		 * If we've already allocated the resource, then
4059 		 * return it now.  But first we may need to activate
4060 		 * it, since we don't allocate the resource as active
4061 		 * above.  Normally this would be done down in the
4062 		 * nexus, but since we short-circuit that path we have
4063 		 * to do its job here.  Not sure if we should kfree the
4064 		 * resource if it fails to activate.
4065 		 */
4066 		rle = resource_list_find(rl, type, *rid);
4067 		if (rle != NULL && rle->res != NULL) {
4068 			if (bootverbose)
4069 				device_printf(child,
4070 			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
4071 				    rman_get_size(rle->res), *rid, type,
4072 				    rman_get_start(rle->res));
4073 			if ((flags & RF_ACTIVE) &&
4074 			    bus_generic_activate_resource(dev, child, type,
4075 			    *rid, rle->res) != 0)
4076 				return (NULL);
4077 			return (rle->res);
4078 		}
4079 	}
4080 	return (resource_list_alloc(rl, dev, child, type, rid,
4081 	    start, end, count, flags, cpuid));
4082 }
4083 
4084 void
4085 pci_delete_resource(device_t dev, device_t child, int type, int rid)
4086 {
4087 	struct pci_devinfo *dinfo;
4088 	struct resource_list *rl;
4089 	struct resource_list_entry *rle;
4090 
4091 	if (device_get_parent(child) != dev)
4092 		return;
4093 
4094 	dinfo = device_get_ivars(child);
4095 	rl = &dinfo->resources;
4096 	rle = resource_list_find(rl, type, rid);
4097 	if (rle) {
4098 		if (rle->res) {
4099 			if (rman_get_device(rle->res) != dev ||
4100 			    rman_get_flags(rle->res) & RF_ACTIVE) {
4101 				device_printf(dev, "delete_resource: "
4102 				    "Resource still owned by child, oops. "
4103 				    "(type=%d, rid=%d, addr=%lx)\n",
4104 				    rle->type, rle->rid,
4105 				    rman_get_start(rle->res));
4106 				return;
4107 			}
4108 			bus_release_resource(dev, type, rid, rle->res);
4109 		}
4110 		resource_list_delete(rl, type, rid);
4111 	}
4112 	/*
4113 	 * Why do we turn off the PCI configuration BAR when we delete a
4114 	 * resource? -- imp
4115 	 */
4116 	pci_write_config(child, rid, 0, 4);
4117 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
4118 }
4119 
4120 struct resource_list *
4121 pci_get_resource_list (device_t dev, device_t child)
pci_get_resource_list(device_t dev, device_t child)
4123 	struct pci_devinfo *dinfo = device_get_ivars(child);
4124 
4125 	if (dinfo == NULL)
4126 		return (NULL);
4127 
4128 	return (&dinfo->resources);
4129 }
4130 
4131 uint32_t
4132 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4133 {
4134 	struct pci_devinfo *dinfo = device_get_ivars(child);
4135 	pcicfgregs *cfg = &dinfo->cfg;
4136 
4137 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4138 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4139 }
4140 
4141 void
4142 pci_write_config_method(device_t dev, device_t child, int reg,
4143     uint32_t val, int width)
4144 {
4145 	struct pci_devinfo *dinfo = device_get_ivars(child);
4146 	pcicfgregs *cfg = &dinfo->cfg;
4147 
4148 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4149 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4150 }
4151 
4152 int
4153 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4154     size_t buflen)
4155 {
4156 
4157 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4158 	    pci_get_function(child));
4159 	return (0);
4160 }
4161 
4162 int
4163 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4164     size_t buflen)
4165 {
4166 	struct pci_devinfo *dinfo;
4167 	pcicfgregs *cfg;
4168 
4169 	dinfo = device_get_ivars(child);
4170 	cfg = &dinfo->cfg;
4171 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4172 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4173 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4174 	    cfg->progif);
4175 	return (0);
4176 }
4177 
4178 int
4179 pci_assign_interrupt_method(device_t dev, device_t child)
4180 {
4181 	struct pci_devinfo *dinfo = device_get_ivars(child);
4182 	pcicfgregs *cfg = &dinfo->cfg;
4183 
4184 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4185 	    cfg->intpin));
4186 }
4187 
4188 static int
4189 pci_modevent(module_t mod, int what, void *arg)
4190 {
4191 	static struct cdev *pci_cdev;
4192 
4193 	switch (what) {
4194 	case MOD_LOAD:
4195 		STAILQ_INIT(&pci_devq);
4196 		pci_generation = 0;
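		/* /dev/pci: ioctl access to PCI config space for userland. */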
4197 		pci_cdev = make_dev(&pci_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4198 				    "pci");
4199 		pci_load_vendor_data();
4200 		break;
4201 
4202 	case MOD_UNLOAD:
4203 		destroy_dev(pci_cdev);
4204 		break;
4205 	}
4206 
4207 	return (0);
4208 }
4209 
4210 void
4211 pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
4212 {
4213 	int i;
4214 
4215 	/*
4216 	 * Only do header type 0 devices.  Type 1 devices are bridges,
4217 	 * which we know need special treatment.  Type 2 devices are
4218 	 * cardbus bridges which also require special treatment.
4219 	 * Other types are unknown, and we err on the side of safety
4220 	 * by ignoring them.
4221 	 */
4222 	if (dinfo->cfg.hdrtype != 0)
4223 		return;
4224 
4225 	/*
4226 	 * Restore the device to full power mode.  We must do this
4227 	 * before we restore the registers because moving from D3 to
4228 	 * D0 will cause the chip's BARs and some other registers to
4229 	 * be reset to some unknown power on reset values.  Cut down
4230 	 * the noise on boot by doing nothing if we are already in
4231 	 * state D0.
4232 	 */
4233 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
4234 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4235 	}
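	/*
	 * Put the BARs back before re-enabling decoding via the
	 * command register below.
	 */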
4236 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4237 		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
4238 	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
4239 	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
4240 	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
4241 	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
4242 	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
4243 	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
4244 	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
4245 	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
4246 	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
4247 	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);
4248 
4249 	/* Restore MSI and MSI-X configurations if they are present. */
4250 	if (dinfo->cfg.msi.msi_location != 0)
4251 		pci_resume_msi(dev);
4252 	if (dinfo->cfg.msix.msix_location != 0)
4253 		pci_resume_msix(dev);
4254 }
4255 
4256 void
4257 pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
4258 {
4259 	int i;
4260 	uint32_t cls;
4261 	int ps;
4262 
4263 	/*
4264 	 * Only do header type 0 devices.  Type 1 devices are bridges, which
4265 	 * we know need special treatment.  Type 2 devices are cardbus bridges
4266 	 * which also require special treatment.  Other types are unknown, and
4267 	 * we err on the side of safety by ignoring them.  Powering down
4268 	 * bridges should not be undertaken lightly.
4269 	 */
4270 	if (dinfo->cfg.hdrtype != 0)
4271 		return;
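	/* Snapshot the BARs and the expansion ROM base (PCIR_BIOS). */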
4272 	for (i = 0; i < dinfo->cfg.nummaps; i++)
4273 		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
4274 	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);
4275 
4276 	/*
4277 	 * Some drivers apparently write to these registers w/o updating our
4278 	 * cached copy.  No harm happens if we update the copy, so do so here
4279 	 * so we can restore them.  The COMMAND register is modified by the
4280 	 * bus w/o updating the cache.  This should represent the normally
4281 	 * writable portion of the 'defined' part of type 0 headers.  In
4282 	 * theory we also need to save/restore the PCI capability structures
4283 	 * we know about, but apart from power we don't know any that are
4284 	 * writable.
4285 	 */
4286 	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
4287 	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
4288 	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
4289 	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
4290 	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
4291 	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
4292 	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
4293 	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
4294 	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
4295 	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
4296 	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
4297 	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
4298 	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
4299 	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
4300 	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);
4301 
4302 	/*
	 * Don't set the power state for display devices, base peripherals
	 * and memory devices, since bad things happen when they are
	 * powered down.
4305 	 * We should (a) have drivers that can easily detach and (b) use
4306 	 * generic drivers for these devices so that some device actually
4307 	 * attaches.  We need to make sure that when we implement (a) we don't
4308 	 * power the device down on a reattach.
4309 	 */
4310 	cls = pci_get_class(dev);
4311 	if (!setstate)
4312 		return;
	switch (pci_do_power_nodriver) {
	case 0:		/* NO powerdown at all */
		return;
	case 1:		/* Conservative about what to power down */
		if (cls == PCIC_STORAGE)
			return;
		/* FALLTHROUGH */
	case 2:		/* Aggressive about what to power down */
		if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
		    cls == PCIC_BASEPERIPH)
			return;
		/* FALLTHROUGH */
	case 3:		/* Power down everything */
		break;
	}
4329 	/*
4330 	 * PCI spec says we can only go into D3 state from D0 state.
4331 	 * Transition from D[12] into D0 before going to D3 state.
4332 	 */
4333 	ps = pci_get_powerstate(dev);
4334 	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
4335 		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
4336 	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
4337 		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
4338 }
4339 
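/*
 * Allocate a single interrupt for a device: prefer one MSI message,
 * subject to the per-device "msi.enable" and "msi.cpu" hints, and fall
 * back to the legacy INTx line.  Returns the interrupt type and fills
 * in the rid and resource flags for the SYS_RES_IRQ allocation.
 *
 * Typical driver usage (a sketch; the 'sc' fields are illustrative):
 *
 *	u_int irq_flags;
 *
 *	sc->irq_type = pci_alloc_1intr(dev, msi_enable, &sc->irq_rid,
 *	    &irq_flags);
 *	sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ,
 *	    &sc->irq_rid, irq_flags);
 */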
4340 int
4341 pci_alloc_1intr(device_t dev, int msi_enable, int *rid0, u_int *flags0)
4342 {
4343 	int rid, type;
4344 	u_int flags;
4345 
4346 	rid = 0;
4347 	type = PCI_INTR_TYPE_LEGACY;
4348 	flags = RF_SHAREABLE | RF_ACTIVE;
4349 
4350 	msi_enable = device_getenv_int(dev, "msi.enable", msi_enable);
4351 	if (msi_enable) {
4352 		int cpu;
4353 
4354 		cpu = device_getenv_int(dev, "msi.cpu", -1);
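		/* Clamp an out-of-range "msi.cpu" hint to the last CPU. */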
4355 		if (cpu >= ncpus)
4356 			cpu = ncpus - 1;
4357 
4358 		if (pci_alloc_msi(dev, &rid, 1, cpu) == 0) {
4359 			flags &= ~RF_SHAREABLE;
4360 			type = PCI_INTR_TYPE_MSI;
4361 		}
4362 	}
4363 
4364 	*rid0 = rid;
4365 	*flags0 = flags;
4366 
4367 	return type;
4368 }
4369 
4370 /* Wrapper APIs suitable for device driver use. */
4371 void
4372 pci_save_state(device_t dev)
4373 {
4374 	struct pci_devinfo *dinfo;
4375 
4376 	dinfo = device_get_ivars(dev);
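	/* setstate == 0: save registers without powering the device down. */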
4377 	pci_cfg_save(dev, dinfo, 0);
4378 }
4379 
4380 void
4381 pci_restore_state(device_t dev)
4382 {
4383 	struct pci_devinfo *dinfo;
4384 
4385 	dinfo = device_get_ivars(dev);
4386 	pci_cfg_restore(dev, dinfo);
4387 }
4388