xref: /dflybsd-src/sys/bus/pci/pci.c (revision 01c38e5ca6a816155ecb78897b9846f7b017e05f)
1 /*-
2  * Copyright (c) 1997, Stefan Esser <se@kfreebsd.org>
3  * Copyright (c) 2000, Michael Smith <msmith@kfreebsd.org>
4  * Copyright (c) 2000, BSDi
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: src/sys/dev/pci/pci.c,v 1.355.2.9.2.1 2009/04/15 03:14:26 kensmith Exp $
29  */
30 
31 #include "opt_bus.h"
32 #include "opt_acpi.h"
33 #include "opt_compat_oldpci.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/malloc.h>
38 #include <sys/module.h>
39 #include <sys/linker.h>
40 #include <sys/fcntl.h>
41 #include <sys/conf.h>
42 #include <sys/kernel.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/endian.h>
46 #include <sys/machintr.h>
47 
48 #include <machine/msi_machdep.h>
49 
50 #include <vm/vm.h>
51 #include <vm/pmap.h>
52 #include <vm/vm_extern.h>
53 
54 #include <sys/bus.h>
55 #include <sys/rman.h>
56 #include <sys/device.h>
57 
58 #include <sys/pciio.h>
59 #include <bus/pci/pcireg.h>
60 #include <bus/pci/pcivar.h>
61 #include <bus/pci/pci_private.h>
62 
63 #include "pcib_if.h"
64 #include "pci_if.h"
65 
66 #ifdef __HAVE_ACPI
67 #include <contrib/dev/acpica/acpi.h>
68 #include "acpi_if.h"
69 #else
70 #define	ACPI_PWR_FOR_SLEEP(x, y, z)
71 #endif
72 
73 extern struct dev_ops pcic_ops;	/* XXX */
74 
75 typedef void	(*pci_read_cap_t)(device_t, int, int, pcicfgregs *);
76 
77 static uint32_t		pci_mapbase(unsigned mapreg);
78 static const char	*pci_maptype(unsigned mapreg);
79 static int		pci_mapsize(unsigned testval);
80 static int		pci_maprange(unsigned mapreg);
81 static void		pci_fixancient(pcicfgregs *cfg);
82 
83 static int		pci_porten(device_t pcib, int b, int s, int f);
84 static int		pci_memen(device_t pcib, int b, int s, int f);
85 static void		pci_assign_interrupt(device_t bus, device_t dev,
86 			    int force_route);
87 static int		pci_add_map(device_t pcib, device_t bus, device_t dev,
88 			    int b, int s, int f, int reg,
89 			    struct resource_list *rl, int force, int prefetch);
90 static int		pci_probe(device_t dev);
91 static int		pci_attach(device_t dev);
92 static void		pci_child_detached(device_t, device_t);
93 static void		pci_load_vendor_data(void);
94 static int		pci_describe_parse_line(char **ptr, int *vendor,
95 			    int *device, char **desc);
96 static char		*pci_describe_device(device_t dev);
97 static int		pci_modevent(module_t mod, int what, void *arg);
98 static void		pci_hdrtypedata(device_t pcib, int b, int s, int f,
99 			    pcicfgregs *cfg);
100 static void		pci_read_capabilities(device_t pcib, pcicfgregs *cfg);
101 static int		pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg,
102 			    int reg, uint32_t *data);
103 #if 0
104 static int		pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg,
105 			    int reg, uint32_t data);
106 #endif
107 static void		pci_read_vpd(device_t pcib, pcicfgregs *cfg);
108 static void		pci_disable_msi(device_t dev);
109 static void		pci_enable_msi(device_t dev, uint64_t address,
110 			    uint16_t data);
111 static void		pci_enable_msix(device_t dev, u_int index,
112 			    uint64_t address, uint32_t data);
113 static void		pci_mask_msix(device_t dev, u_int index);
114 static void		pci_unmask_msix(device_t dev, u_int index);
115 static int		pci_msi_blacklisted(void);
116 static void		pci_resume_msi(device_t dev);
117 static void		pci_resume_msix(device_t dev);
118 static int		pcie_slotimpl(const pcicfgregs *);
119 static void		pci_print_verbose_expr(const pcicfgregs *);
120 
121 static void		pci_read_cap_pmgt(device_t, int, int, pcicfgregs *);
122 static void		pci_read_cap_ht(device_t, int, int, pcicfgregs *);
123 static void		pci_read_cap_msi(device_t, int, int, pcicfgregs *);
124 static void		pci_read_cap_msix(device_t, int, int, pcicfgregs *);
125 static void		pci_read_cap_vpd(device_t, int, int, pcicfgregs *);
126 static void		pci_read_cap_subvendor(device_t, int, int,
127 			    pcicfgregs *);
128 static void		pci_read_cap_pcix(device_t, int, int, pcicfgregs *);
129 static void		pci_read_cap_express(device_t, int, int, pcicfgregs *);
130 
/*
 * Method dispatch table for the PCI bus driver: device lifecycle hooks,
 * generic newbus resource handling, and the PCI-specific kobj interface
 * (config-space access, power state, VPD, MSI/MSI-X).
 */
static device_method_t pci_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,		pci_probe),
	DEVMETHOD(device_attach,	pci_attach),
	DEVMETHOD(device_detach,	bus_generic_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	pci_suspend),
	DEVMETHOD(device_resume,	pci_resume),

	/* Bus interface */
	DEVMETHOD(bus_print_child,	pci_print_child),
	DEVMETHOD(bus_probe_nomatch,	pci_probe_nomatch),
	DEVMETHOD(bus_read_ivar,	pci_read_ivar),
	DEVMETHOD(bus_write_ivar,	pci_write_ivar),
	DEVMETHOD(bus_driver_added,	pci_driver_added),
	DEVMETHOD(bus_child_detached,	pci_child_detached),
	DEVMETHOD(bus_setup_intr,	pci_setup_intr),
	DEVMETHOD(bus_teardown_intr,	pci_teardown_intr),

	DEVMETHOD(bus_get_resource_list,pci_get_resource_list),
	DEVMETHOD(bus_set_resource,	bus_generic_rl_set_resource),
	DEVMETHOD(bus_get_resource,	bus_generic_rl_get_resource),
	DEVMETHOD(bus_delete_resource,	pci_delete_resource),
	DEVMETHOD(bus_alloc_resource,	pci_alloc_resource),
	DEVMETHOD(bus_release_resource,	bus_generic_rl_release_resource),
	DEVMETHOD(bus_activate_resource, bus_generic_activate_resource),
	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
	DEVMETHOD(bus_child_pnpinfo_str, pci_child_pnpinfo_str_method),
	DEVMETHOD(bus_child_location_str, pci_child_location_str_method),

	/* PCI interface */
	DEVMETHOD(pci_read_config,	pci_read_config_method),
	DEVMETHOD(pci_write_config,	pci_write_config_method),
	DEVMETHOD(pci_enable_busmaster,	pci_enable_busmaster_method),
	DEVMETHOD(pci_disable_busmaster, pci_disable_busmaster_method),
	DEVMETHOD(pci_enable_io,	pci_enable_io_method),
	DEVMETHOD(pci_disable_io,	pci_disable_io_method),
	DEVMETHOD(pci_get_vpd_ident,	pci_get_vpd_ident_method),
	DEVMETHOD(pci_get_vpd_readonly,	pci_get_vpd_readonly_method),
	DEVMETHOD(pci_get_powerstate,	pci_get_powerstate_method),
	DEVMETHOD(pci_set_powerstate,	pci_set_powerstate_method),
	DEVMETHOD(pci_assign_interrupt,	pci_assign_interrupt_method),
	DEVMETHOD(pci_find_extcap,	pci_find_extcap_method),
	DEVMETHOD(pci_alloc_msi,	pci_alloc_msi_method),
	DEVMETHOD(pci_alloc_msix,	pci_alloc_msix_method),
	DEVMETHOD(pci_remap_msix,	pci_remap_msix_method),
	DEVMETHOD(pci_release_msi,	pci_release_msi_method),
	DEVMETHOD(pci_msi_count,	pci_msi_count_method),
	DEVMETHOD(pci_msix_count,	pci_msix_count_method),

	{ 0, 0 }	/* required terminator */
};
183 
DEFINE_CLASS_0(pci, pci_driver, pci_methods, 0);

/* Register the pci driver as a child of pcib (PCI-PCI/host bridges). */
static devclass_t pci_devclass;
DRIVER_MODULE(pci, pcib, pci_driver, pci_devclass, pci_modevent, NULL);
MODULE_VERSION(pci, 1);

/* Raw vendor/device description text; presumably filled in by
 * pci_load_vendor_data() — see below. */
static char	*pci_vendordata;
static size_t	pci_vendordata_size;
192 
193 
/*
 * Dispatch table mapping a PCI capability ID (PCIY_*) to the routine
 * that parses it during pci_read_capabilities().
 */
static const struct pci_read_cap {
	int		cap;		/* PCIY_* capability ID */
	pci_read_cap_t	read_cap;	/* parser for that capability */
} pci_read_caps[] = {
	{ PCIY_PMG,		pci_read_cap_pmgt },
	{ PCIY_HT,		pci_read_cap_ht },
	{ PCIY_MSI,		pci_read_cap_msi },
	{ PCIY_MSIX,		pci_read_cap_msix },
	{ PCIY_VPD,		pci_read_cap_vpd },
	{ PCIY_SUBVENDOR,	pci_read_cap_subvendor },
	{ PCIY_PCIX,		pci_read_cap_pcix },
	{ PCIY_EXPRESS,		pci_read_cap_express },
	{ 0, NULL } /* required last entry */
};
208 
/*
 * A quirk entry matches a combined device/vendor ID and names the
 * workaround (and its arguments) to apply to that device.
 */
struct pci_quirk {
	uint32_t devid;	/* Vendor/device of the card */
	int	type;
#define	PCI_QUIRK_MAP_REG	1 /* PCI map register in weird place */
#define	PCI_QUIRK_DISABLE_MSI	2 /* MSI/MSI-X doesn't work */
	int	arg1;
	int	arg2;
};
217 
/*
 * Known-broken devices.  The match key packs the device ID in the high
 * 16 bits and the vendor ID in the low 16 bits (e.g. ...8086 = Intel).
 */
struct pci_quirk pci_quirks[] = {
	/* The Intel 82371AB and 82443MX has a map register at offset 0x90. */
	{ 0x71138086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	{ 0x719b8086, PCI_QUIRK_MAP_REG,	0x90,	 0 },
	/* As does the Serverworks OSB4 (the SMBus mapping register) */
	{ 0x02001166, PCI_QUIRK_MAP_REG,	0x90,	 0 },

	/*
	 * MSI doesn't work with the ServerWorks CNB20-HE Host Bridge
	 * or the CMIC-SL (AKA ServerWorks GC_LE).
	 */
	{ 0x00141166, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x00171166, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work on earlier Intel chipsets including
	 * E7500, E7501, E7505, 845, 865, 875/E7210, and 855.
	 */
	{ 0x25408086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x254c8086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25508086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25608086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25708086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x25788086, PCI_QUIRK_DISABLE_MSI,	0,	0 },
	{ 0x35808086, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	/*
	 * MSI doesn't work with devices behind the AMD 8131 HT-PCIX
	 * bridge.
	 */
	{ 0x74501022, PCI_QUIRK_DISABLE_MSI,	0,	0 },

	{ 0 }	/* required terminator */
};
252 
/* map register information */
#define	PCI_MAPMEM	0x01	/* memory map */
#define	PCI_MAPMEMP	0x02	/* prefetchable memory map */
#define	PCI_MAPPORT	0x04	/* port map */

struct devlist pci_devq;	/* global list of enumerated PCI devices */
uint32_t pci_generation;	/* bumped when pci_devq changes (pci_read_device) */
uint32_t pci_numdevs = 0;	/* number of entries on pci_devq */
static int pcie_chipset, pcix_chipset;	/* set when a PCIe/PCI-X cap is seen */

/* sysctl vars */
SYSCTL_NODE(_hw, OID_AUTO, pci, CTLFLAG_RD, 0, "PCI bus tuning parameters");

static int pci_enable_io_modes = 1;
TUNABLE_INT("hw.pci.enable_io_modes", &pci_enable_io_modes);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_io_modes, CTLFLAG_RW,
    &pci_enable_io_modes, 1,
    "Enable I/O and memory bits in the config register.  Some BIOSes do not\n\
enable these bits correctly.  We'd like to do this all the time, but there\n\
are some peripherals that this causes problems with.");

static int pci_do_power_nodriver = 0;
TUNABLE_INT("hw.pci.do_power_nodriver", &pci_do_power_nodriver);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_nodriver, CTLFLAG_RW,
    &pci_do_power_nodriver, 0,
  "Place a function into D3 state when no driver attaches to it.  0 means\n\
disable.  1 means conservatively place devices into D3 state.  2 means\n\
aggressively place devices into D3 state.  3 means put absolutely everything\n\
in D3 state.");

static int pci_do_power_resume = 1;
TUNABLE_INT("hw.pci.do_power_resume", &pci_do_power_resume);
SYSCTL_INT(_hw_pci, OID_AUTO, do_power_resume, CTLFLAG_RW,
    &pci_do_power_resume, 1,
  "Transition from D3 -> D0 on resume.");

static int pci_do_msi = 1;
TUNABLE_INT("hw.pci.enable_msi", &pci_do_msi);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msi, CTLFLAG_RW, &pci_do_msi, 1,
    "Enable support for MSI interrupts");

static int pci_do_msix = 1;
TUNABLE_INT("hw.pci.enable_msix", &pci_do_msix);
SYSCTL_INT(_hw_pci, OID_AUTO, enable_msix, CTLFLAG_RW, &pci_do_msix, 1,
    "Enable support for MSI-X interrupts");

static int pci_honor_msi_blacklist = 1;
TUNABLE_INT("hw.pci.honor_msi_blacklist", &pci_honor_msi_blacklist);
SYSCTL_INT(_hw_pci, OID_AUTO, honor_msi_blacklist, CTLFLAG_RD,
    &pci_honor_msi_blacklist, 1, "Honor chipset blacklist for MSI");
303 
304 /* Find a device_t by bus/slot/function in domain 0 */
305 
306 device_t
307 pci_find_bsf(uint8_t bus, uint8_t slot, uint8_t func)
308 {
309 
310 	return (pci_find_dbsf(0, bus, slot, func));
311 }
312 
313 /* Find a device_t by domain/bus/slot/function */
314 
315 device_t
316 pci_find_dbsf(uint32_t domain, uint8_t bus, uint8_t slot, uint8_t func)
317 {
318 	struct pci_devinfo *dinfo;
319 
320 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
321 		if ((dinfo->cfg.domain == domain) &&
322 		    (dinfo->cfg.bus == bus) &&
323 		    (dinfo->cfg.slot == slot) &&
324 		    (dinfo->cfg.func == func)) {
325 			return (dinfo->cfg.dev);
326 		}
327 	}
328 
329 	return (NULL);
330 }
331 
332 /* Find a device_t by vendor/device ID */
333 
334 device_t
335 pci_find_device(uint16_t vendor, uint16_t device)
336 {
337 	struct pci_devinfo *dinfo;
338 
339 	STAILQ_FOREACH(dinfo, &pci_devq, pci_links) {
340 		if ((dinfo->cfg.vendor == vendor) &&
341 		    (dinfo->cfg.device == device)) {
342 			return (dinfo->cfg.dev);
343 		}
344 	}
345 
346 	return (NULL);
347 }
348 
349 /* return base address of memory or port map */
350 
351 static uint32_t
352 pci_mapbase(uint32_t mapreg)
353 {
354 
355 	if (PCI_BAR_MEM(mapreg))
356 		return (mapreg & PCIM_BAR_MEM_BASE);
357 	else
358 		return (mapreg & PCIM_BAR_IO_BASE);
359 }
360 
361 /* return map type of memory or port map */
362 
363 static const char *
364 pci_maptype(unsigned mapreg)
365 {
366 
367 	if (PCI_BAR_IO(mapreg))
368 		return ("I/O Port");
369 	if (mapreg & PCIM_BAR_MEM_PREFETCH)
370 		return ("Prefetchable Memory");
371 	return ("Memory");
372 }
373 
/*
 * Return log2 of the decoded map size for a memory or port BAR, derived
 * from the sizing value read back after writing all-ones to the BAR.
 * A value of 0 is returned when no address bits are implemented.
 */
static int
pci_mapsize(uint32_t testval)
{
	uint32_t base;
	int ln2size = 0;

	base = pci_mapbase(testval);
	if (base != 0) {
		/* Count the trailing zero bits of the writable mask. */
		while ((base & 1) == 0) {
			ln2size++;
			base >>= 1;
		}
	}
	return (ln2size);
}
392 
393 /* return log2 of address range supported by map register */
394 
395 static int
396 pci_maprange(unsigned mapreg)
397 {
398 	int ln2range = 0;
399 
400 	if (PCI_BAR_IO(mapreg))
401 		ln2range = 32;
402 	else
403 		switch (mapreg & PCIM_BAR_MEM_TYPE) {
404 		case PCIM_BAR_MEM_32:
405 			ln2range = 32;
406 			break;
407 		case PCIM_BAR_MEM_1MB:
408 			ln2range = 20;
409 			break;
410 		case PCIM_BAR_MEM_64:
411 			ln2range = 64;
412 			break;
413 		}
414 	return (ln2range);
415 }
416 
417 /* adjust some values from PCI 1.0 devices to match 2.0 standards ... */
418 
419 static void
420 pci_fixancient(pcicfgregs *cfg)
421 {
422 	if (cfg->hdrtype != 0)
423 		return;
424 
425 	/* PCI to PCI bridges use header type 1 */
426 	if (cfg->baseclass == PCIC_BRIDGE && cfg->subclass == PCIS_BRIDGE_PCI)
427 		cfg->hdrtype = 1;
428 }
429 
/* extract header type specific config data */

/*
 * Fill in the parts of *cfg whose config-space location depends on the
 * header type: subsystem IDs, the number of BARs, and (optionally) the
 * secondary bus number for bridge headers.
 */
static void
pci_hdrtypedata(device_t pcib, int b, int s, int f, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	switch (cfg->hdrtype) {
	case 0:		/* plain device */
		cfg->subvendor      = REG(PCIR_SUBVEND_0, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_0, 2);
		cfg->nummaps	    = PCI_MAXMAPS_0;
		break;
	case 1:		/* PCI-PCI bridge */
		cfg->nummaps	    = PCI_MAXMAPS_1;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_1, 1);
#endif
		break;
	case 2:		/* CardBus bridge */
		cfg->subvendor      = REG(PCIR_SUBVEND_2, 2);
		cfg->subdevice      = REG(PCIR_SUBDEV_2, 2);
		cfg->nummaps	    = PCI_MAXMAPS_2;
#ifdef COMPAT_OLDPCI
		cfg->secondarybus   = REG(PCIR_SECBUS_2, 1);
#endif
		break;
	}
#undef REG
}
459 
/* read configuration header into pcicfgregs structure */

/*
 * Probe one domain/bus/slot/function address.  If a device responds,
 * allocate a pci_devinfo of the caller-requested size (NOTE(review):
 * assumed to be at least sizeof(struct pci_devinfo)), parse its config
 * header and capability list, link it onto the global device list, and
 * return it.  Returns NULL when nothing responds at that address.
 */
struct pci_devinfo *
pci_read_device(device_t pcib, int d, int b, int s, int f, size_t size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, b, s, f, n, w)
	pcicfgregs *cfg = NULL;
	struct pci_devinfo *devlist_entry;
	struct devlist *devlist_head;

	devlist_head = &pci_devq;

	devlist_entry = NULL;

	/* A read of all-ones means no device responds at this address. */
	if (REG(PCIR_DEVVENDOR, 4) != -1) {
		devlist_entry = kmalloc(size, M_DEVBUF, M_WAITOK | M_ZERO);

		cfg = &devlist_entry->cfg;

		/* Snapshot the common configuration header registers. */
		cfg->domain		= d;
		cfg->bus		= b;
		cfg->slot		= s;
		cfg->func		= f;
		cfg->vendor		= REG(PCIR_VENDOR, 2);
		cfg->device		= REG(PCIR_DEVICE, 2);
		cfg->cmdreg		= REG(PCIR_COMMAND, 2);
		cfg->statreg		= REG(PCIR_STATUS, 2);
		cfg->baseclass		= REG(PCIR_CLASS, 1);
		cfg->subclass		= REG(PCIR_SUBCLASS, 1);
		cfg->progif		= REG(PCIR_PROGIF, 1);
		cfg->revid		= REG(PCIR_REVID, 1);
		cfg->hdrtype		= REG(PCIR_HDRTYPE, 1);
		cfg->cachelnsz		= REG(PCIR_CACHELNSZ, 1);
		cfg->lattimer		= REG(PCIR_LATTIMER, 1);
		cfg->intpin		= REG(PCIR_INTPIN, 1);
		cfg->intline		= REG(PCIR_INTLINE, 1);

		cfg->mingnt		= REG(PCIR_MINGNT, 1);
		cfg->maxlat		= REG(PCIR_MAXLAT, 1);

		/* Split the multi-function flag out of the header type. */
		cfg->mfdev		= (cfg->hdrtype & PCIM_MFDEV) != 0;
		cfg->hdrtype		&= ~PCIM_MFDEV;

		pci_fixancient(cfg);
		pci_hdrtypedata(pcib, b, s, f, cfg);

		pci_read_capabilities(pcib, cfg);

		STAILQ_INSERT_TAIL(devlist_head, devlist_entry, pci_links);

		/* Mirror the parsed header into the exported conf structure. */
		devlist_entry->conf.pc_sel.pc_domain = cfg->domain;
		devlist_entry->conf.pc_sel.pc_bus = cfg->bus;
		devlist_entry->conf.pc_sel.pc_dev = cfg->slot;
		devlist_entry->conf.pc_sel.pc_func = cfg->func;
		devlist_entry->conf.pc_hdr = cfg->hdrtype;

		devlist_entry->conf.pc_subvendor = cfg->subvendor;
		devlist_entry->conf.pc_subdevice = cfg->subdevice;
		devlist_entry->conf.pc_vendor = cfg->vendor;
		devlist_entry->conf.pc_device = cfg->device;

		devlist_entry->conf.pc_class = cfg->baseclass;
		devlist_entry->conf.pc_subclass = cfg->subclass;
		devlist_entry->conf.pc_progif = cfg->progif;
		devlist_entry->conf.pc_revid = cfg->revid;

		pci_numdevs++;
		pci_generation++;
	}
	return (devlist_entry);
#undef REG
}
531 
532 static int
533 pci_fixup_nextptr(int *nextptr0)
534 {
535 	int nextptr = *nextptr0;
536 
537 	/* "Next pointer" is only one byte */
538 	KASSERT(nextptr <= 0xff, ("Illegal next pointer %d\n", nextptr));
539 
540 	if (nextptr & 0x3) {
541 		/*
542 		 * PCI local bus spec 3.0:
543 		 *
544 		 * "... The bottom two bits of all pointers are reserved
545 		 *  and must be implemented as 00b although software must
546 		 *  mask them to allow for future uses of these bits ..."
547 		 */
548 		if (bootverbose) {
549 			kprintf("Illegal PCI extended capability "
550 				"offset, fixup 0x%02x -> 0x%02x\n",
551 				nextptr, nextptr & ~0x3);
552 		}
553 		nextptr &= ~0x3;
554 	}
555 	*nextptr0 = nextptr;
556 
557 	if (nextptr < 0x40) {
558 		if (nextptr != 0) {
559 			kprintf("Illegal PCI extended capability "
560 				"offset 0x%02x", nextptr);
561 		}
562 		return 0;
563 	}
564 	return 1;
565 }
566 
/*
 * Parse the power management capability: cache the capability word and
 * the config offsets of the status and PMCSR-extension registers.
 * Only the first power management capability found is recorded.
 */
static void
pci_read_cap_pmgt(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_pp *pp = &cfg->pp;

	/* Already seen a power management capability; keep the first. */
	if (pp->pp_cap)
		return;

	pp->pp_cap = REG(ptr + PCIR_POWER_CAP, 2);
	pp->pp_status = ptr + PCIR_POWER_STATUS;
	pp->pp_pmcsr = ptr + PCIR_POWER_PMCSR;

	/* Only record a data register if the capability is large enough. */
	if ((nextptr - ptr) > PCIR_POWER_DATA) {
		/*
		 * XXX
		 * We should write to data_select and read back from
		 * data_scale to determine whether data register is
		 * implemented.
		 */
#ifdef foo
		pp->pp_data = ptr + PCIR_POWER_DATA;
#else
		pp->pp_data = 0;
#endif
	}

#undef REG
}
598 
/*
 * Parse a HyperTransport capability (x86 only; compiled out elsewhere).
 * Records the slave-capability offset and, for MSI mapping capabilities,
 * the mapping window address and control word for later use.
 */
static void
pci_read_cap_ht(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
#if defined(__i386__) || defined(__x86_64__)

#define REG(n, w)	\
	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)

	struct pcicfg_ht *ht = &cfg->ht;
	uint64_t addr;
	uint32_t val;

	/* Determine HT-specific capability type. */
	val = REG(ptr + PCIR_HT_COMMAND, 2);

	/* Top three bits of the command word select the HT sub-type. */
	if ((val & 0xe000) == PCIM_HTCAP_SLAVE)
		cfg->ht.ht_slave = ptr;

	if ((val & PCIM_HTCMD_CAP_MASK) != PCIM_HTCAP_MSI_MAPPING)
		return;

	if (!(val & PCIM_HTCMD_MSI_FIXED)) {
		/* Sanity check the mapping window. */
		addr = REG(ptr + PCIR_HTMSI_ADDRESS_HI, 4);
		addr <<= 32;
		addr |= REG(ptr + PCIR_HTMSI_ADDRESS_LO, 4);
		if (addr != MSI_X86_ADDR_BASE) {
			device_printf(pcib, "HT Bridge at pci%d:%d:%d:%d "
				"has non-default MSI window 0x%llx\n",
				cfg->domain, cfg->bus, cfg->slot, cfg->func,
				(long long)addr);
		}
	} else {
		addr = MSI_X86_ADDR_BASE;
	}

	ht->ht_msimap = ptr;
	ht->ht_msictrl = val;
	ht->ht_msiaddr = addr;

#undef REG

#endif	/* __i386__ || __x86_64__ */
}
643 
644 static void
645 pci_read_cap_msi(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
646 {
647 #define REG(n, w)	\
648 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
649 
650 	struct pcicfg_msi *msi = &cfg->msi;
651 
652 	msi->msi_location = ptr;
653 	msi->msi_ctrl = REG(ptr + PCIR_MSI_CTRL, 2);
654 	msi->msi_msgnum = 1 << ((msi->msi_ctrl & PCIM_MSICTRL_MMC_MASK) >> 1);
655 
656 #undef REG
657 }
658 
659 static void
660 pci_read_cap_msix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
661 {
662 #define REG(n, w)	\
663 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
664 
665 	struct pcicfg_msix *msix = &cfg->msix;
666 	uint32_t val;
667 
668 	msix->msix_location = ptr;
669 	msix->msix_ctrl = REG(ptr + PCIR_MSIX_CTRL, 2);
670 	msix->msix_msgnum = (msix->msix_ctrl & PCIM_MSIXCTRL_TABLE_SIZE) + 1;
671 
672 	val = REG(ptr + PCIR_MSIX_TABLE, 4);
673 	msix->msix_table_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
674 	msix->msix_table_offset = val & ~PCIM_MSIX_BIR_MASK;
675 
676 	val = REG(ptr + PCIR_MSIX_PBA, 4);
677 	msix->msix_pba_bar = PCIR_BAR(val & PCIM_MSIX_BIR_MASK);
678 	msix->msix_pba_offset = val & ~PCIM_MSIX_BIR_MASK;
679 
680 #undef REG
681 }
682 
/*
 * Record the VPD capability offset; the VPD address/data registers at
 * this offset are used later by pci_read_vpd_reg().
 */
static void
pci_read_cap_vpd(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
{
	cfg->vpd.vpd_reg = ptr;
}
688 
689 static void
690 pci_read_cap_subvendor(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
691 {
692 #define REG(n, w)	\
693 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
694 
695 	/* Should always be true. */
696 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1) {
697 		uint32_t val;
698 
699 		val = REG(ptr + PCIR_SUBVENDCAP_ID, 4);
700 		cfg->subvendor = val & 0xffff;
701 		cfg->subdevice = val >> 16;
702 	}
703 
704 #undef REG
705 }
706 
707 static void
708 pci_read_cap_pcix(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
709 {
710 	/*
711 	 * Assume we have a PCI-X chipset if we have
712 	 * at least one PCI-PCI bridge with a PCI-X
713 	 * capability.  Note that some systems with
714 	 * PCI-express or HT chipsets might match on
715 	 * this check as well.
716 	 */
717 	if ((cfg->hdrtype & PCIM_HDRTYPE) == 1)
718 		pcix_chipset = 1;
719 
720 	cfg->pcix.pcix_ptr = ptr;
721 }
722 
723 static int
724 pcie_slotimpl(const pcicfgregs *cfg)
725 {
726 	const struct pcicfg_expr *expr = &cfg->expr;
727 	uint16_t port_type;
728 
729 	/*
730 	 * Only version 1 can be parsed currently
731 	 */
732 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
733 		return 0;
734 
735 	/*
736 	 * - Slot implemented bit is meaningful iff current port is
737 	 *   root port or down stream port.
738 	 * - Testing for root port or down stream port is meanningful
739 	 *   iff PCI configure has type 1 header.
740 	 */
741 
742 	if (cfg->hdrtype != 1)
743 		return 0;
744 
745 	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;
746 	if (port_type != PCIE_ROOT_PORT && port_type != PCIE_DOWN_STREAM_PORT)
747 		return 0;
748 
749 	if (!(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
750 		return 0;
751 
752 	return 1;
753 }
754 
755 static void
756 pci_read_cap_express(device_t pcib, int ptr, int nextptr, pcicfgregs *cfg)
757 {
758 #define REG(n, w)	\
759 	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
760 
761 	struct pcicfg_expr *expr = &cfg->expr;
762 
763 	/*
764 	 * Assume we have a PCI-express chipset if we have
765 	 * at least one PCI-express device.
766 	 */
767 	pcie_chipset = 1;
768 
769 	expr->expr_ptr = ptr;
770 	expr->expr_cap = REG(ptr + PCIER_CAPABILITY, 2);
771 
772 	/*
773 	 * Only version 1 can be parsed currently
774 	 */
775 	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
776 		return;
777 
778 	/*
779 	 * Read slot capabilities.  Slot capabilities exists iff
780 	 * current port's slot is implemented
781 	 */
782 	if (pcie_slotimpl(cfg))
783 		expr->expr_slotcap = REG(ptr + PCIER_SLOTCAP, 4);
784 
785 #undef REG
786 }
787 
/*
 * Walk the device's capability list and dispatch each recognized
 * capability ID to its parser in pci_read_caps[].  Also enables the
 * HT MSI mapping window on HyperTransport slaves (x86).
 */
static void
pci_read_capabilities(device_t pcib, pcicfgregs *cfg)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, w)
#define	WREG(n, v, w)	PCIB_WRITE_CONFIG(pcib, cfg->bus, cfg->slot, cfg->func, n, v, w)

	uint32_t val;
	int nextptr, ptrptr;

	if ((REG(PCIR_STATUS, 2) & PCIM_STATUS_CAPPRESENT) == 0) {
		/* No capabilities */
		return;
	}

	/* Location of the capability pointer depends on the header type. */
	switch (cfg->hdrtype & PCIM_HDRTYPE) {
	case 0:
	case 1:
		ptrptr = PCIR_CAP_PTR;
		break;
	case 2:
		ptrptr = PCIR_CAP_PTR_2;	/* cardbus capabilities ptr */
		break;
	default:
		return;				/* no capabilities support */
	}
	nextptr = REG(ptrptr, 1);	/* sanity check? */

	/*
	 * Read capability entries.
	 */
	while (pci_fixup_nextptr(&nextptr)) {
		const struct pci_read_cap *rc;
		int ptr = nextptr;

		/* Find the next entry */
		nextptr = REG(ptr + PCICAP_NEXTPTR, 1);

		/* Process this entry */
		val = REG(ptr + PCICAP_ID, 1);
		for (rc = pci_read_caps; rc->read_cap != NULL; ++rc) {
			if (rc->cap == val) {
				rc->read_cap(pcib, ptr, nextptr, cfg);
				break;
			}
		}
	}

#if defined(__i386__) || defined(__x86_64__)
	/*
	 * Enable the MSI mapping window for all HyperTransport
	 * slaves.  PCI-PCI bridges have their windows enabled via
	 * PCIB_MAP_MSI().
	 */
	if (cfg->ht.ht_slave != 0 && cfg->ht.ht_msimap != 0 &&
	    !(cfg->ht.ht_msictrl & PCIM_HTCMD_MSI_ENABLE)) {
		device_printf(pcib,
	    "Enabling MSI window for HyperTransport slave at pci%d:%d:%d:%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		 cfg->ht.ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
		 WREG(cfg->ht.ht_msimap + PCIR_HT_COMMAND, cfg->ht.ht_msictrl,
		     2);
	}
#endif

/* REG and WREG use carry through to next functions */
}
854 
/*
 * PCI Vital Product Data
 */

/* Polling budget for VPD register completion (iterations of DELAY(1)). */
#define	PCI_VPD_TIMEOUT		1000000

/*
 * Read one 4-byte-aligned word of VPD data via the capability's
 * address/data register pair.  NOTE: relies on the REG/WREG macros that
 * intentionally remain defined after pci_read_capabilities() above.
 * Returns 0 on success or ENXIO if the device never signals completion.
 */
static int
pci_read_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t *data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg, 2);

	/* Completion of a read is signalled by flag bit 15 being set. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) != 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}
	*data = (REG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, 4));

	return (0);
}
879 
#if 0
/*
 * Write one 4-byte-aligned word of VPD data (currently unused).
 * Returns 0 on success or ENXIO if the device never signals completion.
 */
static int
pci_write_vpd_reg(device_t pcib, pcicfgregs *cfg, int reg, uint32_t data)
{
	int count = PCI_VPD_TIMEOUT;

	KASSERT((reg & 3) == 0, ("VPD register must by 4 byte aligned"));

	WREG(cfg->vpd.vpd_reg + PCIR_VPD_DATA, data, 4);
	WREG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, reg | 0x8000, 2);
	/* Completion of a write is signalled by flag bit 15 clearing. */
	while ((REG(cfg->vpd.vpd_reg + PCIR_VPD_ADDR, 2) & 0x8000) == 0x8000) {
		if (--count < 0)
			return (ENXIO);
		DELAY(1);	/* limit looping */
	}

	return (0);
}
#endif

#undef PCI_VPD_TIMEOUT
901 
/*
 * Cursor state for reading a device's VPD area one byte at a time
 * (see vpd_nextbyte()).
 */
struct vpd_readstate {
	device_t	pcib;		/* bridge used for config access */
	pcicfgregs	*cfg;		/* device whose VPD is being read */
	uint32_t	val;		/* most recent 32-bit word read */
	int		bytesinval;	/* unconsumed bytes left in val */
	int		off;		/* next VPD offset to fetch */
	uint8_t		cksum;		/* running sum of all bytes returned */
};
910 
911 static int
912 vpd_nextbyte(struct vpd_readstate *vrs, uint8_t *data)
913 {
914 	uint32_t reg;
915 	uint8_t byte;
916 
917 	if (vrs->bytesinval == 0) {
918 		if (pci_read_vpd_reg(vrs->pcib, vrs->cfg, vrs->off, &reg))
919 			return (ENXIO);
920 		vrs->val = le32toh(reg);
921 		vrs->off += 4;
922 		byte = vrs->val & 0xff;
923 		vrs->bytesinval = 3;
924 	} else {
925 		vrs->val = vrs->val >> 8;
926 		byte = vrs->val & 0xff;
927 		vrs->bytesinval--;
928 	}
929 
930 	vrs->cksum += byte;
931 	*data = byte;
932 	return (0);
933 }
934 
935 int
936 pcie_slot_implemented(device_t dev)
937 {
938 	struct pci_devinfo *dinfo = device_get_ivars(dev);
939 
940 	return pcie_slotimpl(&dinfo->cfg);
941 }
942 
943 void
944 pcie_set_max_readrq(device_t dev, uint16_t rqsize)
945 {
946 	uint8_t expr_ptr;
947 	uint16_t val;
948 
949 	rqsize &= PCIEM_DEVCTL_MAX_READRQ_MASK;
950 	if (rqsize > PCIEM_DEVCTL_MAX_READRQ_4096) {
951 		panic("%s: invalid max read request size 0x%02x\n",
952 		      device_get_nameunit(dev), rqsize);
953 	}
954 
955 	expr_ptr = pci_get_pciecap_ptr(dev);
956 	if (!expr_ptr)
957 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
958 
959 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
960 	if ((val & PCIEM_DEVCTL_MAX_READRQ_MASK) != rqsize) {
961 		if (bootverbose)
962 			device_printf(dev, "adjust device control 0x%04x", val);
963 
964 		val &= ~PCIEM_DEVCTL_MAX_READRQ_MASK;
965 		val |= rqsize;
966 		pci_write_config(dev, expr_ptr + PCIER_DEVCTRL, val, 2);
967 
968 		if (bootverbose)
969 			kprintf(" -> 0x%04x\n", val);
970 	}
971 }
972 
973 uint16_t
974 pcie_get_max_readrq(device_t dev)
975 {
976 	uint8_t expr_ptr;
977 	uint16_t val;
978 
979 	expr_ptr = pci_get_pciecap_ptr(dev);
980 	if (!expr_ptr)
981 		panic("%s: not PCIe device\n", device_get_nameunit(dev));
982 
983 	val = pci_read_config(dev, expr_ptr + PCIER_DEVCTRL, 2);
984 	return (val & PCIEM_DEVCTL_MAX_READRQ_MASK);
985 }
986 
/*
 * Parse the device's Vital Product Data (VPD) through the VPD
 * capability (via vpd_nextbyte()) and cache the results in cfg->vpd.
 * VPD is a stream of small/large resource descriptors; this routine
 * walks it with a state machine:
 *
 *	state  0: resource item header (small or large form)
 *	state  1: Identifier String bytes
 *	state  2: VPD-R (read-only) keyword header
 *	state  3: VPD-R keyword value bytes
 *	state  4: consume bytes of an unhandled item
 *	state  5: VPD-W (read/write) keyword header
 *	state  6: VPD-W keyword value bytes
 *	state -1: normal termination
 *	state -2: read error reported by vpd_nextbyte()
 *
 * On a bad "RV" checksum or a read error, partially parsed arrays are
 * kfreed again so callers only ever see fully validated VPD.  The
 * vpd_cached flag is set unconditionally so we never re-parse.
 */
static void
pci_read_vpd(device_t pcib, pcicfgregs *cfg)
{
	struct vpd_readstate vrs;
	int state;		/* state machine state, see table above */
	int name;		/* resource item type from the header */
	int remain;		/* bytes left in the current resource item */
	int i;			/* write index into the current value buffer */
	int alloc, off;		/* alloc/off for RO/W arrays */
	int cksumvalid;		/* -1 = not seen yet, 0 = bad, 1 = good */
	int dflen;		/* data length of the current keyword */
	uint8_t byte;
	uint8_t byte2;

	/* init vpd reader */
	vrs.bytesinval = 0;
	vrs.off = 0;
	vrs.pcib = pcib;
	vrs.cfg = cfg;
	vrs.cksum = 0;

	state = 0;
	name = remain = i = 0;	/* shut up stupid gcc */
	alloc = off = 0;	/* shut up stupid gcc */
	dflen = 0;		/* shut up stupid gcc */
	cksumvalid = -1;
	while (state >= 0) {
		if (vpd_nextbyte(&vrs, &byte)) {
			state = -2;
			break;
		}
#if 0
		kprintf("vpd: val: %#x, off: %d, bytesinval: %d, byte: %#hhx, " \
		    "state: %d, remain: %d, name: %#x, i: %d\n", vrs.val,
		    vrs.off, vrs.bytesinval, byte, state, remain, name, i);
#endif
		switch (state) {
		case 0:		/* item name */
			if (byte & 0x80) {
				/* Large resource: 16-bit length follows. */
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain = byte2;
				if (vpd_nextbyte(&vrs, &byte2)) {
					state = -2;
					break;
				}
				remain |= byte2 << 8;
				/* VPD is capped at 0x7f 32-bit words. */
				if (remain > (0x7f*4 - vrs.off)) {
					state = -1;
					kprintf(
			    "pci%d:%d:%d:%d: invalid VPD data, remain %#x\n",
					    cfg->domain, cfg->bus, cfg->slot,
					    cfg->func, remain);
				}
				name = byte & 0x7f;
			} else {
				/* Small resource: 3-bit length in header. */
				remain = byte & 0x7;
				name = (byte >> 3) & 0xf;
			}
			switch (name) {
			case 0x2:	/* String */
				cfg->vpd.vpd_ident = kmalloc(remain + 1,
				    M_DEVBUF, M_WAITOK);
				i = 0;
				state = 1;
				break;
			case 0xf:	/* End */
				state = -1;
				break;
			case 0x10:	/* VPD-R */
				/* Arrays start at 8 entries, doubled below. */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_ros = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_ros), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 2;
				break;
			case 0x11:	/* VPD-W */
				alloc = 8;
				off = 0;
				cfg->vpd.vpd_w = kmalloc(alloc *
				    sizeof(*cfg->vpd.vpd_w), M_DEVBUF,
				    M_WAITOK | M_ZERO);
				state = 5;
				break;
			default:	/* Invalid data, abort */
				state = -1;
				break;
			}
			break;

		case 1:	/* Identifier String */
			cfg->vpd.vpd_ident[i++] = byte;
			remain--;
			if (remain == 0)  {
				cfg->vpd.vpd_ident[i] = '\0';
				state = 0;
			}
			break;

		case 2:	/* VPD-R Keyword Header */
			/* Grow the read-only array when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_ros[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_ros[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			dflen = byte2;
			if (dflen == 0 &&
			    strncmp(cfg->vpd.vpd_ros[off].keyword, "RV",
			    2) == 0) {
				/*
				 * if this happens, we can't trust the rest
				 * of the VPD.
				 */
				kprintf(
				    "pci%d:%d:%d:%d: bad keyword length: %d\n",
				    cfg->domain, cfg->bus, cfg->slot,
				    cfg->func, dflen);
				cksumvalid = 0;
				state = -1;
				break;
			} else if (dflen == 0) {
				/* Empty value: store a lone terminator. */
				cfg->vpd.vpd_ros[off].value = kmalloc(1 *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
				cfg->vpd.vpd_ros[off].value[0] = '\x00';
			} else
				cfg->vpd.vpd_ros[off].value = kmalloc(
				    (dflen + 1) *
				    sizeof(*cfg->vpd.vpd_ros[off].value),
				    M_DEVBUF, M_WAITOK);
			/* 3 header bytes (keyword + length) consumed. */
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 3's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 2;
			else
				state = 3;
			break;

		case 3:	/* VPD-R Keyword Value */
			cfg->vpd.vpd_ros[off].value[i++] = byte;
			/*
			 * The "RV" keyword carries the checksum byte; the
			 * running sum over everything so far must be zero.
			 */
			if (strncmp(cfg->vpd.vpd_ros[off].keyword,
			    "RV", 2) == 0 && cksumvalid == -1) {
				if (vrs.cksum == 0)
					cksumvalid = 1;
				else {
					if (bootverbose)
						kprintf(
				"pci%d:%d:%d:%d: bad VPD cksum, remain %hhu\n",
						    cfg->domain, cfg->bus,
						    cfg->slot, cfg->func,
						    vrs.cksum);
					cksumvalid = 0;
					state = -1;
					break;
				}
			}
			dflen--;
			remain--;
			/* keep in sync w/ state 2's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_ros[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Item done: shrink array to what we used. */
				cfg->vpd.vpd_rocnt = off;
				cfg->vpd.vpd_ros = krealloc(cfg->vpd.vpd_ros,
				    off * sizeof(*cfg->vpd.vpd_ros),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 2;
			break;

		case 4:		/* consume bytes of an unhandled item */
			remain--;
			if (remain == 0)
				state = 0;
			break;

		case 5:	/* VPD-W Keyword Header */
			/* Grow the read/write array when it fills up. */
			if (off == alloc) {
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    (alloc *= 2) * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
			}
			cfg->vpd.vpd_w[off].keyword[0] = byte;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].keyword[1] = byte2;
			if (vpd_nextbyte(&vrs, &byte2)) {
				state = -2;
				break;
			}
			cfg->vpd.vpd_w[off].len = dflen = byte2;
			/* Record where this field lives for later writes. */
			cfg->vpd.vpd_w[off].start = vrs.off - vrs.bytesinval;
			cfg->vpd.vpd_w[off].value = kmalloc((dflen + 1) *
			    sizeof(*cfg->vpd.vpd_w[off].value),
			    M_DEVBUF, M_WAITOK);
			remain -= 3;
			i = 0;
			/* keep in sync w/ state 6's transitions */
			if (dflen == 0 && remain == 0)
				state = 0;
			else if (dflen == 0)
				state = 5;
			else
				state = 6;
			break;

		case 6:	/* VPD-W Keyword Value */
			cfg->vpd.vpd_w[off].value[i++] = byte;
			dflen--;
			remain--;
			/* keep in sync w/ state 5's transitions */
			if (dflen == 0)
				cfg->vpd.vpd_w[off++].value[i++] = '\0';
			if (dflen == 0 && remain == 0) {
				/* Item done: shrink array to what we used. */
				cfg->vpd.vpd_wcnt = off;
				cfg->vpd.vpd_w = krealloc(cfg->vpd.vpd_w,
				    off * sizeof(*cfg->vpd.vpd_w),
				    M_DEVBUF, M_WAITOK | M_ZERO);
				state = 0;
			} else if (dflen == 0)
				state = 5;
			break;

		default:
			kprintf("pci%d:%d:%d:%d: invalid state: %d\n",
			    cfg->domain, cfg->bus, cfg->slot, cfg->func,
			    state);
			state = -1;
			break;
		}
	}

	if (cksumvalid == 0 || state < -1) {
		/* read-only data bad, clean up */
		if (cfg->vpd.vpd_ros != NULL) {
			/* M_ZERO above guarantees a NULL sentinel value. */
			for (off = 0; cfg->vpd.vpd_ros[off].value; off++)
				kfree(cfg->vpd.vpd_ros[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_ros, M_DEVBUF);
			cfg->vpd.vpd_ros = NULL;
		}
	}
	if (state < -1) {
		/* I/O error, clean up */
		kprintf("pci%d:%d:%d:%d: failed to read VPD data.\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		if (cfg->vpd.vpd_ident != NULL) {
			kfree(cfg->vpd.vpd_ident, M_DEVBUF);
			cfg->vpd.vpd_ident = NULL;
		}
		if (cfg->vpd.vpd_w != NULL) {
			for (off = 0; cfg->vpd.vpd_w[off].value; off++)
				kfree(cfg->vpd.vpd_w[off].value, M_DEVBUF);
			kfree(cfg->vpd.vpd_w, M_DEVBUF);
			cfg->vpd.vpd_w = NULL;
		}
	}
	cfg->vpd.vpd_cached = 1;
#undef REG
#undef WREG
}
1266 
1267 int
1268 pci_get_vpd_ident_method(device_t dev, device_t child, const char **identptr)
1269 {
1270 	struct pci_devinfo *dinfo = device_get_ivars(child);
1271 	pcicfgregs *cfg = &dinfo->cfg;
1272 
1273 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1274 		pci_read_vpd(device_get_parent(dev), cfg);
1275 
1276 	*identptr = cfg->vpd.vpd_ident;
1277 
1278 	if (*identptr == NULL)
1279 		return (ENXIO);
1280 
1281 	return (0);
1282 }
1283 
1284 int
1285 pci_get_vpd_readonly_method(device_t dev, device_t child, const char *kw,
1286 	const char **vptr)
1287 {
1288 	struct pci_devinfo *dinfo = device_get_ivars(child);
1289 	pcicfgregs *cfg = &dinfo->cfg;
1290 	int i;
1291 
1292 	if (!cfg->vpd.vpd_cached && cfg->vpd.vpd_reg != 0)
1293 		pci_read_vpd(device_get_parent(dev), cfg);
1294 
1295 	for (i = 0; i < cfg->vpd.vpd_rocnt; i++)
1296 		if (memcmp(kw, cfg->vpd.vpd_ros[i].keyword,
1297 		    sizeof(cfg->vpd.vpd_ros[i].keyword)) == 0) {
1298 			*vptr = cfg->vpd.vpd_ros[i].value;
1299 		}
1300 
1301 	if (i != cfg->vpd.vpd_rocnt)
1302 		return (0);
1303 
1304 	*vptr = NULL;
1305 	return (ENXIO);
1306 }
1307 
1308 /*
1309  * Return the offset in configuration space of the requested extended
1310  * capability entry or 0 if the specified capability was not found.
1311  */
1312 int
1313 pci_find_extcap_method(device_t dev, device_t child, int capability,
1314     int *capreg)
1315 {
1316 	struct pci_devinfo *dinfo = device_get_ivars(child);
1317 	pcicfgregs *cfg = &dinfo->cfg;
1318 	u_int32_t status;
1319 	u_int8_t ptr;
1320 
1321 	/*
1322 	 * Check the CAP_LIST bit of the PCI status register first.
1323 	 */
1324 	status = pci_read_config(child, PCIR_STATUS, 2);
1325 	if (!(status & PCIM_STATUS_CAPPRESENT))
1326 		return (ENXIO);
1327 
1328 	/*
1329 	 * Determine the start pointer of the capabilities list.
1330 	 */
1331 	switch (cfg->hdrtype & PCIM_HDRTYPE) {
1332 	case 0:
1333 	case 1:
1334 		ptr = PCIR_CAP_PTR;
1335 		break;
1336 	case 2:
1337 		ptr = PCIR_CAP_PTR_2;
1338 		break;
1339 	default:
1340 		/* XXX: panic? */
1341 		return (ENXIO);		/* no extended capabilities support */
1342 	}
1343 	ptr = pci_read_config(child, ptr, 1);
1344 
1345 	/*
1346 	 * Traverse the capabilities list.
1347 	 */
1348 	while (ptr != 0) {
1349 		if (pci_read_config(child, ptr + PCICAP_ID, 1) == capability) {
1350 			if (capreg != NULL)
1351 				*capreg = ptr;
1352 			return (0);
1353 		}
1354 		ptr = pci_read_config(child, ptr + PCICAP_NEXTPTR, 1);
1355 	}
1356 
1357 	return (ENOENT);
1358 }
1359 
1360 /*
1361  * Support for MSI-X message interrupts.
1362  */
1363 void
1364 pci_enable_msix(device_t dev, u_int index, uint64_t address, uint32_t data)
1365 {
1366 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1367 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1368 	uint32_t offset;
1369 
1370 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1371 	offset = msix->msix_table_offset + index * 16;
1372 	bus_write_4(msix->msix_table_res, offset, address & 0xffffffff);
1373 	bus_write_4(msix->msix_table_res, offset + 4, address >> 32);
1374 	bus_write_4(msix->msix_table_res, offset + 8, data);
1375 
1376 	/* Enable MSI -> HT mapping. */
1377 	pci_ht_map_msi(dev, address);
1378 }
1379 
1380 void
1381 pci_mask_msix(device_t dev, u_int index)
1382 {
1383 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1384 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1385 	uint32_t offset, val;
1386 
1387 	KASSERT(msix->msix_msgnum > index, ("bogus index"));
1388 	offset = msix->msix_table_offset + index * 16 + 12;
1389 	val = bus_read_4(msix->msix_table_res, offset);
1390 	if (!(val & PCIM_MSIX_VCTRL_MASK)) {
1391 		val |= PCIM_MSIX_VCTRL_MASK;
1392 		bus_write_4(msix->msix_table_res, offset, val);
1393 	}
1394 }
1395 
1396 void
1397 pci_unmask_msix(device_t dev, u_int index)
1398 {
1399 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1400 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1401 	uint32_t offset, val;
1402 
1403 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1404 	offset = msix->msix_table_offset + index * 16 + 12;
1405 	val = bus_read_4(msix->msix_table_res, offset);
1406 	if (val & PCIM_MSIX_VCTRL_MASK) {
1407 		val &= ~PCIM_MSIX_VCTRL_MASK;
1408 		bus_write_4(msix->msix_table_res, offset, val);
1409 	}
1410 }
1411 
1412 int
1413 pci_pending_msix(device_t dev, u_int index)
1414 {
1415 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1416 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1417 	uint32_t offset, bit;
1418 
1419 	KASSERT(msix->msix_table_len > index, ("bogus index"));
1420 	offset = msix->msix_pba_offset + (index / 32) * 4;
1421 	bit = 1 << index % 32;
1422 	return (bus_read_4(msix->msix_pba_res, offset) & bit);
1423 }
1424 
1425 /*
1426  * Restore MSI-X registers and table during resume.  If MSI-X is
1427  * enabled then walk the virtual table to restore the actual MSI-X
1428  * table.
1429  */
1430 static void
1431 pci_resume_msix(device_t dev)
1432 {
1433 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1434 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1435 	struct msix_table_entry *mte;
1436 	struct msix_vector *mv;
1437 	int i;
1438 
1439 	if (msix->msix_alloc > 0) {
1440 		/* First, mask all vectors. */
1441 		for (i = 0; i < msix->msix_msgnum; i++)
1442 			pci_mask_msix(dev, i);
1443 
1444 		/* Second, program any messages with at least one handler. */
1445 		for (i = 0; i < msix->msix_table_len; i++) {
1446 			mte = &msix->msix_table[i];
1447 			if (mte->mte_vector == 0 || mte->mte_handlers == 0)
1448 				continue;
1449 			mv = &msix->msix_vectors[mte->mte_vector - 1];
1450 			pci_enable_msix(dev, i, mv->mv_address, mv->mv_data);
1451 			pci_unmask_msix(dev, i);
1452 		}
1453 	}
1454 	pci_write_config(dev, msix->msix_location + PCIR_MSIX_CTRL,
1455 	    msix->msix_ctrl, 2);
1456 }
1457 
1458 /*
1459  * Attempt to allocate *count MSI-X messages.  The actual number allocated is
1460  * returned in *count.  After this function returns, each message will be
1461  * available to the driver as SYS_RES_IRQ resources starting at rid 1.
1462  */
int
pci_alloc_msix_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irq, max;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 is allocated, then fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI-X capability present? */
	if (cfg->msix.msix_location == 0 || !pci_do_msix)
		return (ENODEV);

	/*
	 * Make sure the appropriate BARs are mapped.  The driver must
	 * have activated the memory BAR(s) holding the MSI-X table and
	 * pending-bit array before asking for vectors.
	 */
	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
	    cfg->msix.msix_table_bar);
	if (rle == NULL || rle->res == NULL ||
	    !(rman_get_flags(rle->res) & RF_ACTIVE))
		return (ENXIO);
	cfg->msix.msix_table_res = rle->res;
	if (cfg->msix.msix_pba_bar != cfg->msix.msix_table_bar) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY,
		    cfg->msix.msix_pba_bar);
		if (rle == NULL || rle->res == NULL ||
		    !(rman_get_flags(rle->res) & RF_ACTIVE))
			return (ENXIO);
	}
	/* If the PBA shares the table BAR, rle still points at it here. */
	cfg->msix.msix_pba_res = rle->res;

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI-X vectors (%d supported)\n",
		    *count, cfg->msix.msix_msgnum);
	/* Allocate IRQs one at a time; keep whatever we got on failure. */
	max = min(*count, cfg->msix.msix_msgnum);
	for (i = 0; i < max; i++) {
		/* Allocate a message. */
		error = PCIB_ALLOC_MSIX(device_get_parent(dev), child, &irq);
		if (error)
			break;
		/* Message i is exposed to the driver as SYS_RES_IRQ rid i+1. */
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
		    irq, 1, -1);
	}
	actual = i;

	if (actual == 0) {
		if (bootverbose) {
			device_printf(child,
			    "could not allocate any MSI-X vectors\n");
		}
		return  (ENXIO);
	}

	if (bootverbose) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 1);
		if (actual == 1)
			device_printf(child, "using IRQ %lu for MSI-X\n",
			    rle->start);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs of
			 * IRQ values as ranges.  'irq' is the previous IRQ.
			 * 'run' is true if we are in a range.
			 */
			device_printf(child, "using IRQs %lu", rle->start);
			irq = rle->start;
			run = 0;
			for (i = 1; i < actual; i++) {
				rle = resource_list_find(&dinfo->resources,
				    SYS_RES_IRQ, i + 1);

				/* Still in a run? */
				if (rle->start == irq + 1) {
					run = 1;
					irq++;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irq);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%lu", rle->start);
				irq = rle->start;
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irq);
			kprintf(" for MSI-X\n");
		}
	}

	/* Mask all vectors. */
	for (i = 0; i < cfg->msix.msix_msgnum; i++)
		pci_mask_msix(child, i);

	/* Allocate and initialize vector data and virtual table. */
	cfg->msix.msix_vectors = kmalloc(sizeof(struct msix_vector) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	cfg->msix.msix_table = kmalloc(sizeof(struct msix_table_entry) * actual,
	    M_DEVBUF, M_WAITOK | M_ZERO);
	for (i = 0; i < actual; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		cfg->msix.msix_vectors[i].mv_irq = rle->start;
		/* mte_vector is 1-based; 0 marks an unused table slot. */
		cfg->msix.msix_table[i].mte_vector = i + 1;
	}

	/* Update control register to enable MSI-X. */
	cfg->msix.msix_ctrl |= PCIM_MSIXCTRL_MSIX_ENABLE;
	pci_write_config(child, cfg->msix.msix_location + PCIR_MSIX_CTRL,
	    cfg->msix.msix_ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msix.msix_alloc = actual;
	cfg->msix.msix_table_len = actual;
	*count = actual;
	return (0);
}
1602 
1603 /*
1604  * By default, pci_alloc_msix() will assign the allocated IRQ
1605  * resources consecutively to the first N messages in the MSI-X table.
1606  * However, device drivers may want to use different layouts if they
1607  * either receive fewer messages than they asked for, or they wish to
1608  * populate the MSI-X table sparsely.  This method allows the driver
1609  * to specify what layout it wants.  It must be called after a
1610  * successful pci_alloc_msix() but before any of the associated
1611  * SYS_RES_IRQ resources are allocated via bus_alloc_resource().
1612  *
1613  * The 'vectors' array contains 'count' message vectors.  The array
1614  * maps directly to the MSI-X table in that index 0 in the array
1615  * specifies the vector for the first message in the MSI-X table, etc.
1616  * The vector value in each array index can either be 0 to indicate
1617  * that no vector should be assigned to a message slot, or it can be a
1618  * number from 1 to N (where N is the count returned from a
 * successful call to pci_alloc_msix()) to indicate which message
1620  * vector (IRQ) to be used for the corresponding message.
1621  *
1622  * On successful return, each message with a non-zero vector will have
1623  * an associated SYS_RES_IRQ whose rid is equal to the array index +
1624  * 1.  Additionally, if any of the IRQs allocated via the previous
1625  * call to pci_alloc_msix() are not used in the mapping, those IRQs
1626  * will be kfreed back to the system automatically.
1627  *
1628  * For example, suppose a driver has a MSI-X table with 6 messages and
1629  * asks for 6 messages, but pci_alloc_msix() only returns a count of
1630  * 3.  Call the three vectors allocated by pci_alloc_msix() A, B, and
1631  * C.  After the call to pci_alloc_msix(), the device will be setup to
1632  * have an MSI-X table of ABC--- (where - means no vector assigned).
 * If the driver then passes a vector array of { 1, 0, 1, 2, 0, 2 },
1634  * then the MSI-X table will look like A-AB-B, and the 'C' vector will
1635  * be kfreed back to the system.  This device will also have valid
1636  * SYS_RES_IRQ rids of 1, 3, 4, and 6.
1637  *
1638  * In any case, the SYS_RES_IRQ rid X will always map to the message
1639  * at MSI-X table index X - 1 and will only be valid if a vector is
1640  * assigned to that table entry.
1641  */
1642 int
1643 pci_remap_msix_method(device_t dev, device_t child, int count,
1644     const u_int *vectors)
1645 {
1646 	struct pci_devinfo *dinfo = device_get_ivars(child);
1647 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1648 	struct resource_list_entry *rle;
1649 	int i, irq, j, *used;
1650 
1651 	/*
1652 	 * Have to have at least one message in the table but the
1653 	 * table can't be bigger than the actual MSI-X table in the
1654 	 * device.
1655 	 */
1656 	if (count == 0 || count > msix->msix_msgnum)
1657 		return (EINVAL);
1658 
1659 	/* Sanity check the vectors. */
1660 	for (i = 0; i < count; i++)
1661 		if (vectors[i] > msix->msix_alloc)
1662 			return (EINVAL);
1663 
1664 	/*
1665 	 * Make sure there aren't any holes in the vectors to be used.
1666 	 * It's a big pain to support it, and it doesn't really make
1667 	 * sense anyway.  Also, at least one vector must be used.
1668 	 */
1669 	used = kmalloc(sizeof(int) * msix->msix_alloc, M_DEVBUF, M_WAITOK |
1670 	    M_ZERO);
1671 	for (i = 0; i < count; i++)
1672 		if (vectors[i] != 0)
1673 			used[vectors[i] - 1] = 1;
1674 	for (i = 0; i < msix->msix_alloc - 1; i++)
1675 		if (used[i] == 0 && used[i + 1] == 1) {
1676 			kfree(used, M_DEVBUF);
1677 			return (EINVAL);
1678 		}
1679 	if (used[0] != 1) {
1680 		kfree(used, M_DEVBUF);
1681 		return (EINVAL);
1682 	}
1683 
1684 	/* Make sure none of the resources are allocated. */
1685 	for (i = 0; i < msix->msix_table_len; i++) {
1686 		if (msix->msix_table[i].mte_vector == 0)
1687 			continue;
1688 		if (msix->msix_table[i].mte_handlers > 0)
1689 			return (EBUSY);
1690 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1691 		KASSERT(rle != NULL, ("missing resource"));
1692 		if (rle->res != NULL)
1693 			return (EBUSY);
1694 	}
1695 
1696 	/* Free the existing resource list entries. */
1697 	for (i = 0; i < msix->msix_table_len; i++) {
1698 		if (msix->msix_table[i].mte_vector == 0)
1699 			continue;
1700 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1701 	}
1702 
1703 	/*
1704 	 * Build the new virtual table keeping track of which vectors are
1705 	 * used.
1706 	 */
1707 	kfree(msix->msix_table, M_DEVBUF);
1708 	msix->msix_table = kmalloc(sizeof(struct msix_table_entry) * count,
1709 	    M_DEVBUF, M_WAITOK | M_ZERO);
1710 	for (i = 0; i < count; i++)
1711 		msix->msix_table[i].mte_vector = vectors[i];
1712 	msix->msix_table_len = count;
1713 
1714 	/* Free any unused IRQs and resize the vectors array if necessary. */
1715 	j = msix->msix_alloc - 1;
1716 	if (used[j] == 0) {
1717 		struct msix_vector *vec;
1718 
1719 		while (used[j] == 0) {
1720 			PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1721 			    msix->msix_vectors[j].mv_irq);
1722 			j--;
1723 		}
1724 		vec = kmalloc(sizeof(struct msix_vector) * (j + 1), M_DEVBUF,
1725 		    M_WAITOK);
1726 		bcopy(msix->msix_vectors, vec, sizeof(struct msix_vector) *
1727 		    (j + 1));
1728 		kfree(msix->msix_vectors, M_DEVBUF);
1729 		msix->msix_vectors = vec;
1730 		msix->msix_alloc = j + 1;
1731 	}
1732 	kfree(used, M_DEVBUF);
1733 
1734 	/* Map the IRQs onto the rids. */
1735 	for (i = 0; i < count; i++) {
1736 		if (vectors[i] == 0)
1737 			continue;
1738 		irq = msix->msix_vectors[vectors[i]].mv_irq;
1739 		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1, irq,
1740 		    irq, 1, -1);
1741 	}
1742 
1743 	if (bootverbose) {
1744 		device_printf(child, "Remapped MSI-X IRQs as: ");
1745 		for (i = 0; i < count; i++) {
1746 			if (i != 0)
1747 				kprintf(", ");
1748 			if (vectors[i] == 0)
1749 				kprintf("---");
1750 			else
1751 				kprintf("%d",
1752 				    msix->msix_vectors[vectors[i]].mv_irq);
1753 		}
1754 		kprintf("\n");
1755 	}
1756 
1757 	return (0);
1758 }
1759 
1760 static int
1761 pci_release_msix(device_t dev, device_t child)
1762 {
1763 	struct pci_devinfo *dinfo = device_get_ivars(child);
1764 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1765 	struct resource_list_entry *rle;
1766 	int i;
1767 
1768 	/* Do we have any messages to release? */
1769 	if (msix->msix_alloc == 0)
1770 		return (ENODEV);
1771 
1772 	/* Make sure none of the resources are allocated. */
1773 	for (i = 0; i < msix->msix_table_len; i++) {
1774 		if (msix->msix_table[i].mte_vector == 0)
1775 			continue;
1776 		if (msix->msix_table[i].mte_handlers > 0)
1777 			return (EBUSY);
1778 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
1779 		KASSERT(rle != NULL, ("missing resource"));
1780 		if (rle->res != NULL)
1781 			return (EBUSY);
1782 	}
1783 
1784 	/* Update control register to disable MSI-X. */
1785 	msix->msix_ctrl &= ~PCIM_MSIXCTRL_MSIX_ENABLE;
1786 	pci_write_config(child, msix->msix_location + PCIR_MSIX_CTRL,
1787 	    msix->msix_ctrl, 2);
1788 
1789 	/* Free the resource list entries. */
1790 	for (i = 0; i < msix->msix_table_len; i++) {
1791 		if (msix->msix_table[i].mte_vector == 0)
1792 			continue;
1793 		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);
1794 	}
1795 	kfree(msix->msix_table, M_DEVBUF);
1796 	msix->msix_table_len = 0;
1797 
1798 	/* Release the IRQs. */
1799 	for (i = 0; i < msix->msix_alloc; i++)
1800 		PCIB_RELEASE_MSIX(device_get_parent(dev), child,
1801 		    msix->msix_vectors[i].mv_irq);
1802 	kfree(msix->msix_vectors, M_DEVBUF);
1803 	msix->msix_alloc = 0;
1804 	return (0);
1805 }
1806 
1807 /*
1808  * Return the max supported MSI-X messages this device supports.
1809  * Basically, assuming the MD code can alloc messages, this function
1810  * should return the maximum value that pci_alloc_msix() can return.
1811  * Thus, it is subject to the tunables, etc.
1812  */
1813 int
1814 pci_msix_count_method(device_t dev, device_t child)
1815 {
1816 	struct pci_devinfo *dinfo = device_get_ivars(child);
1817 	struct pcicfg_msix *msix = &dinfo->cfg.msix;
1818 
1819 	if (pci_do_msix && msix->msix_location != 0)
1820 		return (msix->msix_msgnum);
1821 	return (0);
1822 }
1823 
1824 /*
1825  * HyperTransport MSI mapping control
1826  */
1827 void
1828 pci_ht_map_msi(device_t dev, uint64_t addr)
1829 {
1830 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1831 	struct pcicfg_ht *ht = &dinfo->cfg.ht;
1832 
1833 	if (!ht->ht_msimap)
1834 		return;
1835 
1836 	if (addr && !(ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) &&
1837 	    ht->ht_msiaddr >> 20 == addr >> 20) {
1838 		/* Enable MSI -> HT mapping. */
1839 		ht->ht_msictrl |= PCIM_HTCMD_MSI_ENABLE;
1840 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1841 		    ht->ht_msictrl, 2);
1842 	}
1843 
1844 	if (!addr && ht->ht_msictrl & PCIM_HTCMD_MSI_ENABLE) {
1845 		/* Disable MSI -> HT mapping. */
1846 		ht->ht_msictrl &= ~PCIM_HTCMD_MSI_ENABLE;
1847 		pci_write_config(dev, ht->ht_msimap + PCIR_HT_COMMAND,
1848 		    ht->ht_msictrl, 2);
1849 	}
1850 }
1851 
1852 /*
1853  * Support for MSI message signalled interrupts.
1854  */
1855 void
1856 pci_enable_msi(device_t dev, uint64_t address, uint16_t data)
1857 {
1858 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1859 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1860 
1861 	/* Write data and address values. */
1862 	pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1863 	    address & 0xffffffff, 4);
1864 	if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1865 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR_HIGH,
1866 		    address >> 32, 4);
1867 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA_64BIT,
1868 		    data, 2);
1869 	} else
1870 		pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA, data,
1871 		    2);
1872 
1873 	/* Enable MSI in the control register. */
1874 	msi->msi_ctrl |= PCIM_MSICTRL_MSI_ENABLE;
1875 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1876 	    2);
1877 
1878 	/* Enable MSI -> HT mapping. */
1879 	pci_ht_map_msi(dev, address);
1880 }
1881 
1882 void
1883 pci_disable_msi(device_t dev)
1884 {
1885 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1886 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1887 
1888 	/* Disable MSI -> HT mapping. */
1889 	pci_ht_map_msi(dev, 0);
1890 
1891 	/* Disable MSI in the control register. */
1892 	msi->msi_ctrl &= ~PCIM_MSICTRL_MSI_ENABLE;
1893 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1894 	    2);
1895 }
1896 
1897 /*
1898  * Restore MSI registers during resume.  If MSI is enabled then
1899  * restore the data and address registers in addition to the control
1900  * register.
1901  */
1902 static void
1903 pci_resume_msi(device_t dev)
1904 {
1905 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1906 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
1907 	uint64_t address;
1908 	uint16_t data;
1909 
1910 	if (msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE) {
1911 		address = msi->msi_addr;
1912 		data = msi->msi_data;
1913 		pci_write_config(dev, msi->msi_location + PCIR_MSI_ADDR,
1914 		    address & 0xffffffff, 4);
1915 		if (msi->msi_ctrl & PCIM_MSICTRL_64BIT) {
1916 			pci_write_config(dev, msi->msi_location +
1917 			    PCIR_MSI_ADDR_HIGH, address >> 32, 4);
1918 			pci_write_config(dev, msi->msi_location +
1919 			    PCIR_MSI_DATA_64BIT, data, 2);
1920 		} else
1921 			pci_write_config(dev, msi->msi_location + PCIR_MSI_DATA,
1922 			    data, 2);
1923 	}
1924 	pci_write_config(dev, msi->msi_location + PCIR_MSI_CTRL, msi->msi_ctrl,
1925 	    2);
1926 }
1927 
1928 int
1929 pci_remap_msi_irq(device_t dev, u_int irq)
1930 {
1931 	struct pci_devinfo *dinfo = device_get_ivars(dev);
1932 	pcicfgregs *cfg = &dinfo->cfg;
1933 	struct resource_list_entry *rle;
1934 	struct msix_table_entry *mte;
1935 	struct msix_vector *mv;
1936 	device_t bus;
1937 	uint64_t addr;
1938 	uint32_t data;
1939 	int error, i, j;
1940 
1941 	bus = device_get_parent(dev);
1942 
1943 	/*
1944 	 * Handle MSI first.  We try to find this IRQ among our list
1945 	 * of MSI IRQs.  If we find it, we request updated address and
1946 	 * data registers and apply the results.
1947 	 */
1948 	if (cfg->msi.msi_alloc > 0) {
1949 
1950 		/* If we don't have any active handlers, nothing to do. */
1951 		if (cfg->msi.msi_handlers == 0)
1952 			return (0);
1953 		for (i = 0; i < cfg->msi.msi_alloc; i++) {
1954 			rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ,
1955 			    i + 1);
1956 			if (rle->start == irq) {
1957 				error = PCIB_MAP_MSI(device_get_parent(bus),
1958 				    dev, irq, &addr, &data);
1959 				if (error)
1960 					return (error);
1961 				pci_disable_msi(dev);
1962 				dinfo->cfg.msi.msi_addr = addr;
1963 				dinfo->cfg.msi.msi_data = data;
1964 				pci_enable_msi(dev, addr, data);
1965 				return (0);
1966 			}
1967 		}
1968 		return (ENOENT);
1969 	}
1970 
1971 	/*
1972 	 * For MSI-X, we check to see if we have this IRQ.  If we do,
1973 	 * we request the updated mapping info.  If that works, we go
1974 	 * through all the slots that use this IRQ and update them.
1975 	 */
1976 	if (cfg->msix.msix_alloc > 0) {
1977 		for (i = 0; i < cfg->msix.msix_alloc; i++) {
1978 			mv = &cfg->msix.msix_vectors[i];
1979 			if (mv->mv_irq == irq) {
1980 				error = PCIB_MAP_MSI(device_get_parent(bus),
1981 				    dev, irq, &addr, &data);
1982 				if (error)
1983 					return (error);
1984 				mv->mv_address = addr;
1985 				mv->mv_data = data;
1986 				for (j = 0; j < cfg->msix.msix_table_len; j++) {
1987 					mte = &cfg->msix.msix_table[j];
1988 					if (mte->mte_vector != i + 1)
1989 						continue;
1990 					if (mte->mte_handlers == 0)
1991 						continue;
1992 					pci_mask_msix(dev, j);
1993 					pci_enable_msix(dev, j, addr, data);
1994 					pci_unmask_msix(dev, j);
1995 				}
1996 			}
1997 		}
1998 		return (ENOENT);
1999 	}
2000 
2001 	return (ENOENT);
2002 }
2003 
2004 /*
2005  * Returns true if the specified device is blacklisted because MSI
2006  * doesn't work.
2007  */
2008 int
2009 pci_msi_device_blacklisted(device_t dev)
2010 {
2011 	struct pci_quirk *q;
2012 
2013 	if (!pci_honor_msi_blacklist)
2014 		return (0);
2015 
2016 	for (q = &pci_quirks[0]; q->devid; q++) {
2017 		if (q->devid == pci_get_devid(dev) &&
2018 		    q->type == PCI_QUIRK_DISABLE_MSI)
2019 			return (1);
2020 	}
2021 	return (0);
2022 }
2023 
2024 /*
 * Determine if MSI is blacklisted globally on this system.  Currently,
2026  * we just check for blacklisted chipsets as represented by the
2027  * host-PCI bridge at device 0:0:0.  In the future, it may become
2028  * necessary to check other system attributes, such as the kenv values
2029  * that give the motherboard manufacturer and model number.
2030  */
2031 static int
2032 pci_msi_blacklisted(void)
2033 {
2034 	device_t dev;
2035 
2036 	if (!pci_honor_msi_blacklist)
2037 		return (0);
2038 
2039 	/* Blacklist all non-PCI-express and non-PCI-X chipsets. */
2040 	if (!(pcie_chipset || pcix_chipset))
2041 		return (1);
2042 
2043 	dev = pci_find_bsf(0, 0, 0);
2044 	if (dev != NULL)
2045 		return (pci_msi_device_blacklisted(dev));
2046 	return (0);
2047 }
2048 
2049 /*
2050  * Attempt to allocate *count MSI messages.  The actual number allocated is
2051  * returned in *count.  After this function returns, each message will be
 * available to the driver as SYS_RES_IRQ resources starting at rid 1.
2053  */
int
pci_alloc_msi_method(device_t dev, device_t child, int *count)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list_entry *rle;
	int actual, error, i, irqs[32];
	uint16_t ctrl;

	/* Don't let count == 0 get us into trouble. */
	if (*count == 0)
		return (EINVAL);

	/* If rid 0 (the legacy INTx interrupt resource) is allocated, fail. */
	rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, 0);
	if (rle != NULL && rle->res != NULL)
		return (ENXIO);

	/* Already have allocated messages? */
	if (cfg->msi.msi_alloc != 0 || cfg->msix.msix_alloc != 0)
		return (ENXIO);

	/* If MSI is blacklisted for this system, fail. */
	if (pci_msi_blacklisted())
		return (ENXIO);

	/* MSI capability present? */
	if (cfg->msi.msi_location == 0 || !pci_do_msi)
		return (ENODEV);

	if (bootverbose)
		device_printf(child,
		    "attempting to allocate %d MSI vectors (%d supported)\n",
		    *count, cfg->msi.msi_msgnum);

	/* Don't ask for more than the device supports. */
	actual = min(*count, cfg->msi.msi_msgnum);

	/* Don't ask for more than 32 messages. */
	actual = min(actual, 32);

	/* MSI requires power of 2 number of messages. */
	if (!powerof2(actual))
		return (EINVAL);

	/*
	 * Keep halving the request until the bridge grants it, or give
	 * up once we have bottomed out at a single message.
	 */
	for (;;) {
		/* Try to allocate N messages. */
		error = PCIB_ALLOC_MSI(device_get_parent(dev), child, actual,
		    cfg->msi.msi_msgnum, irqs);
		if (error == 0)
			break;
		if (actual == 1)
			return (error);

		/* Try N / 2. */
		actual >>= 1;
	}

	/*
	 * We now have N actual messages mapped onto SYS_RES_IRQ
	 * resources in the irqs[] array, so add new resources
	 * starting at rid 1.
	 */
	for (i = 0; i < actual; i++)
		resource_list_add(&dinfo->resources, SYS_RES_IRQ, i + 1,
		    irqs[i], irqs[i], 1, -1);

	if (bootverbose) {
		if (actual == 1)
			device_printf(child, "using IRQ %d for MSI\n", irqs[0]);
		else {
			int run;

			/*
			 * Be fancy and try to print contiguous runs
			 * of IRQ values as ranges.  'run' is true if
			 * we are in a range.
			 */
			device_printf(child, "using IRQs %d", irqs[0]);
			run = 0;
			for (i = 1; i < actual; i++) {

				/* Still in a run? */
				if (irqs[i] == irqs[i - 1] + 1) {
					run = 1;
					continue;
				}

				/* Finish previous range. */
				if (run) {
					kprintf("-%d", irqs[i - 1]);
					run = 0;
				}

				/* Start new range. */
				kprintf(",%d", irqs[i]);
			}

			/* Unfinished range? */
			if (run)
				kprintf("-%d", irqs[actual - 1]);
			kprintf(" for MSI\n");
		}
	}

	/* Update control register with actual count. */
	ctrl = cfg->msi.msi_ctrl;
	ctrl &= ~PCIM_MSICTRL_MME_MASK;
	ctrl |= (ffs(actual) - 1) << 4;	/* MME field holds log2(actual) */
	cfg->msi.msi_ctrl = ctrl;
	pci_write_config(child, cfg->msi.msi_location + PCIR_MSI_CTRL, ctrl, 2);

	/* Update counts of alloc'd messages. */
	cfg->msi.msi_alloc = actual;
	cfg->msi.msi_handlers = 0;
	*count = actual;
	return (0);
}
2172 
2173 /* Release the MSI messages associated with this device. */
int
pci_release_msi_method(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct pcicfg_msi *msi = &dinfo->cfg.msi;
	struct resource_list_entry *rle;
	int error, i, irqs[32];

	/* Try MSI-X first; ENODEV means the device has no MSI-X to free. */
	error = pci_release_msix(dev, child);
	if (error != ENODEV)
		return (error);

	/* Do we have any messages to release? */
	if (msi->msi_alloc == 0)
		return (ENODEV);
	KASSERT(msi->msi_alloc <= 32, ("more than 32 alloc'd messages"));

	/*
	 * Make sure none of the resources are allocated: a message still
	 * wired to a handler, or an IRQ resource still held by a driver,
	 * makes the release unsafe.
	 */
	if (msi->msi_handlers > 0)
		return (EBUSY);
	for (i = 0; i < msi->msi_alloc; i++) {
		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, i + 1);
		KASSERT(rle != NULL, ("missing MSI resource"));
		if (rle->res != NULL)
			return (EBUSY);
		irqs[i] = rle->start;
	}

	/* Update control register with 0 count. */
	KASSERT(!(msi->msi_ctrl & PCIM_MSICTRL_MSI_ENABLE),
	    ("%s: MSI still enabled", __func__));
	msi->msi_ctrl &= ~PCIM_MSICTRL_MME_MASK;
	pci_write_config(child, msi->msi_location + PCIR_MSI_CTRL,
	    msi->msi_ctrl, 2);

	/* Release the messages back to the bridge. */
	PCIB_RELEASE_MSI(device_get_parent(dev), child, msi->msi_alloc, irqs);
	for (i = 0; i < msi->msi_alloc; i++)
		resource_list_delete(&dinfo->resources, SYS_RES_IRQ, i + 1);

	/* Update alloc count and clear the cached address/data pair. */
	msi->msi_alloc = 0;
	msi->msi_addr = 0;
	msi->msi_data = 0;
	return (0);
}
2221 
2222 /*
2223  * Return the max supported MSI messages this device supports.
2224  * Basically, assuming the MD code can alloc messages, this function
2225  * should return the maximum value that pci_alloc_msi() can return.
2226  * Thus, it is subject to the tunables, etc.
2227  */
2228 int
2229 pci_msi_count_method(device_t dev, device_t child)
2230 {
2231 	struct pci_devinfo *dinfo = device_get_ivars(child);
2232 	struct pcicfg_msi *msi = &dinfo->cfg.msi;
2233 
2234 	if (pci_do_msi && msi->msi_location != 0)
2235 		return (msi->msi_msgnum);
2236 	return (0);
2237 }
2238 
2239 /* kfree pcicfgregs structure and all depending data structures */
2240 
2241 int
2242 pci_freecfg(struct pci_devinfo *dinfo)
2243 {
2244 	struct devlist *devlist_head;
2245 	int i;
2246 
2247 	devlist_head = &pci_devq;
2248 
2249 	if (dinfo->cfg.vpd.vpd_reg) {
2250 		kfree(dinfo->cfg.vpd.vpd_ident, M_DEVBUF);
2251 		for (i = 0; i < dinfo->cfg.vpd.vpd_rocnt; i++)
2252 			kfree(dinfo->cfg.vpd.vpd_ros[i].value, M_DEVBUF);
2253 		kfree(dinfo->cfg.vpd.vpd_ros, M_DEVBUF);
2254 		for (i = 0; i < dinfo->cfg.vpd.vpd_wcnt; i++)
2255 			kfree(dinfo->cfg.vpd.vpd_w[i].value, M_DEVBUF);
2256 		kfree(dinfo->cfg.vpd.vpd_w, M_DEVBUF);
2257 	}
2258 	STAILQ_REMOVE(devlist_head, dinfo, pci_devinfo, pci_links);
2259 	kfree(dinfo, M_DEVBUF);
2260 
2261 	/* increment the generation count */
2262 	pci_generation++;
2263 
2264 	/* we're losing one device */
2265 	pci_numdevs--;
2266 	return (0);
2267 }
2268 
2269 /*
 * PCI power management
2271  */
2272 int
2273 pci_set_powerstate_method(device_t dev, device_t child, int state)
2274 {
2275 	struct pci_devinfo *dinfo = device_get_ivars(child);
2276 	pcicfgregs *cfg = &dinfo->cfg;
2277 	uint16_t status;
2278 	int result, oldstate, highest, delay;
2279 
2280 	if (cfg->pp.pp_cap == 0)
2281 		return (EOPNOTSUPP);
2282 
2283 	/*
2284 	 * Optimize a no state change request away.  While it would be OK to
2285 	 * write to the hardware in theory, some devices have shown odd
2286 	 * behavior when going from D3 -> D3.
2287 	 */
2288 	oldstate = pci_get_powerstate(child);
2289 	if (oldstate == state)
2290 		return (0);
2291 
2292 	/*
2293 	 * The PCI power management specification states that after a state
2294 	 * transition between PCI power states, system software must
2295 	 * guarantee a minimal delay before the function accesses the device.
2296 	 * Compute the worst case delay that we need to guarantee before we
2297 	 * access the device.  Many devices will be responsive much more
2298 	 * quickly than this delay, but there are some that don't respond
2299 	 * instantly to state changes.  Transitions to/from D3 state require
2300 	 * 10ms, while D2 requires 200us, and D0/1 require none.  The delay
2301 	 * is done below with DELAY rather than a sleeper function because
2302 	 * this function can be called from contexts where we cannot sleep.
2303 	 */
2304 	highest = (oldstate > state) ? oldstate : state;
2305 	if (highest == PCI_POWERSTATE_D3)
2306 	    delay = 10000;
2307 	else if (highest == PCI_POWERSTATE_D2)
2308 	    delay = 200;
2309 	else
2310 	    delay = 0;
2311 	status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2)
2312 	    & ~PCIM_PSTAT_DMASK;
2313 	result = 0;
2314 	switch (state) {
2315 	case PCI_POWERSTATE_D0:
2316 		status |= PCIM_PSTAT_D0;
2317 		break;
2318 	case PCI_POWERSTATE_D1:
2319 		if ((cfg->pp.pp_cap & PCIM_PCAP_D1SUPP) == 0)
2320 			return (EOPNOTSUPP);
2321 		status |= PCIM_PSTAT_D1;
2322 		break;
2323 	case PCI_POWERSTATE_D2:
2324 		if ((cfg->pp.pp_cap & PCIM_PCAP_D2SUPP) == 0)
2325 			return (EOPNOTSUPP);
2326 		status |= PCIM_PSTAT_D2;
2327 		break;
2328 	case PCI_POWERSTATE_D3:
2329 		status |= PCIM_PSTAT_D3;
2330 		break;
2331 	default:
2332 		return (EINVAL);
2333 	}
2334 
2335 	if (bootverbose)
2336 		kprintf(
2337 		    "pci%d:%d:%d:%d: Transition from D%d to D%d\n",
2338 		    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
2339 		    dinfo->cfg.func, oldstate, state);
2340 
2341 	PCI_WRITE_CONFIG(dev, child, cfg->pp.pp_status, status, 2);
2342 	if (delay)
2343 		DELAY(delay);
2344 	return (0);
2345 }
2346 
2347 int
2348 pci_get_powerstate_method(device_t dev, device_t child)
2349 {
2350 	struct pci_devinfo *dinfo = device_get_ivars(child);
2351 	pcicfgregs *cfg = &dinfo->cfg;
2352 	uint16_t status;
2353 	int result;
2354 
2355 	if (cfg->pp.pp_cap != 0) {
2356 		status = PCI_READ_CONFIG(dev, child, cfg->pp.pp_status, 2);
2357 		switch (status & PCIM_PSTAT_DMASK) {
2358 		case PCIM_PSTAT_D0:
2359 			result = PCI_POWERSTATE_D0;
2360 			break;
2361 		case PCIM_PSTAT_D1:
2362 			result = PCI_POWERSTATE_D1;
2363 			break;
2364 		case PCIM_PSTAT_D2:
2365 			result = PCI_POWERSTATE_D2;
2366 			break;
2367 		case PCIM_PSTAT_D3:
2368 			result = PCI_POWERSTATE_D3;
2369 			break;
2370 		default:
2371 			result = PCI_POWERSTATE_UNKNOWN;
2372 			break;
2373 		}
2374 	} else {
2375 		/* No support, device is always at D0 */
2376 		result = PCI_POWERSTATE_D0;
2377 	}
2378 	return (result);
2379 }
2380 
2381 /*
2382  * Some convenience functions for PCI device drivers.
2383  */
2384 
2385 static __inline void
2386 pci_set_command_bit(device_t dev, device_t child, uint16_t bit)
2387 {
2388 	uint16_t	command;
2389 
2390 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2391 	command |= bit;
2392 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2393 }
2394 
2395 static __inline void
2396 pci_clear_command_bit(device_t dev, device_t child, uint16_t bit)
2397 {
2398 	uint16_t	command;
2399 
2400 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2401 	command &= ~bit;
2402 	PCI_WRITE_CONFIG(dev, child, PCIR_COMMAND, command, 2);
2403 }
2404 
2405 int
2406 pci_enable_busmaster_method(device_t dev, device_t child)
2407 {
2408 	pci_set_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2409 	return (0);
2410 }
2411 
2412 int
2413 pci_disable_busmaster_method(device_t dev, device_t child)
2414 {
2415 	pci_clear_command_bit(dev, child, PCIM_CMD_BUSMASTEREN);
2416 	return (0);
2417 }
2418 
2419 int
2420 pci_enable_io_method(device_t dev, device_t child, int space)
2421 {
2422 	uint16_t command;
2423 	uint16_t bit;
2424 	char *error;
2425 
2426 	bit = 0;
2427 	error = NULL;
2428 
2429 	switch(space) {
2430 	case SYS_RES_IOPORT:
2431 		bit = PCIM_CMD_PORTEN;
2432 		error = "port";
2433 		break;
2434 	case SYS_RES_MEMORY:
2435 		bit = PCIM_CMD_MEMEN;
2436 		error = "memory";
2437 		break;
2438 	default:
2439 		return (EINVAL);
2440 	}
2441 	pci_set_command_bit(dev, child, bit);
2442 	/* Some devices seem to need a brief stall here, what do to? */
2443 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2444 	if (command & bit)
2445 		return (0);
2446 	device_printf(child, "failed to enable %s mapping!\n", error);
2447 	return (ENXIO);
2448 }
2449 
2450 int
2451 pci_disable_io_method(device_t dev, device_t child, int space)
2452 {
2453 	uint16_t command;
2454 	uint16_t bit;
2455 	char *error;
2456 
2457 	bit = 0;
2458 	error = NULL;
2459 
2460 	switch(space) {
2461 	case SYS_RES_IOPORT:
2462 		bit = PCIM_CMD_PORTEN;
2463 		error = "port";
2464 		break;
2465 	case SYS_RES_MEMORY:
2466 		bit = PCIM_CMD_MEMEN;
2467 		error = "memory";
2468 		break;
2469 	default:
2470 		return (EINVAL);
2471 	}
2472 	pci_clear_command_bit(dev, child, bit);
2473 	command = PCI_READ_CONFIG(dev, child, PCIR_COMMAND, 2);
2474 	if (command & bit) {
2475 		device_printf(child, "failed to disable %s mapping!\n", error);
2476 		return (ENXIO);
2477 	}
2478 	return (0);
2479 }
2480 
2481 /*
2482  * New style pci driver.  Parent device is either a pci-host-bridge or a
2483  * pci-pci-bridge.  Both kinds are represented by instances of pcib.
2484  */
2485 
/*
 * Dump the interesting fields of a device's config header to the
 * console.  Only active when booting verbose.
 */
void
pci_print_verbose(struct pci_devinfo *dinfo)
{

	if (bootverbose) {
		pcicfgregs *cfg = &dinfo->cfg;

		kprintf("found->\tvendor=0x%04x, dev=0x%04x, revid=0x%02x\n",
		    cfg->vendor, cfg->device, cfg->revid);
		kprintf("\tdomain=%d, bus=%d, slot=%d, func=%d\n",
		    cfg->domain, cfg->bus, cfg->slot, cfg->func);
		kprintf("\tclass=%02x-%02x-%02x, hdrtype=0x%02x, mfdev=%d\n",
		    cfg->baseclass, cfg->subclass, cfg->progif, cfg->hdrtype,
		    cfg->mfdev);
		kprintf("\tcmdreg=0x%04x, statreg=0x%04x, cachelnsz=%d (dwords)\n",
		    cfg->cmdreg, cfg->statreg, cfg->cachelnsz);
		kprintf("\tlattimer=0x%02x (%d ns), mingnt=0x%02x (%d ns), maxlat=0x%02x (%d ns)\n",
		    cfg->lattimer, cfg->lattimer * 30, cfg->mingnt,
		    cfg->mingnt * 250, cfg->maxlat, cfg->maxlat * 250);
		if (cfg->intpin > 0)
			kprintf("\tintpin=%c, irq=%d\n",
			    cfg->intpin +'a' -1, cfg->intline);
		/* Power management capability: report supported D-states. */
		if (cfg->pp.pp_cap) {
			uint16_t status;

			status = pci_read_config(cfg->dev, cfg->pp.pp_status, 2);
			kprintf("\tpowerspec %d  supports D0%s%s D3  current D%d\n",
			    cfg->pp.pp_cap & PCIM_PCAP_SPEC,
			    cfg->pp.pp_cap & PCIM_PCAP_D1SUPP ? " D1" : "",
			    cfg->pp.pp_cap & PCIM_PCAP_D2SUPP ? " D2" : "",
			    status & PCIM_PSTAT_DMASK);
		}
		/* MSI capability: message count and feature bits. */
		if (cfg->msi.msi_location) {
			int ctrl;

			ctrl = cfg->msi.msi_ctrl;
			kprintf("\tMSI supports %d message%s%s%s\n",
			    cfg->msi.msi_msgnum,
			    (cfg->msi.msi_msgnum == 1) ? "" : "s",
			    (ctrl & PCIM_MSICTRL_64BIT) ? ", 64 bit" : "",
			    (ctrl & PCIM_MSICTRL_VECTOR) ? ", vector masks":"");
		}
		/* MSI-X capability: message count plus table/PBA BAR(s). */
		if (cfg->msix.msix_location) {
			kprintf("\tMSI-X supports %d message%s ",
			    cfg->msix.msix_msgnum,
			    (cfg->msix.msix_msgnum == 1) ? "" : "s");
			if (cfg->msix.msix_table_bar == cfg->msix.msix_pba_bar)
				kprintf("in map 0x%x\n",
				    cfg->msix.msix_table_bar);
			else
				kprintf("in maps 0x%x and 0x%x\n",
				    cfg->msix.msix_table_bar,
				    cfg->msix.msix_pba_bar);
		}
		pci_print_verbose_expr(cfg);
	}
}
2543 
/*
 * Print PCI Express capability details (version, port type and, where
 * implemented, slot capabilities) for a device; bootverbose only.
 */
static void
pci_print_verbose_expr(const pcicfgregs *cfg)
{
	const struct pcicfg_expr *expr = &cfg->expr;
	const char *port_name;
	uint16_t port_type;

	if (!bootverbose)
		return;

	if (expr->expr_ptr == 0) /* No PCI Express capability */
		return;

	kprintf("\tPCI Express ver.%d cap=0x%04x",
		expr->expr_cap & PCIEM_CAP_VER_MASK, expr->expr_cap);
	/* Only version-1 capability layouts are decoded further. */
	if ((expr->expr_cap & PCIEM_CAP_VER_MASK) != PCIEM_CAP_VER_1)
		goto back;

	port_type = expr->expr_cap & PCIEM_CAP_PORT_TYPE;

	switch (port_type) {
	case PCIE_END_POINT:
		port_name = "DEVICE";
		break;
	case PCIE_LEG_END_POINT:
		port_name = "LEGDEV";
		break;
	case PCIE_ROOT_PORT:
		port_name = "ROOT";
		break;
	case PCIE_UP_STREAM_PORT:
		port_name = "UPSTREAM";
		break;
	case PCIE_DOWN_STREAM_PORT:
		port_name = "DOWNSTRM";
		break;
	case PCIE_PCIE2PCI_BRIDGE:
		port_name = "PCIE2PCI";
		break;
	case PCIE_PCI2PCIE_BRIDGE:
		port_name = "PCI2PCIE";
		break;
	default:
		port_name = NULL;
		break;
	}
	/* Suppress the label for ports that claim no implemented slot. */
	if ((port_type == PCIE_ROOT_PORT ||
	     port_type == PCIE_DOWN_STREAM_PORT) &&
	    !(expr->expr_cap & PCIEM_CAP_SLOT_IMPL))
		port_name = NULL;
	if (port_name != NULL)
		kprintf("[%s]", port_name);

	if (pcie_slotimpl(cfg)) {
		kprintf(", slotcap=0x%08x", expr->expr_slotcap);
		if (expr->expr_slotcap & PCIEM_SLTCAP_HP_CAP)
			kprintf("[HOTPLUG]");
	}
back:
	kprintf("\n");
}
2605 
2606 static int
2607 pci_porten(device_t pcib, int b, int s, int f)
2608 {
2609 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2610 		& PCIM_CMD_PORTEN) != 0;
2611 }
2612 
2613 static int
2614 pci_memen(device_t pcib, int b, int s, int f)
2615 {
2616 	return (PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2)
2617 		& PCIM_CMD_MEMEN) != 0;
2618 }
2619 
2620 /*
2621  * Add a resource based on a pci map register. Return 1 if the map
2622  * register is a 32bit map register or 2 if it is a 64bit register.
2623  */
static int
pci_add_map(device_t pcib, device_t bus, device_t dev,
    int b, int s, int f, int reg, struct resource_list *rl, int force,
    int prefetch)
{
	uint32_t map;
	pci_addr_t base;
	pci_addr_t start, end, count;
	uint8_t ln2size;
	uint8_t ln2range;
	uint32_t testval;
	uint16_t cmd;
	int type;
	int barlen;
	struct resource *res;

	/*
	 * Size the BAR: save the current value, write all-ones, read back
	 * the size mask, then restore the original value.
	 */
	map = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, 0xffffffff, 4);
	testval = PCIB_READ_CONFIG(pcib, b, s, f, reg, 4);
	PCIB_WRITE_CONFIG(pcib, b, s, f, reg, map, 4);

	if (PCI_BAR_MEM(map)) {
		type = SYS_RES_MEMORY;
		if (map & PCIM_BAR_MEM_PREFETCH)
			prefetch = 1;
	} else
		type = SYS_RES_IOPORT;
	ln2size = pci_mapsize(testval);
	ln2range = pci_maprange(testval);
	base = pci_mapbase(map);
	barlen = ln2range == 64 ? 2 : 1;

	/*
	 * For I/O registers, if bottom bit is set, and the next bit up
	 * isn't clear, we know we have a BAR that doesn't conform to the
	 * spec, so ignore it.  Also, sanity check the size of the data
	 * areas to the type of memory involved.  Memory must be at least
	 * 16 bytes in size, while I/O ranges must be at least 4.
	 */
	if (PCI_BAR_IO(testval) && (testval & PCIM_BAR_IO_RESERVED) != 0)
		return (barlen);
	if ((type == SYS_RES_MEMORY && ln2size < 4) ||
	    (type == SYS_RES_IOPORT && ln2size < 2))
		return (barlen);

	if (ln2range == 64)
		/* Read the other half of a 64bit map register */
		base |= (uint64_t) PCIB_READ_CONFIG(pcib, b, s, f, reg + 4, 4) << 32;
	if (bootverbose) {
		kprintf("\tmap[%02x]: type %s, range %2d, base %#jx, size %2d",
		    reg, pci_maptype(map), ln2range, (uintmax_t)base, ln2size);
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			kprintf(", port disabled\n");
		else if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			kprintf(", memory disabled\n");
		else
			kprintf(", enabled\n");
	}

	/*
	 * If base is 0, then we have problems.  It is best to ignore
	 * such entries for the moment.  These will be allocated later if
	 * the driver specifically requests them.  However, some
	 * removable buses look better when all resources are allocated,
	 * so allow '0' to be overridden.
	 *
	 * Similarly treat maps whose value is the same as the test value
	 * read back.  These maps have had all f's written to them by the
	 * BIOS in an attempt to disable the resources.
	 */
	if (!force && (base == 0 || map == testval))
		return (barlen);
	/* Reject addresses that do not fit in the kernel's u_long. */
	if ((u_long)base != base) {
		device_printf(bus,
		    "pci%d:%d:%d:%d bar %#x too many address bits",
		    pci_get_domain(dev), b, s, f, reg);
		return (barlen);
	}

	/*
	 * This code theoretically does the right thing, but has
	 * undesirable side effects in some cases where peripherals
	 * respond oddly to having these bits enabled.  Let the user
	 * be able to turn them off (since pci_enable_io_modes is 1 by
	 * default).
	 */
	if (pci_enable_io_modes) {
		/* Turn on resources that have been left off by a lazy BIOS */
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_PORTEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f)) {
			cmd = PCIB_READ_CONFIG(pcib, b, s, f, PCIR_COMMAND, 2);
			cmd |= PCIM_CMD_MEMEN;
			PCIB_WRITE_CONFIG(pcib, b, s, f, PCIR_COMMAND, cmd, 2);
		}
	} else {
		if (type == SYS_RES_IOPORT && !pci_porten(pcib, b, s, f))
			return (barlen);
		if (type == SYS_RES_MEMORY && !pci_memen(pcib, b, s, f))
			return (barlen);
	}

	count = 1 << ln2size;
	if (base == 0 || base == pci_mapbase(testval)) {
		start = 0;	/* Let the parent decide. */
		end = ~0ULL;
	} else {
		start = base;
		end = base + (1 << ln2size) - 1;
	}
	resource_list_add(rl, type, reg, start, end, count, -1);

	/*
	 * Try to allocate the resource for this BAR from our parent
	 * so that this resource range is already reserved.  The
	 * driver for this device will later inherit this resource in
	 * pci_alloc_resource().
	 */
	res = resource_list_alloc(rl, bus, dev, type, &reg, start, end, count,
	    prefetch ? RF_PREFETCHABLE : 0, -1);
	if (res == NULL) {
		/*
		 * If the allocation fails, delete the resource list
		 * entry to force pci_alloc_resource() to allocate
		 * resources from the parent.
		 */
		resource_list_delete(rl, type, reg);
#ifdef PCI_BAR_CLEAR
		/* Clear the BAR */
		start = 0;
#else	/* !PCI_BAR_CLEAR */
		/*
		 * Don't clear BAR here.  Some BIOS lists HPET as a
		 * PCI function, clearing the BAR causes HPET timer
		 * stop ticking.
		 */
		if (bootverbose) {
			kprintf("pci:%d:%d:%d: resource reservation failed "
				"%#jx - %#jx\n", b, s, f,
				(intmax_t)start, (intmax_t)end);
		}
		return (barlen);
#endif	/* PCI_BAR_CLEAR */
	} else {
		start = rman_get_start(res);
	}
	/* Write the (possibly parent-assigned) base back to the BAR. */
	pci_write_config(dev, reg, start, 4);
	if (ln2range == 64)
		pci_write_config(dev, reg + 4, start >> 32, 4);
	return (barlen);
}
2778 
2779 /*
2780  * For ATA devices we need to decide early what addressing mode to use.
2781  * Legacy demands that the primary and secondary ATA ports sits on the
2782  * same addresses that old ISA hardware did. This dictates that we use
2783  * those addresses and ignore the BAR's if we cannot set PCI native
2784  * addressing mode.
2785  */
static void
pci_ata_maps(device_t pcib, device_t bus, device_t dev, int b,
    int s, int f, struct resource_list *rl, int force, uint32_t prefetchmask)
{
	int rid, type, progif;
#if 0
	/* if this device supports PCI native addressing use it */
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	if ((progif & 0x8a) == 0x8a) {
		if (pci_mapbase(pci_read_config(dev, PCIR_BAR(0), 4)) &&
		    pci_mapbase(pci_read_config(dev, PCIR_BAR(2), 4))) {
			kprintf("Trying ATA native PCI addressing mode\n");
			pci_write_config(dev, PCIR_PROGIF, progif | 0x05, 1);
		}
	}
#endif
	progif = pci_read_config(dev, PCIR_PROGIF, 1);
	type = SYS_RES_IOPORT;
	/*
	 * Primary channel: map BAR 0/1 in native mode, otherwise hard-wire
	 * the legacy ISA addresses 0x1f0-0x1f7 and 0x3f6.
	 */
	if (progif & PCIP_STORAGE_IDE_MODEPRIM) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(0), rl, force,
		    prefetchmask & (1 << 0));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(1), rl, force,
		    prefetchmask & (1 << 1));
	} else {
		rid = PCIR_BAR(0);
		resource_list_add(rl, type, rid, 0x1f0, 0x1f7, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x1f0, 0x1f7, 8,
		    0, -1);
		rid = PCIR_BAR(1);
		resource_list_add(rl, type, rid, 0x3f6, 0x3f6, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x3f6, 0x3f6, 1,
		    0, -1);
	}
	/*
	 * Secondary channel: map BAR 2/3 in native mode, otherwise use the
	 * legacy ISA addresses 0x170-0x177 and 0x376.
	 */
	if (progif & PCIP_STORAGE_IDE_MODESEC) {
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(2), rl, force,
		    prefetchmask & (1 << 2));
		pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(3), rl, force,
		    prefetchmask & (1 << 3));
	} else {
		rid = PCIR_BAR(2);
		resource_list_add(rl, type, rid, 0x170, 0x177, 8, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x170, 0x177, 8,
		    0, -1);
		rid = PCIR_BAR(3);
		resource_list_add(rl, type, rid, 0x376, 0x376, 1, -1);
		resource_list_alloc(rl, bus, dev, type, &rid, 0x376, 0x376, 1,
		    0, -1);
	}
	/* The remaining BARs (4 and 5) always get normal map treatment. */
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(4), rl, force,
	    prefetchmask & (1 << 4));
	pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(5), rl, force,
	    prefetchmask & (1 << 5));
}
2839 
/*
 * Determine the INTx IRQ for a device (tunable override, intline
 * register, or bus routing) and register it as the rid 0 SYS_RES_IRQ
 * resource.
 */
static void
pci_assign_interrupt(device_t bus, device_t dev, int force_route)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	char tunable_name[64];
	int irq;

	/* Has to have an intpin to have an interrupt. */
	if (cfg->intpin == 0)
		return;

	/* Let the user override the IRQ with a tunable. */
	irq = PCI_INVALID_IRQ;
	ksnprintf(tunable_name, sizeof(tunable_name),
	    "hw.pci%d.%d.%d.INT%c.irq",
	    cfg->domain, cfg->bus, cfg->slot, cfg->intpin + 'A' - 1);
	/* Only tunable values in the range 1-254 are honored. */
	if (TUNABLE_INT_FETCH(tunable_name, &irq) && (irq >= 255 || irq <= 0))
		irq = PCI_INVALID_IRQ;

	/*
	 * If we didn't get an IRQ via the tunable, then we either use the
	 * IRQ value in the intline register or we ask the bus to route an
	 * interrupt for us.  If force_route is true, then we only use the
	 * value in the intline register if the bus was unable to assign an
	 * IRQ.
	 */
	if (!PCI_INTERRUPT_VALID(irq)) {
		if (!PCI_INTERRUPT_VALID(cfg->intline) || force_route)
			irq = PCI_ASSIGN_INTERRUPT(bus, dev);
		if (!PCI_INTERRUPT_VALID(irq))
			irq = cfg->intline;
	}

	/* If after all that we don't have an IRQ, just bail. */
	if (!PCI_INTERRUPT_VALID(irq))
		return;

	/* Update the config register if it changed. */
	if (irq != cfg->intline) {
		cfg->intline = irq;
		pci_write_config(dev, PCIR_INTLINE, irq, 1);
	}

	/* Add this IRQ as rid 0 interrupt resource. */
	resource_list_add(&dinfo->resources, SYS_RES_IRQ, 0, irq, irq, 1,
	    machintr_intr_cpuid(irq));
}
2888 
/*
 * Walk the device's BARs (plus any quirked extra map registers), add the
 * decoded ranges to its resource list, then route its INTx interrupt.
 */
void
pci_add_resources(device_t pcib, device_t bus, device_t dev, int force, uint32_t prefetchmask)
{
	struct pci_devinfo *dinfo = device_get_ivars(dev);
	pcicfgregs *cfg = &dinfo->cfg;
	struct resource_list *rl = &dinfo->resources;
	struct pci_quirk *q;
	int b, i, f, s;

	b = cfg->bus;
	s = cfg->slot;
	f = cfg->func;

	/* ATA devices needs special map treatment */
	if ((pci_get_class(dev) == PCIC_STORAGE) &&
	    (pci_get_subclass(dev) == PCIS_STORAGE_IDE) &&
	    ((pci_get_progif(dev) & PCIP_STORAGE_IDE_MASTERDEV) ||
	     (!pci_read_config(dev, PCIR_BAR(0), 4) &&
	      !pci_read_config(dev, PCIR_BAR(2), 4))) )
		pci_ata_maps(pcib, bus, dev, b, s, f, rl, force, prefetchmask);
	else
		/* pci_add_map() returns 1 or 2 depending on BAR width. */
		for (i = 0; i < cfg->nummaps;)
			i += pci_add_map(pcib, bus, dev, b, s, f, PCIR_BAR(i),
			    rl, force, prefetchmask & (1 << i));

	/*
	 * Add additional, quirked resources.
	 */
	for (q = &pci_quirks[0]; q->devid; q++) {
		if (q->devid == ((cfg->device << 16) | cfg->vendor)
		    && q->type == PCI_QUIRK_MAP_REG)
			pci_add_map(pcib, bus, dev, b, s, f, q->arg1, rl,
			  force, 0);
	}

	if (cfg->intpin > 0 && PCI_INTERRUPT_VALID(cfg->intline)) {
		/*
		 * Try to re-route interrupts. Sometimes the BIOS or
		 * firmware may leave bogus values in these registers.
		 * If the re-route fails, then just stick with what we
		 * have.
		 */
		pci_assign_interrupt(bus, dev, 1);
	}
}
2934 
/*
 * Probe every slot/function behind the parent bridge and create a child
 * device for each function that responds.
 */
void
pci_add_children(device_t dev, int domain, int busno, size_t dinfo_size)
{
#define	REG(n, w)	PCIB_READ_CONFIG(pcib, busno, s, f, n, w)
	device_t pcib = device_get_parent(dev);
	struct pci_devinfo *dinfo;
	int maxslots;
	int s, f, pcifunchigh;
	uint8_t hdrtype;

	KASSERT(dinfo_size >= sizeof(struct pci_devinfo),
	    ("dinfo_size too small"));
	maxslots = PCIB_MAXSLOTS(pcib);
	for (s = 0; s <= maxslots; s++) {
		pcifunchigh = 0;
		f = 0;
		DELAY(1);	/* brief settle delay -- TODO confirm why needed */
		hdrtype = REG(PCIR_HDRTYPE, 1);
		/* Skip slots that report an unrecognized header type. */
		if ((hdrtype & PCIM_HDRTYPE) > PCI_MAXHDRTYPE)
			continue;
		/* Multi-function devices can have up to PCI_FUNCMAX functions. */
		if (hdrtype & PCIM_MFDEV)
			pcifunchigh = PCI_FUNCMAX;
		for (f = 0; f <= pcifunchigh; f++) {
			dinfo = pci_read_device(pcib, domain, busno, s, f,
			    dinfo_size);
			if (dinfo != NULL) {
				pci_add_child(dev, dinfo);
			}
		}
	}
#undef REG
}
2967 
/*
 * Create a new-bus child for a probed PCI function and reserve its
 * resources.  Config state is saved and then restored before resources
 * are added -- presumably to put the device into a known power/config
 * state; confirm against pci_cfg_save/pci_cfg_restore.
 */
void
pci_add_child(device_t bus, struct pci_devinfo *dinfo)
{
	device_t pcib;

	pcib = device_get_parent(bus);
	dinfo->cfg.dev = device_add_child(bus, NULL, -1);
	device_set_ivars(dinfo->cfg.dev, dinfo);
	resource_list_init(&dinfo->resources);
	pci_cfg_save(dinfo->cfg.dev, dinfo, 0);
	pci_cfg_restore(dinfo->cfg.dev, dinfo);
	pci_print_verbose(dinfo);
	pci_add_resources(pcib, bus, dinfo->cfg.dev, 0, 0);
}
2982 
2983 static int
2984 pci_probe(device_t dev)
2985 {
2986 	device_set_desc(dev, "PCI bus");
2987 
2988 	/* Allow other subclasses to override this driver. */
2989 	return (-1000);
2990 }
2991 
static int
pci_attach(device_t dev)
{
	int busno, domain;

	/*
	 * Since there can be multiple independently numbered PCI
	 * busses on systems with multiple PCI domains, we can't use
	 * the unit number to decide which bus we are probing. We ask
	 * the parent pcib what our domain and bus numbers are.
	 */
	domain = pcib_get_domain(dev);
	busno = pcib_get_bus(dev);
	if (bootverbose)
		device_printf(dev, "domain=%d, physical bus=%d\n",
		    domain, busno);

	/* Enumerate everything behind the bridge and add it as children. */
	pci_add_children(dev, domain, busno, sizeof(struct pci_devinfo));

	return (bus_generic_attach(dev));
}
3013 
/*
 * Bus suspend method: save each child's config space, suspend the
 * children via the generic bus code, then (when power management is
 * enabled) place each attached type 0 child in its sleep power state.
 */
int
pci_suspend(device_t dev)
{
	int dstate, error, i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Save the PCI configuration space for each child and set the
	 * device in the appropriate power state for this sleep state.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		pci_cfg_save(child, dinfo, 0);
	}

	/* Suspend devices before potentially powering them down. */
	error = bus_generic_suspend(dev);
	if (error) {
		kfree(devlist, M_TEMP);
		return (error);
	}

	/*
	 * Always set the device to D3.  If ACPI suggests a different
	 * power state, use it instead.  If ACPI is not present, the
	 * firmware is responsible for managing device power.  Skip
	 * children who aren't attached since they are powered down
	 * separately.  Only manage type 0 devices for now.
	 */
	for (i = 0; acpi_dev && i < numdevs; i++) {
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (device_is_attached(child) && dinfo->cfg.hdrtype == 0) {
			dstate = PCI_POWERSTATE_D3;
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, &dstate);
			pci_set_powerstate(child, dstate);
		}
	}
	kfree(devlist, M_TEMP);
	return (0);
}
3061 
/*
 * Bus resume method: power each child back up to D0 (when power
 * management is enabled), restore its saved config space, and then
 * resume the children via the generic bus code.
 */
int
pci_resume(device_t dev)
{
	int i, numdevs;
	device_t acpi_dev, child, *devlist;
	struct pci_devinfo *dinfo;

	/*
	 * Set each child to D0 and restore its PCI configuration space.
	 */
	acpi_dev = NULL;
	if (pci_do_power_resume)
		acpi_dev = devclass_get_device(devclass_find("acpi"), 0);
	device_get_children(dev, &devlist, &numdevs);
	for (i = 0; i < numdevs; i++) {
		/*
		 * Notify ACPI we're going to D0 but ignore the result.  If
		 * ACPI is not present, the firmware is responsible for
		 * managing device power.  Only manage type 0 devices for now.
		 */
		child = devlist[i];
		dinfo = (struct pci_devinfo *) device_get_ivars(child);
		if (acpi_dev && device_is_attached(child) &&
		    dinfo->cfg.hdrtype == 0) {
			ACPI_PWR_FOR_SLEEP(acpi_dev, child, NULL);
			pci_set_powerstate(child, PCI_POWERSTATE_D0);
		}

		/* Now the device is powered up, restore its config space. */
		pci_cfg_restore(child, dinfo);
	}
	kfree(devlist, M_TEMP);
	return (bus_generic_resume(dev));
}
3096 
3097 static void
3098 pci_load_vendor_data(void)
3099 {
3100 	caddr_t vendordata, info;
3101 
3102 	if ((vendordata = preload_search_by_type("pci_vendor_data")) != NULL) {
3103 		info = preload_search_info(vendordata, MODINFO_ADDR);
3104 		pci_vendordata = *(char **)info;
3105 		info = preload_search_info(vendordata, MODINFO_SIZE);
3106 		pci_vendordata_size = *(size_t *)info;
3107 		/* terminate the database */
3108 		pci_vendordata[pci_vendordata_size] = '\n';
3109 	}
3110 }
3111 
3112 void
3113 pci_driver_added(device_t dev, driver_t *driver)
3114 {
3115 	int numdevs;
3116 	device_t *devlist;
3117 	device_t child;
3118 	struct pci_devinfo *dinfo;
3119 	int i;
3120 
3121 	if (bootverbose)
3122 		device_printf(dev, "driver added\n");
3123 	DEVICE_IDENTIFY(driver, dev);
3124 	device_get_children(dev, &devlist, &numdevs);
3125 	for (i = 0; i < numdevs; i++) {
3126 		child = devlist[i];
3127 		if (device_get_state(child) != DS_NOTPRESENT)
3128 			continue;
3129 		dinfo = device_get_ivars(child);
3130 		pci_print_verbose(dinfo);
3131 		if (bootverbose)
3132 			kprintf("pci%d:%d:%d:%d: reprobing on driver added\n",
3133 			    dinfo->cfg.domain, dinfo->cfg.bus, dinfo->cfg.slot,
3134 			    dinfo->cfg.func);
3135 		pci_cfg_restore(child, dinfo);
3136 		if (device_probe_and_attach(child) != 0)
3137 			pci_cfg_save(child, dinfo, 1);
3138 	}
3139 	kfree(devlist, M_TEMP);
3140 }
3141 
3142 static void
3143 pci_child_detached(device_t parent __unused, device_t child)
3144 {
3145 	/* Turn child's power off */
3146 	pci_cfg_save(child, device_get_ivars(child), 1);
3147 }
3148 
/*
 * Bus setup_intr method.  Hook the handler up via the generic bus
 * code; then, for direct children only, unmask INTx or program the
 * MSI/MSI-X message backing the given IRQ resource.  On any MSI
 * mapping failure the freshly-installed handler is torn down again.
 */
int
pci_setup_intr(device_t dev, device_t child, struct resource *irq, int flags,
    driver_intr_t *intr, void *arg, void **cookiep, lwkt_serialize_t serializer)
{
#ifdef MSI
	struct pci_devinfo *dinfo;
	struct msix_table_entry *mte;
	struct msix_vector *mv;
	uint64_t addr;
	uint32_t data;
	int rid;
#endif
	int error;
	void *cookie;
	error = bus_generic_setup_intr(dev, child, irq, flags, intr,
	    arg, &cookie, serializer);
	if (error)
		return (error);

	/* If this is not a direct child, just bail out. */
	if (device_get_parent(child) != dev) {
		*cookiep = cookie;
		return(0);
	}

	/* Clear the INTx-disable command bit so legacy interrupts flow. */
	pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
#ifdef MSI
	rid = rman_get_rid(irq);
	if (rid == 0) {
		/* Make sure that INTx is enabled */
		pci_clear_command_bit(dev, child, PCIM_CMD_INTxDIS);
	} else {
		/*
		 * Check to see if the interrupt is MSI or MSI-X.
		 * Ask our parent to map the MSI and give
		 * us the address and data register values.
		 * If we fail for some reason, teardown the
		 * interrupt handler.
		 */
		dinfo = device_get_ivars(child);
		if (dinfo->cfg.msi.msi_alloc > 0) {
			/* First handler on this device maps the MSI vector. */
			if (dinfo->cfg.msi.msi_addr == 0) {
				KASSERT(dinfo->cfg.msi.msi_handlers == 0,
			    ("MSI has handlers, but vectors not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				dinfo->cfg.msi.msi_addr = addr;
				dinfo->cfg.msi.msi_data = data;
				pci_enable_msi(child, addr, data);
			}
			dinfo->cfg.msi.msi_handlers++;
		} else {
			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
			    ("No MSI or MSI-X interrupts allocated"));
			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
			    ("MSI-X index too high"));
			/* rid N maps to table entry N-1. */
			mte = &dinfo->cfg.msix.msix_table[rid - 1];
			KASSERT(mte->mte_vector != 0, ("no message vector"));
			mv = &dinfo->cfg.msix.msix_vectors[mte->mte_vector - 1];
			KASSERT(mv->mv_irq == rman_get_start(irq),
			    ("IRQ mismatch"));
			if (mv->mv_address == 0) {
				KASSERT(mte->mte_handlers == 0,
		    ("MSI-X table entry has handlers, but vector not mapped"));
				error = PCIB_MAP_MSI(device_get_parent(dev),
				    child, rman_get_start(irq), &addr, &data);
				if (error)
					goto bad;
				mv->mv_address = addr;
				mv->mv_data = data;
			}
			/* First handler on this entry programs and unmasks it. */
			if (mte->mte_handlers == 0) {
				pci_enable_msix(child, rid - 1, mv->mv_address,
				    mv->mv_data);
				pci_unmask_msix(child, rid - 1);
			}
			mte->mte_handlers++;
		}

		/* Make sure that INTx is disabled if we are using MSI/MSIX */
		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
	bad:
		if (error) {
			(void)bus_generic_teardown_intr(dev, child, irq,
			    cookie);
			return (error);
		}
	}
#endif
	*cookiep = cookie;
	return (0);
}
3243 
3244 int
3245 pci_teardown_intr(device_t dev, device_t child, struct resource *irq,
3246     void *cookie)
3247 {
3248 #ifdef MSI
3249 	struct msix_table_entry *mte;
3250 	struct resource_list_entry *rle;
3251 	struct pci_devinfo *dinfo;
3252 	int rid;
3253 #endif
3254 	int error;
3255 
3256 	if (irq == NULL || !(rman_get_flags(irq) & RF_ACTIVE))
3257 		return (EINVAL);
3258 
3259 	/* If this isn't a direct child, just bail out */
3260 	if (device_get_parent(child) != dev)
3261 		return(bus_generic_teardown_intr(dev, child, irq, cookie));
3262 
3263 	pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3264 #ifdef MSI
3265 	rid = rman_get_rid(irq);
3266 	if (rid == 0) {
3267 		/* Mask INTx */
3268 		pci_set_command_bit(dev, child, PCIM_CMD_INTxDIS);
3269 	} else {
3270 		/*
3271 		 * Check to see if the interrupt is MSI or MSI-X.  If so,
3272 		 * decrement the appropriate handlers count and mask the
3273 		 * MSI-X message, or disable MSI messages if the count
3274 		 * drops to 0.
3275 		 */
3276 		dinfo = device_get_ivars(child);
3277 		rle = resource_list_find(&dinfo->resources, SYS_RES_IRQ, rid);
3278 		if (rle->res != irq)
3279 			return (EINVAL);
3280 		if (dinfo->cfg.msi.msi_alloc > 0) {
3281 			KASSERT(rid <= dinfo->cfg.msi.msi_alloc,
3282 			    ("MSI-X index too high"));
3283 			if (dinfo->cfg.msi.msi_handlers == 0)
3284 				return (EINVAL);
3285 			dinfo->cfg.msi.msi_handlers--;
3286 			if (dinfo->cfg.msi.msi_handlers == 0)
3287 				pci_disable_msi(child);
3288 		} else {
3289 			KASSERT(dinfo->cfg.msix.msix_alloc > 0,
3290 			    ("No MSI or MSI-X interrupts allocated"));
3291 			KASSERT(rid <= dinfo->cfg.msix.msix_table_len,
3292 			    ("MSI-X index too high"));
3293 			mte = &dinfo->cfg.msix.msix_table[rid - 1];
3294 			if (mte->mte_handlers == 0)
3295 				return (EINVAL);
3296 			mte->mte_handlers--;
3297 			if (mte->mte_handlers == 0)
3298 				pci_mask_msix(child, rid - 1);
3299 		}
3300 	}
3301 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3302 	if (rid > 0)
3303 		KASSERT(error == 0,
3304 		    ("%s: generic teardown failed for MSI/MSI-X", __func__));
3305 #endif
3306 	error = bus_generic_teardown_intr(dev, child, irq, cookie);
3307 	return (error);
3308 }
3309 
/*
 * Bus print_child method: print the child's resource usage (ports,
 * memory, IRQs) and its slot/function location on this bus.  Returns
 * the number of characters printed.
 */
int
pci_print_child(device_t dev, device_t child)
{
	struct pci_devinfo *dinfo;
	struct resource_list *rl;
	int retval = 0;

	dinfo = device_get_ivars(child);
	rl = &dinfo->resources;

	retval += bus_print_child_header(dev, child);

	retval += resource_list_print_type(rl, "port", SYS_RES_IOPORT, "%#lx");
	retval += resource_list_print_type(rl, "mem", SYS_RES_MEMORY, "%#lx");
	retval += resource_list_print_type(rl, "irq", SYS_RES_IRQ, "%ld");
	/*
	 * NOTE(review): this prints the flags of the bus (dev), not of
	 * the child -- matches upstream FreeBSD, but confirm intended.
	 */
	if (device_get_flags(dev))
		retval += kprintf(" flags %#x", device_get_flags(dev));

	retval += kprintf(" at device %d.%d", pci_get_slot(child),
	    pci_get_function(child));

	retval += bus_print_child_footer(dev, child);

	return (retval);
}
3335 
/*
 * Class/subclass description table used by pci_probe_nomatch() to
 * print a generic description for devices no driver claimed.  A
 * subclass value of -1 supplies the fallback text for the whole
 * class; the table ends with a NULL desc sentinel.
 */
static struct
{
	int	class;
	int	subclass;
	char	*desc;
} pci_nomatch_tab[] = {
	{PCIC_OLD,		-1,			"old"},
	{PCIC_OLD,		PCIS_OLD_NONVGA,	"non-VGA display device"},
	{PCIC_OLD,		PCIS_OLD_VGA,		"VGA-compatible display device"},
	{PCIC_STORAGE,		-1,			"mass storage"},
	{PCIC_STORAGE,		PCIS_STORAGE_SCSI,	"SCSI"},
	{PCIC_STORAGE,		PCIS_STORAGE_IDE,	"ATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_FLOPPY,	"floppy disk"},
	{PCIC_STORAGE,		PCIS_STORAGE_IPI,	"IPI"},
	{PCIC_STORAGE,		PCIS_STORAGE_RAID,	"RAID"},
	{PCIC_STORAGE,		PCIS_STORAGE_ATA_ADMA,	"ATA (ADMA)"},
	{PCIC_STORAGE,		PCIS_STORAGE_SATA,	"SATA"},
	{PCIC_STORAGE,		PCIS_STORAGE_SAS,	"SAS"},
	{PCIC_NETWORK,		-1,			"network"},
	{PCIC_NETWORK,		PCIS_NETWORK_ETHERNET,	"ethernet"},
	{PCIC_NETWORK,		PCIS_NETWORK_TOKENRING,	"token ring"},
	{PCIC_NETWORK,		PCIS_NETWORK_FDDI,	"fddi"},
	{PCIC_NETWORK,		PCIS_NETWORK_ATM,	"ATM"},
	{PCIC_NETWORK,		PCIS_NETWORK_ISDN,	"ISDN"},
	{PCIC_DISPLAY,		-1,			"display"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_VGA,	"VGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_XGA,	"XGA"},
	{PCIC_DISPLAY,		PCIS_DISPLAY_3D,	"3D"},
	{PCIC_MULTIMEDIA,	-1,			"multimedia"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_VIDEO,	"video"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_AUDIO,	"audio"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_TELE,	"telephony"},
	{PCIC_MULTIMEDIA,	PCIS_MULTIMEDIA_HDA,	"HDA"},
	{PCIC_MEMORY,		-1,			"memory"},
	{PCIC_MEMORY,		PCIS_MEMORY_RAM,	"RAM"},
	{PCIC_MEMORY,		PCIS_MEMORY_FLASH,	"flash"},
	{PCIC_BRIDGE,		-1,			"bridge"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_HOST,	"HOST-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_ISA,	"PCI-ISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_EISA,	"PCI-EISA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_MCA,	"PCI-MCA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCI,	"PCI-PCI"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_PCMCIA,	"PCI-PCMCIA"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_NUBUS,	"PCI-NuBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_CARDBUS,	"PCI-CardBus"},
	{PCIC_BRIDGE,		PCIS_BRIDGE_RACEWAY,	"PCI-RACEway"},
	{PCIC_SIMPLECOMM,	-1,			"simple comms"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_UART,	"UART"},	/* could detect 16550 */
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_PAR,	"parallel port"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MULSER,	"multiport serial"},
	{PCIC_SIMPLECOMM,	PCIS_SIMPLECOMM_MODEM,	"generic modem"},
	{PCIC_BASEPERIPH,	-1,			"base peripheral"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PIC,	"interrupt controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_DMA,	"DMA controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_TIMER,	"timer"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_RTC,	"realtime clock"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_PCIHOT,	"PCI hot-plug controller"},
	{PCIC_BASEPERIPH,	PCIS_BASEPERIPH_SDHC,	"SD host controller"},
	{PCIC_INPUTDEV,		-1,			"input device"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_KEYBOARD,	"keyboard"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_DIGITIZER,"digitizer"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_MOUSE,	"mouse"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_SCANNER,	"scanner"},
	{PCIC_INPUTDEV,		PCIS_INPUTDEV_GAMEPORT,	"gameport"},
	{PCIC_DOCKING,		-1,			"docking station"},
	{PCIC_PROCESSOR,	-1,			"processor"},
	{PCIC_SERIALBUS,	-1,			"serial bus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FW,	"FireWire"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_ACCESS,	"AccessBus"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SSA,	"SSA"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_USB,	"USB"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_FC,	"Fibre Channel"},
	{PCIC_SERIALBUS,	PCIS_SERIALBUS_SMBUS,	"SMBus"},
	{PCIC_WIRELESS,		-1,			"wireless controller"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IRDA,	"iRDA"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_IR,	"IR"},
	{PCIC_WIRELESS,		PCIS_WIRELESS_RF,	"RF"},
	{PCIC_INTELLIIO,	-1,			"intelligent I/O controller"},
	{PCIC_INTELLIIO,	PCIS_INTELLIIO_I2O,	"I2O"},
	{PCIC_SATCOM,		-1,			"satellite communication"},
	{PCIC_SATCOM,		PCIS_SATCOM_TV,		"sat TV"},
	{PCIC_SATCOM,		PCIS_SATCOM_AUDIO,	"sat audio"},
	{PCIC_SATCOM,		PCIS_SATCOM_VOICE,	"sat voice"},
	{PCIC_SATCOM,		PCIS_SATCOM_DATA,	"sat data"},
	{PCIC_CRYPTO,		-1,			"encrypt/decrypt"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_NETCOMP,	"network/computer crypto"},
	{PCIC_CRYPTO,		PCIS_CRYPTO_ENTERTAIN,	"entertainment crypto"},
	{PCIC_DASP,		-1,			"dasp"},
	{PCIC_DASP,		PCIS_DASP_DPIO,		"DPIO module"},
	{0, 0,		NULL}	/* sentinel */
};
3427 
3428 void
3429 pci_probe_nomatch(device_t dev, device_t child)
3430 {
3431 	int	i;
3432 	char	*cp, *scp, *device;
3433 
3434 	/*
3435 	 * Look for a listing for this device in a loaded device database.
3436 	 */
3437 	if ((device = pci_describe_device(child)) != NULL) {
3438 		device_printf(dev, "<%s>", device);
3439 		kfree(device, M_DEVBUF);
3440 	} else {
3441 		/*
3442 		 * Scan the class/subclass descriptions for a general
3443 		 * description.
3444 		 */
3445 		cp = "unknown";
3446 		scp = NULL;
3447 		for (i = 0; pci_nomatch_tab[i].desc != NULL; i++) {
3448 			if (pci_nomatch_tab[i].class == pci_get_class(child)) {
3449 				if (pci_nomatch_tab[i].subclass == -1) {
3450 					cp = pci_nomatch_tab[i].desc;
3451 				} else if (pci_nomatch_tab[i].subclass ==
3452 				    pci_get_subclass(child)) {
3453 					scp = pci_nomatch_tab[i].desc;
3454 				}
3455 			}
3456 		}
3457 		device_printf(dev, "<%s%s%s>",
3458 		    cp ? cp : "",
3459 		    ((cp != NULL) && (scp != NULL)) ? ", " : "",
3460 		    scp ? scp : "");
3461 	}
3462 	kprintf(" (vendor 0x%04x, dev 0x%04x) at device %d.%d",
3463 		pci_get_vendor(child), pci_get_device(child),
3464 		pci_get_slot(child), pci_get_function(child));
3465 	if (pci_get_intpin(child) > 0) {
3466 		int irq;
3467 
3468 		irq = pci_get_irq(child);
3469 		if (PCI_INTERRUPT_VALID(irq))
3470 			kprintf(" irq %d", irq);
3471 	}
3472 	kprintf("\n");
3473 
3474 	pci_cfg_save(child, (struct pci_devinfo *)device_get_ivars(child), 1);
3475 }
3476 
3477 /*
3478  * Parse the PCI device database, if loaded, and return a pointer to a
3479  * description of the device.
3480  *
3481  * The database is flat text formatted as follows:
3482  *
3483  * Any line not in a valid format is ignored.
3484  * Lines are terminated with newline '\n' characters.
3485  *
3486  * A VENDOR line consists of the 4 digit (hex) vendor code, a TAB, then
3487  * the vendor name.
3488  *
3489  * A DEVICE line is entered immediately below the corresponding VENDOR ID.
3490  * - devices cannot be listed without a corresponding VENDOR line.
3491  * A DEVICE line consists of a TAB, the 4 digit (hex) device code,
3492  * another TAB, then the device name.
3493  */
3494 
3495 /*
3496  * Assuming (ptr) points to the beginning of a line in the database,
3497  * return the vendor or device and description of the next entry.
3498  * The value of (vendor) or (device) inappropriate for the entry type
3499  * is set to -1.  Returns nonzero at the end of the database.
3500  *
 * Note that this is not fully robust in the face of corrupt data;
 * we attempt to safeguard against this by appending a newline to
 * the end of the database when we initialize it.
3504  */
static int
pci_describe_parse_line(char **ptr, int *vendor, int *device, char **desc)
{
	char	*cp = *ptr;
	int	left;

	/* Default both outputs to "not present in this entry". */
	*device = -1;
	*vendor = -1;
	**desc = '\0';
	for (;;) {
		/* Bytes remaining between cp and the end of the database. */
		left = pci_vendordata_size - (cp - pci_vendordata);
		if (left <= 0) {
			*ptr = cp;
			return(1);
		}

		/* vendor entry? (no leading tab) */
		if (*cp != '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", vendor, *desc) == 2)
			break;
		/* device entry? (leading tab) */
		if (*cp == '\t' &&
		    ksscanf(cp, "%x\t%80[^\n]", device, *desc) == 2)
			break;

		/* skip to next line */
		while (*cp != '\n' && left > 0) {
			cp++;
			left--;
		}
		if (*cp == '\n') {
			cp++;
			left--;
		}
	}
	/* skip to next line */
	while (*cp != '\n' && left > 0) {
		cp++;
		left--;
	}
	if (*cp == '\n' && left > 0)
		cp++;
	/* Hand the advanced cursor back to the caller. */
	*ptr = cp;
	return(0);
}
3550 
/*
 * Build a "vendor, device" description string for dev from the
 * preloaded vendor database.  Returns a kmalloc'd string the caller
 * must kfree (M_DEVBUF), or NULL if no database is loaded or
 * allocation fails.
 */
static char *
pci_describe_device(device_t dev)
{
	int	vendor, device;
	char	*desc, *vp, *dp, *line;

	desc = vp = dp = NULL;

	/*
	 * If we have no vendor data, we can't do anything.
	 */
	if (pci_vendordata == NULL)
		goto out;

	/*
	 * Scan the vendor data looking for this device
	 */
	line = pci_vendordata;
	/* 80 bytes matches the %80[^\n] scan limit in the parser. */
	if ((vp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		if (pci_describe_parse_line(&line, &vendor, &device, &vp))
			goto out;
		if (vendor == pci_get_vendor(dev))
			break;
	}
	if ((dp = kmalloc(80, M_DEVBUF, M_NOWAIT)) == NULL)
		goto out;
	for (;;) {
		/* End of database: no device match, fall back below. */
		if (pci_describe_parse_line(&line, &vendor, &device, &dp)) {
			*dp = 0;
			break;
		}
		/* Hit the next vendor block: device not listed. */
		if (vendor != -1) {
			*dp = 0;
			break;
		}
		if (device == pci_get_device(dev))
			break;
	}
	/* No textual device name; print the numeric ID instead. */
	if (dp[0] == '\0')
		ksnprintf(dp, 80, "0x%x", pci_get_device(dev));
	if ((desc = kmalloc(strlen(vp) + strlen(dp) + 3, M_DEVBUF, M_NOWAIT)) !=
	    NULL)
		ksprintf(desc, "%s, %s", vp, dp);
 out:
	if (vp != NULL)
		kfree(vp, M_DEVBUF);
	if (dp != NULL)
		kfree(dp, M_DEVBUF);
	return(desc);
}
3603 
/*
 * Bus read_ivar method: expose the cached PCI config registers of a
 * child through the instance-variable interface.  Returns ENOENT for
 * unknown ivars and EINVAL for PCI_IVAR_ETHADDR (not supported here).
 */
int
pci_read_ivar(device_t dev, device_t child, int which, uintptr_t *result)
{
	struct pci_devinfo *dinfo;
	pcicfgregs *cfg;

	dinfo = device_get_ivars(child);
	cfg = &dinfo->cfg;

	switch (which) {
	case PCI_IVAR_ETHADDR:
		/*
		 * The generic accessor doesn't deal with failure, so
		 * we set the return value, then return an error.
		 */
		*((uint8_t **) result) = NULL;
		return (EINVAL);
	case PCI_IVAR_SUBVENDOR:
		*result = cfg->subvendor;
		break;
	case PCI_IVAR_SUBDEVICE:
		*result = cfg->subdevice;
		break;
	case PCI_IVAR_VENDOR:
		*result = cfg->vendor;
		break;
	case PCI_IVAR_DEVICE:
		*result = cfg->device;
		break;
	case PCI_IVAR_DEVID:
		/* Combined device/vendor identifier, device in high word. */
		*result = (cfg->device << 16) | cfg->vendor;
		break;
	case PCI_IVAR_CLASS:
		*result = cfg->baseclass;
		break;
	case PCI_IVAR_SUBCLASS:
		*result = cfg->subclass;
		break;
	case PCI_IVAR_PROGIF:
		*result = cfg->progif;
		break;
	case PCI_IVAR_REVID:
		*result = cfg->revid;
		break;
	case PCI_IVAR_INTPIN:
		*result = cfg->intpin;
		break;
	case PCI_IVAR_IRQ:
		*result = cfg->intline;
		break;
	case PCI_IVAR_DOMAIN:
		*result = cfg->domain;
		break;
	case PCI_IVAR_BUS:
		*result = cfg->bus;
		break;
	case PCI_IVAR_SLOT:
		*result = cfg->slot;
		break;
	case PCI_IVAR_FUNCTION:
		*result = cfg->func;
		break;
	case PCI_IVAR_CMDREG:
		*result = cfg->cmdreg;
		break;
	case PCI_IVAR_CACHELNSZ:
		*result = cfg->cachelnsz;
		break;
	case PCI_IVAR_MINGNT:
		*result = cfg->mingnt;
		break;
	case PCI_IVAR_MAXLAT:
		*result = cfg->maxlat;
		break;
	case PCI_IVAR_LATTIMER:
		*result = cfg->lattimer;
		break;
	case PCI_IVAR_PCIXCAP_PTR:
		*result = cfg->pcix.pcix_ptr;
		break;
	case PCI_IVAR_PCIECAP_PTR:
		*result = cfg->expr.expr_ptr;
		break;
	case PCI_IVAR_VPDCAP_PTR:
		*result = cfg->vpd.vpd_reg;
		break;
	default:
		return (ENOENT);
	}
	return (0);
}
3695 
3696 int
3697 pci_write_ivar(device_t dev, device_t child, int which, uintptr_t value)
3698 {
3699 	struct pci_devinfo *dinfo;
3700 
3701 	dinfo = device_get_ivars(child);
3702 
3703 	switch (which) {
3704 	case PCI_IVAR_INTPIN:
3705 		dinfo->cfg.intpin = value;
3706 		return (0);
3707 	case PCI_IVAR_ETHADDR:
3708 	case PCI_IVAR_SUBVENDOR:
3709 	case PCI_IVAR_SUBDEVICE:
3710 	case PCI_IVAR_VENDOR:
3711 	case PCI_IVAR_DEVICE:
3712 	case PCI_IVAR_DEVID:
3713 	case PCI_IVAR_CLASS:
3714 	case PCI_IVAR_SUBCLASS:
3715 	case PCI_IVAR_PROGIF:
3716 	case PCI_IVAR_REVID:
3717 	case PCI_IVAR_IRQ:
3718 	case PCI_IVAR_DOMAIN:
3719 	case PCI_IVAR_BUS:
3720 	case PCI_IVAR_SLOT:
3721 	case PCI_IVAR_FUNCTION:
3722 		return (EINVAL);	/* disallow for now */
3723 
3724 	default:
3725 		return (ENOENT);
3726 	}
3727 }
3728 #ifdef notyet
3729 #include "opt_ddb.h"
3730 #ifdef DDB
3731 #include <ddb/ddb.h>
3732 #include <sys/cons.h>
3733 
3734 /*
3735  * List resources based on pci map registers, used for within ddb
3736  */
3737 
/* DDB command: walk pci_devq and dump one summary line per device. */
DB_SHOW_COMMAND(pciregs, db_pci_dump)
{
	struct pci_devinfo *dinfo;
	struct devlist *devlist_head;
	struct pci_conf *p;
	const char *name;
	int i, error, none_count;

	none_count = 0;
	/* get the head of the device queue */
	devlist_head = &pci_devq;

	/*
	 * Go through the list of devices and print out devices
	 */
	for (error = 0, i = 0,
	     dinfo = STAILQ_FIRST(devlist_head);
	     (dinfo != NULL) && (error == 0) && (i < pci_numdevs) && !db_pager_quit;
	     dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {

		/* Populate pd_name and pd_unit */
		name = NULL;
		if (dinfo->cfg.dev)
			name = device_get_name(dinfo->cfg.dev);

		p = &dinfo->conf;
		/* Unnamed devices print as "none<N>" with a running count. */
		db_kprintf("%s%d@pci%d:%d:%d:%d:\tclass=0x%06x card=0x%08x "
			"chip=0x%08x rev=0x%02x hdr=0x%02x\n",
			(name && *name) ? name : "none",
			(name && *name) ? (int)device_get_unit(dinfo->cfg.dev) :
			none_count++,
			p->pc_sel.pc_domain, p->pc_sel.pc_bus, p->pc_sel.pc_dev,
			p->pc_sel.pc_func, (p->pc_class << 16) |
			(p->pc_subclass << 8) | p->pc_progif,
			(p->pc_subdevice << 16) | p->pc_subvendor,
			(p->pc_device << 16) | p->pc_vendor,
			p->pc_revid, p->pc_hdr);
	}
}
3777 #endif /* DDB */
3778 #endif
3779 
/*
 * Lazily size and allocate the resource behind a BAR that has no
 * resource-list entry yet.  The BAR is probed by writing all-ones and
 * reading back the size mask, the original value is restored (so a
 * low-level console stays addressable), and the resource is allocated
 * with the BAR's true size and alignment.  The BAR is finally
 * programmed with the allocated base address on the way out.
 */
static struct resource *
pci_alloc_map(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	struct resource *res;
	pci_addr_t map, testval;
	int mapsize;

	/*
	 * Weed out the bogons, and figure out how large the BAR/map
	 * is.  Bars that read back 0 here are bogus and unimplemented.
	 * Note: atapci in legacy mode are special and handled elsewhere
	 * in the code.  If you have a atapci device in legacy mode and
	 * it fails here, that other code is broken.
	 */
	res = NULL;
	map = pci_read_config(child, *rid, 4);
	pci_write_config(child, *rid, 0xffffffff, 4);
	testval = pci_read_config(child, *rid, 4);
	/* A 64-bit BAR keeps its upper half in the next dword. */
	if (pci_maprange(testval) == 64)
		map |= (pci_addr_t)pci_read_config(child, *rid + 4, 4) << 32;
	if (pci_mapbase(testval) == 0)
		goto out;

	/*
	 * Restore the original value of the BAR.  We may have reprogrammed
	 * the BAR of the low-level console device and when booting verbose,
	 * we need the console device addressable.
	 */
	pci_write_config(child, *rid, map, 4);

	if (PCI_BAR_MEM(testval)) {
		if (type != SYS_RES_MEMORY) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an memio\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	} else {
		if (type != SYS_RES_IOPORT) {
			if (bootverbose)
				device_printf(dev,
				    "child %s requested type %d for rid %#x,"
				    " but the BAR says it is an ioport\n",
				    device_get_nameunit(child), type, *rid);
			goto out;
		}
	}
	/*
	 * For real BARs, we need to override the size that
	 * the driver requests, because that's what the BAR
	 * actually uses and we would otherwise have a
	 * situation where we might allocate the excess to
	 * another driver, which won't work.
	 */
	mapsize = pci_mapsize(testval);
	count = 1UL << mapsize;
	if (RF_ALIGNMENT(flags) < mapsize)
		flags = (flags & ~RF_ALIGNMENT_MASK) | RF_ALIGNMENT_LOG2(mapsize);
	if (PCI_BAR_MEM(testval) && (testval & PCIM_BAR_MEM_PREFETCH))
		flags |= RF_PREFETCHABLE;

	/*
	 * Allocate enough resource, and then write back the
	 * appropriate bar for that resource.
	 */
	res = BUS_ALLOC_RESOURCE(device_get_parent(dev), child, type, rid,
	    start, end, count, flags, -1);
	if (res == NULL) {
		device_printf(child,
		    "%#lx bytes of rid %#x res %d failed (%#lx, %#lx).\n",
		    count, *rid, type, start, end);
		goto out;
	}
	/* Record the allocation in the child's resource list. */
	resource_list_add(rl, type, *rid, start, end, count, -1);
	rle = resource_list_find(rl, type, *rid);
	if (rle == NULL)
		panic("pci_alloc_map: unexpectedly can't find resource.");
	rle->res = res;
	rle->start = rman_get_start(res);
	rle->end = rman_get_end(res);
	rle->count = count;
	if (bootverbose)
		device_printf(child,
		    "Lazy allocation of %#lx bytes rid %#x type %d at %#lx\n",
		    count, *rid, type, rman_get_start(res));
	map = rman_get_start(res);
out:;
	/* Program the BAR (both halves if 64-bit) with the final base. */
	pci_write_config(child, *rid, map, 4);
	if (pci_maprange(testval) == 64)
		pci_write_config(child, *rid + 4, map >> 32, 4);
	return (res);
}
3878 
3879 
/*
 * Bus alloc_resource method.  For direct children this performs lazy
 * resource allocation: route an interrupt on the first IRQ request,
 * enable I/O decoding for BAR ranges, fall back to pci_alloc_map()
 * for BARs with no resource-list entry, and hand back (optionally
 * activating) resources reserved earlier.  Everything else is passed
 * through to resource_list_alloc().
 */
struct resource *
pci_alloc_resource(device_t dev, device_t child, int type, int *rid,
    u_long start, u_long end, u_long count, u_int flags, int cpuid)
{
	struct pci_devinfo *dinfo = device_get_ivars(child);
	struct resource_list *rl = &dinfo->resources;
	struct resource_list_entry *rle;
	pcicfgregs *cfg = &dinfo->cfg;

	/*
	 * Perform lazy resource allocation
	 */
	if (device_get_parent(child) == dev) {
		switch (type) {
		case SYS_RES_IRQ:
			/*
			 * Can't alloc legacy interrupt once MSI messages
			 * have been allocated.
			 */
#ifdef MSI
			if (*rid == 0 && (cfg->msi.msi_alloc > 0 ||
			    cfg->msix.msix_alloc > 0))
				return (NULL);
#endif
			/*
			 * If the child device doesn't have an
			 * interrupt routed and is deserving of an
			 * interrupt, try to assign it one.
			 */
			if (*rid == 0 && !PCI_INTERRUPT_VALID(cfg->intline) &&
			    (cfg->intpin != 0))
				pci_assign_interrupt(dev, child, 0);
			break;
		case SYS_RES_IOPORT:
		case SYS_RES_MEMORY:
			if (*rid < PCIR_BAR(cfg->nummaps)) {
				/*
				 * Enable the I/O mode.  We should
				 * also be assigning resources too
				 * when none are present.  The
				 * resource_list_alloc kind of sorta does
				 * this...
				 */
				if (PCI_ENABLE_IO(dev, child, type))
					return (NULL);
			}
			rle = resource_list_find(rl, type, *rid);
			if (rle == NULL)
				return (pci_alloc_map(dev, child, type, rid,
				    start, end, count, flags));
			break;
		}
		/*
		 * If we've already allocated the resource, then
		 * return it now.  But first we may need to activate
		 * it, since we don't allocate the resource as active
		 * above.  Normally this would be done down in the
		 * nexus, but since we short-circuit that path we have
		 * to do its job here.  Not sure if we should kfree the
		 * resource if it fails to activate.
		 */
		rle = resource_list_find(rl, type, *rid);
		if (rle != NULL && rle->res != NULL) {
			if (bootverbose)
				device_printf(child,
			    "Reserved %#lx bytes for rid %#x type %d at %#lx\n",
				    rman_get_size(rle->res), *rid, type,
				    rman_get_start(rle->res));
			if ((flags & RF_ACTIVE) &&
			    bus_generic_activate_resource(dev, child, type,
			    *rid, rle->res) != 0)
				return (NULL);
			return (rle->res);
		}
	}
	return (resource_list_alloc(rl, dev, child, type, rid,
	    start, end, count, flags, cpuid));
}
3958 
3959 void
3960 pci_delete_resource(device_t dev, device_t child, int type, int rid)
3961 {
3962 	struct pci_devinfo *dinfo;
3963 	struct resource_list *rl;
3964 	struct resource_list_entry *rle;
3965 
3966 	if (device_get_parent(child) != dev)
3967 		return;
3968 
3969 	dinfo = device_get_ivars(child);
3970 	rl = &dinfo->resources;
3971 	rle = resource_list_find(rl, type, rid);
3972 	if (rle) {
3973 		if (rle->res) {
3974 			if (rman_get_device(rle->res) != dev ||
3975 			    rman_get_flags(rle->res) & RF_ACTIVE) {
3976 				device_printf(dev, "delete_resource: "
3977 				    "Resource still owned by child, oops. "
3978 				    "(type=%d, rid=%d, addr=%lx)\n",
3979 				    rle->type, rle->rid,
3980 				    rman_get_start(rle->res));
3981 				return;
3982 			}
3983 			bus_release_resource(dev, type, rid, rle->res);
3984 		}
3985 		resource_list_delete(rl, type, rid);
3986 	}
3987 	/*
3988 	 * Why do we turn off the PCI configuration BAR when we delete a
3989 	 * resource? -- imp
3990 	 */
3991 	pci_write_config(child, rid, 0, 4);
3992 	BUS_DELETE_RESOURCE(device_get_parent(dev), child, type, rid);
3993 }
3994 
3995 struct resource_list *
3996 pci_get_resource_list (device_t dev, device_t child)
3997 {
3998 	struct pci_devinfo *dinfo = device_get_ivars(child);
3999 
4000 	if (dinfo == NULL)
4001 		return (NULL);
4002 
4003 	return (&dinfo->resources);
4004 }
4005 
4006 uint32_t
4007 pci_read_config_method(device_t dev, device_t child, int reg, int width)
4008 {
4009 	struct pci_devinfo *dinfo = device_get_ivars(child);
4010 	pcicfgregs *cfg = &dinfo->cfg;
4011 
4012 	return (PCIB_READ_CONFIG(device_get_parent(dev),
4013 	    cfg->bus, cfg->slot, cfg->func, reg, width));
4014 }
4015 
4016 void
4017 pci_write_config_method(device_t dev, device_t child, int reg,
4018     uint32_t val, int width)
4019 {
4020 	struct pci_devinfo *dinfo = device_get_ivars(child);
4021 	pcicfgregs *cfg = &dinfo->cfg;
4022 
4023 	PCIB_WRITE_CONFIG(device_get_parent(dev),
4024 	    cfg->bus, cfg->slot, cfg->func, reg, val, width);
4025 }
4026 
4027 int
4028 pci_child_location_str_method(device_t dev, device_t child, char *buf,
4029     size_t buflen)
4030 {
4031 
4032 	ksnprintf(buf, buflen, "slot=%d function=%d", pci_get_slot(child),
4033 	    pci_get_function(child));
4034 	return (0);
4035 }
4036 
4037 int
4038 pci_child_pnpinfo_str_method(device_t dev, device_t child, char *buf,
4039     size_t buflen)
4040 {
4041 	struct pci_devinfo *dinfo;
4042 	pcicfgregs *cfg;
4043 
4044 	dinfo = device_get_ivars(child);
4045 	cfg = &dinfo->cfg;
4046 	ksnprintf(buf, buflen, "vendor=0x%04x device=0x%04x subvendor=0x%04x "
4047 	    "subdevice=0x%04x class=0x%02x%02x%02x", cfg->vendor, cfg->device,
4048 	    cfg->subvendor, cfg->subdevice, cfg->baseclass, cfg->subclass,
4049 	    cfg->progif);
4050 	return (0);
4051 }
4052 
4053 int
4054 pci_assign_interrupt_method(device_t dev, device_t child)
4055 {
4056 	struct pci_devinfo *dinfo = device_get_ivars(child);
4057 	pcicfgregs *cfg = &dinfo->cfg;
4058 
4059 	return (PCIB_ROUTE_INTERRUPT(device_get_parent(dev), child,
4060 	    cfg->intpin));
4061 }
4062 
4063 static int
4064 pci_modevent(module_t mod, int what, void *arg)
4065 {
4066 	static struct cdev *pci_cdev;
4067 
4068 	switch (what) {
4069 	case MOD_LOAD:
4070 		STAILQ_INIT(&pci_devq);
4071 		pci_generation = 0;
4072 		pci_cdev = make_dev(&pcic_ops, 0, UID_ROOT, GID_WHEEL, 0644,
4073 				    "pci");
4074 		pci_load_vendor_data();
4075 		break;
4076 
4077 	case MOD_UNLOAD:
4078 		destroy_dev(pci_cdev);
4079 		break;
4080 	}
4081 
4082 	return (0);
4083 }
4084 
/*
 * Restore the saved configuration-space registers of 'dev' from the
 * copies cached in 'dinfo' (by pci_cfg_save), typically after a
 * suspend/resume cycle or power-state transition cleared them.
 */
void
pci_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	int i;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges,
	 * which we know need special treatment.  Type 2 devices are
	 * cardbus bridges which also require special treatment.
	 * Other types are unknown, and we err on the side of safety
	 * by ignoring them.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;

	/*
	 * Restore the device to full power mode.  We must do this
	 * before we restore the registers because moving from D3 to
	 * D0 will cause the chip's BARs and some other registers to
	 * be reset to some unknown power on reset values.  Cut down
	 * the noise on boot by doing nothing if we are already in
	 * state D0.
	 */
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D0) {
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	}
	/* Rewrite each BAR from the cached copy. */
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		pci_write_config(dev, PCIR_BAR(i), dinfo->cfg.bar[i], 4);
	pci_write_config(dev, PCIR_BIOS, dinfo->cfg.bios, 4);
	/* Restore the writable portion of the type-0 header. */
	pci_write_config(dev, PCIR_COMMAND, dinfo->cfg.cmdreg, 2);
	pci_write_config(dev, PCIR_INTLINE, dinfo->cfg.intline, 1);
	pci_write_config(dev, PCIR_INTPIN, dinfo->cfg.intpin, 1);
	pci_write_config(dev, PCIR_MINGNT, dinfo->cfg.mingnt, 1);
	pci_write_config(dev, PCIR_MAXLAT, dinfo->cfg.maxlat, 1);
	pci_write_config(dev, PCIR_CACHELNSZ, dinfo->cfg.cachelnsz, 1);
	pci_write_config(dev, PCIR_LATTIMER, dinfo->cfg.lattimer, 1);
	pci_write_config(dev, PCIR_PROGIF, dinfo->cfg.progif, 1);
	pci_write_config(dev, PCIR_REVID, dinfo->cfg.revid, 1);

	/* Restore MSI and MSI-X configurations if they are present. */
	if (dinfo->cfg.msi.msi_location != 0)
		pci_resume_msi(dev);
	if (dinfo->cfg.msix.msix_location != 0)
		pci_resume_msix(dev);
}
4130 
/*
 * Snapshot the configuration-space registers of 'dev' into 'dinfo'
 * (for later use by pci_cfg_restore) and, when 'setstate' is non-zero
 * and policy permits, power the device down to D3.
 */
void
pci_cfg_save(device_t dev, struct pci_devinfo *dinfo, int setstate)
{
	int i;
	uint32_t cls;
	int ps;

	/*
	 * Only do header type 0 devices.  Type 1 devices are bridges, which
	 * we know need special treatment.  Type 2 devices are cardbus bridges
	 * which also require special treatment.  Other types are unknown, and
	 * we err on the side of safety by ignoring them.  Powering down
	 * bridges should not be undertaken lightly.
	 */
	if (dinfo->cfg.hdrtype != 0)
		return;
	for (i = 0; i < dinfo->cfg.nummaps; i++)
		dinfo->cfg.bar[i] = pci_read_config(dev, PCIR_BAR(i), 4);
	dinfo->cfg.bios = pci_read_config(dev, PCIR_BIOS, 4);

	/*
	 * Some drivers apparently write to these registers w/o updating our
	 * cached copy.  No harm happens if we update the copy, so do so here
	 * so we can restore them.  The COMMAND register is modified by the
	 * bus w/o updating the cache.  This should represent the normally
	 * writable portion of the 'defined' part of type 0 headers.  In
	 * theory we also need to save/restore the PCI capability structures
	 * we know about, but apart from power we don't know any that are
	 * writable.
	 */
	dinfo->cfg.subvendor = pci_read_config(dev, PCIR_SUBVEND_0, 2);
	dinfo->cfg.subdevice = pci_read_config(dev, PCIR_SUBDEV_0, 2);
	dinfo->cfg.vendor = pci_read_config(dev, PCIR_VENDOR, 2);
	dinfo->cfg.device = pci_read_config(dev, PCIR_DEVICE, 2);
	dinfo->cfg.cmdreg = pci_read_config(dev, PCIR_COMMAND, 2);
	dinfo->cfg.intline = pci_read_config(dev, PCIR_INTLINE, 1);
	dinfo->cfg.intpin = pci_read_config(dev, PCIR_INTPIN, 1);
	dinfo->cfg.mingnt = pci_read_config(dev, PCIR_MINGNT, 1);
	dinfo->cfg.maxlat = pci_read_config(dev, PCIR_MAXLAT, 1);
	dinfo->cfg.cachelnsz = pci_read_config(dev, PCIR_CACHELNSZ, 1);
	dinfo->cfg.lattimer = pci_read_config(dev, PCIR_LATTIMER, 1);
	dinfo->cfg.baseclass = pci_read_config(dev, PCIR_CLASS, 1);
	dinfo->cfg.subclass = pci_read_config(dev, PCIR_SUBCLASS, 1);
	dinfo->cfg.progif = pci_read_config(dev, PCIR_PROGIF, 1);
	dinfo->cfg.revid = pci_read_config(dev, PCIR_REVID, 1);

	/*
	 * Don't set the state for display devices, base peripherals and
	 * memory devices since bad things happen when they are powered down.
	 * We should (a) have drivers that can easily detach and (b) use
	 * generic drivers for these devices so that some device actually
	 * attaches.  We need to make sure that when we implement (a) we don't
	 * power the device down on a reattach.
	 */
	cls = pci_get_class(dev);
	if (!setstate)
		return;
	/* pci_do_power_nodriver selects how aggressively to power down. */
	switch (pci_do_power_nodriver)
	{
		case 0:		/* NO powerdown at all */
			return;
		case 1:		/* Conservative about what to power down */
			if (cls == PCIC_STORAGE)
				return;
			/*FALLTHROUGH*/
		case 2:		/* Aggressive about what to power down */
			if (cls == PCIC_DISPLAY || cls == PCIC_MEMORY ||
			    cls == PCIC_BASEPERIPH)
				return;
			/*FALLTHROUGH*/
		case 3:		/* Power down everything */
			break;
	}
	/*
	 * PCI spec says we can only go into D3 state from D0 state.
	 * Transition from D[12] into D0 before going to D3 state.
	 */
	ps = pci_get_powerstate(dev);
	if (ps != PCI_POWERSTATE_D0 && ps != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D0);
	if (pci_get_powerstate(dev) != PCI_POWERSTATE_D3)
		pci_set_powerstate(dev, PCI_POWERSTATE_D3);
}
4214 
4215 #ifdef COMPAT_OLDPCI
4216 
4217 /*
4218  * Locate the parent of a PCI device by scanning the PCI devlist
4219  * and return the entry for the parent.
4220  * For devices on PCI Bus 0 (the host bus), this is the PCI Host.
4221  * For devices on secondary PCI busses, this is that bus' PCI-PCI Bridge.
4222  */
4223 pcicfgregs *
4224 pci_devlist_get_parent(pcicfgregs *cfg)
4225 {
4226 	struct devlist *devlist_head;
4227 	struct pci_devinfo *dinfo;
4228 	pcicfgregs *bridge_cfg;
4229 	int i;
4230 
4231 	dinfo = STAILQ_FIRST(devlist_head = &pci_devq);
4232 
4233 	/* If the device is on PCI bus 0, look for the host */
4234 	if (cfg->bus == 0) {
4235 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4236 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4237 			bridge_cfg = &dinfo->cfg;
4238 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4239 				&& bridge_cfg->subclass == PCIS_BRIDGE_HOST
4240 		    		&& bridge_cfg->bus == cfg->bus) {
4241 				return bridge_cfg;
4242 			}
4243 		}
4244 	}
4245 
4246 	/* If the device is not on PCI bus 0, look for the PCI-PCI bridge */
4247 	if (cfg->bus > 0) {
4248 		for (i = 0; (dinfo != NULL) && (i < pci_numdevs);
4249 		dinfo = STAILQ_NEXT(dinfo, pci_links), i++) {
4250 			bridge_cfg = &dinfo->cfg;
4251 			if (bridge_cfg->baseclass == PCIC_BRIDGE
4252 				&& bridge_cfg->subclass == PCIS_BRIDGE_PCI
4253 				&& bridge_cfg->secondarybus == cfg->bus) {
4254 				return bridge_cfg;
4255 			}
4256 		}
4257 	}
4258 
4259 	return NULL;
4260 }
4261 
4262 #endif	/* COMPAT_OLDPCI */
4263