xref: /netbsd-src/sys/dev/pci/pciconf.c (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 /*	$NetBSD: pciconf.c,v 1.52 2021/01/03 10:31:37 skrll Exp $	*/
2 
3 /*
4  * Copyright 2001 Wasabi Systems, Inc.
5  * All rights reserved.
6  *
7  * Written by Allen Briggs for Wasabi Systems, Inc.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  * 3. All advertising materials mentioning features or use of this software
18  *    must display the following acknowledgement:
19  *      This product includes software developed for the NetBSD Project by
20  *      Wasabi Systems, Inc.
21  * 4. The name of Wasabi Systems, Inc. may not be used to endorse
22  *    or promote products derived from this software without specific prior
23  *    written permission.
24  *
25  * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
26  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
27  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL WASABI SYSTEMS, INC
29  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35  * POSSIBILITY OF SUCH DAMAGE.
36  */
37 /*
38  * Derived in part from code from PMON/2000 (http://pmon.groupbsd.org/).
39  */
40 
41 /*
42  * To do:
43  *    - Perform all data structure allocation dynamically, don't have
44  *	statically-sized arrays ("oops, you lose because you have too
45  *	many slots filled!")
46  *    - Do this in 2 passes, with an MD hook to control the behavior:
 47  *		(1) Configure the bus (possibly including expansion
 48  *		    ROMs).
49  *		(2) Another pass to disable expansion ROMs if they're
50  *		    mapped (since you're not supposed to leave them
51  *		    mapped when you're not using them).
52  *	This would facilitate MD code executing the expansion ROMs
53  *	if necessary (possibly with an x86 emulator) to configure
54  *	devices (e.g. VGA cards).
55  *    - Deal with "anything can be hot-plugged" -- i.e., carry configuration
56  *	information around & be able to reconfigure on the fly
57  *    - Deal with segments (See IA64 System Abstraction Layer)
58  *    - Deal with subtractive bridges (& non-spec positive/subtractive decode)
59  *    - Deal with ISA/VGA/VGA palette snooping
60  *    - Deal with device capabilities on bridges
61  *    - Worry about changing a bridge to/from transparency
62  * From thorpej (05/25/01)
63  *    - Try to handle devices that are already configured (perhaps using that
64  *      as a hint to where we put other devices)
65  */
66 
67 #include <sys/cdefs.h>
68 __KERNEL_RCSID(0, "$NetBSD: pciconf.c,v 1.52 2021/01/03 10:31:37 skrll Exp $");
69 
70 #include "opt_pci.h"
71 
72 #include <sys/param.h>
73 #include <sys/queue.h>
74 #include <sys/systm.h>
75 #include <sys/malloc.h>
76 #include <sys/kmem.h>
77 #include <sys/vmem.h>
78 
79 #include <dev/pci/pcivar.h>
80 #include <dev/pci/pciconf.h>
81 #include <dev/pci/pcidevs.h>
82 #include <dev/pci/pccbbreg.h>
83 
84 int pci_conf_debug = 0;
85 
86 #if !defined(MIN)
87 #define	MIN(a,b) (((a)<(b))?(a):(b))
88 #define	MAX(a,b) (((a)>(b))?(a):(b))
89 #endif
90 
91 /* per-bus constants. */
92 #define MAX_CONF_DEV	32			/* Arbitrary */
93 #define MAX_CONF_MEM	(3 * MAX_CONF_DEV)	/* Avg. 3 per device -- Arb. */
94 #define MAX_CONF_IO	(3 * MAX_CONF_DEV)	/* Avg. 1 per device -- Arb. */
95 
96 struct _s_pciconf_bus_t;			/* Forward declaration */
97 
98 struct pciconf_resource {
99 	vmem_t		*arena;
100 	bus_addr_t	min_addr;
101 	bus_addr_t	max_addr;
102 	bus_size_t	total_size;
103 };
104 
105 #define	PCICONF_RESOURCE_NTYPES	3
106 CTASSERT(PCICONF_RESOURCE_IO < PCICONF_RESOURCE_NTYPES);
107 CTASSERT(PCICONF_RESOURCE_MEM < PCICONF_RESOURCE_NTYPES);
108 CTASSERT(PCICONF_RESOURCE_PREFETCHABLE_MEM < PCICONF_RESOURCE_NTYPES);
109 
110 static const char *pciconf_resource_names[] = {
111 	[PCICONF_RESOURCE_IO]			=	"pci-io",
112 	[PCICONF_RESOURCE_MEM]			=	"pci-mem",
113 	[PCICONF_RESOURCE_PREFETCHABLE_MEM]	=	"pci-pmem",
114 };
115 
116 struct pciconf_resources {
117 	struct pciconf_resource resources[PCICONF_RESOURCE_NTYPES];
118 };
119 
120 struct pciconf_resource_rsvd {
121 	int		type;
122 	uint64_t	start;
123 	bus_size_t	size;
124 	void		(*callback)(void *, uint64_t);
125 	void		*callback_arg;
126 	LIST_ENTRY(pciconf_resource_rsvd) next;
127 };
128 
129 static LIST_HEAD(, pciconf_resource_rsvd) pciconf_resource_reservations =
130     LIST_HEAD_INITIALIZER(pciconf_resource_reservations);
131 
132 typedef struct _s_pciconf_dev_t {
133 	int		ipin;
134 	int		iline;
135 	int		min_gnt;
136 	int		max_lat;
137 	int		enable;
138 	pcitag_t	tag;
139 	pci_chipset_tag_t	pc;
140 	struct _s_pciconf_bus_t	*ppb;		/* I am really a bridge */
141 	pcireg_t	ea_cap_ptr;
142 } pciconf_dev_t;
143 
144 typedef struct _s_pciconf_win_t {
145 	pciconf_dev_t	*dev;
146 	int		reg;			/* 0 for busses */
147 	int		align;
148 	int		prefetch;
149 	uint64_t	size;
150 	uint64_t	address;
151 } pciconf_win_t;
152 
153 typedef struct _s_pciconf_bus_t {
154 	int		busno;
155 	int		next_busno;
156 	int		last_busno;
157 	int		max_mingnt;
158 	int		min_maxlat;
159 	int		cacheline_size;
160 	int		prefetch;
161 	int		fast_b2b;
162 	int		freq_66;
163 	int		def_ltim;
164 	int		max_ltim;
165 	int		bandwidth_used;
166 	int		swiz;
167 	int		io_32bit;
168 	int		pmem_64bit;
169 	int		mem_64bit;
170 	int		io_align;
171 	int		mem_align;
172 	int		pmem_align;
173 
174 	int		ndevs;
175 	pciconf_dev_t	device[MAX_CONF_DEV];
176 
177 	/* These should be sorted in order of decreasing size */
178 	int		nmemwin;
179 	pciconf_win_t	pcimemwin[MAX_CONF_MEM];
180 	int		niowin;
181 	pciconf_win_t	pciiowin[MAX_CONF_IO];
182 
183 	bus_size_t	io_total;
184 	bus_size_t	mem_total;
185 	bus_size_t	pmem_total;
186 
187 	struct pciconf_resource io_res;
188 	struct pciconf_resource mem_res;
189 	struct pciconf_resource pmem_res;
190 
191 	pci_chipset_tag_t	pc;
192 	struct _s_pciconf_bus_t *parent_bus;
193 } pciconf_bus_t;
194 
195 static int	probe_bus(pciconf_bus_t *);
196 static void	alloc_busno(pciconf_bus_t *, pciconf_bus_t *);
197 static void	set_busreg(pci_chipset_tag_t, pcitag_t, int, int, int);
198 static int	pci_do_device_query(pciconf_bus_t *, pcitag_t, int, int, int);
199 static int	setup_iowins(pciconf_bus_t *);
200 static int	setup_memwins(pciconf_bus_t *);
201 static int	configure_bridge(pciconf_dev_t *);
202 static int	configure_bus(pciconf_bus_t *);
203 static uint64_t	pci_allocate_range(struct pciconf_resource *, uint64_t, int,
204 		    bool);
205 static pciconf_win_t	*get_io_desc(pciconf_bus_t *, bus_size_t);
206 static pciconf_win_t	*get_mem_desc(pciconf_bus_t *, bus_size_t);
207 static pciconf_bus_t	*query_bus(pciconf_bus_t *, pciconf_dev_t *, int);
208 
209 static void	print_tag(pci_chipset_tag_t, pcitag_t);
210 
211 static vmem_t *
212 create_vmem_arena(const char *name, bus_addr_t start, bus_size_t size,
213     int flags)
214 {
215 	KASSERT(start < VMEM_ADDR_MAX);
216 	KASSERT(size == 0 ||
217 		(VMEM_ADDR_MAX - start) >= (size - 1));
218 
219 	return vmem_create(name, start, size,
220 			   1,		/*quantum*/
221 			   NULL,	/*importfn*/
222 			   NULL,	/*releasefn*/
223 			   NULL,	/*source*/
224 			   0,		/*qcache_max*/
225 			   flags,
226 			   IPL_NONE);
227 }
228 
229 static int
230 init_range_resource(struct pciconf_resource *r, const char *name,
231     bus_addr_t start, bus_addr_t size)
232 {
233 	r->arena = create_vmem_arena(name, start, size, VM_NOSLEEP);
234 	if (r->arena == NULL)
235 		return ENOMEM;
236 
237 	r->min_addr = start;
238 	r->max_addr = start + (size - 1);
239 	r->total_size = size;
240 
241 	return 0;
242 }
243 
244 static void
245 fini_range_resource(struct pciconf_resource *r)
246 {
247 	if (r->arena) {
248 		vmem_xfreeall(r->arena);
249 		vmem_destroy(r->arena);
250 	}
251 	memset(r, 0, sizeof(*r));
252 }
253 
254 static void
255 print_tag(pci_chipset_tag_t pc, pcitag_t tag)
256 {
257 	int	bus, dev, func;
258 
259 	pci_decompose_tag(pc, tag, &bus, &dev, &func);
260 	printf("PCI: bus %d, device %d, function %d: ", bus, dev, func);
261 }
262 
263 #ifdef _LP64
264 #define	__used_only_lp64	__unused
265 #else
266 #define	__used_only_lp64	/* nothing */
267 #endif /* _LP64 */
268 
269 /************************************************************************/
270 /************************************************************************/
271 /***********************   Bus probing routines   ***********************/
272 /************************************************************************/
273 /************************************************************************/
274 static pciconf_win_t *
275 get_io_desc(pciconf_bus_t *pb, bus_size_t size)
276 {
277 	int	i, n;
278 
279 	n = pb->niowin;
280 	for (i = n; i > 0 && size > pb->pciiowin[i-1].size; i--)
281 		pb->pciiowin[i] = pb->pciiowin[i-1]; /* struct copy */
282 	return &pb->pciiowin[i];
283 }
284 
285 static pciconf_win_t *
286 get_mem_desc(pciconf_bus_t *pb, bus_size_t size)
287 {
288 	int	i, n;
289 
290 	n = pb->nmemwin;
291 	for (i = n; i > 0 && size > pb->pcimemwin[i-1].size; i--)
292 		pb->pcimemwin[i] = pb->pcimemwin[i-1]; /* struct copy */
293 	return &pb->pcimemwin[i];
294 }
295 
296 /*
297  * Set up bus common stuff, then loop over devices & functions.
298  * If we find something, call pci_do_device_query()).
299  */
300 static int
301 probe_bus(pciconf_bus_t *pb)
302 {
303 	int device;
304 	uint8_t devs[32];
305 	int i, n;
306 
307 	pb->ndevs = 0;
308 	pb->niowin = 0;
309 	pb->nmemwin = 0;
310 	pb->freq_66 = 1;
311 #ifdef PCICONF_NO_FAST_B2B
312 	pb->fast_b2b = 0;
313 #else
314 	pb->fast_b2b = 1;
315 #endif
316 	pb->prefetch = 1;
317 	pb->max_mingnt = 0;	/* we are looking for the maximum */
318 	pb->min_maxlat = 0x100;	/* we are looking for the minimum */
319 	pb->bandwidth_used = 0;
320 
321 	n = pci_bus_devorder(pb->pc, pb->busno, devs, __arraycount(devs));
322 	for (i = 0; i < n; i++) {
323 		pcitag_t tag;
324 		pcireg_t id, bhlcr;
325 		int function, nfunction;
326 		int confmode;
327 
328 		device = devs[i];
329 
330 		tag = pci_make_tag(pb->pc, pb->busno, device, 0);
331 		if (pci_conf_debug) {
332 			print_tag(pb->pc, tag);
333 		}
334 		id = pci_conf_read(pb->pc, tag, PCI_ID_REG);
335 
336 		if (pci_conf_debug) {
337 			printf("id=%x: Vendor=%x, Product=%x\n",
338 			    id, PCI_VENDOR(id), PCI_PRODUCT(id));
339 		}
340 		/* Invalid vendor ID value? */
341 		if (PCI_VENDOR(id) == PCI_VENDOR_INVALID)
342 			continue;
343 
344 		bhlcr = pci_conf_read(pb->pc, tag, PCI_BHLC_REG);
345 		nfunction = PCI_HDRTYPE_MULTIFN(bhlcr) ? 8 : 1;
346 		for (function = 0; function < nfunction; function++) {
347 			tag = pci_make_tag(pb->pc, pb->busno, device, function);
348 			id = pci_conf_read(pb->pc, tag, PCI_ID_REG);
349 			if (PCI_VENDOR(id) == PCI_VENDOR_INVALID)
350 				continue;
351 			if (pb->ndevs + 1 < MAX_CONF_DEV) {
352 				if (pci_conf_debug) {
353 					print_tag(pb->pc, tag);
354 					printf("Found dev 0x%04x 0x%04x -- "
355 					    "really probing.\n",
356 					PCI_VENDOR(id), PCI_PRODUCT(id));
357 				}
358 #ifdef __HAVE_PCI_CONF_HOOK
359 				confmode = pci_conf_hook(pb->pc, pb->busno,
360 				    device, function, id);
361 				if (confmode == 0)
362 					continue;
363 #else
364 				/*
365 				 * Don't enable expansion ROMS -- some cards
366 				 * share address decoders between the EXPROM
367 				 * and PCI memory space, and enabling the ROM
368 				 * when not needed will cause all sorts of
369 				 * lossage.
370 				 */
371 				confmode = PCI_CONF_DEFAULT;
372 #endif
373 				if (pci_do_device_query(pb, tag, device,
374 				    function, confmode))
375 					return -1;
376 				pb->ndevs++;
377 			}
378 		}
379 	}
380 	return 0;
381 }
382 
383 static void
384 alloc_busno(pciconf_bus_t *parent, pciconf_bus_t *pb)
385 {
386 	pb->busno = parent->next_busno;
387 	pb->next_busno = pb->busno + 1;
388 }
389 
390 static void
391 set_busreg(pci_chipset_tag_t pc, pcitag_t tag, int prim, int sec, int sub)
392 {
393 	pcireg_t	busreg;
394 
395 	busreg  = __SHIFTIN(prim, PCI_BRIDGE_BUS_PRIMARY);
396 	busreg |= __SHIFTIN(sec,  PCI_BRIDGE_BUS_SECONDARY);
397 	busreg |= __SHIFTIN(sub,  PCI_BRIDGE_BUS_SUBORDINATE);
398 	pci_conf_write(pc, tag, PCI_BRIDGE_BUS_REG, busreg);
399 }
400 
/*
 * Recursively probe the bus behind PCI-PCI bridge `pd' (at device
 * number `dev' on bus `parent') and accumulate its resource needs.
 *
 * Allocates and returns a new pciconf_bus_t on success; the bridge's
 * bus number register is programmed (first with an open-ended 0xff
 * subordinate so the probe can see everything, then with the real
 * range), and the child's aggregate I/O / MEM / prefetchable-MEM
 * totals are rolled up into the parent's window lists.  Returns NULL
 * on failure (probe error or window-table overflow).
 */
static pciconf_bus_t *
query_bus(pciconf_bus_t *parent, pciconf_dev_t *pd, int dev)
{
	pciconf_bus_t	*pb;
	pcireg_t	io, pmem;
	pciconf_win_t	*pi, *pm;

	pb = kmem_zalloc(sizeof (pciconf_bus_t), KM_SLEEP);
	pb->cacheline_size = parent->cacheline_size;
	pb->parent_bus = parent;
	alloc_busno(parent, pb);

	/* Bridge windows have coarse decode granularity; see PPB spec. */
	pb->mem_align = 0x100000;	/* 1M alignment */
	pb->pmem_align = 0x100000;	/* 1M alignment */
	pb->io_align = 0x1000;		/* 4K alignment */

	/*
	 * Temporarily claim all busses from pb->busno up so config
	 * cycles reach any bridges further down during the probe.
	 */
	set_busreg(parent->pc, pd->tag, parent->busno, pb->busno, 0xff);

	/* Interrupt-swizzle accumulator per the PCI bridge spec. */
	pb->swiz = parent->swiz + dev;

	memset(&pb->io_res, 0, sizeof(pb->io_res));
	memset(&pb->mem_res, 0, sizeof(pb->mem_res));
	memset(&pb->pmem_res, 0, sizeof(pb->pmem_res));

	pb->pc = parent->pc;
	pb->io_total = pb->mem_total = pb->pmem_total = 0;

	/* 32-bit I/O decode only if the whole path up supports it. */
	pb->io_32bit = 0;
	if (parent->io_32bit) {
		io = pci_conf_read(parent->pc, pd->tag, PCI_BRIDGE_STATIO_REG);
		if (PCI_BRIDGE_IO_32BITS(io))
			pb->io_32bit = 1;
	}

	/* Likewise, 64-bit prefetchable memory needs support end-to-end. */
	pb->pmem_64bit = 0;
	if (parent->pmem_64bit) {
		pmem = pci_conf_read(parent->pc, pd->tag,
		    PCI_BRIDGE_PREFETCHMEM_REG);
		if (PCI_BRIDGE_PREFETCHMEM_64BITS(pmem))
			pb->pmem_64bit = 1;
	}

	/* Bridges only forward a 32-bit range of non-prefetchable memory. */
	pb->mem_64bit = 0;

	if (probe_bus(pb)) {
		printf("Failed to probe bus %d\n", pb->busno);
		goto err;
	}

	/* We have found all subordinate busses now, reprogram busreg. */
	pb->last_busno = pb->next_busno - 1;
	parent->next_busno = pb->next_busno;
	set_busreg(parent->pc, pd->tag, parent->busno, pb->busno,
		   pb->last_busno);
	if (pci_conf_debug)
		printf("PCI bus bridge (parent %d) covers busses %d-%d\n",
			parent->busno, pb->busno, pb->last_busno);

	/*
	 * Fold the child's aggregate requirements into the parent's
	 * window lists as a single pseudo-window each (reg == 0 marks
	 * a bridge window rather than a BAR).
	 */
	if (pb->io_total > 0) {
		if (parent->niowin >= MAX_CONF_IO) {
			printf("pciconf: too many (%d) I/O windows\n",
			    parent->niowin);
			goto err;
		}
		pb->io_total |= pb->io_align - 1; /* Round up */
		pi = get_io_desc(parent, pb->io_total);
		pi->dev = pd;
		pi->reg = 0;
		pi->size = pb->io_total;
		pi->align = pb->io_align;	/* 4K min alignment */
		if (parent->io_align < pb->io_align)
			parent->io_align = pb->io_align;
		pi->prefetch = 0;
		parent->niowin++;
		parent->io_total += pb->io_total;
	}

	if (pb->mem_total > 0) {
		if (parent->nmemwin >= MAX_CONF_MEM) {
			printf("pciconf: too many (%d) MEM windows\n",
			     parent->nmemwin);
			goto err;
		}
		pb->mem_total |= pb->mem_align - 1; /* Round up */
		pm = get_mem_desc(parent, pb->mem_total);
		pm->dev = pd;
		pm->reg = 0;
		pm->size = pb->mem_total;
		pm->align = pb->mem_align;	/* 1M min alignment */
		if (parent->mem_align < pb->mem_align)
			parent->mem_align = pb->mem_align;
		pm->prefetch = 0;
		parent->nmemwin++;
		parent->mem_total += pb->mem_total;
	}

	if (pb->pmem_total > 0) {
		if (parent->nmemwin >= MAX_CONF_MEM) {
			printf("pciconf: too many MEM windows\n");
			goto err;
		}
		pb->pmem_total |= pb->pmem_align - 1; /* Round up */
		pm = get_mem_desc(parent, pb->pmem_total);
		pm->dev = pd;
		pm->reg = 0;
		pm->size = pb->pmem_total;
		pm->align = pb->pmem_align;	/* 1M alignment */
		if (parent->pmem_align < pb->pmem_align)
			parent->pmem_align = pb->pmem_align;
		pm->prefetch = 1;
		parent->nmemwin++;
		parent->pmem_total += pb->pmem_total;
	}

	return pb;
err:
	kmem_free(pb, sizeof(*pb));
	return NULL;
}
521 
/*
 * Check whether [addr, addr + size] intersects any registered
 * reservation of the given type (PCI_CONF_MAP_IO/MEM).  Returns the
 * matching reservation, or NULL if none.
 *
 * NOTE(review): the comparison is inclusive on both ends, so ranges
 * that merely touch are also treated as overlapping — conservative
 * by design, it seems; confirm before tightening.
 */
static struct pciconf_resource_rsvd *
pci_resource_is_reserved(int type, uint64_t addr, uint64_t size)
{
	struct pciconf_resource_rsvd *rsvd;

	LIST_FOREACH(rsvd, &pciconf_resource_reservations, next) {
		if (rsvd->type != type)
			continue;
		if (rsvd->start <= addr + size && rsvd->start + rsvd->size >= addr)
			return rsvd;
	}

	return NULL;
}
536 
/*
 * Determine whether the BAR at config offset `br' of device `pd'
 * currently decodes an address that falls within a registered
 * reservation (e.g. a firmware-configured console framebuffer).
 *
 * Sizes the BAR with the standard write-all-ones probe and restores
 * the original value afterwards.  Returns the matching reservation
 * or NULL.
 */
static struct pciconf_resource_rsvd *
pci_bar_is_reserved(pciconf_bus_t *pb, pciconf_dev_t *pd, int br)
{
	pcireg_t base, base64, mask, mask64;
	pcitag_t tag;
	uint64_t addr, size;

	/*
	 * Resource reservation does not apply to bridges
	 */
	if (pd->ppb)
		return NULL;

	tag = pd->tag;

	/*
	 * Look to see if this device is enabled and one of the resources
	 * is already in use (eg. firmware configured console device).
	 */
	base = pci_conf_read(pb->pc, tag, br);
	pci_conf_write(pb->pc, tag, br, 0xffffffff);
	mask = pci_conf_read(pb->pc, tag, br);
	pci_conf_write(pb->pc, tag, br, base);

	switch (PCI_MAPREG_TYPE(base)) {
	case PCI_MAPREG_TYPE_IO:
		addr = PCI_MAPREG_IO_ADDR(base);
		size = PCI_MAPREG_IO_SIZE(mask);
		return pci_resource_is_reserved(PCI_CONF_MAP_IO, addr, size);

	case PCI_MAPREG_TYPE_MEM:
		/* 64-bit BARs occupy two registers; size the upper half too. */
		if (PCI_MAPREG_MEM_TYPE(base) == PCI_MAPREG_MEM_TYPE_64BIT) {
			base64 = pci_conf_read(pb->pc, tag, br + 4);
			pci_conf_write(pb->pc, tag, br + 4, 0xffffffff);
			mask64 = pci_conf_read(pb->pc, tag, br + 4);
			pci_conf_write(pb->pc, tag, br + 4, base64);
			addr = (uint64_t)PCI_MAPREG_MEM64_ADDR(
			      (((uint64_t)base64) << 32) | base);
			size = (uint64_t)PCI_MAPREG_MEM64_SIZE(
			      (((uint64_t)mask64) << 32) | mask);
		} else {
			addr = PCI_MAPREG_MEM_ADDR(base);
			size = PCI_MAPREG_MEM_SIZE(mask);
		}
		return pci_resource_is_reserved(PCI_CONF_MAP_MEM, addr, size);

	default:
		return NULL;
	}
}
587 
/*
 * Examine one PCI function and record its resource requirements in
 * pb->device[pb->ndevs] and the bus's I/O / memory window lists.
 *
 * `mode' is the PCI_CONF_* enable mask for this function (from
 * pci_conf_hook() or PCI_CONF_DEFAULT).  Recurses via query_bus()
 * when the function is a PCI-PCI bridge.  Returns 0 on success, -1
 * on failure (unknown header type, window-table overflow, or a
 * failed subordinate-bus probe).
 */
static int
pci_do_device_query(pciconf_bus_t *pb, pcitag_t tag, int dev, int func,
    int mode)
{
	pciconf_dev_t	*pd;
	pciconf_win_t	*pi, *pm;
	pcireg_t	classreg, cmd, icr, bhlc, bar, mask, bar64, mask64,
	    busreg;
	uint64_t	size;
	int		br, width, reg_start, reg_end;

	pd = &pb->device[pb->ndevs];
	pd->pc = pb->pc;
	pd->tag = tag;
	pd->ppb = NULL;
	pd->enable = mode;
	pd->ea_cap_ptr = 0;

	classreg = pci_conf_read(pb->pc, tag, PCI_CLASS_REG);

	cmd = pci_conf_read(pb->pc, tag, PCI_COMMAND_STATUS_REG);
	bhlc = pci_conf_read(pb->pc, tag, PCI_BHLC_REG);

	if (pci_get_capability(pb->pc, tag, PCI_CAP_EA, &pd->ea_cap_ptr,
	    NULL)) {
		/* XXX Skip devices with EA for now. */
		print_tag(pb->pc, tag);
		printf("skipping devices with Enhanced Allocations\n");
		return 0;
	}

	/*
	 * Disable decoding while we size the BARs -- except on bridges,
	 * where clearing the enables could cut off devices behind them.
	 */
	if (PCI_CLASS(classreg) != PCI_CLASS_BRIDGE
	    && PCI_HDRTYPE_TYPE(bhlc) != PCI_HDRTYPE_PPB) {
		cmd &= ~(PCI_COMMAND_MASTER_ENABLE |
		    PCI_COMMAND_IO_ENABLE | PCI_COMMAND_MEM_ENABLE);
		pci_conf_write(pb->pc, tag, PCI_COMMAND_STATUS_REG, cmd);
	} else if (pci_conf_debug) {
		print_tag(pb->pc, tag);
		printf("device is a bridge; not clearing enables\n");
	}

	/* One non-capable device downgrades the whole bus. */
	if ((cmd & PCI_STATUS_BACKTOBACK_SUPPORT) == 0)
		pb->fast_b2b = 0;

	if ((cmd & PCI_STATUS_66MHZ_SUPPORT) == 0)
		pb->freq_66 = 0;

	/* The header type determines which BAR range this function has. */
	switch (PCI_HDRTYPE_TYPE(bhlc)) {
	case PCI_HDRTYPE_DEVICE:
		reg_start = PCI_MAPREG_START;
		reg_end = PCI_MAPREG_END;
		break;
	case PCI_HDRTYPE_PPB:
		/* Bridge: recurse; the bridge itself has no BARs to size. */
		pd->ppb = query_bus(pb, pd, dev);
		if (pd->ppb == NULL)
			return -1;
		return 0;
	case PCI_HDRTYPE_PCB:
		/* CardBus bridge: two BARs, and its own bus number reg. */
		reg_start = PCI_MAPREG_START;
		reg_end = PCI_MAPREG_PCB_END;

		busreg = pci_conf_read(pb->pc, tag, PCI_BUSNUM);
		busreg = (busreg & 0xff000000) |
		    __SHIFTIN(pb->busno, PCI_BRIDGE_BUS_PRIMARY) |
		    __SHIFTIN(pb->next_busno, PCI_BRIDGE_BUS_SECONDARY) |
		    __SHIFTIN(pb->next_busno, PCI_BRIDGE_BUS_SUBORDINATE);
		pci_conf_write(pb->pc, tag, PCI_BUSNUM, busreg);

		pb->next_busno++;
		break;
	default:
		return -1;
	}

	/* Route the interrupt line via the MD hook, if the pin is wired. */
	icr = pci_conf_read(pb->pc, tag, PCI_INTERRUPT_REG);
	pd->ipin = PCI_INTERRUPT_PIN(icr);
	pd->iline = PCI_INTERRUPT_LINE(icr);
	pd->min_gnt = PCI_MIN_GNT(icr);
	pd->max_lat = PCI_MAX_LAT(icr);
	if (pd->iline || pd->ipin) {
		pci_conf_interrupt(pb->pc, pb->busno, dev, pd->ipin, pb->swiz,
		    &pd->iline);
		icr &= ~(PCI_INTERRUPT_LINE_MASK << PCI_INTERRUPT_LINE_SHIFT);
		icr |= (pd->iline << PCI_INTERRUPT_LINE_SHIFT);
		pci_conf_write(pb->pc, tag, PCI_INTERRUPT_REG, icr);
	}

	/* Track bus-wide latency extremes and estimated bandwidth use. */
	if (pd->min_gnt != 0 || pd->max_lat != 0) {
		if (pd->min_gnt != 0 && pd->min_gnt > pb->max_mingnt)
			pb->max_mingnt = pd->min_gnt;

		if (pd->max_lat != 0 && pd->max_lat < pb->min_maxlat)
			pb->min_maxlat = pd->max_lat;

		pb->bandwidth_used += pd->min_gnt * 4000000 /
				(pd->min_gnt + pd->max_lat);
	}

	/*
	 * Walk the BARs, sizing each with the write-all-ones probe.
	 * `width' is 8 for the low half of a 64-bit BAR so the loop
	 * skips the upper half.
	 */
	width = 4;
	for (br = reg_start; br < reg_end; br += width) {
#if 0
/* XXX Should only ignore if IDE not in legacy mode? */
		if (PCI_CLASS(classreg) == PCI_CLASS_MASS_STORAGE &&
		    PCI_SUBCLASS(classreg) == PCI_SUBCLASS_MASS_STORAGE_IDE) {
			break;
		}
#endif
		bar = pci_conf_read(pb->pc, tag, br);
		pci_conf_write(pb->pc, tag, br, 0xffffffff);
		mask = pci_conf_read(pb->pc, tag, br);
		pci_conf_write(pb->pc, tag, br, bar);
		width = 4;

		if (   (mode & PCI_CONF_MAP_IO)
		    && (PCI_MAPREG_TYPE(mask) == PCI_MAPREG_TYPE_IO)) {
			/*
			 * Upper 16 bits must be one.  Devices may hardwire
			 * them to zero, though, per PCI 2.2, 6.2.5.1, p 203.
			 */
			mask |= 0xffff0000;

			size = PCI_MAPREG_IO_SIZE(mask);
			if (size == 0) {
				if (pci_conf_debug) {
					print_tag(pb->pc, tag);
					printf("I/O BAR 0x%x is void\n", br);
				}
				continue;
			}

			if (pb->niowin >= MAX_CONF_IO) {
				printf("pciconf: too many I/O windows\n");
				return -1;
			}

			pi = get_io_desc(pb, size);
			pi->dev = pd;
			pi->reg = br;
			pi->size = (uint64_t)size;
			pi->align = 4;
			if (pb->io_align < pi->size)
				pb->io_align = pi->size;
			pi->prefetch = 0;
			if (pci_conf_debug) {
				print_tag(pb->pc, tag);
				printf("Register 0x%x, I/O size %" PRIu64 "\n",
				    br, pi->size);
			}
			pb->niowin++;
			pb->io_total += size;
		} else if ((mode & PCI_CONF_MAP_MEM)
			   && (PCI_MAPREG_TYPE(mask) == PCI_MAPREG_TYPE_MEM)) {
			switch (PCI_MAPREG_MEM_TYPE(mask)) {
			case PCI_MAPREG_MEM_TYPE_32BIT:
			case PCI_MAPREG_MEM_TYPE_32BIT_1M:
				size = (uint64_t)PCI_MAPREG_MEM_SIZE(mask);
				break;
			case PCI_MAPREG_MEM_TYPE_64BIT:
				/* Size the upper half, then skip it. */
				bar64 = pci_conf_read(pb->pc, tag, br + 4);
				pci_conf_write(pb->pc, tag, br + 4, 0xffffffff);
				mask64 = pci_conf_read(pb->pc, tag, br + 4);
				pci_conf_write(pb->pc, tag, br + 4, bar64);
				size = (uint64_t)PCI_MAPREG_MEM64_SIZE(
				      (((uint64_t)mask64) << 32) | mask);
				width = 8;
				break;
			default:
				print_tag(pb->pc, tag);
				printf("reserved mapping type 0x%x\n",
					PCI_MAPREG_MEM_TYPE(mask));
				continue;
			}

			if (size == 0) {
				if (pci_conf_debug) {
					print_tag(pb->pc, tag);
					printf("MEM%d BAR 0x%x is void\n",
					    PCI_MAPREG_MEM_TYPE(mask) ==
						PCI_MAPREG_MEM_TYPE_64BIT ?
						64 : 32, br);
				}
				continue;
			} else {
				if (pci_conf_debug) {
					print_tag(pb->pc, tag);
					printf("MEM%d BAR 0x%x has size %#lx\n",
					    PCI_MAPREG_MEM_TYPE(mask) ==
						PCI_MAPREG_MEM_TYPE_64BIT ?
						64 : 32,
					    br, (unsigned long)size);
				}
			}

			if (pb->nmemwin >= MAX_CONF_MEM) {
				printf("pciconf: too many memory windows\n");
				return -1;
			}

			pm = get_mem_desc(pb, size);
			pm->dev = pd;
			pm->reg = br;
			pm->size = size;
			pm->align = 4;
			pm->prefetch = PCI_MAPREG_MEM_PREFETCHABLE(mask);
			if (pci_conf_debug) {
				print_tag(pb->pc, tag);
				printf("Register 0x%x, memory size %"
				    PRIu64 "\n", br, pm->size);
			}
			pb->nmemwin++;
			if (pm->prefetch) {
				pb->pmem_total += size;
				if (pb->pmem_align < pm->size)
					pb->pmem_align = pm->size;
			} else {
				pb->mem_total += size;
				if (pb->mem_align < pm->size)
					pb->mem_align = pm->size;
			}
		}
	}

	if (mode & PCI_CONF_MAP_ROM) {
		/* Size the expansion ROM (bit 0 is the enable, hence 0xfffffffe). */
		bar = pci_conf_read(pb->pc, tag, PCI_MAPREG_ROM);
		pci_conf_write(pb->pc, tag, PCI_MAPREG_ROM, 0xfffffffe);
		mask = pci_conf_read(pb->pc, tag, PCI_MAPREG_ROM);
		pci_conf_write(pb->pc, tag, PCI_MAPREG_ROM, bar);

		if (mask != 0 && mask != 0xffffffff) {
			if (pb->nmemwin >= MAX_CONF_MEM) {
				printf("pciconf: too many memory windows\n");
				return -1;
			}
			size = (uint64_t)PCI_MAPREG_MEM_SIZE(mask);

			pm = get_mem_desc(pb, size);
			pm->dev = pd;
			pm->reg = PCI_MAPREG_ROM;
			pm->size = size;
			pm->align = 4;
			pm->prefetch = 0;
			if (pci_conf_debug) {
				print_tag(pb->pc, tag);
				printf("Expansion ROM memory size %"
				    PRIu64 "\n", pm->size);
			}
			pb->nmemwin++;
			if (pm->prefetch) {
				pb->pmem_total += size;
				if (pb->pmem_align < pm->size)
					pb->pmem_align = pm->size;
			} else {
				pb->mem_total += size;
				if (pb->mem_align < pm->size)
					pb->mem_align = pm->size;
			}
		}
	} else {
		/* Don't enable ROMs if we aren't going to map them. */
		mode &= ~PCI_CONF_ENABLE_ROM;
		pd->enable &= ~PCI_CONF_ENABLE_ROM;
	}

	if (!(mode & PCI_CONF_ENABLE_ROM)) {
		/* Ensure ROM is disabled */
		bar = pci_conf_read(pb->pc, tag, PCI_MAPREG_ROM);
		pci_conf_write(pb->pc, tag, PCI_MAPREG_ROM,
		    bar & ~PCI_MAPREG_ROM_ENABLE);
	}

	return 0;
}
860 
861 /************************************************************************/
862 /************************************************************************/
863 /********************   Bus configuration routines   ********************/
864 /************************************************************************/
865 /************************************************************************/
/*
 * Allocate `amt' bytes with the given alignment from resource `r'.
 * When `ok64' is true (the target BAR decodes 64 bits), prefer an
 * address above 4GB, falling back to anywhere in the arena; when
 * false, constrain the allocation below 4GB.  Returns the allocated
 * address, or ~0ULL on failure.
 */
static uint64_t
pci_allocate_range(struct pciconf_resource * const r, const uint64_t amt,
		   const int align, const bool ok64 __used_only_lp64)
{
	vmem_size_t const size = (vmem_size_t) amt;
	vmem_addr_t result;
	int error;

#ifdef _LP64
	/*
	 * If a 64-bit range IS OK, then we prefer allocating above 4GB.
	 *
	 * XXX We guard this with _LP64 because vmem uses uintptr_t
	 * internally.
	 */
	if (!ok64) {
		error = vmem_xalloc(r->arena, size, align, 0, 0,
				    VMEM_ADDR_MIN, 0xffffffffUL,
				    VM_BESTFIT | VM_NOSLEEP,
				    &result);
	} else {
		error = vmem_xalloc(r->arena, size, align, 0, 0,
				    (1UL << 32), VMEM_ADDR_MAX,
				    VM_BESTFIT | VM_NOSLEEP,
				    &result);
		if (error) {
			/* Nothing above 4GB; accept any address. */
			error = vmem_xalloc(r->arena, size, align, 0, 0,
					    VMEM_ADDR_MIN, VMEM_ADDR_MAX,
					    VM_BESTFIT | VM_NOSLEEP,
					    &result);
		}
	}
#else
	error = vmem_xalloc(r->arena, size, align, 0, 0,
			    VMEM_ADDR_MIN, 0xffffffffUL,
			    VM_BESTFIT | VM_NOSLEEP,
			    &result);
#endif /* _LP64 */

	if (error)
		return ~0ULL;

	return result;
}
910 
/*
 * Assign addresses to every I/O window collected for bus `pb' (the
 * list is sorted in decreasing size order, so big allocations land
 * first) and program the corresponding BARs.  Bridge windows
 * (pd->ppb != NULL, reg == 0) instead seed the child bus's I/O
 * arena.  Returns 0 on success, -1 if an allocation fails.
 */
static int
setup_iowins(pciconf_bus_t *pb)
{
	pciconf_win_t	*pi;
	pciconf_dev_t	*pd;
	struct pciconf_resource_rsvd *rsvd;
	int		error;

	for (pi = pb->pciiowin; pi < &pb->pciiowin[pb->niowin]; pi++) {
		if (pi->size == 0)
			continue;

		pd = pi->dev;
		rsvd = pci_bar_is_reserved(pb, pd, pi->reg);

		if (pb->io_res.arena == NULL) {
			/* Bus has no IO ranges, disable IO BAR */
			pi->address = 0;
			pd->enable &= ~PCI_CONF_ENABLE_IO;
			goto write_ioaddr;
		}

		pi->address = pci_allocate_range(&pb->io_res, pi->size,
		    pi->align, false);
		/* pci_allocate_range() returns ~0 on failure. */
		if (~pi->address == 0) {
			print_tag(pd->pc, pd->tag);
			printf("Failed to allocate PCI I/O space (%"
			    PRIu64 " req)\n", pi->size);
			return -1;
		}
		if (pd->ppb && pi->reg == 0) {
			/* Bridge window: becomes the child bus's I/O arena. */
			error = init_range_resource(&pd->ppb->io_res,
			    "ppb-io", pi->address, pi->size);
			if (error) {
				print_tag(pd->pc, pd->tag);
				printf("Failed to alloc I/O arena for bus %d\n",
				    pd->ppb->busno);
				return -1;
			}
			continue;
		}
		/* 16-bit-only decoders cannot use addresses above 64K. */
		if (!pb->io_32bit && pi->address > 0xFFFF) {
			pi->address = 0;
			pd->enable &= ~PCI_CONF_ENABLE_IO;
		} else {
			pd->enable |= PCI_CONF_ENABLE_IO;
		}
write_ioaddr:
		if (pci_conf_debug) {
			print_tag(pd->pc, pd->tag);
			printf("Putting %" PRIu64 " I/O bytes @ %#" PRIx64
			    " (reg %x)\n", pi->size, pi->address, pi->reg);
		}
		pci_conf_write(pd->pc, pd->tag, pi->reg,
		    PCI_MAPREG_IO_ADDR(pi->address) | PCI_MAPREG_TYPE_IO);

		/* Tell the reservation owner if its device moved. */
		if (rsvd != NULL && rsvd->start != pi->address)
			rsvd->callback(rsvd->callback_arg, pi->address);
	}
	return 0;
}
972 
/*
 * Assign addresses to every memory window collected on bus `pb'
 * (device memory BARs, PCI-PCI bridge memory windows and expansion
 * ROM BARs) and program the devices' configuration registers.
 *
 * Returns 0 on success, -1 if any window cannot be allocated.
 */
static int
setup_memwins(pciconf_bus_t *pb)
{
	pciconf_win_t	*pm;
	pciconf_dev_t	*pd;
	pcireg_t	base;
	struct pciconf_resource *r;
	struct pciconf_resource_rsvd *rsvd;
	bool		ok64;
	int		error;

	for (pm = pb->pcimemwin; pm < &pb->pcimemwin[pb->nmemwin]; pm++) {
		if (pm->size == 0)
			continue;

		ok64 = false;
		pd = pm->dev;
		rsvd = pci_bar_is_reserved(pb, pd, pm->reg);

		/* Prefetchable windows allocate from the prefetch arena. */
		if (pm->prefetch) {
			r = &pb->pmem_res;
			ok64 = pb->pmem_64bit;
		} else {
			r = &pb->mem_res;
			ok64 = pb->mem_64bit && pd->ppb == NULL;
		}

		/*
		 * We need to figure out if the memory BAR is 64-bit
		 * capable or not.  If it's not, then we need to constrain
		 * the address allocation.
		 */
		if (pm->reg == PCI_MAPREG_ROM) {
			ok64 = false;
		} else if (ok64) {
			base = pci_conf_read(pd->pc, pd->tag, pm->reg);
			ok64 = PCI_MAPREG_MEM_TYPE(base) ==
			    PCI_MAPREG_MEM_TYPE_64BIT;
		}

		pm->address = pci_allocate_range(r, pm->size, pm->align,
						 ok64);
		/* ~0 is the allocator's failure sentinel. */
		if (~pm->address == 0) {
			print_tag(pd->pc, pd->tag);
			printf(
			   "Failed to allocate PCI memory space (%" PRIu64
			   " req, prefetch=%d ok64=%d)\n", pm->size,
			   pm->prefetch, (int)ok64);
			return -1;
		}
		/*
		 * A window with reg == 0 on a bridge device is the
		 * bridge's own memory window: it becomes the arena the
		 * child bus allocates from.  The bridge registers
		 * themselves are written later, in configure_bridge().
		 */
		if (pd->ppb && pm->reg == 0) {
			const char *name = pm->prefetch ? "ppb-pmem"
							: "ppb-mem";
			r = pm->prefetch ? &pd->ppb->pmem_res
					 : &pd->ppb->mem_res;
			error = init_range_resource(r, name,
			    pm->address, pm->size);
			if (error) {
				print_tag(pd->pc, pd->tag);
				printf("Failed to alloc MEM arena for bus %d\n",
				    pd->ppb->busno);
				return -1;
			}
			continue;
		}
		/* A 32-bit BAR cannot hold an address above 4GB. */
		if (!ok64 && pm->address > 0xFFFFFFFFULL) {
			pm->address = 0;
			pd->enable &= ~PCI_CONF_ENABLE_MEM;
		} else
			pd->enable |= PCI_CONF_ENABLE_MEM;

		/* ROM BARs are programmed in the second pass below. */
		if (pm->reg != PCI_MAPREG_ROM) {
			if (pci_conf_debug) {
				print_tag(pd->pc, pd->tag);
				printf(
				    "Putting %" PRIu64 " MEM bytes @ %#"
				    PRIx64 " (reg %x)\n", pm->size,
				    pm->address, pm->reg);
			}
			/* Preserve the BAR's type bits when rewriting it. */
			base = pci_conf_read(pd->pc, pd->tag, pm->reg);
			base = PCI_MAPREG_MEM_ADDR(pm->address) |
			    PCI_MAPREG_MEM_TYPE(base);
			pci_conf_write(pd->pc, pd->tag, pm->reg, base);
			/* A 64-bit BAR's upper half lives in reg + 4. */
			if (PCI_MAPREG_MEM_TYPE(base) ==
			    PCI_MAPREG_MEM_TYPE_64BIT) {
				base = (pcireg_t)
				    (PCI_MAPREG_MEM64_ADDR(pm->address) >> 32);
				pci_conf_write(pd->pc, pd->tag, pm->reg + 4,
				    base);
			}
		}

		/* Notify the reservation owner if its range moved. */
		if (rsvd != NULL && rsvd->start != pm->address) {
			rsvd->callback(rsvd->callback_arg, pm->address);
		}
	}
	/* Second pass: program the expansion ROM BARs. */
	for (pm = pb->pcimemwin; pm < &pb->pcimemwin[pb->nmemwin]; pm++) {
		if (pm->reg == PCI_MAPREG_ROM && pm->address != -1) {
			pd = pm->dev;
			if (!(pd->enable & PCI_CONF_MAP_ROM))
				continue;
			if (pci_conf_debug) {
				print_tag(pd->pc, pd->tag);
				printf(
				    "Putting %" PRIu64 " ROM bytes @ %#"
				    PRIx64 " (reg %x)\n", pm->size,
				    pm->address, pm->reg);
			}
			base = (pcireg_t) pm->address;
			/* Only set the decode-enable bit when requested. */
			if (pd->enable & PCI_CONF_ENABLE_ROM)
				base |= PCI_MAPREG_ROM_ENABLE;

			pci_conf_write(pd->pc, pd->tag, pm->reg, base);
		}
	}
	return 0;
}
1090 
1091 static bool
1092 constrain_bridge_mem_range(struct pciconf_resource * const r,
1093 			   u_long * const base,
1094 			   u_long * const limit,
1095 			   const bool ok64 __used_only_lp64)
1096 {
1097 
1098 	*base = r->min_addr;
1099 	*limit = r->max_addr;
1100 
1101 #ifdef _LP64
1102 	if (!ok64) {
1103 		if (r->min_addr >= (1UL << 32)) {
1104 			return true;
1105 		}
1106 		if (r->max_addr > 0xffffffffUL) {
1107 			*limit = 0xffffffffUL;
1108 		}
1109 	}
1110 #endif /* _LP64 */
1111 
1112 	return false;
1113 }
1114 
1115 /*
 * Configure I/O, memory, and prefetchable memory spaces, then make
1117  * a call to configure_bus().
1118  */
/*
 * `pd' is a PCI-PCI bridge.  Program its I/O, memory and prefetchable
 * memory base/limit registers from the ranges carved out for its
 * secondary bus, recursively configure that bus, and finally enable
 * the bridge.  Returns the result of configure_bus() on the secondary
 * bus (0 on success).
 */
static int
configure_bridge(pciconf_dev_t *pd)
{
	unsigned long	io_base, io_limit, mem_base, mem_limit;
	pciconf_bus_t	*pb;
	pcireg_t	io, iohigh, mem, cmd;
	int		rv;
	bool		isprefetchmem64;
	bool		bad_range;

	pb = pd->ppb;
	/* Configure I/O base & limit */
	if (pb->io_res.arena) {
		io_base = pb->io_res.min_addr;
		io_limit = pb->io_res.max_addr;
	} else {
		/* No I/O space: base > limit disables the window. */
		io_base  = 0x1000;	/* 4K */
		io_limit = 0x0000;
	}
	if (pb->io_32bit) {
		/* Upper 16 bits of base/limit go in the IOHIGH register. */
		iohigh = __SHIFTIN(io_base >> 16, PCI_BRIDGE_IOHIGH_BASE) |
		    __SHIFTIN(io_limit >> 16, PCI_BRIDGE_IOHIGH_LIMIT);
	} else {
		if (io_limit > 0xFFFF) {
			printf("Bus %d bridge does not support 32-bit I/O.  ",
			    pb->busno);
			printf("Disabling I/O accesses\n");
			io_base  = 0x1000;	/* 4K */
			io_limit = 0x0000;
		}
		iohigh = 0;
	}
	/* Preserve the status bits while writing base/limit bits 15:8. */
	io = pci_conf_read(pb->pc, pd->tag, PCI_BRIDGE_STATIO_REG) &
	    PCI_BRIDGE_STATIO_STATUS;
	io |= __SHIFTIN((io_base >> 8) & PCI_BRIDGE_STATIO_IOADDR,
	    PCI_BRIDGE_STATIO_IOBASE);
	io |= __SHIFTIN((io_limit >> 8) & PCI_BRIDGE_STATIO_IOADDR,
	    PCI_BRIDGE_STATIO_IOLIMIT);
	pci_conf_write(pb->pc, pd->tag, PCI_BRIDGE_STATIO_REG, io);
	pci_conf_write(pb->pc, pd->tag, PCI_BRIDGE_IOHIGH_REG, iohigh);

	/* Configure mem base & limit */
	bad_range = false;
	if (pb->mem_res.arena) {
		/* The non-prefetchable window is always 32-bit only. */
		bad_range = constrain_bridge_mem_range(&pb->mem_res,
						       &mem_base,
						       &mem_limit,
						       false);
	} else {
		/* No memory space: base > limit disables the window. */
		mem_base  = 0x100000;	/* 1M */
		mem_limit = 0x000000;
	}
	if (bad_range) {
		printf("Bus %d bridge MEM range out of range.  ", pb->busno);
		printf("Disabling MEM accesses\n");
		mem_base  = 0x100000;	/* 1M */
		mem_limit = 0x000000;
	}
	mem = __SHIFTIN((mem_base >> 16) & PCI_BRIDGE_MEMORY_ADDR,
	    PCI_BRIDGE_MEMORY_BASE);
	mem |= __SHIFTIN((mem_limit >> 16) & PCI_BRIDGE_MEMORY_ADDR,
	    PCI_BRIDGE_MEMORY_LIMIT);
	pci_conf_write(pb->pc, pd->tag, PCI_BRIDGE_MEMORY_REG, mem);

	/* Configure prefetchable mem base & limit */
	mem = pci_conf_read(pb->pc, pd->tag, PCI_BRIDGE_PREFETCHMEM_REG);
	isprefetchmem64 = PCI_BRIDGE_PREFETCHMEM_64BITS(mem);
	bad_range = false;
	if (pb->pmem_res.arena) {
		bad_range = constrain_bridge_mem_range(&pb->pmem_res,
						       &mem_base,
						       &mem_limit,
						       isprefetchmem64);
	} else {
		mem_base  = 0x100000;	/* 1M */
		mem_limit = 0x000000;
	}
	if (bad_range) {
		printf("Bus %d bridge does not support 64-bit PMEM.  ",
		    pb->busno);
		printf("Disabling prefetchable-MEM accesses\n");
		mem_base  = 0x100000;	/* 1M */
		mem_limit = 0x000000;
	}
	mem = __SHIFTIN((mem_base >> 16) & PCI_BRIDGE_PREFETCHMEM_ADDR,
	    PCI_BRIDGE_PREFETCHMEM_BASE);
	mem |= __SHIFTIN((mem_limit >> 16) & PCI_BRIDGE_PREFETCHMEM_ADDR,
	    PCI_BRIDGE_PREFETCHMEM_LIMIT);
	pci_conf_write(pb->pc, pd->tag, PCI_BRIDGE_PREFETCHMEM_REG, mem);
	/*
	 * XXX -- 64-bit systems need a lot more than just this...
	 */
	if (isprefetchmem64) {
		/* Write the upper 32 bits of the prefetchable window. */
		mem_base  = (uint64_t)mem_base  >> 32;
		mem_limit = (uint64_t)mem_limit >> 32;
		pci_conf_write(pb->pc, pd->tag,
		    PCI_BRIDGE_PREFETCHBASEUP32_REG, mem_base & 0xffffffff);
		pci_conf_write(pb->pc, pd->tag,
		    PCI_BRIDGE_PREFETCHLIMITUP32_REG, mem_limit & 0xffffffff);
	}

	/* Recursively configure the secondary bus. */
	rv = configure_bus(pb);

	/* The child bus's resource arenas are no longer needed. */
	fini_range_resource(&pb->io_res);
	fini_range_resource(&pb->mem_res);
	fini_range_resource(&pb->pmem_res);

	if (rv == 0) {
		/* Enable error reporting, then I/O and MEM decoding. */
		cmd = pci_conf_read(pd->pc, pd->tag, PCI_BRIDGE_CONTROL_REG);
		cmd &= ~PCI_BRIDGE_CONTROL; /* Clear control bit first */
		cmd |= PCI_BRIDGE_CONTROL_PERE | PCI_BRIDGE_CONTROL_SERR;
		if (pb->fast_b2b)
			cmd |= PCI_BRIDGE_CONTROL_SECFASTB2B;

		pci_conf_write(pd->pc, pd->tag, PCI_BRIDGE_CONTROL_REG, cmd);
		cmd = pci_conf_read(pd->pc, pd->tag, PCI_COMMAND_STATUS_REG);
		cmd |= PCI_COMMAND_IO_ENABLE | PCI_COMMAND_MEM_ENABLE;
		pci_conf_write(pd->pc, pd->tag, PCI_COMMAND_STATUS_REG, cmd);
	}

	return rv;
}
1241 
1242 /*
1243  * Calculate latency values, allocate I/O and MEM segments, then set them
1244  * up.  If a PCI-PCI bridge is found, configure the bridge separately,
1245  * which will cause a recursive call back here.
1246  */
/*
 * Compute the bus's latency-timer budget, assign all I/O and memory
 * windows, then program each device's command, latency-timer and
 * cache-line-size registers.  PCI-PCI bridges are configured
 * recursively via configure_bridge().
 *
 * Returns 0 on success, 1 for an empty bus, -1 on failure.
 */
static int
configure_bus(pciconf_bus_t *pb)
{
	pciconf_dev_t	*pd;
	int		def_ltim, max_ltim, band, bus_mhz;

	if (pb->ndevs == 0) {
		if (pci_conf_debug)
			printf("PCI bus %d - no devices\n", pb->busno);
		return 1;
	}
	bus_mhz = pb->freq_66 ? 66 : 33;
	max_ltim = pb->max_mingnt * bus_mhz / 4;	/* cvt to cycle count */
	band = 4000000;					/* 0.25us cycles/sec */
	if (band < pb->bandwidth_used) {
		printf("PCI bus %d: Warning: Total bandwidth exceeded!? (%d)\n",
		    pb->busno, pb->bandwidth_used);
		def_ltim = -1;
	} else {
		/* Share the leftover bandwidth evenly among the devices. */
		def_ltim = (band - pb->bandwidth_used) / pb->ndevs;
		if (def_ltim > pb->min_maxlat)
			def_ltim = pb->min_maxlat;
		def_ltim = def_ltim * bus_mhz / 4;
	}
	/* Round latency timers up to a multiple of 8 cycles. */
	def_ltim = (def_ltim + 7) & ~7;
	max_ltim = (max_ltim + 7) & ~7;

	/* The latency timer register is 8 bits wide; cap at 255. */
	pb->def_ltim = MIN(def_ltim, 255);
	pb->max_ltim = MIN(MAX(max_ltim, def_ltim), 255);

	/*
	 * Now we have what we need to initialize the devices.
	 * It would probably be better if we could allocate all of these
	 * for all busses at once, but "not right now".  First, get a list
	 * of free memory ranges from the m.d. system.
	 */
	if (setup_iowins(pb) || setup_memwins(pb)) {
		printf("PCI bus configuration failed: "
		"unable to assign all I/O and memory ranges.\n");
		return -1;
	}

	/*
	 * Configure the latency for the devices, and enable them.
	 */
	for (pd = pb->device; pd < &pb->device[pb->ndevs]; pd++) {
		pcireg_t cmd, classreg, misc;
		int	ltim;

		if (pci_conf_debug) {
			print_tag(pd->pc, pd->tag);
			printf("Configuring device.\n");
		}
		classreg = pci_conf_read(pd->pc, pd->tag, PCI_CLASS_REG);
		misc = pci_conf_read(pd->pc, pd->tag, PCI_BHLC_REG);
		cmd = pci_conf_read(pd->pc, pd->tag, PCI_COMMAND_STATUS_REG);
		if (pd->enable & PCI_CONF_ENABLE_PARITY)
			cmd |= PCI_COMMAND_PARITY_ENABLE;
		if (pd->enable & PCI_CONF_ENABLE_SERR)
			cmd |= PCI_COMMAND_SERR_ENABLE;
		if (pb->fast_b2b)
			cmd |= PCI_COMMAND_BACKTOBACK_ENABLE;
		/*
		 * For PCI-PCI bridges, I/O and MEM decoding is enabled
		 * later in configure_bridge(); here only bus mastering.
		 */
		if (PCI_CLASS(classreg) != PCI_CLASS_BRIDGE ||
		    PCI_SUBCLASS(classreg) != PCI_SUBCLASS_BRIDGE_PCI) {
			if (pd->enable & PCI_CONF_ENABLE_IO)
				cmd |= PCI_COMMAND_IO_ENABLE;
			if (pd->enable & PCI_CONF_ENABLE_MEM)
				cmd |= PCI_COMMAND_MEM_ENABLE;
			if (pd->enable & PCI_CONF_ENABLE_BM)
				cmd |= PCI_COMMAND_MASTER_ENABLE;
			ltim = pd->min_gnt * bus_mhz / 4;
			ltim = MIN (MAX (pb->def_ltim, ltim), pb->max_ltim);
		} else {
			cmd |= PCI_COMMAND_MASTER_ENABLE;
			ltim = MIN (pb->def_ltim, pb->max_ltim);
		}
		if ((pd->enable &
		    (PCI_CONF_ENABLE_MEM | PCI_CONF_ENABLE_IO)) == 0) {
			print_tag(pd->pc, pd->tag);
			printf("Disabled due to lack of resources.\n");
			cmd &= ~(PCI_COMMAND_MASTER_ENABLE |
			    PCI_COMMAND_IO_ENABLE | PCI_COMMAND_MEM_ENABLE);
		}
		pci_conf_write(pd->pc, pd->tag, PCI_COMMAND_STATUS_REG, cmd);

		/* Program latency timer and cache line size into BHLC. */
		misc &= ~((PCI_LATTIMER_MASK << PCI_LATTIMER_SHIFT) |
		    (PCI_CACHELINE_MASK << PCI_CACHELINE_SHIFT));
		misc |= (ltim & PCI_LATTIMER_MASK) << PCI_LATTIMER_SHIFT;
		misc |= ((pb->cacheline_size >> 2) & PCI_CACHELINE_MASK) <<
		    PCI_CACHELINE_SHIFT;
		pci_conf_write(pd->pc, pd->tag, PCI_BHLC_REG, misc);

		/* Recurse into PCI-PCI bridges. */
		if (pd->ppb) {
			if (configure_bridge(pd) < 0)
				return -1;
			continue;
		}
	}

	if (pci_conf_debug)
		printf("PCI bus %d configured\n", pb->busno);

	return 0;
}
1351 
1352 static bool
1353 mem_region_ok64(struct pciconf_resource * const r __used_only_lp64)
1354 {
1355 	bool rv = false;
1356 
1357 #ifdef _LP64
1358 	/*
1359 	 * XXX We need to guard this with _LP64 because vmem uses
1360 	 * uintptr_t internally.
1361 	 */
1362 	vmem_size_t result;
1363 	if (vmem_xalloc(r->arena, 1/*size*/, 1/*align*/, 0/*phase*/,
1364 			0/*nocross*/, (1UL << 32), VMEM_ADDR_MAX,
1365 			VM_INSTANTFIT | VM_NOSLEEP, &result) == 0) {
1366 		vmem_free(r->arena, result, 1);
1367 		rv = true;
1368 	}
1369 #endif /* _LP64 */
1370 
1371 	return rv;
1372 }
1373 
1374 /*
1375  * pciconf_resource_init:
1376  *
 *	Allocate and initialize a pci configuration resources container.
1378  */
1379 struct pciconf_resources *
1380 pciconf_resource_init(void)
1381 {
1382 	struct pciconf_resources *rs;
1383 
1384 	rs = kmem_zalloc(sizeof(*rs), KM_SLEEP);
1385 
1386 	return (rs);
1387 }
1388 
1389 /*
1390  * pciconf_resource_fini:
1391  *
1392  *	Dispose of a pci configuration resources container.
1393  */
1394 void
1395 pciconf_resource_fini(struct pciconf_resources *rs)
1396 {
1397 	int i;
1398 
1399 	for (i = 0; i < PCICONF_RESOURCE_NTYPES; i++) {
1400 		fini_range_resource(&rs->resources[i]);
1401 	}
1402 
1403 	kmem_free(rs, sizeof(*rs));
1404 }
1405 
1406 /*
1407  * pciconf_resource_add:
1408  *
1409  *	Add a pci configuration resource to a container.
1410  */
1411 int
1412 pciconf_resource_add(struct pciconf_resources *rs, int type,
1413     bus_addr_t start, bus_size_t size)
1414 {
1415 	bus_addr_t end = start + (size - 1);
1416 	struct pciconf_resource *r;
1417 	struct pciconf_resource_rsvd *rsvd;
1418 	int error, rsvd_type, align;
1419 	vmem_addr_t result;
1420 	bool first;
1421 
1422 	if (size == 0 || end <= start)
1423 		return EINVAL;
1424 
1425 	if (type < 0 || type >= PCICONF_RESOURCE_NTYPES)
1426 		return EINVAL;
1427 
1428 	r = &rs->resources[type];
1429 
1430 	first = r->arena == NULL;
1431 	if (first) {
1432 		r->arena = create_vmem_arena(pciconf_resource_names[type],
1433 		    0, 0, VM_SLEEP);
1434 		r->min_addr = VMEM_ADDR_MAX;
1435 		r->max_addr = VMEM_ADDR_MIN;
1436 	}
1437 
1438 	error = vmem_add(r->arena, start, size, VM_SLEEP);
1439 	if (error == 0) {
1440 		if (start < r->min_addr)
1441 			r->min_addr = start;
1442 		if (end > r->max_addr)
1443 			r->max_addr = end;
1444 	}
1445 
1446 	r->total_size += size;
1447 
1448 	switch (type) {
1449 	case PCICONF_RESOURCE_IO:
1450 		rsvd_type = PCI_CONF_MAP_IO;
1451 		align = 0x1000;
1452 		break;
1453 	case PCICONF_RESOURCE_MEM:
1454 	case PCICONF_RESOURCE_PREFETCHABLE_MEM:
1455 		rsvd_type = PCI_CONF_MAP_MEM;
1456 		align = 0x100000;
1457 		break;
1458 	default:
1459 		rsvd_type = 0;
1460 		align = 0;
1461 		break;
1462 	}
1463 
1464 	/*
1465 	 * Exclude reserved ranges from available resources
1466 	 */
1467 	LIST_FOREACH(rsvd, &pciconf_resource_reservations, next) {
1468 		if (rsvd->type != rsvd_type)
1469 			continue;
1470 		/*
1471 		 * The reserved range may not be within our resource window.
1472 		 * That's fine, so ignore the error.
1473 		 */
1474 		(void)vmem_xalloc(r->arena, rsvd->size, align, 0, 0,
1475 				  rsvd->start, rsvd->start + rsvd->size,
1476 				  VM_BESTFIT | VM_NOSLEEP,
1477 				  &result);
1478 	}
1479 
1480 	return 0;
1481 }
1482 
1483 /*
1484  * pciconf_resource_reserve:
1485  *
1486  *	Mark a pci configuration resource as in-use. Devices
1487  *	already configured to use these resources are notified
1488  *	during resource assignment if their resources are changed.
1489  */
1490 void
1491 pciconf_resource_reserve(int type, bus_addr_t start, bus_size_t size,
1492     void (*callback)(void *, uint64_t), void *callback_arg)
1493 {
1494 	struct pciconf_resource_rsvd *rsvd;
1495 
1496 	rsvd = kmem_zalloc(sizeof(*rsvd), KM_SLEEP);
1497 	rsvd->type = type;
1498 	rsvd->start = start;
1499 	rsvd->size = size;
1500 	rsvd->callback = callback;
1501 	rsvd->callback_arg = callback_arg;
1502 	LIST_INSERT_HEAD(&pciconf_resource_reservations, rsvd, next);
1503 }
1504 
1505 /*
1506  * Let's configure the PCI bus.
1507  * This consists of basically scanning for all existing devices,
1508  * identifying their needs, and then making another pass over them
1509  * to set:
1510  *	1. I/O addresses
1511  *	2. Memory addresses (Prefetchable and not)
1512  *	3. PCI command register
1513  *	4. The latency part of the PCI BHLC (BIST (Built-In Self Test),
1514  *	    Header type, Latency timer, Cache line size) register
1515  *
1516  * The command register is set to enable fast back-to-back transactions
1517  * if the host bridge says it can handle it.  We also configure
1518  * Master Enable, SERR enable, parity enable, and (if this is not a
1519  * PCI-PCI bridge) the I/O and Memory spaces.  Apparently some devices
1520  * will not report some I/O space.
1521  *
1522  * The latency is computed to be a "fair share" of the bus bandwidth.
1523  * The bus bandwidth variable is initialized to the number of PCI cycles
1524  * in one second.  The number of cycles taken for one transaction by each
1525  * device (MAX_LAT + MIN_GNT) is then subtracted from the bandwidth.
1526  * Care is taken to ensure that the latency timer won't be set such that
1527  * it would exceed the critical time for any device.
1528  *
1529  * This is complicated somewhat due to the presence of bridges.  PCI-PCI
1530  * bridges are probed and configured recursively.
1531  */
int
pci_configure_bus(pci_chipset_tag_t pc, struct pciconf_resources *rs,
    int firstbus, int cacheline_size)
{
	pciconf_bus_t	*pb;
	int		rv;

	pb = kmem_zalloc(sizeof (pciconf_bus_t), KM_SLEEP);
	pb->busno = firstbus;
	pb->next_busno = pb->busno + 1;
	pb->last_busno = 255;
	pb->cacheline_size = cacheline_size;
	pb->parent_bus = NULL;
	pb->swiz = 0;
	pb->io_32bit = 1;
	/*
	 * These are struct copies: the arenas inside remain owned by
	 * `rs' and are released by pciconf_resource_fini(), not here.
	 */
	pb->io_res = rs->resources[PCICONF_RESOURCE_IO];

	/* Fall back on the prefetchable arena if no plain MEM arena. */
	pb->mem_res = rs->resources[PCICONF_RESOURCE_MEM];
	if (pb->mem_res.arena == NULL)
		pb->mem_res = rs->resources[PCICONF_RESOURCE_PREFETCHABLE_MEM];

	/* ...and vice versa, so both windows have something to use. */
	pb->pmem_res = rs->resources[PCICONF_RESOURCE_PREFETCHABLE_MEM];
	if (pb->pmem_res.arena == NULL)
		pb->pmem_res = rs->resources[PCICONF_RESOURCE_MEM];

	/*
	 * Probe the memory region arenas to see if allocation of
	 * 64-bit addresses is possible.
	 */
	pb->mem_64bit = mem_region_ok64(&pb->mem_res);
	pb->pmem_64bit = mem_region_ok64(&pb->pmem_res);

	pb->pc = pc;
	pb->io_total = pb->mem_total = pb->pmem_total = 0;

	/* Scan the bus hierarchy, then assign and program resources. */
	rv = probe_bus(pb);
	pb->last_busno = pb->next_busno - 1;
	if (rv == 0)
		rv = configure_bus(pb);

	/*
	 * All done!
	 */
	kmem_free(pb, sizeof(*pb));
	return rv;
}
1578