10Sstevel@tonic-gate /*
20Sstevel@tonic-gate * CDDL HEADER START
30Sstevel@tonic-gate *
40Sstevel@tonic-gate * The contents of this file are subject to the terms of the
57656SSherry.Moore@Sun.COM * Common Development and Distribution License (the "License").
67656SSherry.Moore@Sun.COM * You may not use this file except in compliance with the License.
70Sstevel@tonic-gate *
80Sstevel@tonic-gate * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
90Sstevel@tonic-gate * or http://www.opensolaris.org/os/licensing.
100Sstevel@tonic-gate * See the License for the specific language governing permissions
110Sstevel@tonic-gate * and limitations under the License.
120Sstevel@tonic-gate *
130Sstevel@tonic-gate * When distributing Covered Code, include this CDDL HEADER in each
140Sstevel@tonic-gate * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
150Sstevel@tonic-gate * If applicable, add the following below this CDDL HEADER, with the
160Sstevel@tonic-gate * fields enclosed by brackets "[]" replaced with your own identifying
170Sstevel@tonic-gate * information: Portions Copyright [yyyy] [name of copyright owner]
180Sstevel@tonic-gate *
190Sstevel@tonic-gate * CDDL HEADER END
200Sstevel@tonic-gate */
210Sstevel@tonic-gate /*
22*11066Srafael.vanoni@sun.com * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
230Sstevel@tonic-gate * Use is subject to license terms.
240Sstevel@tonic-gate */
250Sstevel@tonic-gate
260Sstevel@tonic-gate
270Sstevel@tonic-gate /*
280Sstevel@tonic-gate * This is the lock device driver.
290Sstevel@tonic-gate *
300Sstevel@tonic-gate * The lock driver provides a variation of inter-process mutexes with the
310Sstevel@tonic-gate * following twist in semantics:
320Sstevel@tonic-gate * A waiter for a lock after a set timeout can "break" the lock and
330Sstevel@tonic-gate * grab it from the current owner (without informing the owner).
340Sstevel@tonic-gate *
 * These semantics temporarily result in multiple processes thinking they
 * own the lock. This usually does not make sense for cases where locks are
 * used to protect a critical region and it is important to serialize access
 * to data structures, as breaking the lock also loses the serialization
 * and can result in corrupt data structures.
400Sstevel@tonic-gate *
410Sstevel@tonic-gate * The usage for winlock driver is primarily driven by the graphics system
420Sstevel@tonic-gate * when doing DGA (direct graphics access) graphics. The locks are used to
430Sstevel@tonic-gate * protect access to the frame buffer (presumably reflects back to the screen)
440Sstevel@tonic-gate * between competing processes that directly write to the screen as opposed
450Sstevel@tonic-gate * to going through the window server etc.
460Sstevel@tonic-gate * In this case, the result of breaking the lock at worst causes the screen
470Sstevel@tonic-gate * image to be distorted and is easily fixed by doing a "refresh"
480Sstevel@tonic-gate *
 * In well-behaved applications, the lock is held for a very short time and
 * the breaking semantics do not come into play. Without this feature, using
 * normal inter-process mutexes would allow a misbehaved application to
 * grab the screen writing capability from the window manager and
 * effectively make the system look like it is hung (mouse pointer does not
 * move).
550Sstevel@tonic-gate *
 * A secondary aspect of the winlock driver is that it allows for extremely
 * fast lock acquire/release in cases where there is low contention. A memory
 * write is all that is needed (not even a function call). The window
 * manager is usually the only DGA writer, and this case is optimized for.
 * Occasionally some processes might do DGA graphics and cause kernel faults
 * to handle the contention/locking (and that has got to be slow!).
620Sstevel@tonic-gate *
630Sstevel@tonic-gate * The following IOCTLs are supported:
640Sstevel@tonic-gate *
650Sstevel@tonic-gate * GRABPAGEALLOC:
660Sstevel@tonic-gate * Compatibility with old cgsix device driver lockpage ioctls.
670Sstevel@tonic-gate * Lockpages created this way must be an entire page for compatibility with
680Sstevel@tonic-gate * older software. This ioctl allocates a lock context with its own
690Sstevel@tonic-gate * private lock page. The unique "ident" that identifies this lock is
700Sstevel@tonic-gate * returned.
710Sstevel@tonic-gate *
720Sstevel@tonic-gate * GRABPAGEFREE:
730Sstevel@tonic-gate * Compatibility with cgsix device driver lockpage ioctls. This
740Sstevel@tonic-gate * ioctl releases the lock context allocated by GRABPAGEALLOC.
750Sstevel@tonic-gate *
760Sstevel@tonic-gate * GRABLOCKINFO:
770Sstevel@tonic-gate * Returns a one-word flag. '1' means that multiple clients may
780Sstevel@tonic-gate * access this lock page. Older device drivers returned '0',
790Sstevel@tonic-gate * meaning that only two clients could access a lock page.
800Sstevel@tonic-gate *
810Sstevel@tonic-gate * GRABATTACH:
820Sstevel@tonic-gate * Not supported. This ioctl would have grabbed all lock pages
830Sstevel@tonic-gate * on behalf of the calling program.
840Sstevel@tonic-gate *
 *   WINLOCKALLOC:
 *	Allocate a lock context.  This ioctl accepts a key value as
 *	its argument.  If the key is zero, a new lock context is
 *	created, and its "ident" is returned.  If the key is nonzero,
 *	all existing contexts are checked to see if they match the
 *	key.  If a match is found, its reference count is incremented
 *	and its ident is returned, otherwise a new context is created
 *	and its ident is returned.
930Sstevel@tonic-gate *
 *   WINLOCKFREE:
 *	Free a lock context.  This ioctl accepts the ident of a lock
 *	context and decrements its reference count.  Once the reference
 *	count reaches zero *and* all mappings are released, the lock
 *	context is freed.  When all the lock contexts in the lock page are
 *	freed, the lock page is freed as well.
1000Sstevel@tonic-gate *
1010Sstevel@tonic-gate * WINLOCKSETTIMEOUT:
1020Sstevel@tonic-gate * Set lock timeout for a context. This ioctl accepts the ident
1030Sstevel@tonic-gate * of a lock context and a timeout value in milliseconds.
1040Sstevel@tonic-gate * Whenever lock contention occurs, the timer is started and the lock is
1050Sstevel@tonic-gate * broken after the timeout expires. If timeout value is zero, lock does
1060Sstevel@tonic-gate * not timeout. This value will be rounded to the nearest clock
1070Sstevel@tonic-gate * tick, so don't try to use it for real-time control or something.
1080Sstevel@tonic-gate *
1090Sstevel@tonic-gate * WINLOCKGETTIMEOUT:
1100Sstevel@tonic-gate * Get lock timeout from a context.
1110Sstevel@tonic-gate *
1120Sstevel@tonic-gate * WINLOCKDUMP:
1130Sstevel@tonic-gate * Dump state of this device.
1140Sstevel@tonic-gate *
1150Sstevel@tonic-gate *
1160Sstevel@tonic-gate * How /dev/winlock works:
1170Sstevel@tonic-gate *
 * Every lock context consists of two mappings for the client to the lock
 * page.  These mappings are known as the "lock page" and "unlock page"
 * to the client. The first mmap to the lock context (identified by the
 * sy_ident field returned during alloc) allocates a mapping to the lock page,
 * the second mmap allocates a mapping to the unlock page.
 * The mappings don't have to be ordered in virtual address space, but do
 * need to be ordered in time. Mapping and unmapping of these lock and unlock
 * pages should happen in pairs. Doing them one at a time, or unmapping one
 * and leaving the other mapped, causes undefined behavior.
 * The mappings are always of length PAGESIZE, and type MAP_SHARED.
1280Sstevel@tonic-gate *
1290Sstevel@tonic-gate * The first ioctl is to ALLOC a lock, either based on a key (if trying to
1300Sstevel@tonic-gate * grab a preexisting lock) or 0 (gets a default new one)
1310Sstevel@tonic-gate * This ioctl returns a value in sy_ident which is needed to do the
1320Sstevel@tonic-gate * later mmaps and FREE/other ioctls.
1330Sstevel@tonic-gate *
1340Sstevel@tonic-gate * The "page number" portion of the sy_ident needs to be passed as the
1350Sstevel@tonic-gate * file offset when doing an mmap for both the lock page and unlock page
1360Sstevel@tonic-gate *
1370Sstevel@tonic-gate * The value returned by mmap ( a user virtual address) needs to be
1380Sstevel@tonic-gate * incremented by the "page offset" portion of sy_ident to obtain the
1390Sstevel@tonic-gate * pointer to the actual lock. (Skipping this step, does not cause any
1400Sstevel@tonic-gate * visible error, but the process will be using the wrong lock!)
1410Sstevel@tonic-gate *
1420Sstevel@tonic-gate * On a fork(), the child process will inherit the mappings for free, but
1430Sstevel@tonic-gate * will not inherit the parent's lock ownership if any. The child should NOT
1440Sstevel@tonic-gate * do an explicit FREE on the lock context unless it did an explicit ALLOC.
1450Sstevel@tonic-gate * Only one process at a time is allowed to have a valid hat
1460Sstevel@tonic-gate * mapping to a lock page. This is enforced by this driver.
 * A client acquires a lock by writing a '1' to the lock page.
 * Note that it is not necessary to read and verify that the lock is '0'
 * prior to writing a '1' in it.
1500Sstevel@tonic-gate * If it does not already have a valid mapping to that page, the driver
1510Sstevel@tonic-gate * takes a fault (devmap_access), loads the client mapping
1520Sstevel@tonic-gate * and allows the client to continue. The client releases the lock by
1530Sstevel@tonic-gate * writing a '0' to the unlock page. Again, if it does not have a valid
1540Sstevel@tonic-gate * mapping to the unlock page, the segment driver takes a fault,
1550Sstevel@tonic-gate * loads the mapping, and lets the client continue. From this point
1560Sstevel@tonic-gate * forward, the client can make as many locks and unlocks as it
1570Sstevel@tonic-gate * wants, without any more faults into the kernel.
1580Sstevel@tonic-gate *
1590Sstevel@tonic-gate * If a different process wants to acquire a lock, it takes a page fault
1600Sstevel@tonic-gate * when it writes the '1' to the lock page. If the segment driver sees
1610Sstevel@tonic-gate * that the lock page contained a zero, then it invalidates the owner's
1620Sstevel@tonic-gate * mappings and gives the mappings to this process.
1630Sstevel@tonic-gate *
1640Sstevel@tonic-gate * If there is already a '1' in the lock page when the second client
1650Sstevel@tonic-gate * tries to access the lock page, then a lock exists. The segment
1660Sstevel@tonic-gate * driver sleeps the second client and, if applicable, starts the
1670Sstevel@tonic-gate * timeout on the lock. The owner's mapping to the unlock page
1680Sstevel@tonic-gate * is invalidated so that the driver will be woken again when the owner
1690Sstevel@tonic-gate * releases the lock.
1700Sstevel@tonic-gate *
1710Sstevel@tonic-gate * When the locking client finally writes a '0' to the unlock page, the
1720Sstevel@tonic-gate * segment driver takes another fault. The client is given a valid
1730Sstevel@tonic-gate * mapping, not to the unlock page, but to the "trash page", and allowed
1740Sstevel@tonic-gate * to continue. Meanwhile, the sleeping client is given a valid mapping
1750Sstevel@tonic-gate * to the lock/unlock pages and allowed to continue as well.
1760Sstevel@tonic-gate *
1770Sstevel@tonic-gate * RFE: There is a leak if process exits before freeing allocated locks
1780Sstevel@tonic-gate * But currently not tracking which locks were allocated by which
1790Sstevel@tonic-gate * process and we do not have a clean entry point into the driver
1800Sstevel@tonic-gate * to do garbage collection. If the interface used a file descriptor for each
1810Sstevel@tonic-gate * lock it allocs, then the driver can free up stuff in the _close routine
1820Sstevel@tonic-gate */
1830Sstevel@tonic-gate
1840Sstevel@tonic-gate #include <sys/types.h> /* various type defn's */
1850Sstevel@tonic-gate #include <sys/debug.h>
1860Sstevel@tonic-gate #include <sys/param.h> /* various kernel limits */
1870Sstevel@tonic-gate #include <sys/time.h>
1880Sstevel@tonic-gate #include <sys/errno.h>
1890Sstevel@tonic-gate #include <sys/kmem.h> /* defines kmem_alloc() */
1900Sstevel@tonic-gate #include <sys/conf.h> /* defines cdevsw */
1910Sstevel@tonic-gate #include <sys/file.h> /* various file modes, etc. */
1920Sstevel@tonic-gate #include <sys/uio.h> /* UIO stuff */
1930Sstevel@tonic-gate #include <sys/ioctl.h>
1940Sstevel@tonic-gate #include <sys/cred.h> /* defines cred struct */
1950Sstevel@tonic-gate #include <sys/mman.h> /* defines mmap(2) parameters */
1960Sstevel@tonic-gate #include <sys/stat.h> /* defines S_IFCHR */
1970Sstevel@tonic-gate #include <sys/cmn_err.h> /* use cmn_err */
1980Sstevel@tonic-gate #include <sys/ddi.h> /* ddi stuff */
1990Sstevel@tonic-gate #include <sys/sunddi.h> /* ddi stuff */
2000Sstevel@tonic-gate #include <sys/ddi_impldefs.h> /* ddi stuff */
2010Sstevel@tonic-gate #include <sys/winlockio.h> /* defines ioctls, flags, data structs */
2020Sstevel@tonic-gate
2030Sstevel@tonic-gate static int winlock_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
2040Sstevel@tonic-gate static int winlock_devmap(dev_t, devmap_cookie_t, offset_t, size_t,
2050Sstevel@tonic-gate size_t *, uint_t);
2060Sstevel@tonic-gate static int winlocksegmap(dev_t, off_t, struct as *, caddr_t *, off_t,
2070Sstevel@tonic-gate uint_t, uint_t, uint_t, cred_t *);
2080Sstevel@tonic-gate
2090Sstevel@tonic-gate static struct cb_ops winlock_cb_ops = {
2100Sstevel@tonic-gate nulldev, /* open */
2110Sstevel@tonic-gate nulldev, /* close */
2120Sstevel@tonic-gate nodev, /* strategy */
2130Sstevel@tonic-gate nodev, /* print */
2140Sstevel@tonic-gate nodev, /* dump */
2150Sstevel@tonic-gate nodev, /* read */
2160Sstevel@tonic-gate nodev, /* write */
2170Sstevel@tonic-gate winlock_ioctl, /* ioctl */
2180Sstevel@tonic-gate winlock_devmap, /* devmap */
2190Sstevel@tonic-gate nodev, /* mmap */
2200Sstevel@tonic-gate winlocksegmap, /* segmap */
2210Sstevel@tonic-gate nochpoll, /* poll */
2220Sstevel@tonic-gate ddi_prop_op, /* prop_op */
2230Sstevel@tonic-gate NULL, /* streamtab */
2240Sstevel@tonic-gate D_NEW|D_MP|D_DEVMAP, /* Driver compatibility flag */
2250Sstevel@tonic-gate 0, /* rev */
2260Sstevel@tonic-gate nodev, /* aread */
2270Sstevel@tonic-gate nodev /* awrite */
2280Sstevel@tonic-gate };
2290Sstevel@tonic-gate
2300Sstevel@tonic-gate static int winlock_info(dev_info_t *, ddi_info_cmd_t, void *, void **);
2310Sstevel@tonic-gate static int winlock_attach(dev_info_t *, ddi_attach_cmd_t);
2320Sstevel@tonic-gate static int winlock_detach(dev_info_t *, ddi_detach_cmd_t);
2330Sstevel@tonic-gate
2340Sstevel@tonic-gate static struct dev_ops winlock_ops = {
2350Sstevel@tonic-gate DEVO_REV,
2360Sstevel@tonic-gate 0, /* refcount */
2370Sstevel@tonic-gate winlock_info, /* info */
2380Sstevel@tonic-gate nulldev, /* identify */
2390Sstevel@tonic-gate nulldev, /* probe */
2400Sstevel@tonic-gate winlock_attach, /* attach */
2410Sstevel@tonic-gate winlock_detach, /* detach */
2420Sstevel@tonic-gate nodev, /* reset */
2430Sstevel@tonic-gate &winlock_cb_ops, /* driver ops */
2440Sstevel@tonic-gate NULL, /* bus ops */
2457656SSherry.Moore@Sun.COM NULL, /* power */
2467656SSherry.Moore@Sun.COM ddi_quiesce_not_needed, /* quiesce */
2470Sstevel@tonic-gate };
2480Sstevel@tonic-gate
2490Sstevel@tonic-gate static int winlockmap_map(devmap_cookie_t, dev_t, uint_t, offset_t, size_t,
2500Sstevel@tonic-gate void **);
2510Sstevel@tonic-gate static void winlockmap_unmap(devmap_cookie_t, void *, offset_t, size_t,
2520Sstevel@tonic-gate devmap_cookie_t, void **, devmap_cookie_t, void **);
2530Sstevel@tonic-gate static int winlockmap_dup(devmap_cookie_t, void *,
2540Sstevel@tonic-gate devmap_cookie_t, void **);
2550Sstevel@tonic-gate static int winlockmap_access(devmap_cookie_t, void *, offset_t, size_t,
2560Sstevel@tonic-gate uint_t, uint_t);
2570Sstevel@tonic-gate
2580Sstevel@tonic-gate static
2590Sstevel@tonic-gate struct devmap_callback_ctl winlockmap_ops = {
2600Sstevel@tonic-gate DEVMAP_OPS_REV,
2610Sstevel@tonic-gate winlockmap_map,
2620Sstevel@tonic-gate winlockmap_access,
2630Sstevel@tonic-gate winlockmap_dup,
2640Sstevel@tonic-gate winlockmap_unmap,
2650Sstevel@tonic-gate };
2660Sstevel@tonic-gate
/*
 * Debug tracing.  In DEBUG builds, DEBUGF(level, (cmn_err arguments))
 * calls cmn_err when lock_debug is at least "level"; in non-DEBUG builds
 * it expands to nothing.
 */
#if DEBUG
static int lock_debug = 0;
#define	DEBUGF(level, args)	{ if (lock_debug >= (level)) cmn_err args; }
#else
#define	DEBUGF(level, args)
#endif

/*
 * The two styles of lock the driver supports: WINLOCKALLOC creates
 * NEWSTYLE_LOCK contexts, GRABPAGEALLOC creates OLDSTYLE_LOCK contexts.
 */
enum winlock_style {
	NEWSTYLE_LOCK,
	OLDSTYLE_LOCK
};
2760Sstevel@tonic-gate
/*
 * These structures describe a lock context.  We permit multiple
 * clients (not just two) to access a lock page.
 *
 * The "cookie" identifies the lock context.  It is the page number portion
 * of the sy_ident returned on lock allocation.  The cookie is used in later
 * ioctls.
 * "cookie" is lockid * PAGESIZE
 * "lockptr" is the kernel virtual address of the lock itself
 * The page offset portion of lockptr is the page offset portion of sy_ident
 */
2870Sstevel@tonic-gate
2880Sstevel@tonic-gate /*
2890Sstevel@tonic-gate * per-process information about locks. This is the private field of
2900Sstevel@tonic-gate * a devmap mapping. Note that usually *two* mappings point to this.
2910Sstevel@tonic-gate */
2920Sstevel@tonic-gate
2930Sstevel@tonic-gate /*
2940Sstevel@tonic-gate * Each process using winlock is associated with a segproc structure
2950Sstevel@tonic-gate * In various driver entry points, we need to search to find the right
2960Sstevel@tonic-gate * segproc structure (If we were using file handles for each lock this
2970Sstevel@tonic-gate * would not have been necessary).
2980Sstevel@tonic-gate * It would have been simple to use the process pid (and ddi_get_pid)
2990Sstevel@tonic-gate * However, during fork devmap_dup is called in the parent process context
3000Sstevel@tonic-gate * and using the pid complicates the code by introducing orphans.
3010Sstevel@tonic-gate * Instead we use the as pointer for the process as a cookie
3020Sstevel@tonic-gate * which requires delving into various non-DDI kosher structs
3030Sstevel@tonic-gate */
3040Sstevel@tonic-gate typedef struct segproc {
3050Sstevel@tonic-gate struct segproc *next; /* next client of this lock */
3060Sstevel@tonic-gate struct seglock *lp; /* associated lock context */
3070Sstevel@tonic-gate devmap_cookie_t lockseg; /* lock mapping, if any */
3080Sstevel@tonic-gate devmap_cookie_t unlockseg; /* unlock mapping, if any */
3090Sstevel@tonic-gate void *tag; /* process as pointer as tag */
3100Sstevel@tonic-gate uint_t flag; /* see "flag bits" in winlockio.h */
3110Sstevel@tonic-gate } SegProc;
3120Sstevel@tonic-gate
3130Sstevel@tonic-gate #define ID(sdp) ((sdp)->tag)
3140Sstevel@tonic-gate #define CURPROC_ID (void *)(curproc->p_as)
3150Sstevel@tonic-gate
3160Sstevel@tonic-gate /* per lock context information */
3170Sstevel@tonic-gate
3180Sstevel@tonic-gate typedef struct seglock {
3190Sstevel@tonic-gate struct seglock *next; /* next lock */
3200Sstevel@tonic-gate uint_t sleepers; /* nthreads sleeping on this lock */
3210Sstevel@tonic-gate uint_t alloccount; /* how many times created? */
3220Sstevel@tonic-gate uint_t cookie; /* mmap() offset (page #) into device */
3230Sstevel@tonic-gate uint_t key; /* key, if any */
3240Sstevel@tonic-gate enum winlock_style style; /* style of lock - OLDSTYLE, NEWSTYLE */
3250Sstevel@tonic-gate clock_t timeout; /* sleep time in ticks */
3260Sstevel@tonic-gate ddi_umem_cookie_t umem_cookie; /* cookie for umem allocated memory */
3270Sstevel@tonic-gate int *lockptr; /* kernel virtual addr of lock */
3280Sstevel@tonic-gate struct segproc *clients; /* list of clients of this lock */
3290Sstevel@tonic-gate struct segproc *owner; /* current owner of lock */
3300Sstevel@tonic-gate kmutex_t mutex; /* mutex for lock */
3310Sstevel@tonic-gate kcondvar_t locksleep; /* for sleeping on lock */
3320Sstevel@tonic-gate } SegLock;
3330Sstevel@tonic-gate
3340Sstevel@tonic-gate #define LOCK(lp) (*((lp)->lockptr))
3350Sstevel@tonic-gate
3360Sstevel@tonic-gate /*
3370Sstevel@tonic-gate * Number of locks that can fit in a page. Driver can support only that many.
3380Sstevel@tonic-gate * For oldsytle locks, it is relatively easy to increase the limit as each
3390Sstevel@tonic-gate * is in a separate page (MAX_LOCKS mostly serves to prevent runaway allocation
3400Sstevel@tonic-gate * For newstyle locks, this is trickier as the code needs to allow for mapping
3410Sstevel@tonic-gate * into the second or third page of the cookie for some locks.
3420Sstevel@tonic-gate */
3430Sstevel@tonic-gate #define MAX_LOCKS (PAGESIZE/sizeof (int))
3440Sstevel@tonic-gate
3450Sstevel@tonic-gate #define LOCKTIME 3 /* Default lock timeout in seconds */
3460Sstevel@tonic-gate
3470Sstevel@tonic-gate
3480Sstevel@tonic-gate /* Protections setting for winlock user mappings */
3490Sstevel@tonic-gate #define WINLOCK_PROT (PROT_READ|PROT_WRITE|PROT_USER)
3500Sstevel@tonic-gate
3510Sstevel@tonic-gate /*
3520Sstevel@tonic-gate * The trash page is where unwanted writes go
3530Sstevel@tonic-gate * when a process is releasing a lock.
3540Sstevel@tonic-gate */
3550Sstevel@tonic-gate static ddi_umem_cookie_t trashpage_cookie = NULL;
3560Sstevel@tonic-gate
3570Sstevel@tonic-gate /* For newstyle allocations a common page of locks is used */
3580Sstevel@tonic-gate static caddr_t lockpage = NULL;
3590Sstevel@tonic-gate static ddi_umem_cookie_t lockpage_cookie = NULL;
3600Sstevel@tonic-gate
3610Sstevel@tonic-gate static dev_info_t *winlock_dip = NULL;
3620Sstevel@tonic-gate static kmutex_t winlock_mutex;
3630Sstevel@tonic-gate
3640Sstevel@tonic-gate /*
3650Sstevel@tonic-gate * winlock_mutex protects
3660Sstevel@tonic-gate * lock_list
3670Sstevel@tonic-gate * lock_free_list
3680Sstevel@tonic-gate * "next" field in SegLock
3690Sstevel@tonic-gate * next_lock
3700Sstevel@tonic-gate * trashpage_cookie
3710Sstevel@tonic-gate * lockpage & lockpage_cookie
3720Sstevel@tonic-gate *
3730Sstevel@tonic-gate * SegLock_mutex protects
3740Sstevel@tonic-gate * rest of fields in SegLock
3750Sstevel@tonic-gate * All fields in list of SegProc (lp->clients)
3760Sstevel@tonic-gate *
3770Sstevel@tonic-gate * Lock ordering is winlock_mutex->SegLock_mutex
3780Sstevel@tonic-gate * During devmap/seg operations SegLock_mutex acquired without winlock_mutex
3790Sstevel@tonic-gate *
3800Sstevel@tonic-gate * During devmap callbacks, the pointer to SegProc is stored as the private
3810Sstevel@tonic-gate * data in the devmap handle. This pointer will not go stale (i.e., the
3820Sstevel@tonic-gate * SegProc getting deleted) as the SegProc is not deleted until both the
3830Sstevel@tonic-gate * lockseg and unlockseg have been unmapped and the pointers stored in
3840Sstevel@tonic-gate * the devmap handles have been NULL'ed.
3850Sstevel@tonic-gate * But before this pointer is used to access any fields (other than the 'lp')
3860Sstevel@tonic-gate * lp->mutex must be held.
3870Sstevel@tonic-gate */
3880Sstevel@tonic-gate
3890Sstevel@tonic-gate /*
3900Sstevel@tonic-gate * The allocation code tries to allocate from lock_free_list
3910Sstevel@tonic-gate * first, otherwise it uses kmem_zalloc. When lock list is idle, all
3920Sstevel@tonic-gate * locks in lock_free_list are kmem_freed
3930Sstevel@tonic-gate */
3940Sstevel@tonic-gate static SegLock *lock_list = NULL; /* in-use locks */
3950Sstevel@tonic-gate static SegLock *lock_free_list = NULL; /* free locks */
3960Sstevel@tonic-gate static int next_lock = 0; /* next lock cookie */
3970Sstevel@tonic-gate
3980Sstevel@tonic-gate /* Routines to find a lock in lock_list based on offset or key */
3990Sstevel@tonic-gate static SegLock *seglock_findlock(uint_t);
4000Sstevel@tonic-gate static SegLock *seglock_findkey(uint_t);
4010Sstevel@tonic-gate
4020Sstevel@tonic-gate /* Routines to find and allocate SegProc structures */
4030Sstevel@tonic-gate static SegProc *seglock_find_specific(SegLock *, void *);
4040Sstevel@tonic-gate static SegProc *seglock_alloc_specific(SegLock *, void *);
4050Sstevel@tonic-gate #define seglock_findclient(lp) seglock_find_specific((lp), CURPROC_ID)
4060Sstevel@tonic-gate #define seglock_allocclient(lp) seglock_alloc_specific((lp), CURPROC_ID)
4070Sstevel@tonic-gate
4080Sstevel@tonic-gate /* Delete client from lock's client list */
4090Sstevel@tonic-gate static void seglock_deleteclient(SegLock *, SegProc *);
4100Sstevel@tonic-gate static void garbage_collect_lock(SegLock *, SegProc *);
4110Sstevel@tonic-gate
4120Sstevel@tonic-gate /* Create a new lock */
4130Sstevel@tonic-gate static SegLock *seglock_createlock(enum winlock_style);
4140Sstevel@tonic-gate /* Destroy lock */
4150Sstevel@tonic-gate static void seglock_destroylock(SegLock *);
4160Sstevel@tonic-gate static void lock_destroyall(void);
4170Sstevel@tonic-gate
4180Sstevel@tonic-gate /* Helper functions in winlockmap_access */
4190Sstevel@tonic-gate static int give_mapping(SegLock *, SegProc *, uint_t);
4200Sstevel@tonic-gate static int lock_giveup(SegLock *, int);
4210Sstevel@tonic-gate static int seglock_lockfault(devmap_cookie_t, SegProc *, SegLock *, uint_t);
4220Sstevel@tonic-gate
4230Sstevel@tonic-gate /* routines called from ioctl */
4240Sstevel@tonic-gate static int seglock_graballoc(intptr_t, enum winlock_style, int);
4250Sstevel@tonic-gate static int seglock_grabinfo(intptr_t, int);
4260Sstevel@tonic-gate static int seglock_grabfree(intptr_t, int);
4270Sstevel@tonic-gate static int seglock_gettimeout(intptr_t, int);
4280Sstevel@tonic-gate static int seglock_settimeout(intptr_t, int);
4290Sstevel@tonic-gate static void seglock_dump_all(void);
4300Sstevel@tonic-gate
4310Sstevel@tonic-gate static int
winlock_attach(dev_info_t * devi,ddi_attach_cmd_t cmd)4320Sstevel@tonic-gate winlock_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
4330Sstevel@tonic-gate {
4340Sstevel@tonic-gate DEBUGF(1, (CE_CONT, "winlock_attach, devi=%p, cmd=%d\n",
4357656SSherry.Moore@Sun.COM (void *)devi, (int)cmd));
4360Sstevel@tonic-gate if (cmd != DDI_ATTACH)
4370Sstevel@tonic-gate return (DDI_FAILURE);
4380Sstevel@tonic-gate if (ddi_create_minor_node(devi, "winlock", S_IFCHR, 0, DDI_PSEUDO, 0)
4390Sstevel@tonic-gate == DDI_FAILURE) {
4400Sstevel@tonic-gate return (DDI_FAILURE);
4410Sstevel@tonic-gate }
4420Sstevel@tonic-gate winlock_dip = devi;
4430Sstevel@tonic-gate ddi_report_dev(devi);
4440Sstevel@tonic-gate return (DDI_SUCCESS);
4450Sstevel@tonic-gate }
4460Sstevel@tonic-gate
4470Sstevel@tonic-gate /*ARGSUSED*/
4480Sstevel@tonic-gate static int
winlock_detach(dev_info_t * devi,ddi_detach_cmd_t cmd)4490Sstevel@tonic-gate winlock_detach(dev_info_t *devi, ddi_detach_cmd_t cmd)
4500Sstevel@tonic-gate {
4510Sstevel@tonic-gate DEBUGF(1, (CE_CONT, "winlock_detach, devi=%p, cmd=%d\n",
4527656SSherry.Moore@Sun.COM (void *)devi, (int)cmd));
4530Sstevel@tonic-gate if (cmd != DDI_DETACH)
4540Sstevel@tonic-gate return (DDI_FAILURE);
4550Sstevel@tonic-gate
4560Sstevel@tonic-gate mutex_enter(&winlock_mutex);
4570Sstevel@tonic-gate if (lock_list != NULL) {
4580Sstevel@tonic-gate mutex_exit(&winlock_mutex);
4590Sstevel@tonic-gate return (DDI_FAILURE);
4600Sstevel@tonic-gate }
4610Sstevel@tonic-gate ASSERT(lock_free_list == NULL);
4620Sstevel@tonic-gate
4630Sstevel@tonic-gate DEBUGF(1, (CE_CONT, "detach freeing trashpage and lockpage\n"));
4640Sstevel@tonic-gate /* destroy any common stuff created */
4650Sstevel@tonic-gate if (trashpage_cookie != NULL) {
4660Sstevel@tonic-gate ddi_umem_free(trashpage_cookie);
4670Sstevel@tonic-gate trashpage_cookie = NULL;
4680Sstevel@tonic-gate }
4690Sstevel@tonic-gate if (lockpage != NULL) {
4700Sstevel@tonic-gate ddi_umem_free(lockpage_cookie);
4710Sstevel@tonic-gate lockpage = NULL;
4720Sstevel@tonic-gate lockpage_cookie = NULL;
4730Sstevel@tonic-gate }
4740Sstevel@tonic-gate winlock_dip = NULL;
4750Sstevel@tonic-gate mutex_exit(&winlock_mutex);
4760Sstevel@tonic-gate return (DDI_SUCCESS);
4770Sstevel@tonic-gate }
4780Sstevel@tonic-gate
4790Sstevel@tonic-gate /*ARGSUSED*/
4800Sstevel@tonic-gate static int
winlock_info(dev_info_t * dip,ddi_info_cmd_t infocmd,void * arg,void ** result)4810Sstevel@tonic-gate winlock_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
4820Sstevel@tonic-gate {
4830Sstevel@tonic-gate register int error;
4840Sstevel@tonic-gate
4850Sstevel@tonic-gate /* initialize result */
4860Sstevel@tonic-gate *result = NULL;
4870Sstevel@tonic-gate
4880Sstevel@tonic-gate /* only valid instance (i.e., getminor) is 0 */
4890Sstevel@tonic-gate if (getminor((dev_t)arg) >= 1)
4900Sstevel@tonic-gate return (DDI_FAILURE);
4910Sstevel@tonic-gate
4920Sstevel@tonic-gate switch (infocmd) {
4930Sstevel@tonic-gate case DDI_INFO_DEVT2DEVINFO:
4940Sstevel@tonic-gate if (winlock_dip == NULL)
4950Sstevel@tonic-gate error = DDI_FAILURE;
4960Sstevel@tonic-gate else {
4970Sstevel@tonic-gate *result = (void *)winlock_dip;
4980Sstevel@tonic-gate error = DDI_SUCCESS;
4990Sstevel@tonic-gate }
5000Sstevel@tonic-gate break;
5010Sstevel@tonic-gate case DDI_INFO_DEVT2INSTANCE:
5020Sstevel@tonic-gate *result = (void *)0;
5030Sstevel@tonic-gate error = DDI_SUCCESS;
5040Sstevel@tonic-gate break;
5050Sstevel@tonic-gate default:
5060Sstevel@tonic-gate error = DDI_FAILURE;
5070Sstevel@tonic-gate }
5080Sstevel@tonic-gate return (error);
5090Sstevel@tonic-gate }
5100Sstevel@tonic-gate
5110Sstevel@tonic-gate
5120Sstevel@tonic-gate /*ARGSUSED*/
5130Sstevel@tonic-gate int
winlock_ioctl(dev_t dev,int cmd,intptr_t arg,int mode,cred_t * cred,int * rval)5140Sstevel@tonic-gate winlock_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
5150Sstevel@tonic-gate cred_t *cred, int *rval)
5160Sstevel@tonic-gate {
5170Sstevel@tonic-gate DEBUGF(1, (CE_CONT, "winlockioctl: cmd=%d, arg=0x%p\n",
5187656SSherry.Moore@Sun.COM cmd, (void *)arg));
5190Sstevel@tonic-gate
5200Sstevel@tonic-gate switch (cmd) {
5210Sstevel@tonic-gate /*
5220Sstevel@tonic-gate * ioctls that used to be handled by framebuffers (defined in fbio.h)
5230Sstevel@tonic-gate * RFE: No code really calls the GRAB* ioctls now. Should EOL.
5240Sstevel@tonic-gate */
5250Sstevel@tonic-gate
5260Sstevel@tonic-gate case GRABPAGEALLOC:
5270Sstevel@tonic-gate return (seglock_graballoc(arg, OLDSTYLE_LOCK, mode));
5280Sstevel@tonic-gate case GRABPAGEFREE:
5290Sstevel@tonic-gate return (seglock_grabfree(arg, mode));
5300Sstevel@tonic-gate case GRABLOCKINFO:
5310Sstevel@tonic-gate return (seglock_grabinfo(arg, mode));
5320Sstevel@tonic-gate case GRABATTACH:
5330Sstevel@tonic-gate return (EINVAL); /* GRABATTACH is not supported (never was) */
5340Sstevel@tonic-gate
5350Sstevel@tonic-gate case WINLOCKALLOC:
5360Sstevel@tonic-gate return (seglock_graballoc(arg, NEWSTYLE_LOCK, mode));
5370Sstevel@tonic-gate case WINLOCKFREE:
5380Sstevel@tonic-gate return (seglock_grabfree(arg, mode));
5390Sstevel@tonic-gate case WINLOCKSETTIMEOUT:
5400Sstevel@tonic-gate return (seglock_settimeout(arg, mode));
5410Sstevel@tonic-gate case WINLOCKGETTIMEOUT:
5420Sstevel@tonic-gate return (seglock_gettimeout(arg, mode));
5430Sstevel@tonic-gate case WINLOCKDUMP:
5440Sstevel@tonic-gate seglock_dump_all();
5450Sstevel@tonic-gate return (0);
5460Sstevel@tonic-gate
5470Sstevel@tonic-gate #ifdef DEBUG
5480Sstevel@tonic-gate case (WIOC|255):
5490Sstevel@tonic-gate lock_debug = arg;
5500Sstevel@tonic-gate return (0);
5510Sstevel@tonic-gate #endif
5520Sstevel@tonic-gate
5530Sstevel@tonic-gate default:
5540Sstevel@tonic-gate return (ENOTTY); /* Why is this not EINVAL */
5550Sstevel@tonic-gate }
5560Sstevel@tonic-gate }
5570Sstevel@tonic-gate
5580Sstevel@tonic-gate int
winlocksegmap(dev_t dev,off_t off,struct as * as,caddr_t * addr,off_t len,uint_t prot,uint_t maxprot,uint_t flags,cred_t * cred)5590Sstevel@tonic-gate winlocksegmap(
5600Sstevel@tonic-gate dev_t dev, /* major:minor */
5610Sstevel@tonic-gate off_t off, /* device offset from mmap(2) */
5620Sstevel@tonic-gate struct as *as, /* user's address space. */
5630Sstevel@tonic-gate caddr_t *addr, /* address from mmap(2) */
5640Sstevel@tonic-gate off_t len, /* length from mmap(2) */
5650Sstevel@tonic-gate uint_t prot, /* user wants this access */
5660Sstevel@tonic-gate uint_t maxprot, /* this is the maximum the user can have */
5670Sstevel@tonic-gate uint_t flags, /* flags from mmap(2) */
5680Sstevel@tonic-gate cred_t *cred)
5690Sstevel@tonic-gate {
5700Sstevel@tonic-gate DEBUGF(1, (CE_CONT, "winlock_segmap off=%lx, len=0x%lx\n", off, len));
5710Sstevel@tonic-gate
5720Sstevel@tonic-gate /* Only MAP_SHARED mappings are supported */
5730Sstevel@tonic-gate if ((flags & MAP_TYPE) == MAP_PRIVATE) {
5740Sstevel@tonic-gate return (EINVAL);
5750Sstevel@tonic-gate }
5760Sstevel@tonic-gate
5770Sstevel@tonic-gate /* Use devmap_setup to setup the mapping */
5780Sstevel@tonic-gate return (devmap_setup(dev, (offset_t)off, as, addr, (size_t)len, prot,
5797656SSherry.Moore@Sun.COM maxprot, flags, cred));
5800Sstevel@tonic-gate }
5810Sstevel@tonic-gate
5820Sstevel@tonic-gate /*ARGSUSED*/
5830Sstevel@tonic-gate int
winlock_devmap(dev_t dev,devmap_cookie_t dhp,offset_t off,size_t len,size_t * maplen,uint_t model)5840Sstevel@tonic-gate winlock_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
5850Sstevel@tonic-gate size_t *maplen, uint_t model)
5860Sstevel@tonic-gate {
5870Sstevel@tonic-gate SegLock *lp;
5880Sstevel@tonic-gate int err;
5890Sstevel@tonic-gate
5900Sstevel@tonic-gate DEBUGF(1, (CE_CONT, "winlock devmap: off=%llx, len=%lx, dhp=%p\n",
5917656SSherry.Moore@Sun.COM off, len, (void *)dhp));
5920Sstevel@tonic-gate
5930Sstevel@tonic-gate *maplen = 0;
5940Sstevel@tonic-gate
5950Sstevel@tonic-gate /* Check if the lock exists, i.e., has been created by alloc */
5960Sstevel@tonic-gate /* off is the sy_ident returned in the alloc ioctl */
5970Sstevel@tonic-gate if ((lp = seglock_findlock((uint_t)off)) == NULL) {
5980Sstevel@tonic-gate return (ENXIO);
5990Sstevel@tonic-gate }
6000Sstevel@tonic-gate
6010Sstevel@tonic-gate /*
6020Sstevel@tonic-gate * The offset bits in mmap(2) offset has to be same as in lockptr
6030Sstevel@tonic-gate * OR the offset should be 0 (i.e. masked off)
6040Sstevel@tonic-gate */
6050Sstevel@tonic-gate if (((off & PAGEOFFSET) != 0) &&
6060Sstevel@tonic-gate ((off ^ (uintptr_t)(lp->lockptr)) & (offset_t)PAGEOFFSET) != 0) {
6070Sstevel@tonic-gate DEBUGF(2, (CE_CONT,
6087656SSherry.Moore@Sun.COM "mmap offset %llx mismatch with lockptr %p\n",
6097656SSherry.Moore@Sun.COM off, (void *)lp->lockptr));
6100Sstevel@tonic-gate mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
6110Sstevel@tonic-gate return (EINVAL);
6120Sstevel@tonic-gate }
6130Sstevel@tonic-gate
6140Sstevel@tonic-gate /* Only supports PAGESIZE length mappings */
6150Sstevel@tonic-gate if (len != PAGESIZE) {
6160Sstevel@tonic-gate mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
6170Sstevel@tonic-gate return (EINVAL);
6180Sstevel@tonic-gate }
6190Sstevel@tonic-gate
6200Sstevel@tonic-gate /*
6210Sstevel@tonic-gate * Set up devmap to point at page associated with lock
6220Sstevel@tonic-gate * RFE: At this point we dont know if this is a lockpage or unlockpage
6230Sstevel@tonic-gate * a lockpage would not need DEVMAP_ALLOW_REMAP setting
6240Sstevel@tonic-gate * We could have kept track of the mapping order here,
6250Sstevel@tonic-gate * but devmap framework does not support storing any state in this
6260Sstevel@tonic-gate * devmap callback as it does not callback for error cleanup if some
6270Sstevel@tonic-gate * other error happens in the framework.
6280Sstevel@tonic-gate * RFE: We should modify the winlock mmap interface so that the
6290Sstevel@tonic-gate * user process marks in the offset passed in whether this is for a
6300Sstevel@tonic-gate * lock or unlock mapping instead of guessing based on order of maps
6310Sstevel@tonic-gate * This would cleanup other things (such as in fork)
6320Sstevel@tonic-gate */
6330Sstevel@tonic-gate if ((err = devmap_umem_setup(dhp, winlock_dip, &winlockmap_ops,
6340Sstevel@tonic-gate lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT,
6350Sstevel@tonic-gate DEVMAP_ALLOW_REMAP, 0)) < 0) {
6360Sstevel@tonic-gate mutex_exit(&lp->mutex); /* held by seglock_findlock */
6370Sstevel@tonic-gate return (err);
6380Sstevel@tonic-gate }
6390Sstevel@tonic-gate /*
6400Sstevel@tonic-gate * No mappings are loaded to those segments yet. The correctness
6410Sstevel@tonic-gate * of the winlock semantics depends on the devmap framework/seg_dev NOT
6420Sstevel@tonic-gate * loading the translations without calling _access callback.
6430Sstevel@tonic-gate */
6440Sstevel@tonic-gate
6450Sstevel@tonic-gate mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
6460Sstevel@tonic-gate *maplen = PAGESIZE;
6470Sstevel@tonic-gate return (0);
6480Sstevel@tonic-gate }
6490Sstevel@tonic-gate
6500Sstevel@tonic-gate /*
6510Sstevel@tonic-gate * This routine is called by the devmap framework after the devmap entry point
6520Sstevel@tonic-gate * above and the mapping is setup in seg_dev.
6530Sstevel@tonic-gate * We store the pointer to the per-process context in the devmap private data.
6540Sstevel@tonic-gate */
6550Sstevel@tonic-gate /*ARGSUSED*/
6560Sstevel@tonic-gate static int
winlockmap_map(devmap_cookie_t dhp,dev_t dev,uint_t flags,offset_t off,size_t len,void ** pvtp)6570Sstevel@tonic-gate winlockmap_map(devmap_cookie_t dhp, dev_t dev, uint_t flags, offset_t off,
6580Sstevel@tonic-gate size_t len, void **pvtp)
6590Sstevel@tonic-gate {
6600Sstevel@tonic-gate SegLock *lp = seglock_findlock((uint_t)off); /* returns w/ mutex held */
6610Sstevel@tonic-gate SegProc *sdp;
6620Sstevel@tonic-gate
6630Sstevel@tonic-gate ASSERT(len == PAGESIZE);
6640Sstevel@tonic-gate
6650Sstevel@tonic-gate /* Find the per-process context for this lock, alloc one if not found */
6660Sstevel@tonic-gate sdp = seglock_allocclient(lp);
6670Sstevel@tonic-gate
6680Sstevel@tonic-gate /*
6690Sstevel@tonic-gate * RFE: Determining which is a lock vs unlock seg is based on order
6700Sstevel@tonic-gate * of mmaps, we should change that to be derivable from off
6710Sstevel@tonic-gate */
6720Sstevel@tonic-gate if (sdp->lockseg == NULL) {
6730Sstevel@tonic-gate sdp->lockseg = dhp;
6740Sstevel@tonic-gate } else if (sdp->unlockseg == NULL) {
6750Sstevel@tonic-gate sdp->unlockseg = dhp;
6760Sstevel@tonic-gate } else {
6770Sstevel@tonic-gate /* attempting to map lock more than twice */
6780Sstevel@tonic-gate mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
6790Sstevel@tonic-gate return (ENOMEM);
6800Sstevel@tonic-gate }
6810Sstevel@tonic-gate
6820Sstevel@tonic-gate *pvtp = sdp;
6830Sstevel@tonic-gate mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
6840Sstevel@tonic-gate return (DDI_SUCCESS);
6850Sstevel@tonic-gate }
6860Sstevel@tonic-gate
6870Sstevel@tonic-gate /*
6880Sstevel@tonic-gate * duplicate a segment, as in fork()
6890Sstevel@tonic-gate * On fork, the child inherits the mappings to the lock
6900Sstevel@tonic-gate * lp->alloccount is NOT incremented, so child should not do a free().
6910Sstevel@tonic-gate * Semantics same as if done an alloc(), map(), map().
6920Sstevel@tonic-gate * This way it would work fine if doing an exec() variant later
6930Sstevel@tonic-gate * Child does not inherit any UFLAGS set in parent
6940Sstevel@tonic-gate * The lock and unlock pages are started off unmapped, i.e., child does not
6950Sstevel@tonic-gate * own the lock.
6960Sstevel@tonic-gate * The code assumes that the child process has a valid pid at this point
6970Sstevel@tonic-gate * RFE: This semantics depends on fork not duplicating the hat mappings
6980Sstevel@tonic-gate * (which is the current implementation). To enforce it would need to
6990Sstevel@tonic-gate * call devmap_unload from here - not clear if that is allowed.
7000Sstevel@tonic-gate */
7010Sstevel@tonic-gate
7020Sstevel@tonic-gate static int
winlockmap_dup(devmap_cookie_t dhp,void * oldpvt,devmap_cookie_t new_dhp,void ** newpvt)7030Sstevel@tonic-gate winlockmap_dup(devmap_cookie_t dhp, void *oldpvt, devmap_cookie_t new_dhp,
7040Sstevel@tonic-gate void **newpvt)
7050Sstevel@tonic-gate {
7060Sstevel@tonic-gate SegProc *sdp = (SegProc *)oldpvt;
7070Sstevel@tonic-gate SegProc *ndp;
7080Sstevel@tonic-gate SegLock *lp = sdp->lp;
7090Sstevel@tonic-gate
7100Sstevel@tonic-gate mutex_enter(&lp->mutex);
7110Sstevel@tonic-gate ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
7120Sstevel@tonic-gate
7130Sstevel@tonic-gate /*
7140Sstevel@tonic-gate * Note: At this point, the child process does have a pid, but
7150Sstevel@tonic-gate * the arguments passed to as_dup and hence to devmap_dup dont pass it
7160Sstevel@tonic-gate * down. So we cannot use normal seglock_findclient - which finds the
7170Sstevel@tonic-gate * parent sdp itself!
7180Sstevel@tonic-gate * Instead we allocate the child's SegProc by using the child as pointer
7190Sstevel@tonic-gate * RFE: we are using the as stucture which means peeking into the
7200Sstevel@tonic-gate * devmap_cookie. This is not DDI-compliant. Need a compliant way of
7210Sstevel@tonic-gate * getting at either the as or, better, a way to get the child's new pid
7220Sstevel@tonic-gate */
7230Sstevel@tonic-gate ndp = seglock_alloc_specific(lp,
7247656SSherry.Moore@Sun.COM (void *)((devmap_handle_t *)new_dhp)->dh_seg->s_as);
7250Sstevel@tonic-gate ASSERT(ndp != sdp);
7260Sstevel@tonic-gate
7270Sstevel@tonic-gate if (sdp->lockseg == dhp) {
7280Sstevel@tonic-gate ASSERT(ndp->lockseg == NULL);
7290Sstevel@tonic-gate ndp->lockseg = new_dhp;
7300Sstevel@tonic-gate } else {
7310Sstevel@tonic-gate ASSERT(sdp->unlockseg == dhp);
7320Sstevel@tonic-gate ASSERT(ndp->unlockseg == NULL);
7330Sstevel@tonic-gate ndp->unlockseg = new_dhp;
7340Sstevel@tonic-gate if (sdp->flag & TRASHPAGE) {
7350Sstevel@tonic-gate ndp->flag |= TRASHPAGE;
7360Sstevel@tonic-gate }
7370Sstevel@tonic-gate }
7380Sstevel@tonic-gate mutex_exit(&lp->mutex);
7390Sstevel@tonic-gate *newpvt = (void *)ndp;
7400Sstevel@tonic-gate return (0);
7410Sstevel@tonic-gate }
7420Sstevel@tonic-gate
7430Sstevel@tonic-gate
7440Sstevel@tonic-gate /*ARGSUSED*/
7450Sstevel@tonic-gate static void
winlockmap_unmap(devmap_cookie_t dhp,void * pvtp,offset_t off,size_t len,devmap_cookie_t new_dhp1,void ** newpvtp1,devmap_cookie_t new_dhp2,void ** newpvtp2)7460Sstevel@tonic-gate winlockmap_unmap(devmap_cookie_t dhp, void *pvtp, offset_t off, size_t len,
7470Sstevel@tonic-gate devmap_cookie_t new_dhp1, void **newpvtp1,
7480Sstevel@tonic-gate devmap_cookie_t new_dhp2, void **newpvtp2)
7490Sstevel@tonic-gate {
7500Sstevel@tonic-gate SegProc *sdp = (SegProc *)pvtp;
7510Sstevel@tonic-gate SegLock *lp = sdp->lp;
7520Sstevel@tonic-gate
7530Sstevel@tonic-gate /*
7540Sstevel@tonic-gate * We always create PAGESIZE length mappings, so there should never
7550Sstevel@tonic-gate * be a partial unmapping case
7560Sstevel@tonic-gate */
7570Sstevel@tonic-gate ASSERT((new_dhp1 == NULL) && (new_dhp2 == NULL));
7580Sstevel@tonic-gate
7590Sstevel@tonic-gate mutex_enter(&lp->mutex);
7600Sstevel@tonic-gate ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
7610Sstevel@tonic-gate /* make sure this process doesn't own the lock */
7620Sstevel@tonic-gate if (sdp == lp->owner) {
7630Sstevel@tonic-gate /*
7640Sstevel@tonic-gate * Not handling errors - i.e., errors in unloading mapping
7650Sstevel@tonic-gate * As part of unmapping hat/seg structure get torn down anyway
7660Sstevel@tonic-gate */
7670Sstevel@tonic-gate (void) lock_giveup(lp, 0);
7680Sstevel@tonic-gate }
7690Sstevel@tonic-gate
7700Sstevel@tonic-gate ASSERT(sdp != lp->owner);
7710Sstevel@tonic-gate if (sdp->lockseg == dhp) {
7720Sstevel@tonic-gate sdp->lockseg = NULL;
7730Sstevel@tonic-gate } else {
7740Sstevel@tonic-gate ASSERT(sdp->unlockseg == dhp);
7750Sstevel@tonic-gate sdp->unlockseg = NULL;
7760Sstevel@tonic-gate sdp->flag &= ~TRASHPAGE; /* clear flag if set */
7770Sstevel@tonic-gate }
7780Sstevel@tonic-gate
7790Sstevel@tonic-gate garbage_collect_lock(lp, sdp);
7800Sstevel@tonic-gate }
7810Sstevel@tonic-gate
7820Sstevel@tonic-gate /*ARGSUSED*/
7830Sstevel@tonic-gate static int
winlockmap_access(devmap_cookie_t dhp,void * pvt,offset_t off,size_t len,uint_t type,uint_t rw)7840Sstevel@tonic-gate winlockmap_access(devmap_cookie_t dhp, void *pvt, offset_t off, size_t len,
7850Sstevel@tonic-gate uint_t type, uint_t rw)
7860Sstevel@tonic-gate {
7870Sstevel@tonic-gate SegProc *sdp = (SegProc *)pvt;
7880Sstevel@tonic-gate SegLock *lp = sdp->lp;
7890Sstevel@tonic-gate int err;
7900Sstevel@tonic-gate
7910Sstevel@tonic-gate /* Driver handles only DEVMAP_ACCESS type of faults */
7920Sstevel@tonic-gate if (type != DEVMAP_ACCESS)
7930Sstevel@tonic-gate return (-1);
7940Sstevel@tonic-gate
7950Sstevel@tonic-gate mutex_enter(&lp->mutex);
7960Sstevel@tonic-gate ASSERT((dhp == sdp->lockseg) || (dhp == sdp->unlockseg));
7970Sstevel@tonic-gate
7980Sstevel@tonic-gate /* should be using a SegProc that corresponds to current process */
7990Sstevel@tonic-gate ASSERT(ID(sdp) == CURPROC_ID);
8000Sstevel@tonic-gate
8010Sstevel@tonic-gate /*
8020Sstevel@tonic-gate * If process is faulting but does not have both segments mapped
8030Sstevel@tonic-gate * return error (should cause a segv).
8040Sstevel@tonic-gate * RFE: could give it a permanent trashpage
8050Sstevel@tonic-gate */
8060Sstevel@tonic-gate if ((sdp->lockseg == NULL) || (sdp->unlockseg == NULL)) {
8070Sstevel@tonic-gate err = -1;
8080Sstevel@tonic-gate } else {
8090Sstevel@tonic-gate err = seglock_lockfault(dhp, sdp, lp, rw);
8100Sstevel@tonic-gate }
8110Sstevel@tonic-gate mutex_exit(&lp->mutex);
8120Sstevel@tonic-gate return (err);
8130Sstevel@tonic-gate }
8140Sstevel@tonic-gate
8150Sstevel@tonic-gate /* INTERNAL ROUTINES START HERE */
8160Sstevel@tonic-gate
8170Sstevel@tonic-gate
8180Sstevel@tonic-gate
8190Sstevel@tonic-gate /*
8200Sstevel@tonic-gate * search the lock_list list for the specified cookie
8210Sstevel@tonic-gate * The cookie is the sy_ident field returns by ALLOC ioctl.
8220Sstevel@tonic-gate * This has two parts:
8230Sstevel@tonic-gate * the pageoffset bits contain offset into the lock page.
8240Sstevel@tonic-gate * the pagenumber bits contain the lock id.
8250Sstevel@tonic-gate * The user code is supposed to pass in only the pagenumber portion
8260Sstevel@tonic-gate * (i.e. mask off the pageoffset bits). However the code below
8270Sstevel@tonic-gate * does the mask in case the users are not diligent
8280Sstevel@tonic-gate * if found, returns with mutex for SegLock structure held
8290Sstevel@tonic-gate */
8300Sstevel@tonic-gate static SegLock *
seglock_findlock(uint_t cookie)8310Sstevel@tonic-gate seglock_findlock(uint_t cookie)
8320Sstevel@tonic-gate {
8330Sstevel@tonic-gate SegLock *lp;
8340Sstevel@tonic-gate
8350Sstevel@tonic-gate cookie &= (uint_t)PAGEMASK; /* remove pageoffset bits to get cookie */
8360Sstevel@tonic-gate mutex_enter(&winlock_mutex);
8370Sstevel@tonic-gate for (lp = lock_list; lp != NULL; lp = lp->next) {
8380Sstevel@tonic-gate mutex_enter(&lp->mutex);
8390Sstevel@tonic-gate if (cookie == lp->cookie) {
8400Sstevel@tonic-gate break; /* return with lp->mutex held */
8410Sstevel@tonic-gate }
8420Sstevel@tonic-gate mutex_exit(&lp->mutex);
8430Sstevel@tonic-gate }
8440Sstevel@tonic-gate mutex_exit(&winlock_mutex);
8450Sstevel@tonic-gate return (lp);
8460Sstevel@tonic-gate }
8470Sstevel@tonic-gate
8480Sstevel@tonic-gate /*
8490Sstevel@tonic-gate * search the lock_list list for the specified non-zero key
8500Sstevel@tonic-gate * if found, returns with lock for SegLock structure held
8510Sstevel@tonic-gate */
8520Sstevel@tonic-gate static SegLock *
seglock_findkey(uint_t key)8530Sstevel@tonic-gate seglock_findkey(uint_t key)
8540Sstevel@tonic-gate {
8550Sstevel@tonic-gate SegLock *lp;
8560Sstevel@tonic-gate
8570Sstevel@tonic-gate ASSERT(MUTEX_HELD(&winlock_mutex));
8580Sstevel@tonic-gate /* The driver allows multiple locks with key 0, dont search */
8590Sstevel@tonic-gate if (key == 0)
8600Sstevel@tonic-gate return (NULL);
8610Sstevel@tonic-gate for (lp = lock_list; lp != NULL; lp = lp->next) {
8620Sstevel@tonic-gate mutex_enter(&lp->mutex);
8630Sstevel@tonic-gate if (key == lp->key)
8640Sstevel@tonic-gate break;
8650Sstevel@tonic-gate mutex_exit(&lp->mutex);
8660Sstevel@tonic-gate }
8670Sstevel@tonic-gate return (lp);
8680Sstevel@tonic-gate }
8690Sstevel@tonic-gate
8700Sstevel@tonic-gate /*
8710Sstevel@tonic-gate * Create a new lock context.
8720Sstevel@tonic-gate * Returns with SegLock mutex held
8730Sstevel@tonic-gate */
8740Sstevel@tonic-gate
8750Sstevel@tonic-gate static SegLock *
seglock_createlock(enum winlock_style style)8760Sstevel@tonic-gate seglock_createlock(enum winlock_style style)
8770Sstevel@tonic-gate {
8780Sstevel@tonic-gate SegLock *lp;
8790Sstevel@tonic-gate
8800Sstevel@tonic-gate DEBUGF(3, (CE_CONT, "seglock_createlock: free_list=%p, next_lock %d\n",
8817656SSherry.Moore@Sun.COM (void *)lock_free_list, next_lock));
8820Sstevel@tonic-gate
8830Sstevel@tonic-gate ASSERT(MUTEX_HELD(&winlock_mutex));
8840Sstevel@tonic-gate if (lock_free_list != NULL) {
8850Sstevel@tonic-gate lp = lock_free_list;
8860Sstevel@tonic-gate lock_free_list = lp->next;
8870Sstevel@tonic-gate } else if (next_lock >= MAX_LOCKS) {
8880Sstevel@tonic-gate return (NULL);
8890Sstevel@tonic-gate } else {
8900Sstevel@tonic-gate lp = kmem_zalloc(sizeof (SegLock), KM_SLEEP);
8910Sstevel@tonic-gate lp->cookie = (next_lock + 1) * (uint_t)PAGESIZE;
8920Sstevel@tonic-gate mutex_init(&lp->mutex, NULL, MUTEX_DEFAULT, NULL);
8930Sstevel@tonic-gate cv_init(&lp->locksleep, NULL, CV_DEFAULT, NULL);
8940Sstevel@tonic-gate ++next_lock;
8950Sstevel@tonic-gate }
8960Sstevel@tonic-gate
8970Sstevel@tonic-gate mutex_enter(&lp->mutex);
8980Sstevel@tonic-gate ASSERT((lp->cookie/PAGESIZE) <= next_lock);
8990Sstevel@tonic-gate
9000Sstevel@tonic-gate if (style == OLDSTYLE_LOCK) {
9010Sstevel@tonic-gate lp->lockptr = (int *)ddi_umem_alloc(PAGESIZE,
9027656SSherry.Moore@Sun.COM DDI_UMEM_SLEEP, &(lp->umem_cookie));
9030Sstevel@tonic-gate } else {
9040Sstevel@tonic-gate lp->lockptr = ((int *)lockpage) + ((lp->cookie/PAGESIZE) - 1);
9050Sstevel@tonic-gate lp->umem_cookie = lockpage_cookie;
9060Sstevel@tonic-gate }
9070Sstevel@tonic-gate
9080Sstevel@tonic-gate ASSERT(lp->lockptr != NULL);
9090Sstevel@tonic-gate lp->style = style;
9100Sstevel@tonic-gate lp->sleepers = 0;
9110Sstevel@tonic-gate lp->alloccount = 1;
9120Sstevel@tonic-gate lp->timeout = LOCKTIME*hz;
9130Sstevel@tonic-gate lp->clients = NULL;
9140Sstevel@tonic-gate lp->owner = NULL;
9150Sstevel@tonic-gate LOCK(lp) = 0;
9160Sstevel@tonic-gate lp->next = lock_list;
9170Sstevel@tonic-gate lock_list = lp;
9180Sstevel@tonic-gate return (lp);
9190Sstevel@tonic-gate }
9200Sstevel@tonic-gate
9210Sstevel@tonic-gate /*
9220Sstevel@tonic-gate * Routine to destory a lock structure.
9230Sstevel@tonic-gate * This routine is called while holding the lp->mutex but not the
9240Sstevel@tonic-gate * winlock_mutex.
9250Sstevel@tonic-gate */
9260Sstevel@tonic-gate
9270Sstevel@tonic-gate static void
seglock_destroylock(SegLock * lp)9280Sstevel@tonic-gate seglock_destroylock(SegLock *lp)
9290Sstevel@tonic-gate {
9300Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
9310Sstevel@tonic-gate ASSERT(!MUTEX_HELD(&winlock_mutex));
9320Sstevel@tonic-gate
9330Sstevel@tonic-gate DEBUGF(3, (CE_CONT, "destroying lock cookie %d key %d\n",
9347656SSherry.Moore@Sun.COM lp->cookie, lp->key));
9350Sstevel@tonic-gate
9360Sstevel@tonic-gate ASSERT(lp->alloccount == 0);
9370Sstevel@tonic-gate ASSERT(lp->clients == NULL);
9380Sstevel@tonic-gate ASSERT(lp->owner == NULL);
9390Sstevel@tonic-gate ASSERT(lp->sleepers == 0);
9400Sstevel@tonic-gate
9410Sstevel@tonic-gate /* clean up/release fields in lp */
9420Sstevel@tonic-gate if (lp->style == OLDSTYLE_LOCK) {
9430Sstevel@tonic-gate ddi_umem_free(lp->umem_cookie);
9440Sstevel@tonic-gate }
9450Sstevel@tonic-gate lp->umem_cookie = NULL;
9460Sstevel@tonic-gate lp->lockptr = NULL;
9470Sstevel@tonic-gate lp->key = 0;
9480Sstevel@tonic-gate
9490Sstevel@tonic-gate /*
9500Sstevel@tonic-gate * Reduce cookie by 1, makes it non page-aligned and invalid
9510Sstevel@tonic-gate * This prevents any valid lookup from finding this lock
9520Sstevel@tonic-gate * so when we drop the lock and regrab it it will still
9530Sstevel@tonic-gate * be there and nobody else would have attached to it
9540Sstevel@tonic-gate */
9550Sstevel@tonic-gate lp->cookie--;
9560Sstevel@tonic-gate
9570Sstevel@tonic-gate /* Drop and reacquire mutexes in right order */
9580Sstevel@tonic-gate mutex_exit(&lp->mutex);
9590Sstevel@tonic-gate mutex_enter(&winlock_mutex);
9600Sstevel@tonic-gate mutex_enter(&lp->mutex);
9610Sstevel@tonic-gate
9620Sstevel@tonic-gate /* reincrement the cookie to get the original valid cookie */
9630Sstevel@tonic-gate lp->cookie++;
9640Sstevel@tonic-gate ASSERT((lp->cookie & PAGEOFFSET) == 0);
9650Sstevel@tonic-gate ASSERT(lp->alloccount == 0);
9660Sstevel@tonic-gate ASSERT(lp->clients == NULL);
9670Sstevel@tonic-gate ASSERT(lp->owner == NULL);
9680Sstevel@tonic-gate ASSERT(lp->sleepers == 0);
9690Sstevel@tonic-gate
9700Sstevel@tonic-gate /* Remove lp from lock_list */
9710Sstevel@tonic-gate if (lock_list == lp) {
9720Sstevel@tonic-gate lock_list = lp->next;
9730Sstevel@tonic-gate } else {
9740Sstevel@tonic-gate SegLock *tmp = lock_list;
9750Sstevel@tonic-gate while (tmp->next != lp) {
9760Sstevel@tonic-gate tmp = tmp->next;
9770Sstevel@tonic-gate ASSERT(tmp != NULL);
9780Sstevel@tonic-gate }
9790Sstevel@tonic-gate tmp->next = lp->next;
9800Sstevel@tonic-gate }
9810Sstevel@tonic-gate
9820Sstevel@tonic-gate /* Add to lock_free_list */
9830Sstevel@tonic-gate lp->next = lock_free_list;
9840Sstevel@tonic-gate lock_free_list = lp;
9850Sstevel@tonic-gate mutex_exit(&lp->mutex);
9860Sstevel@tonic-gate
9870Sstevel@tonic-gate /* Check if all locks deleted and cleanup */
9880Sstevel@tonic-gate if (lock_list == NULL) {
9890Sstevel@tonic-gate lock_destroyall();
9900Sstevel@tonic-gate }
9910Sstevel@tonic-gate
9920Sstevel@tonic-gate mutex_exit(&winlock_mutex);
9930Sstevel@tonic-gate }
9940Sstevel@tonic-gate
9950Sstevel@tonic-gate /* Routine to find a SegProc corresponding to the tag */
9960Sstevel@tonic-gate
9970Sstevel@tonic-gate static SegProc *
seglock_find_specific(SegLock * lp,void * tag)9980Sstevel@tonic-gate seglock_find_specific(SegLock *lp, void *tag)
9990Sstevel@tonic-gate {
10000Sstevel@tonic-gate SegProc *sdp;
10010Sstevel@tonic-gate
10020Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
10030Sstevel@tonic-gate ASSERT(tag != NULL);
10040Sstevel@tonic-gate for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
10050Sstevel@tonic-gate if (ID(sdp) == tag)
10060Sstevel@tonic-gate break;
10070Sstevel@tonic-gate }
10080Sstevel@tonic-gate return (sdp);
10090Sstevel@tonic-gate }
10100Sstevel@tonic-gate
10110Sstevel@tonic-gate /* Routine to find (and if needed allocate) a SegProc corresponding to tag */
10120Sstevel@tonic-gate
10130Sstevel@tonic-gate static SegProc *
seglock_alloc_specific(SegLock * lp,void * tag)10140Sstevel@tonic-gate seglock_alloc_specific(SegLock *lp, void *tag)
10150Sstevel@tonic-gate {
10160Sstevel@tonic-gate SegProc *sdp;
10170Sstevel@tonic-gate
10180Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
10190Sstevel@tonic-gate ASSERT(tag != NULL);
10200Sstevel@tonic-gate
10210Sstevel@tonic-gate /* Search and return if existing one found */
10220Sstevel@tonic-gate sdp = seglock_find_specific(lp, tag);
10230Sstevel@tonic-gate if (sdp != NULL)
10240Sstevel@tonic-gate return (sdp);
10250Sstevel@tonic-gate
10260Sstevel@tonic-gate DEBUGF(3, (CE_CONT, "Allocating segproc structure for tag %p lock %d\n",
10277656SSherry.Moore@Sun.COM tag, lp->cookie));
10280Sstevel@tonic-gate
10290Sstevel@tonic-gate /* Allocate a new SegProc */
10300Sstevel@tonic-gate sdp = kmem_zalloc(sizeof (SegProc), KM_SLEEP);
10310Sstevel@tonic-gate sdp->next = lp->clients;
10320Sstevel@tonic-gate lp->clients = sdp;
10330Sstevel@tonic-gate sdp->lp = lp;
10340Sstevel@tonic-gate ID(sdp) = tag;
10350Sstevel@tonic-gate return (sdp);
10360Sstevel@tonic-gate }
10370Sstevel@tonic-gate
10380Sstevel@tonic-gate /*
10390Sstevel@tonic-gate * search a context's client list for the given client and delete
10400Sstevel@tonic-gate */
10410Sstevel@tonic-gate
10420Sstevel@tonic-gate static void
seglock_deleteclient(SegLock * lp,SegProc * sdp)10430Sstevel@tonic-gate seglock_deleteclient(SegLock *lp, SegProc *sdp)
10440Sstevel@tonic-gate {
10450Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
10460Sstevel@tonic-gate ASSERT(lp->owner != sdp); /* Not current owner of lock */
10470Sstevel@tonic-gate ASSERT(sdp->lockseg == NULL); /* Mappings torn down */
10480Sstevel@tonic-gate ASSERT(sdp->unlockseg == NULL);
10490Sstevel@tonic-gate
10500Sstevel@tonic-gate DEBUGF(3, (CE_CONT, "Deleting segproc structure for pid %d lock %d\n",
10517656SSherry.Moore@Sun.COM ddi_get_pid(), lp->cookie));
10520Sstevel@tonic-gate if (lp->clients == sdp) {
10530Sstevel@tonic-gate lp->clients = sdp->next;
10540Sstevel@tonic-gate } else {
10550Sstevel@tonic-gate SegProc *tmp = lp->clients;
10560Sstevel@tonic-gate while (tmp->next != sdp) {
10570Sstevel@tonic-gate tmp = tmp->next;
10580Sstevel@tonic-gate ASSERT(tmp != NULL);
10590Sstevel@tonic-gate }
10600Sstevel@tonic-gate tmp->next = sdp->next;
10610Sstevel@tonic-gate }
10620Sstevel@tonic-gate kmem_free(sdp, sizeof (SegProc));
10630Sstevel@tonic-gate }
10640Sstevel@tonic-gate
10650Sstevel@tonic-gate /*
10660Sstevel@tonic-gate * Routine to verify if a SegProc and SegLock
10670Sstevel@tonic-gate * structures are empty/idle.
10680Sstevel@tonic-gate * Destroys the structures if they are ready
10690Sstevel@tonic-gate * Can be called with sdp == NULL if want to verify only the lock state
10700Sstevel@tonic-gate * caller should hold the lp->mutex
10710Sstevel@tonic-gate * and this routine drops the mutex
10720Sstevel@tonic-gate */
10730Sstevel@tonic-gate static void
garbage_collect_lock(SegLock * lp,SegProc * sdp)10740Sstevel@tonic-gate garbage_collect_lock(SegLock *lp, SegProc *sdp)
10750Sstevel@tonic-gate {
10760Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
10770Sstevel@tonic-gate /* see if both segments unmapped from client structure */
10780Sstevel@tonic-gate if ((sdp != NULL) && (sdp->lockseg == NULL) && (sdp->unlockseg == NULL))
10790Sstevel@tonic-gate seglock_deleteclient(lp, sdp);
10800Sstevel@tonic-gate
10810Sstevel@tonic-gate /* see if this is last client in the entire lock context */
10820Sstevel@tonic-gate if ((lp->clients == NULL) && (lp->alloccount == 0)) {
10830Sstevel@tonic-gate seglock_destroylock(lp);
10840Sstevel@tonic-gate } else {
10850Sstevel@tonic-gate mutex_exit(&lp->mutex);
10860Sstevel@tonic-gate }
10870Sstevel@tonic-gate }
10880Sstevel@tonic-gate
10890Sstevel@tonic-gate
10900Sstevel@tonic-gate /* IOCTLS START HERE */
10910Sstevel@tonic-gate
10920Sstevel@tonic-gate static int
seglock_grabinfo(intptr_t arg,int mode)10930Sstevel@tonic-gate seglock_grabinfo(intptr_t arg, int mode)
10940Sstevel@tonic-gate {
10950Sstevel@tonic-gate int i = 1;
10960Sstevel@tonic-gate
10970Sstevel@tonic-gate /* multiple clients per lock supported - see comments up top */
10980Sstevel@tonic-gate if (ddi_copyout((caddr_t)&i, (caddr_t)arg, sizeof (int), mode) != 0)
10990Sstevel@tonic-gate return (EFAULT);
11000Sstevel@tonic-gate return (0);
11010Sstevel@tonic-gate }
11020Sstevel@tonic-gate
11030Sstevel@tonic-gate static int
seglock_graballoc(intptr_t arg,enum winlock_style style,int mode)11040Sstevel@tonic-gate seglock_graballoc(intptr_t arg, enum winlock_style style, int mode) /* IOCTL */
11050Sstevel@tonic-gate {
11060Sstevel@tonic-gate struct seglock *lp;
11070Sstevel@tonic-gate uint_t key;
11080Sstevel@tonic-gate struct winlockalloc wla;
11090Sstevel@tonic-gate int err;
11100Sstevel@tonic-gate
11110Sstevel@tonic-gate if (style == OLDSTYLE_LOCK) {
11120Sstevel@tonic-gate key = 0;
11130Sstevel@tonic-gate } else {
11140Sstevel@tonic-gate if (ddi_copyin((caddr_t)arg, (caddr_t)&wla, sizeof (wla),
11150Sstevel@tonic-gate mode)) {
11160Sstevel@tonic-gate return (EFAULT);
11170Sstevel@tonic-gate }
11180Sstevel@tonic-gate key = wla.sy_key;
11190Sstevel@tonic-gate }
11200Sstevel@tonic-gate
11210Sstevel@tonic-gate DEBUGF(3, (CE_CONT,
11227656SSherry.Moore@Sun.COM "seglock_graballoc: key=%u, style=%d\n", key, style));
11230Sstevel@tonic-gate
11240Sstevel@tonic-gate mutex_enter(&winlock_mutex);
11250Sstevel@tonic-gate /* Allocate lockpage on first new style alloc */
11260Sstevel@tonic-gate if ((lockpage == NULL) && (style == NEWSTYLE_LOCK)) {
11270Sstevel@tonic-gate lockpage = ddi_umem_alloc(PAGESIZE, DDI_UMEM_SLEEP,
11287656SSherry.Moore@Sun.COM &lockpage_cookie);
11290Sstevel@tonic-gate }
11300Sstevel@tonic-gate
11310Sstevel@tonic-gate /* Allocate trashpage on first alloc (any style) */
11320Sstevel@tonic-gate if (trashpage_cookie == NULL) {
11330Sstevel@tonic-gate (void) ddi_umem_alloc(PAGESIZE, DDI_UMEM_TRASH | DDI_UMEM_SLEEP,
11347656SSherry.Moore@Sun.COM &trashpage_cookie);
11350Sstevel@tonic-gate }
11360Sstevel@tonic-gate
11370Sstevel@tonic-gate if ((lp = seglock_findkey(key)) != NULL) {
11380Sstevel@tonic-gate DEBUGF(2, (CE_CONT, "alloc: found lock key %d cookie %d\n",
11397656SSherry.Moore@Sun.COM key, lp->cookie));
11400Sstevel@tonic-gate ++lp->alloccount;
11410Sstevel@tonic-gate } else if ((lp = seglock_createlock(style)) != NULL) {
11420Sstevel@tonic-gate DEBUGF(2, (CE_CONT, "alloc: created lock key %d cookie %d\n",
11437656SSherry.Moore@Sun.COM key, lp->cookie));
11440Sstevel@tonic-gate lp->key = key;
11450Sstevel@tonic-gate } else {
11460Sstevel@tonic-gate DEBUGF(2, (CE_CONT, "alloc: cannot create lock key %d\n", key));
11470Sstevel@tonic-gate mutex_exit(&winlock_mutex);
11480Sstevel@tonic-gate return (ENOMEM);
11490Sstevel@tonic-gate }
11500Sstevel@tonic-gate ASSERT((lp != NULL) && MUTEX_HELD(&lp->mutex));
11510Sstevel@tonic-gate
11520Sstevel@tonic-gate mutex_exit(&winlock_mutex);
11530Sstevel@tonic-gate
11540Sstevel@tonic-gate if (style == OLDSTYLE_LOCK) {
11550Sstevel@tonic-gate err = ddi_copyout((caddr_t)&lp->cookie, (caddr_t)arg,
11567656SSherry.Moore@Sun.COM sizeof (lp->cookie), mode);
11570Sstevel@tonic-gate } else {
11580Sstevel@tonic-gate wla.sy_ident = lp->cookie +
11590Sstevel@tonic-gate (uint_t)((uintptr_t)(lp->lockptr) & PAGEOFFSET);
11600Sstevel@tonic-gate err = ddi_copyout((caddr_t)&wla, (caddr_t)arg,
11610Sstevel@tonic-gate sizeof (wla), mode);
11620Sstevel@tonic-gate }
11630Sstevel@tonic-gate
11640Sstevel@tonic-gate if (err) {
11650Sstevel@tonic-gate /* On error, should undo allocation */
11660Sstevel@tonic-gate lp->alloccount--;
11670Sstevel@tonic-gate
11680Sstevel@tonic-gate /* Verify and delete if lock is unused now */
11690Sstevel@tonic-gate garbage_collect_lock(lp, NULL);
11700Sstevel@tonic-gate return (EFAULT);
11710Sstevel@tonic-gate }
11720Sstevel@tonic-gate
11730Sstevel@tonic-gate mutex_exit(&lp->mutex);
11740Sstevel@tonic-gate return (0);
11750Sstevel@tonic-gate }
11760Sstevel@tonic-gate
11770Sstevel@tonic-gate static int
seglock_grabfree(intptr_t arg,int mode)11780Sstevel@tonic-gate seglock_grabfree(intptr_t arg, int mode) /* IOCTL */
11790Sstevel@tonic-gate {
11800Sstevel@tonic-gate struct seglock *lp;
11810Sstevel@tonic-gate uint_t offset;
11820Sstevel@tonic-gate
11830Sstevel@tonic-gate if (ddi_copyin((caddr_t)arg, &offset, sizeof (offset), mode)
11840Sstevel@tonic-gate != 0) {
11850Sstevel@tonic-gate return (EFAULT);
11860Sstevel@tonic-gate }
11870Sstevel@tonic-gate DEBUGF(2, (CE_CONT, "seglock_grabfree: offset=%u", offset));
11880Sstevel@tonic-gate
11890Sstevel@tonic-gate if ((lp = seglock_findlock(offset)) == NULL) {
11900Sstevel@tonic-gate DEBUGF(2, (CE_CONT, "did not find lock\n"));
11910Sstevel@tonic-gate return (EINVAL);
11920Sstevel@tonic-gate }
11930Sstevel@tonic-gate DEBUGF(3, (CE_CONT, " lock key %d, cookie %d, alloccount %d\n",
11947656SSherry.Moore@Sun.COM lp->key, lp->cookie, lp->alloccount));
11950Sstevel@tonic-gate
11960Sstevel@tonic-gate if (lp->alloccount > 0)
11970Sstevel@tonic-gate lp->alloccount--;
11980Sstevel@tonic-gate
11990Sstevel@tonic-gate /* Verify and delete if lock is unused now */
12000Sstevel@tonic-gate garbage_collect_lock(lp, NULL);
12010Sstevel@tonic-gate return (0);
12020Sstevel@tonic-gate }
12030Sstevel@tonic-gate
12040Sstevel@tonic-gate
12050Sstevel@tonic-gate /*
12060Sstevel@tonic-gate * Sets timeout in lock and UFLAGS in client
12070Sstevel@tonic-gate * the UFLAGS are stored in the client structure and persistent only
12080Sstevel@tonic-gate * till the unmap of the lock pages. If the process sets UFLAGS
12090Sstevel@tonic-gate * does a map of the lock/unlock pages and unmaps them, the client
12100Sstevel@tonic-gate * structure will get deleted and the UFLAGS will be lost. The process
12110Sstevel@tonic-gate * will need to resetup the flags.
12120Sstevel@tonic-gate */
12130Sstevel@tonic-gate static int
seglock_settimeout(intptr_t arg,int mode)12140Sstevel@tonic-gate seglock_settimeout(intptr_t arg, int mode) /* IOCTL */
12150Sstevel@tonic-gate {
12160Sstevel@tonic-gate SegLock *lp;
12170Sstevel@tonic-gate SegProc *sdp;
12180Sstevel@tonic-gate struct winlocktimeout wlt;
12190Sstevel@tonic-gate
12200Sstevel@tonic-gate if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0) {
12210Sstevel@tonic-gate return (EFAULT);
12220Sstevel@tonic-gate }
12230Sstevel@tonic-gate
12240Sstevel@tonic-gate if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
12250Sstevel@tonic-gate return (EINVAL);
12260Sstevel@tonic-gate
12270Sstevel@tonic-gate lp->timeout = MSEC_TO_TICK_ROUNDUP(wlt.sy_timeout);
12280Sstevel@tonic-gate /* if timeout modified, wake up any sleepers */
12290Sstevel@tonic-gate if (lp->sleepers > 0) {
12300Sstevel@tonic-gate cv_broadcast(&lp->locksleep);
12310Sstevel@tonic-gate }
12320Sstevel@tonic-gate
12330Sstevel@tonic-gate /*
12340Sstevel@tonic-gate * If the process is trying to set UFLAGS,
12350Sstevel@tonic-gate * Find the client segproc and allocate one if needed
12360Sstevel@tonic-gate * Set the flags preserving the kernel flags
12370Sstevel@tonic-gate * If the process is clearing UFLAGS
12380Sstevel@tonic-gate * Find the client segproc but dont allocate one if does not exist
12390Sstevel@tonic-gate */
12400Sstevel@tonic-gate if (wlt.sy_flags & UFLAGS) {
12410Sstevel@tonic-gate sdp = seglock_allocclient(lp);
12420Sstevel@tonic-gate sdp->flag = sdp->flag & KFLAGS | wlt.sy_flags & UFLAGS;
12430Sstevel@tonic-gate } else if ((sdp = seglock_findclient(lp)) != NULL) {
12440Sstevel@tonic-gate sdp->flag = sdp->flag & KFLAGS;
12450Sstevel@tonic-gate /* If clearing UFLAGS leaves the segment or lock idle, delete */
12460Sstevel@tonic-gate garbage_collect_lock(lp, sdp);
12470Sstevel@tonic-gate return (0);
12480Sstevel@tonic-gate }
12490Sstevel@tonic-gate mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
12500Sstevel@tonic-gate return (0);
12510Sstevel@tonic-gate }
12520Sstevel@tonic-gate
12530Sstevel@tonic-gate static int
seglock_gettimeout(intptr_t arg,int mode)12540Sstevel@tonic-gate seglock_gettimeout(intptr_t arg, int mode)
12550Sstevel@tonic-gate {
12560Sstevel@tonic-gate SegLock *lp;
12570Sstevel@tonic-gate SegProc *sdp;
12580Sstevel@tonic-gate struct winlocktimeout wlt;
12590Sstevel@tonic-gate
12600Sstevel@tonic-gate if (ddi_copyin((caddr_t)arg, &wlt, sizeof (wlt), mode) != 0)
12610Sstevel@tonic-gate return (EFAULT);
12620Sstevel@tonic-gate
12630Sstevel@tonic-gate if ((lp = seglock_findlock(wlt.sy_ident)) == NULL)
12640Sstevel@tonic-gate return (EINVAL);
12650Sstevel@tonic-gate
12660Sstevel@tonic-gate wlt.sy_timeout = TICK_TO_MSEC(lp->timeout);
12670Sstevel@tonic-gate /*
12680Sstevel@tonic-gate * If this process has an active allocated lock return those flags
12690Sstevel@tonic-gate * Dont allocate a client structure on gettimeout
12700Sstevel@tonic-gate * If not, return 0.
12710Sstevel@tonic-gate */
12720Sstevel@tonic-gate if ((sdp = seglock_findclient(lp)) != NULL) {
12730Sstevel@tonic-gate wlt.sy_flags = sdp->flag & UFLAGS;
12740Sstevel@tonic-gate } else {
12750Sstevel@tonic-gate wlt.sy_flags = 0;
12760Sstevel@tonic-gate }
12770Sstevel@tonic-gate mutex_exit(&lp->mutex); /* mutex held by seglock_findlock */
12780Sstevel@tonic-gate
12790Sstevel@tonic-gate if (ddi_copyout(&wlt, (caddr_t)arg, sizeof (wlt), mode) != 0)
12800Sstevel@tonic-gate return (EFAULT);
12810Sstevel@tonic-gate
12820Sstevel@tonic-gate return (0);
12830Sstevel@tonic-gate }
12840Sstevel@tonic-gate
12850Sstevel@tonic-gate /*
12860Sstevel@tonic-gate * Handle lock segment faults here...
12870Sstevel@tonic-gate *
12880Sstevel@tonic-gate * This is where the magic happens.
12890Sstevel@tonic-gate */
12900Sstevel@tonic-gate
12910Sstevel@tonic-gate /* ARGSUSED */
12920Sstevel@tonic-gate static int
seglock_lockfault(devmap_cookie_t dhp,SegProc * sdp,SegLock * lp,uint_t rw)12930Sstevel@tonic-gate seglock_lockfault(devmap_cookie_t dhp, SegProc *sdp, SegLock *lp, uint_t rw)
12940Sstevel@tonic-gate {
12950Sstevel@tonic-gate SegProc *owner = lp->owner;
12960Sstevel@tonic-gate int err;
12970Sstevel@tonic-gate
12980Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
12990Sstevel@tonic-gate DEBUGF(3, (CE_CONT,
13007656SSherry.Moore@Sun.COM "seglock_lockfault: hdl=%p, sdp=%p, lp=%p owner=%p\n",
13017656SSherry.Moore@Sun.COM (void *)dhp, (void *)sdp, (void *)lp, (void *)owner));
13020Sstevel@tonic-gate
13030Sstevel@tonic-gate /* lockfault is always called with sdp in current process context */
13040Sstevel@tonic-gate ASSERT(ID(sdp) == CURPROC_ID);
13050Sstevel@tonic-gate
13060Sstevel@tonic-gate /* If Lock has no current owner, give the mapping to new owner */
13070Sstevel@tonic-gate if (owner == NULL) {
13080Sstevel@tonic-gate DEBUGF(4, (CE_CONT, " lock has no current owner\n"));
13090Sstevel@tonic-gate return (give_mapping(lp, sdp, rw));
13100Sstevel@tonic-gate }
13110Sstevel@tonic-gate
13120Sstevel@tonic-gate if (owner == sdp) {
13130Sstevel@tonic-gate /*
13140Sstevel@tonic-gate * Current owner is faulting on owned lock segment OR
13150Sstevel@tonic-gate * Current owner is faulting on unlock page and has no waiters
13160Sstevel@tonic-gate * Then can give the mapping to current owner
13170Sstevel@tonic-gate */
13187656SSherry.Moore@Sun.COM if ((sdp->lockseg == dhp) || (lp->sleepers == 0)) {
13190Sstevel@tonic-gate DEBUGF(4, (CE_CONT, "lock owner faulting\n"));
13200Sstevel@tonic-gate return (give_mapping(lp, sdp, rw));
13217656SSherry.Moore@Sun.COM } else {
13220Sstevel@tonic-gate /*
13230Sstevel@tonic-gate * Owner must be writing to unlock page and there are waiters.
13240Sstevel@tonic-gate * other cases have been checked earlier.
13250Sstevel@tonic-gate * Release the lock, owner, and owners mappings
13260Sstevel@tonic-gate * As the owner is trying to write to the unlock page, leave
13270Sstevel@tonic-gate * it with a trashpage mapping and wake up the sleepers
13280Sstevel@tonic-gate */
13290Sstevel@tonic-gate ASSERT((dhp == sdp->unlockseg) && (lp->sleepers != 0));
13300Sstevel@tonic-gate DEBUGF(4, (CE_CONT, " owner fault on unlock seg w/ sleeper\n"));
13310Sstevel@tonic-gate return (lock_giveup(lp, 1));
13327656SSherry.Moore@Sun.COM }
13330Sstevel@tonic-gate }
13340Sstevel@tonic-gate
13350Sstevel@tonic-gate ASSERT(owner != sdp);
13360Sstevel@tonic-gate
13370Sstevel@tonic-gate /*
13380Sstevel@tonic-gate * If old owner faulting on trash unlock mapping,
13390Sstevel@tonic-gate * load hat mappings to trash page
13400Sstevel@tonic-gate * RFE: non-owners should NOT be faulting on unlock mapping as they
13410Sstevel@tonic-gate * as first supposed to fault on the lock seg. We could give them
13420Sstevel@tonic-gate * a trash page or return error.
13430Sstevel@tonic-gate */
13440Sstevel@tonic-gate if ((sdp->unlockseg == dhp) && (sdp->flag & TRASHPAGE)) {
13450Sstevel@tonic-gate DEBUGF(4, (CE_CONT, " old owner reloads trash mapping\n"));
13460Sstevel@tonic-gate return (devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
13477656SSherry.Moore@Sun.COM DEVMAP_ACCESS, rw));
13480Sstevel@tonic-gate }
13490Sstevel@tonic-gate
13500Sstevel@tonic-gate /*
13510Sstevel@tonic-gate * Non-owner faulting. Need to check current LOCK state.
13520Sstevel@tonic-gate *
13530Sstevel@tonic-gate * Before reading lock value in LOCK(lp), we must make sure that
13540Sstevel@tonic-gate * the owner cannot change its value before we change mappings
13550Sstevel@tonic-gate * or else we could end up either with a hung process
13560Sstevel@tonic-gate * or more than one process thinking they have the lock.
13570Sstevel@tonic-gate * We do that by unloading the owner's mappings
13580Sstevel@tonic-gate */
13590Sstevel@tonic-gate DEBUGF(4, (CE_CONT, " owner loses mappings to check lock state\n"));
13600Sstevel@tonic-gate err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
13610Sstevel@tonic-gate err |= devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
13620Sstevel@tonic-gate if (err != 0)
13630Sstevel@tonic-gate return (err); /* unable to remove owner mapping */
13640Sstevel@tonic-gate
13650Sstevel@tonic-gate /*
13660Sstevel@tonic-gate * If lock is not held, then current owner mappings were
13670Sstevel@tonic-gate * unloaded above and we can give the lock to the new owner
13680Sstevel@tonic-gate */
13690Sstevel@tonic-gate if (LOCK(lp) == 0) {
13700Sstevel@tonic-gate DEBUGF(4, (CE_CONT,
13717656SSherry.Moore@Sun.COM "Free lock (%p): Giving mapping to new owner %d\n",
13727656SSherry.Moore@Sun.COM (void *)lp, ddi_get_pid()));
13730Sstevel@tonic-gate return (give_mapping(lp, sdp, rw));
13740Sstevel@tonic-gate }
13750Sstevel@tonic-gate
13760Sstevel@tonic-gate DEBUGF(4, (CE_CONT, " lock held, sleeping\n"));
13770Sstevel@tonic-gate
13780Sstevel@tonic-gate /*
13790Sstevel@tonic-gate * A non-owning process tried to write (presumably to the lockpage,
13800Sstevel@tonic-gate * but it doesn't matter) but the lock is held; we need to sleep for
13810Sstevel@tonic-gate * the lock while there is an owner.
13820Sstevel@tonic-gate */
13830Sstevel@tonic-gate
13840Sstevel@tonic-gate lp->sleepers++;
13850Sstevel@tonic-gate while ((owner = lp->owner) != NULL) {
13860Sstevel@tonic-gate int rval;
13870Sstevel@tonic-gate
13880Sstevel@tonic-gate if ((lp->timeout == 0) || (owner->flag & SY_NOTIMEOUT)) {
13890Sstevel@tonic-gate /*
13900Sstevel@tonic-gate * No timeout has been specified for this lock;
13910Sstevel@tonic-gate * we'll simply sleep on the condition variable.
13920Sstevel@tonic-gate */
13930Sstevel@tonic-gate rval = cv_wait_sig(&lp->locksleep, &lp->mutex);
13940Sstevel@tonic-gate } else {
13950Sstevel@tonic-gate /*
13960Sstevel@tonic-gate * A timeout _has_ been specified for this lock. We need
13970Sstevel@tonic-gate * to wake up and possibly steal this lock if the owner
13980Sstevel@tonic-gate * does not let it go. Note that all sleepers on a lock
13990Sstevel@tonic-gate * with a timeout wait; the sleeper with the earliest
14000Sstevel@tonic-gate * timeout will wakeup, and potentially steal the lock
14010Sstevel@tonic-gate * Stealing the lock will cause a broadcast on the
14020Sstevel@tonic-gate * locksleep cv and thus kick the other timed waiters
14030Sstevel@tonic-gate * and cause everyone to restart in a new timedwait
14040Sstevel@tonic-gate */
1405*11066Srafael.vanoni@sun.com rval = cv_reltimedwait_sig(&lp->locksleep,
1406*11066Srafael.vanoni@sun.com &lp->mutex, lp->timeout, TR_CLOCK_TICK);
14070Sstevel@tonic-gate }
14080Sstevel@tonic-gate
14090Sstevel@tonic-gate /*
14100Sstevel@tonic-gate * Timeout and still old owner - steal lock
14110Sstevel@tonic-gate * Force-Release lock and give old owner a trashpage mapping
14120Sstevel@tonic-gate */
14130Sstevel@tonic-gate if ((rval == -1) && (lp->owner == owner)) {
14140Sstevel@tonic-gate /*
14150Sstevel@tonic-gate * if any errors in lock_giveup, go back and sleep/retry
14160Sstevel@tonic-gate * If successful, will break out of loop
14170Sstevel@tonic-gate */
14180Sstevel@tonic-gate cmn_err(CE_NOTE, "Process %d timed out on lock %d\n",
14197656SSherry.Moore@Sun.COM ddi_get_pid(), lp->cookie);
14200Sstevel@tonic-gate (void) lock_giveup(lp, 1);
14210Sstevel@tonic-gate } else if (rval == 0) { /* signal pending */
14220Sstevel@tonic-gate cmn_err(CE_NOTE,
14230Sstevel@tonic-gate "Process %d signalled while waiting on lock %d\n",
14240Sstevel@tonic-gate ddi_get_pid(), lp->cookie);
14250Sstevel@tonic-gate lp->sleepers--;
14260Sstevel@tonic-gate return (FC_MAKE_ERR(EINTR));
14270Sstevel@tonic-gate }
14280Sstevel@tonic-gate }
14290Sstevel@tonic-gate
14300Sstevel@tonic-gate lp->sleepers--;
14310Sstevel@tonic-gate /*
14320Sstevel@tonic-gate * Give mapping to this process and save a fault later
14330Sstevel@tonic-gate */
14340Sstevel@tonic-gate return (give_mapping(lp, sdp, rw));
14350Sstevel@tonic-gate }
14360Sstevel@tonic-gate
14370Sstevel@tonic-gate /*
14380Sstevel@tonic-gate * Utility: give a valid mapping to lock and unlock pages to current process.
14390Sstevel@tonic-gate * Caller responsible for unloading old owner's mappings
14400Sstevel@tonic-gate */
14410Sstevel@tonic-gate
14420Sstevel@tonic-gate static int
give_mapping(SegLock * lp,SegProc * sdp,uint_t rw)14430Sstevel@tonic-gate give_mapping(SegLock *lp, SegProc *sdp, uint_t rw)
14440Sstevel@tonic-gate {
14450Sstevel@tonic-gate int err = 0;
14460Sstevel@tonic-gate
14470Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
14480Sstevel@tonic-gate ASSERT(!((lp->owner == NULL) && (LOCK(lp) != 0)));
14490Sstevel@tonic-gate /* give_mapping is always called with sdp in current process context */
14500Sstevel@tonic-gate ASSERT(ID(sdp) == CURPROC_ID);
14510Sstevel@tonic-gate
14520Sstevel@tonic-gate /* remap any old trash mappings */
14530Sstevel@tonic-gate if (sdp->flag & TRASHPAGE) {
14540Sstevel@tonic-gate /* current owner should not have a trash mapping */
14550Sstevel@tonic-gate ASSERT(sdp != lp->owner);
14560Sstevel@tonic-gate
14570Sstevel@tonic-gate DEBUGF(4, (CE_CONT,
14580Sstevel@tonic-gate "new owner %d remapping old trash mapping\n",
14590Sstevel@tonic-gate ddi_get_pid()));
14600Sstevel@tonic-gate if ((err = devmap_umem_remap(sdp->unlockseg, winlock_dip,
14610Sstevel@tonic-gate lp->umem_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
14620Sstevel@tonic-gate /*
14630Sstevel@tonic-gate * unable to remap old trash page,
14640Sstevel@tonic-gate * abort before changing owner
14650Sstevel@tonic-gate */
14660Sstevel@tonic-gate DEBUGF(4, (CE_CONT,
14670Sstevel@tonic-gate "aborting: error in umem_remap %d\n", err));
14680Sstevel@tonic-gate return (err);
14690Sstevel@tonic-gate }
14700Sstevel@tonic-gate sdp->flag &= ~TRASHPAGE;
14710Sstevel@tonic-gate }
14720Sstevel@tonic-gate
14730Sstevel@tonic-gate /* we have a new owner now */
14740Sstevel@tonic-gate lp->owner = sdp;
14750Sstevel@tonic-gate
14760Sstevel@tonic-gate if ((err = devmap_load(sdp->lockseg, lp->cookie, PAGESIZE,
14770Sstevel@tonic-gate DEVMAP_ACCESS, rw)) != 0) {
14780Sstevel@tonic-gate return (err);
14790Sstevel@tonic-gate }
14800Sstevel@tonic-gate DEBUGF(4, (CE_CONT, "new owner %d gets lock mapping", ddi_get_pid()));
14810Sstevel@tonic-gate
14820Sstevel@tonic-gate if (lp->sleepers) {
14830Sstevel@tonic-gate /* Force unload unlock mapping if there are waiters */
14840Sstevel@tonic-gate DEBUGF(4, (CE_CONT,
14850Sstevel@tonic-gate " lock has %d sleepers => remove unlock mapping\n",
14860Sstevel@tonic-gate lp->sleepers));
14870Sstevel@tonic-gate err = devmap_unload(sdp->unlockseg, lp->cookie, PAGESIZE);
14880Sstevel@tonic-gate } else {
14890Sstevel@tonic-gate /*
14900Sstevel@tonic-gate * while here, give new owner a valid mapping to unlock
14910Sstevel@tonic-gate * page so we don't get called again.
14920Sstevel@tonic-gate */
14930Sstevel@tonic-gate DEBUGF(4, (CE_CONT, " and unlock mapping\n"));
14940Sstevel@tonic-gate err = devmap_load(sdp->unlockseg, lp->cookie, PAGESIZE,
14957656SSherry.Moore@Sun.COM DEVMAP_ACCESS, PROT_WRITE);
14960Sstevel@tonic-gate }
14970Sstevel@tonic-gate return (err);
14980Sstevel@tonic-gate }
14990Sstevel@tonic-gate
15000Sstevel@tonic-gate /*
15010Sstevel@tonic-gate * Unload owner's mappings, release the lock and wakeup any sleepers
15020Sstevel@tonic-gate * If trash, then the old owner is given a trash mapping
15030Sstevel@tonic-gate * => old owner held lock too long and caused a timeout
15040Sstevel@tonic-gate */
15050Sstevel@tonic-gate static int
lock_giveup(SegLock * lp,int trash)15060Sstevel@tonic-gate lock_giveup(SegLock *lp, int trash)
15070Sstevel@tonic-gate {
15080Sstevel@tonic-gate SegProc *owner = lp->owner;
15090Sstevel@tonic-gate
15100Sstevel@tonic-gate DEBUGF(4, (CE_CONT, "winlock_giveup: lp=%p, owner=%p, trash %d\n",
15110Sstevel@tonic-gate (void *)lp, (void *)ID(lp->owner), trash));
15120Sstevel@tonic-gate
15130Sstevel@tonic-gate ASSERT(MUTEX_HELD(&lp->mutex));
15140Sstevel@tonic-gate ASSERT(owner != NULL);
15150Sstevel@tonic-gate
15160Sstevel@tonic-gate /*
15170Sstevel@tonic-gate * owner loses lockpage/unlockpage mappings and gains a
15180Sstevel@tonic-gate * trashpage mapping, if needed.
15190Sstevel@tonic-gate */
15200Sstevel@tonic-gate if (!trash) {
15210Sstevel@tonic-gate /*
15220Sstevel@tonic-gate * We do not handle errors in devmap_unload in the !trash case,
15230Sstevel@tonic-gate * as the process is attempting to unmap/exit or otherwise
15240Sstevel@tonic-gate * release the lock. Errors in unloading the mapping are not
15250Sstevel@tonic-gate * going to affect that (unmap does not take error return).
15260Sstevel@tonic-gate */
15270Sstevel@tonic-gate (void) devmap_unload(owner->lockseg, lp->cookie, PAGESIZE);
15280Sstevel@tonic-gate (void) devmap_unload(owner->unlockseg, lp->cookie, PAGESIZE);
15290Sstevel@tonic-gate } else {
15300Sstevel@tonic-gate int err;
15310Sstevel@tonic-gate
15320Sstevel@tonic-gate if (err = devmap_unload(owner->lockseg, lp->cookie, PAGESIZE)) {
15330Sstevel@tonic-gate /* error unloading lockseg mapping. abort giveup */
15340Sstevel@tonic-gate return (err);
15350Sstevel@tonic-gate }
15360Sstevel@tonic-gate
15370Sstevel@tonic-gate /*
15380Sstevel@tonic-gate * old owner gets mapping to trash page so it can continue
15390Sstevel@tonic-gate * devmap_umem_remap does a hat_unload (and does it holding
15400Sstevel@tonic-gate * the right locks), so no need to devmap_unload on unlockseg
15410Sstevel@tonic-gate */
15420Sstevel@tonic-gate if ((err = devmap_umem_remap(owner->unlockseg, winlock_dip,
15430Sstevel@tonic-gate trashpage_cookie, 0, PAGESIZE, WINLOCK_PROT, 0, 0)) != 0) {
15440Sstevel@tonic-gate /* error remapping to trash page, abort giveup */
15450Sstevel@tonic-gate return (err);
15460Sstevel@tonic-gate }
15470Sstevel@tonic-gate owner->flag |= TRASHPAGE;
15480Sstevel@tonic-gate /*
15490Sstevel@tonic-gate * Preload mapping to trash page by calling devmap_load
15500Sstevel@tonic-gate * However, devmap_load can only be called on the faulting
15510Sstevel@tonic-gate * process context and not on the owner's process context
15520Sstevel@tonic-gate * we preload only if we happen to be in owner process context
15530Sstevel@tonic-gate * Other processes will fault on the unlock mapping
15540Sstevel@tonic-gate * and be given a trash mapping at that time.
15550Sstevel@tonic-gate */
15560Sstevel@tonic-gate if (ID(owner) == CURPROC_ID) {
15577656SSherry.Moore@Sun.COM (void) devmap_load(owner->unlockseg, lp->cookie,
15587656SSherry.Moore@Sun.COM PAGESIZE, DEVMAP_ACCESS, PROT_WRITE);
15590Sstevel@tonic-gate }
15600Sstevel@tonic-gate }
15610Sstevel@tonic-gate
15620Sstevel@tonic-gate lp->owner = NULL;
15630Sstevel@tonic-gate
15640Sstevel@tonic-gate /* Clear the lock value in underlying page so new owner can grab it */
15650Sstevel@tonic-gate LOCK(lp) = 0;
15660Sstevel@tonic-gate
15670Sstevel@tonic-gate if (lp->sleepers) {
15680Sstevel@tonic-gate DEBUGF(4, (CE_CONT, " waking up, lp=%p\n", (void *)lp));
15690Sstevel@tonic-gate cv_broadcast(&lp->locksleep);
15700Sstevel@tonic-gate }
15710Sstevel@tonic-gate return (0);
15720Sstevel@tonic-gate }
15730Sstevel@tonic-gate
15740Sstevel@tonic-gate /*
15750Sstevel@tonic-gate * destroy all allocated memory.
15760Sstevel@tonic-gate */
15770Sstevel@tonic-gate
15780Sstevel@tonic-gate static void
lock_destroyall(void)15790Sstevel@tonic-gate lock_destroyall(void)
15800Sstevel@tonic-gate {
15810Sstevel@tonic-gate SegLock *lp, *lpnext;
15820Sstevel@tonic-gate
15830Sstevel@tonic-gate ASSERT(MUTEX_HELD(&winlock_mutex));
15840Sstevel@tonic-gate ASSERT(lock_list == NULL);
15850Sstevel@tonic-gate
15860Sstevel@tonic-gate DEBUGF(1, (CE_CONT, "Lock list empty. Releasing free list\n"));
15870Sstevel@tonic-gate for (lp = lock_free_list; lp != NULL; lp = lpnext) {
15880Sstevel@tonic-gate mutex_enter(&lp->mutex);
15890Sstevel@tonic-gate lpnext = lp->next;
15900Sstevel@tonic-gate ASSERT(lp->clients == NULL);
15910Sstevel@tonic-gate ASSERT(lp->owner == NULL);
15920Sstevel@tonic-gate ASSERT(lp->alloccount == 0);
15930Sstevel@tonic-gate mutex_destroy(&lp->mutex);
15940Sstevel@tonic-gate cv_destroy(&lp->locksleep);
15950Sstevel@tonic-gate kmem_free(lp, sizeof (SegLock));
15960Sstevel@tonic-gate }
15970Sstevel@tonic-gate lock_free_list = NULL;
15980Sstevel@tonic-gate next_lock = 0;
15990Sstevel@tonic-gate }
16000Sstevel@tonic-gate
16010Sstevel@tonic-gate
16020Sstevel@tonic-gate /* RFE: create mdb walkers instead of dump routines? */
16030Sstevel@tonic-gate static void
seglock_dump_all(void)16040Sstevel@tonic-gate seglock_dump_all(void)
16050Sstevel@tonic-gate {
16060Sstevel@tonic-gate SegLock *lp;
16070Sstevel@tonic-gate
16080Sstevel@tonic-gate mutex_enter(&winlock_mutex);
16090Sstevel@tonic-gate cmn_err(CE_CONT, "ID\tKEY\tNALLOC\tATTCH\tOWNED\tLOCK\tWAITER\n");
16100Sstevel@tonic-gate
16110Sstevel@tonic-gate cmn_err(CE_CONT, "Lock List:\n");
16120Sstevel@tonic-gate for (lp = lock_list; lp != NULL; lp = lp->next) {
16130Sstevel@tonic-gate mutex_enter(&lp->mutex);
16140Sstevel@tonic-gate cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
16150Sstevel@tonic-gate lp->cookie, lp->key, lp->alloccount,
16160Sstevel@tonic-gate lp->clients ? 'Y' : 'N',
16170Sstevel@tonic-gate lp->owner ? 'Y' : 'N',
16180Sstevel@tonic-gate lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
16190Sstevel@tonic-gate lp->sleepers);
16200Sstevel@tonic-gate mutex_exit(&lp->mutex);
16210Sstevel@tonic-gate }
16220Sstevel@tonic-gate cmn_err(CE_CONT, "Free Lock List:\n");
16230Sstevel@tonic-gate for (lp = lock_free_list; lp != NULL; lp = lp->next) {
16240Sstevel@tonic-gate mutex_enter(&lp->mutex);
16250Sstevel@tonic-gate cmn_err(CE_CONT, "%d\t%d\t%u\t%c\t%c\t%c\t%d\n",
16260Sstevel@tonic-gate lp->cookie, lp->key, lp->alloccount,
16270Sstevel@tonic-gate lp->clients ? 'Y' : 'N',
16280Sstevel@tonic-gate lp->owner ? 'Y' : 'N',
16290Sstevel@tonic-gate lp->lockptr != 0 && LOCK(lp) ? 'Y' : 'N',
16300Sstevel@tonic-gate lp->sleepers);
16310Sstevel@tonic-gate mutex_exit(&lp->mutex);
16320Sstevel@tonic-gate }
16330Sstevel@tonic-gate
16340Sstevel@tonic-gate #ifdef DEBUG
16350Sstevel@tonic-gate if (lock_debug < 3) {
16360Sstevel@tonic-gate mutex_exit(&winlock_mutex);
16370Sstevel@tonic-gate return;
16380Sstevel@tonic-gate }
16390Sstevel@tonic-gate
16400Sstevel@tonic-gate for (lp = lock_list; lp != NULL; lp = lp->next) {
16410Sstevel@tonic-gate SegProc *sdp;
16420Sstevel@tonic-gate
16430Sstevel@tonic-gate mutex_enter(&lp->mutex);
16440Sstevel@tonic-gate cmn_err(CE_CONT,
16450Sstevel@tonic-gate "lock %p, key=%d, cookie=%d, nalloc=%u, lock=%d, wait=%d\n",
16460Sstevel@tonic-gate (void *)lp, lp->key, lp->cookie, lp->alloccount,
16470Sstevel@tonic-gate lp->lockptr != 0 ? LOCK(lp) : -1, lp->sleepers);
16480Sstevel@tonic-gate
16490Sstevel@tonic-gate cmn_err(CE_CONT,
16500Sstevel@tonic-gate "style=%d, lockptr=%p, timeout=%ld, clients=%p, owner=%p\n",
16510Sstevel@tonic-gate lp->style, (void *)lp->lockptr, lp->timeout,
16520Sstevel@tonic-gate (void *)lp->clients, (void *)lp->owner);
16530Sstevel@tonic-gate
16540Sstevel@tonic-gate
16550Sstevel@tonic-gate for (sdp = lp->clients; sdp != NULL; sdp = sdp->next) {
16560Sstevel@tonic-gate cmn_err(CE_CONT, " client %p%s, lp=%p, flag=%x, "
16570Sstevel@tonic-gate "process tag=%p, lockseg=%p, unlockseg=%p\n",
16580Sstevel@tonic-gate (void *)sdp, sdp == lp->owner ? " (owner)" : "",
16590Sstevel@tonic-gate (void *)sdp->lp, sdp->flag, (void *)ID(sdp),
16600Sstevel@tonic-gate (void *)sdp->lockseg, (void *)sdp->unlockseg);
16610Sstevel@tonic-gate }
16620Sstevel@tonic-gate mutex_exit(&lp->mutex);
16630Sstevel@tonic-gate }
16640Sstevel@tonic-gate #endif
16650Sstevel@tonic-gate mutex_exit(&winlock_mutex);
16660Sstevel@tonic-gate }
16670Sstevel@tonic-gate
16680Sstevel@tonic-gate #include <sys/modctl.h>
16690Sstevel@tonic-gate
16700Sstevel@tonic-gate static struct modldrv modldrv = {
16710Sstevel@tonic-gate &mod_driverops, /* Type of module. This one is a driver */
16727656SSherry.Moore@Sun.COM "Winlock Driver", /* Name of the module */
16730Sstevel@tonic-gate &winlock_ops, /* driver ops */
16740Sstevel@tonic-gate };
16750Sstevel@tonic-gate
16760Sstevel@tonic-gate static struct modlinkage modlinkage = {
16770Sstevel@tonic-gate MODREV_1,
16780Sstevel@tonic-gate (void *)&modldrv,
16790Sstevel@tonic-gate 0,
16800Sstevel@tonic-gate 0,
16810Sstevel@tonic-gate 0
16820Sstevel@tonic-gate };
16830Sstevel@tonic-gate
16840Sstevel@tonic-gate int
_init(void)16850Sstevel@tonic-gate _init(void)
16860Sstevel@tonic-gate {
16870Sstevel@tonic-gate int e;
16880Sstevel@tonic-gate
16890Sstevel@tonic-gate mutex_init(&winlock_mutex, NULL, MUTEX_DEFAULT, NULL);
16900Sstevel@tonic-gate e = mod_install(&modlinkage);
16910Sstevel@tonic-gate if (e) {
16920Sstevel@tonic-gate mutex_destroy(&winlock_mutex);
16930Sstevel@tonic-gate }
16940Sstevel@tonic-gate return (e);
16950Sstevel@tonic-gate }
16960Sstevel@tonic-gate
16970Sstevel@tonic-gate
16980Sstevel@tonic-gate int
_info(struct modinfo * modinfop)16990Sstevel@tonic-gate _info(struct modinfo *modinfop)
17000Sstevel@tonic-gate {
17010Sstevel@tonic-gate return (mod_info(&modlinkage, modinfop));
17020Sstevel@tonic-gate }
17030Sstevel@tonic-gate
17040Sstevel@tonic-gate int
_fini(void)17050Sstevel@tonic-gate _fini(void)
17060Sstevel@tonic-gate {
17070Sstevel@tonic-gate int e;
17080Sstevel@tonic-gate
17090Sstevel@tonic-gate e = mod_remove(&modlinkage);
17100Sstevel@tonic-gate if (e == 0) {
17110Sstevel@tonic-gate mutex_destroy(&winlock_mutex);
17120Sstevel@tonic-gate }
17130Sstevel@tonic-gate return (e);
17140Sstevel@tonic-gate }
1715