xref: /netbsd-src/sys/dev/dm/doc/locking.txt (revision adffd44b18fc9111b9196d383b86cc70ba367efb)
1
2				Device-mapper Locking architecture
3
4Overview
5
There are two kinds of users of the device-mapper driver:
      a) Users who use disk drives
      b) Users who use the ioctl management interface
9
Management is done by the dm_dev_*_ioctl and dm_table_*_ioctl routines. There
are two major structures used by these routines and by device-mapper itself.
12
13Table entry:
14
15typedef struct dm_table_entry {      /* One target mapping of a dm device. */
16        struct dm_dev *dm_dev;          /* Backlink to the dm device. */
17        uint64_t start;                 /* "start" column of a table line; units not stated here. */
18        uint64_t length;                /* "length" column of a table line. */
19
20        struct dm_target *target;      /* Link to table target. */
21        void *target_config;           /* Target specific data. */
22        SLIST_ENTRY(dm_table_entry) next; /* Linkage in the list of table entries. */
23} dm_table_entry_t;
24
This structure stores one target part of a dm device. Every device can have
more than one target mapping entry, stored in a list. This structure describes
the mapping between logical and physical blocks in the dm device.
28
start  length target block device offset
0      102400 linear /dev/wd1a     384
102400 204800 linear /dev/wd2a     384
204800 409600 linear /dev/wd3a     384
33
Every device has at least two tables, ACTIVE and INACTIVE. Only the ACTIVE
table is used during IO. Every IO operation on a dm device has to walk through
the dm_table_entries list.
36
37Device entry:
38
39typedef struct dm_dev {               /* One device created in device-mapper. */
40        char name[DM_NAME_LEN];         /* Device name; ioctl users look devices up by it. */
41        char uuid[DM_UUID_LEN];         /* Device uuid; also usable for lookup. */
42
43        int minor;                      /* Minor number; also usable for lookup. */
44        uint32_t flags; /* store communication protocol flags */
45
46        kmutex_t dev_mtx; /* mutex for general device lock; held while ref_cnt is updated */
47        kcondvar_t dev_cv; /* cv for ioctl synchronisation; broadcast when ref_cnt drops to 0 */
48
49        uint32_t event_nr;
50        uint32_t ref_cnt;               /* References taken by dm_dev_busy, dropped by dm_dev_unbusy. */
51
52        dm_table_head_t table_head;     /* Tables of this device and their synchronisation state. */
53
54        struct dm_dev_head upcalls;
55
56        struct disklabel *dk_label;    /* Disklabel for this table. */
57
58        TAILQ_ENTRY(dm_dev) next_upcall; /* List of mirrored, snapshotted devices. */
59
60        TAILQ_ENTRY(dm_dev) next_devlist; /* Major device list. */
61} dm_dev_t;
62
Every device created in device-mapper is represented by this structure.
All devices are stored in a list. Every ioctl routine has to work with this
structure.
66
67	Locking in dm driver
68
Locking must be done in two ways: synchronisation between ioctl routines, and
synchronisation between IO operations and ioctl routines. Table entries are
read during IO and during some ioctl routines. There are only a few routines
which manipulate table lists.
71
72Read access to table list:
73
74dmsize
75dmstrategy
76dm_dev_status_ioctl
77dm_table_info_ioctl
78dm_table_deps_ioctl
79dm_disk_ioctl 		-> DIOCCACHESYNC ioctl
80
81Write access to table list:
dm_dev_remove_ioctl        -> Remove the device from the list; this routine has to
                              remove all tables.
dm_dev_resume_ioctl        -> Switch tables on a suspended device: swap the INACTIVE
                              and ACTIVE tables.
dm_table_clear_ioctl       -> Remove the INACTIVE table from the table list.
87
88
89Synchronisation between readers and writers in table list
90
91I moved everything needed for table synchronisation to struct dm_table_head.
92
93typedef struct dm_table_head {        /* Per-device table pair plus its synchronisation state. */
94        /* Index into tables[] of the ACTIVE table; the INACTIVE one is 1 - cur_active_table. */
95        int cur_active_table;
96        struct dm_table tables[2];      /* The ACTIVE and INACTIVE tables. */
97
98        kmutex_t   table_mtx;           /* Held while cur_active_table is read and io_cnt is updated. */
99        kcondvar_t table_cv; /* IO waiting cv; broadcast when io_cnt drops to 0 */
100
101        uint32_t io_cnt;                /* Reader references taken by dm_table_busy. */
102} dm_table_head_t;
103
dm_table_head_t is the entry point for every dm_table synchronisation routine.

Because every table user has to go through the table list head, I have
implemented these routines to manage access to table lists.
108
109/*
110 * Destroy all table data. This function can run when there are no
111 * readers on table lists.
112 */
113int dm_table_destroy(dm_table_head_t *, uint8_t);
114
115/*
116 * Return length of active table in device.
117 */
118uint64_t dm_table_size(dm_table_head_t *);
119
120/*
121 * Return current active table to caller, increment io_cnt reference counter.
122 */
123struct dm_table *dm_table_get_entry(dm_table_head_t *, uint8_t);
124
/*
 * Return > 0 if the table has at least one table entry (the return value is
 * the number of entries) and 0 if it has none. The target count returned from
 * this function may no longer be accurate when the userspace caller receives
 * it (dm_dev_resume_ioctl can run after the return), therefore it is only
 * informative.
 */
131int dm_table_get_target_count(dm_table_head_t *, uint8_t);
132
133/*
134 * Decrement io reference counter and wake up all callers, with table_head cv.
135 */
136void dm_table_release(dm_table_head_t *, uint8_t s);
137
138/*
139 * Switch table from inactive to active mode. Have to wait until io_cnt is 0.
140 */
141void dm_table_switch_tables(dm_table_head_t *);
142
143/*
144 * Initialize table_head structures, I'm trying to keep this structure as
145 * opaque as possible.
146 */
147void dm_table_head_init(dm_table_head_t *);
148
149/*
150 * Destroy all variables in table_head
151 */
152void dm_table_head_destroy(dm_table_head_t *);
153
154Internal table synchronisation protocol
155
156Readers:
157dm_table_size
158dm_table_get_target_count
159dm_table_get_target_count
160
Readers that hold the reference counter:
162dm_table_get_entry
163dm_table_release
164
165Writer:
166dm_table_destroy
167dm_table_switch_tables
168
I use these routines to manage synchronisation on the table lists. Every reader
uses the dm_table_busy routine to hold the reference counter during its work
and dm_table_unbusy to release it. Every writer has to wait until the reference
counter is 0, and only then can it work with the device. It will sleep on
head->table_cv while there are other readers. dm_table_get_entry is special in
that it returns the table with the reference counter held. After
dm_table_get_entry every caller must call dm_table_release when it no longer
needs the table.
174
175/*
176 * Function to increment table user reference counter. Return id
177 * of table_id table.
178 * DM_TABLE_ACTIVE will return active table id.
179 * DM_TABLE_INACTIVE will return inactive table id.
180 */
181static int
182dm_table_busy(dm_table_head_t *head, uint8_t table_id)
183{
184        uint8_t id;
185
186        id = 0;
187
188        mutex_enter(&head->table_mtx);
189
190        if (table_id == DM_TABLE_ACTIVE)
191                id = head->cur_active_table;
192        else
193                id = 1 - head->cur_active_table;
194
195        head->io_cnt++;
196
197        mutex_exit(&head->table_mtx);
198        return id;
199}
200
201/*
202 * Function release table lock and eventually wakeup all waiters.
203 */
204static void
205dm_table_unbusy(dm_table_head_t *head)
206{
207        KASSERT(head->io_cnt != 0);
208
209        mutex_enter(&head->table_mtx);
210
211        if (--head->io_cnt == 0)
212                cv_broadcast(&head->table_cv);
213
214        mutex_exit(&head->table_mtx);
215}
216
Device synchronisation between ioctl routines
218
219
Every ioctl user has to find the dm device by name, uuid or minor number.
dm_dev_lookup is used for this. This routine returns the device with its
reference counter held.
223
224void
225dm_dev_busy(dm_dev_t *dmv)
226{
227        mutex_enter(&dmv->dev_mtx);
228        dmv->ref_cnt++;
229        mutex_exit(&dmv->dev_mtx);
230}
231
232void
233dm_dev_unbusy(dm_dev_t *dmv)
234{
235        KASSERT(dmv->ref_cnt != 0);
236
237        mutex_enter(&dmv->dev_mtx);
238        if (--dmv->ref_cnt == 0)
239                cv_broadcast(&dmv->dev_cv);
240        mutex_exit(&dmv->dev_mtx);
241}
242
Before returning, an ioctl routine must release the reference counter with
dm_dev_unbusy.
245
The dm_dev_remove_ioctl routine has to remove the dm_dev from the global device
list, and wait until all ioctl users of the dm_dev are gone.
248
249