xref: /netbsd-src/external/gpl2/lvm2/dist/lib/locking/cluster_locking.c (revision 274254cdae52594c1aa480a736aef78313d15c9c)
1 /*	$NetBSD: cluster_locking.c,v 1.1.1.2 2009/02/18 11:17:08 haad Exp $	*/
2 
3 /*
4  * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 /*
19  * Locking functions for LVM.
20  * The main purpose of this part of the library is to serialise LVM
21  * management operations across a cluster.
22  */
23 
24 #include "lib.h"
25 #include "clvm.h"
26 #include "lvm-string.h"
27 #include "locking.h"
28 #include "locking_types.h"
29 
30 #include <assert.h>
31 #include <stddef.h>
32 #include <sys/socket.h>
33 #include <sys/un.h>
34 #include <unistd.h>
35 
/*
 * Prototypes for the entry points that are defined with external linkage
 * further down when CLUSTER_LOCKING_INTERNAL is not defined.  When it is
 * defined, the same functions are static (with a leading underscore) and are
 * installed into a struct locking_type by init_cluster_locking() instead.
 *
 * NOTE(review): reset_locking() is also defined with external linkage in this
 * configuration but has no prototype here - confirm it is declared elsewhere.
 */
#ifndef CLUSTER_LOCKING_INTERNAL
int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags);
void locking_end(void);
int locking_init(int type, struct config_tree *cf, uint32_t *flags);
#endif
41 
/*
 * One unpacked per-node reply, as filled in by _cluster_request().
 */
struct lvm_response {
	char node[255];		/* Name of the node that sent this reply */
	char *response;		/* Heap copy of the reply text (NUL-terminated) */
	int status;		/* Status value reported by that node */
	int len;		/* strlen() of response */
};

typedef struct lvm_response lvm_response_t;
48 
/*
 * This gets stuck at the start of memory we allocate so we
 * can sanity-check it at deallocation time.
 * The value is the ASCII bytes "CLVM".
 * NOTE(review): no use of this macro is visible in this part of the file.
 */
#define LVM_SIGNATURE 0x434C564D

/*
 * File descriptor of the connection to clvmd, or -1 when not connected.
 * It is opened with _open_local_sock() (at init, on reset, and lazily in
 * _cluster_request()) and closed when locking is shut down.
 *
 * NOTE: the LVMD uses the socket FD as the client ID, this means
 * that any client that calls fork() will inherit the context of
 * its parent.
 */
static int _clvmd_sock = -1;
61 
62 /* FIXME Install SIGPIPE handler? */
63 
64 /* Open connection to the Cluster Manager daemon */
65 static int _open_local_sock(void)
66 {
67 	int local_socket;
68 	struct sockaddr_un sockaddr;
69 
70 	/* Open local socket */
71 	if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
72 		log_error("Local socket creation failed: %s", strerror(errno));
73 		return -1;
74 	}
75 
76 	memset(&sockaddr, 0, sizeof(sockaddr));
77 	memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
78 
79 	sockaddr.sun_family = AF_UNIX;
80 
81 	if (connect(local_socket,(struct sockaddr *) &sockaddr,
82 		    sizeof(sockaddr))) {
83 		int saved_errno = errno;
84 
85 		log_error("connect() failed on local socket: %s",
86 			  strerror(errno));
87 		if (close(local_socket))
88 			stack;
89 
90 		errno = saved_errno;
91 		return -1;
92 	}
93 
94 	return local_socket;
95 }
96 
97 /* Send a request and return the status */
98 static int _send_request(char *inbuf, int inlen, char **retbuf)
99 {
100 	char outbuf[PIPE_BUF] __attribute((aligned(8)));
101 	struct clvm_header *outheader = (struct clvm_header *) outbuf;
102 	int len;
103 	int off;
104 	int buflen;
105 	int err;
106 
107 	/* Send it to CLVMD */
108  rewrite:
109 	if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
110 		if (err == -1 && errno == EINTR)
111 			goto rewrite;
112 		log_error("Error writing data to clvmd: %s", strerror(errno));
113 		return 0;
114 	}
115 
116 	/* Get the response */
117  reread:
118 	if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
119 		if (errno == EINTR)
120 			goto reread;
121 		log_error("Error reading data from clvmd: %s", strerror(errno));
122 		return 0;
123 	}
124 
125 	if (len == 0) {
126 		log_error("EOF reading CLVMD");
127 		errno = ENOTCONN;
128 		return 0;
129 	}
130 
131 	/* Allocate buffer */
132 	buflen = len + outheader->arglen;
133 	*retbuf = dm_malloc(buflen);
134 	if (!*retbuf) {
135 		errno = ENOMEM;
136 		return 0;
137 	}
138 
139 	/* Copy the header */
140 	memcpy(*retbuf, outbuf, len);
141 	outheader = (struct clvm_header *) *retbuf;
142 
143 	/* Read the returned values */
144 	off = 1;		/* we've already read the first byte */
145 	while (off <= outheader->arglen && len > 0) {
146 		len = read(_clvmd_sock, outheader->args + off,
147 			   buflen - off - offsetof(struct clvm_header, args));
148 		if (len > 0)
149 			off += len;
150 	}
151 
152 	/* Was it an error ? */
153 	if (outheader->status != 0) {
154 		errno = outheader->status;
155 
156 		/* Only return an error here if there are no node-specific
157 		   errors present in the message that might have more detail */
158 		if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) {
159 			log_error("cluster request failed: %s", strerror(errno));
160 			return 0;
161 		}
162 
163 	}
164 
165 	return 1;
166 }
167 
168 /* Build the structure header and parse-out wildcard node names */
169 /* FIXME: Cleanup implicit casts of clvmd_cmd (int, char, uint8_t, etc). */
170 static void _build_header(struct clvm_header *head, int clvmd_cmd, const char *node,
171 			  int len)
172 {
173 	head->cmd = clvmd_cmd;
174 	head->status = 0;
175 	head->flags = 0;
176 	head->clientid = 0;
177 	head->arglen = len;
178 
179 	if (node) {
180 		/*
181 		 * Allow a couple of special node names:
182 		 * "*" for all nodes,
183 		 * "." for the local node only
184 		 */
185 		if (strcmp(node, "*") == 0) {
186 			head->node[0] = '\0';
187 		} else if (strcmp(node, ".") == 0) {
188 			head->node[0] = '\0';
189 			head->flags = CLVMD_FLAG_LOCAL;
190 		} else
191 			strcpy(head->node, node);
192 	} else
193 		head->node[0] = '\0';
194 }
195 
196 /*
197  * Send a message to a(or all) node(s) in the cluster and wait for replies
198  */
199 static int _cluster_request(char clvmd_cmd, const char *node, void *data, int len,
200 			   lvm_response_t ** response, int *num)
201 {
202 	char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute((aligned(8)));
203 	char *inptr;
204 	char *retbuf = NULL;
205 	int status;
206 	int i;
207 	int num_responses = 0;
208 	struct clvm_header *head = (struct clvm_header *) outbuf;
209 	lvm_response_t *rarray;
210 
211 	*num = 0;
212 
213 	if (_clvmd_sock == -1)
214 		_clvmd_sock = _open_local_sock();
215 
216 	if (_clvmd_sock == -1)
217 		return 0;
218 
219 	_build_header(head, clvmd_cmd, node, len);
220 	memcpy(head->node + strlen(head->node) + 1, data, len);
221 
222 	status = _send_request(outbuf, sizeof(struct clvm_header) +
223 			      strlen(head->node) + len, &retbuf);
224 	if (!status)
225 		goto out;
226 
227 	/* Count the number of responses we got */
228 	head = (struct clvm_header *) retbuf;
229 	inptr = head->args;
230 	while (inptr[0]) {
231 		num_responses++;
232 		inptr += strlen(inptr) + 1;
233 		inptr += sizeof(int);
234 		inptr += strlen(inptr) + 1;
235 	}
236 
237 	/*
238 	 * Allocate response array.
239 	 * With an extra pair of INTs on the front to sanity
240 	 * check the pointer when we are given it back to free
241 	 */
242 	*response = dm_malloc(sizeof(lvm_response_t) * num_responses);
243 	if (!*response) {
244 		errno = ENOMEM;
245 		status = 0;
246 		goto out;
247 	}
248 
249 	rarray = *response;
250 
251 	/* Unpack the response into an lvm_response_t array */
252 	inptr = head->args;
253 	i = 0;
254 	while (inptr[0]) {
255 		strcpy(rarray[i].node, inptr);
256 		inptr += strlen(inptr) + 1;
257 
258 		memcpy(&rarray[i].status, inptr, sizeof(int));
259 		inptr += sizeof(int);
260 
261 		rarray[i].response = dm_malloc(strlen(inptr) + 1);
262 		if (rarray[i].response == NULL) {
263 			/* Free up everything else and return error */
264 			int j;
265 			for (j = 0; j < i; j++)
266 				dm_free(rarray[i].response);
267 			free(*response);
268 			errno = ENOMEM;
269 			status = -1;
270 			goto out;
271 		}
272 
273 		strcpy(rarray[i].response, inptr);
274 		rarray[i].len = strlen(inptr);
275 		inptr += strlen(inptr) + 1;
276 		i++;
277 	}
278 	*num = num_responses;
279 	*response = rarray;
280 
281       out:
282 	if (retbuf)
283 		dm_free(retbuf);
284 
285 	return status;
286 }
287 
288 /* Free reply array */
289 static int _cluster_free_request(lvm_response_t * response, int num)
290 {
291 	int i;
292 
293 	for (i = 0; i < num; i++) {
294 		dm_free(response[i].response);
295 	}
296 
297 	dm_free(response);
298 
299 	return 1;
300 }
301 
302 static int _lock_for_cluster(unsigned char clvmd_cmd, uint32_t flags, const char *name)
303 {
304 	int status;
305 	int i;
306 	char *args;
307 	const char *node = "";
308 	int len;
309 	int saved_errno = errno;
310 	lvm_response_t *response = NULL;
311 	int num_responses;
312 
313 	assert(name);
314 
315 	len = strlen(name) + 3;
316 	args = alloca(len);
317 	strcpy(args + 2, name);
318 
319 	args[0] = flags & 0x7F; /* Maskoff lock flags */
320 	args[1] = flags & 0xC0; /* Bitmap flags */
321 
322 	if (mirror_in_sync())
323 		args[1] |= LCK_MIRROR_NOSYNC_MODE;
324 
325 	if (dmeventd_monitor_mode())
326 		args[1] |= LCK_DMEVENTD_MONITOR_MODE;
327 
328 	/*
329 	 * VG locks are just that: locks, and have no side effects
330 	 * so we only need to do them on the local node because all
331 	 * locks are cluster-wide.
332 	 * Also, if the lock is exclusive it makes no sense to try to
333 	 * acquire it on all nodes, so just do that on the local node too.
334 	 * One exception, is that P_ locks /do/ get distributed across
335 	 * the cluster because they might have side-effects.
336 	 */
337 	if (strncmp(name, "P_", 2) &&
338 	    (clvmd_cmd == CLVMD_CMD_LOCK_VG ||
339 	     (flags & LCK_TYPE_MASK) == LCK_EXCL ||
340 	     (flags & LCK_LOCAL) ||
341 	     !(flags & LCK_CLUSTER_VG)))
342 		node = ".";
343 
344 	status = _cluster_request(clvmd_cmd, node, args, len,
345 				  &response, &num_responses);
346 
347 	/* If any nodes were down then display them and return an error */
348 	for (i = 0; i < num_responses; i++) {
349 		if (response[i].status == EHOSTDOWN) {
350 			log_error("clvmd not running on node %s",
351 				  response[i].node);
352 			status = 0;
353 			errno = response[i].status;
354 		} else if (response[i].status) {
355 			log_error("Error locking on node %s: %s",
356 				  response[i].node,
357 				  response[i].response[0] ?
358 				  	response[i].response :
359 				  	strerror(response[i].status));
360 			status = 0;
361 			errno = response[i].status;
362 		}
363 	}
364 
365 	saved_errno = errno;
366 	_cluster_free_request(response, num_responses);
367 	errno = saved_errno;
368 
369 	return status;
370 }
371 
372 /* API entry point for LVM */
373 #ifdef CLUSTER_LOCKING_INTERNAL
374 static int _lock_resource(struct cmd_context *cmd, const char *resource,
375 			  uint32_t flags)
376 #else
377 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags)
378 #endif
379 {
380 	char lockname[PATH_MAX];
381 	int clvmd_cmd = 0;
382 	const char *lock_scope;
383 	const char *lock_type = "";
384 
385 	assert(strlen(resource) < sizeof(lockname));
386 	assert(resource);
387 
388 	switch (flags & LCK_SCOPE_MASK) {
389 	case LCK_VG:
390 		/* If the VG name is empty then lock the unused PVs */
391 		if (*resource == '#' || (flags & LCK_CACHE))
392 			dm_snprintf(lockname, sizeof(lockname), "P_%s",
393 				    resource);
394 		else
395 			dm_snprintf(lockname, sizeof(lockname), "V_%s",
396 				    resource);
397 
398 		lock_scope = "VG";
399 		clvmd_cmd = CLVMD_CMD_LOCK_VG;
400 		flags &= LCK_TYPE_MASK;
401 		break;
402 
403 	case LCK_LV:
404 		clvmd_cmd = CLVMD_CMD_LOCK_LV;
405 		strcpy(lockname, resource);
406 		lock_scope = "LV";
407 		flags &= 0xffdf;	/* Mask off HOLD flag */
408 		break;
409 
410 	default:
411 		log_error("Unrecognised lock scope: %d",
412 			  flags & LCK_SCOPE_MASK);
413 		return 0;
414 	}
415 
416 	switch(flags & LCK_TYPE_MASK) {
417 	case LCK_UNLOCK:
418 		lock_type = "UN";
419 		break;
420 	case LCK_NULL:
421 		lock_type = "NL";
422 		break;
423 	case LCK_READ:
424 		lock_type = "CR";
425 		break;
426 	case LCK_PREAD:
427 		lock_type = "PR";
428 		break;
429 	case LCK_WRITE:
430 		lock_type = "PW";
431 		break;
432 	case LCK_EXCL:
433 		lock_type = "EX";
434 		break;
435 	default:
436 		log_error("Unrecognised lock type: %u",
437 			  flags & LCK_TYPE_MASK);
438 		return 0;
439 	}
440 
441 	/* If we are unlocking a clustered VG, then trigger remote metadata backups */
442 	if (clvmd_cmd == CLVMD_CMD_LOCK_VG &&
443 	    ((flags & LCK_TYPE_MASK) == LCK_UNLOCK) &&
444 	    (flags & LCK_CLUSTER_VG)) {
445 		log_very_verbose("Requesing backup of VG metadata for %s", resource);
446 		_lock_for_cluster(CLVMD_CMD_VG_BACKUP, LCK_CLUSTER_VG, resource);
447 	}
448 
449 	log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope, lockname,
450 			 lock_type,
451 			 flags & LCK_NONBLOCK ? "" : "B",
452 			 flags & LCK_HOLD ? "H" : "",
453 			 flags & LCK_LOCAL ? "L" : "",
454 			 flags & LCK_CLUSTER_VG ? "C" : "",
455 			 flags);
456 
457 	/* Send a message to the cluster manager */
458 	return _lock_for_cluster(clvmd_cmd, flags, lockname);
459 }
460 
461 #ifdef CLUSTER_LOCKING_INTERNAL
462 static void _locking_end(void)
463 #else
464 void locking_end(void)
465 #endif
466 {
467 	if (_clvmd_sock != -1 && close(_clvmd_sock))
468 		stack;
469 
470 	_clvmd_sock = -1;
471 }
472 
473 #ifdef CLUSTER_LOCKING_INTERNAL
474 static void _reset_locking(void)
475 #else
476 void reset_locking(void)
477 #endif
478 {
479 	if (close(_clvmd_sock))
480 		stack;
481 
482 	_clvmd_sock = _open_local_sock();
483 	if (_clvmd_sock == -1)
484 		stack;
485 }
486 
487 #ifdef CLUSTER_LOCKING_INTERNAL
488 int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd)
489 {
490 	locking->lock_resource = _lock_resource;
491 	locking->fin_locking = _locking_end;
492 	locking->reset_locking = _reset_locking;
493 	locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED;
494 
495 	_clvmd_sock = _open_local_sock();
496 	if (_clvmd_sock == -1)
497 		return 0;
498 
499 	return 1;
500 }
501 #else
502 int locking_init(int type, struct config_tree *cf, uint32_t *flags)
503 {
504 	_clvmd_sock = _open_local_sock();
505 	if (_clvmd_sock == -1)
506 		return 0;
507 
508 	/* Ask LVM to lock memory before calling us */
509 	*flags |= LCK_PRE_MEMLOCK;
510 	*flags |= LCK_CLUSTERED;
511 
512 	return 1;
513 }
514 #endif
515