xref: /netbsd-src/external/gpl2/lvm2/dist/lib/locking/cluster_locking.c (revision 404fbe5fb94ca1e054339640cabb2801ce52dd30)
1 /*	$NetBSD: cluster_locking.c,v 1.1.1.1 2008/12/22 00:18:04 haad Exp $	*/
2 
3 /*
4  * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved.
5  * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved.
6  *
7  * This file is part of LVM2.
8  *
9  * This copyrighted material is made available to anyone wishing to use,
10  * modify, copy, or redistribute it subject to the terms and conditions
11  * of the GNU Lesser General Public License v.2.1.
12  *
13  * You should have received a copy of the GNU Lesser General Public License
14  * along with this program; if not, write to the Free Software Foundation,
15  * Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
16  */
17 
18 /*
19  * Locking functions for LVM.
20  * The main purpose of this part of the library is to serialise LVM
21  * management operations across a cluster.
22  */
23 
24 #include "lib.h"
25 #include "clvm.h"
26 #include "lvm-string.h"
27 #include "locking.h"
28 #include "locking_types.h"
29 
30 #include <assert.h>
31 #include <stddef.h>
32 #include <sys/socket.h>
33 #include <sys/un.h>
34 #include <unistd.h>
35 
/*
 * When CLUSTER_LOCKING_INTERNAL is not defined, the entry points of this
 * file are defined with external linkage; declare each of them before its
 * definition.
 */
#ifndef CLUSTER_LOCKING_INTERNAL
int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags);
void locking_end(void);
void reset_locking(void);
int locking_init(int type, struct config_tree *cf, uint32_t *flags);
#endif
41 
/*
 * One node's reply to a cluster request, in unpacked form.
 */
typedef struct lvm_response {
	char node[255];		/* Name of the node that sent the reply */
	char *response;		/* Separately allocated copy of the reply text */
	int status;		/* Status reported by the node; non-zero is an error */
	int len;		/* Length of the reply text, excluding the NUL */
} lvm_response_t;
48 
/*
 * Magic number (the ASCII bytes "CLVM") meant to be placed at the start
 * of memory we allocate, so that the block can be sanity-checked when it
 * is handed back for deallocation.
 */
#define LVM_SIGNATURE 0x434C564D
54 
/*
 * Socket connected to the clvmd daemon, or -1 while no connection is open.
 *
 * NOTE: the daemon uses the socket FD as the client ID, which means
 * that any client that calls fork() will inherit the context of
 * its parent.
 */
static int _clvmd_sock = -1;
61 
62 /* FIXME Install SIGPIPE handler? */
63 
64 /* Open connection to the Cluster Manager daemon */
65 static int _open_local_sock(void)
66 {
67 	int local_socket;
68 	struct sockaddr_un sockaddr;
69 
70 	/* Open local socket */
71 	if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) {
72 		log_error("Local socket creation failed: %s", strerror(errno));
73 		return -1;
74 	}
75 
76 	memset(&sockaddr, 0, sizeof(sockaddr));
77 	memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME));
78 
79 	sockaddr.sun_family = AF_UNIX;
80 
81 	if (connect(local_socket,(struct sockaddr *) &sockaddr,
82 		    sizeof(sockaddr))) {
83 		int saved_errno = errno;
84 
85 		log_error("connect() failed on local socket: %s",
86 			  strerror(errno));
87 		if (close(local_socket))
88 			stack;
89 
90 		errno = saved_errno;
91 		return -1;
92 	}
93 
94 	return local_socket;
95 }
96 
97 /* Send a request and return the status */
98 static int _send_request(char *inbuf, int inlen, char **retbuf)
99 {
100 	char outbuf[PIPE_BUF] __attribute((aligned(8)));
101 	struct clvm_header *outheader = (struct clvm_header *) outbuf;
102 	int len;
103 	int off;
104 	int buflen;
105 	int err;
106 
107 	/* Send it to CLVMD */
108  rewrite:
109 	if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) {
110 		if (err == -1 && errno == EINTR)
111 			goto rewrite;
112 		log_error("Error writing data to clvmd: %s", strerror(errno));
113 		return 0;
114 	}
115 
116 	/* Get the response */
117  reread:
118 	if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) {
119 		if (errno == EINTR)
120 			goto reread;
121 		log_error("Error reading data from clvmd: %s", strerror(errno));
122 		return 0;
123 	}
124 
125 	if (len == 0) {
126 		log_error("EOF reading CLVMD");
127 		errno = ENOTCONN;
128 		return 0;
129 	}
130 
131 	/* Allocate buffer */
132 	buflen = len + outheader->arglen;
133 	*retbuf = dm_malloc(buflen);
134 	if (!*retbuf) {
135 		errno = ENOMEM;
136 		return 0;
137 	}
138 
139 	/* Copy the header */
140 	memcpy(*retbuf, outbuf, len);
141 	outheader = (struct clvm_header *) *retbuf;
142 
143 	/* Read the returned values */
144 	off = 1;		/* we've already read the first byte */
145 	while (off <= outheader->arglen && len > 0) {
146 		len = read(_clvmd_sock, outheader->args + off,
147 			   buflen - off - offsetof(struct clvm_header, args));
148 		if (len > 0)
149 			off += len;
150 	}
151 
152 	/* Was it an error ? */
153 	if (outheader->status != 0) {
154 		errno = outheader->status;
155 
156 		/* Only return an error here if there are no node-specific
157 		   errors present in the message that might have more detail */
158 		if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) {
159 			log_error("cluster request failed: %s", strerror(errno));
160 			return 0;
161 		}
162 
163 	}
164 
165 	return 1;
166 }
167 
168 /* Build the structure header and parse-out wildcard node names */
169 static void _build_header(struct clvm_header *head, int cmd, const char *node,
170 			  int len)
171 {
172 	head->cmd = cmd;
173 	head->status = 0;
174 	head->flags = 0;
175 	head->clientid = 0;
176 	head->arglen = len;
177 
178 	if (node) {
179 		/*
180 		 * Allow a couple of special node names:
181 		 * "*" for all nodes,
182 		 * "." for the local node only
183 		 */
184 		if (strcmp(node, "*") == 0) {
185 			head->node[0] = '\0';
186 		} else if (strcmp(node, ".") == 0) {
187 			head->node[0] = '\0';
188 			head->flags = CLVMD_FLAG_LOCAL;
189 		} else
190 			strcpy(head->node, node);
191 	} else
192 		head->node[0] = '\0';
193 }
194 
195 /*
196  * Send a message to a(or all) node(s) in the cluster and wait for replies
197  */
198 static int _cluster_request(char cmd, const char *node, void *data, int len,
199 			   lvm_response_t ** response, int *num)
200 {
201 	char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute((aligned(8)));
202 	char *inptr;
203 	char *retbuf = NULL;
204 	int status;
205 	int i;
206 	int num_responses = 0;
207 	struct clvm_header *head = (struct clvm_header *) outbuf;
208 	lvm_response_t *rarray;
209 
210 	*num = 0;
211 
212 	if (_clvmd_sock == -1)
213 		_clvmd_sock = _open_local_sock();
214 
215 	if (_clvmd_sock == -1)
216 		return 0;
217 
218 	_build_header(head, cmd, node, len);
219 	memcpy(head->node + strlen(head->node) + 1, data, len);
220 
221 	status = _send_request(outbuf, sizeof(struct clvm_header) +
222 			      strlen(head->node) + len, &retbuf);
223 	if (!status)
224 		goto out;
225 
226 	/* Count the number of responses we got */
227 	head = (struct clvm_header *) retbuf;
228 	inptr = head->args;
229 	while (inptr[0]) {
230 		num_responses++;
231 		inptr += strlen(inptr) + 1;
232 		inptr += sizeof(int);
233 		inptr += strlen(inptr) + 1;
234 	}
235 
236 	/*
237 	 * Allocate response array.
238 	 * With an extra pair of INTs on the front to sanity
239 	 * check the pointer when we are given it back to free
240 	 */
241 	*response = dm_malloc(sizeof(lvm_response_t) * num_responses);
242 	if (!*response) {
243 		errno = ENOMEM;
244 		status = 0;
245 		goto out;
246 	}
247 
248 	rarray = *response;
249 
250 	/* Unpack the response into an lvm_response_t array */
251 	inptr = head->args;
252 	i = 0;
253 	while (inptr[0]) {
254 		strcpy(rarray[i].node, inptr);
255 		inptr += strlen(inptr) + 1;
256 
257 		memcpy(&rarray[i].status, inptr, sizeof(int));
258 		inptr += sizeof(int);
259 
260 		rarray[i].response = dm_malloc(strlen(inptr) + 1);
261 		if (rarray[i].response == NULL) {
262 			/* Free up everything else and return error */
263 			int j;
264 			for (j = 0; j < i; j++)
265 				dm_free(rarray[i].response);
266 			free(*response);
267 			errno = ENOMEM;
268 			status = -1;
269 			goto out;
270 		}
271 
272 		strcpy(rarray[i].response, inptr);
273 		rarray[i].len = strlen(inptr);
274 		inptr += strlen(inptr) + 1;
275 		i++;
276 	}
277 	*num = num_responses;
278 	*response = rarray;
279 
280       out:
281 	if (retbuf)
282 		dm_free(retbuf);
283 
284 	return status;
285 }
286 
287 /* Free reply array */
288 static int _cluster_free_request(lvm_response_t * response, int num)
289 {
290 	int i;
291 
292 	for (i = 0; i < num; i++) {
293 		dm_free(response[i].response);
294 	}
295 
296 	dm_free(response);
297 
298 	return 1;
299 }
300 
301 static int _lock_for_cluster(unsigned char cmd, uint32_t flags, const char *name)
302 {
303 	int status;
304 	int i;
305 	char *args;
306 	const char *node = "";
307 	int len;
308 	int saved_errno = errno;
309 	lvm_response_t *response = NULL;
310 	int num_responses;
311 
312 	assert(name);
313 
314 	len = strlen(name) + 3;
315 	args = alloca(len);
316 	strcpy(args + 2, name);
317 
318 	args[0] = flags & 0x7F; /* Maskoff lock flags */
319 	args[1] = flags & 0xC0; /* Bitmap flags */
320 
321 	if (mirror_in_sync())
322 		args[1] |= LCK_MIRROR_NOSYNC_MODE;
323 
324 	if (dmeventd_monitor_mode())
325 		args[1] |= LCK_DMEVENTD_MONITOR_MODE;
326 
327 	/*
328 	 * VG locks are just that: locks, and have no side effects
329 	 * so we only need to do them on the local node because all
330 	 * locks are cluster-wide.
331 	 * Also, if the lock is exclusive it makes no sense to try to
332 	 * acquire it on all nodes, so just do that on the local node too.
333 	 * One exception, is that P_ locks /do/ get distributed across
334 	 * the cluster because they might have side-effects.
335 	 */
336 	if (strncmp(name, "P_", 2) &&
337 	    (cmd == CLVMD_CMD_LOCK_VG ||
338 	     (flags & LCK_TYPE_MASK) == LCK_EXCL ||
339 	     (flags & LCK_LOCAL) ||
340 	     !(flags & LCK_CLUSTER_VG)))
341 		node = ".";
342 
343 	status = _cluster_request(cmd, node, args, len,
344 				  &response, &num_responses);
345 
346 	/* If any nodes were down then display them and return an error */
347 	for (i = 0; i < num_responses; i++) {
348 		if (response[i].status == EHOSTDOWN) {
349 			log_error("clvmd not running on node %s",
350 				  response[i].node);
351 			status = 0;
352 			errno = response[i].status;
353 		} else if (response[i].status) {
354 			log_error("Error locking on node %s: %s",
355 				  response[i].node,
356 				  response[i].response[0] ?
357 				  	response[i].response :
358 				  	strerror(response[i].status));
359 			status = 0;
360 			errno = response[i].status;
361 		}
362 	}
363 
364 	saved_errno = errno;
365 	_cluster_free_request(response, num_responses);
366 	errno = saved_errno;
367 
368 	return status;
369 }
370 
371 /* API entry point for LVM */
372 #ifdef CLUSTER_LOCKING_INTERNAL
373 static int _lock_resource(struct cmd_context *cmd, const char *resource,
374 			  uint32_t flags)
375 #else
376 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags)
377 #endif
378 {
379 	char lockname[PATH_MAX];
380 	int cluster_cmd = 0;
381 	const char *lock_scope;
382 	const char *lock_type = "";
383 
384 	assert(strlen(resource) < sizeof(lockname));
385 	assert(resource);
386 
387 	switch (flags & LCK_SCOPE_MASK) {
388 	case LCK_VG:
389 		/* If the VG name is empty then lock the unused PVs */
390 		if (*resource == '#' || (flags & LCK_CACHE))
391 			dm_snprintf(lockname, sizeof(lockname), "P_%s",
392 				    resource);
393 		else
394 			dm_snprintf(lockname, sizeof(lockname), "V_%s",
395 				    resource);
396 
397 		lock_scope = "VG";
398 		cluster_cmd = CLVMD_CMD_LOCK_VG;
399 		flags &= LCK_TYPE_MASK;
400 		break;
401 
402 	case LCK_LV:
403 		cluster_cmd = CLVMD_CMD_LOCK_LV;
404 		strcpy(lockname, resource);
405 		lock_scope = "LV";
406 		flags &= 0xffdf;	/* Mask off HOLD flag */
407 		break;
408 
409 	default:
410 		log_error("Unrecognised lock scope: %d",
411 			  flags & LCK_SCOPE_MASK);
412 		return 0;
413 	}
414 
415 	switch(flags & LCK_TYPE_MASK) {
416 	case LCK_UNLOCK:
417 		lock_type = "UN";
418 		break;
419 	case LCK_NULL:
420 		lock_type = "NL";
421 		break;
422 	case LCK_READ:
423 		lock_type = "CR";
424 		break;
425 	case LCK_PREAD:
426 		lock_type = "PR";
427 		break;
428 	case LCK_WRITE:
429 		lock_type = "PW";
430 		break;
431 	case LCK_EXCL:
432 		lock_type = "EX";
433 		break;
434 	default:
435 		log_error("Unrecognised lock type: %u",
436 			  flags & LCK_TYPE_MASK);
437 		return 0;
438 	}
439 
440 	/* If we are unlocking a clustered VG, then trigger remote metadata backups */
441 	if (cluster_cmd == CLVMD_CMD_LOCK_VG &&
442 	    ((flags & LCK_TYPE_MASK) == LCK_UNLOCK) &&
443 	    (flags & LCK_CLUSTER_VG)) {
444 		log_very_verbose("Requesing backup of VG metadata for %s", resource);
445 		_lock_for_cluster(CLVMD_CMD_VG_BACKUP, LCK_CLUSTER_VG, resource);
446 	}
447 
448 	log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope, lockname,
449 			 lock_type,
450 			 flags & LCK_NONBLOCK ? "" : "B",
451 			 flags & LCK_HOLD ? "H" : "",
452 			 flags & LCK_LOCAL ? "L" : "",
453 			 flags & LCK_CLUSTER_VG ? "C" : "",
454 			 flags);
455 
456 	/* Send a message to the cluster manager */
457 	return _lock_for_cluster(cluster_cmd, flags, lockname);
458 }
459 
460 #ifdef CLUSTER_LOCKING_INTERNAL
461 static void _locking_end(void)
462 #else
463 void locking_end(void)
464 #endif
465 {
466 	if (_clvmd_sock != -1 && close(_clvmd_sock))
467 		stack;
468 
469 	_clvmd_sock = -1;
470 }
471 
472 #ifdef CLUSTER_LOCKING_INTERNAL
473 static void _reset_locking(void)
474 #else
475 void reset_locking(void)
476 #endif
477 {
478 	if (close(_clvmd_sock))
479 		stack;
480 
481 	_clvmd_sock = _open_local_sock();
482 	if (_clvmd_sock == -1)
483 		stack;
484 }
485 
486 #ifdef CLUSTER_LOCKING_INTERNAL
487 int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd)
488 {
489 	locking->lock_resource = _lock_resource;
490 	locking->fin_locking = _locking_end;
491 	locking->reset_locking = _reset_locking;
492 	locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED;
493 
494 	_clvmd_sock = _open_local_sock();
495 	if (_clvmd_sock == -1)
496 		return 0;
497 
498 	return 1;
499 }
500 #else
501 int locking_init(int type, struct config_tree *cf, uint32_t *flags)
502 {
503 	_clvmd_sock = _open_local_sock();
504 	if (_clvmd_sock == -1)
505 		return 0;
506 
507 	/* Ask LVM to lock memory before calling us */
508 	*flags |= LCK_PRE_MEMLOCK;
509 	*flags |= LCK_CLUSTERED;
510 
511 	return 1;
512 }
513 #endif
514