1 /* $NetBSD: cluster_locking.c,v 1.1.1.1 2008/12/22 00:18:04 haad Exp $ */ 2 3 /* 4 * Copyright (C) 2002-2004 Sistina Software, Inc. All rights reserved. 5 * Copyright (C) 2004-2007 Red Hat, Inc. All rights reserved. 6 * 7 * This file is part of LVM2. 8 * 9 * This copyrighted material is made available to anyone wishing to use, 10 * modify, copy, or redistribute it subject to the terms and conditions 11 * of the GNU Lesser General Public License v.2.1. 12 * 13 * You should have received a copy of the GNU Lesser General Public License 14 * along with this program; if not, write to the Free Software Foundation, 15 * Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 16 */ 17 18 /* 19 * Locking functions for LVM. 20 * The main purpose of this part of the library is to serialise LVM 21 * management operations across a cluster. 22 */ 23 24 #include "lib.h" 25 #include "clvm.h" 26 #include "lvm-string.h" 27 #include "locking.h" 28 #include "locking_types.h" 29 30 #include <assert.h> 31 #include <stddef.h> 32 #include <sys/socket.h> 33 #include <sys/un.h> 34 #include <unistd.h> 35 36 #ifndef CLUSTER_LOCKING_INTERNAL 37 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags); 38 void locking_end(void); 39 int locking_init(int type, struct config_tree *cf, uint32_t *flags); 40 #endif 41 42 typedef struct lvm_response { 43 char node[255]; 44 char *response; 45 int status; 46 int len; 47 } lvm_response_t; 48 49 /* 50 * This gets stuck at the start of memory we allocate so we 51 * can sanity-check it at deallocation time 52 */ 53 #define LVM_SIGNATURE 0x434C564D 54 55 /* 56 * NOTE: the LVMD uses the socket FD as the client ID, this means 57 * that any client that calls fork() will inherit the context of 58 * it's parent. 59 */ 60 static int _clvmd_sock = -1; 61 62 /* FIXME Install SIGPIPE handler? */ 63 64 /* Open connection to the Cluster Manager daemon */ 65 static int _open_local_sock(void) 66 { 67 int local_socket; 68 struct sockaddr_un sockaddr; 69 70 /* Open local socket */ 71 if ((local_socket = socket(PF_UNIX, SOCK_STREAM, 0)) < 0) { 72 log_error("Local socket creation failed: %s", strerror(errno)); 73 return -1; 74 } 75 76 memset(&sockaddr, 0, sizeof(sockaddr)); 77 memcpy(sockaddr.sun_path, CLVMD_SOCKNAME, sizeof(CLVMD_SOCKNAME)); 78 79 sockaddr.sun_family = AF_UNIX; 80 81 if (connect(local_socket,(struct sockaddr *) &sockaddr, 82 sizeof(sockaddr))) { 83 int saved_errno = errno; 84 85 log_error("connect() failed on local socket: %s", 86 strerror(errno)); 87 if (close(local_socket)) 88 stack; 89 90 errno = saved_errno; 91 return -1; 92 } 93 94 return local_socket; 95 } 96 97 /* Send a request and return the status */ 98 static int _send_request(char *inbuf, int inlen, char **retbuf) 99 { 100 char outbuf[PIPE_BUF] __attribute((aligned(8))); 101 struct clvm_header *outheader = (struct clvm_header *) outbuf; 102 int len; 103 int off; 104 int buflen; 105 int err; 106 107 /* Send it to CLVMD */ 108 rewrite: 109 if ( (err = write(_clvmd_sock, inbuf, inlen)) != inlen) { 110 if (err == -1 && errno == EINTR) 111 goto rewrite; 112 log_error("Error writing data to clvmd: %s", strerror(errno)); 113 return 0; 114 } 115 116 /* Get the response */ 117 reread: 118 if ((len = read(_clvmd_sock, outbuf, sizeof(struct clvm_header))) < 0) { 119 if (errno == EINTR) 120 goto reread; 121 log_error("Error reading data from clvmd: %s", strerror(errno)); 122 return 0; 123 } 124 125 if (len == 0) { 126 log_error("EOF reading CLVMD"); 127 errno = ENOTCONN; 128 return 0; 129 } 130 131 /* Allocate buffer */ 132 buflen = len + outheader->arglen; 133 *retbuf = dm_malloc(buflen); 134 if (!*retbuf) { 135 errno = ENOMEM; 136 return 0; 137 } 138 139 /* Copy the header */ 140 memcpy(*retbuf, outbuf, len); 141 outheader = (struct clvm_header *) *retbuf; 142 143 /* Read the returned values */ 144 off = 1; /* we've already read the first byte */ 145 while (off <= outheader->arglen && len > 0) { 146 len = read(_clvmd_sock, outheader->args + off, 147 buflen - off - offsetof(struct clvm_header, args)); 148 if (len > 0) 149 off += len; 150 } 151 152 /* Was it an error ? */ 153 if (outheader->status != 0) { 154 errno = outheader->status; 155 156 /* Only return an error here if there are no node-specific 157 errors present in the message that might have more detail */ 158 if (!(outheader->flags & CLVMD_FLAG_NODEERRS)) { 159 log_error("cluster request failed: %s", strerror(errno)); 160 return 0; 161 } 162 163 } 164 165 return 1; 166 } 167 168 /* Build the structure header and parse-out wildcard node names */ 169 static void _build_header(struct clvm_header *head, int cmd, const char *node, 170 int len) 171 { 172 head->cmd = cmd; 173 head->status = 0; 174 head->flags = 0; 175 head->clientid = 0; 176 head->arglen = len; 177 178 if (node) { 179 /* 180 * Allow a couple of special node names: 181 * "*" for all nodes, 182 * "." for the local node only 183 */ 184 if (strcmp(node, "*") == 0) { 185 head->node[0] = '\0'; 186 } else if (strcmp(node, ".") == 0) { 187 head->node[0] = '\0'; 188 head->flags = CLVMD_FLAG_LOCAL; 189 } else 190 strcpy(head->node, node); 191 } else 192 head->node[0] = '\0'; 193 } 194 195 /* 196 * Send a message to a(or all) node(s) in the cluster and wait for replies 197 */ 198 static int _cluster_request(char cmd, const char *node, void *data, int len, 199 lvm_response_t ** response, int *num) 200 { 201 char outbuf[sizeof(struct clvm_header) + len + strlen(node) + 1] __attribute((aligned(8))); 202 char *inptr; 203 char *retbuf = NULL; 204 int status; 205 int i; 206 int num_responses = 0; 207 struct clvm_header *head = (struct clvm_header *) outbuf; 208 lvm_response_t *rarray; 209 210 *num = 0; 211 212 if (_clvmd_sock == -1) 213 _clvmd_sock = _open_local_sock(); 214 215 if (_clvmd_sock == -1) 216 return 0; 217 218 _build_header(head, cmd, node, len); 219 memcpy(head->node + strlen(head->node) + 1, data, len); 220 221 status = _send_request(outbuf, sizeof(struct clvm_header) + 222 strlen(head->node) + len, &retbuf); 223 if (!status) 224 goto out; 225 226 /* Count the number of responses we got */ 227 head = (struct clvm_header *) retbuf; 228 inptr = head->args; 229 while (inptr[0]) { 230 num_responses++; 231 inptr += strlen(inptr) + 1; 232 inptr += sizeof(int); 233 inptr += strlen(inptr) + 1; 234 } 235 236 /* 237 * Allocate response array. 238 * With an extra pair of INTs on the front to sanity 239 * check the pointer when we are given it back to free 240 */ 241 *response = dm_malloc(sizeof(lvm_response_t) * num_responses); 242 if (!*response) { 243 errno = ENOMEM; 244 status = 0; 245 goto out; 246 } 247 248 rarray = *response; 249 250 /* Unpack the response into an lvm_response_t array */ 251 inptr = head->args; 252 i = 0; 253 while (inptr[0]) { 254 strcpy(rarray[i].node, inptr); 255 inptr += strlen(inptr) + 1; 256 257 memcpy(&rarray[i].status, inptr, sizeof(int)); 258 inptr += sizeof(int); 259 260 rarray[i].response = dm_malloc(strlen(inptr) + 1); 261 if (rarray[i].response == NULL) { 262 /* Free up everything else and return error */ 263 int j; 264 for (j = 0; j < i; j++) 265 dm_free(rarray[i].response); 266 free(*response); 267 errno = ENOMEM; 268 status = -1; 269 goto out; 270 } 271 272 strcpy(rarray[i].response, inptr); 273 rarray[i].len = strlen(inptr); 274 inptr += strlen(inptr) + 1; 275 i++; 276 } 277 *num = num_responses; 278 *response = rarray; 279 280 out: 281 if (retbuf) 282 dm_free(retbuf); 283 284 return status; 285 } 286 287 /* Free reply array */ 288 static int _cluster_free_request(lvm_response_t * response, int num) 289 { 290 int i; 291 292 for (i = 0; i < num; i++) { 293 dm_free(response[i].response); 294 } 295 296 dm_free(response); 297 298 return 1; 299 } 300 301 static int _lock_for_cluster(unsigned char cmd, uint32_t flags, const char *name) 302 { 303 int status; 304 int i; 305 char *args; 306 const char *node = ""; 307 int len; 308 int saved_errno = errno; 309 lvm_response_t *response = NULL; 310 int num_responses; 311 312 assert(name); 313 314 len = strlen(name) + 3; 315 args = alloca(len); 316 strcpy(args + 2, name); 317 318 args[0] = flags & 0x7F; /* Maskoff lock flags */ 319 args[1] = flags & 0xC0; /* Bitmap flags */ 320 321 if (mirror_in_sync()) 322 args[1] |= LCK_MIRROR_NOSYNC_MODE; 323 324 if (dmeventd_monitor_mode()) 325 args[1] |= LCK_DMEVENTD_MONITOR_MODE; 326 327 /* 328 * VG locks are just that: locks, and have no side effects 329 * so we only need to do them on the local node because all 330 * locks are cluster-wide. 331 * Also, if the lock is exclusive it makes no sense to try to 332 * acquire it on all nodes, so just do that on the local node too. 333 * One exception, is that P_ locks /do/ get distributed across 334 * the cluster because they might have side-effects. 335 */ 336 if (strncmp(name, "P_", 2) && 337 (cmd == CLVMD_CMD_LOCK_VG || 338 (flags & LCK_TYPE_MASK) == LCK_EXCL || 339 (flags & LCK_LOCAL) || 340 !(flags & LCK_CLUSTER_VG))) 341 node = "."; 342 343 status = _cluster_request(cmd, node, args, len, 344 &response, &num_responses); 345 346 /* If any nodes were down then display them and return an error */ 347 for (i = 0; i < num_responses; i++) { 348 if (response[i].status == EHOSTDOWN) { 349 log_error("clvmd not running on node %s", 350 response[i].node); 351 status = 0; 352 errno = response[i].status; 353 } else if (response[i].status) { 354 log_error("Error locking on node %s: %s", 355 response[i].node, 356 response[i].response[0] ? 357 response[i].response : 358 strerror(response[i].status)); 359 status = 0; 360 errno = response[i].status; 361 } 362 } 363 364 saved_errno = errno; 365 _cluster_free_request(response, num_responses); 366 errno = saved_errno; 367 368 return status; 369 } 370 371 /* API entry point for LVM */ 372 #ifdef CLUSTER_LOCKING_INTERNAL 373 static int _lock_resource(struct cmd_context *cmd, const char *resource, 374 uint32_t flags) 375 #else 376 int lock_resource(struct cmd_context *cmd, const char *resource, uint32_t flags) 377 #endif 378 { 379 char lockname[PATH_MAX]; 380 int cluster_cmd = 0; 381 const char *lock_scope; 382 const char *lock_type = ""; 383 384 assert(strlen(resource) < sizeof(lockname)); 385 assert(resource); 386 387 switch (flags & LCK_SCOPE_MASK) { 388 case LCK_VG: 389 /* If the VG name is empty then lock the unused PVs */ 390 if (*resource == '#' || (flags & LCK_CACHE)) 391 dm_snprintf(lockname, sizeof(lockname), "P_%s", 392 resource); 393 else 394 dm_snprintf(lockname, sizeof(lockname), "V_%s", 395 resource); 396 397 lock_scope = "VG"; 398 cluster_cmd = CLVMD_CMD_LOCK_VG; 399 flags &= LCK_TYPE_MASK; 400 break; 401 402 case LCK_LV: 403 cluster_cmd = CLVMD_CMD_LOCK_LV; 404 strcpy(lockname, resource); 405 lock_scope = "LV"; 406 flags &= 0xffdf; /* Mask off HOLD flag */ 407 break; 408 409 default: 410 log_error("Unrecognised lock scope: %d", 411 flags & LCK_SCOPE_MASK); 412 return 0; 413 } 414 415 switch(flags & LCK_TYPE_MASK) { 416 case LCK_UNLOCK: 417 lock_type = "UN"; 418 break; 419 case LCK_NULL: 420 lock_type = "NL"; 421 break; 422 case LCK_READ: 423 lock_type = "CR"; 424 break; 425 case LCK_PREAD: 426 lock_type = "PR"; 427 break; 428 case LCK_WRITE: 429 lock_type = "PW"; 430 break; 431 case LCK_EXCL: 432 lock_type = "EX"; 433 break; 434 default: 435 log_error("Unrecognised lock type: %u", 436 flags & LCK_TYPE_MASK); 437 return 0; 438 } 439 440 /* If we are unlocking a clustered VG, then trigger remote metadata backups */ 441 if (cluster_cmd == CLVMD_CMD_LOCK_VG && 442 ((flags & LCK_TYPE_MASK) == LCK_UNLOCK) && 443 (flags & LCK_CLUSTER_VG)) { 444 log_very_verbose("Requesing backup of VG metadata for %s", resource); 445 _lock_for_cluster(CLVMD_CMD_VG_BACKUP, LCK_CLUSTER_VG, resource); 446 } 447 448 log_very_verbose("Locking %s %s %s %s%s%s%s (0x%x)", lock_scope, lockname, 449 lock_type, 450 flags & LCK_NONBLOCK ? "" : "B", 451 flags & LCK_HOLD ? "H" : "", 452 flags & LCK_LOCAL ? "L" : "", 453 flags & LCK_CLUSTER_VG ? "C" : "", 454 flags); 455 456 /* Send a message to the cluster manager */ 457 return _lock_for_cluster(cluster_cmd, flags, lockname); 458 } 459 460 #ifdef CLUSTER_LOCKING_INTERNAL 461 static void _locking_end(void) 462 #else 463 void locking_end(void) 464 #endif 465 { 466 if (_clvmd_sock != -1 && close(_clvmd_sock)) 467 stack; 468 469 _clvmd_sock = -1; 470 } 471 472 #ifdef CLUSTER_LOCKING_INTERNAL 473 static void _reset_locking(void) 474 #else 475 void reset_locking(void) 476 #endif 477 { 478 if (close(_clvmd_sock)) 479 stack; 480 481 _clvmd_sock = _open_local_sock(); 482 if (_clvmd_sock == -1) 483 stack; 484 } 485 486 #ifdef CLUSTER_LOCKING_INTERNAL 487 int init_cluster_locking(struct locking_type *locking, struct cmd_context *cmd) 488 { 489 locking->lock_resource = _lock_resource; 490 locking->fin_locking = _locking_end; 491 locking->reset_locking = _reset_locking; 492 locking->flags = LCK_PRE_MEMLOCK | LCK_CLUSTERED; 493 494 _clvmd_sock = _open_local_sock(); 495 if (_clvmd_sock == -1) 496 return 0; 497 498 return 1; 499 } 500 #else 501 int locking_init(int type, struct config_tree *cf, uint32_t *flags) 502 { 503 _clvmd_sock = _open_local_sock(); 504 if (_clvmd_sock == -1) 505 return 0; 506 507 /* Ask LVM to lock memory before calling us */ 508 *flags |= LCK_PRE_MEMLOCK; 509 *flags |= LCK_CLUSTERED; 510 511 return 1; 512 } 513 #endif 514