andersk Git - moira.git/blame - dcm/dcm.pc
dcm_execute() needs to attempt krb5 first, too.
[moira.git] / dcm / dcm.pc
CommitLineData
7ac48069 1/* $Id$
2a2a3914 2 *
7ac48069 3 * The Data Control Manager for Moira.
2a2a3914 4 *
7ac48069 5 * Copyright (C) 1987-1998 by the Massachusetts Institute of Technology.
6 * For copying and distribution information, see the file
7 * <mit-copyright.h>.
2a2a3914 8 */
9
7ac48069 10#include <mit-copyright.h>
11#include <moira.h>
12#include <moira_site.h>
dfaf9b68 13#include <moira_schema.h>
7ac48069 14#include "update.h"
15
16#include <sys/param.h>
17#include <sys/stat.h>
18#include <sys/wait.h>
2a2a3914 19
85330553 20#include <errno.h>
2a2a3914 21#include <signal.h>
22#include <stdio.h>
7ac48069 23#include <stdlib.h>
2a2a3914 24#include <string.h>
2a2a3914 25#include <unistd.h>
2a2a3914 26
2a2a3914 27EXEC SQL INCLUDE sqlca;
7ac48069 28void sqlglm(char *, unsigned int *, unsigned int *);
29
30RCSID("$Header$");
31
32int generate_service(char *name, int force);
33void do_hosts(char *service);
8afdb7c2 34int dcm_send_file(char *service, int type, char *host, char *target);
7ac48069 35int dcm_execute(char *service, char *host, char *script);
36void dbmserr(void);
37
2a2a3914 38#define SQL_NO_MATCH 1403
5eaef520 39#define SOFT_FAIL(x) (((x) == MR_NO_MEM) || ((x) == MR_CANT_CONNECT) || ((x) == MR_CCONFIG) || ((x) == MR_DEADLOCK) || ((x) == MR_BUSY) || ((x) == MR_ABORT))
2a2a3914 40
8b6ef3aa 41/* argument parsing macro */
42#define argis(a, b) (!strcmp(*arg + 1, a) || !strcmp(*arg + 1, b))
43
5eaef520 44char whobuf[256], *whoami = whobuf, *db = "moira";
2a2a3914 45
8afdb7c2 46enum { UNIQUE, DISTRIBUTED, REPLICATED };
47
5eaef520 48int main(int argc, char **argv)
2a2a3914 49{
8b6ef3aa 50 int i, force = 0;
5eaef520 51 EXEC SQL BEGIN DECLARE SECTION;
dfaf9b68 52 char buf[SERVERS_NAME_SIZE], *name;
5eaef520 53 int enable;
54 EXEC SQL END DECLARE SECTION;
55 struct save_queue *sq;
0eed8c8e 56 int status, srvcnt = 0;
57 char **arg = argv, *services[BUFSIZ];
5eaef520 58
59 if (strchr(argv[0], '/'))
60 strcpy(whoami, strrchr(argv[0], '/') + 1);
61 else strcpy(whoami, argv[0]);
62 umask(7);
63
64 setvbuf(stderr, NULL, _IOLBF, BUFSIZ);
65 setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
66
67 initialize_sms_error_table();
68 initialize_krb_error_table();
69
8b6ef3aa 70 while (++arg - argv < argc)
71 {
72 if (**arg == '-')
73 {
74 if (argis("f", "force"))
75 force++;
76 else
77 {
78 com_err(whoami, 0, "Usage: %s [-f] servicename", argv[0]);
79 exit(1);
80 }
81 }
0eed8c8e 82 else
83 /* Doesn't begin with a dash, is a service name.
84 * Build an array of them we can iterate through later.
85 */
86 {
87 services[srvcnt] = malloc(SERVERS_NAME_SIZE);
88 if (!services[srvcnt])
89 {
90 com_err(whoami, 0, "Out of memory!");
91 exit(1);
92 }
93 strncpy(services[srvcnt], *arg, SERVERS_NAME_SIZE);
94 srvcnt++;
95 }
8b6ef3aa 96 }
97
0eed8c8e 98 /* Iterate through services specified on the command line, if any. */
99 if (srvcnt > 0)
5eaef520 100 {
0eed8c8e 101 for (i = 0; i < srvcnt; i++)
5eaef520 102 {
0eed8c8e 103 if (generate_service(services[i], force))
104 {
105 do_hosts(services[i]);
106 free(services[i]);
107 }
2a2a3914 108 }
5eaef520 109 exit(0);
2a2a3914 110 }
111
5eaef520 112 /* if DCM is not enabled, exit after logging */
113 if (!access(NODCMFILE, F_OK))
114 {
115 printf("/etc/nodcm exists -- exiting\n");
116 exit(1);
2a2a3914 117 }
5eaef520 118
ff20de45 119 EXEC SQL WHENEVER SQLERROR DO dbmserr();
120
5eaef520 121 EXEC SQL CONNECT :db IDENTIFIED BY :db;
122
123 EXEC SQL SELECT value INTO :enable FROM numvalues WHERE name = 'dcm_enable';
124 if (enable == 0)
125 {
126 printf("dcm_enable not set -- exiting\n");
127 exit(1);
2a2a3914 128 }
5eaef520 129
130 /* fetch list of services */
131 EXEC SQL DECLARE csr_svc CURSOR FOR SELECT LOWER(name) FROM servers
132 WHERE enable = 1 AND harderror = 0 AND update_int > 0;
133 EXEC SQL OPEN csr_svc;
134 sq = sq_create();
135 while (1)
136 {
137 EXEC SQL FETCH csr_svc INTO :buf;
138 if (sqlca.sqlcode)
139 break;
140
141 sq_save_data(sq, strdup(strtrim(buf)));
2a2a3914 142 }
5eaef520 143 EXEC SQL CLOSE csr_svc;
144 /* we will repeatedly open and close the db since it seems to get
145 upset if you keep it open across a fork */
146 EXEC SQL COMMIT RELEASE;
147
148 /* Now run through list */
149 while (sq_get_data(sq, &name))
150 {
8b6ef3aa 151 if (generate_service(name, force))
5eaef520 152 {
153 switch (fork())
154 {
155 case -1:
ff20de45 156 com_err(whoami, errno, "forking for service %s -- exiting",
5eaef520 157 name);
158 exit(1);
159 case 0:
ff20de45 160 sprintf(strchr(whoami, '\0'), " (%s:%ld)", name, (long)getpid());
5eaef520 161 do_hosts(name);
162 com_err(whoami, 0, "exiting");
163 exit(0);
164 default:
165 break;
2a2a3914 166 }
167 }
168 }
5eaef520 169
ff20de45 170 com_err(whoami, 0, "All files generated. Waiting for children to exit");
5eaef520 171 while (waitpid(0, &status, 0) > 0)
172 ;
173 com_err(whoami, 0, "exiting");
7ac48069 174 exit(0);
2a2a3914 175}
176
177int generate_service(char *name, int force)
178{
5eaef520 179 EXEC SQL BEGIN DECLARE SECTION;
8b6ef3aa 180 int interval, dfcheck, status, inprogress;
5eaef520 181 time_t now;
7ac48069 182 const char *errmsg;
5eaef520 183 EXEC SQL END DECLARE SECTION;
dfaf9b68 184 char dfgen_prog[MAXPATHLEN], dfgen_cmd[2 * MAXPATHLEN];
5eaef520 185 struct sigaction action, prevaction;
186 int waits;
187
188 EXEC SQL CONNECT :db IDENTIFIED BY :db;
189
8b6ef3aa 190 EXEC SQL SELECT update_int, dfcheck, inprogress INTO :interval, :dfcheck,
191 :inprogress FROM servers WHERE name = UPPER(:name);
5eaef520 192 if (sqlca.sqlcode == SQL_NO_MATCH)
193 {
194 com_err(whoami, 0, "No such service `%s'", name);
195 EXEC SQL COMMIT RELEASE;
196 return 0;
2a2a3914 197 }
5eaef520 198
8b6ef3aa 199 /* Someone might try to run a DCM from the command line while the
200 * regular one is running, which will bypass the "interval" test.
201 * Check inprogress to make sure they don't stomp on themselves.
0eed8c8e 202 *
203 * Note that there is still a race condition here, and this doesn't
204 * absolutely prevent 2 DCMs from stepping on one another, but it
205 * does reduce the window of vulnerability greatly.
8b6ef3aa 206 */
207 if (inprogress == 1)
208 {
209 com_err(whoami, 0, "DCM for service `%s' already in progress", name);
210 EXEC SQL COMMIT RELEASE;
211 return 0;
212 }
213
5eaef520 214 time(&now);
215
216 if ((interval * 60 + dfcheck < now) || force)
217 {
218 sprintf(dfgen_prog, "%s/%s.gen", BIN_DIR, name);
219 if (access(dfgen_prog, F_OK) != 0)
220 {
221 com_err(whoami, 0, "prog %s doesn't exist", dfgen_prog);
222 EXEC SQL COMMIT RELEASE;
223 return 0;
2a2a3914 224 }
5eaef520 225 sprintf(dfgen_cmd, "exec %s %s/%s.out", dfgen_prog, DCM_DIR, name);
226 com_err(whoami, 0, "running %s", dfgen_prog);
227
ff20de45 228 EXEC SQL WHENEVER SQLERROR GOTO gen_cleanup;
229
5eaef520 230 EXEC SQL UPDATE servers SET inprogress = 1
231 WHERE name = UPPER(:name);
ff20de45 232 EXEC SQL COMMIT;
5eaef520 233
234 action.sa_flags = 0;
235 sigemptyset(&action.sa_mask);
236 action.sa_handler = SIG_DFL;
237 sigaction(SIGCHLD, &action, &prevaction);
238 waits = system(dfgen_cmd);
239 sigaction(SIGCHLD, &prevaction, NULL);
240 if (WIFSIGNALED(waits))
241 {
242 status = MR_COREDUMP;
243 com_err(whoami, status, " %s exited on signal %d",
244 dfgen_prog, WTERMSIG(waits));
245 }
246 else if (WEXITSTATUS(waits))
247 {
248 /* extract the process's exit value */
249 status = WEXITSTATUS(waits) + ERROR_TABLE_BASE_sms;
250 if (status != MR_NO_CHANGE)
251 com_err(whoami, status, "in %s", dfgen_prog);
252 }
253 else
254 status = MR_SUCCESS;
255
256 if (status == MR_SUCCESS)
257 {
258 EXEC SQL UPDATE servers SET dfgen = :now, dfcheck = :now,
259 inprogress = 0 WHERE name = UPPER(:name);
260 EXEC SQL COMMIT RELEASE;
261 return 1;
262 }
263 else if (status == MR_NO_CHANGE)
264 {
265 EXEC SQL UPDATE servers SET dfcheck = :now, inprogress = 0
266 WHERE name = UPPER(:name);
267 }
268 else if (SOFT_FAIL(status))
269 {
270 errmsg = error_message(status);
271 EXEC SQL UPDATE servers SET errmsg = :errmsg, inprogress = 0
272 WHERE name = UPPER(:name);
273 }
274 else /* HARD_FAIL(status) */
275 {
276 errmsg = error_message(status);
ff20de45 277 EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg,
278 inprogress = 0 WHERE name = UPPER(:name);
5eaef520 279 critical_alert("DCM", "DCM building config files for %s: %s",
280 name, errmsg);
2a2a3914 281 }
282 }
8b6ef3aa 283 else
284 {
285 com_err(whoami, 0, "DCM for service `%s' has run too recently.", name);
286 com_err(whoami, 0, "Use the -force flag to force a DCM.");
287 }
288
5eaef520 289 EXEC SQL COMMIT RELEASE;
290 return 0;
ff20de45 291
292gen_cleanup:
293 EXEC SQL WHENEVER SQLERROR DO dbmserr();
294 EXEC SQL UPDATE servers SET inprogress = 0, harderror = MR_INTERNAL,
295 errmsg = 'DBMS Internal Error' WHERE name = UPPER(:name);
296 dbmserr();
2a2a3914 297}
298
299void do_hosts(char *service)
300{
5eaef520 301 EXEC SQL BEGIN DECLARE SECTION;
8afdb7c2 302 char server_type[SERVERS_TYPE_SIZE], host[MACHINE_NAME_SIZE], *name;
dfaf9b68 303 char target[SERVERS_TARGET_FILE_SIZE], script[SERVERS_SCRIPT_SIZE];
7ac48069 304 const char *errmsg;
8b6ef3aa 305 int status = 0, dfgen, type, mid, inprogress;
5eaef520 306 time_t now;
307 EXEC SQL END DECLARE SECTION;
dae7c89e 308 struct save_queue *sq;
5eaef520 309
310 time(&now);
85330553 311 mr_init();
5eaef520 312
313 EXEC SQL CONNECT :db IDENTIFIED BY :db;
314
8b6ef3aa 315 EXEC SQL SELECT dfgen, type, target_file, script, inprogress
316 INTO :dfgen, :server_type, :target, :script, :inprogress
5eaef520 317 FROM servers WHERE name = UPPER(:service);
8afdb7c2 318 if (!strncmp(strtrim(server_type), "REPLICAT", 8))
319 type = REPLICATED;
320 else if (!strncmp(server_type, "DISTRIB", 8))
321 type = DISTRIBUTED;
322 else
323 type = UNIQUE;
ff20de45 324 strtrim(target);
325 strtrim(script);
5eaef520 326
0eed8c8e 327 /* Rudimentary locking. Doesn't eliminate the possibility of 2 DCMs
328 * stepping on one another, but makes it harder.
329 */
8b6ef3aa 330 if (inprogress == 1)
331 {
332 com_err(whoami, 0, "DCM for service `%s' already in progress", name);
333 EXEC SQL COMMIT RELEASE;
334 return;
335 }
336
5eaef520 337 EXEC SQL DECLARE csr_hst1 CURSOR FOR
ff20de45 338 SELECT m.name, m.mach_id FROM machine m, serverhosts sh
5eaef520 339 WHERE sh.service = UPPER(:service)
340 AND sh.enable = 1 AND sh.hosterror = 0
ff20de45 341 AND sh.lts < :dfgen AND sh.mach_id = m.mach_id;
5eaef520 342 EXEC SQL OPEN csr_hst1;
ff20de45 343 sq = sq_create();
5eaef520 344 while (1)
345 {
ff20de45 346 EXEC SQL FETCH csr_hst1 INTO :host, mid;
5eaef520 347 if (sqlca.sqlcode == SQL_NO_MATCH)
348 break;
349
ff20de45 350 sq_save_data(sq, strdup(strtrim(host)));
351 sq_save_data(sq, (void *)mid);
352 }
353 EXEC SQL CLOSE csr_hst1;
354
355 EXEC SQL WHENEVER SQLERROR GOTO host_cleanup;
356 while (sq_get_data(sq, &name))
357 {
358 sq_get_data(sq, &mid);
8b6ef3aa 359
360 EXEC SQL SELECT inprogress INTO :inprogress FROM serverhosts
361 WHERE service = UPPER(:service) AND mach_id = :mid;
0eed8c8e 362 /* Check if someone got here before we did.
363 * There's still a race condition here, but it's a small one. */
8b6ef3aa 364 if (inprogress == 1)
365 {
366 com_err(whoami, 0, "DCM for service `%s' to host `%s' already in progress", service, name);
367 EXEC SQL COMMIT RELEASE;
368 return;
369 }
370
ff20de45 371 com_err(whoami, 0, "sending %s data to %s", service, name);
5eaef520 372 EXEC SQL UPDATE serverhosts SET inprogress = 1
ff20de45 373 WHERE service = UPPER(:service) AND mach_id = :mid;
374 EXEC SQL COMMIT;
8afdb7c2 375 status = dcm_send_file(service, type, name, target);
5eaef520 376 if (status)
377 {
378 errmsg = error_message(status);
379 EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
ff20de45 380 inprogress = 0, success = 0, ltt = :now
dae7c89e 381 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 382 if (!SOFT_FAIL(status))
383 {
384 EXEC SQL UPDATE serverhosts SET hosterror = :status
dae7c89e 385 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 386 critical_alert("DCM", "DCM updating %s:%s: %s",
ff20de45 387 service, name, errmsg);
2a2a3914 388 }
ff20de45 389 EXEC SQL COMMIT;
5eaef520 390
8afdb7c2 391 if (type == REPLICATED)
5eaef520 392 break;
2a2a3914 393 }
394 }
ff20de45 395 sq_destroy(sq);
5eaef520 396
8afdb7c2 397 if (status == MR_SUCCESS || type != REPLICATED)
5eaef520 398 {
399 EXEC SQL DECLARE csr_hst2 CURSOR FOR
ff20de45 400 SELECT m.name, m.mach_id FROM machine m, serverhosts sh
5eaef520 401 WHERE sh.service = UPPER(:service) AND sh.inprogress = 1
d6ea2c57 402 AND sh.enable = 1 AND sh.hosterror = 0 AND sh.mach_id = m.mach_id;
5eaef520 403 EXEC SQL OPEN csr_hst2;
ff20de45 404 sq = sq_create();
5eaef520 405
406 while (1)
407 {
ff20de45 408 EXEC SQL FETCH csr_hst2 INTO :host, :mid;
5eaef520 409 if (sqlca.sqlcode == SQL_NO_MATCH)
410 break;
411
ff20de45 412 sq_save_data(sq, strdup(strtrim(host)));
413 sq_save_data(sq, (void *)mid);
414 }
415 EXEC SQL CLOSE csr_hst2;
416
417 while (sq_get_data(sq, &name))
418 {
419 sq_get_data(sq, &mid);
420
421 com_err(whoami, 0, "executing instructions on %s", name);
422 status = dcm_execute(service, name, script);
5eaef520 423 if (status)
424 {
425 errmsg = error_message(status);
426 EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
ff20de45 427 inprogress = 0, success = 0, ltt = :now
428 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 429 if (!SOFT_FAIL(status))
430 {
431 EXEC SQL UPDATE serverhosts SET hosterror = :status
ff20de45 432 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 433 critical_alert("DCM", "DCM updating %s:%s: %s",
f272b0e1 434 service, name, errmsg);
2a2a3914 435 }
5eaef520 436
8afdb7c2 437 if (type == REPLICATED)
719f0386 438 break;
5eaef520 439 }
440 else
441 {
ff20de45 442 EXEC SQL UPDATE serverhosts SET inprogress = 0, ltt = :now,
443 lts = :now, success = 1 WHERE service = UPPER(:service)
444 AND mach_id = :mid;
2a2a3914 445 }
ff20de45 446 EXEC SQL COMMIT;
2a2a3914 447 }
5eaef520 448 EXEC SQL CLOSE csr_hst2;
2a2a3914 449 }
450
719f0386 451 if (type == REPLICATED)
452 {
453 /* Clear inprogress flag on any hosts we started but didn't
454 * finish.
455 */
456 EXEC SQL UPDATE serverhosts SET inprogress = 0
457 WHERE service = UPPER(:service);
458 }
459
ff20de45 460 EXEC SQL WHENEVER SQLERROR DO dbmserr();
8afdb7c2 461 if (status && !SOFT_FAIL(status) && type == REPLICATED)
5eaef520 462 {
463 EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg
464 WHERE name = UPPER(:service);
2a2a3914 465 }
466
5eaef520 467 EXEC SQL COMMIT RELEASE;
ff20de45 468 return;
469
470host_cleanup:
471 EXEC SQL UPDATE serverhosts SET inprogress = 0, success = 0, ltt = :now,
472 hosterror = MR_INTERNAL, hosterrmsg = 'DBMS Internal Error'
473 WHERE service = UPPER(:service) AND mach_id = :mid;
8afdb7c2 474 if (type == REPLICATED)
ff20de45 475 {
476 EXEC SQL UPDATE servers SET harderror = MR_INTERNAL,
477 errmsg = 'DBMS Internal Error' WHERE name = UPPER(:service);
478 }
2a2a3914 479}
480
8afdb7c2 481int dcm_send_file(char *service, int type, char *host, char *target)
2a2a3914 482{
85330553 483 char data[MAXPATHLEN];
484 int code, conn;
5eaef520 485
85330553 486 conn = mr_connect_internal(host, "moira_update");
487 if (!conn)
5eaef520 488 {
85330553 489 com_err(whoami, errno, "can't connect to %s", host);
5eaef520 490 return MR_CANT_CONNECT;
2a2a3914 491 }
492
991417e4 493 code = mr_send_krb5_auth(conn, host);
494 if (code)
495 code = mr_send_auth(conn, host);
5eaef520 496 if (code)
497 {
498 com_err(whoami, code, "authenticating to %s", host);
499 goto done;
2a2a3914 500 }
501
8afdb7c2 502 if (type == DISTRIBUTED)
503 sprintf(data, "%s/%s/%s", DCM_DIR, service, host);
504 else
505 sprintf(data, "%s/%s.out", DCM_DIR, service);
96c29960 506 code = mr_send_file(conn, data, target, 0);
5eaef520 507 if (code)
508 com_err(whoami, code, "sending data to %s", host);
2a2a3914 509
510done:
96c29960 511 mr_send_quit(conn);
85330553 512 close(conn);
5eaef520 513 return code;
2a2a3914 514}
515
516int dcm_execute(char *service, char *host, char *script)
517{
85330553 518 char inst[MAXPATHLEN];
519 int code, conn;
5eaef520 520
85330553 521 conn = mr_connect_internal(host, "moira_update");
522 if (!conn)
5eaef520 523 {
85330553 524 com_err(whoami, errno, "can't connect to %s", host);
5eaef520 525 return MR_CANT_CONNECT;
2a2a3914 526 }
527
76ba8bef 528 code = mr_send_krb5_auth(conn, host);
529 if (code)
530 code = mr_send_auth(conn, host);
5eaef520 531 if (code)
532 {
533 com_err(whoami, code, "authenticating to %s", host);
534 goto done;
2a2a3914 535 }
536
5eaef520 537 sprintf(inst, "/tmp/moira-update.XXXXXX");
538 mktemp(inst);
96c29960 539 code = mr_send_file(conn, script, inst, 0);
5eaef520 540 if (code)
541 {
542 com_err(whoami, code, "sending instructions to %s", host);
543 goto done;
2a2a3914 544 }
545
96c29960 546 code = mr_execute(conn, inst);
5eaef520 547 if (code)
548 com_err(whoami, code, "executing instructions on %s", host);
2a2a3914 549
550done:
96c29960 551 mr_send_quit(conn);
85330553 552 close(conn);
5eaef520 553 return code;
554}
2a2a3914 555
556void dbmserr(void)
557{
5eaef520 558 EXEC SQL BEGIN DECLARE SECTION;
559 char err_msg[256];
560 EXEC SQL END DECLARE SECTION;
561 int bufsize = 256, msglength = 0;
562
563 sqlglm(err_msg, &bufsize, &msglength);
564 err_msg[msglength] = '\0';
565 com_err(whoami, 0, "Encountered SQL error:\n%s", err_msg);
566 com_err(whoami, 0, "exiting");
567 exit(1);
2a2a3914 568}
This page took 1.45574 seconds and 5 git commands to generate.