-/*
- * The Data Control Manager for MOIRA.
+/* $Id$
*
- * Copyright 1987, 1988 by the Massachusetts Institute of Technology.
- * For copying and distribution information, see the file
- * "mit-copyright.h".
+ * The Data Control Manager for Moira.
*
- * $Source$
- * $Author$
- * $Header$
+ * Copyright (C) 1987-1998 by the Massachusetts Institute of Technology.
+ * For copying and distribution information, see the file
+ * <mit-copyright.h>.
*/
-#ifndef lint
-static char rcsid_dcm_c[] = "$Header$";
-#endif lint
+#include <mit-copyright.h>
+#include <moira.h>
+#include <moira_site.h>
+#include <moira_schema.h>
+#include "update.h"
+
+#include <sys/param.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <errno.h>
#include <signal.h>
#include <stdio.h>
+#include <stdlib.h>
#include <string.h>
-#include <time.h>
#include <unistd.h>
-#include <sys/param.h>
-#include <sys/wait.h>
-
-#include <com_err.h>
-#include <gdb.h>
-#include <moira.h>
-#include <moira_site.h>
EXEC SQL INCLUDE sqlca;
-EXEC SQL WHENEVER SQLERROR DO dbmserr();
+void sqlglm(char *, unsigned int *, unsigned int *); /* external; dbmserr() calls it to obtain the SQL error text it logs */
+
+RCSID("$Header$");
+
+int generate_service(char *name, int force); /* returns 1 only when the generator ran and exited 0, otherwise 0 */
+void do_hosts(char *service); /* sends a service's data to its hosts, then runs the service's script on them */
+int dcm_send_file(char *service, int type, char *host, char *target); /* returns 0 on success or an error code */
+int dcm_execute(char *service, char *host, char *script); /* returns 0 on success or an error code */
+void dbmserr(void); /* logs the current SQL error and exits with status 1 */
#define SQL_NO_MATCH 1403
-#define SOFT_FAIL(x) (((x) == MR_NO_MEM) || ((x) == MR_CANT_CONNECT) || ((x) == MR_CCONFIG) || ((x) == MR_DEADLOCK) || ((x) == MR_BUSY) || ((x) == MR_ABORT))
+#define SOFT_FAIL(x) (((x) == MR_NO_MEM) || ((x) == MR_CANT_CONNECT) || ((x) == MR_CCONFIG) || ((x) == MR_DEADLOCK) || ((x) == MR_BUSY) || ((x) == MR_ABORT)) /* for these codes callers record the error message but do not set harderror/hosterror */
-char whobuf[256], *whoami=whobuf, *db="moira";
-extern CONNECTION conn;
+/* argument parsing macro: true when the current option (*arg, skipping
+ * its leading '-') is exactly a or b */
+#define argis(a, b) (!strcmp(*arg + 1, a) || !strcmp(*arg + 1, b))
-int main(argc, argv)
-int argc;
-char *argv[];
+/* whoami is the program name handed to com_err(); db is used as both the
+ * user and the password in "CONNECT :db IDENTIFIED BY :db".
+ */
+char whobuf[256], *whoami = whobuf, *db = "moira";
+
+/* Service types; do_hosts() derives one of these from servers.type. */
+enum { UNIQUE, DISTRIBUTED, REPLICATED };
+
+/*
+ * Entry point of the Data Control Manager.
+ *
+ * With service names on the command line: run generate_service() and,
+ * when it returns nonzero, do_hosts() for each named service in order,
+ * then exit 0. "-f" / "-force" is passed on as the force argument.
+ * The NODCMFILE and dcm_enable checks below are not performed in this mode.
+ *
+ * With no service names: exit 1 if NODCMFILE exists or the dcm_enable
+ * numvalue is 0. Otherwise collect every service with enable = 1,
+ * harderror = 0 and update_int > 0, call generate_service() on each,
+ * fork one child per regenerated service to run do_hosts(), and wait
+ * for all children before exiting 0.
+ */
+int main(int argc, char **argv)
{
- int i;
- EXEC SQL BEGIN DECLARE SECTION;
- char buf[16], *name;
- int enable;
- EXEC SQL END DECLARE SECTION;
- struct save_queue *sq;
- int status;
-
- if (strchr(argv[0], '/')) strcpy(whoami, strrchr(argv[0], '/')+1);
- else strcpy(whoami, argv[0]);
- umask(7);
-
- setvbuf(stderr, NULL, _IOLBF, BUFSIZ);
- setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
-
- initialize_sms_error_table();
- initialize_krb_error_table();
-
- /* if services were specified on the command line, do just those ones */
- if (argc > 1) {
- for (i = 1; i < argc; i++) {
- if (generate_service(argv[i], 1))
- do_hosts(argv[i]);
+ int i, force = 0;
+ EXEC SQL BEGIN DECLARE SECTION;
+ char buf[SERVERS_NAME_SIZE], *name;
+ /* Start at 0 so a missing dcm_enable row (no row is not an SQLERROR)
+ * reads as "disabled" instead of leaving the variable uninitialized.
+ */
+ int enable = 0;
+ EXEC SQL END DECLARE SECTION;
+ struct save_queue *sq;
+ int status, srvcnt = 0;
+ char **arg = argv, *services[BUFSIZ];
+
+ /* Use the basename of argv[0] as the name shown in log messages. */
+ if (strchr(argv[0], '/'))
+ strcpy(whoami, strrchr(argv[0], '/') + 1);
+ else strcpy(whoami, argv[0]);
+ umask(7);
+
+ setvbuf(stderr, NULL, _IOLBF, BUFSIZ);
+ setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
+
+ initialize_sms_error_table();
+ initialize_krb_error_table();
+
+ /* Parse every argument first, so a force flag applies to all services
+ * regardless of where it appears on the command line.
+ */
+ while (++arg - argv < argc)
+ {
+ if (**arg == '-')
+ {
+ if (argis("f", "force"))
+ force++;
+ else
+ {
+ com_err(whoami, 0, "Usage: %s [-f] servicename", argv[0]);
+ exit(1);
+ }
+ }
+ else
+ /* Doesn't begin with a dash, is a service name.
+ * Build an array of them we can iterate through later.
+ */
+ {
+ /* Never write past the end of the fixed-size services[] array. */
+ if (srvcnt >= (int)(sizeof(services) / sizeof(services[0])))
+ {
+ com_err(whoami, 0, "Too many services specified");
+ exit(1);
+ }
+ services[srvcnt] = malloc(SERVERS_NAME_SIZE);
+ if (!services[srvcnt])
+ {
+ com_err(whoami, 0, "Out of memory!");
+ exit(1);
+ }
+ /* strncpy() leaves the copy unterminated when the source is too
+ * long, so copy one byte less and terminate explicitly.
+ */
+ strncpy(services[srvcnt], *arg, SERVERS_NAME_SIZE - 1);
+ services[srvcnt][SERVERS_NAME_SIZE - 1] = '\0';
+ srvcnt++;
}
- exit(0);
}
- /* if DCM is not enabled, exit after logging */
- if (!access(NODCMFILE, F_OK)) {
- printf("/etc/nodcm exists -- exiting\n");
- exit(1);
+ /* Iterate through services specified on the command line, if any. */
+ if (srvcnt > 0)
+ {
+ for (i = 0; i < srvcnt; i++)
+ {
+ if (generate_service(services[i], force))
+ {
+ do_hosts(services[i]);
+ }
+ /* Release the name whether or not the service was regenerated. */
+ free(services[i]);
+ }
+ exit(0);
}
-
- EXEC SQL CONNECT :db IDENTIFIED BY :db;
-
- EXEC SQL SELECT value INTO :enable FROM numvalues
- WHERE name='dcm_enable';
- if (enable == 0) {
- printf("dcm_enable not set -- exiting\n");
- exit(1);
+
+ /* if DCM is not enabled, exit after logging */
+ if (!access(NODCMFILE, F_OK))
+ {
+ printf("/etc/nodcm exists -- exiting\n");
+ exit(1);
+ }
+
+ EXEC SQL WHENEVER SQLERROR DO dbmserr();
+
+ EXEC SQL CONNECT :db IDENTIFIED BY :db;
+
+ EXEC SQL SELECT value INTO :enable FROM numvalues WHERE name = 'dcm_enable';
+ if (enable == 0)
+ {
+ printf("dcm_enable not set -- exiting\n");
+ exit(1);
}
-
- /* fetch list of services */
- EXEC SQL DECLARE csr_svc CURSOR FOR SELECT LOWER(name) FROM servers
- WHERE enable=1 AND harderror=0 AND update_int>0;
- EXEC SQL OPEN csr_svc;
- sq = sq_create();
- while(1) {
- EXEC SQL FETCH csr_svc INTO :buf;
- if (sqlca.sqlcode) break;
-
- sq_save_data(sq, strdup(strtrim(buf)));
+
+ /* fetch list of services */
+ EXEC SQL DECLARE csr_svc CURSOR FOR SELECT LOWER(name) FROM servers
+ WHERE enable = 1 AND harderror = 0 AND update_int > 0;
+ EXEC SQL OPEN csr_svc;
+ sq = sq_create();
+ while (1)
+ {
+ EXEC SQL FETCH csr_svc INTO :buf;
+ /* any nonzero sqlcode (including "no more rows") ends the scan */
+ if (sqlca.sqlcode)
+ break;
+
+ sq_save_data(sq, strdup(strtrim(buf)));
}
- EXEC SQL CLOSE csr_svc;
- /* we will repeatedly open and close the db since it seems to get
- upset if you keep it open across a fork */
- EXEC SQL COMMIT RELEASE;
-
- /* Now run through list */
- while (sq_get_data(sq, &name)) {
- if (generate_service(name, 0)) {
- switch (fork()) {
- case -1:
- fprintf(stderr,
- "dcm: could not fork for service %s -- exiting",
- name);
- exit(1);
- case 0:
- sprintf(strchr(whoami, '\0'), " (%s)", name);
- do_hosts(name);
- com_err(whoami, 0, "exiting");
- exit(0);
- default:
- break;
+ EXEC SQL CLOSE csr_svc;
+ /* we will repeatedly open and close the db since it seems to get
+ upset if you keep it open across a fork */
+ EXEC SQL COMMIT RELEASE;
+
+ /* Now run through list */
+ while (sq_get_data(sq, &name))
+ {
+ if (generate_service(name, force))
+ {
+ switch (fork())
+ {
+ case -1:
+ com_err(whoami, errno, "forking for service %s -- exiting",
+ name);
+ exit(1);
+ case 0:
+ /* child: tag log lines with the service name and pid */
+ sprintf(strchr(whoami, '\0'), " (%s:%ld)", name, (long)getpid());
+ do_hosts(name);
+ com_err(whoami, 0, "exiting");
+ exit(0);
+ default:
+ break;
}
}
}
-
- /* wait for children */
- while (waitpid(0, &status, 0) > 0) ;
- com_err(whoami, 0, "exiting");
+
+ com_err(whoami, 0, "All files generated. Waiting for children to exit");
+ while (waitpid(0, &status, 0) > 0)
+ ;
+ com_err(whoami, 0, "exiting");
+ exit(0);
}
+/*
+ * Rebuild the data files for service `name' when they are due.
+ *
+ * The generator BIN_DIR/<name>.gen is run (writing DCM_DIR/<name>.out)
+ * when update_int * 60 + dfcheck is earlier than the current time, or
+ * when `force' is nonzero. The servers row is flagged inprogress while
+ * the generator runs and updated with the outcome afterwards.
+ *
+ * Returns 1 only when the generator ran and exited with status 0.
+ * Returns 0 in every other case: no such service, already in progress,
+ * not yet due, generator program missing, MR_NO_CHANGE, or any failure.
+ * An SQL error after the inprogress flag has been set jumps to
+ * gen_cleanup, which ends in dbmserr() and therefore exits the process.
+ */
int generate_service(char *name, int force)
{
- EXEC SQL BEGIN DECLARE SECTION;
- int interval, dfcheck, status;
- time_t now;
- char *errmsg;
- EXEC SQL END DECLARE SECTION;
- char dfgen_prog[64], dfgen_cmd[128];
- struct sigaction action, prevaction;
- int waits;
-
- EXEC SQL CONNECT :db IDENTIFIED BY :db;
-
- EXEC SQL SELECT update_int, dfcheck INTO :interval, :dfcheck
- FROM servers WHERE name=UPPER(:name);
- if (sqlca.sqlcode == SQL_NO_MATCH) {
- com_err(whoami, 0, "No such service `%s'", name);
- EXEC SQL COMMIT RELEASE;
- return 0;
+ EXEC SQL BEGIN DECLARE SECTION;
+ int interval, dfcheck, status, inprogress;
+ time_t now;
+ const char *errmsg;
+ EXEC SQL END DECLARE SECTION;
+ char dfgen_prog[MAXPATHLEN], dfgen_cmd[2 * MAXPATHLEN];
+ struct sigaction action, prevaction;
+ int waits;
+
+ EXEC SQL CONNECT :db IDENTIFIED BY :db;
+
+ EXEC SQL SELECT update_int, dfcheck, inprogress INTO :interval, :dfcheck,
+ :inprogress FROM servers WHERE name = UPPER(:name);
+ if (sqlca.sqlcode == SQL_NO_MATCH)
+ {
+ com_err(whoami, 0, "No such service `%s'", name);
+ EXEC SQL COMMIT RELEASE;
+ return 0;
+ }
+
+ /* Someone might try to run a DCM from the command line while the
+ * regular one is running, which will bypass the "interval" test.
+ * Check inprogress to make sure they don't stomp on themselves.
+ *
+ * Note that there is still a race condition here, and this doesn't
+ * absolutely prevent 2 DCMs from stepping on one another, but it
+ * does reduce the window of vulnerability greatly.
+ */
+ if (inprogress == 1)
+ {
+ com_err(whoami, 0, "DCM for service `%s' already in progress", name);
+ EXEC SQL COMMIT RELEASE;
+ return 0;
}
-
- time(&now);
-
- if ((interval * 60 + dfcheck < now) || force) {
- sprintf(dfgen_prog, "%s/%s.gen", BIN_DIR, name);
- if (access(dfgen_prog, F_OK) != 0) {
- com_err(whoami, 0, "prog %s doesn't exist", dfgen_prog);
- EXEC SQL COMMIT RELEASE;
- return 0;
+
+ time(&now);
+
+ if ((interval * 60 + dfcheck < now) || force)
+ {
+ sprintf(dfgen_prog, "%s/%s.gen", BIN_DIR, name);
+ if (access(dfgen_prog, F_OK) != 0)
+ {
+ com_err(whoami, 0, "prog %s doesn't exist", dfgen_prog);
+ EXEC SQL COMMIT RELEASE;
+ return 0;
+ }
+ sprintf(dfgen_cmd, "exec %s %s/%s.out", dfgen_prog, DCM_DIR, name);
+ com_err(whoami, 0, "running %s", dfgen_prog);
+
+ /* From here on an SQL error must also clear the inprogress flag. */
+ EXEC SQL WHENEVER SQLERROR GOTO gen_cleanup;
+
+ EXEC SQL UPDATE servers SET inprogress = 1
+ WHERE name = UPPER(:name);
+ EXEC SQL COMMIT;
+
+ /* Run the generator with the default SIGCHLD disposition, then put
+ * the previous disposition back.
+ */
+ action.sa_flags = 0;
+ sigemptyset(&action.sa_mask);
+ action.sa_handler = SIG_DFL;
+ sigaction(SIGCHLD, &action, &prevaction);
+ waits = system(dfgen_cmd);
+ sigaction(SIGCHLD, &prevaction, NULL);
+ if (waits == -1)
+ {
+ /* system() itself failed (no child status is available), so the
+ * wait-status macros below would be applied to a meaningless value.
+ */
+ status = MR_INTERNAL;
+ com_err(whoami, errno, "running %s", dfgen_prog);
+ }
+ else if (WIFSIGNALED(waits))
+ {
+ status = MR_COREDUMP;
+ com_err(whoami, status, " %s exited on signal %d",
+ dfgen_prog, WTERMSIG(waits));
+ }
+ else if (WEXITSTATUS(waits))
+ {
+ /* extract the process's exit value */
+ status = WEXITSTATUS(waits) + ERROR_TABLE_BASE_sms;
+ if (status != MR_NO_CHANGE)
+ com_err(whoami, status, "in %s", dfgen_prog);
}
- sprintf(dfgen_cmd, "exec %s %s/%s.out", dfgen_prog, DCM_DIR, name);
- com_err(whoami, 0, "running %s", dfgen_prog);
-
- EXEC SQL UPDATE servers SET inprogress=1
- WHERE name=UPPER(:name);
-
- action.sa_flags = 0;
- sigemptyset(&action.sa_mask);
- action.sa_handler = SIG_DFL;
- sigaction(SIGCHLD, &action, &prevaction);
- waits = system(dfgen_cmd);
- sigaction(SIGCHLD, &prevaction, NULL);
- if (WIFSIGNALED(waits)) {
- status = MR_COREDUMP;
- com_err(whoami, status, " %s exited on signal %d",
- dfgen_prog, WTERMSIG(waits));
- } else if (WEXITSTATUS(waits)) {
- /* extract the process's exit value */
- status = WEXITSTATUS(waits) + ERROR_TABLE_BASE_sms;
- if (status != MR_NO_CHANGE)
- com_err(whoami, status, "in %s", dfgen_prog);
- } else status = MR_SUCCESS;
-
- if (status == MR_SUCCESS) {
- EXEC SQL UPDATE servers SET dfgen = :now, dfcheck = :now,
- inprogress = 0 WHERE name=UPPER(:name);
- EXEC SQL COMMIT RELEASE;
- return 1;
- } else if (status == MR_NO_CHANGE) {
- EXEC SQL UPDATE servers SET dfcheck = :now, inprogress = 0
- WHERE name=UPPER(:name);
- } else if (SOFT_FAIL(status)) {
- errmsg = error_message(status);
- EXEC SQL UPDATE servers SET errmsg = :errmsg, inprogress = 0
- WHERE name=UPPER(:name);
- } else { /* HARD_FAIL(status) */
- errmsg = error_message(status);
- EXEC SQL UPDATE servers SET dfcheck = :now, harderror = :status,
- errmsg = :errmsg, inprogress = 0 WHERE name=UPPER(:name);
- critical_alert("DCM","DCM building config files for %s: %s",
- name, errmsg);
+ else
+ status = MR_SUCCESS;
+
+ if (status == MR_SUCCESS)
+ {
+ /* New files were built: record both generation and check times. */
+ EXEC SQL UPDATE servers SET dfgen = :now, dfcheck = :now,
+ inprogress = 0 WHERE name = UPPER(:name);
+ EXEC SQL COMMIT RELEASE;
+ return 1;
}
+ else if (status == MR_NO_CHANGE)
+ {
+ /* Nothing changed: only the check time moves forward. */
+ EXEC SQL UPDATE servers SET dfcheck = :now, inprogress = 0
+ WHERE name = UPPER(:name);
+ }
+ else if (SOFT_FAIL(status))
+ {
+ /* Soft failure: record the message, leave harderror alone. */
+ errmsg = error_message(status);
+ EXEC SQL UPDATE servers SET errmsg = :errmsg, inprogress = 0
+ WHERE name = UPPER(:name);
+ }
+ else /* HARD_FAIL(status) */
+ {
+ errmsg = error_message(status);
+ EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg,
+ inprogress = 0 WHERE name = UPPER(:name);
+ critical_alert("DCM", "DCM building config files for %s: %s",
+ name, errmsg);
+ }
+ }
+ else
+ {
+ com_err(whoami, 0, "DCM for service `%s' has run too recently.", name);
+ com_err(whoami, 0, "Use the -force flag to force a DCM.");
}
- EXEC SQL COMMIT RELEASE;
- return 0;
+
+ EXEC SQL COMMIT RELEASE;
+ return 0;
+
+gen_cleanup:
+ /* Restore the default handler first so a failure of the UPDATE below
+ * cannot jump back to this label.
+ */
+ EXEC SQL WHENEVER SQLERROR DO dbmserr();
+ EXEC SQL UPDATE servers SET inprogress = 0, harderror = MR_INTERNAL,
+ errmsg = 'DBMS Internal Error' WHERE name = UPPER(:name);
+ dbmserr();
+ return 0; /* not reached: dbmserr() exits */
}
+/*
+ * Propagate the generated data for `service' to its hosts.
+ *
+ * Phase 1: every enabled, error-free host whose lts is older than the
+ * service's dfgen is flagged inprogress and sent the data with
+ * dcm_send_file(). Phase 2: every host of the service still flagged
+ * inprogress has the service's script run on it with dcm_execute();
+ * success sets lts/ltt and success = 1. Phase 2 is skipped when a
+ * REPLICATED service failed in phase 1.
+ *
+ * Failures are recorded per host (hosterrmsg, and hosterror unless the
+ * code is a SOFT_FAIL). For a REPLICATED service the first failure stops
+ * the current phase, leftover inprogress flags are cleared, and a
+ * non-soft failure is also recorded on the servers row.
+ *
+ * Returns nothing. Returns early if the service, or one of its hosts,
+ * is already marked inprogress.
+ */
void do_hosts(char *service)
{
- EXEC SQL BEGIN DECLARE SECTION;
- char type[16], host[73], target[64], script[128], *errmsg;
- int status = 0, mid, dfgen, replicated;
- time_t now;
- EXEC SQL END DECLARE SECTION;
-
- time(&now);
- gdb_init();
-
- EXEC SQL CONNECT :db IDENTIFIED BY :db;
-
- EXEC SQL SELECT dfgen, type, target_file, script
- INTO :dfgen, :type, :target, :script
- FROM servers WHERE name=UPPER(:service);
- replicated = !strncmp(type, "REPLICAT", 8);
-
- EXEC SQL DECLARE csr_hst1 CURSOR FOR
- SELECT m.name FROM machine m, serverhosts sh
- WHERE sh.service=UPPER(:service) AND sh.enable=1 AND sh.hosterror=0
- AND sh.lts<:dfgen AND sh.mach_id=m.mach_id
- FOR UPDATE OF sh.inprogress, sh.hosterror, sh.hosterrmsg;
- EXEC SQL OPEN csr_hst1;
-
- while (1) {
- EXEC SQL FETCH csr_hst1 INTO :host;
- if (sqlca.sqlcode == SQL_NO_MATCH) break;
-
- com_err(whoami, 0, "sending %s data to %s", service, strtrim(host));
- EXEC SQL UPDATE serverhosts SET inprogress = 1
- WHERE CURRENT OF csr_hst1;
- status = dcm_send_file(service, host, strtrim(target));
- if (status) {
- errmsg = error_message(status);
- EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
- inprogress = 0 WHERE CURRENT OF csr_hst1;
- if (!SOFT_FAIL(status)) {
- EXEC SQL UPDATE serverhosts SET hosterror = :status
- WHERE CURRENT OF csr_hst1;
- critical_alert("DCM", "DCM updating %s:%s: %s",
- service, host, errmsg);
+ EXEC SQL BEGIN DECLARE SECTION;
+ char server_type[SERVERS_TYPE_SIZE], host[MACHINE_NAME_SIZE], *name;
+ char target[SERVERS_TARGET_FILE_SIZE], script[SERVERS_SCRIPT_SIZE];
+ const char *errmsg;
+ int status = 0, dfgen, type, mid, inprogress;
+ time_t now;
+ EXEC SQL END DECLARE SECTION;
+ struct save_queue *sq;
+ /* Queue entries are pointer-sized; fetch into a pointer and narrow it
+ * afterwards instead of letting sq_get_data() write through an int *.
+ */
+ void *mid_data;
+
+ time(&now);
+ mr_init();
+
+ EXEC SQL CONNECT :db IDENTIFIED BY :db;
+
+ EXEC SQL SELECT dfgen, type, target_file, script, inprogress
+ INTO :dfgen, :server_type, :target, :script, :inprogress
+ FROM servers WHERE name = UPPER(:service);
+ if (!strncmp(strtrim(server_type), "REPLICAT", 8))
+ type = REPLICATED;
+ /* "DISTRIB" is 7 characters: comparing 8 would include the literal's
+ * terminator and never match a longer type string.
+ */
+ else if (!strncmp(server_type, "DISTRIB", 7))
+ type = DISTRIBUTED;
+ else
+ type = UNIQUE;
+ strtrim(target);
+ strtrim(script);
+
+ /* Rudimentary locking. Doesn't eliminate the possibility of 2 DCMs
+ * stepping on one another, but makes it harder.
+ */
+ if (inprogress == 1)
+ {
+ /* `name' is not set yet at this point; report the service argument. */
+ com_err(whoami, 0, "DCM for service `%s' already in progress", service);
+ EXEC SQL COMMIT RELEASE;
+ return;
+ }
+
+ /* Collect the hosts to update into a queue of (name, mach_id) pairs so
+ * the cursor can be closed before the per-host commits.
+ */
+ EXEC SQL DECLARE csr_hst1 CURSOR FOR
+ SELECT m.name, m.mach_id FROM machine m, serverhosts sh
+ WHERE sh.service = UPPER(:service)
+ AND sh.enable = 1 AND sh.hosterror = 0
+ AND sh.lts < :dfgen AND sh.mach_id = m.mach_id;
+ EXEC SQL OPEN csr_hst1;
+ sq = sq_create();
+ while (1)
+ {
+ /* both targets are host variables and need the leading colon */
+ EXEC SQL FETCH csr_hst1 INTO :host, :mid;
+ if (sqlca.sqlcode == SQL_NO_MATCH)
+ break;
+
+ sq_save_data(sq, strdup(strtrim(host)));
+ sq_save_data(sq, (void *)(long)mid);
+ }
+ EXEC SQL CLOSE csr_hst1;
+
+ /* From here on an SQL error must mark the current host as failed. */
+ EXEC SQL WHENEVER SQLERROR GOTO host_cleanup;
+ while (sq_get_data(sq, &name))
+ {
+ sq_get_data(sq, &mid_data);
+ mid = (int)(long)mid_data;
+
+ EXEC SQL SELECT inprogress INTO :inprogress FROM serverhosts
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ /* Check if someone got here before we did.
+ * There's still a race condition here, but it's a small one. */
+ if (inprogress == 1)
+ {
+ com_err(whoami, 0, "DCM for service `%s' to host `%s' already in progress", service, name);
+ EXEC SQL COMMIT RELEASE;
+ return;
+ }
+
+ com_err(whoami, 0, "sending %s data to %s", service, name);
+ EXEC SQL UPDATE serverhosts SET inprogress = 1
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ EXEC SQL COMMIT;
+ status = dcm_send_file(service, type, name, target);
+ if (status)
+ {
+ errmsg = error_message(status);
+ EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
+ inprogress = 0, success = 0, ltt = :now
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ if (!SOFT_FAIL(status))
+ {
+ EXEC SQL UPDATE serverhosts SET hosterror = :status
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ critical_alert("DCM", "DCM updating %s:%s: %s",
+ service, name, errmsg);
}
-
- if (replicated) break;
+ EXEC SQL COMMIT;
+
+ /* a replicated service stops at the first host that fails */
+ if (type == REPLICATED)
+ break;
}
}
- EXEC SQL CLOSE csr_hst1;
-
- if (status == MR_SUCCESS || !replicated) {
- EXEC SQL DECLARE csr_hst2 CURSOR FOR
- SELECT m.name FROM machine m, serverhosts sh
- WHERE sh.service=UPPER(:service) AND sh.inprogress=1
- AND sh.mach_id=m.mach_id
- FOR UPDATE OF sh.hosterror, sh.hosterrmsg, sh.inprogress;
- EXEC SQL OPEN csr_hst2;
-
- while (1) {
- EXEC SQL FETCH csr_hst2 INTO :host;
- if (sqlca.sqlcode == SQL_NO_MATCH) break;
-
- com_err(whoami, 0, "executing instructions on %s", strtrim(host));
- status = dcm_execute(service, host, strtrim(script));
- if (status) {
- errmsg = error_message(status);
- EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
- inprogress = 0 WHERE CURRENT OF csr_hst2;
- if (!SOFT_FAIL(status)) {
- EXEC SQL UPDATE serverhosts SET hosterror = :status
- WHERE CURRENT OF csr_hst2;
- critical_alert("DCM", "DCM updating %s:%s: %s",
- service, host, errmsg);
- }
-
- if (replicated) {
- /* We're giving up, so clear the inprogress flag on
- any hosts in this service we haven't gotten to yet */
- EXEC SQL UPDATE serverhosts SET inprogress = 0
- WHERE service=UPPER(:service);
- break;
+ sq_destroy(sq);
+
+ /* Second phase: run the script on every host still flagged inprogress,
+ * i.e. every host whose transfer above succeeded.
+ */
+ if (status == MR_SUCCESS || type != REPLICATED)
+ {
+ EXEC SQL DECLARE csr_hst2 CURSOR FOR
+ SELECT m.name, m.mach_id FROM machine m, serverhosts sh
+ WHERE sh.service = UPPER(:service) AND sh.inprogress = 1
+ AND sh.enable = 1 AND sh.hosterror = 0 AND sh.mach_id = m.mach_id;
+ EXEC SQL OPEN csr_hst2;
+ sq = sq_create();
+
+ while (1)
+ {
+ EXEC SQL FETCH csr_hst2 INTO :host, :mid;
+ if (sqlca.sqlcode == SQL_NO_MATCH)
+ break;
+
+ sq_save_data(sq, strdup(strtrim(host)));
+ sq_save_data(sq, (void *)(long)mid);
+ }
+ /* csr_hst2 is closed exactly once, here, before the per-host commits */
+ EXEC SQL CLOSE csr_hst2;
+
+ while (sq_get_data(sq, &name))
+ {
+ sq_get_data(sq, &mid_data);
+ mid = (int)(long)mid_data;
+
+ com_err(whoami, 0, "executing instructions on %s", name);
+ status = dcm_execute(service, name, script);
+ if (status)
+ {
+ errmsg = error_message(status);
+ EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
+ inprogress = 0, success = 0, ltt = :now
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ if (!SOFT_FAIL(status))
+ {
+ EXEC SQL UPDATE serverhosts SET hosterror = :status
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ critical_alert("DCM", "DCM updating %s:%s: %s",
+ service, name, errmsg);
}
- } else {
- EXEC SQL UPDATE serverhosts SET inprogress=0, lts=:now
- WHERE CURRENT OF csr_hst2;
+
+ if (type == REPLICATED)
+ break;
+ }
+ else
+ {
+ EXEC SQL UPDATE serverhosts SET inprogress = 0, ltt = :now,
+ lts = :now, success = 1 WHERE service = UPPER(:service)
+ AND mach_id = :mid;
}
+ EXEC SQL COMMIT;
}
- EXEC SQL CLOSE csr_hst2;
+ }
+
+ if (type == REPLICATED)
+ {
+ /* Clear inprogress flag on any hosts we started but didn't
+ * finish.
+ */
+ EXEC SQL UPDATE serverhosts SET inprogress = 0
+ WHERE service = UPPER(:service);
}
- if (status && replicated) {
- EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg
- WHERE name = UPPER(:service);
+ EXEC SQL WHENEVER SQLERROR DO dbmserr();
+ /* Only a non-soft failure of a replicated service is escalated to the
+ * servers row.
+ */
+ if (status && !SOFT_FAIL(status) && type == REPLICATED)
+ {
+ EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg
+ WHERE name = UPPER(:service);
}
- EXEC SQL COMMIT RELEASE;
+ EXEC SQL COMMIT RELEASE;
+ return;
+
+host_cleanup:
+ /* Reached on an SQL error inside the host loops: mark the host being
+ * processed (and, for a replicated service, the service) as failed.
+ * NOTE(review): nothing is committed or released on this path -- confirm
+ * that this is intended.
+ */
+ EXEC SQL UPDATE serverhosts SET inprogress = 0, success = 0, ltt = :now,
+ hosterror = MR_INTERNAL, hosterrmsg = 'DBMS Internal Error'
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ if (type == REPLICATED)
+ {
+ EXEC SQL UPDATE servers SET harderror = MR_INTERNAL,
+ errmsg = 'DBMS Internal Error' WHERE name = UPPER(:service);
+ }
}
-int dcm_send_file(char *service, char *host, char *target)
+int dcm_send_file(char *service, int type, char *host, char *target)
{
- char addr[256], data[MAXPATHLEN];
- int code;
-
- sprintf(addr, "%s:moira_update", host);
- conn = start_server_connection(addr, "");
- if (!conn || (connection_status(conn) == CON_STOPPED)) {
- com_err(whoami, connection_errno(conn), "can't connect to %s", addr);
- return MR_CANT_CONNECT;
+ char data[MAXPATHLEN];
+ int code, conn;
+ /* Overview: connect to "moira_update" on `host', authenticate, and pass
+ * the service's data file to mr_send_file() together with `target'
+ * (presumably the destination path on the host -- confirm against
+ * mr_send_file()). Returns MR_CANT_CONNECT when no connection could be
+ * opened, otherwise the code of the last step attempted (0 on success).
+ */
+ conn = mr_connect_internal(host, "moira_update");
+ if (!conn)
+ {
+ com_err(whoami, errno, "can't connect to %s", host);
+ return MR_CANT_CONNECT;
}
- code = send_auth(host);
- if (code) {
- com_err(whoami, code, "authenticating to %s", host);
- goto done;
+ code = mr_send_krb5_auth(conn, host);
+ if (code)
+ code = mr_send_auth(conn, host); /* first method failed: fall back to mr_send_auth() */
+ if (code)
+ {
+ com_err(whoami, code, "authenticating to %s", host);
+ goto done;
}
+ if (type == DISTRIBUTED)
+ sprintf(data, "%s/%s/%s", DCM_DIR, service, host); /* distributed services have one file per host */
+ else /* every other type sends the single <service>.out file */
sprintf(data, "%s/%s.out", DCM_DIR, service);
- code = send_file(data, target, 1);
- if (code == MR_UNKNOWN_PROC) code = send_file(data, target, 0);
- if (code) com_err(whoami, code, "sending data to %s", host);
+ code = mr_send_file(conn, data, target, 0);
+ if (code)
+ com_err(whoami, code, "sending data to %s", host);
done:
- send_quit();
- sever_connection(conn);
- return(code);
+ mr_send_quit(conn); /* reached after a send attempt and after an authentication failure alike */
+ close(conn);
+ return code;
}
int dcm_execute(char *service, char *host, char *script)
{
- char addr[256], inst[MAXPATHLEN];
- int code;
-
- sprintf(addr, "%s:moira_update", host);
- conn = start_server_connection(addr, "");
- if (!conn || (connection_status(conn) == CON_STOPPED)) {
- com_err(whoami, connection_errno(conn), "can't connect to %s", addr);
- return MR_CANT_CONNECT;
+ char inst[MAXPATHLEN]; /* temporary file name passed to mr_send_file() and mr_execute() */
+ int code, conn;
+ /* Overview: connect to "moira_update" on `host', authenticate, send the
+ * file `script' under a generated /tmp/moira-update.* name, then ask
+ * mr_execute() to run that name. `service' is not used in this body.
+ * Returns MR_CANT_CONNECT when no connection could be opened, otherwise
+ * the code of the last step attempted (0 on success).
+ */
+ conn = mr_connect_internal(host, "moira_update");
+ if (!conn)
+ {
+ com_err(whoami, errno, "can't connect to %s", host);
+ return MR_CANT_CONNECT;
}
- code = send_auth(host);
- if (code) {
- com_err(whoami, code, "authenticating to %s", host);
- goto done;
+ code = mr_send_krb5_auth(conn, host);
+ if (code)
+ code = mr_send_auth(conn, host); /* first method failed: fall back to mr_send_auth() */
+ if (code)
+ {
+ com_err(whoami, code, "authenticating to %s", host);
+ goto done;
}
- sprintf(inst, "/tmp/moira-update.XXXXXX");
- mktemp(inst);
- code = send_file(script, inst, 0);
- if (code) {
- com_err(whoami, code, "sending instructions to %s", host);
- goto done;
+ sprintf(inst, "/tmp/moira-update.XXXXXX");
+ mktemp(inst); /* NOTE(review): only a name is generated locally; it appears to be used as the file name on the host -- confirm */
+ code = mr_send_file(conn, script, inst, 0);
+ if (code)
+ {
+ com_err(whoami, code, "sending instructions to %s", host);
+ goto done;
}
- code = execute(inst);
- if (code) com_err(whoami, code, "executing instructions on %s", host);
+ code = mr_execute(conn, inst);
+ if (code)
+ com_err(whoami, code, "executing instructions on %s", host);
done:
- send_quit();
- sever_connection(conn);
- return(code);
-}
+ mr_send_quit(conn); /* reached on success and from every "goto done" failure path */
+ close(conn);
+ return code;
+}
+/*
+ * SQL error handler installed with "EXEC SQL WHENEVER SQLERROR DO".
+ * Fetches the current SQL error text with sqlglm(), logs it through
+ * com_err(), and exits with status 1. Does not return.
+ */
void dbmserr(void)
{
- EXEC SQL BEGIN DECLARE SECTION;
- char err_msg[256];
- EXEC SQL END DECLARE SECTION;
- int bufsize=256, msglength=0;
-
- sqlglm(err_msg, &bufsize, &msglength);
- err_msg[msglength]=0;
- com_err(whoami, 0, "Encountered SQL error:\n%s", err_msg);
- com_err(whoami, 0, "exiting");
- exit(1);
+ EXEC SQL BEGIN DECLARE SECTION;
+ char err_msg[256];
+ EXEC SQL END DECLARE SECTION;
+ /* The types match the sqlglm() prototype declared in this file. One byte
+ * is held back so the terminator written below always fits, even when
+ * the message fills the whole length offered to sqlglm().
+ */
+ unsigned int bufsize = sizeof(err_msg) - 1, msglength = 0;
+
+ sqlglm(err_msg, &bufsize, &msglength);
+ err_msg[msglength] = '\0';
+ com_err(whoami, 0, "Encountered SQL error:\n%s", err_msg);
+ com_err(whoami, 0, "exiting");
+ exit(1);
}