int generate_service(char *name, int force);
void do_hosts(char *service);
-int dcm_send_file(char *service, char *host, char *target);
+int dcm_send_file(char *service, int type, char *host, char *target);
int dcm_execute(char *service, char *host, char *script);
void dbmserr(void);
#define SQL_NO_MATCH 1403
#define SOFT_FAIL(x) (((x) == MR_NO_MEM) || ((x) == MR_CANT_CONNECT) || ((x) == MR_CCONFIG) || ((x) == MR_DEADLOCK) || ((x) == MR_BUSY) || ((x) == MR_ABORT))
+/* Argument parsing macro: true when the current option, with its first
+ * character (the leading '-') skipped, is exactly the string a or the
+ * string b. Relies on a variable named `arg' (a char ** pointing at the
+ * current argv entry) being in scope where the macro is used.
+ */
+#define argis(a, b) (!strcmp(*arg + 1, a) || !strcmp(*arg + 1, b))
+
char whobuf[256], *whoami = whobuf, *db = "moira";
+enum { UNIQUE, DISTRIBUTED, REPLICATED }; /* service types; do_hosts() derives one of these from the servers.type column */
+
int main(int argc, char **argv)
{
- int i;
+ int i, force = 0;
EXEC SQL BEGIN DECLARE SECTION;
char buf[SERVERS_NAME_SIZE], *name;
int enable;
EXEC SQL END DECLARE SECTION;
struct save_queue *sq;
- int status;
+ int status, srvcnt = 0;
+ char **arg = argv, *services[BUFSIZ];
if (strchr(argv[0], '/'))
strcpy(whoami, strrchr(argv[0], '/') + 1);
initialize_sms_error_table();
initialize_krb_error_table();
- /* if services were specified on the command line, do just those ones */
- if (argc > 1)
+ /* Walk the command line: anything starting with '-' is an option,
+ * everything else is taken as a service name and copied into services[].
+ */
+ while (++arg - argv < argc)
+ {
+ if (**arg == '-')
+ {
+ if (argis("f", "force"))
+ force++;
+ else
+ {
+ com_err(whoami, 0, "Usage: %s [-f] servicename", argv[0]);
+ exit(1);
+ }
+ }
+ else
+ /* Doesn't begin with a dash, is a service name.
+ * Build an array of them we can iterate through later.
+ */
+ {
+ /* services[] is a fixed-size array; refuse to write past its end. */
+ if (srvcnt >= (int)(sizeof(services) / sizeof(services[0])))
+ {
+ com_err(whoami, 0, "Too many services specified");
+ exit(1);
+ }
+ services[srvcnt] = malloc(SERVERS_NAME_SIZE);
+ if (!services[srvcnt])
+ {
+ com_err(whoami, 0, "Out of memory!");
+ exit(1);
+ }
+ /* strncpy() leaves the destination unterminated when the source
+ * fills it, so copy one byte less and terminate explicitly.
+ */
+ strncpy(services[srvcnt], *arg, SERVERS_NAME_SIZE - 1);
+ services[srvcnt][SERVERS_NAME_SIZE - 1] = '\0';
+ srvcnt++;
+ }
+ }
+
+ /* Iterate through services specified on the command line, if any. */
+ if (srvcnt > 0)
{
- for (i = 1; i < argc; i++)
+ for (i = 0; i < srvcnt; i++)
{
- if (generate_service(argv[i], 1))
- do_hosts(argv[i]);
+ if (generate_service(services[i], force))
+ {
+ do_hosts(services[i]);
+ }
+ /* Release the copied name whether or not anything was generated. */
+ free(services[i]);
}
exit(0);
}
/* Now run through list */
while (sq_get_data(sq, &name))
{
- if (generate_service(name, 0))
+ if (generate_service(name, force))
{
switch (fork())
{
int generate_service(char *name, int force)
{
EXEC SQL BEGIN DECLARE SECTION;
- int interval, dfcheck, status;
+ int interval, dfcheck, status, inprogress;
time_t now;
const char *errmsg;
EXEC SQL END DECLARE SECTION;
EXEC SQL CONNECT :db IDENTIFIED BY :db;
- EXEC SQL SELECT update_int, dfcheck INTO :interval, :dfcheck
- FROM servers WHERE name = UPPER(:name);
+ EXEC SQL SELECT update_int, dfcheck, inprogress INTO :interval, :dfcheck,
+ :inprogress FROM servers WHERE name = UPPER(:name);
if (sqlca.sqlcode == SQL_NO_MATCH)
{
com_err(whoami, 0, "No such service `%s'", name);
return 0;
}
+ /* Someone might try to run a DCM from the command line while the
+ * regular one is running, which will bypass the "interval" test.
+ * Check inprogress to make sure they don't stomp on themselves.
+ *
+ * Note that there is still a race condition here, and this doesn't
+ * absolutely prevent 2 DCMs from stepping on one another, but it
+ * does reduce the window of vulnerability greatly.
+ */
+ if (inprogress == 1)
+ {
+ com_err(whoami, 0, "DCM for service `%s' already in progress", name);
+ EXEC SQL COMMIT RELEASE;
+ return 0;
+ }
+
time(&now);
if ((interval * 60 + dfcheck < now) || force)
name, errmsg);
}
}
+ else
+ {
+ com_err(whoami, 0, "DCM for service `%s' has run too recently.", name);
+ com_err(whoami, 0, "Use the -force flag to force a DCM.");
+ }
+
EXEC SQL COMMIT RELEASE;
return 0;
void do_hosts(char *service)
{
EXEC SQL BEGIN DECLARE SECTION;
- char type[SERVERS_TYPE_SIZE], host[MACHINE_NAME_SIZE], *name;
+ char server_type[SERVERS_TYPE_SIZE], host[MACHINE_NAME_SIZE], *name;
char target[SERVERS_TARGET_FILE_SIZE], script[SERVERS_SCRIPT_SIZE];
const char *errmsg;
- int status = 0, dfgen, replicated, mid;
+ int status = 0, dfgen, type, mid, inprogress;
time_t now;
EXEC SQL END DECLARE SECTION;
struct save_queue *sq;
EXEC SQL CONNECT :db IDENTIFIED BY :db;
- EXEC SQL SELECT dfgen, type, target_file, script
- INTO :dfgen, :type, :target, :script
+ EXEC SQL SELECT dfgen, type, target_file, script, inprogress
+ INTO :dfgen, :server_type, :target, :script, :inprogress
FROM servers WHERE name = UPPER(:service);
- replicated = !strncmp(type, "REPLICAT", 8);
+ if (!strncmp(strtrim(server_type), "REPLICAT", 8)) /* compares the first 8 characters */
+ type = REPLICATED;
+ else if (!strncmp(server_type, "DISTRIB", 8)) /* a length of 8 takes in the
+ * terminating NUL of "DISTRIB", so this matches only when server_type
+ * is exactly "DISTRIB", not merely when it starts with it */
+ type = DISTRIBUTED;
+ else
+ type = UNIQUE; /* anything else is treated as UNIQUE */
strtrim(target);
strtrim(script);
+ /* Rudimentary locking. Doesn't eliminate the possibility of 2 DCMs
+ * stepping on one another, but makes it harder.
+ */
+ if (inprogress == 1)
+ {
+ /* Report the service argument here: the local `name' has not been
+ * assigned anything yet at this point in the function.
+ */
+ com_err(whoami, 0, "DCM for service `%s' already in progress", service);
+ EXEC SQL COMMIT RELEASE;
+ return;
+ }
+
EXEC SQL DECLARE csr_hst1 CURSOR FOR
SELECT m.name, m.mach_id FROM machine m, serverhosts sh
WHERE sh.service = UPPER(:service)
while (sq_get_data(sq, &name))
{
sq_get_data(sq, &mid);
+
+ EXEC SQL SELECT inprogress INTO :inprogress FROM serverhosts
+ WHERE service = UPPER(:service) AND mach_id = :mid;
+ /* Check if someone got here before we did.
+ * There's still a race condition here, but it's a small one. */
+ if (inprogress == 1)
+ {
+ com_err(whoami, 0, "DCM for service `%s' to host `%s' already in progress", service, name);
+ EXEC SQL COMMIT RELEASE;
+ return;
+ }
+
com_err(whoami, 0, "sending %s data to %s", service, name);
EXEC SQL UPDATE serverhosts SET inprogress = 1
WHERE service = UPPER(:service) AND mach_id = :mid;
EXEC SQL COMMIT;
- status = dcm_send_file(service, name, target);
+ status = dcm_send_file(service, type, name, target);
if (status)
{
errmsg = error_message(status);
}
EXEC SQL COMMIT;
- if (replicated)
+ if (type == REPLICATED)
break;
}
}
sq_destroy(sq);
- if (status == MR_SUCCESS || !replicated)
+ if (status == MR_SUCCESS || type != REPLICATED)
{
EXEC SQL DECLARE csr_hst2 CURSOR FOR
SELECT m.name, m.mach_id FROM machine m, serverhosts sh
WHERE sh.service = UPPER(:service) AND sh.inprogress = 1
- AND sh.mach_id = m.mach_id;
+ AND sh.enable = 1 AND sh.hosterror = 0 AND sh.mach_id = m.mach_id;
EXEC SQL OPEN csr_hst2;
sq = sq_create();
service, name, errmsg);
}
- if (replicated)
- {
- /* We're giving up, so clear the inprogress flag on
- any hosts in this service we haven't gotten to yet */
- EXEC SQL UPDATE serverhosts SET inprogress = 0
- WHERE service = UPPER(:service);
- break;
- }
+ if (type == REPLICATED)
+ break;
}
else
{
EXEC SQL CLOSE csr_hst2;
}
+ if (type == REPLICATED)
+ {
+ /* Clear inprogress flag on any hosts we started but didn't
+ * finish.
+ */
+ EXEC SQL UPDATE serverhosts SET inprogress = 0
+ WHERE service = UPPER(:service);
+ }
+
EXEC SQL WHENEVER SQLERROR DO dbmserr();
- if (status && !SOFT_FAIL(status) && replicated)
+ if (status && !SOFT_FAIL(status) && type == REPLICATED)
{
EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg
WHERE name = UPPER(:service);
EXEC SQL UPDATE serverhosts SET inprogress = 0, success = 0, ltt = :now,
hosterror = MR_INTERNAL, hosterrmsg = 'DBMS Internal Error'
WHERE service = UPPER(:service) AND mach_id = :mid;
- if (replicated)
+ if (type == REPLICATED)
{
EXEC SQL UPDATE servers SET harderror = MR_INTERNAL,
errmsg = 'DBMS Internal Error' WHERE name = UPPER(:service);
}
}
-int dcm_send_file(char *service, char *host, char *target)
+int dcm_send_file(char *service, int type, char *host, char *target)
{
char data[MAXPATHLEN];
int code, conn;
return MR_CANT_CONNECT;
}
- code = send_auth(conn, host);
+ code = mr_send_krb5_auth(conn, host);
+ if (code)
+ code = mr_send_auth(conn, host);
if (code)
{
com_err(whoami, code, "authenticating to %s", host);
goto done;
}
- sprintf(data, "%s/%s.out", DCM_DIR, service);
- code = send_file(conn, data, target, 0);
+ if (type == DISTRIBUTED)
+ sprintf(data, "%s/%s/%s", DCM_DIR, service, host);
+ else
+ sprintf(data, "%s/%s.out", DCM_DIR, service);
+ code = mr_send_file(conn, data, target, 0);
if (code)
com_err(whoami, code, "sending data to %s", host);
done:
- send_quit(conn);
+ mr_send_quit(conn);
close(conn);
return code;
}
return MR_CANT_CONNECT;
}
- code = send_auth(conn, host);
+ code = mr_send_krb5_auth(conn, host);
+ if (code)
+ code = mr_send_auth(conn, host);
if (code)
{
com_err(whoami, code, "authenticating to %s", host);
sprintf(inst, "/tmp/moira-update.XXXXXX");
mktemp(inst);
- code = send_file(conn, script, inst, 0);
+ code = mr_send_file(conn, script, inst, 0);
if (code)
{
com_err(whoami, code, "sending instructions to %s", host);
goto done;
}
- code = execute(conn, inst);
+ code = mr_execute(conn, inst);
if (code)
com_err(whoami, code, "executing instructions on %s", host);
done:
- send_quit(conn);
+ mr_send_quit(conn);
close(conn);
return code;
}