3 * The Data Control Manager for Moira.
5 * Copyright (C) 1987-1998 by the Massachusetts Institute of Technology.
6 * For copying and distribution information, see the file
10 #include <mit-copyright.h>
12 #include <moira_site.h>
13 #include <moira_schema.h>
16 #include <sys/param.h>
27 EXEC SQL INCLUDE sqlca;
/* SQL runtime routine; the SQL error handler at the end of this file calls
 * it with a buffer, a buffer size and a length out-parameter to obtain the
 * text of the error it then logs. */
28 void sqlglm(char *, unsigned int *, unsigned int *);
/* Forward declarations for the functions defined later in this file. */
32 int generate_service(char *name, int force);
33 void do_hosts(char *service);
34 int dcm_send_file(char *service, int type, char *host, char *target);
35 int dcm_execute(char *service, char *host, char *script);
/* sqlca.sqlcode value this file tests for after a SELECT or FETCH to detect
 * that no (further) row was returned. */
38 #define SQL_NO_MATCH 1403
/* True for the status codes listed below.  Elsewhere in this file a status
 * for which this is false additionally gets a hard error recorded and a
 * critical_alert() raised. */
39 #define SOFT_FAIL(x) (((x) == MR_NO_MEM) || ((x) == MR_CANT_CONNECT) || ((x) == MR_CCONFIG) || ((x) == MR_DEADLOCK) || ((x) == MR_BUSY) || ((x) == MR_ABORT))
41 /* argument parsing macro */
/* Compares the text after the first character of *arg with a and with b.
 * It relies on a variable named `arg' being in scope at the point of use;
 * a and b are not parenthesized, so pass plain string expressions. */
42 #define argis(a, b) (!strcmp(*arg + 1, a) || !strcmp(*arg + 1, b))
/* whoami is the prefix handed to com_err() throughout; it points at whobuf,
 * which main() fills in.  db is used both as the connect name and as the
 * IDENTIFIED BY value in the CONNECT statements. */
44 char whobuf[256], *whoami = whobuf, *db = "moira";
/* Service type codes; do_hosts() and dcm_send_file() compare their `type'
 * against REPLICATED and DISTRIBUTED. */
46 enum { UNIQUE, DISTRIBUTED, REPLICATED };
/*
 * Entry point for the DCM.
 *
 * Visible flow: record the program's base name in whoami, line-buffer
 * stdout and stderr, register the error tables, and scan the arguments
 * for -f / -force.  When services are named on the command line, each
 * non-option argument is passed to generate_service().  Otherwise the run
 * logs and stops if NODCMFILE exists or (per the message) dcm_enable is not
 * set; if it goes ahead, the names of all enabled services with no hard
 * error and a positive update interval are queued, the database session is
 * released, and each queued service is passed to generate_service() with a
 * fork involved, after which the parent waits for its children.
 *
 * NOTE(review): this listing omits lines (braces, local declarations, the
 * fork/exit logic), so the control flow between the visible statements is
 * not fully shown here.
 */
48 int main(int argc, char **argv)
51 EXEC SQL BEGIN DECLARE SECTION;
52 char buf[SERVERS_NAME_SIZE], *name;
54 EXEC SQL END DECLARE SECTION;
55 struct save_queue *sq;
/* Keep only the last path component of argv[0].  whoami points at whobuf,
 * so the copy is bounded by sizeof(whobuf); an over-long argv[0] is
 * truncated instead of overrunning the buffer. */
59 if (strchr(argv[0], '/'))
60 snprintf(whoami, sizeof(whobuf), "%s", strrchr(argv[0], '/') + 1);
61 else snprintf(whoami, sizeof(whobuf), "%s", argv[0]);
/* Line-buffer both streams so log lines are flushed as they are written. */
64 setvbuf(stderr, NULL, _IOLBF, BUFSIZ);
65 setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
67 initialize_sms_error_table();
68 initialize_krb_error_table();
/* Walk the arguments; argis() inspects *arg, the current argument. */
70 while (++arg - argv < argc)
74 if (argis("f", "force"))
78 com_err(whoami, 0, "Usage: %s [-f] servicename", argv[0]);
84 /* if services were specified on the command line, do just those ones */
87 for (i = 1; i < argc; i++)
89 if (argv[i][0] == '-')
91 if (generate_service(argv[i], force))
97 /* if DCM is not enabled, exit after logging */
98 if (!access(NODCMFILE, F_OK))
100 printf("/etc/nodcm exists -- exiting\n");
/* From here on any SQL error invokes dbmserr(). */
104 EXEC SQL WHENEVER SQLERROR DO dbmserr();
106 EXEC SQL CONNECT :db IDENTIFIED BY :db;
108 EXEC SQL SELECT value INTO :enable FROM numvalues WHERE name = 'dcm_enable';
111 printf("dcm_enable not set -- exiting\n");
115 /* fetch list of services */
116 EXEC SQL DECLARE csr_svc CURSOR FOR SELECT LOWER(name) FROM servers
117 WHERE enable = 1 AND harderror = 0 AND update_int > 0;
118 EXEC SQL OPEN csr_svc;
122 EXEC SQL FETCH csr_svc INTO :buf;
/* Queue a trimmed heap copy of each name so the list outlives the cursor. */
126 sq_save_data(sq, strdup(strtrim(buf)));
128 EXEC SQL CLOSE csr_svc;
129 /* we will repeatedly open and close the db since it seems to get
130 upset if you keep it open across a fork */
131 EXEC SQL COMMIT RELEASE;
133 /* Now run through list */
134 while (sq_get_data(sq, &name))
136 if (generate_service(name, force))
141 com_err(whoami, errno, "forking for service %s -- exiting",
/* Append the service name and pid to the log prefix, writing only into the
 * space that is left in whobuf after the current contents. */
145 snprintf(strchr(whoami, '\0'), sizeof(whobuf) - strlen(whoami), " (%s:%ld)", name, (long)getpid());
147 com_err(whoami, 0, "exiting");
155 com_err(whoami, 0, "All files generated. Waiting for children to exit");
/* Reap children until waitpid() reports there are none left. */
156 while (waitpid(0, &status, 0) > 0)
158 com_err(whoami, 0, "exiting");
/*
 * Run the generator program for one service when it is due, or when
 * `force' is set.
 *
 * name:  service name; it is matched against servers.name via UPPER() and
 *        used to build the generator and output paths.
 * force: non-zero runs the generator even if the interval test fails.
 *
 * Visible flow: connect, read update_int / dfcheck / inprogress for the
 * service, and log if the service does not exist or a DCM is already in
 * progress.  If due or forced, BIN_DIR/<name>.gen is checked for existence,
 * the service is marked inprogress, and the generator is run through
 * system() writing to DCM_DIR/<name>.out.  The resulting status selects
 * which columns of `servers' are updated; a status that is neither success,
 * MR_NO_CHANGE nor SOFT_FAIL also sets harderror and raises a critical
 * alert.  The final UPDATE with 'DBMS Internal Error' follows the
 * WHENEVER SQLERROR GOTO gen_cleanup directive, i.e. it is the SQL-error
 * path.
 *
 * NOTE(review): return statements are not part of this listing; main()
 * tests the result for non-zero.  Confirm the exact return contract against
 * the full file.
 */
162 int generate_service(char *name, int force)
164 EXEC SQL BEGIN DECLARE SECTION;
165 int interval, dfcheck, status, inprogress;
168 EXEC SQL END DECLARE SECTION;
169 char dfgen_prog[MAXPATHLEN], dfgen_cmd[2 * MAXPATHLEN];
170 struct sigaction action, prevaction;
173 EXEC SQL CONNECT :db IDENTIFIED BY :db;
175 EXEC SQL SELECT update_int, dfcheck, inprogress INTO :interval, :dfcheck,
176 :inprogress FROM servers WHERE name = UPPER(:name);
177 if (sqlca.sqlcode == SQL_NO_MATCH)
179 com_err(whoami, 0, "No such service `%s'", name);
180 EXEC SQL COMMIT RELEASE;
184 /* Someone might try to run a DCM from the command line while the
185 * regular one is running, which will bypass the "interval" test.
186 * Check inprogress to make sure they don't stomp on themselves.
190 com_err(whoami, 0, "DCM for service `%s' already in progress", name);
191 EXEC SQL COMMIT RELEASE;
197 if ((interval * 60 + dfcheck < now) || force)
/* Bounded formatting: `name' can come straight from the command line, so
 * never write past dfgen_prog.  A truncated path is then rejected by the
 * existence check that follows. */
199 snprintf(dfgen_prog, sizeof(dfgen_prog), "%s/%s.gen", BIN_DIR, name);
200 if (access(dfgen_prog, F_OK) != 0)
202 com_err(whoami, 0, "prog %s doesn't exist", dfgen_prog);
203 EXEC SQL COMMIT RELEASE;
/* Same bound for the shell command handed to system(). */
206 snprintf(dfgen_cmd, sizeof(dfgen_cmd), "exec %s %s/%s.out", dfgen_prog, DCM_DIR, name);
207 com_err(whoami, 0, "running %s", dfgen_prog);
209 EXEC SQL WHENEVER SQLERROR GOTO gen_cleanup;
211 EXEC SQL UPDATE servers SET inprogress = 1
212 WHERE name = UPPER(:name);
/* Run the generator with SIGCHLD at its default disposition, then put the
 * previous disposition back. */
216 sigemptyset(&action.sa_mask);
217 action.sa_handler = SIG_DFL;
218 sigaction(SIGCHLD, &action, &prevaction);
219 waits = system(dfgen_cmd);
220 sigaction(SIGCHLD, &prevaction, NULL);
221 if (WIFSIGNALED(waits))
223 status = MR_COREDUMP;
224 com_err(whoami, status, " %s exited on signal %d",
225 dfgen_prog, WTERMSIG(waits));
227 else if (WEXITSTATUS(waits))
229 /* extract the process's exit value */
230 status = WEXITSTATUS(waits) + ERROR_TABLE_BASE_sms;
231 if (status != MR_NO_CHANGE)
232 com_err(whoami, status, "in %s", dfgen_prog);
/* Record the outcome; every branch clears inprogress. */
237 if (status == MR_SUCCESS)
239 EXEC SQL UPDATE servers SET dfgen = :now, dfcheck = :now,
240 inprogress = 0 WHERE name = UPPER(:name);
241 EXEC SQL COMMIT RELEASE;
244 else if (status == MR_NO_CHANGE)
246 EXEC SQL UPDATE servers SET dfcheck = :now, inprogress = 0
247 WHERE name = UPPER(:name);
249 else if (SOFT_FAIL(status))
251 errmsg = error_message(status);
252 EXEC SQL UPDATE servers SET errmsg = :errmsg, inprogress = 0
253 WHERE name = UPPER(:name);
255 else /* HARD_FAIL(status) */
257 errmsg = error_message(status);
258 EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg,
259 inprogress = 0 WHERE name = UPPER(:name);
260 critical_alert("DCM", "DCM building config files for %s: %s",
266 com_err(whoami, 0, "DCM for service `%s' has run too recently.", name);
267 com_err(whoami, 0, "Use the -force flag to force a DCM.");
270 EXEC SQL COMMIT RELEASE;
/* SQL-error path: switch back to dbmserr() and flag the service. */
274 EXEC SQL WHENEVER SQLERROR DO dbmserr();
275 EXEC SQL UPDATE servers SET inprogress = 0, harderror = MR_INTERNAL,
276 errmsg = 'DBMS Internal Error' WHERE name = UPPER(:name);
/*
 * Propagate a service's generated files to its hosts and then run the
 * service's install script on them.
 *
 * service: service name, matched against the database via UPPER().
 *
 * Visible flow: connect and read dfgen, type, target_file, script and
 * inprogress for the service; classify the type string by prefix; queue
 * every enabled, error-free host whose lts is older than dfgen; for each
 * queued host mark it inprogress and call dcm_send_file(); then, if the
 * send status is MR_SUCCESS or the service is not REPLICATED, queue the
 * hosts still marked inprogress and call dcm_execute() on each.  Failures
 * are written to serverhosts; a failure for which SOFT_FAIL is false also
 * sets hosterror and raises a critical alert.  The trailing statements
 * with 'DBMS Internal Error' follow WHENEVER SQLERROR GOTO host_cleanup,
 * i.e. they are the SQL-error path.
 *
 * NOTE(review): this listing omits lines (braces, conditions, the
 * assignments to `type'), so the nesting of the visible statements is not
 * fully shown here.
 */
280 void do_hosts(char *service)
282 EXEC SQL BEGIN DECLARE SECTION;
283 char server_type[SERVERS_TYPE_SIZE], host[MACHINE_NAME_SIZE], *name;
284 char target[SERVERS_TARGET_FILE_SIZE], script[SERVERS_SCRIPT_SIZE];
286 int status = 0, dfgen, type, mid, inprogress;
288 EXEC SQL END DECLARE SECTION;
289 struct save_queue *sq;
294 EXEC SQL CONNECT :db IDENTIFIED BY :db;
296 EXEC SQL SELECT dfgen, type, target_file, script, inprogress
297 INTO :dfgen, :server_type, :target, :script, :inprogress
298 FROM servers WHERE name = UPPER(:service);
/* Classify by prefix.  The compare length must equal the literal's length:
 * "DISTRIB" is 7 characters, and comparing 8 would include its terminating
 * NUL, so any longer value such as "DISTRIBUTED" could never match. */
299 if (!strncmp(strtrim(server_type), "REPLICAT", 8))
301 else if (!strncmp(server_type, "DISTRIB", 7))
/* Report the service being processed; `name' has not been assigned at this
 * point in the function. */
310 com_err(whoami, 0, "DCM for service `%s' already in progress", service);
311 EXEC SQL COMMIT RELEASE;
/* Hosts that are enabled, error-free and last updated before dfgen. */
315 EXEC SQL DECLARE csr_hst1 CURSOR FOR
316 SELECT m.name, m.mach_id FROM machine m, serverhosts sh
317 WHERE sh.service = UPPER(:service)
318 AND sh.enable = 1 AND sh.hosterror = 0
319 AND sh.lts < :dfgen AND sh.mach_id = m.mach_id;
320 EXEC SQL OPEN csr_hst1;
/* Both targets are host variables and need the leading colon, as in the
 * matching FETCH on csr_hst2 below. */
324 EXEC SQL FETCH csr_hst1 INTO :host, :mid;
325 if (sqlca.sqlcode == SQL_NO_MATCH)
/* Each host is queued as two entries: a heap copy of its trimmed name,
 * then its machine id.
 * NOTE(review): mid is an int but is stored as a void * and later read
 * back through &mid; confirm sq_get_data()'s parameter type and that this
 * is safe where pointers are wider than int. */
328 sq_save_data(sq, strdup(strtrim(host)));
329 sq_save_data(sq, (void *)mid);
331 EXEC SQL CLOSE csr_hst1;
333 EXEC SQL WHENEVER SQLERROR GOTO host_cleanup;
334 while (sq_get_data(sq, &name))
336 sq_get_data(sq, &mid);
338 EXEC SQL SELECT inprogress INTO :inprogress FROM serverhosts
339 WHERE service = UPPER(:service) AND mach_id = :mid;
342 com_err(whoami, 0, "DCM for service `%s' to host `%s' already in progress", service, name);
343 EXEC SQL COMMIT RELEASE;
347 com_err(whoami, 0, "sending %s data to %s", service, name);
348 EXEC SQL UPDATE serverhosts SET inprogress = 1
349 WHERE service = UPPER(:service) AND mach_id = :mid;
351 status = dcm_send_file(service, type, name, target);
/* Record the failure text for the host; a status for which SOFT_FAIL is
 * false also sets hosterror and raises an alert. */
354 errmsg = error_message(status);
355 EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
356 inprogress = 0, success = 0, ltt = :now
357 WHERE service = UPPER(:service) AND mach_id = :mid;
358 if (!SOFT_FAIL(status))
360 EXEC SQL UPDATE serverhosts SET hosterror = :status
361 WHERE service = UPPER(:service) AND mach_id = :mid;
362 critical_alert("DCM", "DCM updating %s:%s: %s",
363 service, name, errmsg);
367 if (type == REPLICATED)
373 if (status == MR_SUCCESS || type != REPLICATED)
/* Second pass: the hosts still flagged inprogress for this service. */
375 EXEC SQL DECLARE csr_hst2 CURSOR FOR
376 SELECT m.name, m.mach_id FROM machine m, serverhosts sh
377 WHERE sh.service = UPPER(:service) AND sh.inprogress = 1
378 AND sh.enable = 1 AND sh.hosterror = 0 AND sh.mach_id = m.mach_id;
379 EXEC SQL OPEN csr_hst2;
384 EXEC SQL FETCH csr_hst2 INTO :host, :mid;
385 if (sqlca.sqlcode == SQL_NO_MATCH)
388 sq_save_data(sq, strdup(strtrim(host)));
389 sq_save_data(sq, (void *)mid);
391 EXEC SQL CLOSE csr_hst2;
393 while (sq_get_data(sq, &name))
395 sq_get_data(sq, &mid);
397 com_err(whoami, 0, "executing instructions on %s", name);
398 status = dcm_execute(service, name, script);
401 errmsg = error_message(status);
402 EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
403 inprogress = 0, success = 0, ltt = :now
404 WHERE service = UPPER(:service) AND mach_id = :mid;
405 if (!SOFT_FAIL(status))
407 EXEC SQL UPDATE serverhosts SET hosterror = :status
408 WHERE service = UPPER(:service) AND mach_id = :mid;
409 critical_alert("DCM", "DCM updating %s:%s: %s",
410 service, name, errmsg);
413 if (type == REPLICATED)
418 EXEC SQL UPDATE serverhosts SET inprogress = 0, ltt = :now,
419 lts = :now, success = 1 WHERE service = UPPER(:service)
/* NOTE(review): csr_hst2 also appears to be closed right after its fetch
 * loop above; confirm this second CLOSE is intended. */
424 EXEC SQL CLOSE csr_hst2;
427 if (type == REPLICATED)
429 /* Clear inprogress flag on any hosts we started but didn't
432 EXEC SQL UPDATE serverhosts SET inprogress = 0
433 WHERE service = UPPER(:service);
436 EXEC SQL WHENEVER SQLERROR DO dbmserr();
437 if (status && !SOFT_FAIL(status) && type == REPLICATED)
439 EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg
440 WHERE name = UPPER(:service);
443 EXEC SQL COMMIT RELEASE;
447 EXEC SQL UPDATE serverhosts SET inprogress = 0, success = 0, ltt = :now,
448 hosterror = MR_INTERNAL, hosterrmsg = 'DBMS Internal Error'
449 WHERE service = UPPER(:service) AND mach_id = :mid;
450 if (type == REPLICATED)
452 EXEC SQL UPDATE servers SET harderror = MR_INTERNAL,
453 errmsg = 'DBMS Internal Error' WHERE name = UPPER(:service);
/*
 * Send a service's generated data to one host.
 *
 * service: service name, used to locate the data under DCM_DIR.
 * type:    service type; DISTRIBUTED selects a per-host file
 *          DCM_DIR/<service>/<host>, otherwise DCM_DIR/<service>.out.
 * host:    host to connect to (the "moira_update" service on it).
 * target:  destination passed through to mr_send_file().
 *
 * Returns MR_CANT_CONNECT when the connection cannot be made.  Failures of
 * mr_send_auth() and mr_send_file() are logged with their code.
 * NOTE(review): the remaining return statements and the failure conditions
 * are not part of this listing; confirm against the full file.
 */
457 int dcm_send_file(char *service, int type, char *host, char *target)
459 char data[MAXPATHLEN];
462 conn = mr_connect_internal(host, "moira_update");
465 com_err(whoami, errno, "can't connect to %s", host);
466 return MR_CANT_CONNECT;
469 code = mr_send_auth(conn, host);
472 com_err(whoami, code, "authenticating to %s", host);
/* Build the source path with a bound so that a long service or host name
 * cannot write past the end of data[]. */
476 if (type == DISTRIBUTED)
477 snprintf(data, sizeof(data), "%s/%s/%s", DCM_DIR, service, host);
479 snprintf(data, sizeof(data), "%s/%s.out", DCM_DIR, service);
480 code = mr_send_file(conn, data, target, 0);
482 com_err(whoami, code, "sending data to %s", host);
/*
 * Send a service's script to one host and execute it there.
 *
 * service: service name.  It is not referenced by any line visible in this
 *          listing.
 * host:    host to connect to (the "moira_update" service on it).
 * script:  file passed to mr_send_file() as the source.
 *
 * Returns MR_CANT_CONNECT when the connection cannot be made.  Failures of
 * mr_send_auth(), mr_send_file() and mr_execute() are logged with their
 * code.
 * NOTE(review): the remaining return statements and the failure conditions
 * are not part of this listing; confirm against the full file.
 */
490 int dcm_execute(char *service, char *host, char *script)
492 char inst[MAXPATHLEN];
495 conn = mr_connect_internal(host, "moira_update");
498 com_err(whoami, errno, "can't connect to %s", host);
499 return MR_CANT_CONNECT;
502 code = mr_send_auth(conn, host);
505 com_err(whoami, code, "authenticating to %s", host);
/* The same fixed name is used as the send destination and as the argument
 * to mr_execute().
 * NOTE(review): the XXXXXX suffix looks like a temp-file template; whether
 * anything expands it is not visible here. */
509 sprintf(inst, "/tmp/moira-update.XXXXXX");
511 code = mr_send_file(conn, script, inst, 0);
514 com_err(whoami, code, "sending instructions to %s", host);
518 code = mr_execute(conn, inst);
520 com_err(whoami, code, "executing instructions on %s", host);
/*
 * Body of the SQL error handler: fetch the text of the current SQL error
 * with sqlglm(), NUL-terminate it, log it, and log "exiting".
 *
 * NOTE(review): the signature line and the declaration of err_msg are not
 * part of this listing; this is presumably the dbmserr() routine named in
 * the WHENEVER SQLERROR DO directives.
 */
530 EXEC SQL BEGIN DECLARE SECTION;
532 EXEC SQL END DECLARE SECTION;
/* The size and length arguments are unsigned int so that &bufsize and
 * &msglength match the sqlglm() prototype declared at the top of the file. */
533 unsigned int bufsize = 256, msglength = 0;
535 sqlglm(err_msg, &bufsize, &msglength);
/* NOTE(review): if err_msg holds exactly bufsize bytes and sqlglm() can
 * report msglength == bufsize, this store is one past the end; confirm the
 * size of err_msg. */
536 err_msg[msglength] = '\0';
537 com_err(whoami, 0, "Encountered SQL error:\n%s", err_msg);
538 com_err(whoami, 0, "exiting");