2 * The Data Control Manager for MOIRA.
4 * Copyright 1987, 1988 by the Massachusetts Institute of Technology.
5 * For copying and distribution information, see the file
14 static char rcsid_dcm_c[] = "$Header$";
29 #include <moira_site.h>
31 #include "mit-copyright.h"
35 extern int log_flags, errno;
39 /* declared global so that we can get the current time from different places. */
/* Fragment of the start-up sequence; the lines between the numbered ones are
 * not in view, so the notes below describe only what is visible. */
/* Debug flags are 0 when s is NULL, otherwise the atoi() value of s.
 * NOTE(review): where s comes from is not visible here -- confirm. */
55 dbg = s ? atoi(s) : 0;
/* Switch both stderr and stdout to line buffering. */
59 setvbuf(stderr, NULL, _IOLBF, BUFSIZ);
60 setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
/* Step through the remaining command-line arguments. */
62 while(++arg - argv < argc) {
/* The debug value is the text following the second character of the current
 * argument when there is any, otherwise the next argument.
 * NOTE(review): the fallback advances arg with no visible bounds check --
 * confirm the option cannot be the last argument. */
66 dbg = atoi((*arg)[2]? *arg+2: *++arg);
/* Install dcm_com_err_hook as the com_err hook. */
70 set_com_err_hook(dcm_com_err_hook);
72 /* if /etc/nodcm exists, punt quietly. */
/* access() yields 0 when NODCMFILE can be found, hence the negation. */
73 if (!access(NODCMFILE, F_OK)) {
/* A nonzero status from mr_connect is reported and leave() is called. */
77 if (status = mr_connect("")) {
78 com_err(whoami, status, " on mr_connect");
79 leave("connect failed");
/* Authenticate under the client name dcm; a nonzero status is reported. */
82 if (status = mr_auth("dcm")) {
83 com_err(whoami, status, " on \"authenticate\"");
87 /* if DCM is not enabled, exit after logging */
/* Ask get_value for dcm_enable; gqval is the callback and &i its hint. */
88 qargv[0] = "dcm_enable";
89 if (status = mr_query("get_value", 1, qargv, gqval, &i)) {
90 com_err(whoami, status, " check dcm_enable");
91 leave("query failed");
/* NOTE(review): the test guarding this call is not in view; presumably it
 * checks the value gqval stored in i -- confirm. */
95 leave("dcm_enable not set");
105 /* Used by the get_value query when checking for dcm_enable. */
/* Query callback: argc and argv describe the returned tuple, hint is the
 * caller-supplied pointer.  The parameter declarations are not in view. */
107 gqval(argc, argv, hint)
/* Store the integer value of the first returned field through hint.
 * atoi() gives no indication of malformed input. */
112 *hint = atoi(argv[0]);
117 /* Used by qualified_get_server to make a list of servers to check */
/* Query callback invoked with one returned tuple (argc, argv) and the
 * destination queue sq. */
119 qgetsv(argc, argv, sq)
122 struct save_queue *sq;
/* Append the strsave() result for the first field to the queue.
 * NOTE(review): presumably strsave returns a private copy that the consumer
 * of the queue owns -- confirm. */
124 sq_save_data(sq, strsave(argv[0]));
129 /* Used by get_server_info to record all of the returned information */
/* Query callback: copies the eleven returned fields, argv[0] through
 * argv[10], into the struct service that sserv points to.  Text fields go
 * through strsave(), numeric fields through atoi().  argc is not consulted in
 * the visible lines. */
131 getsvinfo(argc, argv, sserv)
134 struct service *sserv;
136 sserv->service = strsave(argv[0]);
137 sserv->interval = atoi(argv[1]);
138 sserv->target = strsave(argv[2]);
139 sserv->script = strsave(argv[3]);
140 sserv->dfgen = atoi(argv[4]);
141 sserv->dfcheck = atoi(argv[5]);
142 sserv->type = strsave(argv[6]);
143 sserv->enable = atoi(argv[7]);
144 sserv->inprogress = atoi(argv[8]);
145 sserv->harderror = atoi(argv[9]);
146 sserv->errmsg = strsave(argv[10]);
151 /* Scan the services and process any that need it. */
/* Only part of this routine is in view (its header and many body lines are
 * missing), so the notes below are limited to the visible statements. */
156 struct save_queue *sq, *sq_create();
/* NOTE(review): dfgen_prog and dfgen_cmd are fixed-size and are filled with
 * sprintf() further down with no length check; a long BIN_DIR, DCM_DIR or
 * service name could overrun them. */
157 char *service, dfgen_prog[64], dfgen_cmd[128];
159 int status, lock_fd, ex, (*cstat)();
163 struct sigaction action, prevaction;
165 if (dbg & DBG_VERBOSE)
166 com_err(whoami, 0, "starting pass over services");
169 qargv[1] = "dontcare";
/* Fetch the list of services with a three-argument qualified_get_server
 * query; a nonzero status is reported and leave() is called. */
172 if (status = mr_query_with_retry("qualified_get_server", 3, qargv,
174 com_err(whoami, status, " getting services");
175 leave("query failed");
/* Handle each service name taken from the queue in turn. */
177 while (sq_get_data(sq, &service)) {
/* Walks every character of the name; the loop body is not in view. */
178 for (p = service; *p; p++)
181 com_err(whoami, 0, "checking %s...", service);
/* The generator program is BIN_DIR/<service>.gen. */
183 sprintf(dfgen_prog, "%s/%s.gen", BIN_DIR, service);
184 if (!file_exists(dfgen_prog)) {
185 com_err(whoami, 0, "prog %s doesn't exist\n", dfgen_prog);
/* Command line later handed to system(): the generator followed by the
 * output path DCM_DIR/<service>.out.
 * NOTE(review): the service name is placed in a shell command unquoted --
 * confirm it can never contain shell metacharacters. */
189 sprintf(dfgen_cmd, "exec %s %s/%s.out",
190 dfgen_prog, DCM_DIR, service);
/* Record the current time in tv for the comparisons and updates below. */
191 gettimeofday(&tv, &tz);
192 if (status = mr_query_with_retry("get_server_info", 1, qargv,
194 com_err(whoami, status, " getting service %s info, skipping to next service", service);
/* Build the six arguments for set_server_internal_flags from the current
 * service record: name, dfgen, dfcheck, a zero flag, harderror and errmsg.
 * NOTE(review): no release of these strsave()/itoa() results is visible in
 * this fragment -- confirm who owns them. */
197 svc.service = strsave(service);
198 qargv[0] = strsave(service);
199 qargv[1] = itoa(svc.dfgen);
200 qargv[2] = itoa(svc.dfcheck);
201 qargv[3] = strsave("0");
202 qargv[4] = itoa(svc.harderror);
203 qargv[5] = strsave(svc.errmsg);
/* A zero interval skips this branch.  Otherwise the service is due when
 * dfcheck plus interval * 60 lies before the current tv_sec. */
204 if (svc.interval != 0) {
205 if (svc.interval * 60 + svc.dfcheck < tv.tv_sec) {
206 lock_fd = maybe_lock_update("@db@", service, 1);
/* Slots 3 to 5 are replaced before the first state update.
 * NOTE(review): the earlier pointers in these slots are overwritten with no
 * visible release; presumably slot 3 is the in-progress flag -- confirm. */
212 qargv[3] = strsave("1");
213 qargv[4] = strsave("0");
214 qargv[5] = strsave("");
215 status = mr_query_with_retry("set_server_internal_flags", 6,
216 qargv, scream, NULL);
217 if (status != MR_SUCCESS) {
218 com_err(whoami, status, " setting server state");
222 com_err(whoami, status, " running %s", dfgen_prog);
/* SIGCHLD is set to its default action around the system() call and the
 * previous disposition is restored afterwards. */
225 sigemptyset(&action.sa_mask);
226 action.sa_handler = SIG_DFL;
227 sigaction(SIGCHLD, &action, &prevaction);
228 waits = system(dfgen_cmd);
229 sigaction(SIGCHLD, &prevaction, NULL);
/* Turn the wait status into a status code: termination by signal becomes
 * MR_COREDUMP, a nonzero exit value is offset by ERROR_TABLE_BASE_sms. */
230 if (WIFSIGNALED(waits)) {
231 status = MR_COREDUMP;
232 com_err(whoami, status, " %s exited on signal %d",
233 dfgen_prog, WTERMSIG(waits));
234 } else if (WEXITSTATUS(waits)) {
235 /* extract the process's exit value */
236 status = WEXITSTATUS(waits) + ERROR_TABLE_BASE_sms;
237 com_err(whoami, status, " %s exited", dfgen_prog);
/* Four outcomes follow.  A soft failure stores the error text in slot 5.
 * MR_NO_CHANGE moves only dfcheck to the current time.  MR_SUCCESS moves both
 * dfgen and dfcheck to the current time.  Anything else is a hard failure:
 * dfcheck is moved, the status and its text are stored, and a critical alert
 * is raised. */
240 if (SOFT_FAIL(status)) {
242 qargv[5] = strsave(error_message(status));
243 } else if (status == MR_NO_CHANGE) {
245 qargv[2] = itoa(tv.tv_sec);
246 svc.dfcheck = tv.tv_sec;
247 } else if (status == MR_SUCCESS) {
250 qargv[1] = itoa(tv.tv_sec);
251 qargv[2] = strsave(qargv[1]);
252 svc.dfcheck = svc.dfgen = tv.tv_sec;
253 } else { /* HARD_FAIL(status) */
257 qargv[2] = itoa(tv.tv_sec);
258 svc.dfcheck = tv.tv_sec;
259 qargv[4] = itoa(status);
260 qargv[5] = strsave(error_message(status));
261 critical_alert("DCM","DCM building config files for %s: %s",
/* Slot 3 goes back to zero and the resulting state is written out. */
266 qargv[3] = strsave("0");
267 status = mr_query_with_retry("set_server_internal_flags", 6,
268 qargv, scream, NULL);
270 com_err(whoami, status, " setting service state");
/* NOTE(review): the statement controlled by this test is not in view;
 * presumably it selects the value of ex used for the lock below -- confirm. */
279 if (!strcmp(svc.type, "REPLICAT"))
283 lock_fd = maybe_lock_update("@db@", service, ex);
300 /* Used by qualified_get_server_host to make a list of hosts to check */
/* Query callback invoked with one returned tuple (argc, argv) and the
 * destination queue sq. */
302 qgethost(argc, argv, sq)
305 struct save_queue *sq;
/* Append the strsave() result for the second field, argv[1], to the queue;
 * the first field is not used in the visible lines. */
307 sq_save_data(sq, strsave(argv[1]));
312 /* Used by get_server_host_info to store all of the info about a host */
/* Query callback: copies the thirteen returned fields, argv[0] through
 * argv[12], into the struct svrhost that shost points to.  Text fields go
 * through strsave(), numeric fields through atoi().  argc is not consulted in
 * the visible lines. */
314 gethostinfo(argc, argv, shost)
317 struct svrhost *shost;
319 shost->service = strsave(argv[0]);
320 shost->machine = strsave(argv[1]);
321 shost->enable = atoi(argv[2]);
322 shost->override = atoi(argv[3]);
323 shost->success = atoi(argv[4]);
324 shost->inprogress = atoi(argv[5]);
325 shost->hosterror = atoi(argv[6]);
326 shost->errmsg = strsave(argv[7]);
327 shost->lasttry = atoi(argv[8]);
328 shost->lastsuccess = atoi(argv[9]);
329 shost->value1 = atoi(argv[10]);
330 shost->value2 = atoi(argv[11]);
331 shost->value3 = strsave(argv[12]);
336 /* Scans all of the hosts for a particular service, and processes them. */
/* Only part of this routine is in view (its header and many body lines are
 * missing), so the notes below are limited to the visible statements. */
/* Nine slots, matching the argument count of 9 passed with
 * set_server_host_internal below. */
341 char *argv[9], *machine;
343 struct save_queue *sq;
344 struct svrhost shost;
/* Arguments for the host list query: the service name in slot 0 and
 * DONTCARE in slots 2 to 4. */
347 argv[0] = svc->service;
349 argv[2] = argv[3] = argv[4] = "DONTCARE";
351 status = mr_query_with_retry("qualified_get_server_host", 6, argv,
/* MR_NO_MATCH is tested on its own before the general error report. */
353 if (status == MR_NO_MATCH) {
356 com_err(whoami, status, " getting server_hosts for %s", svc->service);
/* Handle each machine name taken from the queue in turn. */
359 while (sq_get_data(sq, &machine)) {
361 com_err(whoami, 0, "checking %s...", machine);
/* Load the full host record into shost through the gethostinfo callback. */
363 status = mr_query_with_retry("get_server_host_info", 2, argv,
364 gethostinfo, &shost);
366 com_err(whoami,status, " getting server_host_info for %s", machine);
/* The host is left alone when it is disabled, when it has a host error, or
 * when it already succeeded without an override and its last success is not
 * older than the service dfgen time. */
369 if (!shost.enable || shost.hosterror ||
370 (shost.success && !shost.override &&
371 shost.lastsuccess >= svc->dfgen)) {
373 com_err(whoami, 0, "not updating %s:%s", svc->service, machine);
377 lock_fd = maybe_lock_update(machine, svc->service, 1);
/* Arguments for the first set_server_host_internal call.
 * NOTE(review): the slots mix string literals with strsave()/itoa() results,
 * so they cannot all be released the same way -- confirm how they are
 * cleaned up. */
380 argv[0] = svc->service;
382 argv[2] = argv[3] = argv[5] = "0";
384 argv[6] = strsave("");
385 argv[7] = itoa(tv.tv_sec);
386 argv[8] = itoa(shost.lastsuccess);
387 status = mr_query_with_retry("set_server_host_internal", 9, argv,
389 if (status != MR_SUCCESS) {
390 com_err(whoami,status," while setting internal state for %s:%s",
391 svc->service, machine);
/* Push the update to the host, then sort the result into success, soft
 * failure and hard failure. */
394 status = mr_update_server(svc->service, machine, svc->target,
/* Success: slots 2 and 4 become zero and slot 8 takes the current time. */
396 if (status == MR_SUCCESS) {
397 argv[2] = argv[4] = "0";
400 argv[8] = itoa(tv.tv_sec);
401 } else if (SOFT_FAIL(status)) {
404 argv[6] = strsave(error_message(status));
405 } else { /* HARD_FAIL */
/* Hard failure: slot 2 takes the existing override value, slot 5 the status
 * and slot 6 its text; a critical alert is then raised. */
406 argv[2] = itoa(shost.override);
408 argv[5] = itoa(status);
410 argv[6] = strsave(error_message(status));
411 critical_alert("DCM", "DCM updating %s:%s: %s",
412 machine, svc->service, argv[6]);
/* For a service of type REPLICAT the hard error is also recorded on the
 * service itself and written back with set_server_internal_flags. */
413 if (!strcmp(svc->type, "REPLICAT")) {
416 svc->harderror = status;
417 svc->errmsg = strsave(argv[6]);
418 qargv[0] = strsave(svc->service);
419 qargv[1] = itoa(svc->dfgen);
420 qargv[2] = itoa(svc->dfcheck);
421 qargv[3] = strsave("0");
422 qargv[4] = itoa(svc->harderror);
423 qargv[5] = strsave(svc->errmsg);
424 status = mr_query_with_retry("set_server_internal_flags",
425 6, qargv, scream, NULL);
427 com_err(whoami, status, " setting service state again");
/* NOTE(review): two write-backs of the host state are visible; the control
 * flow that chooses between them is not in view. */
435 status = mr_query_with_retry("set_server_host_internal",
436 9, argv,scream,NULL);
440 com_err(whoami, status, " setting host state again");
445 status = mr_query_with_retry("set_server_host_internal", 9, argv,
448 com_err(whoami, status, " setting host state again");