2 * The Data Control Manager for MOIRA.
4 * Copyright 1987, 1988 by the Massachusetts Institute of Technology.
5 * For copying and distribution information, see the file
14 static char rcsid_dcm_c[] = "$Header$";
25 #include <moira_site.h>
27 #include "mit-copyright.h"
30 extern char *getenv();
32 extern char *error_message();
38 /* Declared global so that the current time can be read from several places. */
54 dbg = s ? atoi(s) : 0;
60 while(++arg - argv < argc) {
64 dbg = atoi((*arg)[2]? *arg+2: *++arg);
68 set_com_err_hook(dcm_com_err_hook);
70 /* If the NODCMFILE flag file (/etc/nodcm) exists, punt quietly. */
71 if (!access(NODCMFILE, F_OK)) {
75 if (status = mr_connect("")) {
76 com_err(whoami, status, " on mr_connect");
77 leave("connect failed");
80 if (status = mr_auth("dcm")) {
81 com_err(whoami, status, " on \"authenticate\"");
85 /* If DCM is not enabled (the "dcm_enable" value is not set), exit after logging. */
86 qargv[0] = "dcm_enable";
87 if (status = mr_query("get_value", 1, qargv, gqval, &i)) {
88 com_err(whoami, status, " check dcm_enable");
89 leave("query failed");
93 leave("dcm_enable not set");
103 /* Used by the get_value query when checking for dcm_enable. */
105 gqval(argc, argv, hint)
110 *hint = atoi(argv[0]);
115 /* Used by qualified_get_server to make a list of servers to check */
117 qgetsv(argc, argv, sq)
120 struct save_queue *sq;
122 sq_save_data(sq, strsave(argv[0]));
127 /* Used by get_server_info to record all of the returned information */
129 getsvinfo(argc, argv, sserv)
132 struct service *sserv;
134 sserv->service = strsave(argv[0]);
135 sserv->interval = atoi(argv[1]);
136 sserv->target = strsave(argv[2]);
137 sserv->script = strsave(argv[3]);
138 sserv->dfgen = atoi(argv[4]);
139 sserv->dfcheck = atoi(argv[5]);
140 sserv->type = strsave(argv[6]);
141 sserv->enable = atoi(argv[7]);
142 sserv->inprogress = atoi(argv[8]);
143 sserv->harderror = atoi(argv[9]);
144 sserv->errmsg = strsave(argv[10]);
149 /* Scan the services and process any that need it. */
154 struct save_queue *sq, *sq_create();
155 char *service, dfgen_prog[64], dfgen_cmd[128];
157 int status, lock_fd, ex, (*cstat)();
162 if (dbg & DBG_VERBOSE)
163 com_err(whoami, 0, "starting pass over services");
166 qargv[1] = "dontcare";
169 if (status = mr_query_with_retry("qualified_get_server", 3, qargv,
171 com_err(whoami, status, " getting services");
172 leave("query failed");
174 while (sq_get_data(sq, &service)) {
175 for (p = service; *p; p++)
178 com_err(whoami, 0, "checking %s...", service);
180 sprintf(dfgen_prog, "%s/%s.gen", BIN_DIR, service);
181 if (!file_exists(dfgen_prog)) {
182 com_err(whoami, 0, "prog %s doesn't exist\n", dfgen_prog);
186 sprintf(dfgen_cmd, "exec %s %s/%s.out",
187 dfgen_prog, DCM_DIR, service);
188 gettimeofday(&tv, &tz);
189 if (status = mr_query_with_retry("get_server_info", 1, qargv,
191 com_err(whoami, status, " getting service %s info, skipping to next service", service);
194 svc.service = strsave(service);
195 qargv[0] = strsave(service);
196 qargv[1] = itoa(svc.dfgen);
197 qargv[2] = itoa(svc.dfcheck);
198 qargv[3] = strsave("0");
199 qargv[4] = itoa(svc.harderror);
200 qargv[5] = strsave(svc.errmsg);
201 if (svc.interval != 0) {
202 if (svc.interval * 60 + svc.dfcheck < tv.tv_sec) {
203 lock_fd = maybe_lock_update("@db@", service, 1);
209 qargv[3] = strsave("1");
210 qargv[4] = strsave("0");
211 qargv[5] = strsave("");
212 status = mr_query_with_retry("set_server_internal_flags", 6,
213 qargv, scream, NULL);
214 if (status != MR_SUCCESS) {
215 com_err(whoami, status, " setting server state");
219 com_err(whoami, status, " running %s", dfgen_prog);
220 cstat = signal(SIGCHLD, SIG_DFL);
221 waits.w_status = system(dfgen_cmd);
222 signal(SIGCHLD, cstat);
223 if (waits.w_termsig) {
224 status = MR_COREDUMP;
225 com_err(whoami, status, " %s exited on signal %d",
226 dfgen_prog, waits.w_termsig);
227 } else if (waits.w_retcode) {
228 /* extract the process's exit value */
229 status = waits.w_retcode + ERROR_TABLE_BASE_sms;
230 com_err(whoami, status, " %s exited", dfgen_prog);
232 if (SOFT_FAIL(status)) {
234 qargv[5] = strsave(error_message(status));
235 } else if (status == MR_NO_CHANGE) {
237 qargv[2] = itoa(tv.tv_sec);
238 svc.dfcheck = tv.tv_sec;
239 } else if (status == MR_SUCCESS) {
242 qargv[1] = itoa(tv.tv_sec);
243 qargv[2] = strsave(qargv[1]);
244 svc.dfcheck = svc.dfgen = tv.tv_sec;
245 } else { /* HARD_FAIL(status) */
249 qargv[2] = itoa(tv.tv_sec);
250 svc.dfcheck = tv.tv_sec;
251 qargv[4] = itoa(status);
252 qargv[5] = strsave(error_message(status));
253 critical_alert("DCM","DCM building config files for %s: %s",
258 qargv[3] = strsave("0");
259 status = mr_query_with_retry("set_server_internal_flags", 6,
260 qargv, scream, NULL);
262 com_err(whoami, status, " setting service state");
271 if (!strcmp(svc.type, "REPLICAT"))
275 lock_fd = maybe_lock_update("@db@", service, ex);
292 /* Used by qualified_get_server_host to make a list of hosts to check */
294 qgethost(argc, argv, sq)
297 struct save_queue *sq;
299 sq_save_data(sq, strsave(argv[1]));
304 /* Used by get_server_host_info to store all of the info about a host */
306 gethostinfo(argc, argv, shost)
309 struct svrhost *shost;
311 shost->service = strsave(argv[0]);
312 shost->machine = strsave(argv[1]);
313 shost->enable = atoi(argv[2]);
314 shost->override = atoi(argv[3]);
315 shost->success = atoi(argv[4]);
316 shost->inprogress = atoi(argv[5]);
317 shost->hosterror = atoi(argv[6]);
318 shost->errmsg = strsave(argv[7]);
319 shost->lasttry = atoi(argv[8]);
320 shost->lastsuccess = atoi(argv[9]);
321 shost->value1 = atoi(argv[10]);
322 shost->value2 = atoi(argv[11]);
323 shost->value3 = strsave(argv[12]);
328 /* Scans all of the hosts for a particular service, and processes them. */
333 char *argv[9], *machine;
335 struct save_queue *sq;
336 struct svrhost shost;
339 argv[0] = svc->service;
341 argv[2] = argv[3] = argv[4] = "DONTCARE";
343 status = mr_query_with_retry("qualified_get_server_host", 6, argv,
345 if (status == MR_NO_MATCH) {
348 com_err(whoami, status, " getting server_hosts for %s", svc->service);
351 while (sq_get_data(sq, &machine)) {
353 com_err(whoami, 0, "checking %s...", machine);
355 status = mr_query_with_retry("get_server_host_info", 2, argv,
356 gethostinfo, &shost);
358 com_err(whoami,status, " getting server_host_info for %s", machine);
361 if (!shost.enable || shost.hosterror ||
362 (shost.success && !shost.override &&
363 shost.lastsuccess >= svc->dfgen)) {
365 com_err(whoami, 0, "not updating %s:%s", svc->service, machine);
369 lock_fd = maybe_lock_update(machine, svc->service, 1);
372 argv[0] = svc->service;
374 argv[2] = argv[3] = argv[5] = "0";
376 argv[6] = strsave("");
377 argv[7] = itoa(tv.tv_sec);
378 argv[8] = itoa(shost.lastsuccess);
379 status = mr_query_with_retry("set_server_host_internal", 9, argv,
381 if (status != MR_SUCCESS) {
382 com_err(whoami,status," while setting internal state for %s:%s",
383 svc->service, machine);
386 status = mr_update_server(svc->service, machine, svc->target,
388 if (status == MR_SUCCESS) {
389 argv[2] = argv[4] = "0";
392 argv[8] = itoa(tv.tv_sec);
393 } else if (SOFT_FAIL(status)) {
396 argv[6] = strsave(error_message(status));
397 } else { /* HARD_FAIL */
398 argv[2] = itoa(shost.override);
400 argv[5] = itoa(status);
402 argv[6] = strsave(error_message(status));
403 critical_alert("DCM", "DCM updating %s:%s: %s",
404 machine, svc->service, argv[6]);
405 if (!strcmp(svc->type, "REPLICAT")) {
408 svc->harderror = status;
409 svc->errmsg = strsave(argv[6]);
410 qargv[0] = strsave(svc->service);
411 qargv[1] = itoa(svc->dfgen);
412 qargv[2] = itoa(svc->dfcheck);
413 qargv[3] = strsave("0");
414 qargv[4] = itoa(svc->harderror);
415 qargv[5] = strsave(svc->errmsg);
416 status = mr_query_with_retry("set_server_internal_flags",
417 6, qargv, scream, NULL);
419 com_err(whoami, status, " setting service state again");
427 status = mr_query_with_retry("set_server_host_internal",
428 9, argv,scream,NULL);
432 com_err(whoami, status, " setting host state again");
437 status = mr_query_with_retry("set_server_host_internal", 9, argv,
440 com_err(whoami, status, " setting host state again");