]> andersk Git - moira.git/blame - dcm/dcm.c
handle errors in REPLICAT services correctly; remove non-functional if
[moira.git] / dcm / dcm.c
CommitLineData
846841f4 1/*
2 * The Data Control Manager for SMS.
3 *
4 * Copyright 1987, 1988 by the Massachusetts Institute of Technology.
5 * For copying and distribution information, see the file
6 * "mit-copyright.h".
7 *
8 * $Source$
9 * $Author$
10 * $Header$
11 */
12
/* RCS identification string for this file; omitted when `lint' is defined. */
#ifndef lint
static char rcsid_dcm_c[] = "$Header$";
#endif /* lint */
16
17#include <stdio.h>
18#include <update.h>
19#include <sys/file.h>
20#include <sys/time.h>
21#include <sys/wait.h>
22#include <ctype.h>
23#include <sms.h>
24#include <sms_app.h>
25#include "dcm.h"
26#include "mit-copyright.h"
27
28extern char *ctime();
29extern char *getenv();
30extern int log_flags;
31extern char *error_message();
32char *itoa();
33int gqval();
34long time();
35
36
/* Declared global so that the current time can be read from different places:
 * do_services() fills it in with gettimeofday() and do_hosts() reads tv.tv_sec. */
38struct timeval tv;
39
40
41main(argc, argv)
42int argc;
43char *argv[];
44{
45 int i;
46 char **arg = argv;
47 char *qargv[3];
48 int status;
49
50 whoami = argv[0];
51 dbg = atoi(getenv("DEBUG"));
52 umask(UMASK);
53 log_flags = 0;
54 setlinebuf(stderr);
55 setlinebuf(stdout);
56
57 while(++arg - argv < argc) {
58 if (**arg == '-')
59 switch((*arg)[1]) {
60 case 'd':
61 dbg = atoi((*arg)[2]? *arg+2: *++arg);
62 break;
63 }
64 }
65 set_com_err_hook(dcm_com_err_hook);
66
67 /* if /etc/nodcm exists, punt quietly. */
68 if (!access("/etc/nodcm", F_OK)) {
69 exit(1);
70 }
71
72 if (status = sms_connect()) {
73 com_err(whoami, status, " on sms_connect");
74 leave("connect failed");
75 }
76
77 if (status = sms_auth("dcm")) {
78 com_err(whoami, status, " on \"authenticate\"");
79 leave("auth failed");
80 }
81
82 /* if DCM is not enabled, exit after logging */
83 qargv[0] = "dcm_enable";
84 if (status = sms_query("get_value", 1, qargv, gqval, &i)) {
85 com_err(whoami, status, " check dcm_enable");
86 leave("query failed");
87 }
88 if (i == 0) {
89 errno = 0;
90 leave("dcm_enable not set");
91 }
92
93 /* do it! */
846841f4 94 do_services();
95 errno = 0;
96 leave("");
97}
98
99
100/* Used by the get_value query when checking for dcm_enable. */
101
102gqval(argc, argv, hint)
103int argc;
104char **argv;
105int *hint;
106{
107 *hint = atoi(argv[0]);
108 return(UPCALL_STOP);
109}
110
111
112/* Used by qualified_get_server to make a list of servers to check */
113
114qgetsv(argc, argv, sq)
115int argc;
116char **argv;
117struct save_queue *sq;
118{
119 sq_save_data(sq, strsave(argv[0]));
120 return(UPCALL_CONT);
121}
122
123
124/* Used by get_server_info to record all of the returned information */
125
126getsvinfo(argc, argv, sserv)
127int argc;
128char **argv;
129struct service *sserv;
130{
131 sserv->service = strsave(argv[0]);
132 sserv->interval = atoi(argv[1]);
133 sserv->target = strsave(argv[2]);
134 sserv->script = strsave(argv[3]);
135 sserv->dfgen = atoi(argv[4]);
136 sserv->dfcheck = atoi(argv[5]);
137 sserv->type = strsave(argv[6]);
138 sserv->enable = atoi(argv[7]);
139 sserv->inprogress = atoi(argv[8]);
140 sserv->harderror = atoi(argv[9]);
141 sserv->errmsg = strsave(argv[10]);
142 return(UPCALL_STOP);
143}
144
145
146/* Scan the services and process any that need it. */
147
148do_services()
149{
150 char *qargv[6];
151 struct save_queue *sq, *sq_create();
152 char *service, dfgen_prog[64], dfgen_cmd[128];
153 struct service svc;
154 int status, lock_fd, ex;
155 struct timezone tz;
156 register char *p;
157 union wait waits;
158
159 if (dbg & DBG_VERBOSE)
160 com_err(whoami, 0, "starting pass over services");
161
162 qargv[0] = "true";
163 qargv[1] = "dontcare";
164 qargv[2] = "false";
165 sq = sq_create();
166 if (status = sms_query("qualified_get_server", 3, qargv, qgetsv, sq)) {
167 com_err(whoami, status, " getting services");
168 leave("query failed");
169 }
170 while (sq_get_data(sq, &service)) {
171 for (p = service; *p; p++)
172 if (isupper(*p))
173 *p = tolower(*p);
174 com_err(whoami, 0, "checking %s...", service);
175 qargv[0] = service;
176 sprintf(dfgen_prog, "%s/bin/%s.gen", SMS_DIR, service);
177 if (!file_exists(dfgen_prog)) {
178 com_err(whoami, 0, "prog %s doesn't exist\n", dfgen_prog);
179 free(service);
180 continue;
181 }
182 sprintf(dfgen_cmd, "exec %s %s/dcm/%s.out",
183 dfgen_prog, SMS_DIR, service);
184 gettimeofday(&tv, &tz);
185 if (status = sms_query("get_server_info", 1, qargv, getsvinfo, &svc)) {
186 com_err(whoami, status, " getting service %s info", service);
187 }
188 svc.service = strsave(service);
189 qargv[0] = strsave(service);
190 qargv[1] = itoa(svc.dfgen);
191 qargv[2] = itoa(svc.dfcheck);
192 qargv[3] = strsave("0");
193 qargv[4] = itoa(svc.harderror);
194 qargv[5] = strsave(svc.errmsg);
195 if (svc.interval != 0) {
196 if (svc.interval * 60 + svc.dfcheck < tv.tv_sec) {
197 lock_fd = maybe_lock_update(SMS_DIR, "@db@", service, 1);
198 if (lock_fd < 0)
199 goto free_service;
200 free(qargv[3]);
201 free(qargv[4]);
202 free(qargv[5]);
203 qargv[3] = strsave("1");
204 qargv[4] = strsave("0");
205 qargv[5] = strsave("");
206 status = sms_query("set_server_internal_flags", 6, qargv,
207 scream, NULL);
208 if (status != SMS_SUCCESS) {
209 com_err(whoami, status, " setting server state");
210 goto free_service;
211 }
212
213 com_err(whoami, status, " running %s", dfgen_prog);
214 waits.w_status = system(dfgen_cmd);
c9c95b8a 215 if (waits.w_termsig) {
216 status = SMS_TAR_FAIL;
217 com_err(whoami, status, " %s exited on signal %d",
218 dfgen_prog, waits.w_termsig);
219 } else if (waits.w_retcode) {
220 /* extract the process's exit value */
221 status = waits.w_retcode + sms_err_base;
846841f4 222 com_err(whoami, status, " %s exited", dfgen_prog);
223 }
224 if (SOFT_FAIL(status)) {
225 free(qargv[5]);
226 qargv[5] = strsave(error_message(status));
227 } else if (status == SMS_NO_CHANGE) {
228 free(qargv[2]);
229 qargv[2] = itoa(tv.tv_sec);
230 svc.dfcheck = tv.tv_sec;
231 } else if (status == SMS_SUCCESS) {
232 free(qargv[1]);
233 free(qargv[2]);
234 qargv[1] = itoa(tv.tv_sec);
235 qargv[2] = strsave(qargv[1]);
236 svc.dfcheck = svc.dfgen = tv.tv_sec;
237 } else { /* HARD_FAIL(status) */
238 free(qargv[2]);
239 free(qargv[4]);
240 free(qargv[5]);
241 qargv[2] = itoa(tv.tv_sec);
242 svc.dfcheck = tv.tv_sec;
243 qargv[4] = itoa(status);
244 qargv[5] = strsave(error_message(status));
245 critical_alert("DCM","DCM building config files for %s: %s",
246 service, qargv[5]);
247 }
248 free_service:
249 free(qargv[3]);
250 qargv[3] = strsave("0");
251 status = sms_query("set_server_internal_flags", 6, qargv,
252 scream, NULL);
253 close(lock_fd);
254 free(qargv[0]);
255 free(qargv[1]);
256 free(qargv[2]);
257 free(qargv[3]);
258 free(qargv[4]);
259 free(qargv[5]);
260 }
261 if (!strcmp(svc.type, "REPLICAT"))
262 ex = 1;
263 else
264 ex = 0;
265 lock_fd = maybe_lock_update(SMS_DIR, "@db@", service, ex);
266 if (lock_fd >= 0) {
267 do_hosts(&svc);
268 close(lock_fd);
269 }
270 }
271 free(svc.service);
272 free(svc.target);
273 free(svc.script);
274 free(svc.type);
275 free(svc.errmsg);
276 free(service);
277 }
278 sq_destroy(sq);
279}
280
281
282/* Used by qualified_get_server_host to make a list of hosts to check */
283
284qgethost(argc, argv, sq)
285int argc;
286char **argv;
287struct save_queue *sq;
288{
289 sq_save_data(sq, strsave(argv[1]));
290 return(UPCALL_CONT);
291}
292
293
294/* Used by get_server_host_info to store all of the info about a host */
295
296gethostinfo(argc, argv, shost)
297int argc;
298char **argv;
299struct svrhost *shost;
300{
301 shost->service = strsave(argv[0]);
302 shost->machine = strsave(argv[1]);
303 shost->enable = atoi(argv[2]);
304 shost->override = atoi(argv[3]);
305 shost->success = atoi(argv[4]);
306 shost->inprogress = atoi(argv[5]);
307 shost->hosterror = atoi(argv[6]);
308 shost->errmsg = strsave(argv[7]);
309 shost->lasttry = atoi(argv[8]);
310 shost->lastsuccess = atoi(argv[9]);
311 shost->value1 = atoi(argv[10]);
312 shost->value2 = atoi(argv[11]);
313 shost->value3 = strsave(argv[12]);
314 return(UPCALL_STOP);
315}
316
317
318/* Scans all of the hosts for a particular service, and processes them. */
319
320do_hosts(svc)
321struct service *svc;
322{
323 char *argv[9], *machine;
324 int status, lock_fd;
325 struct save_queue *sq;
326 struct svrhost shost;
327
328 sq = sq_create();
329 argv[0] = svc->service;
330 argv[1] = "TRUE";
331 argv[2] = argv[3] = argv[4] = "DONTCARE";
332 argv[5] = "FALSE";
333 status = sms_query("qualified_get_server_host", 6, argv, qgethost, sq);
334 if (status == SMS_NO_MATCH) {
335 return;
336 } else if (status) {
337 com_err(whoami, status, " getting server_hosts for %s", svc->service);
338 return;
339 }
340 while (sq_get_data(sq, &machine)) {
341 if (dbg & DBG_TRACE)
342 com_err(whoami, 0, "checking %s...", machine);
343 argv[1] = machine;
344 status = sms_query("get_server_host_info", 2, argv,gethostinfo, &shost);
345 if (status) {
346 com_err(whoami,status, " getting server_host_info for %s", machine);
347 goto free_mach;
348 }
349 if (!shost.enable || shost.hosterror ||
4112693b 350 (shost.success && !shost.override &&
6899dfbe 351 shost.lastsuccess >= svc->dfgen)) {
846841f4 352 if (dbg & DBG_TRACE)
353 com_err(whoami, 0, "not updating %s:%s", svc->service, machine);
354 goto free_mach;
355 }
062079b1 356
357 lock_fd = maybe_lock_update(SMS_DIR, machine, svc->service, 1);
358 if (lock_fd < 0)
359 goto free_mach;
360 argv[0] = svc->service;
361 argv[1] = machine;
362 argv[2] = argv[3] = argv[5] = "0";
363 argv[4] = "1";
364 argv[6] = strsave("");
365 argv[7] = itoa(tv.tv_sec);
366 argv[8] = itoa(shost.lastsuccess);
367 status = sms_query("set_server_host_internal", 9, argv,scream,NULL);
368 if (status != SMS_SUCCESS) {
369 com_err(whoami,status," while setting internal state for %s:%s",
370 svc->service, machine);
371 goto free_mach;
372 }
373 status = sms_update_server(svc->service, machine, svc->target,
374 svc->script);
375 if (status == SMS_SUCCESS) {
376 argv[2] = "0";
377 argv[3] = "1";
378 free(argv[8]);
379 argv[8] = itoa(tv.tv_sec);
380 } else if (SOFT_FAIL(status)) {
381 free(argv[6]);
382 argv[6] = strsave(error_message(status));
383 } else { /* HARD_FAIL */
384 argv[2] = itoa(shost.override);
385 argv[5] = itoa(status);
386 free(argv[6]);
387 argv[6] = strsave(error_message(status));
388 critical_alert("DCM", "DCM updating %s:%s: %s",
389 machine, svc->service, argv[6]);
390 if (!strcmp(svc->type, "REPLICAT")) {
391 char *qargv[6];
392
393 svc->harderror = status;
394 svc->errmsg = strsave(argv[6]);
395 qargv[0] = strsave(svc->service);
396 qargv[1] = itoa(svc->dfgen);
397 qargv[2] = itoa(svc->dfcheck);
398 qargv[3] = strsave("0");
399 qargv[4] = itoa(svc->harderror);
400 qargv[5] = strsave(svc->errmsg);
401 status = sms_query("set_server_internal_flags",
402 6, qargv, scream, NULL);
403 free(qargv[0]);
404 free(qargv[1]);
405 free(qargv[2]);
406 free(qargv[3]);
407 free(qargv[4]);
408 free(qargv[5]);
409 close(lock_fd);
846841f4 410 free(argv[2]);
062079b1 411 argv[4] = "0";
846841f4 412 free(argv[5]);
062079b1 413 status = sms_query("set_server_host_internal",
414 9, argv,scream,NULL);
415 return(-1);
846841f4 416 }
062079b1 417 free(argv[2]);
418 free(argv[5]);
846841f4 419 }
062079b1 420 argv[4] = "0";
421 close(lock_fd);
422 status = sms_query("set_server_host_internal", 9, argv,scream,NULL);
846841f4 423 free_mach:
424 free(machine);
425 close(lock_fd);
426 }
427 return(0);
428}
This page took 0.106832 seconds and 5 git commands to generate.