]> andersk Git - moira.git/blame - dcm/dcm.c
wait a minute after deadlock before trying again to update state in DB
[moira.git] / dcm / dcm.c
CommitLineData
846841f4 1/*
2 * The Data Control Manager for SMS.
3 *
4 * Copyright 1987, 1988 by the Massachusetts Institute of Technology.
5 * For copying and distribution information, see the file
6 * "mit-copyright.h".
7 *
8 * $Source$
9 * $Author$
10 * $Header$
11 */
12
/* RCS identification string; left out when the file is run through lint. */
#ifndef lint
static char rcsid_dcm_c[] = "$Header$";
#endif /* lint */
16
17#include <stdio.h>
18#include <update.h>
19#include <sys/file.h>
20#include <sys/time.h>
21#include <sys/wait.h>
22#include <ctype.h>
23#include <sms.h>
24#include <sms_app.h>
25#include "dcm.h"
26#include "mit-copyright.h"
27
/* External routines and data referenced by this file. */
extern char *ctime(), *getenv(), *error_message();
extern int log_flags;
char *itoa();
int gqval();
long time();

/* Number of seconds to wait after a deadlock before trying again. */
#define DEADLOCK_WAIT (3 * 60)

/*
 * File-scope timestamp: filled in by gettimeofday() in do_services() and
 * read again in do_hosts(), so both see the same "current time".
 */
struct timeval tv;
42
43
44main(argc, argv)
45int argc;
46char *argv[];
47{
48 int i;
49 char **arg = argv;
50 char *qargv[3];
51 int status;
52
53 whoami = argv[0];
54 dbg = atoi(getenv("DEBUG"));
55 umask(UMASK);
56 log_flags = 0;
57 setlinebuf(stderr);
58 setlinebuf(stdout);
59
60 while(++arg - argv < argc) {
61 if (**arg == '-')
62 switch((*arg)[1]) {
63 case 'd':
64 dbg = atoi((*arg)[2]? *arg+2: *++arg);
65 break;
66 }
67 }
68 set_com_err_hook(dcm_com_err_hook);
69
70 /* if /etc/nodcm exists, punt quietly. */
71 if (!access("/etc/nodcm", F_OK)) {
72 exit(1);
73 }
74
9a2d61b0 75 if (status = sms_connect("")) {
846841f4 76 com_err(whoami, status, " on sms_connect");
77 leave("connect failed");
78 }
79
80 if (status = sms_auth("dcm")) {
81 com_err(whoami, status, " on \"authenticate\"");
82 leave("auth failed");
83 }
84
85 /* if DCM is not enabled, exit after logging */
86 qargv[0] = "dcm_enable";
87 if (status = sms_query("get_value", 1, qargv, gqval, &i)) {
88 com_err(whoami, status, " check dcm_enable");
89 leave("query failed");
90 }
91 if (i == 0) {
92 errno = 0;
93 leave("dcm_enable not set");
94 }
95
96 /* do it! */
846841f4 97 do_services();
98 errno = 0;
99 leave("");
100}
101
102
103/* Used by the get_value query when checking for dcm_enable. */
104
105gqval(argc, argv, hint)
106int argc;
107char **argv;
108int *hint;
109{
110 *hint = atoi(argv[0]);
111 return(UPCALL_STOP);
112}
113
114
115/* Used by qualified_get_server to make a list of servers to check */
116
117qgetsv(argc, argv, sq)
118int argc;
119char **argv;
120struct save_queue *sq;
121{
122 sq_save_data(sq, strsave(argv[0]));
123 return(UPCALL_CONT);
124}
125
126
127/* Used by get_server_info to record all of the returned information */
128
129getsvinfo(argc, argv, sserv)
130int argc;
131char **argv;
132struct service *sserv;
133{
134 sserv->service = strsave(argv[0]);
135 sserv->interval = atoi(argv[1]);
136 sserv->target = strsave(argv[2]);
137 sserv->script = strsave(argv[3]);
138 sserv->dfgen = atoi(argv[4]);
139 sserv->dfcheck = atoi(argv[5]);
140 sserv->type = strsave(argv[6]);
141 sserv->enable = atoi(argv[7]);
142 sserv->inprogress = atoi(argv[8]);
143 sserv->harderror = atoi(argv[9]);
144 sserv->errmsg = strsave(argv[10]);
145 return(UPCALL_STOP);
146}
147
148
149/* Scan the services and process any that need it. */
150
151do_services()
152{
153 char *qargv[6];
154 struct save_queue *sq, *sq_create();
155 char *service, dfgen_prog[64], dfgen_cmd[128];
156 struct service svc;
157 int status, lock_fd, ex;
158 struct timezone tz;
159 register char *p;
160 union wait waits;
161
162 if (dbg & DBG_VERBOSE)
163 com_err(whoami, 0, "starting pass over services");
164
165 qargv[0] = "true";
166 qargv[1] = "dontcare";
167 qargv[2] = "false";
168 sq = sq_create();
169 if (status = sms_query("qualified_get_server", 3, qargv, qgetsv, sq)) {
170 com_err(whoami, status, " getting services");
171 leave("query failed");
172 }
173 while (sq_get_data(sq, &service)) {
174 for (p = service; *p; p++)
175 if (isupper(*p))
176 *p = tolower(*p);
177 com_err(whoami, 0, "checking %s...", service);
178 qargv[0] = service;
179 sprintf(dfgen_prog, "%s/bin/%s.gen", SMS_DIR, service);
180 if (!file_exists(dfgen_prog)) {
181 com_err(whoami, 0, "prog %s doesn't exist\n", dfgen_prog);
182 free(service);
183 continue;
184 }
185 sprintf(dfgen_cmd, "exec %s %s/dcm/%s.out",
186 dfgen_prog, SMS_DIR, service);
187 gettimeofday(&tv, &tz);
188 if (status = sms_query("get_server_info", 1, qargv, getsvinfo, &svc)) {
4e5690ff 189 com_err(whoami, status, " getting service %s info, skipping to next service", service);
190 continue;
846841f4 191 }
192 svc.service = strsave(service);
193 qargv[0] = strsave(service);
194 qargv[1] = itoa(svc.dfgen);
195 qargv[2] = itoa(svc.dfcheck);
196 qargv[3] = strsave("0");
197 qargv[4] = itoa(svc.harderror);
198 qargv[5] = strsave(svc.errmsg);
199 if (svc.interval != 0) {
200 if (svc.interval * 60 + svc.dfcheck < tv.tv_sec) {
201 lock_fd = maybe_lock_update(SMS_DIR, "@db@", service, 1);
202 if (lock_fd < 0)
203 goto free_service;
204 free(qargv[3]);
205 free(qargv[4]);
206 free(qargv[5]);
207 qargv[3] = strsave("1");
208 qargv[4] = strsave("0");
209 qargv[5] = strsave("");
210 status = sms_query("set_server_internal_flags", 6, qargv,
211 scream, NULL);
212 if (status != SMS_SUCCESS) {
213 com_err(whoami, status, " setting server state");
214 goto free_service;
215 }
216
217 com_err(whoami, status, " running %s", dfgen_prog);
218 waits.w_status = system(dfgen_cmd);
c9c95b8a 219 if (waits.w_termsig) {
220 status = SMS_TAR_FAIL;
221 com_err(whoami, status, " %s exited on signal %d",
222 dfgen_prog, waits.w_termsig);
223 } else if (waits.w_retcode) {
224 /* extract the process's exit value */
225 status = waits.w_retcode + sms_err_base;
846841f4 226 com_err(whoami, status, " %s exited", dfgen_prog);
227 }
228 if (SOFT_FAIL(status)) {
229 free(qargv[5]);
230 qargv[5] = strsave(error_message(status));
231 } else if (status == SMS_NO_CHANGE) {
232 free(qargv[2]);
233 qargv[2] = itoa(tv.tv_sec);
234 svc.dfcheck = tv.tv_sec;
235 } else if (status == SMS_SUCCESS) {
236 free(qargv[1]);
237 free(qargv[2]);
238 qargv[1] = itoa(tv.tv_sec);
239 qargv[2] = strsave(qargv[1]);
240 svc.dfcheck = svc.dfgen = tv.tv_sec;
241 } else { /* HARD_FAIL(status) */
242 free(qargv[2]);
243 free(qargv[4]);
244 free(qargv[5]);
245 qargv[2] = itoa(tv.tv_sec);
246 svc.dfcheck = tv.tv_sec;
247 qargv[4] = itoa(status);
248 qargv[5] = strsave(error_message(status));
249 critical_alert("DCM","DCM building config files for %s: %s",
250 service, qargv[5]);
251 }
252 free_service:
253 free(qargv[3]);
254 qargv[3] = strsave("0");
255 status = sms_query("set_server_internal_flags", 6, qargv,
256 scream, NULL);
4e5690ff 257 if (status) {
258 com_err(whoami, status,
1332916a 259 " setting service state, sleeping");
260 sleep(DEADLOCK_WAIT);
4e5690ff 261 status = sms_query("set_server_internal_flags", 6, qargv,
262 scream, NULL);
263 if (status)
264 com_err(whoami, status, " setting service state again");
265 }
846841f4 266 close(lock_fd);
267 free(qargv[0]);
268 free(qargv[1]);
269 free(qargv[2]);
270 free(qargv[3]);
271 free(qargv[4]);
272 free(qargv[5]);
273 }
274 if (!strcmp(svc.type, "REPLICAT"))
275 ex = 1;
276 else
277 ex = 0;
278 lock_fd = maybe_lock_update(SMS_DIR, "@db@", service, ex);
279 if (lock_fd >= 0) {
280 do_hosts(&svc);
281 close(lock_fd);
282 }
283 }
284 free(svc.service);
285 free(svc.target);
286 free(svc.script);
287 free(svc.type);
288 free(svc.errmsg);
289 free(service);
290 }
291 sq_destroy(sq);
292}
293
294
295/* Used by qualified_get_server_host to make a list of hosts to check */
296
297qgethost(argc, argv, sq)
298int argc;
299char **argv;
300struct save_queue *sq;
301{
302 sq_save_data(sq, strsave(argv[1]));
303 return(UPCALL_CONT);
304}
305
306
307/* Used by get_server_host_info to store all of the info about a host */
308
309gethostinfo(argc, argv, shost)
310int argc;
311char **argv;
312struct svrhost *shost;
313{
314 shost->service = strsave(argv[0]);
315 shost->machine = strsave(argv[1]);
316 shost->enable = atoi(argv[2]);
317 shost->override = atoi(argv[3]);
318 shost->success = atoi(argv[4]);
319 shost->inprogress = atoi(argv[5]);
320 shost->hosterror = atoi(argv[6]);
321 shost->errmsg = strsave(argv[7]);
322 shost->lasttry = atoi(argv[8]);
323 shost->lastsuccess = atoi(argv[9]);
324 shost->value1 = atoi(argv[10]);
325 shost->value2 = atoi(argv[11]);
326 shost->value3 = strsave(argv[12]);
327 return(UPCALL_STOP);
328}
329
330
331/* Scans all of the hosts for a particular service, and processes them. */
332
333do_hosts(svc)
334struct service *svc;
335{
336 char *argv[9], *machine;
337 int status, lock_fd;
338 struct save_queue *sq;
339 struct svrhost shost;
340
341 sq = sq_create();
342 argv[0] = svc->service;
343 argv[1] = "TRUE";
344 argv[2] = argv[3] = argv[4] = "DONTCARE";
345 argv[5] = "FALSE";
346 status = sms_query("qualified_get_server_host", 6, argv, qgethost, sq);
347 if (status == SMS_NO_MATCH) {
348 return;
349 } else if (status) {
350 com_err(whoami, status, " getting server_hosts for %s", svc->service);
351 return;
352 }
353 while (sq_get_data(sq, &machine)) {
354 if (dbg & DBG_TRACE)
355 com_err(whoami, 0, "checking %s...", machine);
356 argv[1] = machine;
357 status = sms_query("get_server_host_info", 2, argv,gethostinfo, &shost);
358 if (status) {
359 com_err(whoami,status, " getting server_host_info for %s", machine);
360 goto free_mach;
361 }
362 if (!shost.enable || shost.hosterror ||
4112693b 363 (shost.success && !shost.override &&
6899dfbe 364 shost.lastsuccess >= svc->dfgen)) {
846841f4 365 if (dbg & DBG_TRACE)
366 com_err(whoami, 0, "not updating %s:%s", svc->service, machine);
367 goto free_mach;
368 }
062079b1 369
370 lock_fd = maybe_lock_update(SMS_DIR, machine, svc->service, 1);
371 if (lock_fd < 0)
372 goto free_mach;
373 argv[0] = svc->service;
374 argv[1] = machine;
375 argv[2] = argv[3] = argv[5] = "0";
376 argv[4] = "1";
377 argv[6] = strsave("");
378 argv[7] = itoa(tv.tv_sec);
379 argv[8] = itoa(shost.lastsuccess);
380 status = sms_query("set_server_host_internal", 9, argv,scream,NULL);
381 if (status != SMS_SUCCESS) {
382 com_err(whoami,status," while setting internal state for %s:%s",
383 svc->service, machine);
384 goto free_mach;
385 }
386 status = sms_update_server(svc->service, machine, svc->target,
387 svc->script);
388 if (status == SMS_SUCCESS) {
389 argv[2] = "0";
390 argv[3] = "1";
391 free(argv[8]);
392 argv[8] = itoa(tv.tv_sec);
393 } else if (SOFT_FAIL(status)) {
394 free(argv[6]);
395 argv[6] = strsave(error_message(status));
396 } else { /* HARD_FAIL */
397 argv[2] = itoa(shost.override);
398 argv[5] = itoa(status);
399 free(argv[6]);
400 argv[6] = strsave(error_message(status));
401 critical_alert("DCM", "DCM updating %s:%s: %s",
402 machine, svc->service, argv[6]);
403 if (!strcmp(svc->type, "REPLICAT")) {
404 char *qargv[6];
405
406 svc->harderror = status;
407 svc->errmsg = strsave(argv[6]);
408 qargv[0] = strsave(svc->service);
409 qargv[1] = itoa(svc->dfgen);
410 qargv[2] = itoa(svc->dfcheck);
411 qargv[3] = strsave("0");
412 qargv[4] = itoa(svc->harderror);
413 qargv[5] = strsave(svc->errmsg);
414 status = sms_query("set_server_internal_flags",
415 6, qargv, scream, NULL);
4e5690ff 416 if (status) {
417 com_err(whoami, status,
1332916a 418 " setting service state, sleeping");
419 sleep(DEADLOCK_WAIT);
4e5690ff 420 status = sms_query("set_server_internal_flags",
421 6, qargv, scream, NULL);
422 if (status)
423 com_err(whoami, status, " setting service state again");
424 }
062079b1 425 free(qargv[0]);
426 free(qargv[1]);
427 free(qargv[2]);
428 free(qargv[3]);
429 free(qargv[4]);
430 free(qargv[5]);
431 close(lock_fd);
846841f4 432 free(argv[2]);
062079b1 433 argv[4] = "0";
846841f4 434 free(argv[5]);
062079b1 435 status = sms_query("set_server_host_internal",
436 9, argv,scream,NULL);
4e5690ff 437 if (status) {
438 com_err(whoami, status,
1332916a 439 " setting host state, sleeping");
440 sleep(DEADLOCK_WAIT);
4e5690ff 441 status = sms_query("set_server_host_internal",
442 9, argv,scream,NULL);
443 if (status)
444 com_err(whoami, status, " setting host state again");
445 }
062079b1 446 return(-1);
846841f4 447 }
062079b1 448 free(argv[2]);
449 free(argv[5]);
846841f4 450 }
062079b1 451 argv[4] = "0";
452 close(lock_fd);
453 status = sms_query("set_server_host_internal", 9, argv,scream,NULL);
4e5690ff 454 if (status) {
1332916a 455 com_err(whoami, status, " setting host state, sleeping");
456 sleep(DEADLOCK_WAIT);
4e5690ff 457 status = sms_query("set_server_host_internal", 9, argv,scream,NULL);
458 if (status)
459 com_err(whoami, status, " setting host state again");
460 }
846841f4 461 free_mach:
462 free(machine);
463 close(lock_fd);
464 }
465 return(0);
466}
This page took 0.164264 seconds and 5 git commands to generate.