]>
Commit | Line | Data |
---|---|---|
846841f4 | 1 | /* |
2 | * The Data Control Manager for SMS. | |
3 | * | |
4 | * Copyright 1987, 1988 by the Massachusetts Institute of Technology. | |
5 | * For copying and distribution information, see the file | |
6 | * "mit-copyright.h". | |
7 | * | |
8 | * $Source$ | |
9 | * $Author$ | |
10 | * $Header$ | |
11 | */ | |
12 | ||
/* RCS identification string, compiled in unless building under lint. */
#ifndef lint
static char rcsid_dcm_c[] = "$Header$";
#endif /* lint */
16 | ||
17 | #include <stdio.h> | |
18 | #include <update.h> | |
19 | #include <sys/file.h> | |
20 | #include <sys/time.h> | |
21 | #include <sys/wait.h> | |
22 | #include <ctype.h> | |
23 | #include <sms.h> | |
24 | #include <sms_app.h> | |
25 | #include "dcm.h" | |
26 | #include "mit-copyright.h" | |
27 | ||
28 | extern char *ctime(); | |
29 | extern char *getenv(); | |
30 | extern int log_flags; | |
31 | extern char *error_message(); | |
32 | char *itoa(); | |
33 | int gqval(); | |
34 | long time(); | |
35 | ||
36 | ||
/*
 * How long to pause, in seconds, after a deadlock before the failed
 * query is attempted again.
 */
#define DEADLOCK_WAIT (3 * 60)

/* Kept global so the current time can be read from different places. */
struct timeval tv;
42 | ||
43 | ||
44 | main(argc, argv) | |
45 | int argc; | |
46 | char *argv[]; | |
47 | { | |
48 | int i; | |
49 | char **arg = argv; | |
50 | char *qargv[3]; | |
51 | int status; | |
52 | ||
53 | whoami = argv[0]; | |
54 | dbg = atoi(getenv("DEBUG")); | |
55 | umask(UMASK); | |
56 | log_flags = 0; | |
57 | setlinebuf(stderr); | |
58 | setlinebuf(stdout); | |
59 | ||
60 | while(++arg - argv < argc) { | |
61 | if (**arg == '-') | |
62 | switch((*arg)[1]) { | |
63 | case 'd': | |
64 | dbg = atoi((*arg)[2]? *arg+2: *++arg); | |
65 | break; | |
66 | } | |
67 | } | |
68 | set_com_err_hook(dcm_com_err_hook); | |
69 | ||
70 | /* if /etc/nodcm exists, punt quietly. */ | |
71 | if (!access("/etc/nodcm", F_OK)) { | |
72 | exit(1); | |
73 | } | |
74 | ||
9a2d61b0 | 75 | if (status = sms_connect("")) { |
846841f4 | 76 | com_err(whoami, status, " on sms_connect"); |
77 | leave("connect failed"); | |
78 | } | |
79 | ||
80 | if (status = sms_auth("dcm")) { | |
81 | com_err(whoami, status, " on \"authenticate\""); | |
82 | leave("auth failed"); | |
83 | } | |
84 | ||
85 | /* if DCM is not enabled, exit after logging */ | |
86 | qargv[0] = "dcm_enable"; | |
87 | if (status = sms_query("get_value", 1, qargv, gqval, &i)) { | |
88 | com_err(whoami, status, " check dcm_enable"); | |
89 | leave("query failed"); | |
90 | } | |
91 | if (i == 0) { | |
92 | errno = 0; | |
93 | leave("dcm_enable not set"); | |
94 | } | |
95 | ||
96 | /* do it! */ | |
846841f4 | 97 | do_services(); |
98 | errno = 0; | |
99 | leave(""); | |
100 | } | |
101 | ||
102 | ||
103 | /* Used by the get_value query when checking for dcm_enable. */ | |
104 | ||
105 | gqval(argc, argv, hint) | |
106 | int argc; | |
107 | char **argv; | |
108 | int *hint; | |
109 | { | |
110 | *hint = atoi(argv[0]); | |
111 | return(UPCALL_STOP); | |
112 | } | |
113 | ||
114 | ||
115 | /* Used by qualified_get_server to make a list of servers to check */ | |
116 | ||
117 | qgetsv(argc, argv, sq) | |
118 | int argc; | |
119 | char **argv; | |
120 | struct save_queue *sq; | |
121 | { | |
122 | sq_save_data(sq, strsave(argv[0])); | |
123 | return(UPCALL_CONT); | |
124 | } | |
125 | ||
126 | ||
127 | /* Used by get_server_info to record all of the returned information */ | |
128 | ||
129 | getsvinfo(argc, argv, sserv) | |
130 | int argc; | |
131 | char **argv; | |
132 | struct service *sserv; | |
133 | { | |
134 | sserv->service = strsave(argv[0]); | |
135 | sserv->interval = atoi(argv[1]); | |
136 | sserv->target = strsave(argv[2]); | |
137 | sserv->script = strsave(argv[3]); | |
138 | sserv->dfgen = atoi(argv[4]); | |
139 | sserv->dfcheck = atoi(argv[5]); | |
140 | sserv->type = strsave(argv[6]); | |
141 | sserv->enable = atoi(argv[7]); | |
142 | sserv->inprogress = atoi(argv[8]); | |
143 | sserv->harderror = atoi(argv[9]); | |
144 | sserv->errmsg = strsave(argv[10]); | |
145 | return(UPCALL_STOP); | |
146 | } | |
147 | ||
148 | ||
149 | /* Scan the services and process any that need it. */ | |
150 | ||
151 | do_services() | |
152 | { | |
153 | char *qargv[6]; | |
154 | struct save_queue *sq, *sq_create(); | |
155 | char *service, dfgen_prog[64], dfgen_cmd[128]; | |
156 | struct service svc; | |
157 | int status, lock_fd, ex; | |
158 | struct timezone tz; | |
159 | register char *p; | |
160 | union wait waits; | |
161 | ||
162 | if (dbg & DBG_VERBOSE) | |
163 | com_err(whoami, 0, "starting pass over services"); | |
164 | ||
165 | qargv[0] = "true"; | |
166 | qargv[1] = "dontcare"; | |
167 | qargv[2] = "false"; | |
168 | sq = sq_create(); | |
169 | if (status = sms_query("qualified_get_server", 3, qargv, qgetsv, sq)) { | |
170 | com_err(whoami, status, " getting services"); | |
171 | leave("query failed"); | |
172 | } | |
173 | while (sq_get_data(sq, &service)) { | |
174 | for (p = service; *p; p++) | |
175 | if (isupper(*p)) | |
176 | *p = tolower(*p); | |
177 | com_err(whoami, 0, "checking %s...", service); | |
178 | qargv[0] = service; | |
179 | sprintf(dfgen_prog, "%s/bin/%s.gen", SMS_DIR, service); | |
180 | if (!file_exists(dfgen_prog)) { | |
181 | com_err(whoami, 0, "prog %s doesn't exist\n", dfgen_prog); | |
182 | free(service); | |
183 | continue; | |
184 | } | |
185 | sprintf(dfgen_cmd, "exec %s %s/dcm/%s.out", | |
186 | dfgen_prog, SMS_DIR, service); | |
187 | gettimeofday(&tv, &tz); | |
188 | if (status = sms_query("get_server_info", 1, qargv, getsvinfo, &svc)) { | |
4e5690ff | 189 | com_err(whoami, status, " getting service %s info, skipping to next service", service); |
190 | continue; | |
846841f4 | 191 | } |
192 | svc.service = strsave(service); | |
193 | qargv[0] = strsave(service); | |
194 | qargv[1] = itoa(svc.dfgen); | |
195 | qargv[2] = itoa(svc.dfcheck); | |
196 | qargv[3] = strsave("0"); | |
197 | qargv[4] = itoa(svc.harderror); | |
198 | qargv[5] = strsave(svc.errmsg); | |
199 | if (svc.interval != 0) { | |
200 | if (svc.interval * 60 + svc.dfcheck < tv.tv_sec) { | |
201 | lock_fd = maybe_lock_update(SMS_DIR, "@db@", service, 1); | |
202 | if (lock_fd < 0) | |
203 | goto free_service; | |
204 | free(qargv[3]); | |
205 | free(qargv[4]); | |
206 | free(qargv[5]); | |
207 | qargv[3] = strsave("1"); | |
208 | qargv[4] = strsave("0"); | |
209 | qargv[5] = strsave(""); | |
210 | status = sms_query("set_server_internal_flags", 6, qargv, | |
211 | scream, NULL); | |
212 | if (status != SMS_SUCCESS) { | |
213 | com_err(whoami, status, " setting server state"); | |
214 | goto free_service; | |
215 | } | |
216 | ||
217 | com_err(whoami, status, " running %s", dfgen_prog); | |
218 | waits.w_status = system(dfgen_cmd); | |
c9c95b8a | 219 | if (waits.w_termsig) { |
220 | status = SMS_TAR_FAIL; | |
221 | com_err(whoami, status, " %s exited on signal %d", | |
222 | dfgen_prog, waits.w_termsig); | |
223 | } else if (waits.w_retcode) { | |
224 | /* extract the process's exit value */ | |
225 | status = waits.w_retcode + sms_err_base; | |
846841f4 | 226 | com_err(whoami, status, " %s exited", dfgen_prog); |
227 | } | |
228 | if (SOFT_FAIL(status)) { | |
229 | free(qargv[5]); | |
230 | qargv[5] = strsave(error_message(status)); | |
231 | } else if (status == SMS_NO_CHANGE) { | |
232 | free(qargv[2]); | |
233 | qargv[2] = itoa(tv.tv_sec); | |
234 | svc.dfcheck = tv.tv_sec; | |
235 | } else if (status == SMS_SUCCESS) { | |
236 | free(qargv[1]); | |
237 | free(qargv[2]); | |
238 | qargv[1] = itoa(tv.tv_sec); | |
239 | qargv[2] = strsave(qargv[1]); | |
240 | svc.dfcheck = svc.dfgen = tv.tv_sec; | |
241 | } else { /* HARD_FAIL(status) */ | |
242 | free(qargv[2]); | |
243 | free(qargv[4]); | |
244 | free(qargv[5]); | |
245 | qargv[2] = itoa(tv.tv_sec); | |
246 | svc.dfcheck = tv.tv_sec; | |
247 | qargv[4] = itoa(status); | |
248 | qargv[5] = strsave(error_message(status)); | |
249 | critical_alert("DCM","DCM building config files for %s: %s", | |
250 | service, qargv[5]); | |
251 | } | |
252 | free_service: | |
253 | free(qargv[3]); | |
254 | qargv[3] = strsave("0"); | |
255 | status = sms_query("set_server_internal_flags", 6, qargv, | |
256 | scream, NULL); | |
4e5690ff | 257 | if (status) { |
258 | com_err(whoami, status, | |
1332916a | 259 | " setting service state, sleeping"); |
260 | sleep(DEADLOCK_WAIT); | |
4e5690ff | 261 | status = sms_query("set_server_internal_flags", 6, qargv, |
262 | scream, NULL); | |
263 | if (status) | |
264 | com_err(whoami, status, " setting service state again"); | |
265 | } | |
846841f4 | 266 | close(lock_fd); |
267 | free(qargv[0]); | |
268 | free(qargv[1]); | |
269 | free(qargv[2]); | |
270 | free(qargv[3]); | |
271 | free(qargv[4]); | |
272 | free(qargv[5]); | |
273 | } | |
274 | if (!strcmp(svc.type, "REPLICAT")) | |
275 | ex = 1; | |
276 | else | |
277 | ex = 0; | |
278 | lock_fd = maybe_lock_update(SMS_DIR, "@db@", service, ex); | |
279 | if (lock_fd >= 0) { | |
280 | do_hosts(&svc); | |
281 | close(lock_fd); | |
282 | } | |
283 | } | |
284 | free(svc.service); | |
285 | free(svc.target); | |
286 | free(svc.script); | |
287 | free(svc.type); | |
288 | free(svc.errmsg); | |
289 | free(service); | |
290 | } | |
291 | sq_destroy(sq); | |
292 | } | |
293 | ||
294 | ||
295 | /* Used by qualified_get_server_host to make a list of hosts to check */ | |
296 | ||
297 | qgethost(argc, argv, sq) | |
298 | int argc; | |
299 | char **argv; | |
300 | struct save_queue *sq; | |
301 | { | |
302 | sq_save_data(sq, strsave(argv[1])); | |
303 | return(UPCALL_CONT); | |
304 | } | |
305 | ||
306 | ||
307 | /* Used by get_server_host_info to store all of the info about a host */ | |
308 | ||
309 | gethostinfo(argc, argv, shost) | |
310 | int argc; | |
311 | char **argv; | |
312 | struct svrhost *shost; | |
313 | { | |
314 | shost->service = strsave(argv[0]); | |
315 | shost->machine = strsave(argv[1]); | |
316 | shost->enable = atoi(argv[2]); | |
317 | shost->override = atoi(argv[3]); | |
318 | shost->success = atoi(argv[4]); | |
319 | shost->inprogress = atoi(argv[5]); | |
320 | shost->hosterror = atoi(argv[6]); | |
321 | shost->errmsg = strsave(argv[7]); | |
322 | shost->lasttry = atoi(argv[8]); | |
323 | shost->lastsuccess = atoi(argv[9]); | |
324 | shost->value1 = atoi(argv[10]); | |
325 | shost->value2 = atoi(argv[11]); | |
326 | shost->value3 = strsave(argv[12]); | |
327 | return(UPCALL_STOP); | |
328 | } | |
329 | ||
330 | ||
331 | /* Scans all of the hosts for a particular service, and processes them. */ | |
332 | ||
333 | do_hosts(svc) | |
334 | struct service *svc; | |
335 | { | |
336 | char *argv[9], *machine; | |
337 | int status, lock_fd; | |
338 | struct save_queue *sq; | |
339 | struct svrhost shost; | |
340 | ||
341 | sq = sq_create(); | |
342 | argv[0] = svc->service; | |
343 | argv[1] = "TRUE"; | |
344 | argv[2] = argv[3] = argv[4] = "DONTCARE"; | |
345 | argv[5] = "FALSE"; | |
346 | status = sms_query("qualified_get_server_host", 6, argv, qgethost, sq); | |
347 | if (status == SMS_NO_MATCH) { | |
348 | return; | |
349 | } else if (status) { | |
350 | com_err(whoami, status, " getting server_hosts for %s", svc->service); | |
351 | return; | |
352 | } | |
353 | while (sq_get_data(sq, &machine)) { | |
354 | if (dbg & DBG_TRACE) | |
355 | com_err(whoami, 0, "checking %s...", machine); | |
356 | argv[1] = machine; | |
357 | status = sms_query("get_server_host_info", 2, argv,gethostinfo, &shost); | |
358 | if (status) { | |
359 | com_err(whoami,status, " getting server_host_info for %s", machine); | |
360 | goto free_mach; | |
361 | } | |
362 | if (!shost.enable || shost.hosterror || | |
4112693b | 363 | (shost.success && !shost.override && |
6899dfbe | 364 | shost.lastsuccess >= svc->dfgen)) { |
846841f4 | 365 | if (dbg & DBG_TRACE) |
366 | com_err(whoami, 0, "not updating %s:%s", svc->service, machine); | |
367 | goto free_mach; | |
368 | } | |
062079b1 | 369 | |
370 | lock_fd = maybe_lock_update(SMS_DIR, machine, svc->service, 1); | |
371 | if (lock_fd < 0) | |
372 | goto free_mach; | |
373 | argv[0] = svc->service; | |
374 | argv[1] = machine; | |
375 | argv[2] = argv[3] = argv[5] = "0"; | |
376 | argv[4] = "1"; | |
377 | argv[6] = strsave(""); | |
378 | argv[7] = itoa(tv.tv_sec); | |
379 | argv[8] = itoa(shost.lastsuccess); | |
380 | status = sms_query("set_server_host_internal", 9, argv,scream,NULL); | |
381 | if (status != SMS_SUCCESS) { | |
382 | com_err(whoami,status," while setting internal state for %s:%s", | |
383 | svc->service, machine); | |
384 | goto free_mach; | |
385 | } | |
386 | status = sms_update_server(svc->service, machine, svc->target, | |
387 | svc->script); | |
388 | if (status == SMS_SUCCESS) { | |
389 | argv[2] = "0"; | |
390 | argv[3] = "1"; | |
391 | free(argv[8]); | |
392 | argv[8] = itoa(tv.tv_sec); | |
393 | } else if (SOFT_FAIL(status)) { | |
394 | free(argv[6]); | |
395 | argv[6] = strsave(error_message(status)); | |
396 | } else { /* HARD_FAIL */ | |
397 | argv[2] = itoa(shost.override); | |
398 | argv[5] = itoa(status); | |
399 | free(argv[6]); | |
400 | argv[6] = strsave(error_message(status)); | |
401 | critical_alert("DCM", "DCM updating %s:%s: %s", | |
402 | machine, svc->service, argv[6]); | |
403 | if (!strcmp(svc->type, "REPLICAT")) { | |
404 | char *qargv[6]; | |
405 | ||
406 | svc->harderror = status; | |
407 | svc->errmsg = strsave(argv[6]); | |
408 | qargv[0] = strsave(svc->service); | |
409 | qargv[1] = itoa(svc->dfgen); | |
410 | qargv[2] = itoa(svc->dfcheck); | |
411 | qargv[3] = strsave("0"); | |
412 | qargv[4] = itoa(svc->harderror); | |
413 | qargv[5] = strsave(svc->errmsg); | |
414 | status = sms_query("set_server_internal_flags", | |
415 | 6, qargv, scream, NULL); | |
4e5690ff | 416 | if (status) { |
417 | com_err(whoami, status, | |
1332916a | 418 | " setting service state, sleeping"); |
419 | sleep(DEADLOCK_WAIT); | |
4e5690ff | 420 | status = sms_query("set_server_internal_flags", |
421 | 6, qargv, scream, NULL); | |
422 | if (status) | |
423 | com_err(whoami, status, " setting service state again"); | |
424 | } | |
062079b1 | 425 | free(qargv[0]); |
426 | free(qargv[1]); | |
427 | free(qargv[2]); | |
428 | free(qargv[3]); | |
429 | free(qargv[4]); | |
430 | free(qargv[5]); | |
431 | close(lock_fd); | |
846841f4 | 432 | free(argv[2]); |
062079b1 | 433 | argv[4] = "0"; |
846841f4 | 434 | free(argv[5]); |
062079b1 | 435 | status = sms_query("set_server_host_internal", |
436 | 9, argv,scream,NULL); | |
4e5690ff | 437 | if (status) { |
438 | com_err(whoami, status, | |
1332916a | 439 | " setting host state, sleeping"); |
440 | sleep(DEADLOCK_WAIT); | |
4e5690ff | 441 | status = sms_query("set_server_host_internal", |
442 | 9, argv,scream,NULL); | |
443 | if (status) | |
444 | com_err(whoami, status, " setting host state again"); | |
445 | } | |
062079b1 | 446 | return(-1); |
846841f4 | 447 | } |
062079b1 | 448 | free(argv[2]); |
449 | free(argv[5]); | |
846841f4 | 450 | } |
062079b1 | 451 | argv[4] = "0"; |
452 | close(lock_fd); | |
453 | status = sms_query("set_server_host_internal", 9, argv,scream,NULL); | |
4e5690ff | 454 | if (status) { |
1332916a | 455 | com_err(whoami, status, " setting host state, sleeping"); |
456 | sleep(DEADLOCK_WAIT); | |
4e5690ff | 457 | status = sms_query("set_server_host_internal", 9, argv,scream,NULL); |
458 | if (status) | |
459 | com_err(whoami, status, " setting host state again"); | |
460 | } | |
846841f4 | 461 | free_mach: |
462 | free(machine); | |
463 | close(lock_fd); | |
464 | } | |
465 | return(0); | |
466 | } |