andersk Git - moira.git/blame - dcm/dcm.pc
do_hosts: on a soft error for a replicated service, exit but _don't_
[moira.git] / dcm / dcm.pc
CommitLineData
7ac48069 1/* $Id$
2a2a3914 2 *
7ac48069 3 * The Data Control Manager for Moira.
2a2a3914 4 *
7ac48069 5 * Copyright (C) 1987-1998 by the Massachusetts Institute of Technology.
6 * For copying and distribution information, see the file
7 * <mit-copyright.h>.
2a2a3914 8 */
9
7ac48069 10#include <mit-copyright.h>
11#include <moira.h>
12#include <moira_site.h>
dfaf9b68 13#include <moira_schema.h>
7ac48069 14#include "update.h"
15
16#include <sys/param.h>
17#include <sys/stat.h>
18#include <sys/wait.h>
2a2a3914 19
85330553 20#include <errno.h>
2a2a3914 21#include <signal.h>
22#include <stdio.h>
7ac48069 23#include <stdlib.h>
2a2a3914 24#include <string.h>
2a2a3914 25#include <unistd.h>
2a2a3914 26
2a2a3914 27EXEC SQL INCLUDE sqlca;
7ac48069 28void sqlglm(char *, unsigned int *, unsigned int *);
29
30RCSID("$Header$");
31
32int generate_service(char *name, int force);
33void do_hosts(char *service);
34int dcm_send_file(char *service, char *host, char *target);
35int dcm_execute(char *service, char *host, char *script);
36void dbmserr(void);
37
2a2a3914 38#define SQL_NO_MATCH 1403
5eaef520 39#define SOFT_FAIL(x) (((x) == MR_NO_MEM) || ((x) == MR_CANT_CONNECT) || ((x) == MR_CCONFIG) || ((x) == MR_DEADLOCK) || ((x) == MR_BUSY) || ((x) == MR_ABORT))
2a2a3914 40
5eaef520 41char whobuf[256], *whoami = whobuf, *db = "moira";
2a2a3914 42
5eaef520 43int main(int argc, char **argv)
2a2a3914 44{
5eaef520 45 int i;
46 EXEC SQL BEGIN DECLARE SECTION;
dfaf9b68 47 char buf[SERVERS_NAME_SIZE], *name;
5eaef520 48 int enable;
49 EXEC SQL END DECLARE SECTION;
50 struct save_queue *sq;
51 int status;
52
53 if (strchr(argv[0], '/'))
54 strcpy(whoami, strrchr(argv[0], '/') + 1);
55 else strcpy(whoami, argv[0]);
56 umask(7);
57
58 setvbuf(stderr, NULL, _IOLBF, BUFSIZ);
59 setvbuf(stdout, NULL, _IOLBF, BUFSIZ);
60
61 initialize_sms_error_table();
62 initialize_krb_error_table();
63
64 /* if services were specified on the command line, do just those ones */
65 if (argc > 1)
66 {
67 for (i = 1; i < argc; i++)
68 {
69 if (generate_service(argv[i], 1))
70 do_hosts(argv[i]);
2a2a3914 71 }
5eaef520 72 exit(0);
2a2a3914 73 }
74
5eaef520 75 /* if DCM is not enabled, exit after logging */
76 if (!access(NODCMFILE, F_OK))
77 {
78 printf("/etc/nodcm exists -- exiting\n");
79 exit(1);
2a2a3914 80 }
5eaef520 81
ff20de45 82 EXEC SQL WHENEVER SQLERROR DO dbmserr();
83
5eaef520 84 EXEC SQL CONNECT :db IDENTIFIED BY :db;
85
86 EXEC SQL SELECT value INTO :enable FROM numvalues WHERE name = 'dcm_enable';
87 if (enable == 0)
88 {
89 printf("dcm_enable not set -- exiting\n");
90 exit(1);
2a2a3914 91 }
5eaef520 92
93 /* fetch list of services */
94 EXEC SQL DECLARE csr_svc CURSOR FOR SELECT LOWER(name) FROM servers
95 WHERE enable = 1 AND harderror = 0 AND update_int > 0;
96 EXEC SQL OPEN csr_svc;
97 sq = sq_create();
98 while (1)
99 {
100 EXEC SQL FETCH csr_svc INTO :buf;
101 if (sqlca.sqlcode)
102 break;
103
104 sq_save_data(sq, strdup(strtrim(buf)));
2a2a3914 105 }
5eaef520 106 EXEC SQL CLOSE csr_svc;
107 /* we will repeatedly open and close the db since it seems to get
108 upset if you keep it open across a fork */
109 EXEC SQL COMMIT RELEASE;
110
111 /* Now run through list */
112 while (sq_get_data(sq, &name))
113 {
114 if (generate_service(name, 0))
115 {
116 switch (fork())
117 {
118 case -1:
ff20de45 119 com_err(whoami, errno, "forking for service %s -- exiting",
5eaef520 120 name);
121 exit(1);
122 case 0:
ff20de45 123 sprintf(strchr(whoami, '\0'), " (%s:%ld)", name, (long)getpid());
5eaef520 124 do_hosts(name);
125 com_err(whoami, 0, "exiting");
126 exit(0);
127 default:
128 break;
2a2a3914 129 }
130 }
131 }
5eaef520 132
ff20de45 133 com_err(whoami, 0, "All files generated. Waiting for children to exit");
5eaef520 134 while (waitpid(0, &status, 0) > 0)
135 ;
136 com_err(whoami, 0, "exiting");
7ac48069 137 exit(0);
2a2a3914 138}
139
140int generate_service(char *name, int force)
141{
5eaef520 142 EXEC SQL BEGIN DECLARE SECTION;
143 int interval, dfcheck, status;
144 time_t now;
7ac48069 145 const char *errmsg;
5eaef520 146 EXEC SQL END DECLARE SECTION;
dfaf9b68 147 char dfgen_prog[MAXPATHLEN], dfgen_cmd[2 * MAXPATHLEN];
5eaef520 148 struct sigaction action, prevaction;
149 int waits;
150
151 EXEC SQL CONNECT :db IDENTIFIED BY :db;
152
dfaf9b68 153 EXEC SQL SELECT update_int, dfcheck INTO :interval, :dfcheck
5eaef520 154 FROM servers WHERE name = UPPER(:name);
155 if (sqlca.sqlcode == SQL_NO_MATCH)
156 {
157 com_err(whoami, 0, "No such service `%s'", name);
158 EXEC SQL COMMIT RELEASE;
159 return 0;
2a2a3914 160 }
5eaef520 161
162 time(&now);
163
164 if ((interval * 60 + dfcheck < now) || force)
165 {
166 sprintf(dfgen_prog, "%s/%s.gen", BIN_DIR, name);
167 if (access(dfgen_prog, F_OK) != 0)
168 {
169 com_err(whoami, 0, "prog %s doesn't exist", dfgen_prog);
170 EXEC SQL COMMIT RELEASE;
171 return 0;
2a2a3914 172 }
5eaef520 173 sprintf(dfgen_cmd, "exec %s %s/%s.out", dfgen_prog, DCM_DIR, name);
174 com_err(whoami, 0, "running %s", dfgen_prog);
175
ff20de45 176 EXEC SQL WHENEVER SQLERROR GOTO gen_cleanup;
177
5eaef520 178 EXEC SQL UPDATE servers SET inprogress = 1
179 WHERE name = UPPER(:name);
ff20de45 180 EXEC SQL COMMIT;
5eaef520 181
182 action.sa_flags = 0;
183 sigemptyset(&action.sa_mask);
184 action.sa_handler = SIG_DFL;
185 sigaction(SIGCHLD, &action, &prevaction);
186 waits = system(dfgen_cmd);
187 sigaction(SIGCHLD, &prevaction, NULL);
188 if (WIFSIGNALED(waits))
189 {
190 status = MR_COREDUMP;
191 com_err(whoami, status, " %s exited on signal %d",
192 dfgen_prog, WTERMSIG(waits));
193 }
194 else if (WEXITSTATUS(waits))
195 {
196 /* extract the process's exit value */
197 status = WEXITSTATUS(waits) + ERROR_TABLE_BASE_sms;
198 if (status != MR_NO_CHANGE)
199 com_err(whoami, status, "in %s", dfgen_prog);
200 }
201 else
202 status = MR_SUCCESS;
203
204 if (status == MR_SUCCESS)
205 {
206 EXEC SQL UPDATE servers SET dfgen = :now, dfcheck = :now,
207 inprogress = 0 WHERE name = UPPER(:name);
208 EXEC SQL COMMIT RELEASE;
209 return 1;
210 }
211 else if (status == MR_NO_CHANGE)
212 {
213 EXEC SQL UPDATE servers SET dfcheck = :now, inprogress = 0
214 WHERE name = UPPER(:name);
215 }
216 else if (SOFT_FAIL(status))
217 {
218 errmsg = error_message(status);
219 EXEC SQL UPDATE servers SET errmsg = :errmsg, inprogress = 0
220 WHERE name = UPPER(:name);
221 }
222 else /* HARD_FAIL(status) */
223 {
224 errmsg = error_message(status);
ff20de45 225 EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg,
226 inprogress = 0 WHERE name = UPPER(:name);
5eaef520 227 critical_alert("DCM", "DCM building config files for %s: %s",
228 name, errmsg);
2a2a3914 229 }
230 }
5eaef520 231 EXEC SQL COMMIT RELEASE;
232 return 0;
ff20de45 233
234gen_cleanup:
235 EXEC SQL WHENEVER SQLERROR DO dbmserr();
236 EXEC SQL UPDATE servers SET inprogress = 0, harderror = MR_INTERNAL,
237 errmsg = 'DBMS Internal Error' WHERE name = UPPER(:name);
238 dbmserr();
2a2a3914 239}
240
241void do_hosts(char *service)
242{
5eaef520 243 EXEC SQL BEGIN DECLARE SECTION;
dae7c89e 244 char type[SERVERS_TYPE_SIZE], host[MACHINE_NAME_SIZE], *name;
dfaf9b68 245 char target[SERVERS_TARGET_FILE_SIZE], script[SERVERS_SCRIPT_SIZE];
7ac48069 246 const char *errmsg;
ff20de45 247 int status = 0, dfgen, replicated, mid;
5eaef520 248 time_t now;
249 EXEC SQL END DECLARE SECTION;
dae7c89e 250 struct save_queue *sq;
5eaef520 251
252 time(&now);
85330553 253 mr_init();
5eaef520 254
255 EXEC SQL CONNECT :db IDENTIFIED BY :db;
256
257 EXEC SQL SELECT dfgen, type, target_file, script
258 INTO :dfgen, :type, :target, :script
259 FROM servers WHERE name = UPPER(:service);
260 replicated = !strncmp(type, "REPLICAT", 8);
ff20de45 261 strtrim(target);
262 strtrim(script);
5eaef520 263
264 EXEC SQL DECLARE csr_hst1 CURSOR FOR
ff20de45 265 SELECT m.name, m.mach_id FROM machine m, serverhosts sh
5eaef520 266 WHERE sh.service = UPPER(:service)
267 AND sh.enable = 1 AND sh.hosterror = 0
ff20de45 268 AND sh.lts < :dfgen AND sh.mach_id = m.mach_id;
5eaef520 269 EXEC SQL OPEN csr_hst1;
ff20de45 270 sq = sq_create();
5eaef520 271 while (1)
272 {
ff20de45 273 EXEC SQL FETCH csr_hst1 INTO :host, mid;
5eaef520 274 if (sqlca.sqlcode == SQL_NO_MATCH)
275 break;
276
ff20de45 277 sq_save_data(sq, strdup(strtrim(host)));
278 sq_save_data(sq, (void *)mid);
279 }
280 EXEC SQL CLOSE csr_hst1;
281
282 EXEC SQL WHENEVER SQLERROR GOTO host_cleanup;
283 while (sq_get_data(sq, &name))
284 {
285 sq_get_data(sq, &mid);
286 com_err(whoami, 0, "sending %s data to %s", service, name);
5eaef520 287 EXEC SQL UPDATE serverhosts SET inprogress = 1
ff20de45 288 WHERE service = UPPER(:service) AND mach_id = :mid;
289 EXEC SQL COMMIT;
290 status = dcm_send_file(service, name, target);
5eaef520 291 if (status)
292 {
293 errmsg = error_message(status);
294 EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
ff20de45 295 inprogress = 0, success = 0, ltt = :now
dae7c89e 296 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 297 if (!SOFT_FAIL(status))
298 {
299 EXEC SQL UPDATE serverhosts SET hosterror = :status
dae7c89e 300 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 301 critical_alert("DCM", "DCM updating %s:%s: %s",
ff20de45 302 service, name, errmsg);
2a2a3914 303 }
ff20de45 304 EXEC SQL COMMIT;
5eaef520 305
306 if (replicated)
307 break;
2a2a3914 308 }
309 }
ff20de45 310 sq_destroy(sq);
5eaef520 311
312 if (status == MR_SUCCESS || !replicated)
313 {
314 EXEC SQL DECLARE csr_hst2 CURSOR FOR
ff20de45 315 SELECT m.name, m.mach_id FROM machine m, serverhosts sh
5eaef520 316 WHERE sh.service = UPPER(:service) AND sh.inprogress = 1
ff20de45 317 AND sh.mach_id = m.mach_id;
5eaef520 318 EXEC SQL OPEN csr_hst2;
ff20de45 319 sq = sq_create();
5eaef520 320
321 while (1)
322 {
ff20de45 323 EXEC SQL FETCH csr_hst2 INTO :host, :mid;
5eaef520 324 if (sqlca.sqlcode == SQL_NO_MATCH)
325 break;
326
ff20de45 327 sq_save_data(sq, strdup(strtrim(host)));
328 sq_save_data(sq, (void *)mid);
329 }
330 EXEC SQL CLOSE csr_hst2;
331
332 while (sq_get_data(sq, &name))
333 {
334 sq_get_data(sq, &mid);
335
336 com_err(whoami, 0, "executing instructions on %s", name);
337 status = dcm_execute(service, name, script);
5eaef520 338 if (status)
339 {
340 errmsg = error_message(status);
341 EXEC SQL UPDATE serverhosts SET hosterrmsg = :errmsg,
ff20de45 342 inprogress = 0, success = 0, ltt = :now
343 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 344 if (!SOFT_FAIL(status))
345 {
346 EXEC SQL UPDATE serverhosts SET hosterror = :status
ff20de45 347 WHERE service = UPPER(:service) AND mach_id = :mid;
5eaef520 348 critical_alert("DCM", "DCM updating %s:%s: %s",
f272b0e1 349 service, name, errmsg);
2a2a3914 350 }
5eaef520 351
352 if (replicated)
353 {
354 /* We're giving up, so clear the inprogress flag on
355 any hosts in this service we haven't gotten to yet */
356 EXEC SQL UPDATE serverhosts SET inprogress = 0
357 WHERE service = UPPER(:service);
358 break;
2a2a3914 359 }
5eaef520 360 }
361 else
362 {
ff20de45 363 EXEC SQL UPDATE serverhosts SET inprogress = 0, ltt = :now,
364 lts = :now, success = 1 WHERE service = UPPER(:service)
365 AND mach_id = :mid;
2a2a3914 366 }
ff20de45 367 EXEC SQL COMMIT;
2a2a3914 368 }
5eaef520 369 EXEC SQL CLOSE csr_hst2;
2a2a3914 370 }
371
ff20de45 372 EXEC SQL WHENEVER SQLERROR DO dbmserr();
ed43b350 373 if (status && !SOFT_FAIL(status) && replicated)
5eaef520 374 {
375 EXEC SQL UPDATE servers SET harderror = :status, errmsg = :errmsg
376 WHERE name = UPPER(:service);
2a2a3914 377 }
378
5eaef520 379 EXEC SQL COMMIT RELEASE;
ff20de45 380 return;
381
382host_cleanup:
383 EXEC SQL UPDATE serverhosts SET inprogress = 0, success = 0, ltt = :now,
384 hosterror = MR_INTERNAL, hosterrmsg = 'DBMS Internal Error'
385 WHERE service = UPPER(:service) AND mach_id = :mid;
386 if (replicated)
387 {
388 EXEC SQL UPDATE servers SET harderror = MR_INTERNAL,
389 errmsg = 'DBMS Internal Error' WHERE name = UPPER(:service);
390 }
2a2a3914 391}
392
393int dcm_send_file(char *service, char *host, char *target)
394{
85330553 395 char data[MAXPATHLEN];
396 int code, conn;
5eaef520 397
85330553 398 conn = mr_connect_internal(host, "moira_update");
399 if (!conn)
5eaef520 400 {
85330553 401 com_err(whoami, errno, "can't connect to %s", host);
5eaef520 402 return MR_CANT_CONNECT;
2a2a3914 403 }
404
85330553 405 code = send_auth(conn, host);
5eaef520 406 if (code)
407 {
408 com_err(whoami, code, "authenticating to %s", host);
409 goto done;
2a2a3914 410 }
411
5eaef520 412 sprintf(data, "%s/%s.out", DCM_DIR, service);
85330553 413 code = send_file(conn, data, target, 0);
5eaef520 414 if (code)
415 com_err(whoami, code, "sending data to %s", host);
2a2a3914 416
417done:
85330553 418 send_quit(conn);
419 close(conn);
5eaef520 420 return code;
2a2a3914 421}
422
423int dcm_execute(char *service, char *host, char *script)
424{
85330553 425 char inst[MAXPATHLEN];
426 int code, conn;
5eaef520 427
85330553 428 conn = mr_connect_internal(host, "moira_update");
429 if (!conn)
5eaef520 430 {
85330553 431 com_err(whoami, errno, "can't connect to %s", host);
5eaef520 432 return MR_CANT_CONNECT;
2a2a3914 433 }
434
85330553 435 code = send_auth(conn, host);
5eaef520 436 if (code)
437 {
438 com_err(whoami, code, "authenticating to %s", host);
439 goto done;
2a2a3914 440 }
441
5eaef520 442 sprintf(inst, "/tmp/moira-update.XXXXXX");
443 mktemp(inst);
85330553 444 code = send_file(conn, script, inst, 0);
5eaef520 445 if (code)
446 {
447 com_err(whoami, code, "sending instructions to %s", host);
448 goto done;
2a2a3914 449 }
450
85330553 451 code = execute(conn, inst);
5eaef520 452 if (code)
453 com_err(whoami, code, "executing instructions on %s", host);
2a2a3914 454
455done:
85330553 456 send_quit(conn);
457 close(conn);
5eaef520 458 return code;
459}
2a2a3914 460
461void dbmserr(void)
462{
5eaef520 463 EXEC SQL BEGIN DECLARE SECTION;
464 char err_msg[256];
465 EXEC SQL END DECLARE SECTION;
466 int bufsize = 256, msglength = 0;
467
468 sqlglm(err_msg, &bufsize, &msglength);
469 err_msg[msglength] = '\0';
470 com_err(whoami, 0, "Encountered SQL error:\n%s", err_msg);
471 com_err(whoami, 0, "exiting");
472 exit(1);
2a2a3914 473}
This page took 0.129298 seconds and 5 git commands to generate.