| | varnish-cache/bin/varnishd/mgt/mgt_child.c |
0 |
|
/*- |
1 |
|
* Copyright (c) 2006 Verdens Gang AS |
2 |
|
* Copyright (c) 2006-2015 Varnish Software AS |
3 |
|
* All rights reserved. |
4 |
|
* |
5 |
|
* Author: Poul-Henning Kamp <phk@phk.freebsd.dk> |
6 |
|
* |
7 |
|
* SPDX-License-Identifier: BSD-2-Clause |
8 |
|
* |
9 |
|
* Redistribution and use in source and binary forms, with or without |
10 |
|
* modification, are permitted provided that the following conditions |
11 |
|
* are met: |
12 |
|
* 1. Redistributions of source code must retain the above copyright |
13 |
|
* notice, this list of conditions and the following disclaimer. |
14 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
15 |
|
* notice, this list of conditions and the following disclaimer in the |
16 |
|
* documentation and/or other materials provided with the distribution. |
17 |
|
* |
18 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
19 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
20 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
21 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
22 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
23 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
24 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
25 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
26 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
27 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
28 |
|
* SUCH DAMAGE. |
29 |
|
* |
30 |
|
* The mechanics of handling the child process |
31 |
|
*/ |
32 |
|
|
33 |
|
#include "config.h" |
34 |
|
|
35 |
|
#include <sys/types.h> |
36 |
|
|
37 |
|
#include <poll.h> |
38 |
|
#include <stdarg.h> |
39 |
|
#include <stdio.h> |
40 |
|
#include <string.h> |
41 |
|
#include <syslog.h> |
42 |
|
#include <unistd.h> |
43 |
|
#include <sys/types.h> |
44 |
|
#include <sys/socket.h> |
45 |
|
#include <sys/time.h> |
46 |
|
#include <sys/resource.h> |
47 |
|
|
48 |
|
#include "mgt.h" |
49 |
|
#include "acceptor/cache_acceptor.h" |
50 |
|
#include "acceptor/mgt_acceptor.h" |
51 |
|
|
52 |
|
#include "vapi/vsig.h" |
53 |
|
|
54 |
|
#include "vbm.h" |
55 |
|
#include "vcli_serve.h" |
56 |
|
#include "vev.h" |
57 |
|
#include "vfil.h" |
58 |
|
#include "vlu.h" |
59 |
|
#include "vtim.h" |
60 |
|
|
61 |
|
#include "common/heritage.h" |
62 |
|
|
63 |
|
/* PID of the current child process; -1 while there is none */
static pid_t child_pid = -1;

/* File descriptors marked for inheritance, see MCH_Fd_Inherit() */
static struct vbitmap *fd_map;

/* Manager-side ends of the CLI socketpair and of the child output pipe */
static int child_cli_fd = -1;
static int child_output = -1;

/*
 * Life-cycle state of the child.
 * The enumerators are numbered 0..4 in declaration order and are used
 * as indices into ch_state[] below.
 */
static enum {
	CH_STOPPED,
	CH_STARTING,
	CH_RUNNING,
	CH_STOPPING,
	CH_DIED
} child_state = CH_STOPPED;

/* Human readable names for child_state, as reported over the CLI */
static const char * const ch_state[] = {
	[CH_STOPPED]	= "stopped",
	[CH_STARTING]	= "starting",
	[CH_RUNNING]	= "running",
	[CH_STOPPING]	= "stopping",
	[CH_DIED]	= "died, (restarting)",
};

/* Events registered for the child, and the line splitter for its output */
static struct vev *ev_poker;
static struct vev *ev_listen;
static struct vlu *child_std_vlu;

/* Last recorded panic message; NULL when none is held */
static struct vsb *child_panic;

/* Defined further down, needed by the launch and listener code */
static void mgt_reap_child(void);
static int kill_child(void);
94 |
|
|
95 |
|
/*===================================================================== |
96 |
|
* Panic string evacuation and handling |
97 |
|
*/ |
98 |
|
|
99 |
|
static void |
100 |
518 |
mgt_panic_record(pid_t r) |
101 |
|
{ |
102 |
|
char time_str[30]; |
103 |
|
|
104 |
518 |
if (child_panic != NULL) |
105 |
0 |
VSB_destroy(&child_panic); |
106 |
518 |
child_panic = VSB_new_auto(); |
107 |
518 |
AN(child_panic); |
108 |
518 |
VTIM_format(VTIM_real(), time_str); |
109 |
518 |
VSB_printf(child_panic, "Panic at: %s\n", time_str); |
110 |
1036 |
VSB_quote(child_panic, heritage.panic_str, |
111 |
518 |
strnlen(heritage.panic_str, heritage.panic_str_len), |
112 |
|
VSB_QUOTE_NONL); |
113 |
518 |
AZ(VSB_finish(child_panic)); |
114 |
518 |
MGT_Complain(C_ERR, "Child (%jd) %s", |
115 |
518 |
(intmax_t)r, VSB_data(child_panic)); |
116 |
518 |
} |
117 |
|
|
118 |
|
static void |
119 |
440 |
mgt_panic_clear(void) |
120 |
|
{ |
121 |
440 |
VSB_destroy(&child_panic); |
122 |
440 |
} |
123 |
|
|
124 |
|
static void |
125 |
37520 |
cli_panic_show(struct cli *cli, const char * const *av, int json) |
126 |
|
{ |
127 |
37520 |
if (!child_panic) { |
128 |
37080 |
VCLI_SetResult(cli, CLIS_CANT); |
129 |
37080 |
VCLI_Out(cli, |
130 |
|
"Child has not panicked or panic has been cleared"); |
131 |
37080 |
return; |
132 |
|
} |
133 |
|
|
134 |
440 |
if (!json) { |
135 |
280 |
VCLI_Out(cli, "%s\n", VSB_data(child_panic)); |
136 |
280 |
return; |
137 |
|
} |
138 |
|
|
139 |
160 |
VCLI_JSON_begin(cli, 2, av); |
140 |
160 |
VCLI_Out(cli, ",\n"); |
141 |
160 |
VCLI_JSON_str(cli, VSB_data(child_panic)); |
142 |
160 |
VCLI_JSON_end(cli); |
143 |
37520 |
} |
144 |
|
|
145 |
|
static void v_matchproto_(cli_func_t) |
146 |
37360 |
mch_cli_panic_show(struct cli *cli, const char * const *av, void *priv) |
147 |
|
{ |
148 |
37360 |
(void)priv; |
149 |
37360 |
cli_panic_show(cli, av, 0); |
150 |
37360 |
} |
151 |
|
|
152 |
|
static void v_matchproto_(cli_func_t) |
153 |
160 |
mch_cli_panic_show_json(struct cli *cli, const char * const *av, void *priv) |
154 |
|
{ |
155 |
160 |
(void)priv; |
156 |
160 |
cli_panic_show(cli, av, 1); |
157 |
160 |
} |
158 |
|
|
159 |
|
static void v_matchproto_(cli_func_t) |
160 |
560 |
mch_cli_panic_clear(struct cli *cli, const char * const *av, void *priv) |
161 |
|
{ |
162 |
560 |
(void)priv; |
163 |
|
|
164 |
560 |
if (av[2] != NULL && strcmp(av[2], "-z")) { |
165 |
0 |
VCLI_SetResult(cli, CLIS_PARAM); |
166 |
0 |
VCLI_Out(cli, "Unknown parameter \"%s\".", av[2]); |
167 |
0 |
return; |
168 |
80 |
} else if (av[2] != NULL) { |
169 |
80 |
VSC_C_mgt->child_panic = 0; |
170 |
80 |
if (child_panic == NULL) |
171 |
40 |
return; |
172 |
40 |
} |
173 |
520 |
if (child_panic == NULL) { |
174 |
80 |
VCLI_SetResult(cli, CLIS_CANT); |
175 |
80 |
VCLI_Out(cli, "No panic to clear"); |
176 |
80 |
return; |
177 |
|
} |
178 |
440 |
mgt_panic_clear(); |
179 |
560 |
} |
180 |
|
|
181 |
|
/*===================================================================== |
182 |
|
* Track the highest file descriptor the parent knows is being used. |
183 |
|
* |
184 |
|
* This allows the child process to clean/close only a small fraction |
185 |
|
* of the possible file descriptors after exec(2). |
186 |
|
* |
187 |
|
* This is likely to be a bit on the low side, as libc and other libraries |
188 |
|
* have a tendency to cache file descriptors (syslog, resolver, etc.) |
189 |
|
* so we add a margin of 10 fds. |
190 |
|
* |
191 |
|
* For added safety, we check that we see no file descriptor open for |
192 |
|
* another margin above the limit for which we close by design |
193 |
|
*/ |
194 |
|
|
195 |
|
/* Highest file descriptor number passed to MCH_TrackHighFd() so far */
static int mgt_max_fd;

/* Upper bounds (inclusive) of the fd ranges derived from mgt_max_fd */
#define CLOSE_FD_UP_TO	(mgt_max_fd + 10)
#define CHECK_FD_UP_TO	(CLOSE_FD_UP_TO + 10)

/*
 * Remember fd if it is the highest file descriptor seen so far.
 */
void
MCH_TrackHighFd(int fd)
{

	/*
	 * Zero and negative values are rejected: they indicate a bogus
	 * open, since we know where stdin goes in the master process.
	 */
	assert(fd > 0);

	if (fd > mgt_max_fd)
		mgt_max_fd = fd;
}
210 |
|
|
211 |
|
/*-------------------------------------------------------------------- |
212 |
|
* Keep track of which filedescriptors the child should inherit and |
213 |
|
* which should be closed after fork() |
214 |
|
*/ |
215 |
|
|
216 |
|
void |
217 |
230280 |
MCH_Fd_Inherit(int fd, const char *what) |
218 |
|
{ |
219 |
|
|
220 |
230280 |
assert(fd >= 0); |
221 |
|
// XXX why? |
222 |
230280 |
if (fd > 0) |
223 |
230280 |
MCH_TrackHighFd(fd); |
224 |
422280 |
if (fd_map == NULL) |
225 |
38280 |
fd_map = vbit_new(128); |
226 |
230280 |
AN(fd_map); |
227 |
230280 |
if (what != NULL) |
228 |
154560 |
vbit_set(fd_map, fd); |
229 |
|
else |
230 |
75720 |
vbit_clr(fd_map, fd); |
231 |
230280 |
} |
232 |
|
|
233 |
|
/*===================================================================== |
234 |
|
* Listen to stdout+stderr from the child |
235 |
|
*/ |
236 |
|
|
237 |
|
static const char *whining_child = C_ERR; |
238 |
|
|
239 |
|
static int v_matchproto_(vlu_f) |
240 |
103840 |
child_line(void *priv, const char *p) |
241 |
|
{ |
242 |
103840 |
(void)priv; |
243 |
|
|
244 |
103840 |
MGT_Complain(whining_child, "Child (%jd) said %s", (intmax_t)child_pid, p); |
245 |
103840 |
return (0); |
246 |
|
} |
247 |
|
|
248 |
|
/*-------------------------------------------------------------------- |
249 |
|
* NB: Notice cleanup call from mgt_reap_child() |
250 |
|
*/ |
251 |
|
|
252 |
|
static int v_matchproto_(vev_cb_f) |
253 |
165945 |
child_listener(const struct vev *e, int what) |
254 |
|
{ |
255 |
|
|
256 |
165945 |
if ((what & ~VEV__RD) || VLU_Fd(child_std_vlu, child_output)) { |
257 |
360 |
ev_listen = NULL; |
258 |
360 |
if (e != NULL) |
259 |
360 |
mgt_reap_child(); |
260 |
84210 |
return (1); |
261 |
|
} |
262 |
81735 |
return (0); |
263 |
165945 |
} |
264 |
|
|
265 |
|
/*===================================================================== |
266 |
|
* Periodically poke the child, to see that it still lives |
267 |
|
*/ |
268 |
|
|
269 |
|
static int v_matchproto_(vev_cb_f) |
270 |
7443 |
child_poker(const struct vev *e, int what) |
271 |
|
{ |
272 |
7443 |
char *r = NULL; |
273 |
|
unsigned status; |
274 |
|
|
275 |
7443 |
(void)e; |
276 |
7443 |
(void)what; |
277 |
7443 |
if (child_state != CH_RUNNING) |
278 |
0 |
return (1); |
279 |
7443 |
if (child_pid < 0) |
280 |
0 |
return (0); |
281 |
7443 |
if (mgt_cli_askchild(&status, &r, "ping\n") || strncmp("PONG ", r, 5)) { |
282 |
0 |
MGT_Complain(C_ERR, "Unexpected reply from ping: %u %s", |
283 |
0 |
status, r); |
284 |
0 |
if (status != CLIS_COMMS) |
285 |
0 |
MCH_Cli_Fail(); |
286 |
0 |
} |
287 |
7443 |
free(r); |
288 |
7443 |
return (0); |
289 |
7443 |
} |
290 |
|
|
291 |
|
/*===================================================================== |
292 |
|
* Launch the child process |
293 |
|
*/ |
294 |
|
|
295 |
|
/*
 * Report a launch failure: the message is always logged through
 * MGT_Complain() at C_ERR.  When cli is not NULL, the same formatted
 * message is also sent to it and its result is set to status.
 *
 * NB: the variadic arguments are expanded twice and cli more than once,
 * so none of them may have side effects.
 */
#define mgt_launch_err(cli, status, ...) do {		\
		MGT_Complain(C_ERR, __VA_ARGS__);	\
		if ((cli) == NULL)			\
			break;				\
		VCLI_Out((cli), __VA_ARGS__);		\
		VCLI_SetResult((cli), (status));	\
	} while (0)
302 |
|
|
303 |
|
static void |
304 |
37600 |
mgt_launch_child(struct cli *cli) |
305 |
|
{ |
306 |
|
pid_t pid; |
307 |
|
unsigned u; |
308 |
|
char *p; |
309 |
|
struct vev *e; |
310 |
|
int i, cp[2]; |
311 |
|
struct rlimit rl[1]; |
312 |
|
vtim_dur dstart; |
313 |
|
int bstart; |
314 |
|
vtim_mono t0; |
315 |
|
|
316 |
37600 |
if (child_state != CH_STOPPED && child_state != CH_DIED) |
317 |
0 |
return; |
318 |
|
|
319 |
37600 |
child_state = CH_STARTING; |
320 |
|
|
321 |
|
/* Open pipe for mgt->child CLI */ |
322 |
37600 |
AZ(socketpair(AF_UNIX, SOCK_STREAM, 0, cp)); |
323 |
37600 |
heritage.cli_fd = cp[0]; |
324 |
37600 |
assert(cp[0] > STDERR_FILENO); // See #2782 |
325 |
37600 |
assert(cp[1] > STDERR_FILENO); |
326 |
37600 |
MCH_Fd_Inherit(heritage.cli_fd, "cli_fd"); |
327 |
37600 |
child_cli_fd = cp[1]; |
328 |
|
|
329 |
|
/* |
330 |
|
* Open pipe for child stdout/err |
331 |
|
* NB: not inherited, because we dup2() it to stdout/stderr in child |
332 |
|
*/ |
333 |
37600 |
AZ(pipe(cp)); |
334 |
37600 |
heritage.std_fd = cp[1]; |
335 |
37600 |
child_output = cp[0]; |
336 |
|
|
337 |
37600 |
mgt_SHM_ChildNew(); |
338 |
|
|
339 |
37600 |
AN(heritage.param); |
340 |
37600 |
AN(heritage.panic_str); |
341 |
37600 |
VJ_master(JAIL_MASTER_SYSTEM); |
342 |
37600 |
if ((pid = fork()) < 0) { |
343 |
0 |
VJ_master(JAIL_MASTER_LOW); |
344 |
0 |
perror("Could not fork child"); |
345 |
0 |
exit(1); // XXX Harsh ? |
346 |
|
} |
347 |
74436 |
if (pid == 0) { |
348 |
|
|
349 |
36836 |
if (MGT_FEATURE(FEATURE_NO_COREDUMP)) { |
350 |
598 |
memset(rl, 0, sizeof *rl); |
351 |
598 |
rl->rlim_cur = 0; |
352 |
598 |
AZ(setrlimit(RLIMIT_CORE, rl)); |
353 |
598 |
} |
354 |
|
|
355 |
|
/* Redirect stdin/out/err */ |
356 |
36836 |
VFIL_null_fd(STDIN_FILENO); |
357 |
36836 |
assert(dup2(heritage.std_fd, STDOUT_FILENO) == STDOUT_FILENO); |
358 |
36836 |
assert(dup2(heritage.std_fd, STDERR_FILENO) == STDERR_FILENO); |
359 |
|
|
360 |
36836 |
setbuf(stdout, NULL); |
361 |
36836 |
setbuf(stderr, NULL); |
362 |
36836 |
printf("Child starts\n"); |
363 |
|
|
364 |
|
/* |
365 |
|
* Close all FDs the child shouldn't know about |
366 |
|
* |
367 |
|
* We cannot just close these filedescriptors, some random |
368 |
|
* library routine might miss it later on and wantonly close |
369 |
|
* a FD we use at that point in time. (See bug #1841). |
370 |
|
* We close the FD and replace it with /dev/null instead, |
371 |
|
* That prevents security leakage, and gives the library |
372 |
|
* code a valid FD to close when it discovers the changed |
373 |
|
* circumstances. |
374 |
|
*/ |
375 |
36836 |
closelog(); |
376 |
|
|
377 |
847666 |
for (i = STDERR_FILENO + 1; i <= CLOSE_FD_UP_TO; i++) { |
378 |
810830 |
if (vbit_test(fd_map, i)) |
379 |
110986 |
continue; |
380 |
699844 |
if (close(i) == 0) |
381 |
331246 |
VFIL_null_fd(i); |
382 |
699844 |
} |
383 |
405196 |
for (i = CLOSE_FD_UP_TO + 1; i <= CHECK_FD_UP_TO; i++) { |
384 |
368360 |
assert(close(i) == -1); |
385 |
368360 |
assert(errno == EBADF); |
386 |
368360 |
} |
387 |
|
|
388 |
36836 |
mgt_ProcTitle("Child"); |
389 |
|
|
390 |
36836 |
heritage.cls = mgt_cls; |
391 |
36836 |
heritage.ident = VSB_data(vident) + 1; |
392 |
|
|
393 |
36836 |
vext_load(); |
394 |
|
|
395 |
36836 |
STV_Init(); |
396 |
|
|
397 |
36836 |
VJ_subproc(JAIL_SUBPROC_WORKER); |
398 |
|
|
399 |
|
/* |
400 |
|
* We pass these two params because child_main needs them |
401 |
|
* well before it has found its own param struct. |
402 |
|
*/ |
403 |
73672 |
child_main(mgt_param.sigsegv_handler, |
404 |
36836 |
mgt_param.wthread_stacksize); |
405 |
|
|
406 |
|
/* |
407 |
|
* It would be natural to clean VSMW up here, but it is apt |
408 |
|
* to fail in some scenarios because of the fall-back |
409 |
|
* "rm -rf" in mgt_SHM_ChildDestroy() which is there to |
410 |
|
* catch the cases were we don't get here. |
411 |
|
*/ |
412 |
|
// VSMW_Destroy(&heritage.proc_vsmw); |
413 |
|
|
414 |
36836 |
exit(0); |
415 |
|
} |
416 |
37600 |
VJ_master(JAIL_MASTER_LOW); |
417 |
37600 |
assert(pid > 1); |
418 |
37600 |
MGT_Complain(C_DEBUG, "Child (%jd) Started", (intmax_t)pid); |
419 |
37600 |
VSC_C_mgt->child_start++; |
420 |
|
|
421 |
|
/* Close stuff the child got */ |
422 |
37600 |
closefd(&heritage.std_fd); |
423 |
|
|
424 |
37600 |
MCH_Fd_Inherit(heritage.cli_fd, NULL); |
425 |
37600 |
closefd(&heritage.cli_fd); |
426 |
|
|
427 |
37600 |
child_std_vlu = VLU_New(child_line, NULL, 0); |
428 |
37600 |
AN(child_std_vlu); |
429 |
|
|
430 |
|
/* Wait for cache/cache_cli.c::CLI_Run() to check in */ |
431 |
37600 |
bstart = mgt_param.startup_timeout >= mgt_param.cli_timeout; |
432 |
37600 |
dstart = bstart ? mgt_param.startup_timeout : mgt_param.cli_timeout; |
433 |
37600 |
t0 = VTIM_mono(); |
434 |
37600 |
u = mgt_cli_start_child(child_cli_fd, dstart); |
435 |
37600 |
if (u != CLIS_OK) { |
436 |
320 |
assert(u == CLIS_COMMS); |
437 |
320 |
if (VTIM_mono() - t0 < dstart) |
438 |
160 |
mgt_launch_err(cli, u, "Child failed on launch "); |
439 |
|
else |
440 |
160 |
mgt_launch_err(cli, u, "Child failed on launch " |
441 |
|
"within %s_timeout=%.2fs%s", |
442 |
|
bstart ? "startup" : "cli", dstart, |
443 |
|
bstart ? "" : " (tip: set startup_timeout)"); |
444 |
320 |
child_pid = pid; |
445 |
320 |
(void)kill_child(); |
446 |
320 |
mgt_reap_child(); |
447 |
320 |
child_state = CH_STOPPED; |
448 |
320 |
return; |
449 |
|
} else { |
450 |
37280 |
assert(u == CLIS_OK); |
451 |
37280 |
fprintf(stderr, "Child launched OK\n"); |
452 |
|
} |
453 |
37280 |
whining_child = C_INFO; |
454 |
|
|
455 |
37280 |
AZ(ev_listen); |
456 |
37280 |
e = VEV_Alloc(); |
457 |
37280 |
XXXAN(e); |
458 |
37280 |
e->fd = child_output; |
459 |
37280 |
e->fd_flags = VEV__RD; |
460 |
37280 |
e->name = "Child listener"; |
461 |
37280 |
e->callback = child_listener; |
462 |
37280 |
AZ(VEV_Start(mgt_evb, e)); |
463 |
37280 |
ev_listen = e; |
464 |
37280 |
AZ(ev_poker); |
465 |
37280 |
if (mgt_param.ping_interval > 0) { |
466 |
37280 |
e = VEV_Alloc(); |
467 |
37280 |
XXXAN(e); |
468 |
37280 |
e->timeout = mgt_param.ping_interval; |
469 |
37280 |
e->callback = child_poker; |
470 |
37280 |
e->name = "child poker"; |
471 |
37280 |
AZ(VEV_Start(mgt_evb, e)); |
472 |
37280 |
ev_poker = e; |
473 |
37280 |
} |
474 |
|
|
475 |
37280 |
child_pid = pid; |
476 |
|
|
477 |
37280 |
if (mgt_push_vcls(cli, &u, &p)) { |
478 |
40 |
mgt_launch_err(cli, u, "Child (%jd) Pushing vcls failed:\n%s", |
479 |
|
(intmax_t)child_pid, p); |
480 |
40 |
free(p); |
481 |
40 |
MCH_Stop_Child(); |
482 |
40 |
return; |
483 |
|
} |
484 |
|
|
485 |
37240 |
if (mgt_cli_askchild(&u, &p, "start\n")) { |
486 |
0 |
mgt_launch_err(cli, u, "Child (%jd) Acceptor start failed:\n%s", |
487 |
|
(intmax_t)child_pid, p); |
488 |
0 |
free(p); |
489 |
0 |
MCH_Stop_Child(); |
490 |
0 |
return; |
491 |
|
} |
492 |
|
|
493 |
37240 |
free(p); |
494 |
37240 |
child_state = CH_RUNNING; |
495 |
37600 |
} |
496 |
|
|
497 |
|
/*===================================================================== |
498 |
|
* Cleanup when child dies. |
499 |
|
*/ |
500 |
|
|
501 |
|
static int |
502 |
469 |
kill_child(void) |
503 |
|
{ |
504 |
|
int i, error; |
505 |
|
|
506 |
469 |
VJ_master(JAIL_MASTER_KILL); |
507 |
469 |
i = kill(child_pid, SIGQUIT); |
508 |
469 |
error = errno; |
509 |
469 |
VJ_master(JAIL_MASTER_LOW); |
510 |
469 |
errno = error; |
511 |
469 |
return (i); |
512 |
|
} |
513 |
|
|
514 |
|
static void |
515 |
37600 |
mgt_reap_child(void) |
516 |
|
{ |
517 |
|
int i; |
518 |
37600 |
int status = 0xffff; |
519 |
|
struct vsb *vsb; |
520 |
37600 |
pid_t r = 0; |
521 |
|
|
522 |
37600 |
assert(child_pid != -1); |
523 |
|
|
524 |
|
/* |
525 |
|
* Close the CLI connections |
526 |
|
* This signals orderly shut down to child |
527 |
|
*/ |
528 |
37600 |
mgt_cli_stop_child(); |
529 |
37600 |
if (child_cli_fd >= 0) |
530 |
37600 |
closefd(&child_cli_fd); |
531 |
|
|
532 |
|
/* Stop the poker */ |
533 |
37920 |
if (ev_poker != NULL) { |
534 |
37280 |
VEV_Stop(mgt_evb, ev_poker); |
535 |
37280 |
free(ev_poker); |
536 |
37280 |
ev_poker = NULL; |
537 |
37280 |
} |
538 |
|
|
539 |
|
/* Stop the listener */ |
540 |
37640 |
if (ev_listen != NULL) { |
541 |
36920 |
VEV_Stop(mgt_evb, ev_listen); |
542 |
36920 |
free(ev_listen); |
543 |
36920 |
ev_listen = NULL; |
544 |
36920 |
} |
545 |
|
|
546 |
|
/* Compose obituary */ |
547 |
37600 |
vsb = VSB_new_auto(); |
548 |
37600 |
XXXAN(vsb); |
549 |
|
|
550 |
37600 |
(void)VFIL_nonblocking(child_output); |
551 |
|
/* Wait for child to die */ |
552 |
84273 |
for (i = 0; i < mgt_param.cli_timeout * 10; i++) { |
553 |
84204 |
(void)child_listener(NULL, VEV__RD); |
554 |
84204 |
r = waitpid(child_pid, &status, WNOHANG); |
555 |
84204 |
if (r == child_pid) |
556 |
37531 |
break; |
557 |
46673 |
(void)usleep(100000); |
558 |
46673 |
} |
559 |
74993 |
if (r == 0) { |
560 |
138 |
VSB_printf(vsb, "Child (%jd) not dying (waitpid = %jd)," |
561 |
69 |
" killing\n", (intmax_t)child_pid, (intmax_t)r); |
562 |
|
|
563 |
|
/* Kick it Jim... */ |
564 |
69 |
(void)kill_child(); |
565 |
69 |
r = waitpid(child_pid, &status, 0); |
566 |
69 |
} |
567 |
138 |
if (r != child_pid) |
568 |
0 |
fprintf(stderr, "WAIT 0x%jd\n", (intmax_t)r); |
569 |
37600 |
assert(r == child_pid); |
570 |
|
|
571 |
75200 |
VSB_printf(vsb, "Child (%jd) %s", (intmax_t)r, |
572 |
37600 |
status ? "died" : "ended"); |
573 |
37600 |
if (WIFEXITED(status) && WEXITSTATUS(status)) { |
574 |
160 |
VSB_printf(vsb, " status=%d", WEXITSTATUS(status)); |
575 |
160 |
exit_status |= 0x20; |
576 |
160 |
if (WEXITSTATUS(status) == 1) |
577 |
0 |
VSC_C_mgt->child_exit++; |
578 |
|
else |
579 |
160 |
VSC_C_mgt->child_stop++; |
580 |
160 |
} |
581 |
37600 |
if (WIFSIGNALED(status)) { |
582 |
600 |
VSB_printf(vsb, " signal=%d", WTERMSIG(status)); |
583 |
600 |
exit_status |= 0x40; |
584 |
600 |
VSC_C_mgt->child_died++; |
585 |
600 |
} |
586 |
|
#ifdef WCOREDUMP |
587 |
1120 |
if (WCOREDUMP(status)) { |
588 |
80 |
VSB_cat(vsb, " (core dumped)"); |
589 |
80 |
if (!MGT_FEATURE(FEATURE_NO_COREDUMP)) |
590 |
80 |
exit_status |= 0x80; |
591 |
80 |
VSC_C_mgt->child_dump++; |
592 |
80 |
} |
593 |
|
#endif |
594 |
37600 |
AZ(VSB_finish(vsb)); |
595 |
37600 |
MGT_Complain(status ? C_ERR : C_INFO, "%s", VSB_data(vsb)); |
596 |
37600 |
VSB_destroy(&vsb); |
597 |
|
|
598 |
|
/* Dispose of shared memory but evacuate panic messages first */ |
599 |
37600 |
if (heritage.panic_str[0] != '\0') { |
600 |
518 |
mgt_panic_record(r); |
601 |
518 |
VSC_C_mgt->child_panic++; |
602 |
518 |
} |
603 |
|
|
604 |
676 |
mgt_SHM_ChildDestroy(); |
605 |
|
|
606 |
676 |
if (child_state == CH_RUNNING) |
607 |
360 |
child_state = CH_DIED; |
608 |
|
|
609 |
|
/* Pick up any stuff lingering on stdout/stderr */ |
610 |
37600 |
(void)child_listener(NULL, VEV__RD); |
611 |
37600 |
closefd(&child_output); |
612 |
37600 |
VLU_Destroy(&child_std_vlu); |
613 |
|
|
614 |
37600 |
child_pid = -1; |
615 |
|
|
616 |
37600 |
MGT_Complain(C_DEBUG, "Child cleanup complete"); |
617 |
|
|
618 |
|
/* XXX number of retries? interval? */ |
619 |
37600 |
for (i = 0; i < 3; i++) { |
620 |
37600 |
if (VCA_reopen_sockets() == 0) |
621 |
37600 |
break; |
622 |
|
/* error already logged */ |
623 |
0 |
(void)sleep(1); |
624 |
0 |
} |
625 |
37600 |
if (i == 3) { |
626 |
|
/* We failed to reopen our listening sockets. No choice |
627 |
|
* but to exit. */ |
628 |
0 |
MGT_Complain(C_ERR, |
629 |
|
"Could not reopen listening sockets. Exiting."); |
630 |
0 |
exit(1); |
631 |
|
} |
632 |
|
|
633 |
37600 |
if (child_state == CH_DIED && mgt_param.auto_restart) |
634 |
0 |
mgt_launch_child(NULL); |
635 |
37600 |
else if (child_state == CH_DIED) |
636 |
360 |
child_state = CH_STOPPED; |
637 |
37240 |
else if (child_state == CH_STOPPING) |
638 |
36920 |
child_state = CH_STOPPED; |
639 |
37600 |
} |
640 |
|
|
641 |
|
/*===================================================================== |
642 |
|
* If CLI communication with the child process fails, there is nothing |
643 |
|
* for us to do but to drag it behind the barn and get it over with. |
644 |
|
* |
645 |
|
* The typical case is where the child process fails to return a reply |
646 |
|
* before the cli_timeout expires. This invalidates the CLI pipes for |
647 |
|
* all future use, as we don't know if the child was just slow and the |
648 |
|
* result gets piped later on, or if the child is catatonic. |
649 |
|
*/ |
650 |
|
|
651 |
|
void |
652 |
80 |
MCH_Cli_Fail(void) |
653 |
|
{ |
654 |
|
|
655 |
80 |
if (child_state != CH_RUNNING && child_state != CH_STARTING) |
656 |
0 |
return; |
657 |
80 |
if (child_pid < 0) |
658 |
0 |
return; |
659 |
80 |
if (kill_child() == 0) |
660 |
80 |
MGT_Complain(C_ERR, "Child (%jd) not responding to CLI," |
661 |
80 |
" killed it.", (intmax_t)child_pid); |
662 |
|
else |
663 |
0 |
MGT_Complain(C_ERR, "Failed to kill child with PID %jd: %s", |
664 |
0 |
(intmax_t)child_pid, VAS_errtxt(errno)); |
665 |
80 |
} |
666 |
|
|
667 |
|
/*===================================================================== |
668 |
|
* Controlled stop of child process |
669 |
|
* |
670 |
|
* Reaping the child asks for orderly shutdown |
671 |
|
*/ |
672 |
|
|
673 |
|
void |
674 |
74280 |
MCH_Stop_Child(void) |
675 |
|
{ |
676 |
|
|
677 |
74280 |
if (child_state != CH_RUNNING && child_state != CH_STARTING) |
678 |
37360 |
return; |
679 |
|
|
680 |
36920 |
child_state = CH_STOPPING; |
681 |
|
|
682 |
36920 |
MGT_Complain(C_DEBUG, "Stopping Child"); |
683 |
|
|
684 |
36920 |
mgt_reap_child(); |
685 |
74280 |
} |
686 |
|
|
687 |
|
/*===================================================================== |
688 |
|
*/ |
689 |
|
|
690 |
|
int |
691 |
240 |
MCH_Start_Child(void) |
692 |
|
{ |
693 |
240 |
mgt_launch_child(NULL); |
694 |
240 |
if (child_state != CH_RUNNING) |
695 |
200 |
return (2); |
696 |
40 |
return (0); |
697 |
240 |
} |
698 |
|
|
699 |
|
/*==================================================================== |
700 |
|
* Query if the child is running |
701 |
|
*/ |
702 |
|
|
703 |
|
int |
704 |
589880 |
MCH_Running(void) |
705 |
|
{ |
706 |
|
|
707 |
589880 |
return (child_pid > 0); |
708 |
|
} |
709 |
|
|
710 |
|
/*===================================================================== |
711 |
|
* CLI commands |
712 |
|
*/ |
713 |
|
|
714 |
|
static void v_matchproto_(cli_func_t) |
715 |
80 |
mch_pid(struct cli *cli, const char * const *av, void *priv) |
716 |
|
{ |
717 |
|
|
718 |
80 |
(void)av; |
719 |
80 |
(void)priv; |
720 |
80 |
VCLI_Out(cli, "Master: %10jd\n", (intmax_t)getpid()); |
721 |
80 |
if (!MCH_Running()) |
722 |
40 |
return; |
723 |
40 |
VCLI_Out(cli, "Worker: %10jd\n", (intmax_t)child_pid); |
724 |
80 |
} |
725 |
|
|
726 |
|
static void v_matchproto_(cli_func_t) |
727 |
80 |
mch_pid_json(struct cli *cli, const char * const *av, void *priv) |
728 |
|
{ |
729 |
|
|
730 |
80 |
(void)priv; |
731 |
80 |
VCLI_JSON_begin(cli, 2, av); |
732 |
80 |
VCLI_Out(cli, ",\n {\"master\": %jd", (intmax_t)getpid()); |
733 |
80 |
if (MCH_Running()) |
734 |
40 |
VCLI_Out(cli, ", \"worker\": %jd", (intmax_t)child_pid); |
735 |
80 |
VCLI_Out(cli, "}"); |
736 |
80 |
VCLI_JSON_end(cli); |
737 |
80 |
} |
738 |
|
|
739 |
|
static void v_matchproto_(cli_func_t) |
740 |
37560 |
mch_cli_server_start(struct cli *cli, const char * const *av, void *priv) |
741 |
|
{ |
742 |
|
const char *err; |
743 |
|
|
744 |
37560 |
(void)av; |
745 |
37560 |
(void)priv; |
746 |
37560 |
if (child_state == CH_STOPPED) { |
747 |
37400 |
err = mgt_has_vcl(); |
748 |
37400 |
if (err == NULL) { |
749 |
37360 |
mgt_launch_child(cli); |
750 |
37360 |
} else { |
751 |
40 |
VCLI_SetResult(cli, CLIS_CANT); |
752 |
40 |
VCLI_Out(cli, "%s", err); |
753 |
|
} |
754 |
37400 |
} else { |
755 |
160 |
VCLI_SetResult(cli, CLIS_CANT); |
756 |
160 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
757 |
|
} |
758 |
37560 |
} |
759 |
|
|
760 |
|
static void v_matchproto_(cli_func_t) |
761 |
39520 |
mch_cli_server_stop(struct cli *cli, const char * const *av, void *priv) |
762 |
|
{ |
763 |
|
|
764 |
39520 |
(void)av; |
765 |
39520 |
(void)priv; |
766 |
39520 |
if (child_state == CH_RUNNING) { |
767 |
36760 |
MCH_Stop_Child(); |
768 |
36760 |
} else { |
769 |
2760 |
VCLI_SetResult(cli, CLIS_CANT); |
770 |
2760 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
771 |
|
} |
772 |
39520 |
} |
773 |
|
|
774 |
|
static void v_matchproto_(cli_func_t) |
775 |
76440 |
mch_cli_server_status(struct cli *cli, const char * const *av, void *priv) |
776 |
|
{ |
777 |
76440 |
(void)av; |
778 |
76440 |
(void)priv; |
779 |
76440 |
VCLI_Out(cli, "Child in state %s", ch_state[child_state]); |
780 |
76440 |
} |
781 |
|
|
782 |
|
static void v_matchproto_(cli_func_t) |
783 |
320 |
mch_cli_server_status_json(struct cli *cli, const char * const *av, void *priv) |
784 |
|
{ |
785 |
320 |
(void)priv; |
786 |
320 |
VCLI_JSON_begin(cli, 2, av); |
787 |
320 |
VCLI_Out(cli, ", "); |
788 |
320 |
VCLI_JSON_str(cli, ch_state[child_state]); |
789 |
320 |
VCLI_JSON_end(cli); |
790 |
320 |
} |
791 |
|
|
792 |
|
static struct cli_proto cli_mch[] = { |
793 |
|
{ CLICMD_SERVER_STATUS, "", mch_cli_server_status, |
794 |
|
mch_cli_server_status_json }, |
795 |
|
{ CLICMD_SERVER_START, "", mch_cli_server_start }, |
796 |
|
{ CLICMD_SERVER_STOP, "", mch_cli_server_stop }, |
797 |
|
{ CLICMD_PANIC_SHOW, "", mch_cli_panic_show, |
798 |
|
mch_cli_panic_show_json }, |
799 |
|
{ CLICMD_PANIC_CLEAR, "", mch_cli_panic_clear }, |
800 |
|
{ CLICMD_PID, "", mch_pid, mch_pid_json }, |
801 |
|
{ NULL } |
802 |
|
}; |
803 |
|
|
804 |
|
/*===================================================================== |
805 |
|
* This thread is the master thread in the management process. |
806 |
|
* The relatively simple task is to start and stop the child process |
807 |
|
* and to reincarnate it in case of trouble. |
808 |
|
*/ |
809 |
|
|
810 |
|
void |
811 |
37680 |
MCH_Init(void) |
812 |
|
{ |
813 |
|
|
814 |
37680 |
VCLS_AddFunc(mgt_cls, MCF_AUTH, cli_mch); |
815 |
37680 |
} |