varnish-cache/bin/varnishd/common/common_vsmw.c
0
/*-
1
 * Copyright (c) 2010-2011 Varnish Software AS
2
 * All rights reserved.
3
 *
4
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
5
 *
6
 * SPDX-License-Identifier: BSD-2-Clause
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 *
29
 * VSM stuff common to manager and child.
30
 *
31
 */
32
33
#include "config.h"
34
35
#include <fcntl.h>
36
#include <stdarg.h>
37
#include <stdio.h>
38
#include <stdint.h>
39
#include <stdlib.h>
40
#include <string.h>
41
#include <time.h>
42
#include <unistd.h>
43
#include <sys/mman.h>
44
#include <sys/resource.h>
45
#include <sys/stat.h>
46
47
#include "vdef.h"
48
#include "vas.h"
49
#include "vsb.h"
50
#include "miniobj.h"
51
#include "vqueue.h"
52
53
#include "vfil.h"
54
#include "vrnd.h"
55
56
#include "heritage.h"
57
#include "vsmw.h"
58
59
#ifndef MAP_HASSEMAPHORE
60
#  define MAP_HASSEMAPHORE 0 /* XXX Linux */
61
#endif
62
63
#ifndef MAP_NOSYNC
64
#  define MAP_NOSYNC 0 /* XXX Linux */
65
#endif
66
67
static void v_matchproto_(vsm_lock_f)
68 828800
vsmw_dummy_lock(void)
69
{
70 828800
}
71
72
/*
 * Bookkeeping flag for the lock macros below: set to 1 by vsmw_do_lock()
 * after the lock hook has been called, cleared by vsmw_do_unlock() before
 * the unlock hook is called.
 */
static int vsmw_haslock;
73
/* Lock/unlock hooks; both initially point at the no-op vsmw_dummy_lock. */
vsm_lock_f *vsmw_lock = vsmw_dummy_lock;
74
vsm_lock_f *vsmw_unlock = vsmw_dummy_lock;
75
76
/* Assert that the caller is inside a vsmw_do_lock()/vsmw_do_unlock() pair. */
#define vsmw_assert_lock()      AN(vsmw_haslock)
77
78
/*
 * Take the lock.  The __func__/__LINE__ arguments are passed along but the
 * expansion below does not use them.
 */
#define vsmw_do_lock() vsmw_do_lock_(__func__, __LINE__)
79
80
/* Calls the lock hook, asserts the flag was clear, then sets it. */
#define vsmw_do_lock_(f, l)                             \
81
        do {                                    \
82
                vsmw_lock();                    \
83
                AZ(vsmw_haslock);               \
84
                vsmw_haslock = 1;               \
85
        } while(0)
86
87
/*
 * Release the lock: asserts the flag is set, clears it, then calls the
 * unlock hook.  As above, the f/l arguments are not used by the expansion.
 */
#define vsmw_do_unlock() vsmw_do_unlock_(__func__, __LINE__)
88
#define vsmw_do_unlock_(f, l)                           \
89
        do {                                    \
90
                AN(vsmw_haslock);               \
91
                vsmw_haslock = 0;               \
92
                vsmw_unlock();                  \
93
        } while(0)
94
95
/*--------------------------------------------------------------------*/
96
97
/*
 * One mmap'ed backing file that holds one or more segments.
 *
 *   list   linkage on vsmw->clusters
 *   cseg   segment record describing the cluster itself; allocated by
 *          VSMW_NewCluster() and deleted on destroy when 'named' is set
 *   fn     heap copy of the file name generated by vsmw_mkent(), relative
 *          to vsmw->vdirfd
 *   len    mapped length, rounded up to the page size in vsmw_newcluster()
 *   ptr    address returned by mmap()
 *   next   offset at which the next segment will be placed
 *   refs   reference count; the cluster is destroyed when it drops to zero
 *   named  set to 1 by VSMW_NewCluster()
 */
struct vsmw_cluster {
98
        unsigned                        magic;
99
#define VSMW_CLUSTER_MAGIC              0x28b74c00
100
101
        VTAILQ_ENTRY(vsmw_cluster)      list;
102
        struct vsmwseg                  *cseg;
103
        char                            *fn;
104
        size_t                          len;
105
        void                            *ptr;
106
        size_t                          next;
107
        int                             refs;
108
        int                             named;
109
};
110
111
/*
 * One segment, i.e. one record in the index file.
 *
 *   list      linkage on vsmw->segs
 *   cluster   cluster whose file name is written in the index record
 *   category  heap copy of the category string (must not contain '\n')
 *   off       offset of the segment inside the cluster
 *   len       segment length as written to the index
 *   id        heap copy of the identifier string (must not contain '\n')
 *   ptr       address handed out by VSMW_Allocv(); VSMW_Free() looks
 *             segments up by this value.  VSMW_NewCluster() does not set
 *             it for the cluster's own segment.
 */
struct vsmwseg {
112
        unsigned                        magic;
113
#define VSMWSEG_MAGIC                   0x7e4ccaea
114
        VTAILQ_ENTRY(vsmwseg)           list;
115
        struct vsmw_cluster             *cluster;
116
117
        char                            *category;
118
        size_t                          off;
119
        size_t                          len;
120
        char                            *id;
121
        void                            *ptr;
122
};
123
124
/*
 * Writer-side state for one VSM directory.
 *
 *   vdirfd    directory fd used for every openat/unlinkat/renameat call
 *   mode      permission bits for files created in that directory
 *   idx       heap copy of the index file name (relative to vdirfd)
 *   clusters  all live clusters
 *   segs      all live segments, in the order they were added
 *   vsb       scratch buffer used to build file names and index records
 *   pid       value of getpid() at VSMW_New(); written in the index header
 *   birth     value of time(NULL) at VSMW_New(); written in the index header
 *   nsegs     number of entries on 'segs'
 *   nsubs     number of '-' records appended since the index was last
 *             rewritten from scratch (see vsmw_delseg())
 */
struct vsmw {
125
        unsigned                        magic;
126
#define VSMW_MAGIC                      0xc2ca2cd9
127
        int                             vdirfd;
128
        int                             mode;
129
        char                            *idx;
130
        VTAILQ_HEAD(, vsmw_cluster)     clusters;
131
        VTAILQ_HEAD(, vsmwseg)          segs;
132
        struct vsb                      *vsb;
133
        pid_t                           pid;
134
        time_t                          birth;
135
        uint64_t                        nsegs;
136
        uint64_t                        nsubs;
137
};
138
139
/*
 * Allocations in clusters never start at offset zero: VSMW_NewCluster()
 * adds this many bytes to the requested length and advances the new
 * cluster's 'next' offset by the same amount before any segment is placed.
 */
140
#define VSM_CLUSTER_OFFSET 16
141
142
/*--------------------------------------------------------------------*/
143
144
static void
145 74756
vsmw_idx_head(const struct vsmw *vsmw, int fd)
146
{
147
        char buf[64];
148
149 74756
        bprintf(buf, "# %jd %jd\n", (intmax_t)vsmw->pid, (intmax_t)vsmw->birth);
150
        // XXX handle ENOSPC? #2764
151 74756
        assert(write(fd, buf, strlen(buf)) == strlen(buf));
152 74756
}
153
154
/*
 * Assert that a segment string is non-NULL and contains no newline.
 * Index records are written one per line, so an embedded '\n' would
 * corrupt the index.
 *
 * The expansion deliberately has no trailing semicolon, so that
 * "ASSERT_SEG_STR(s);" is exactly one statement and can be used as the
 * body of an unbraced if/else.  Note that 'x' is evaluated twice.
 */
#define ASSERT_SEG_STR(x) do {                  \
                AN(x);                          \
                AZ(strchr((x), '\n'));          \
        } while (0)
158
159
static void
160 2060543
vsmw_fmt_index(const struct vsmw *vsmw, const struct vsmwseg *seg, char act)
161
{
162
163 2060543
        vsmw_assert_lock();
164 2060543
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
165 2060543
        CHECK_OBJ_NOTNULL(seg, VSMWSEG_MAGIC);
166 2060543
        AN(seg->cluster);
167 2060543
        ASSERT_SEG_STR(seg->category);
168 2060543
        ASSERT_SEG_STR(seg->id);
169
170 4121086
        VSB_printf(vsmw->vsb, "%c %s %zu %zu %s %s\n",
171 2060543
            act,
172 2060543
            seg->cluster->fn,
173 2060543
            seg->off,
174 2060543
            seg->len,
175 2060543
            seg->category,
176 2060543
            seg->id);
177 2060543
}
178
179
/*--------------------------------------------------------------------*/
180
181
static void
182 1738021
vsmw_mkent(const struct vsmw *vsmw, const char *pfx)
183
{
184
        int fd;
185
        uint64_t rn;
186
187 1738021
        AN(pfx);
188 1738021
        vsmw_assert_lock();
189 1738021
        while (1) {
190 1738021
                VSB_clear(vsmw->vsb);
191 1738021
                VSB_printf(vsmw->vsb, "_.%s", pfx);
192 1738021
                AZ(VRND_RandomCrypto(&rn, sizeof rn));
193 1738021
                VSB_printf(vsmw->vsb, ".%016jx", (uintmax_t)rn);
194 1738021
                AZ(VSB_finish(vsmw->vsb));
195 1738021
                fd = openat(vsmw->vdirfd, VSB_data(vsmw->vsb), O_RDONLY);
196 1738021
                if (fd < 0 && errno == ENOENT)
197 1738021
                        return;
198 0
                if (fd >= 0)
199 0
                        closefd(&fd);
200
        }
201
}
202
203
/*--------------------------------------------------------------------*/
204
205
static void
206 2058223
vsmw_append_record(struct vsmw *vsmw, struct vsmwseg *seg, char act)
207
{
208
        int fd;
209
210 2058223
        vsmw_assert_lock();
211 2058223
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
212 2058223
        CHECK_OBJ_NOTNULL(seg, VSMWSEG_MAGIC);
213 2058223
        fd = openat(vsmw->vdirfd, vsmw->idx, O_APPEND | O_WRONLY);
214 2058223
        assert(fd >= 0);
215 2058223
        VSB_clear(vsmw->vsb);
216 2058223
        vsmw_fmt_index(vsmw, seg, act);
217 2058223
        AZ(VSB_finish(vsmw->vsb));
218 2058223
        XXXAZ(VSB_tofile(vsmw->vsb, fd)); // XXX handle ENOSPC? #2764
219 2058223
        closefd(&fd);
220 2058223
}
221
222
/*--------------------------------------------------------------------*/
223
224
static void
225 1788221
vsmw_addseg(struct vsmw *vsmw, struct vsmwseg *seg)
226
{
227
228 1788221
        vsmw_assert_lock();
229 1788221
        VTAILQ_INSERT_TAIL(&vsmw->segs, seg, list);
230 1788221
        vsmw_append_record(vsmw, seg, '+');
231 1788221
        vsmw->nsegs++;
232 1788221
}
233
234
/*--------------------------------------------------------------------*/
235
236
static void
237 270362
vsmw_delseg(struct vsmw *vsmw, struct vsmwseg *seg)
238
{
239 270362
        char *t = NULL;
240
        int fd;
241
        struct vsmwseg *s2;
242
243 270362
        vsmw_assert_lock();
244 270362
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
245 270362
        CHECK_OBJ_NOTNULL(seg, VSMWSEG_MAGIC);
246
247 270362
        VTAILQ_REMOVE(&vsmw->segs, seg, list);
248
249 270362
        vsmw->nsegs--;
250 270362
        if (vsmw->nsubs < 10 || vsmw->nsubs * 2 < vsmw->nsegs) {
251 270002
                vsmw_append_record(vsmw, seg, '-');
252 270002
                vsmw->nsubs++;
253 270002
        } else {
254 360
                vsmw_mkent(vsmw, vsmw->idx);
255 360
                REPLACE(t, VSB_data(vsmw->vsb));
256 720
                fd = openat(vsmw->vdirfd,
257 360
                    t, O_WRONLY|O_CREAT|O_EXCL, vsmw->mode);
258 360
                assert(fd >= 0);
259 360
                vsmw_idx_head(vsmw, fd);
260 360
                VSB_clear(vsmw->vsb);
261 2680
                VTAILQ_FOREACH(s2, &vsmw->segs, list)
262 2320
                        vsmw_fmt_index(vsmw, s2, '+');
263 360
                AZ(VSB_finish(vsmw->vsb));
264 360
                XXXAZ(VSB_tofile(vsmw->vsb, fd)); // XXX handle ENOSPC? #2764
265 360
                closefd(&fd);
266 360
                AZ(renameat(vsmw->vdirfd, t, vsmw->vdirfd, vsmw->idx));
267 360
                REPLACE(t, NULL);
268 360
                vsmw->nsubs = 0;
269
        }
270 270362
        REPLACE(seg->category, NULL);
271 270362
        REPLACE(seg->id, NULL);
272 270362
        FREE_OBJ(seg);
273 270362
}
274
275
/*--------------------------------------------------------------------*/
276
277
#ifdef RLIMIT_MEMLOCK
/*
 * Print one resource limit to stderr as "Info: <name>: <value>", where
 * the value is either "unlimited" or the limit in bytes.
 */
static void
printlim(const char *name, rlim_t lim)
{

        fprintf(stderr, "Info: %s: ", name);
        if (lim != RLIM_INFINITY)
                fprintf(stderr, "%ju bytes\n", (uintmax_t)lim);
        else
                fprintf(stderr, "unlimited\n");
}

/*
 * Report the soft and hard RLIMIT_MEMLOCK limits on stderr.
 */
static void
printmemlock(void)
{
        struct rlimit rlim;

        AZ(getrlimit(RLIMIT_MEMLOCK, &rlim));
        printlim("max locked memory (soft)", rlim.rlim_cur);
        printlim("max locked memory (hard)", rlim.rlim_max);
}
#else
/* No RLIMIT_MEMLOCK on this platform: nothing to report. */
static void
printmemlock(void)
{
}
#endif
300
301
static struct vsmw_cluster *
302 1737661
vsmw_newcluster(struct vsmw *vsmw, size_t len, const char *pfx)
303
{
304
        struct vsmw_cluster *vc;
305
        static int warn = 0;
306
        int fd;
307
        size_t ps;
308
309 1737661
        vsmw_assert_lock();
310 1737661
        ALLOC_OBJ(vc, VSMW_CLUSTER_MAGIC);
311 1737661
        AN(vc);
312
313 1737661
        vsmw_mkent(vsmw, pfx);
314 1737661
        REPLACE(vc->fn, VSB_data(vsmw->vsb));
315
316 1737661
        VTAILQ_INSERT_TAIL(&vsmw->clusters, vc, list);
317
318 1737661
        ps = getpagesize();
319 1737661
        len = RUP2(len, ps);
320 1737661
        vc->len = len;
321
322 3475322
        fd = openat(vsmw->vdirfd, vc->fn,
323 1737661
            O_RDWR | O_CREAT | O_EXCL, vsmw->mode);
324 1737661
        assert(fd >= 0);
325
326 1737661
        AZ(VFIL_allocate(fd, (off_t)len, 1));
327
328 3475322
        vc->ptr = (void *)mmap(NULL, len,
329
            PROT_READ|PROT_WRITE,
330
            MAP_HASSEMAPHORE | MAP_NOSYNC | MAP_SHARED,
331 1737661
            fd, 0);
332
333 1737661
        closefd(&fd);
334 1737661
        assert(vc->ptr != MAP_FAILED);
335 1737661
        if (mlock(vc->ptr, len) && warn++ == 0)  {
336 75440
                fprintf(stderr, "Warning: mlock() of VSM failed: %s (%d)\n",
337 37720
                    VAS_errtxt(errno), errno);
338 37720
                printmemlock();
339 37720
        }
340
341 1737661
        return (vc);
342
}
343
344
struct vsmw_cluster *
345 47400
VSMW_NewCluster(struct vsmw *vsmw, size_t len, const char *pfx)
346
{
347
        struct vsmw_cluster *vc;
348
        struct vsmwseg *seg;
349
350 47400
        vsmw_do_lock();
351 47400
        vc = vsmw_newcluster(vsmw, len + VSM_CLUSTER_OFFSET, pfx);
352 47400
        AN(vc);
353 47400
        vc->next += VSM_CLUSTER_OFFSET;
354
355 47400
        ALLOC_OBJ(seg, VSMWSEG_MAGIC);
356 47400
        AN(seg);
357 47400
        vc->cseg = seg;
358 47400
        seg->len = vc->len;
359 47400
        seg->cluster = vc;
360 47400
        REPLACE(seg->category, "");
361 47400
        REPLACE(seg->id, "");
362 47400
        vc->refs++;
363 47400
        vc->named = 1;
364 47400
        vsmw_addseg(vsmw, seg);
365
366 47400
        vsmw_do_unlock();
367 47400
        return (vc);
368
}
369
370
static void
371 79221
vsmw_DestroyCluster_locked(struct vsmw *vsmw, struct vsmw_cluster *vc)
372
{
373
374 79221
        vsmw_assert_lock();
375 79221
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
376 79221
        CHECK_OBJ_NOTNULL(vc, VSMW_CLUSTER_MAGIC);
377
378 79221
        AZ(vc->refs);
379
380 79221
        AZ(munmap(vc->ptr, vc->len));
381 79221
        if (vc->named)
382 3101
                vsmw_delseg(vsmw, vc->cseg);
383 79221
        vc->cseg = 0;
384
385 79221
        VTAILQ_REMOVE(&vsmw->clusters, vc, list);
386 79221
        if (unlinkat(vsmw->vdirfd, vc->fn, 0))
387 0
                assert (errno == ENOENT);
388 79221
        REPLACE(vc->fn, NULL);
389 79221
        FREE_OBJ(vc);
390 79221
}
391
392
void
393 3101
VSMW_DestroyCluster(struct vsmw *vsmw, struct vsmw_cluster **vsmcp)
394
{
395
        struct vsmw_cluster *vc;
396
397 3101
        TAKE_OBJ_NOTNULL(vc, vsmcp, VSMW_CLUSTER_MAGIC);
398
399 3101
        vsmw_do_lock();
400 3101
        if (--vc->refs == 0)
401 1720
                vsmw_DestroyCluster_locked(vsmw, vc);
402 3101
        vsmw_do_unlock();
403 3101
}
404
405
/*--------------------------------------------------------------------*/
406
407
void *
408 1740821
VSMW_Allocv(struct vsmw *vsmw, struct vsmw_cluster *vc,
409
    const char *category, size_t payload, const char *prefix,
410
    const char *fmt, va_list va)
411
{
412
        struct vsmwseg *seg;
413
        ssize_t l;
414
415 1740821
        vsmw_do_lock();
416 1740821
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
417
418 1740821
        ALLOC_OBJ(seg, VSMWSEG_MAGIC);
419 1740821
        AN(seg);
420 1740821
        REPLACE(seg->category, category);
421 1740821
        seg->len = PRNDUP(payload);
422
423 1740821
        VSB_clear(vsmw->vsb);
424 1740821
        if (prefix != NULL) {
425 1260931
                assert(prefix[0] != '\0');
426 1260931
                VSB_cat(vsmw->vsb, prefix);
427 1260931
                if (fmt[0] != '\0')
428 1186495
                        VSB_cat(vsmw->vsb, ".");
429 1260931
        }
430 1740821
        l = VSB_len(vsmw->vsb);
431 1740821
        assert(l >= 0);
432 1740821
        VSB_vprintf(vsmw->vsb, fmt, va);
433 1740821
        AZ(VSB_finish(vsmw->vsb));
434 1740821
        assert(fmt[0] == '\0' || l < VSB_len(vsmw->vsb));
435
436 1740821
        REPLACE(seg->id, VSB_data(vsmw->vsb));
437
438 1740821
        if (vc == NULL)
439 1690261
                vc = vsmw_newcluster(vsmw, seg->len, category);
440 1740821
        AN(vc);
441 1740821
        vc->refs++;
442
443 1740821
        seg->cluster = vc;
444 1740821
        seg->off = vc->next;
445 1740821
        vc->next += seg->len;
446 1740821
        assert(vc->next <= vc->len);
447 1740821
        seg->ptr = seg->off + (char*)vc->ptr;
448
449 1740821
        vsmw_addseg(vsmw, seg);
450
451 1740821
        vsmw_do_unlock();
452 1740821
        return (seg->ptr);
453
}
454
455
/*
 * Variadic convenience wrapper around VSMW_Allocv() with no id prefix.
 *
 * 'fmt' and the arguments after it form the segment id; all other
 * parameters are passed through unchanged.  Returns what VSMW_Allocv()
 * returns.
 */
void *
VSMW_Allocf(struct vsmw *vsmw, struct vsmw_cluster *vc,
    const char *category, size_t len, const char *fmt, ...)
{
        va_list args;
        void *ret;

        va_start(args, fmt);
        ret = VSMW_Allocv(vsmw, vc, category, len, NULL, fmt, args);
        va_end(args);

        return (ret);
}
467
468
/*--------------------------------------------------------------------*/
469
470
void
471 78701
VSMW_Free(struct vsmw *vsmw, void **pp)
472
{
473
        struct vsmwseg *seg;
474
        struct vsmw_cluster *cp;
475
476 78701
        vsmw_do_lock();
477 78701
        CHECK_OBJ_NOTNULL(vsmw, VSMW_MAGIC);
478 78701
        AN(pp);
479 631346
        VTAILQ_FOREACH(seg, &vsmw->segs, list)
480 631346
                if (seg->ptr == *pp)
481 78701
                        break;
482 78701
        AN(seg);
483 78701
        *pp = NULL;
484
485 78701
        cp = seg->cluster;
486 78701
        CHECK_OBJ_NOTNULL(cp, VSMW_CLUSTER_MAGIC);
487 78701
        assert(cp->refs > 0);
488
489 78701
        vsmw_delseg(vsmw, seg);
490
491 78701
        if (!--cp->refs)
492 77501
                vsmw_DestroyCluster_locked(vsmw, cp);
493 78701
        vsmw_do_unlock();
494 78701
}
495
496
/*--------------------------------------------------------------------*/
497
498
struct vsmw *
499 74396
VSMW_New(int vdirfd, int mode, const char *idxname)
500
{
501
        struct vsmw *vsmw;
502
        int fd;
503
504 74396
        assert(vdirfd > 0);
505 74396
        assert(mode > 0);
506 74396
        AN(idxname);
507
508 74396
        vsmw_do_lock();
509 74396
        ALLOC_OBJ(vsmw, VSMW_MAGIC);
510 74396
        AN(vsmw);
511
512 74396
        VTAILQ_INIT(&vsmw->segs);
513 74396
        VTAILQ_INIT(&vsmw->clusters);
514 74396
        vsmw->vsb = VSB_new_auto();
515 74396
        AN(vsmw->vsb);
516 74396
        REPLACE(vsmw->idx, idxname);
517 74396
        vsmw->mode = mode;
518 74396
        vsmw->vdirfd = vdirfd;
519 74396
        vsmw->pid = getpid();
520 74396
        vsmw->birth = time(NULL);
521
522 74396
        if (unlinkat(vdirfd, vsmw->idx, 0))
523 74396
                assert (errno == ENOENT);
524 148792
        fd = openat(vdirfd,
525 74396
            vsmw->idx, O_APPEND | O_WRONLY | O_CREAT, vsmw->mode);
526 74396
        assert(fd >= 0);
527 74396
        vsmw_idx_head(vsmw, fd);
528 74396
        closefd(&fd);
529
530 74396
        vsmw_do_unlock();
531 74396
        return (vsmw);
532
}
533
534
void
535 37720
VSMW_Destroy(struct vsmw **pp)
536
{
537
        struct vsmw *vsmw;
538
        struct vsmwseg *seg, *s2;
539
540 37720
        vsmw_do_lock();
541 37720
        TAKE_OBJ_NOTNULL(vsmw, pp, VSMW_MAGIC);
542 226280
        VTAILQ_FOREACH_SAFE(seg, &vsmw->segs, list, s2)
543 188560
                vsmw_delseg(vsmw, seg);
544 37720
        if (unlinkat(vsmw->vdirfd, vsmw->idx, 0))
545 0
                assert (errno == ENOENT);
546 37720
        REPLACE(vsmw->idx, NULL);
547 37720
        VSB_destroy(&vsmw->vsb);
548 37720
        closefd(&vsmw->vdirfd);
549 37720
        FREE_OBJ(vsmw);
550 37720
        vsmw_do_unlock();
551 37720
}