varnish-cache/bin/varnishd/storage/storage_file.c
0
/*-
1
 * Copyright (c) 2006 Verdens Gang AS
2
 * Copyright (c) 2006-2011 Varnish Software AS
3
 * All rights reserved.
4
 *
5
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6
 *
7
 * SPDX-License-Identifier: BSD-2-Clause
8
 *
9
 * Redistribution and use in source and binary forms, with or without
10
 * modification, are permitted provided that the following conditions
11
 * are met:
12
 * 1. Redistributions of source code must retain the above copyright
13
 *    notice, this list of conditions and the following disclaimer.
14
 * 2. Redistributions in binary form must reproduce the above copyright
15
 *    notice, this list of conditions and the following disclaimer in the
16
 *    documentation and/or other materials provided with the distribution.
17
 *
18
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28
 * SUCH DAMAGE.
29
 *
30
 * Storage method based on mmap'ed file
31
 */
32
33
#include "config.h"
34
35
#include "cache/cache_varnishd.h"
36
#include "common/heritage.h"
37
38
#include <sys/mman.h>
39
40
#include <stdio.h>
41
#include <stdlib.h>
42
43
#include "storage/storage.h"
44
#include "storage/storage_simple.h"
45
46
#include "vnum.h"
47
#include "vfil.h"
48
49
#include "VSC_smf.h"
50
51
/*
 * mmap(2) flags that do not exist on every platform.  Where a flag is
 * missing it is mapped to an alternative spelling, or to 0 so that
 * OR'ing it into the flag word has no effect.
 */
#if !defined(MAP_NOCORE)
#  if defined(MAP_CONCEAL)
#    define MAP_NOCORE MAP_CONCEAL /* XXX OpenBSD */
#  else
#    define MAP_NOCORE 0 /* XXX Linux */
#  endif
#endif

#if !defined(MAP_NOSYNC)
#  define MAP_NOSYNC 0 /* XXX Linux */
#endif

/*
 * Lower bound, in pages: smf_open() refuses to run with less mapped
 * storage than this, and smf_open_chunk() stops trying once mappings
 * of this size have failed.
 */
#define MINPAGES                128

/*
 * Number of buckets on free-list.
 *
 * The last bucket collects everything "larger than", so the count is
 * chosen such that the second to last bucket matches the 128k CHUNKSIZE
 * in cache_fetch.c when using a 4K minimal page size.
 */
#define NBUCKET                 (128 / 4 + 1)

/* Lock class for smf_sc.mtx; created on first use in smf_open() */
static struct VSC_lck *lck_smf;
75
76
/*--------------------------------------------------------------------*/
77
78
VTAILQ_HEAD(smfhead, smf);
79
80
struct smf {
81
        unsigned                magic;
82
#define SMF_MAGIC               0x0927a8a0
83
        struct storage          s;
84
        struct smf_sc           *sc;
85
86
        int                     alloc;
87
88
        off_t                   size;
89
        off_t                   offset;
90
        unsigned char           *ptr;
91
92
        VTAILQ_ENTRY(smf)       order;
93
        VTAILQ_ENTRY(smf)       status;
94
        struct smfhead          *flist;
95
};
96
97
struct smf_sc {
98
        unsigned                magic;
99
#define SMF_SC_MAGIC            0x52962ee7
100
        struct lock             mtx;
101
        struct VSC_smf          *stats;
102
103
        const char              *filename;
104
        int                     fd;
105
        unsigned                pagesize;
106
        uintmax_t               filesize;
107
        int                     advice;
108
        struct smfhead          order;
109
        struct smfhead          free[NBUCKET];
110
        struct smfhead          used;
111
};
112
113
/*--------------------------------------------------------------------*/
114
115
static void v_matchproto_(storage_init_f)
116 112
smf_init(struct stevedore *parent, int ac, char * const *av)
117
{
118
        const char *size, *fn, *r;
119
        struct smf_sc *sc;
120
        unsigned u;
121
        uintmax_t page_size;
122 112
        int advice = MADV_RANDOM;
123
124 112
        AZ(av[ac]);
125
126 112
        size = NULL;
127 112
        page_size = getpagesize();
128
129 112
        if (ac > 4)
130 0
                ARGV_ERR("(-sfile) too many arguments\n");
131 112
        if (ac < 1 || *av[0] == '\0')
132 0
                ARGV_ERR("(-sfile) path is mandatory\n");
133 112
        fn = av[0];
134 112
        if (ac > 1 && *av[1] != '\0')
135 112
                size = av[1];
136 112
        if (ac > 2 && *av[2] != '\0') {
137
138 14
                r = VNUM_2bytes(av[2], &page_size, 0);
139 14
                if (r != NULL)
140 14
                        ARGV_ERR("(-sfile) granularity \"%s\": %s\n", av[2], r);
141 0
        }
142 98
        if (ac > 3) {
143 14
                if (!strcmp(av[3], "normal"))
144 0
                        advice = MADV_NORMAL;
145 14
                else if (!strcmp(av[3], "random"))
146 0
                        advice = MADV_RANDOM;
147 14
                else if (!strcmp(av[3], "sequential"))
148 0
                        advice = MADV_SEQUENTIAL;
149
                else
150 14
                        ARGV_ERR("(-s file) invalid advice: \"%s\"", av[3]);
151 0
        }
152
153 84
        AN(fn);
154
155 84
        ALLOC_OBJ(sc, SMF_SC_MAGIC);
156 84
        XXXAN(sc);
157 84
        VTAILQ_INIT(&sc->order);
158 2856
        for (u = 0; u < NBUCKET; u++)
159 2772
                VTAILQ_INIT(&sc->free[u]);
160 84
        VTAILQ_INIT(&sc->used);
161 84
        sc->pagesize = page_size;
162 84
        sc->advice = advice;
163 84
        parent->priv = sc;
164
165 84
        (void)STV_GetFile(fn, &sc->fd, &sc->filename, "-sfile");
166 84
        MCH_Fd_Inherit(sc->fd, "storage_file");
167 84
        sc->filesize = STV_FileSize(sc->fd, size, &sc->pagesize, "-sfile");
168 84
        if (VFIL_allocate(sc->fd, (off_t)sc->filesize, 0))
169 0
                ARGV_ERR("(-sfile) allocation error: %s\n", VAS_errtxt(errno));
170 84
}
171
172
/*--------------------------------------------------------------------
173
 * Insert/Remove from correct freelist
174
 */
175
176
static void
177 826
insfree(struct smf_sc *sc, struct smf *sp)
178
{
179
        off_t b, ns;
180
        struct smf *sp2;
181
182 826
        AZ(sp->alloc);
183 826
        assert(sp->flist == NULL);
184 826
        Lck_AssertHeld(&sc->mtx);
185 826
        b = sp->size / sc->pagesize;
186 826
        if (b >= NBUCKET) {
187 770
                b = NBUCKET - 1;
188 770
                sc->stats->g_smf_large++;
189 770
        } else {
190 56
                sc->stats->g_smf_frag++;
191
        }
192 826
        sp->flist = &sc->free[b];
193 826
        ns = b * sc->pagesize;
194 826
        VTAILQ_FOREACH(sp2, sp->flist, status) {
195 0
                assert(sp2->size >= ns);
196 0
                AZ(sp2->alloc);
197 0
                assert(sp2->flist == sp->flist);
198 0
                if (sp->offset < sp2->offset)
199 0
                        break;
200 0
        }
201 826
        if (sp2 == NULL)
202 826
                VTAILQ_INSERT_TAIL(sp->flist, sp, status);
203
        else
204 0
                VTAILQ_INSERT_BEFORE(sp2, sp, status);
205 826
}
206
207
static void
208 742
remfree(const struct smf_sc *sc, struct smf *sp)
209
{
210
        size_t b;
211
212 742
        AZ(sp->alloc);
213 742
        assert(sp->flist != NULL);
214 742
        Lck_AssertHeld(&sc->mtx);
215 742
        b = sp->size / sc->pagesize;
216 742
        if (b >= NBUCKET) {
217 700
                b = NBUCKET - 1;
218 700
                sc->stats->g_smf_large--;
219 700
        } else {
220 42
                sc->stats->g_smf_frag--;
221
        }
222 742
        assert(sp->flist == &sc->free[b]);
223 742
        VTAILQ_REMOVE(sp->flist, sp, status);
224 742
        sp->flist = NULL;
225 742
}
226
227
/*--------------------------------------------------------------------
228
 * Allocate a range from the first free range that is large enough.
229
 */
230
231
static struct smf *
232 392
alloc_smf(struct smf_sc *sc, off_t bytes)
233
{
234
        struct smf *sp, *sp2;
235
        off_t b;
236
237 392
        AZ(bytes % sc->pagesize);
238 392
        b = bytes / sc->pagesize;
239 392
        if (b >= NBUCKET)
240 0
                b = NBUCKET - 1;
241 392
        sp = NULL;
242 11788
        for (; b < NBUCKET - 1; b++) {
243 11396
                sp = VTAILQ_FIRST(&sc->free[b]);
244 11396
                if (sp != NULL)
245 0
                        break;
246 11396
        }
247 392
        if (sp == NULL) {
248 392
                VTAILQ_FOREACH(sp, &sc->free[NBUCKET -1], status)
249 392
                        if (sp->size >= bytes)
250 392
                                break;
251 392
        }
252 392
        if (sp == NULL)
253 0
                return (sp);
254
255 392
        assert(sp->size >= bytes);
256 392
        remfree(sc, sp);
257
258 392
        if (sp->size == bytes) {
259 0
                sp->alloc = 1;
260 0
                VTAILQ_INSERT_TAIL(&sc->used, sp, status);
261 0
                return (sp);
262
        }
263
264
        /* Split from front */
265 392
        sp2 = malloc(sizeof *sp2);
266 392
        XXXAN(sp2);
267 392
        sc->stats->g_smf++;
268 392
        *sp2 = *sp;
269
270 392
        sp->offset += bytes;
271 392
        sp->ptr += bytes;
272 392
        sp->size -= bytes;
273
274 392
        sp2->size = bytes;
275 392
        sp2->alloc = 1;
276 392
        VTAILQ_INSERT_BEFORE(sp, sp2, order);
277 392
        VTAILQ_INSERT_TAIL(&sc->used, sp2, status);
278 392
        insfree(sc, sp);
279 392
        return (sp2);
280 392
}
281
282
/*--------------------------------------------------------------------
283
 * Free a range.  Attempt merge forward and backward, then sort into
284
 * free list according to age.
285
 */
286
287
static void
288 434
free_smf(struct smf *sp)
289
{
290
        struct smf *sp2;
291 434
        struct smf_sc *sc = sp->sc;
292
293 434
        CHECK_OBJ_NOTNULL(sp, SMF_MAGIC);
294 434
        AN(sp->alloc);
295 434
        assert(sp->size > 0);
296 434
        AZ(sp->size % sc->pagesize);
297 434
        VTAILQ_REMOVE(&sc->used, sp, status);
298 434
        sp->alloc = 0;
299
300 434
        sp2 = VTAILQ_NEXT(sp, order);
301 742
        if (sp2 != NULL &&
302 364
            sp2->alloc == 0 &&
303 308
            (sp2->ptr == sp->ptr + sp->size) &&
304 308
            (sp2->offset == sp->offset + sp->size)) {
305 308
                sp->size += sp2->size;
306 308
                VTAILQ_REMOVE(&sc->order, sp2, order);
307 308
                remfree(sc, sp2);
308 308
                free(sp2);
309 308
                sc->stats->g_smf--;
310 308
        }
311
312 434
        sp2 = VTAILQ_PREV(sp, smfhead, order);
313 476
        if (sp2 != NULL &&
314 322
            sp2->alloc == 0 &&
315 42
            (sp->ptr == sp2->ptr + sp2->size) &&
316 42
            (sp->offset == sp2->offset + sp2->size)) {
317 42
                remfree(sc, sp2);
318 42
                sp2->size += sp->size;
319 42
                VTAILQ_REMOVE(&sc->order, sp, order);
320 42
                free(sp);
321 42
                sc->stats->g_smf--;
322 42
                sp = sp2;
323 42
        }
324
325 434
        insfree(sc, sp);
326 434
}
327
328
/*--------------------------------------------------------------------
329
 * Insert a newly created range as busy, then free it to do any collapses
330
 */
331
332
static void
333 70
new_smf(struct smf_sc *sc, unsigned char *ptr, off_t off, size_t len)
334
{
335
        struct smf *sp, *sp2;
336
337 70
        AZ(len % sc->pagesize);
338 70
        ALLOC_OBJ(sp, SMF_MAGIC);
339 70
        XXXAN(sp);
340 70
        sp->s.magic = STORAGE_MAGIC;
341 70
        sc->stats->g_smf++;
342
343 70
        sp->sc = sc;
344 70
        sp->size = len;
345 70
        sp->ptr = ptr;
346 70
        sp->offset = off;
347 70
        sp->alloc = 1;
348
349 70
        VTAILQ_FOREACH(sp2, &sc->order, order) {
350 0
                if (sp->ptr < sp2->ptr) {
351 0
                        VTAILQ_INSERT_BEFORE(sp2, sp, order);
352 0
                        break;
353
                }
354 0
        }
355 70
        if (sp2 == NULL)
356 70
                VTAILQ_INSERT_TAIL(&sc->order, sp, order);
357
358 70
        VTAILQ_INSERT_HEAD(&sc->used, sp, status);
359
360 70
        free_smf(sp);
361 70
}
362
363
/*--------------------------------------------------------------------*/
364
365
/*
366
 * XXX: This may be too aggressive and soak up too much address room.
367
 * XXX: On the other hand, the user, directly or implicitly asked us to
368
 * XXX: use this much storage, so we should make a decent effort.
369
 * XXX: worst case (I think), malloc will fail.
370
 */
371
372
static void
373 70
smf_open_chunk(struct smf_sc *sc, off_t sz, off_t off, off_t *fail, off_t *sum)
374
{
375
        void *p;
376
        off_t h;
377
378 70
        AN(sz);
379 70
        AZ(sz % sc->pagesize);
380
381 70
        if (*fail < (off_t)sc->pagesize * MINPAGES)
382 0
                return;
383
384 70
        if (sz > 0 && sz < *fail && sz < SSIZE_MAX) {
385 140
                p = mmap(NULL, sz, PROT_READ|PROT_WRITE,
386 70
                    MAP_NOCORE | MAP_NOSYNC | MAP_SHARED, sc->fd, off);
387 70
                if (p != MAP_FAILED) {
388 70
                        (void)madvise(p, sz, sc->advice);
389 70
                        (*sum) += sz;
390 70
                        new_smf(sc, p, off, sz);
391 70
                        return;
392
                }
393 0
        }
394
395 0
        if (sz < *fail)
396 0
                *fail = sz;
397
398 0
        h = sz / 2;
399 0
        h -= (h % sc->pagesize);
400
401 0
        smf_open_chunk(sc, h, off, fail, sum);
402 0
        smf_open_chunk(sc, sz - h, off + h, fail, sum);
403 70
}
404
405
static void v_matchproto_(storage_open_f)
406 70
smf_open(struct stevedore *st)
407
{
408
        struct smf_sc *sc;
409 70
        off_t fail = 1 << 30;   /* XXX: where is OFF_T_MAX ? */
410 70
        off_t sum = 0;
411
412 70
        ASSERT_CLI();
413 70
        st->lru = LRU_Alloc();
414 70
        if (lck_smf == NULL)
415 56
                lck_smf = Lck_CreateClass(NULL, "smf");
416 70
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
417 70
        sc->stats = VSC_smf_New(NULL, NULL, st->ident);
418 70
        Lck_New(&sc->mtx, lck_smf);
419 70
        Lck_Lock(&sc->mtx);
420 70
        smf_open_chunk(sc, sc->filesize, 0, &fail, &sum);
421 70
        Lck_Unlock(&sc->mtx);
422 70
        if (sum < MINPAGES * (off_t)getpagesize()) {
423 0
                ARGV_ERR(
424
                    "-sfile too small for this architecture,"
425
                    " minimum size is %jd MB\n",
426
                    (MINPAGES * (intmax_t)getpagesize()) / (1<<20)
427
                );
428 0
        }
429 70
        printf("SMF.%s mmap'ed %ju bytes of %ju\n",
430 70
            st->ident, (uintmax_t)sum, sc->filesize);
431
432
        /* XXX */
433 70
        if (sum < MINPAGES * (off_t)getpagesize())
434 0
                exit(4);
435
436 70
        sc->stats->g_space += sc->filesize;
437 70
}
438
439
/*--------------------------------------------------------------------*/
440
441
static struct storage * v_matchproto_(sml_alloc_f)
442 392
smf_alloc(const struct stevedore *st, size_t sz)
443
{
444
        struct smf *smf;
445
        struct smf_sc *sc;
446
        off_t size;
447
448 392
        CAST_OBJ_NOTNULL(sc, st->priv, SMF_SC_MAGIC);
449 392
        assert(sz > 0);
450
        // XXX missing OFF_T_MAX
451 392
        size = (off_t)sz;
452 392
        size += (sc->pagesize - 1UL);
453 392
        size &= ~(sc->pagesize - 1UL);
454 392
        Lck_Lock(&sc->mtx);
455 392
        sc->stats->c_req++;
456 392
        smf = alloc_smf(sc, size);
457 392
        if (smf == NULL) {
458 0
                sc->stats->c_fail++;
459 0
                Lck_Unlock(&sc->mtx);
460 0
                return (NULL);
461
        }
462 392
        CHECK_OBJ_NOTNULL(smf, SMF_MAGIC);
463 392
        sc->stats->g_alloc++;
464 392
        sc->stats->c_bytes += smf->size;
465 392
        sc->stats->g_bytes += smf->size;
466 392
        sc->stats->g_space -= smf->size;
467 392
        Lck_Unlock(&sc->mtx);
468 392
        CHECK_OBJ_NOTNULL(&smf->s, STORAGE_MAGIC);      /*lint !e774 */
469 392
        XXXAN(smf);
470 392
        assert(smf->size == size);
471 392
        smf->s.space = size;
472 392
        smf->s.priv = smf;
473 392
        smf->s.ptr = smf->ptr;
474 392
        smf->s.len = 0;
475 392
        return (&smf->s);
476 392
}
477
478
/*--------------------------------------------------------------------*/
479
480
static void v_matchproto_(sml_free_f)
481 364
smf_free(struct storage *s)
482
{
483
        struct smf *smf;
484
        struct smf_sc *sc;
485
486 364
        CHECK_OBJ_NOTNULL(s, STORAGE_MAGIC);
487 364
        CAST_OBJ_NOTNULL(smf, s->priv, SMF_MAGIC);
488 364
        sc = smf->sc;
489 364
        Lck_Lock(&sc->mtx);
490 364
        sc->stats->g_alloc--;
491 364
        sc->stats->c_freed += smf->size;
492 364
        sc->stats->g_bytes -= smf->size;
493 364
        sc->stats->g_space += smf->size;
494 364
        free_smf(smf);
495 364
        Lck_Unlock(&sc->mtx);
496 364
}
497
498
/*--------------------------------------------------------------------*/
499
500
const struct stevedore smf_stevedore = {
501
        .magic          =       STEVEDORE_MAGIC,
502
        .name           =       "file",
503
        .init           =       smf_init,
504
        .open           =       smf_open,
505
        .sml_alloc      =       smf_alloc,
506
        .sml_free       =       smf_free,
507
        .allocobj       =       SML_allocobj,
508
        .panic          =       SML_panic,
509
        .methods        =       &SML_methods,
510
        .allocbuf       =       SML_AllocBuf,
511
        .freebuf        =       SML_FreeBuf,
512
};