varnish-cache/lib/libvarnish/vre.c
0
/*-
1
 * Copyright (c) 2006-2011 Varnish Software AS
2
 * All rights reserved.
3
 *
4
 * Author: Tollef Fog Heen <tfheen@redpill-linpro.com>
5
 *
6
 * SPDX-License-Identifier: BSD-2-Clause
7
 *
8
 * Redistribution and use in source and binary forms, with or without
9
 * modification, are permitted provided that the following conditions
10
 * are met:
11
 * 1. Redistributions of source code must retain the above copyright
12
 *    notice, this list of conditions and the following disclaimer.
13
 * 2. Redistributions in binary form must reproduce the above copyright
14
 *    notice, this list of conditions and the following disclaimer in the
15
 *    documentation and/or other materials provided with the distribution.
16
 *
17
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27
 * SUCH DAMAGE.
28
 */
29
30
#include "config.h"
31
32
#include <ctype.h>
33
#include <string.h>
34
#include <unistd.h>
35
36
#include "vdef.h"
37
38
#include "vas.h"        // XXX Flexelint "not used" - but req'ed for assert()
39
#include "vsb.h"
40
#include "miniobj.h"
41
42
#include "vre.h"
43
#include "vre_pcre2.h"
44
45
/*
 * Compatibility shim: when the pcre2 headers do not provide a
 * pcre2_set_depth_limit macro, emit a build warning and map the name
 * onto pcre2_set_recursion_limit instead.
 *
 * should be turned into an error sooner or later
 */
46
#if !defined(pcre2_set_depth_limit)
47
#  warning pcre2 missing pcre2_set_depth_limit - update recommended
48
#  define pcre2_set_depth_limit(r, d) pcre2_set_recursion_limit(r, d)
49
#endif
50
51
/*
 * Sentinel stored in vre->re to mark a "packed" object: the compiled
 * pattern is not referenced through the pointer but sits immediately
 * after the struct vre header (see VRE_export() and VRE_unpack()).
 */
#define VRE_PACKED_RE           (pcre2_code *)(-1)
52
53
/*
 * A compiled regular expression.  Either 're' points at a pcre2_code
 * owned by this object, or 're' is VRE_PACKED_RE and the pattern bytes
 * follow this header directly in memory.
 */
struct vre {
54
        unsigned                magic;          /* set to VRE_MAGIC, checked on entry */
55
#define VRE_MAGIC               0xe83097dc
56
        pcre2_code              *re;            /* compiled pattern or VRE_PACKED_RE */
57
        pcre2_match_context     *re_ctx;        /* match context; NULL for packed objects */
58
};
59
60
/*
61
 * We don't want to spread or even expose the majority of PCRE2 options
62
 * and errors so we establish our own symbols and implement hard linkage
63
 * to PCRE2 here.
64
 */
65
/* PCRE2's "no match" result code, re-exported under a VRE name */
const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH;
66
67
/* PCRE2's caseless option bit, re-exported under a VRE name */
const unsigned VRE_CASELESS = PCRE2_CASELESS;
68
69
vre_t *
70 14220
VRE_compile(const char *pattern, unsigned options,
71
    int *errptr, int *erroffset, unsigned jit)
72
{
73
        PCRE2_SIZE erroff;
74
        vre_t *v;
75
76 14220
        AN(pattern);
77 14220
        AN(errptr);
78 14220
        AN(erroffset);
79
80 14220
        *errptr = 0;
81 14220
        *erroffset = -1;
82
83 14220
        ALLOC_OBJ(v, VRE_MAGIC);
84 14220
        if (v == NULL) {
85 0
                *errptr = PCRE2_ERROR_NOMEMORY;
86 0
                return (NULL);
87
        }
88 28440
        v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED,
89 14220
            options, errptr, &erroff, NULL);
90 14220
        *erroffset = erroff;
91 14220
        if (v->re == NULL) {
92 5
                VRE_free(&v);
93 5
                return (NULL);
94
        }
95 14215
        v->re_ctx = pcre2_match_context_create(NULL);
96 14215
        if (v->re_ctx == NULL) {
97 0
                *errptr = PCRE2_ERROR_NOMEMORY;
98 0
                VRE_free(&v);
99 0
                return (NULL);
100
        }
101
#if USE_PCRE2_JIT
102 14215
        if (jit)
103 5606
                (void)pcre2_jit_compile(v->re, PCRE2_JIT_COMPLETE);
104
#else
105
        (void)jit;
106
#endif
107 14215
        return (v);
108 14220
}
109
110
int
111 6
VRE_error(struct vsb *vsb, int err)
112
{
113
        char buf[VRE_ERROR_LEN];
114
        int i;
115
116 6
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
117 6
        i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN);
118 6
        if (i == PCRE2_ERROR_BADDATA) {
119 0
                VSB_printf(vsb, "unknown pcre2 error code (%d)", err);
120 0
                return (-1);
121
        }
122 6
        VSB_cat(vsb, buf);
123 6
        return (0);
124 6
}
125
126
pcre2_code *
127 8021
VRE_unpack(const vre_t *code)
128
{
129
130
        /* XXX: The ban code ensures that regex "lumps" are pointer-aligned,
131
         * but coming for example from a VMOD there is no guarantee. Should
132
         * we formally require that code is properly aligned?
133
         */
134 8021
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
135 8021
        if (code->re == VRE_PACKED_RE) {
136 23
                AZ(code->re_ctx);
137 23
                return (TRUST_ME(code + 1));
138
        }
139 7998
        return (code->re);
140 8021
}
141
142
static void
143 7982
vre_limit(const vre_t *code, const volatile struct vre_limits *lim)
144
{
145
146 7982
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
147
148 7982
        if (lim == NULL)
149 2665
                return;
150
151 5317
        assert(code->re != VRE_PACKED_RE);
152
153
        /* XXX: not reentrant */
154 5317
        AN(code->re_ctx);
155 5317
        AZ(pcre2_set_match_limit(code->re_ctx, lim->match));
156 5317
        AZ(pcre2_set_depth_limit(code->re_ctx, lim->depth));
157 7982
}
158
159
vre_t *
160 29
VRE_export(const vre_t *code, size_t *sz)
161
{
162
        pcre2_code *re;
163
        vre_t *exp;
164
165 29
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
166 29
        re = VRE_unpack(code);
167 29
        AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz));
168
169 29
        exp = malloc(sizeof(*exp) + *sz);
170 29
        if (exp == NULL)
171 0
                return (NULL);
172
173 29
        INIT_OBJ(exp, VRE_MAGIC);
174 29
        exp->re = VRE_PACKED_RE;
175 29
        memcpy(exp + 1, re, *sz);
176 29
        *sz += sizeof(*exp);
177 29
        return (exp);
178 29
}
179
180
static int
181 7994
vre_capture(const vre_t *code, const char *subject, size_t length,
182
    size_t offset, int options, txt *groups, size_t *count,
183
    pcre2_match_data **datap)
184
{
185
        pcre2_match_data *data;
186
        pcre2_code *re;
187
        PCRE2_SIZE *ovector, b, e;
188
        size_t nov, g;
189
        int matches;
190
191 7994
        re = VRE_unpack(code);
192
193 7994
        if (datap != NULL && *datap != NULL) {
194 11
                data = *datap;
195 11
                *datap = NULL;
196 11
        } else {
197 7983
                data = pcre2_match_data_create_from_pattern(re, NULL);
198 7983
                AN(data);
199
        }
200
201 7994
        ovector = pcre2_get_ovector_pointer(data);
202 7994
        nov = 2L * pcre2_get_ovector_count(data);
203 24136
        for (g = 0; g < nov; g++)
204 16142
                ovector[g] = PCRE2_UNSET;
205
206 15988
        matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset,
207 7994
            options, data, code->re_ctx);
208
209 7994
        if (groups != NULL) {
210 73
                AN(count);
211 73
                AN(*count);
212 73
                ovector = pcre2_get_ovector_pointer(data);
213 73
                nov = vmin_t(size_t, pcre2_get_ovector_count(data), *count);
214 172
                for (g = 0; g < nov; g++) {
215 99
                        b = ovector[2 * g];
216 99
                        e = ovector[2 * g + 1];
217 99
                        if (b == PCRE2_UNSET) {
218 35
                                groups->b = groups->e = "";
219 35
                        } else {
220 64
                                groups->b = subject + b;
221 64
                                groups->e = subject + e;
222
                        }
223 99
                        groups++;
224 99
                }
225 73
                *count = nov;
226 73
        }
227
228 7994
        if (datap != NULL && matches > VRE_ERROR_NOMATCH)
229 39
                *datap = data;
230
        else
231 7955
                pcre2_match_data_free(data);
232 7994
        return (matches);
233
}
234
235
int
236 7921
VRE_match(const vre_t *code, const char *subject, size_t length,
237
    int options, const volatile struct vre_limits *lim)
238
{
239
240 7921
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
241 7921
        AN(subject);
242
243 7921
        if (length == 0)
244 5444
                length = PCRE2_ZERO_TERMINATED;
245 7921
        vre_limit(code, lim);
246 7921
        return (vre_capture(code, subject, length, 0, options,
247
            NULL, NULL, NULL));
248
}
249
250
int
251 0
VRE_capture(const vre_t *code, const char *subject, size_t length, int options,
252
    txt *groups, size_t count, const volatile struct vre_limits *lim)
253
{
254
        int i;
255
256 0
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
257 0
        AN(subject);
258 0
        AN(groups);
259 0
        AN(count);
260
261 0
        if (length == 0)
262 0
                length = PCRE2_ZERO_TERMINATED;
263 0
        vre_limit(code, lim);
264 0
        i = vre_capture(code, subject, length, 0, options,
265 0
            groups, &count, NULL);
266
267 0
        if (i <= 0)
268 0
                return (i);
269 0
        return (count);
270 0
}
271
272
int
273 62
VRE_sub(const vre_t *code, const char *subject, const char *replacement,
274
    struct vsb *vsb, const volatile struct vre_limits *lim, int all)
275
{
276 62
        pcre2_match_data *data = NULL;
277
        txt groups[10];
278
        size_t count;
279 62
        int i, offset = 0;
280
        const char *s, *e;
281
        unsigned x;
282
283 62
        CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
284 62
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
285 62
        AN(subject);
286 62
        AN(replacement);
287
288 62
        vre_limit(code, lim);
289 62
        count = 10;
290 124
        i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, 0,
291 62
            groups, &count, &data);
292
293 62
        if (i <= VRE_ERROR_NOMATCH) {
294 25
                AZ(data);
295 25
                return (i);
296
        }
297
298 37
        do {
299 39
                AN(data); /* check reuse across successful captures */
300 39
                AN(count);
301
302
                /* Copy prefix to match */
303 39
                s = subject + offset;
304 39
                VSB_bcat(vsb, s, pdiff(s, groups[0].b));
305 210
                for (s = e = replacement; *e != '\0'; e++ ) {
306 171
                        if (*e != '\\' || e[1] == '\0')
307 141
                                continue;
308 30
                        VSB_bcat(vsb, s, pdiff(s, e));
309 30
                        s = ++e;
310 30
                        if (isdigit(*e)) {
311 26
                                s++;
312 26
                                x = *e - '0';
313 26
                                if (x >= count)
314 6
                                        continue;
315 20
                                VSB_bcat(vsb, groups[x].b, Tlen(groups[x]));
316 20
                                continue;
317
                        }
318 4
                }
319 39
                VSB_bcat(vsb, s, pdiff(s, e));
320 39
                offset = pdiff(subject, groups[0].e);
321 39
                if (!all)
322 28
                        break;
323 11
                count = 10;
324 22
                i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset,
325 11
                    PCRE2_NOTEMPTY, groups, &count, &data);
326
327 11
                if (i < VRE_ERROR_NOMATCH) {
328 0
                        AZ(data);
329 0
                        return (i);
330
                }
331 11
        } while (i != VRE_ERROR_NOMATCH);
332
333 37
        if (data != NULL) {
334 28
                assert(i > VRE_ERROR_NOMATCH);
335 28
                AZ(all);
336 28
                pcre2_match_data_free(data);
337 28
        }
338
339
        /* Copy suffix to match */
340 37
        VSB_cat(vsb, subject + offset);
341 37
        return (1);
342 62
}
343
344
void
345 10716
VRE_free(vre_t **vv)
346
{
347
        vre_t *v;
348
349 10716
        TAKE_OBJ_NOTNULL(v, vv, VRE_MAGIC);
350
351 10716
        if (v->re == VRE_PACKED_RE) {
352 29
                v->re = NULL;
353 29
                AZ(v->re_ctx);
354 29
        }
355
356 10716
        if (v->re_ctx != NULL)
357 10682
                pcre2_match_context_free(v->re_ctx);
358 10716
        if (v->re != NULL)
359 10682
                pcre2_code_free(v->re);
360 10716
        FREE_OBJ(v);
361 10716
}
362
363
void
364 6
VRE_quote(struct vsb *vsb, const char *src)
365
{
366
        const char *b, *e;
367
368 6
        CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
369 6
        if (src == NULL)
370 0
                return;
371 9
        for (b = src; (e = strstr(b, "\\E")) != NULL; b = e + 2)
372 3
                VSB_printf(vsb, "\\Q%.*s\\\\EE", (int)(e - b), b);
373 6
        if (*b != '\0')
374 3
                VSB_printf(vsb, "\\Q%s\\E", b);
375 6
}