varnish-cache/lib/libvarnish/vre.c

/*-
 * Copyright (c) 2006-2011 Varnish Software AS
 * All rights reserved.
 *
 * Author: Tollef Fog Heen <tfheen@redpill-linpro.com>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "config.h" |
31 |
|
|
32 |
|
#include <ctype.h> |
33 |
|
#include <string.h> |
34 |
|
#include <unistd.h> |
35 |
|
|
36 |
|
#include "vdef.h" |
37 |
|
|
38 |
|
#include "vas.h" // XXX Flexelint "not used" - but req'ed for assert() |
39 |
|
#include "vsb.h" |
40 |
|
#include "miniobj.h" |
41 |
|
|
42 |
|
#include "vre.h" |
43 |
|
#include "vre_pcre2.h" |
44 |
|
|
45 |
|
/* should be turned into an error sooner or later */ |
46 |
|
#if !defined(pcre2_set_depth_limit) |
47 |
|
# warning pcre2 missing pcre2_set_depth_limit - update recommended |
48 |
|
# define pcre2_set_depth_limit(r, d) pcre2_set_recursion_limit(r, d) |
49 |
|
#endif |
50 |
|
|
51 |
|
#define VRE_PACKED_RE		(pcre2_code *)(-1)

struct vre {
	unsigned		magic;
#define VRE_MAGIC		0xe83097dc
	pcre2_code		*re;
	pcre2_match_context	*re_ctx;
};

/*
 * We don't want to spread or even expose the majority of PCRE2 options
 * and errors so we establish our own symbols and implement hard linkage
 * to PCRE2 here.
 */
const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH;

const unsigned VRE_CASELESS = PCRE2_CASELESS;

vre_t *
VRE_compile(const char *pattern, unsigned options,
    int *errptr, int *erroffset, unsigned jit)
{
	PCRE2_SIZE erroff;
	vre_t *v;

	AN(pattern);
	AN(errptr);
	AN(erroffset);

	*errptr = 0;
	*erroffset = -1;

	ALLOC_OBJ(v, VRE_MAGIC);
	if (v == NULL) {
		*errptr = PCRE2_ERROR_NOMEMORY;
		return (NULL);
	}
	v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED,
	    options, errptr, &erroff, NULL);
	*erroffset = erroff;
	if (v->re == NULL) {
		VRE_free(&v);
		return (NULL);
	}
	v->re_ctx = pcre2_match_context_create(NULL);
	if (v->re_ctx == NULL) {
		*errptr = PCRE2_ERROR_NOMEMORY;
		VRE_free(&v);
		return (NULL);
	}
#if USE_PCRE2_JIT
	if (jit)
		(void)pcre2_jit_compile(v->re, PCRE2_JIT_COMPLETE);
#else
	(void)jit;
#endif
	return (v);
}

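/*
 * Usage sketch (not part of the upstream source, added for illustration):
 * a caller only needs the declarations from vre.h and the VRE_* symbols
 * above, no PCRE2 headers.  A subject length of 0 is interpreted as
 * "NUL-terminated" by VRE_match() further down.
 *
 *	vre_t *v;
 *	int err, erroff;
 *
 *	v = VRE_compile("^bar", VRE_CASELESS, &err, &erroff, 1);
 *	if (v == NULL)
 *		return;			(err/erroff describe the failure)
 *	if (VRE_match(v, "Barium", 0, 0, NULL) > VRE_ERROR_NOMATCH)
 *		handle_match();		(hypothetical helper)
 *	VRE_free(&v);
 */
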
int
VRE_error(struct vsb *vsb, int err)
{
	char buf[VRE_ERROR_LEN];
	int i;

	CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
	i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN);
	if (i == PCRE2_ERROR_BADDATA) {
		VSB_printf(vsb, "unknown pcre2 error code (%d)", err);
		return (-1);
	}
	VSB_cat(vsb, buf);
	return (0);
}

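/*
 * Usage sketch (not upstream code): turning the error code left behind by
 * a failed VRE_compile() into a readable message.  VSB_new_auto(),
 * VSB_finish(), VSB_data() and VSB_destroy() are assumed from vsb.h.
 *
 *	struct vsb *vsb = VSB_new_auto();
 *
 *	AN(vsb);
 *	VRE_error(vsb, err);
 *	AZ(VSB_finish(vsb));
 *	fprintf(stderr, "regex error at offset %d: %s\n", erroff,
 *	    VSB_data(vsb));
 *	VSB_destroy(&vsb);
 */
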
pcre2_code *
VRE_unpack(const vre_t *code)
{

	/* XXX: The ban code ensures that regex "lumps" are pointer-aligned,
	 * but coming for example from a VMOD there is no guarantee. Should
	 * we formally require that code is properly aligned?
	 */
	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
	if (code->re == VRE_PACKED_RE) {
		AZ(code->re_ctx);
		return (TRUST_ME(code + 1));
	}
	return (code->re);
}

static void
vre_limit(const vre_t *code, const volatile struct vre_limits *lim)
{

	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);

	if (lim == NULL)
		return;

	assert(code->re != VRE_PACKED_RE);

	/* XXX: not reentrant */
	AN(code->re_ctx);
	AZ(pcre2_set_match_limit(code->re_ctx, lim->match));
	AZ(pcre2_set_depth_limit(code->re_ctx, lim->depth));
}

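/*
 * Usage sketch (not upstream code): callers cap runaway patterns by
 * passing a struct vre_limits (declared in vre.h, field names as used
 * above) to the matching functions; a NULL limit leaves whatever was set
 * on the match context last.
 *
 *	static const struct vre_limits lim = { .match = 1000, .depth = 20 };
 *
 *	i = VRE_match(v, subject, 0, 0, &lim);
 */
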
vre_t *
VRE_export(const vre_t *code, size_t *sz)
{
	pcre2_code *re;
	vre_t *exp;

	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
	re = VRE_unpack(code);
	AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz));

	exp = malloc(sizeof(*exp) + *sz);
	if (exp == NULL)
		return (NULL);

	INIT_OBJ(exp, VRE_MAGIC);
	exp->re = VRE_PACKED_RE;
	memcpy(exp + 1, re, *sz);
	*sz += sizeof(*exp);
	return (exp);
}

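/*
 * Usage sketch (not upstream code): VRE_export() flattens a compiled
 * expression into one self-contained allocation, a vre_t header with
 * re == VRE_PACKED_RE followed immediately by the pcre2 bytecode.  Such a
 * packed lump can be stored or copied as plain bytes (mind the alignment
 * caveat in VRE_unpack()) and still be matched against, as long as no
 * limits are requested:
 *
 *	size_t sz;
 *	vre_t *exp;
 *
 *	exp = VRE_export(v, &sz);
 *	AN(exp);
 *	(void)VRE_match(exp, subject, 0, 0, NULL);
 *	VRE_free(&exp);
 */
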
static int
vre_capture(const vre_t *code, const char *subject, size_t length,
    size_t offset, int options, txt *groups, size_t *count,
    pcre2_match_data **datap)
{
	pcre2_match_data *data;
	pcre2_code *re;
	PCRE2_SIZE *ovector, b, e;
	size_t nov, g;
	int matches;

	re = VRE_unpack(code);

	if (datap != NULL && *datap != NULL) {
		data = *datap;
		*datap = NULL;
	} else {
		data = pcre2_match_data_create_from_pattern(re, NULL);
		AN(data);
	}

	ovector = pcre2_get_ovector_pointer(data);
	nov = 2L * pcre2_get_ovector_count(data);
	for (g = 0; g < nov; g++)
		ovector[g] = PCRE2_UNSET;

	matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset,
	    options, data, code->re_ctx);

	if (groups != NULL) {
		AN(count);
		AN(*count);
		ovector = pcre2_get_ovector_pointer(data);
		nov = vmin_t(size_t, pcre2_get_ovector_count(data), *count);
		for (g = 0; g < nov; g++) {
			b = ovector[2 * g];
			e = ovector[2 * g + 1];
			if (b == PCRE2_UNSET) {
				groups->b = groups->e = "";
			} else {
				groups->b = subject + b;
				groups->e = subject + e;
			}
			groups++;
		}
		*count = nov;
	}

	if (datap != NULL && matches > VRE_ERROR_NOMATCH)
		*datap = data;
	else
		pcre2_match_data_free(data);
	return (matches);
}

int
VRE_match(const vre_t *code, const char *subject, size_t length,
    int options, const volatile struct vre_limits *lim)
{

	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
	AN(subject);

	if (length == 0)
		length = PCRE2_ZERO_TERMINATED;
	vre_limit(code, lim);
	return (vre_capture(code, subject, length, 0, options,
	    NULL, NULL, NULL));
}

int
VRE_capture(const vre_t *code, const char *subject, size_t length, int options,
    txt *groups, size_t count, const volatile struct vre_limits *lim)
{
	int i;

	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
	AN(subject);
	AN(groups);
	AN(count);

	if (length == 0)
		length = PCRE2_ZERO_TERMINATED;
	vre_limit(code, lim);
	i = vre_capture(code, subject, length, 0, options,
	    groups, &count, NULL);

	if (i <= 0)
		return (i);
	return (count);
}

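/*
 * Usage sketch (not upstream code): extracting capture groups into a txt
 * array, assuming v was compiled from a pattern such as "(\w+)=(\w+)".
 * Group 0 is the whole match, unset groups come back as empty strings,
 * and the return value is the number of txt slots filled in (or
 * VRE_ERROR_NOMATCH / another negative error otherwise).
 *
 *	txt grp[3];
 *	int n;
 *
 *	n = VRE_capture(v, "key=value", 0, 0, grp, 3, NULL);
 *	if (n > 0)
 *		printf("%.*s\n", (int)Tlen(grp[1]), grp[1].b);
 */
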
int
VRE_sub(const vre_t *code, const char *subject, const char *replacement,
    struct vsb *vsb, const volatile struct vre_limits *lim, int all)
{
	pcre2_match_data *data = NULL;
	txt groups[10];
	size_t count;
	int i, offset = 0;
	const char *s, *e;
	unsigned x;

	CHECK_OBJ_NOTNULL(code, VRE_MAGIC);
	CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
	AN(subject);
	AN(replacement);

	vre_limit(code, lim);
	count = 10;
	i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, 0,
	    groups, &count, &data);

	if (i <= VRE_ERROR_NOMATCH) {
		AZ(data);
		return (i);
	}

	do {
		AN(data); /* check reuse across successful captures */
		AN(count);

		/* Copy prefix to match */
		s = subject + offset;
		VSB_bcat(vsb, s, pdiff(s, groups[0].b));
		for (s = e = replacement; *e != '\0'; e++ ) {
			if (*e != '\\' || e[1] == '\0')
				continue;
			VSB_bcat(vsb, s, pdiff(s, e));
			s = ++e;
			if (isdigit(*e)) {
				s++;
				x = *e - '0';
				if (x >= count)
					continue;
				VSB_bcat(vsb, groups[x].b, Tlen(groups[x]));
				continue;
			}
		}
		VSB_bcat(vsb, s, pdiff(s, e));
		offset = pdiff(subject, groups[0].e);
		if (!all)
			break;
		count = 10;
		i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset,
		    PCRE2_NOTEMPTY, groups, &count, &data);

		if (i < VRE_ERROR_NOMATCH) {
			AZ(data);
			return (i);
		}
	} while (i != VRE_ERROR_NOMATCH);

	if (data != NULL) {
		assert(i > VRE_ERROR_NOMATCH);
		AZ(all);
		pcre2_match_data_free(data);
	}

	/* Copy suffix to match */
	VSB_cat(vsb, subject + offset);
	return (1);
}

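/*
 * Usage sketch (not upstream code): in the replacement string a backslash
 * followed by a digit inserts that capture group (\0 is the whole match),
 * "\\" emits a literal backslash, and with all != 0 every non-overlapping
 * match is rewritten.  Assuming v was compiled from "foo=(\d+)":
 *
 *	struct vsb *vsb = VSB_new_auto();
 *
 *	AN(vsb);
 *	if (VRE_sub(v, "foo=1, foo=2", "bar=\\1", vsb, NULL, 1) > 0) {
 *		AZ(VSB_finish(vsb));
 *		printf("%s\n", VSB_data(vsb));	(prints "bar=1, bar=2")
 *	}
 *	VSB_destroy(&vsb);
 */
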
void
VRE_free(vre_t **vv)
{
	vre_t *v;

	TAKE_OBJ_NOTNULL(v, vv, VRE_MAGIC);

	if (v->re == VRE_PACKED_RE) {
		v->re = NULL;
		AZ(v->re_ctx);
	}

	if (v->re_ctx != NULL)
		pcre2_match_context_free(v->re_ctx);
	if (v->re != NULL)
		pcre2_code_free(v->re);
	FREE_OBJ(v);
}

void
VRE_quote(struct vsb *vsb, const char *src)
{
	const char *b, *e;

	CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC);
	if (src == NULL)
		return;
	for (b = src; (e = strstr(b, "\\E")) != NULL; b = e + 2)
		VSB_printf(vsb, "\\Q%.*s\\\\EE", (int)(e - b), b);
	if (*b != '\0')
		VSB_printf(vsb, "\\Q%s\\E", b);
}
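
/*
 * Usage sketch (not upstream code): VRE_quote() wraps the string in
 * \Q...\E so it matches literally inside a larger pattern, splitting
 * around any embedded "\E" so the quote cannot be terminated early; e.g.
 * quoting "a.b\Ec" yields "\Qa.b\\EE\Qc\E".
 *
 *	struct vsb *vsb = VSB_new_auto();
 *
 *	AN(vsb);
 *	VSB_cat(vsb, "^");
 *	VRE_quote(vsb, user_input);	(user_input: arbitrary caller bytes)
 *	AZ(VSB_finish(vsb));
 *	v = VRE_compile(VSB_data(vsb), 0, &err, &erroff, 0);
 *	VSB_destroy(&vsb);
 */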