| | varnish-cache/lib/libvarnish/vre.c |
0 |
|
/*- |
1 |
|
* Copyright (c) 2006-2011 Varnish Software AS |
2 |
|
* All rights reserved. |
3 |
|
* |
4 |
|
* Author: Tollef Fog Heen <tfheen@redpill-linpro.com> |
5 |
|
* |
6 |
|
* SPDX-License-Identifier: BSD-2-Clause |
7 |
|
* |
8 |
|
* Redistribution and use in source and binary forms, with or without |
9 |
|
* modification, are permitted provided that the following conditions |
10 |
|
* are met: |
11 |
|
* 1. Redistributions of source code must retain the above copyright |
12 |
|
* notice, this list of conditions and the following disclaimer. |
13 |
|
* 2. Redistributions in binary form must reproduce the above copyright |
14 |
|
* notice, this list of conditions and the following disclaimer in the |
15 |
|
* documentation and/or other materials provided with the distribution. |
16 |
|
* |
17 |
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND |
18 |
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
19 |
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
20 |
|
* ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE |
21 |
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
22 |
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
23 |
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
24 |
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
25 |
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
26 |
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
27 |
|
* SUCH DAMAGE. |
28 |
|
*/ |
29 |
|
|
30 |
|
#include "config.h" |
31 |
|
|
32 |
|
#include <ctype.h> |
33 |
|
#include <string.h> |
34 |
|
#include <unistd.h> |
35 |
|
|
36 |
|
#include "vdef.h" |
37 |
|
|
38 |
|
#include "vas.h" // XXX Flexelint "not used" - but req'ed for assert() |
39 |
|
#include "vsb.h" |
40 |
|
#include "miniobj.h" |
41 |
|
|
42 |
|
#include "vre.h" |
43 |
|
#include "vre_pcre2.h" |
44 |
|
|
45 |
|
/*
 * When HAVE_PCRE2_SET_DEPTH_LIMIT is false, map pcre2_set_depth_limit()
 * onto pcre2_set_recursion_limit() so the rest of the file can use a
 * single spelling.
 */
#if !HAVE_PCRE2_SET_DEPTH_LIMIT
#  define pcre2_set_depth_limit(r, d) pcre2_set_recursion_limit(r, d)
#endif

/*
 * Sentinel stored in vre->re to mark a "packed" regex, i.e. one whose
 * compiled code is stored right after the struct vre header instead of
 * being referenced through the pointer (see VRE_export()/VRE_unpack()).
 *
 * The whole expansion is parenthesized so the macro behaves as a single
 * primary expression in any context (sizeof, postfix operators, ...).
 */
#define VRE_PACKED_RE ((pcre2_code *)(-1))
50 |
|
|
51 |
|
struct vre { |
52 |
|
unsigned magic; |
53 |
|
#define VRE_MAGIC 0xe83097dc |
54 |
|
pcre2_code *re; |
55 |
|
pcre2_match_context *re_ctx; |
56 |
|
}; |
57 |
|
|
58 |
|
/* |
59 |
|
* We don't want to spread or even expose the majority of PCRE2 options |
60 |
|
* and errors so we establish our own symbols and implement hard linkage |
61 |
|
* to PCRE2 here. |
62 |
|
*/ |
63 |
|
const int VRE_ERROR_NOMATCH = PCRE2_ERROR_NOMATCH; |
64 |
|
|
65 |
|
const unsigned VRE_CASELESS = PCRE2_CASELESS; |
66 |
|
|
67 |
|
vre_t * |
68 |
557200 |
VRE_compile(const char *pattern, unsigned options, |
69 |
|
int *errptr, int *erroffset, unsigned jit) |
70 |
|
{ |
71 |
|
PCRE2_SIZE erroff; |
72 |
|
vre_t *v; |
73 |
|
|
74 |
557200 |
AN(pattern); |
75 |
557200 |
AN(errptr); |
76 |
557200 |
AN(erroffset); |
77 |
|
|
78 |
557200 |
*errptr = 0; |
79 |
557200 |
*erroffset = -1; |
80 |
|
|
81 |
557200 |
ALLOC_OBJ(v, VRE_MAGIC); |
82 |
557200 |
if (v == NULL) { |
83 |
0 |
*errptr = PCRE2_ERROR_NOMEMORY; |
84 |
0 |
return (NULL); |
85 |
|
} |
86 |
1114400 |
v->re = pcre2_compile((PCRE2_SPTR8)pattern, PCRE2_ZERO_TERMINATED, |
87 |
557200 |
options, errptr, &erroff, NULL); |
88 |
557200 |
*erroffset = erroff; |
89 |
557200 |
if (v->re == NULL) { |
90 |
200 |
VRE_free(&v); |
91 |
200 |
return (NULL); |
92 |
|
} |
93 |
557000 |
v->re_ctx = pcre2_match_context_create(NULL); |
94 |
557000 |
if (v->re_ctx == NULL) { |
95 |
0 |
*errptr = PCRE2_ERROR_NOMEMORY; |
96 |
0 |
VRE_free(&v); |
97 |
0 |
return (NULL); |
98 |
|
} |
99 |
|
#if USE_PCRE2_JIT |
100 |
557000 |
if (jit) |
101 |
219040 |
(void)pcre2_jit_compile(v->re, PCRE2_JIT_COMPLETE); |
102 |
|
#else |
103 |
|
(void)jit; |
104 |
|
#endif |
105 |
557000 |
return (v); |
106 |
557200 |
} |
107 |
|
|
108 |
|
int |
109 |
240 |
VRE_error(struct vsb *vsb, int err) |
110 |
|
{ |
111 |
|
char buf[VRE_ERROR_LEN]; |
112 |
|
int i; |
113 |
|
|
114 |
240 |
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC); |
115 |
240 |
i = pcre2_get_error_message(err, (PCRE2_UCHAR *)buf, VRE_ERROR_LEN); |
116 |
240 |
if (i == PCRE2_ERROR_BADDATA) { |
117 |
0 |
VSB_printf(vsb, "unknown pcre2 error code (%d)", err); |
118 |
0 |
return (-1); |
119 |
|
} |
120 |
240 |
VSB_cat(vsb, buf); |
121 |
240 |
return (0); |
122 |
240 |
} |
123 |
|
|
124 |
|
pcre2_code * |
125 |
309312 |
VRE_unpack(const vre_t *code) |
126 |
|
{ |
127 |
|
|
128 |
|
/* XXX: The ban code ensures that regex "lumps" are pointer-aligned, |
129 |
|
* but coming for example from a VMOD there is no guarantee. Should |
130 |
|
* we formally require that code is properly aligned? |
131 |
|
*/ |
132 |
309312 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
133 |
309312 |
if (code->re == VRE_PACKED_RE) { |
134 |
920 |
AZ(code->re_ctx); |
135 |
920 |
return (TRUST_ME(code + 1)); |
136 |
|
} |
137 |
308392 |
return (code->re); |
138 |
309312 |
} |
139 |
|
|
140 |
|
static void |
141 |
307689 |
vre_limit(const vre_t *code, const volatile struct vre_limits *lim) |
142 |
|
{ |
143 |
|
|
144 |
307689 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
145 |
|
|
146 |
307689 |
if (lim == NULL) |
147 |
100418 |
return; |
148 |
|
|
149 |
207271 |
assert(code->re != VRE_PACKED_RE); |
150 |
|
|
151 |
|
/* XXX: not reentrant */ |
152 |
207271 |
AN(code->re_ctx); |
153 |
207271 |
AZ(pcre2_set_match_limit(code->re_ctx, lim->match)); |
154 |
207271 |
AZ(pcre2_set_depth_limit(code->re_ctx, lim->depth)); |
155 |
307689 |
} |
156 |
|
|
157 |
|
vre_t * |
158 |
1160 |
VRE_export(const vre_t *code, size_t *sz) |
159 |
|
{ |
160 |
|
pcre2_code *re; |
161 |
|
vre_t *exp; |
162 |
|
|
163 |
1160 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
164 |
1160 |
re = VRE_unpack(code); |
165 |
1160 |
AZ(pcre2_pattern_info(re, PCRE2_INFO_SIZE, sz)); |
166 |
|
|
167 |
1160 |
exp = malloc(sizeof(*exp) + *sz); |
168 |
1160 |
if (exp == NULL) |
169 |
0 |
return (NULL); |
170 |
|
|
171 |
1160 |
INIT_OBJ(exp, VRE_MAGIC); |
172 |
1160 |
exp->re = VRE_PACKED_RE; |
173 |
1160 |
memcpy(exp + 1, re, *sz); |
174 |
1160 |
*sz += sizeof(*exp); |
175 |
1160 |
return (exp); |
176 |
1160 |
} |
177 |
|
|
178 |
|
static int |
179 |
308161 |
vre_capture(const vre_t *code, const char *subject, size_t length, |
180 |
|
size_t offset, int options, txt *groups, size_t *count, |
181 |
|
pcre2_match_data **datap) |
182 |
|
{ |
183 |
|
pcre2_match_data *data; |
184 |
|
pcre2_code *re; |
185 |
|
PCRE2_SIZE *ovector, b, e; |
186 |
|
size_t nov, g; |
187 |
|
int matches; |
188 |
|
|
189 |
308161 |
re = VRE_unpack(code); |
190 |
|
|
191 |
308161 |
if (datap != NULL && *datap != NULL) { |
192 |
440 |
data = *datap; |
193 |
440 |
*datap = NULL; |
194 |
440 |
} else { |
195 |
307721 |
data = pcre2_match_data_create_from_pattern(re, NULL); |
196 |
307721 |
AN(data); |
197 |
|
} |
198 |
|
|
199 |
308161 |
ovector = pcre2_get_ovector_pointer(data); |
200 |
308161 |
nov = 2L * pcre2_get_ovector_count(data); |
201 |
930633 |
for (g = 0; g < nov; g++) |
202 |
622472 |
ovector[g] = PCRE2_UNSET; |
203 |
|
|
204 |
616322 |
matches = pcre2_match(re, (PCRE2_SPTR)subject, length, offset, |
205 |
308161 |
options, data, code->re_ctx); |
206 |
|
|
207 |
308161 |
if (groups != NULL) { |
208 |
2920 |
AN(count); |
209 |
2920 |
AN(*count); |
210 |
2920 |
ovector = pcre2_get_ovector_pointer(data); |
211 |
2920 |
nov = vmin_t(size_t, pcre2_get_ovector_count(data), *count); |
212 |
6880 |
for (g = 0; g < nov; g++) { |
213 |
3960 |
b = ovector[2 * g]; |
214 |
3960 |
e = ovector[2 * g + 1]; |
215 |
3960 |
if (b == PCRE2_UNSET) { |
216 |
1400 |
groups->b = groups->e = ""; |
217 |
1400 |
} else { |
218 |
2560 |
groups->b = subject + b; |
219 |
2560 |
groups->e = subject + e; |
220 |
|
} |
221 |
3960 |
groups++; |
222 |
3960 |
} |
223 |
2920 |
*count = nov; |
224 |
2920 |
} |
225 |
|
|
226 |
308161 |
if (datap != NULL && matches > VRE_ERROR_NOMATCH) |
227 |
1560 |
*datap = data; |
228 |
|
else |
229 |
306601 |
pcre2_match_data_free(data); |
230 |
308161 |
return (matches); |
231 |
|
} |
232 |
|
|
233 |
|
int |
234 |
305233 |
VRE_match(const vre_t *code, const char *subject, size_t length, |
235 |
|
int options, const volatile struct vre_limits *lim) |
236 |
|
{ |
237 |
|
|
238 |
305233 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
239 |
305233 |
AN(subject); |
240 |
|
|
241 |
305233 |
if (length == 0) |
242 |
212214 |
length = PCRE2_ZERO_TERMINATED; |
243 |
305233 |
vre_limit(code, lim); |
244 |
305233 |
return (vre_capture(code, subject, length, 0, options, |
245 |
|
NULL, NULL, NULL)); |
246 |
|
} |
247 |
|
|
248 |
|
int |
249 |
0 |
VRE_capture(const vre_t *code, const char *subject, size_t length, int options, |
250 |
|
txt *groups, size_t count, const volatile struct vre_limits *lim) |
251 |
|
{ |
252 |
|
int i; |
253 |
|
|
254 |
0 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
255 |
0 |
AN(subject); |
256 |
0 |
AN(groups); |
257 |
0 |
AN(count); |
258 |
|
|
259 |
0 |
if (length == 0) |
260 |
0 |
length = PCRE2_ZERO_TERMINATED; |
261 |
0 |
vre_limit(code, lim); |
262 |
0 |
i = vre_capture(code, subject, length, 0, options, |
263 |
0 |
groups, &count, NULL); |
264 |
|
|
265 |
0 |
if (i <= 0) |
266 |
0 |
return (i); |
267 |
0 |
return (count); |
268 |
0 |
} |
269 |
|
|
270 |
|
int |
271 |
2480 |
VRE_sub(const vre_t *code, const char *subject, const char *replacement, |
272 |
|
struct vsb *vsb, const volatile struct vre_limits *lim, int all) |
273 |
|
{ |
274 |
2480 |
pcre2_match_data *data = NULL; |
275 |
|
txt groups[10]; |
276 |
|
size_t count; |
277 |
2480 |
int i, offset = 0; |
278 |
|
const char *s, *e; |
279 |
|
unsigned x; |
280 |
|
|
281 |
2480 |
CHECK_OBJ_NOTNULL(code, VRE_MAGIC); |
282 |
2480 |
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC); |
283 |
2480 |
AN(subject); |
284 |
2480 |
AN(replacement); |
285 |
|
|
286 |
2480 |
vre_limit(code, lim); |
287 |
2480 |
count = 10; |
288 |
4960 |
i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, 0, |
289 |
2480 |
groups, &count, &data); |
290 |
|
|
291 |
2480 |
if (i <= VRE_ERROR_NOMATCH) { |
292 |
1000 |
AZ(data); |
293 |
1000 |
return (i); |
294 |
|
} |
295 |
|
|
296 |
1480 |
do { |
297 |
1560 |
AN(data); /* check reuse across successful captures */ |
298 |
1560 |
AN(count); |
299 |
|
|
300 |
|
/* Copy prefix to match */ |
301 |
1560 |
s = subject + offset; |
302 |
1560 |
VSB_bcat(vsb, s, pdiff(s, groups[0].b)); |
303 |
8400 |
for (s = e = replacement; *e != '\0'; e++ ) { |
304 |
6840 |
if (*e != '\\' || e[1] == '\0') |
305 |
5640 |
continue; |
306 |
1200 |
VSB_bcat(vsb, s, pdiff(s, e)); |
307 |
1200 |
s = ++e; |
308 |
1200 |
if (isdigit(*e)) { |
309 |
1040 |
s++; |
310 |
1040 |
x = *e - '0'; |
311 |
1040 |
if (x >= count) |
312 |
240 |
continue; |
313 |
800 |
VSB_bcat(vsb, groups[x].b, Tlen(groups[x])); |
314 |
800 |
continue; |
315 |
|
} |
316 |
160 |
} |
317 |
1560 |
VSB_bcat(vsb, s, pdiff(s, e)); |
318 |
1560 |
offset = pdiff(subject, groups[0].e); |
319 |
1560 |
if (!all) |
320 |
1120 |
break; |
321 |
440 |
count = 10; |
322 |
880 |
i = vre_capture(code, subject, PCRE2_ZERO_TERMINATED, offset, |
323 |
440 |
PCRE2_NOTEMPTY, groups, &count, &data); |
324 |
|
|
325 |
440 |
if (i < VRE_ERROR_NOMATCH) { |
326 |
0 |
AZ(data); |
327 |
0 |
return (i); |
328 |
|
} |
329 |
440 |
} while (i != VRE_ERROR_NOMATCH); |
330 |
|
|
331 |
1480 |
if (data != NULL) { |
332 |
1120 |
assert(i > VRE_ERROR_NOMATCH); |
333 |
1120 |
AZ(all); |
334 |
1120 |
pcre2_match_data_free(data); |
335 |
1120 |
} |
336 |
|
|
337 |
|
/* Copy suffix to match */ |
338 |
1480 |
VSB_cat(vsb, subject + offset); |
339 |
1480 |
return (1); |
340 |
2480 |
} |
341 |
|
|
342 |
|
void |
343 |
420538 |
VRE_free(vre_t **vv) |
344 |
|
{ |
345 |
|
vre_t *v; |
346 |
|
|
347 |
420538 |
TAKE_OBJ_NOTNULL(v, vv, VRE_MAGIC); |
348 |
|
|
349 |
420538 |
if (v->re == VRE_PACKED_RE) { |
350 |
1160 |
v->re = NULL; |
351 |
1160 |
AZ(v->re_ctx); |
352 |
1160 |
} |
353 |
|
|
354 |
420538 |
if (v->re_ctx != NULL) |
355 |
419178 |
pcre2_match_context_free(v->re_ctx); |
356 |
420538 |
if (v->re != NULL) |
357 |
419178 |
pcre2_code_free(v->re); |
358 |
420538 |
FREE_OBJ(v); |
359 |
420538 |
} |
360 |
|
|
361 |
|
void |
362 |
240 |
VRE_quote(struct vsb *vsb, const char *src) |
363 |
|
{ |
364 |
|
const char *b, *e; |
365 |
|
|
366 |
240 |
CHECK_OBJ_NOTNULL(vsb, VSB_MAGIC); |
367 |
240 |
if (src == NULL) |
368 |
0 |
return; |
369 |
360 |
for (b = src; (e = strstr(b, "\\E")) != NULL; b = e + 2) |
370 |
120 |
VSB_printf(vsb, "\\Q%.*s\\\\EE", (int)(e - b), b); |
371 |
240 |
if (*b != '\0') |
372 |
120 |
VSB_printf(vsb, "\\Q%s\\E", b); |
373 |
240 |
} |