|  |  | varnish-cache/bin/varnishd/cache/cache_esi_parse.c | 
|---|
| 0 |  | /*- | 
| 1 |  |  * Copyright (c) 2011 Varnish Software AS | 
| 2 |  |  * All rights reserved. | 
| 3 |  |  * | 
| 4 |  |  * Author: Poul-Henning Kamp <phk@phk.freebsd.dk> | 
| 5 |  |  * | 
| 6 |  |  * SPDX-License-Identifier: BSD-2-Clause | 
| 7 |  |  * | 
| 8 |  |  * Redistribution and use in source and binary forms, with or without | 
| 9 |  |  * modification, are permitted provided that the following conditions | 
| 10 |  |  * are met: | 
| 11 |  |  * 1. Redistributions of source code must retain the above copyright | 
| 12 |  |  *    notice, this list of conditions and the following disclaimer. | 
| 13 |  |  * 2. Redistributions in binary form must reproduce the above copyright | 
| 14 |  |  *    notice, this list of conditions and the following disclaimer in the | 
| 15 |  |  *    documentation and/or other materials provided with the distribution. | 
| 16 |  |  * | 
| 17 |  |  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND | 
| 18 |  |  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | 
| 19 |  |  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | 
| 20 |  |  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE | 
| 21 |  |  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | 
| 22 |  |  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS | 
| 23 |  |  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) | 
| 24 |  |  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT | 
| 25 |  |  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY | 
| 26 |  |  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF | 
| 27 |  |  * SUCH DAMAGE. | 
| 28 |  |  * | 
| 29 |  |  * VEP Varnish Esi Parsing | 
| 30 |  |  */ | 
| 31 |  |  | 
| 32 |  | #include "config.h" | 
| 33 |  |  | 
| 34 |  | #include "cache_varnishd.h" | 
| 35 |  | #include "cache_filter.h" | 
| 36 |  |  | 
| 37 |  | #include "cache_vgz.h" | 
| 38 |  | #include "cache_esi.h" | 
| 39 |  | #include "vct.h" | 
| 40 |  | #include "vend.h" | 
| 41 |  | #include "vgz.h" | 
| 42 |  |  | 
/*
 * Debug tracing is compiled out.  To get traces on stdout, swap the two
 * definitions below so that Debug() expands to printf().
 */
//#define Debug(fmt, ...) printf(fmt, __VA_ARGS__)
#define Debug(fmt, ...) /**/

struct vep_state;

/* Tells a dostuff_f handler whether it is called for an attribute or a tag */
enum dowhat {
        DO_ATTR,
        DO_TAG
};

/* Per-element handler; a pointer to one is kept in vep_state.dostuff */
typedef void dostuff_f(struct vep_state *vep, enum dowhat what);
| 50 |  |  | 
/*
 * One row of a match table: a literal to compare the input against and
 * the parser state that goes with it.  Every table in this file ends
 * with a row whose match is NULL.
 */
struct vep_match {
        const char              *match;
        const char * const      *state;
};
| 55 |  |  | 
/*
 * How a stretch of input is marked.  vep_emit_common() writes a skip
 * record for SKIP and a verbatim record for VERBATIM.
 */
enum vep_mark {
        VERBATIM = 0,
        SKIP
};
| 57 |  |  | 
| 58 |  | struct vep_state { | 
| 59 |  |         unsigned                magic; | 
| 60 |  | #define VEP_MAGIC               0x55cb9b82 | 
| 61 |  |         struct vsb              *vsb; | 
| 62 |  |  | 
| 63 |  |         const char              *url; | 
| 64 |  |         struct vfp_ctx          *vc; | 
| 65 |  |         int                     dogzip; | 
| 66 |  |         vep_callback_t          *cb; | 
| 67 |  |         void                    *cb_priv; | 
| 68 |  |  | 
| 69 |  |         /* Internal Counter for default call-back function */ | 
| 70 |  |         ssize_t                 cb_x; | 
| 71 |  |  | 
| 72 |  |         /* parser state */ | 
| 73 |  |         const char              *state; | 
| 74 |  |         unsigned                startup; | 
| 75 |  |         unsigned                esi_found; | 
| 76 |  |  | 
| 77 |  |         unsigned                endtag; | 
| 78 |  |         unsigned                emptytag; | 
| 79 |  |         unsigned                canattr; | 
| 80 |  |  | 
| 81 |  |         unsigned                remove; | 
| 82 |  |  | 
| 83 |  |         ssize_t                 o_wait; | 
| 84 |  |         ssize_t                 o_pending; | 
| 85 |  |         ssize_t                 o_total; | 
| 86 |  |         uint32_t                crc; | 
| 87 |  |         ssize_t                 o_crc; | 
| 88 |  |         uint32_t                crcp; | 
| 89 |  |         ssize_t                 o_last; | 
| 90 |  |  | 
| 91 |  |         const char              *hack_p; | 
| 92 |  |         const char              *ver_p; | 
| 93 |  |  | 
| 94 |  |         const char              *until; | 
| 95 |  |         const char              *until_p; | 
| 96 |  |         const char              *until_s; | 
| 97 |  |  | 
| 98 |  |         int                     in_esi_tag; | 
| 99 |  |  | 
| 100 |  |         const char              *esicmt; | 
| 101 |  |         const char              *esicmt_p; | 
| 102 |  |  | 
| 103 |  |         struct vep_match        *attr; | 
| 104 |  |         struct vsb              *attr_vsb; | 
| 105 |  |         int                     attr_delim; | 
| 106 |  |  | 
| 107 |  |         struct vep_match        *match; | 
| 108 |  |         struct vep_match        *match_hit; | 
| 109 |  |  | 
| 110 |  |         char                    tag[8]; | 
| 111 |  |         int                     tag_i; | 
| 112 |  |  | 
| 113 |  |         dostuff_f               *dostuff; | 
| 114 |  |  | 
| 115 |  |         struct vsb              *include_src; | 
| 116 |  |         unsigned                include_continue; | 
| 117 |  |  | 
| 118 |  |         unsigned                nm_skip; | 
| 119 |  |         unsigned                nm_verbatim; | 
| 120 |  |         unsigned                nm_pending; | 
| 121 |  |         enum vep_mark           last_mark; | 
| 122 |  | }; | 
| 123 |  |  | 
/*---------------------------------------------------------------------
 * Parser state labels.
 *
 * vep_state.state points at one of these strings and the parser tells
 * states apart by comparing pointers; the text itself shows up in the
 * Debug() traces.
 */

/* Start of document */
static const char * const VEP_START = "[Start]";
static const char * const VEP_BOM = "[BOM]";
static const char * const VEP_TESTXML = "[TestXml]";
static const char * const VEP_NOTXML = "[NotXml]";

static const char * const VEP_NEXTTAG = "[NxtTag]";
static const char * const VEP_NOTMYTAG = "[NotMyTag]";

/* Targets of vep_match_starttag[] */
static const char * const VEP_STARTTAG = "[StartTag]";
static const char * const VEP_COMMENTESI = "[CommentESI]";
static const char * const VEP_COMMENT = "[Comment]";
static const char * const VEP_CDATA = "[CDATA]";
static const char * const VEP_ESITAG = "[ESITag]";
static const char * const VEP_ESIENDTAG = "[/ESITag]";

/* Targets of vep_match_esi[] */
static const char * const VEP_ESIREMOVE = "[ESI:Remove]";
static const char * const VEP_ESIINCLUDE = "[ESI:Include]";
static const char * const VEP_ESICOMMENT = "[ESI:Comment]";
static const char * const VEP_ESIBOGON = "[ESI:Bogon]";

static const char * const VEP_INTAG = "[InTag]";
static const char * const VEP_TAGERROR = "[TagError]";

/* Attribute handling */
static const char * const VEP_ATTR = "[Attribute]";
static const char * const VEP_SKIPATTR = "[SkipAttribute]";
static const char * const VEP_ATTRDELIM = "[AttrDelim]";
static const char * const VEP_ATTRGETVAL = "[AttrGetValue]";
static const char * const VEP_ATTRVAL = "[AttrValue]";

static const char * const VEP_UNTIL = "[Until]";
static const char * const VEP_MATCHBUF = "[MatchBuf]";
static const char * const VEP_MATCH = "[Match]";
| 158 |  |  | 
| 159 |  | /*---------------------------------------------------------------------*/ | 
| 160 |  |  | 
| 161 |  | static struct vep_match vep_match_starttag[] = { | 
| 162 |  |         { "!--esi",     &VEP_COMMENTESI }, | 
| 163 |  |         { "!---->",     &VEP_NEXTTAG }, | 
| 164 |  |         { "!--",        &VEP_COMMENT }, | 
| 165 |  |         { "/esi:",      &VEP_ESIENDTAG }, | 
| 166 |  |         { "esi:",       &VEP_ESITAG }, | 
| 167 |  |         { "![CDATA[",   &VEP_CDATA }, | 
| 168 |  |         { NULL,         &VEP_NOTMYTAG } | 
| 169 |  | }; | 
| 170 |  |  | 
| 171 |  | /*---------------------------------------------------------------------*/ | 
| 172 |  |  | 
| 173 |  | static struct vep_match vep_match_esi[] = { | 
| 174 |  |         { "include",    &VEP_ESIINCLUDE }, | 
| 175 |  |         { "remove",     &VEP_ESIREMOVE }, | 
| 176 |  |         { "comment",    &VEP_ESICOMMENT }, | 
| 177 |  |         { NULL,         &VEP_ESIBOGON } | 
| 178 |  | }; | 
| 179 |  |  | 
| 180 |  | /*---------------------------------------------------------------------*/ | 
| 181 |  |  | 
| 182 |  | static struct vep_match vep_match_attr_include[] = { | 
| 183 |  |         { "src=",       &VEP_ATTRGETVAL }, | 
| 184 |  |         { "onerror=",   &VEP_ATTRGETVAL }, | 
| 185 |  |         { NULL,         &VEP_SKIPATTR } | 
| 186 |  | }; | 
| 187 |  |  | 
| 188 |  | /*---------------------------------------------------------------------*/ | 
| 189 |  |  | 
| 190 |  | static struct vep_match vep_match_bom[] = { | 
| 191 |  |         { "\xeb\xbb\xbf",       &VEP_START }, | 
| 192 |  |         { NULL,                 &VEP_BOM } | 
| 193 |  | }; | 
| 194 |  |  | 
| 195 |  | /*-------------------------------------------------------------------- | 
| 196 |  |  * Report a parsing error | 
| 197 |  |  */ | 
| 198 |  |  | 
| 199 |  | static void | 
| 200 | 1425 | vep_error(const struct vep_state *vep, const char *p) | 
| 201 |  | { | 
| 202 | 1425 |         VSC_C_main->esi_errors++; | 
| 203 | 2850 |         VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "ERR: after %zd %s", | 
| 204 | 1425 |              vep->o_last, p); | 
| 205 | 1425 | } | 
| 206 |  |  | 
| 207 |  | /*-------------------------------------------------------------------- | 
| 208 |  |  * Report a parsing warning | 
| 209 |  |  */ | 
| 210 |  |  | 
| 211 |  | static void | 
| 212 | 200 | vep_warn(const struct vep_state *vep, const char *p) | 
| 213 |  | { | 
| 214 | 200 |         VSC_C_main->esi_warnings++; | 
| 215 | 400 |         VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, "WARN: after %zd %s", | 
| 216 | 200 |              vep->o_last, p); | 
| 217 | 200 | } | 
| 218 |  |  | 
| 219 |  | /*--------------------------------------------------------------------- | 
| 220 |  |  * return match or NULL if more input needed. | 
| 221 |  |  */ | 
| 222 |  |  | 
| 223 |  | static struct vep_match * | 
| 224 | 492766 | vep_match(const struct vep_state *vep, const char *b, const char *e) | 
| 225 |  | { | 
| 226 |  |         struct vep_match *vm; | 
| 227 |  |         const char *q, *r; | 
| 228 |  |  | 
| 229 | 492766 |         AN(vep->match); | 
| 230 | 3204734 |         for (vm = vep->match; vm->match != NULL; vm++) { | 
| 231 | 2767160 |                 assert(strlen(vm->match) <= sizeof (vep->tag)); | 
| 232 | 2767160 |                 r = b; | 
| 233 | 3032293 |                 for (q = vm->match; *q != '\0' && r < e; q++, r++) | 
| 234 | 2977101 |                         if (*q != *r) | 
| 235 | 2711968 |                                 break; | 
| 236 | 2767160 |                 if (*q == '\0') | 
| 237 | 45560 |                         break; | 
| 238 | 2721600 |                 if (r == e) | 
| 239 | 9632 |                         return (NULL); | 
| 240 | 2711968 |         } | 
| 241 | 483134 |         return (vm); | 
| 242 | 492766 | } | 
| 243 |  |  | 
| 244 |  | /*--------------------------------------------------------------------- | 
| 245 |  |  * | 
| 246 |  |  */ | 
| 247 |  |  | 
| 248 |  | static void | 
| 249 | 39160 | vep_emit_len(const struct vep_state *vep, ssize_t l, int m8, int m16, int m64) | 
| 250 |  | { | 
| 251 |  |         uint8_t buf[9]; | 
| 252 |  |  | 
| 253 | 39160 |         assert(l > 0); | 
| 254 | 39160 |         if (l < 256) { | 
| 255 | 38480 |                 buf[0] = (uint8_t)m8; | 
| 256 | 38480 |                 buf[1] = (uint8_t)l; | 
| 257 | 38480 |                 assert((ssize_t)buf[1] == l); | 
| 258 | 38480 |                 VSB_bcat(vep->vsb, buf, 2); | 
| 259 | 39160 |         } else if (l < 65536) { | 
| 260 | 440 |                 buf[0] = (uint8_t)m16; | 
| 261 | 440 |                 vbe16enc(buf + 1, (uint16_t)l); | 
| 262 | 440 |                 assert((ssize_t)vbe16dec(buf + 1) == l); | 
| 263 | 440 |                 VSB_bcat(vep->vsb, buf, 3); | 
| 264 | 440 |         } else { | 
| 265 | 240 |                 buf[0] = (uint8_t)m64; | 
| 266 | 240 |                 vbe64enc(buf + 1, l); | 
| 267 | 240 |                 assert((ssize_t)vbe64dec(buf + 1) == l); | 
| 268 | 240 |                 VSB_bcat(vep->vsb, buf, 9); | 
| 269 |  |         } | 
| 270 | 39160 | } | 
| 271 |  |  | 
| 272 |  | static void | 
| 273 | 17800 | vep_emit_skip(const struct vep_state *vep, ssize_t l) | 
| 274 |  | { | 
| 275 |  |  | 
| 276 | 17800 |         vep_emit_len(vep, l, VEC_S1, VEC_S2, VEC_S8); | 
| 277 | 17800 | } | 
| 278 |  |  | 
| 279 |  | static void | 
| 280 | 17320 | vep_emit_verbatim(const struct vep_state *vep, ssize_t l, ssize_t l_crc) | 
| 281 |  | { | 
| 282 |  |         uint8_t buf[4]; | 
| 283 |  |  | 
| 284 | 17320 |         vep_emit_len(vep, l, VEC_V1, VEC_V2, VEC_V8); | 
| 285 | 17320 |         if (vep->dogzip) { | 
| 286 | 4040 |                 vep_emit_len(vep, l_crc, VEC_C1, VEC_C2, VEC_C8); | 
| 287 | 4040 |                 vbe32enc(buf, vep->crc); | 
| 288 | 4040 |                 VSB_bcat(vep->vsb, buf, sizeof buf); | 
| 289 | 4040 |         } | 
| 290 | 17320 | } | 
| 291 |  |  | 
| 292 |  | static void | 
| 293 | 40720 | vep_emit_common(struct vep_state *vep, ssize_t l, enum vep_mark mark) | 
| 294 |  | { | 
| 295 |  |  | 
| 296 | 40720 |         assert(l >= 0); | 
| 297 | 40720 |         if (l == 0) | 
| 298 | 5600 |                 return; | 
| 299 | 35120 |         assert(mark == SKIP || mark == VERBATIM); | 
| 300 | 35120 |         if (mark == SKIP) | 
| 301 | 17800 |                 vep_emit_skip(vep, l); | 
| 302 |  |         else | 
| 303 | 17320 |                 vep_emit_verbatim(vep, l, vep->o_crc); | 
| 304 |  |  | 
| 305 | 35120 |         vep->crc = crc32(0L, Z_NULL, 0); | 
| 306 | 35120 |         vep->o_crc = 0; | 
| 307 | 35120 |         vep->o_total += l; | 
| 308 | 40720 | } | 
| 309 |  |  | 
| 310 |  | /*--------------------------------------------------------------------- | 
| 311 |  |  * | 
| 312 |  |  */ | 
| 313 |  |  | 
| 314 |  | static void | 
| 315 | 2652314 | vep_mark_common(struct vep_state *vep, const char *p, enum vep_mark mark) | 
| 316 |  | { | 
| 317 |  |         ssize_t l, lcb; | 
| 318 |  |  | 
| 319 | 2652314 |         assert(mark == SKIP || mark == VERBATIM); | 
| 320 |  |  | 
| 321 |  |         /* The NO-OP case, no data, no pending data & no change of mode */ | 
| 322 | 2652314 |         if (vep->last_mark == mark && p == vep->ver_p && vep->o_pending == 0) | 
| 323 | 1720 |                 return; | 
| 324 |  |  | 
| 325 |  |         /* | 
| 326 |  |          * If we changed mode, emit whatever the opposite mode | 
| 327 |  |          * assembled before the pending bytes. | 
| 328 |  |          */ | 
| 329 |  |  | 
| 330 | 2650594 |         if (vep->last_mark != mark && (vep->o_wait > 0 || vep->startup)) { | 
| 331 | 64400 |                 lcb = vep->cb(vep->vc, vep->cb_priv, 0, | 
| 332 | 32200 |                     mark == VERBATIM ? VGZ_RESET : VGZ_ALIGN); | 
| 333 | 32200 |                 vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); | 
| 334 | 32200 |                 vep->o_last = lcb; | 
| 335 | 32200 |                 vep->o_wait = 0; | 
| 336 | 32200 |         } | 
| 337 |  |  | 
| 338 |  |         /* Transfer pending bytes CRC into active mode CRC */ | 
| 339 | 2650594 |         if (vep->o_pending) { | 
| 340 | 2699 |                 (void)vep->cb(vep->vc, vep->cb_priv, vep->o_pending, | 
| 341 |  |                      VGZ_NORMAL); | 
| 342 | 2699 |                 if (vep->o_crc == 0) { | 
| 343 | 2089 |                         vep->crc = vep->crcp; | 
| 344 | 2089 |                         vep->o_crc = vep->o_pending; | 
| 345 | 2089 |                 } else { | 
| 346 | 1220 |                         vep->crc = crc32_combine(vep->crc, | 
| 347 | 610 |                             vep->crcp, vep->o_pending); | 
| 348 | 610 |                         vep->o_crc += vep->o_pending; | 
| 349 |  |                 } | 
| 350 | 2699 |                 vep->crcp = crc32(0L, Z_NULL, 0); | 
| 351 | 2699 |                 vep->o_wait += vep->o_pending; | 
| 352 | 2699 |                 vep->o_pending = 0; | 
| 353 | 2699 |         } | 
| 354 |  |  | 
| 355 |  |         /* * Process this bit of input */ | 
| 356 | 2650594 |         AN(vep->ver_p); | 
| 357 | 2650594 |         l = p - vep->ver_p; | 
| 358 | 2650594 |         assert(l >= 0); | 
| 359 | 2650594 |         vep->crc = crc32(vep->crc, (const void*)vep->ver_p, l); | 
| 360 | 2650594 |         vep->o_crc += l; | 
| 361 | 2650594 |         vep->ver_p = p; | 
| 362 |  |  | 
| 363 | 2650594 |         vep->o_wait += l; | 
| 364 | 2650594 |         vep->last_mark = mark; | 
| 365 | 2650594 |         (void)vep->cb(vep->vc, vep->cb_priv, l, VGZ_NORMAL); | 
| 366 | 2652314 | } | 
| 367 |  |  | 
| 368 |  | static void | 
| 369 | 1529797 | vep_mark_verbatim(struct vep_state *vep, const char *p) | 
| 370 |  | { | 
| 371 |  |  | 
| 372 | 1529797 |         vep_mark_common(vep, p, VERBATIM); | 
| 373 | 1529797 |         vep->nm_verbatim++; | 
| 374 | 1529797 | } | 
| 375 |  |  | 
| 376 |  | static void | 
| 377 | 1113997 | vep_mark_skip(struct vep_state *vep, const char *p) | 
| 378 |  | { | 
| 379 |  |  | 
| 380 | 1113997 |         vep_mark_common(vep, p, SKIP); | 
| 381 | 1113997 |         vep->nm_skip++; | 
| 382 | 1113997 | } | 
| 383 |  |  | 
| 384 |  | static void | 
| 385 | 5035 | vep_mark_pending(struct vep_state *vep, const char *p) | 
| 386 |  | { | 
| 387 |  |         ssize_t l; | 
| 388 |  |  | 
| 389 | 5035 |         AN(vep->ver_p); | 
| 390 | 5035 |         l = p - vep->ver_p; | 
| 391 | 5035 |         assert(l > 0); | 
| 392 | 5035 |         vep->crcp = crc32(vep->crcp, (const void *)vep->ver_p, l); | 
| 393 | 5035 |         vep->ver_p = p; | 
| 394 |  |  | 
| 395 | 5035 |         vep->o_pending += l; | 
| 396 | 5035 |         vep->nm_pending++; | 
| 397 | 5035 | } | 
| 398 |  |  | 
| 399 |  | /*--------------------------------------------------------------------- | 
| 400 |  |  */ | 
| 401 |  |  | 
| 402 |  | static void v_matchproto_() | 
| 403 | 240 | vep_do_comment(struct vep_state *vep, enum dowhat what) | 
| 404 |  | { | 
| 405 |  |         Debug("DO_COMMENT(%d)\n", what); | 
| 406 | 240 |         assert(what == DO_TAG); | 
| 407 | 240 |         if (!vep->emptytag) | 
| 408 | 80 |                 vep_error(vep, "ESI 1.0 <esi:comment> needs final '/'"); | 
| 409 | 240 | } | 
| 410 |  |  | 
| 411 |  | /*--------------------------------------------------------------------- | 
| 412 |  |  */ | 
| 413 |  |  | 
| 414 |  | static void v_matchproto_() | 
| 415 | 5680 | vep_do_remove(struct vep_state *vep, enum dowhat what) | 
| 416 |  | { | 
| 417 |  |         Debug("DO_REMOVE(%d, end %d empty %d remove %d)\n", | 
| 418 |  |             what, vep->endtag, vep->emptytag, vep->remove); | 
| 419 | 5680 |         assert(what == DO_TAG); | 
| 420 | 5680 |         if (vep->emptytag) | 
| 421 | 40 |                 vep_error(vep, "ESI 1.0 <esi:remove/> not legal"); | 
| 422 | 5640 |         else if (vep->remove && !vep->endtag) | 
| 423 | 40 |                 vep_error(vep, "ESI 1.0 <esi:remove> already open"); | 
| 424 | 5600 |         else if (!vep->remove && vep->endtag) | 
| 425 | 40 |                 vep_error(vep, "ESI 1.0 <esi:remove> not open"); | 
| 426 |  |         else | 
| 427 | 5560 |                 vep->remove = !vep->endtag; | 
| 428 | 5680 | } | 
| 429 |  |  | 
| 430 |  | /*--------------------------------------------------------------------- | 
| 431 |  |  */ | 
| 432 |  |  | 
| 433 |  | static void | 
| 434 | 10280 | include_attr_src(struct vep_state *vep) | 
| 435 |  | { | 
| 436 |  |         const char *p; | 
| 437 |  |  | 
| 438 | 10280 |         if (vep->include_src != NULL) { | 
| 439 | 40 |                 vep_error(vep, | 
| 440 |  |                     "ESI 1.0 <esi:include> " | 
| 441 |  |                     "has multiple src= attributes"); | 
| 442 | 40 |                 vep->state = VEP_TAGERROR; | 
| 443 | 40 |                 VSB_destroy(&vep->attr_vsb); | 
| 444 | 40 |                 VSB_destroy(&vep->include_src); | 
| 445 | 40 |                 return; | 
| 446 |  |         } | 
| 447 | 369680 |         for (p = VSB_data(vep->attr_vsb); *p != '\0'; p++) | 
| 448 | 359480 |                 if (vct_islws(*p)) | 
| 449 | 40 |                         break; | 
| 450 | 10240 |         if (*p != '\0') { | 
| 451 | 40 |                 vep_error(vep, | 
| 452 |  |                     "ESI 1.0 <esi:include> " | 
| 453 |  |                     "has whitespace in src= attribute"); | 
| 454 | 40 |                 vep->state = VEP_TAGERROR; | 
| 455 | 40 |                 VSB_destroy(&vep->attr_vsb); | 
| 456 | 40 |                 if (vep->include_src != NULL) | 
| 457 | 0 |                         VSB_destroy(&vep->include_src); | 
| 458 | 40 |                 return; | 
| 459 |  |         } | 
| 460 | 10200 |         vep->include_src = vep->attr_vsb; | 
| 461 | 10200 |         vep->attr_vsb = NULL; | 
| 462 | 10280 | } | 
| 463 |  |  | 
| 464 |  | static void | 
| 465 | 280 | include_attr_onerror(struct vep_state *vep) | 
| 466 |  | { | 
| 467 |  |  | 
| 468 | 280 |         vep->include_continue = !strcmp("continue", VSB_data(vep->attr_vsb)); | 
| 469 | 280 |         VSB_destroy(&vep->attr_vsb); | 
| 470 | 280 | } | 
| 471 |  |  | 
| 472 |  | static void v_matchproto_() | 
| 473 | 20800 | vep_do_include(struct vep_state *vep, enum dowhat what) | 
| 474 |  | { | 
| 475 |  |         const char *p, *q, *h; | 
| 476 |  |         ssize_t l; | 
| 477 |  |         char incl; | 
| 478 |  |  | 
| 479 |  |         Debug("DO_INCLUDE(%d)\n", what); | 
| 480 | 20800 |         if (what == DO_ATTR) { | 
| 481 |  |                 Debug("ATTR (%s) (%s)\n", vep->match_hit->match, | 
| 482 |  |                         VSB_data(vep->attr_vsb)); | 
| 483 | 10560 |                 if (!strcmp("src=", vep->match_hit->match)) { | 
| 484 | 10280 |                         include_attr_src(vep); | 
| 485 | 10280 |                         return; | 
| 486 |  |                 } | 
| 487 | 280 |                 if (!strcmp("onerror=", vep->match_hit->match)) { | 
| 488 | 280 |                         include_attr_onerror(vep); | 
| 489 | 280 |                         return; | 
| 490 |  |                 } | 
| 491 | 0 |                 WRONG("Unhandled <esi:include> attribute"); | 
| 492 | 0 |         } | 
| 493 | 10240 |         assert(what == DO_TAG); | 
| 494 | 10240 |         if (!vep->emptytag) | 
| 495 | 80 |                 vep_warn(vep, "ESI 1.0 <esi:include> lacks final '/'"); | 
| 496 | 10240 |         if (vep->include_src == NULL) { | 
| 497 | 80 |                 vep_error(vep, "ESI 1.0 <esi:include> lacks src attr"); | 
| 498 | 80 |                 return; | 
| 499 |  |         } | 
| 500 |  |  | 
| 501 |  |         /* | 
| 502 |  |          * Strictly speaking, we ought to spit out any piled up skip before | 
| 503 |  |          * emitting the VEC for the include, but objectively that makes no | 
| 504 |  |          * difference and robs us of a chance to collapse another skip into | 
| 505 |  |          * this on so we don't do that. | 
| 506 |  |          * However, we cannot tolerate any verbatim stuff piling up. | 
| 507 |  |          * The mark_skip() before calling dostuff should have taken | 
| 508 |  |          * care of that.  Make sure. | 
| 509 |  |          */ | 
| 510 | 10160 |         assert(vep->o_wait == 0 || vep->last_mark == SKIP); | 
| 511 |  |         /* XXX: what if it contains NUL bytes ?? */ | 
| 512 | 10160 |         p = VSB_data(vep->include_src); | 
| 513 | 10160 |         l = VSB_len(vep->include_src); | 
| 514 | 10160 |         h = 0; | 
| 515 |  |  | 
| 516 | 10160 |         incl = vep->include_continue ? VEC_IC : VEC_IA; | 
| 517 |  |  | 
| 518 | 10160 |         if (l > 7 && !memcmp(p, "http://", 7)) { | 
| 519 | 80 |                 h = p + 7; | 
| 520 | 80 |                 p = strchr(h, '/'); | 
| 521 | 80 |                 if (p == NULL) { | 
| 522 | 40 |                         vep_error(vep, | 
| 523 |  |                             "ESI 1.0 <esi:include> invalid src= URL"); | 
| 524 | 40 |                         vep->state = VEP_TAGERROR; | 
| 525 | 40 |                         AZ(vep->attr_vsb); | 
| 526 | 40 |                         VSB_destroy(&vep->include_src); | 
| 527 | 40 |                         return; | 
| 528 |  |                 } | 
| 529 |  |                 Debug("HOST <%.*s> PATH <%s>\n", (int)(p-h),h, p); | 
| 530 | 40 |                 VSB_printf(vep->vsb, "%c", incl); | 
| 531 | 40 |                 VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); | 
| 532 | 10120 |         } else if (l > 8 && !memcmp(p, "https://", 8)) { | 
| 533 | 120 |                 if (!FEATURE(FEATURE_ESI_IGNORE_HTTPS)) { | 
| 534 | 40 |                         vep_warn(vep, | 
| 535 |  |                             "ESI 1.0 <esi:include> with https:// ignored"); | 
| 536 | 40 |                         vep->state = VEP_TAGERROR; | 
| 537 | 40 |                         AZ(vep->attr_vsb); | 
| 538 | 40 |                         VSB_destroy(&vep->include_src); | 
| 539 | 40 |                         return; | 
| 540 |  |                 } | 
| 541 | 80 |                 vep_warn(vep, | 
| 542 |  |                     "ESI 1.0 <esi:include> https:// treated as http://"); | 
| 543 | 80 |                 h = p + 8; | 
| 544 | 80 |                 p = strchr(h, '/'); | 
| 545 | 80 |                 if (p == NULL) { | 
| 546 | 40 |                         vep_error(vep, | 
| 547 |  |                             "ESI 1.0 <esi:include> invalid src= URL"); | 
| 548 | 40 |                         vep->state = VEP_TAGERROR; | 
| 549 | 40 |                         AZ(vep->attr_vsb); | 
| 550 | 40 |                         VSB_destroy(&vep->include_src); | 
| 551 | 40 |                         return; | 
| 552 |  |                 } | 
| 553 | 40 |                 VSB_printf(vep->vsb, "%c", incl); | 
| 554 | 40 |                 VSB_printf(vep->vsb, "Host: %.*s%c", (int)(p-h), h, 0); | 
| 555 | 10000 |         } else if (*p == '/') { | 
| 556 | 8760 |                 VSB_printf(vep->vsb, "%c", incl); | 
| 557 | 8760 |                 VSB_printf(vep->vsb, "%c", 0); | 
| 558 | 8760 |         } else { | 
| 559 | 1200 |                 VSB_printf(vep->vsb, "%c", incl); | 
| 560 | 1200 |                 VSB_printf(vep->vsb, "%c", 0); | 
| 561 |  |                 /* Look for the last / before a '?' */ | 
| 562 | 1200 |                 h = NULL; | 
| 563 | 3600 |                 for (q = vep->url; *q && *q != '?'; q++) | 
| 564 | 3720 |                         if (*q == '/') | 
| 565 | 1320 |                                 h = q; | 
| 566 | 1200 |                 if (h == NULL) | 
| 567 | 40 |                         h = q + 1; | 
| 568 |  |  | 
| 569 |  |                 Debug("INCL:: [%.*s]/[%s]\n", | 
| 570 |  |                     (int)(h - vep->url), vep->url, p); | 
| 571 | 1200 |                 VSB_printf(vep->vsb, "%.*s/", (int)(h - vep->url), vep->url); | 
| 572 |  |         } | 
| 573 | 10040 |         l -= (p - VSB_data(vep->include_src)); | 
| 574 | 364960 |         for (q = p; *q != '\0'; ) { | 
| 575 | 354920 |                 if (*q == '&') { | 
| 576 |  | #define R(w,f,r)                                                        \ | 
| 577 |  |                         if (q + w <= p + l && !memcmp(q, f, w)) { \ | 
| 578 |  |                                 VSB_printf(vep->vsb, "%c", r);  \ | 
| 579 |  |                                 q += w;                         \ | 
| 580 |  |                                 continue;                       \ | 
| 581 |  |                         } | 
| 582 | 400 |                         R(6, "'", '\''); | 
| 583 | 320 |                         R(6, """, '"'); | 
| 584 | 240 |                         R(4, "<", '<'); | 
| 585 | 160 |                         R(4, ">", '>'); | 
| 586 | 80 |                         R(5, "&", '&'); | 
| 587 | 0 |                 } | 
| 588 | 354520 |                 VSB_printf(vep->vsb, "%c", *q++); | 
| 589 |  |         } | 
| 590 |  | #undef R | 
| 591 | 10040 |         VSB_printf(vep->vsb, "%c", 0); | 
| 592 | 10040 |         VSB_destroy(&vep->include_src); | 
| 593 | 10040 |         vep->include_continue = 0; | 
| 594 | 20800 | } | 
| 595 |  |  | 
| 596 |  | /*--------------------------------------------------------------------- | 
| 597 |  |  * Lex/Parse object for ESI instructions | 
| 598 |  |  * | 
| 599 |  |  * This function is called with the input object piecemeal so do not | 
| 600 |  |  * assume that we have more than one char available at a time, but | 
| 601 |  |  * optimize for getting huge chunks. | 
| 602 |  |  * | 
| 603 |  |  * NB: At the bottom of this source-file, there is a dot-diagram matching | 
| 604 |  |  * NB: the state-machine.  Please maintain it along with the code. | 
| 605 |  |  */ | 
| 606 |  |  | 
| 607 |  | void | 
| 608 | 2154839 | VEP_Parse(struct vep_state *vep, const char *p, size_t l) | 
| 609 |  | { | 
| 610 |  |         const char *e; | 
| 611 |  |         struct vep_match *vm; | 
| 612 |  |         int i; | 
| 613 |  |  | 
| 614 | 2154839 |         CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); | 
| 615 | 2154839 |         assert(l > 0); | 
| 616 |  |  | 
| 617 | 2154839 |         if (vep->startup) { | 
| 618 |  |                 /* | 
| 619 |  |                  * We must force the GZIP header out as a SKIP string, | 
| 620 |  |                  * otherwise an object starting with <esi:include would | 
| 621 |  |                  * have its GZIP header appear after the included object | 
| 622 |  |                  * (e000026.vtc) | 
| 623 |  |                  */ | 
| 624 | 8520 |                 vep->ver_p = ""; | 
| 625 | 8520 |                 vep->last_mark = SKIP; | 
| 626 | 8520 |                 vep_mark_common(vep, vep->ver_p, VERBATIM); | 
| 627 | 8520 |                 vep->startup = 0; | 
| 628 | 8520 |                 AZ(vep->hack_p); | 
| 629 | 8520 |                 vep->hack_p = p; | 
| 630 | 8520 |         } | 
| 631 |  |  | 
| 632 | 2154839 |         vep->ver_p = p; | 
| 633 |  |  | 
| 634 | 2154839 |         e = p + l; | 
| 635 |  |  | 
| 636 | 6258271 |         while (p < e) { | 
| 637 | 4103432 |                 AN(vep->state); | 
| 638 |  |                 Debug("EP %s %d (%.*s) [%.*s]\n", | 
| 639 |  |                     vep->state, | 
| 640 |  |                     vep->remove, | 
| 641 |  |                     vep->tag_i, vep->tag, | 
| 642 |  |                     (e - p) > 10 ? 10 : (int)(e-p), p); | 
| 643 | 4103432 |                 assert(p >= vep->ver_p); | 
| 644 |  |  | 
| 645 |  |                 /****************************************************** | 
| 646 |  |                  * SECTION A | 
| 647 |  |                  */ | 
| 648 |  |  | 
| 649 | 4103432 |                 if (vep->state == VEP_START) { | 
| 650 | 8640 |                         if (FEATURE(FEATURE_ESI_REMOVE_BOM) && | 
| 651 | 240 |                             *p == (char)0xeb) { | 
| 652 | 160 |                                 vep->match = vep_match_bom; | 
| 653 | 160 |                                 vep->state = VEP_MATCH; | 
| 654 | 160 |                         } else | 
| 655 | 8480 |                                 vep->state = VEP_BOM; | 
| 656 | 4103432 |                 } else if (vep->state == VEP_BOM) { | 
| 657 | 8520 |                         vep_mark_skip(vep, p); | 
| 658 | 8520 |                         if (FEATURE(FEATURE_ESI_DISABLE_XML_CHECK)) | 
| 659 | 2160 |                                 vep->state = VEP_NEXTTAG; | 
| 660 |  |                         else | 
| 661 | 6360 |                                 vep->state = VEP_TESTXML; | 
| 662 | 4094792 |                 } else if (vep->state == VEP_TESTXML) { | 
| 663 |  |                         /* | 
| 664 |  |                          * If the first non-whitespace char is different | 
| 665 |  |                          * from '<' we assume this is not XML. | 
| 666 |  |                          */ | 
| 667 | 14180 |                         while (p < e && vct_islws(*p)) | 
| 668 | 7520 |                                 p++; | 
| 669 | 6660 |                         vep_mark_verbatim(vep, p); | 
| 670 | 6660 |                         if (p < e && *p == '<') { | 
| 671 | 5800 |                                 p++; | 
| 672 | 5800 |                                 vep->state = VEP_STARTTAG; | 
| 673 | 6660 |                         } else if (p < e && *p == (char)0xeb) { | 
| 674 | 80 |                                 VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, | 
| 675 |  |                                     "WARN: No ESI processing, " | 
| 676 |  |                                     "first char not '<' but BOM." | 
| 677 |  |                                     " (See feature esi_remove_bom)" | 
| 678 |  |                                 ); | 
| 679 | 80 |                                 vep->state = VEP_NOTXML; | 
| 680 | 860 |                         } else if (p < e) { | 
| 681 | 440 |                                 VSLb(vep->vc->wrk->vsl, SLT_ESI_xmlerror, | 
| 682 |  |                                     "WARN: No ESI processing, " | 
| 683 |  |                                     "first char not '<'." | 
| 684 |  |                                     " (See feature esi_disable_xml_check)" | 
| 685 |  |                                 ); | 
| 686 | 440 |                                 vep->state = VEP_NOTXML; | 
| 687 | 440 |                         } | 
| 688 | 4086272 |                 } else if (vep->state == VEP_NOTXML) { | 
| 689 |  |                         /* | 
| 690 |  |                          * This is not recognized as XML, just skip thru | 
| 691 |  |                          * vfp_esi_end() will handle the rest | 
| 692 |  |                          */ | 
| 693 | 520 |                         p = e; | 
| 694 | 520 |                         vep_mark_verbatim(vep, p); | 
| 695 |  |  | 
| 696 |  |                 /****************************************************** | 
| 697 |  |                  * SECTION B | 
| 698 |  |                  */ | 
| 699 |  |  | 
| 700 | 4079612 |                 } else if (vep->state == VEP_NOTMYTAG) { | 
| 701 | 438728 |                         if (FEATURE(FEATURE_ESI_IGNORE_OTHER_ELEMENTS)) { | 
| 702 | 80 |                                 p++; | 
| 703 | 80 |                                 vep->state = VEP_NEXTTAG; | 
| 704 | 80 |                         } else { | 
| 705 | 438648 |                                 vep->tag_i = 0; | 
| 706 | 1856952 |                                 while (p < e) { | 
| 707 | 1855373 |                                         if (*p++ == '>') { | 
| 708 | 437069 |                                                 vep->state = VEP_NEXTTAG; | 
| 709 | 437069 |                                                 break; | 
| 710 |  |                                         } | 
| 711 |  |                                 } | 
| 712 |  |                         } | 
| 713 | 438728 |                         if (p == e && !vep->remove) | 
| 714 | 4058 |                                 vep_mark_verbatim(vep, p); | 
| 715 | 4079092 |                 } else if (vep->state == VEP_NEXTTAG) { | 
| 716 |  |                         /* | 
| 717 |  |                          * Hunt for start of next tag and keep an eye | 
| 718 |  |                          * out for end of EsiCmt if armed. | 
| 719 |  |                          */ | 
| 720 | 2577977 |                         vep->emptytag = 0; | 
| 721 | 2577977 |                         vep->attr = NULL; | 
| 722 | 2577977 |                         vep->dostuff = NULL; | 
| 723 | 89083359 |                         while (p < e && *p != '<') { | 
| 724 | 86505382 |                                 if (vep->esicmt_p == NULL) { | 
| 725 | 86500742 |                                         p++; | 
| 726 | 86500742 |                                         continue; | 
| 727 |  |                                 } | 
| 728 | 4640 |                                 if (*p != *vep->esicmt_p) { | 
| 729 | 2640 |                                         p++; | 
| 730 | 2640 |                                         vep->esicmt_p = vep->esicmt; | 
| 731 | 2640 |                                         continue; | 
| 732 |  |                                 } | 
| 733 | 2000 |                                 if (!vep->remove && vep->esicmt_p == vep->esicmt) | 
| 734 | 480 |                                         vep_mark_verbatim(vep, p); | 
| 735 | 2000 |                                 p++; | 
| 736 | 2000 |                                 if (*++vep->esicmt_p == '\0') { | 
| 737 | 640 |                                         vep->esi_found = 1; | 
| 738 | 640 |                                         vep->esicmt = NULL; | 
| 739 | 640 |                                         vep->esicmt_p = NULL; | 
| 740 |  |                                         /* | 
| 741 |  |                                          * The end of the esicmt | 
| 742 |  |                                          * should not be emitted. | 
| 743 |  |                                          * But the stuff before should | 
| 744 |  |                                          */ | 
| 745 | 640 |                                         vep_mark_skip(vep, p); | 
| 746 | 640 |                                 } | 
| 747 |  |                         } | 
| 748 | 2577977 |                         if (p < e) { | 
| 749 | 449374 |                                 if (!vep->remove) | 
| 750 | 446054 |                                         vep_mark_verbatim(vep, p); | 
| 751 | 449374 |                                 assert(*p == '<'); | 
| 752 | 449374 |                                 p++; | 
| 753 | 449374 |                                 vep->state = VEP_STARTTAG; | 
| 754 | 2577977 |                         } else if (vep->esicmt_p == vep->esicmt && !vep->remove) | 
| 755 | 1071292 |                                 vep_mark_verbatim(vep, p); | 
| 756 |  |  | 
| 757 |  |                 /****************************************************** | 
| 758 |  |                  * SECTION C | 
| 759 |  |                  */ | 
| 760 |  |  | 
| 761 | 3640364 |                 } else if (vep->state == VEP_STARTTAG) { | 
| 762 |  |                         /* Start of tag, set up match table */ | 
| 763 | 455174 |                         vep->endtag = 0; | 
| 764 | 455174 |                         vep->match = vep_match_starttag; | 
| 765 | 455174 |                         vep->state = VEP_MATCH; | 
| 766 | 1062387 |                 } else if (vep->state == VEP_COMMENT) { | 
| 767 | 200 |                         vep->esicmt_p = vep->esicmt = NULL; | 
| 768 | 200 |                         vep->until_p = vep->until = "-->"; | 
| 769 | 200 |                         vep->until_s = VEP_NEXTTAG; | 
| 770 | 200 |                         vep->state = VEP_UNTIL; | 
| 771 | 607213 |                 } else if (vep->state == VEP_COMMENTESI) { | 
| 772 | 680 |                         if (vep->remove) | 
| 773 | 200 |                                 vep_error(vep, | 
| 774 |  |                                     "ESI 1.0 Nested <!--esi" | 
| 775 |  |                                     " element in <esi:remove>"); | 
| 776 | 680 |                         vep->esicmt_p = vep->esicmt = "-->"; | 
| 777 | 680 |                         vep->state = VEP_NEXTTAG; | 
| 778 | 680 |                         vep_mark_skip(vep, p); | 
| 779 | 607013 |                 } else if (vep->state == VEP_CDATA) { | 
| 780 |  |                         /* | 
| 781 |  |                          * Easy: just look for the end of CDATA | 
| 782 |  |                          */ | 
| 783 | 160 |                         vep->until_p = vep->until = "]]>"; | 
| 784 | 160 |                         vep->until_s = VEP_NEXTTAG; | 
| 785 | 160 |                         vep->state = VEP_UNTIL; | 
| 786 | 606333 |                 } else if (vep->state == VEP_ESIENDTAG) { | 
| 787 | 2920 |                         vep->endtag = 1; | 
| 788 | 2920 |                         vep->state = VEP_ESITAG; | 
| 789 | 606173 |                 } else if (vep->state == VEP_ESITAG) { | 
| 790 | 16880 |                         vep->in_esi_tag = 1; | 
| 791 | 16880 |                         vep->esi_found = 1; | 
| 792 | 16880 |                         vep_mark_skip(vep, p); | 
| 793 | 16880 |                         vep->match = vep_match_esi; | 
| 794 | 16880 |                         vep->state = VEP_MATCH; | 
| 795 | 603253 |                 } else if (vep->state == VEP_ESIINCLUDE) { | 
| 796 | 10720 |                         if (vep->remove) { | 
| 797 | 160 |                                 vep_error(vep, | 
| 798 |  |                                     "ESI 1.0 <esi:include> element" | 
| 799 |  |                                     " nested in <esi:remove>"); | 
| 800 | 160 |                                 vep->state = VEP_TAGERROR; | 
| 801 | 10720 |                         } else if (vep->endtag) { | 
| 802 | 40 |                                 vep_error(vep, | 
| 803 |  |                                     "ESI 1.0 </esi:include> illegal end-tag"); | 
| 804 | 40 |                                 vep->state = VEP_TAGERROR; | 
| 805 | 40 |                         } else { | 
| 806 | 10520 |                                 vep->dostuff = vep_do_include; | 
| 807 | 10520 |                                 vep->state = VEP_INTAG; | 
| 808 | 10520 |                                 vep->attr = vep_match_attr_include; | 
| 809 |  |                         } | 
| 810 | 586373 |                 } else if (vep->state == VEP_ESIREMOVE) { | 
| 811 | 5680 |                         vep->dostuff = vep_do_remove; | 
| 812 | 5680 |                         vep->state = VEP_INTAG; | 
| 813 | 575653 |                 } else if (vep->state == VEP_ESICOMMENT) { | 
| 814 | 400 |                         if (vep->remove) { | 
| 815 | 40 |                                 vep_error(vep, | 
| 816 |  |                                     "ESI 1.0 <esi:comment> element" | 
| 817 |  |                                     " nested in <esi:remove>"); | 
| 818 | 40 |                                 vep->state = VEP_TAGERROR; | 
| 819 | 400 |                         } else if (vep->endtag) { | 
| 820 | 80 |                                 vep_error(vep, | 
| 821 |  |                                     "ESI 1.0 </esi:comment> illegal end-tag"); | 
| 822 | 80 |                                 vep->state = VEP_TAGERROR; | 
| 823 | 80 |                         } else { | 
| 824 | 280 |                                 vep->dostuff = vep_do_comment; | 
| 825 | 280 |                                 vep->state = VEP_INTAG; | 
| 826 |  |                         } | 
| 827 | 569973 |                 } else if (vep->state == VEP_ESIBOGON) { | 
| 828 | 80 |                         vep_error(vep, | 
| 829 |  |                             "ESI 1.0 <esi:bogus> element"); | 
| 830 | 80 |                         vep->state = VEP_TAGERROR; | 
| 831 |  |  | 
| 832 |  |                 /****************************************************** | 
| 833 |  |                  * SECTION D | 
| 834 |  |                  */ | 
| 835 |  |  | 
| 836 | 569573 |                 } else if (vep->state == VEP_INTAG) { | 
| 837 | 28267 |                         vep->tag_i = 0; | 
| 838 | 43667 |                         while (p < e && vct_islws(*p) && !vep->emptytag) { | 
| 839 | 15400 |                                 p++; | 
| 840 | 15400 |                                 vep->canattr = 1; | 
| 841 |  |                         } | 
| 842 | 28267 |                         if (p < e && *p == '/' && !vep->emptytag) { | 
| 843 | 10400 |                                 p++; | 
| 844 | 10400 |                                 vep->emptytag = 1; | 
| 845 | 10400 |                                 vep->canattr = 0; | 
| 846 | 10400 |                         } | 
| 847 | 28267 |                         if (p < e && *p == '>') { | 
| 848 | 16160 |                                 p++; | 
| 849 | 16160 |                                 AN(vep->dostuff); | 
| 850 | 16160 |                                 vep_mark_skip(vep, p); | 
| 851 | 16160 |                                 vep->dostuff(vep, DO_TAG); | 
| 852 | 16160 |                                 vep->in_esi_tag = 0; | 
| 853 | 16160 |                                 vep->state = VEP_NEXTTAG; | 
| 854 | 28267 |                         } else if (p < e && vep->emptytag) { | 
| 855 | 40 |                                 vep_error(vep, | 
| 856 |  |                                     "XML 1.0 '>' does not follow '/' in tag"); | 
| 857 | 40 |                                 vep->state = VEP_TAGERROR; | 
| 858 | 12107 |                         } else if (p < e && vep->canattr && | 
| 859 | 11080 |                             vct_isxmlnamestart(*p)) { | 
| 860 | 11040 |                                 vep->state = VEP_ATTR; | 
| 861 | 12067 |                         } else if (p < e) { | 
| 862 | 40 |                                 vep_error(vep, | 
| 863 |  |                                     "XML 1.0 Illegal attribute start char"); | 
| 864 | 40 |                                 vep->state = VEP_TAGERROR; | 
| 865 | 40 |                         } | 
| 866 | 569493 |                 } else if (vep->state == VEP_TAGERROR) { | 
| 867 | 7322 |                         while (p < e && *p != '>') | 
| 868 | 5230 |                                 p++; | 
| 869 | 2092 |                         if (p < e) { | 
| 870 | 720 |                                 p++; | 
| 871 | 720 |                                 vep_mark_skip(vep, p); | 
| 872 | 720 |                                 vep->in_esi_tag = 0; | 
| 873 | 720 |                                 vep->state = VEP_NEXTTAG; | 
| 874 | 720 |                                 if (vep->attr_vsb) | 
| 875 | 40 |                                         VSB_destroy(&vep->attr_vsb); | 
| 876 | 720 |                         } | 
| 877 |  |  | 
| 878 |  |                 /****************************************************** | 
| 879 |  |                  * SECTION E | 
| 880 |  |                  */ | 
| 881 |  |  | 
| 882 | 541226 |                 } else if (vep->state == VEP_ATTR) { | 
| 883 | 11040 |                         AZ(vep->attr_delim); | 
| 884 | 11040 |                         if (vep->attr == NULL) { | 
| 885 | 120 |                                 p++; | 
| 886 | 120 |                                 AZ(vep->attr_vsb); | 
| 887 | 120 |                                 vep->state = VEP_SKIPATTR; | 
| 888 | 120 |                         } else { | 
| 889 | 10920 |                                 vep->match = vep->attr; | 
| 890 | 10920 |                                 vep->state = VEP_MATCH; | 
| 891 |  |                         } | 
| 892 | 539134 |                 } else if (vep->state == VEP_SKIPATTR) { | 
| 893 | 1310 |                         while (p < e && vct_isxmlname(*p)) | 
| 894 | 728 |                                 p++; | 
| 895 | 582 |                         if (p < e && *p == '=') { | 
| 896 | 160 |                                 p++; | 
| 897 | 160 |                                 vep->state = VEP_ATTRDELIM; | 
| 898 | 582 |                         } else if (p < e && *p == '>') { | 
| 899 | 40 |                                 vep->state = VEP_INTAG; | 
| 900 | 422 |                         } else if (p < e && *p == '/') { | 
| 901 | 40 |                                 vep->state = VEP_INTAG; | 
| 902 | 382 |                         } else if (p < e && vct_issp(*p)) { | 
| 903 | 80 |                                 vep->state = VEP_INTAG; | 
| 904 | 342 |                         } else if (p < e) { | 
| 905 | 40 |                                 vep_error(vep, | 
| 906 |  |                                     "XML 1.0 Illegal attr char"); | 
| 907 | 40 |                                 vep->state = VEP_TAGERROR; | 
| 908 | 40 |                         } | 
| 909 | 528094 |                 } else if (vep->state == VEP_ATTRGETVAL) { | 
| 910 | 10680 |                         AZ(vep->attr_vsb); | 
| 911 | 10680 |                         vep->attr_vsb = VSB_new_auto(); | 
| 912 | 10680 |                         vep->state = VEP_ATTRDELIM; | 
| 913 | 527512 |                 } else if (vep->state == VEP_ATTRDELIM) { | 
| 914 | 10840 |                         AZ(vep->attr_delim); | 
| 915 | 10840 |                         if (*p == '"' || *p == '\'') { | 
| 916 | 10680 |                                 vep->attr_delim = *p++; | 
| 917 | 10680 |                                 vep->state = VEP_ATTRVAL; | 
| 918 | 10840 |                         } else if (!vct_issp(*p)) { | 
| 919 | 120 |                                 vep->attr_delim = ' '; | 
| 920 | 120 |                                 vep->state = VEP_ATTRVAL; | 
| 921 | 120 |                         } else { | 
| 922 | 40 |                                 vep_error(vep, | 
| 923 |  |                                     "XML 1.0 Illegal attribute delimiter"); | 
| 924 | 40 |                                 vep->state = VEP_TAGERROR; | 
| 925 |  |                         } | 
| 926 |  |  | 
| 927 | 516832 |                 } else if (vep->state == VEP_ATTRVAL) { | 
| 928 | 737111 |                         while (p < e && *p != '>' && *p != vep->attr_delim && | 
| 929 | 362400 |                            (vep->attr_delim != ' ' || !vct_issp(*p))) { | 
| 930 | 362400 |                                 if (vep->attr_vsb != NULL) | 
| 931 | 361920 |                                         VSB_putc(vep->attr_vsb, *p); | 
| 932 | 362400 |                                 p++; | 
| 933 |  |                         } | 
| 934 | 12311 |                         if (p < e && *p == '>') { | 
| 935 | 80 |                                 vep_error(vep, | 
| 936 |  |                                     "XML 1.0 Missing end attribute delimiter"); | 
| 937 | 80 |                                 vep->state = VEP_TAGERROR; | 
| 938 | 80 |                                 vep->attr_delim = 0; | 
| 939 | 80 |                                 if (vep->attr_vsb != NULL) { | 
| 940 | 80 |                                         AZ(VSB_finish(vep->attr_vsb)); | 
| 941 | 80 |                                         VSB_destroy(&vep->attr_vsb); | 
| 942 | 80 |                                 } | 
| 943 | 12311 |                         } else if (p < e) { | 
| 944 | 10720 |                                 vep->attr_delim = 0; | 
| 945 | 10720 |                                 p++; | 
| 946 | 10720 |                                 vep->state = VEP_INTAG; | 
| 947 | 10720 |                                 if (vep->attr_vsb != NULL) { | 
| 948 | 10560 |                                         AZ(VSB_finish(vep->attr_vsb)); | 
| 949 | 10560 |                                         AN(vep->dostuff); | 
| 950 | 10560 |                                         vep->dostuff(vep, DO_ATTR); | 
| 951 | 10560 |                                         vep->attr_vsb = NULL; | 
| 952 | 10560 |                                 } | 
| 953 | 10720 |                         } | 
| 954 |  |  | 
| 955 |  |                 /****************************************************** | 
| 956 |  |                  * Utility Section | 
| 957 |  |                  */ | 
| 958 |  |  | 
| 959 | 505992 |                 } else if (vep->state == VEP_MATCH) { | 
| 960 |  |                         /* | 
| 961 |  |                          * Match against a table | 
| 962 |  |                          */ | 
| 963 | 483134 |                         vm = vep_match(vep, p, e); | 
| 964 | 483134 |                         vep->match_hit = vm; | 
| 965 | 483134 |                         if (vm != NULL) { | 
| 966 | 477573 |                                 if (vm->match != NULL) | 
| 967 | 40277 |                                         p += strlen(vm->match); | 
| 968 | 477573 |                                 vep->state = *vm->state; | 
| 969 | 477573 |                                 vep->match = NULL; | 
| 970 | 477573 |                                 vep->tag_i = 0; | 
| 971 | 477573 |                         } else { | 
| 972 | 5561 |                                 assert(p + sizeof(vep->tag) >= e); | 
| 973 | 5561 |                                 memcpy(vep->tag, p, e - p); | 
| 974 | 5561 |                                 vep->tag_i = e - p; | 
| 975 | 5561 |                                 vep->state = VEP_MATCHBUF; | 
| 976 | 5561 |                                 p = e; | 
| 977 |  |                         } | 
| 978 | 493681 |                 } else if (vep->state == VEP_MATCHBUF) { | 
| 979 |  |                         /* | 
| 980 |  |                          * Match against a table while split over input | 
| 981 |  |                          * sections. | 
| 982 |  |                          */ | 
| 983 | 9632 |                         AN(vep->match); | 
| 984 | 9632 |                         i = sizeof(vep->tag) - vep->tag_i; | 
| 985 | 9632 |                         if (i > e - p) | 
| 986 | 8372 |                                 i = e - p; | 
| 987 | 9632 |                         memcpy(vep->tag + vep->tag_i, p, i); | 
| 988 | 19264 |                         vm = vep_match(vep, vep->tag, | 
| 989 | 9632 |                             vep->tag + vep->tag_i + i); | 
| 990 |  |                         Debug("MB (%.*s) tag_i %d i %d = vm %p match %s\n", | 
| 991 |  |                             vep->tag_i + i, vep->tag, | 
| 992 |  |                             vep->tag_i, | 
| 993 |  |                             i, | 
| 994 |  |                             vm, | 
| 995 |  |                             vm ? vm->match : "(nil)"); | 
| 996 |  |  | 
| 997 | 9632 |                         if (vm == NULL) { | 
| 998 | 4071 |                                 vep->tag_i += i; | 
| 999 | 4071 |                                 p += i; | 
| 1000 | 4071 |                                 assert(p == e); | 
| 1001 | 4071 |                         } else { | 
| 1002 | 5561 |                                 vep->match_hit = vm; | 
| 1003 | 5561 |                                 vep->state = *vm->state; | 
| 1004 | 5561 |                                 if (vm->match != NULL) { | 
| 1005 | 5283 |                                         i = strlen(vm->match); | 
| 1006 | 5283 |                                         if (i > vep->tag_i) | 
| 1007 | 5227 |                                                 p += i - vep->tag_i; | 
| 1008 | 5283 |                                 } | 
| 1009 | 5561 |                                 vep->match = NULL; | 
| 1010 | 5561 |                                 vep->tag_i = 0; | 
| 1011 |  |                         } | 
| 1012 | 10547 |                 } else if (vep->state == VEP_UNTIL) { | 
| 1013 |  |                         /* | 
| 1014 |  |                          * Skip until we see magic string | 
| 1015 |  |                          */ | 
| 1016 | 7395 |                         while (p < e) { | 
| 1017 | 6760 |                                 if (*p++ != *vep->until_p++) { | 
| 1018 | 5840 |                                         vep->until_p = vep->until; | 
| 1019 | 6760 |                                 } else if (*vep->until_p == '\0') { | 
| 1020 | 280 |                                         vep->state = vep->until_s; | 
| 1021 | 280 |                                         break; | 
| 1022 |  |                                 } | 
| 1023 |  |                         } | 
| 1024 | 915 |                         if (p == e && !vep->remove) | 
| 1025 | 733 |                                 vep_mark_verbatim(vep, p); | 
| 1026 | 915 |                 } else { | 
| 1027 |  |                         Debug("*** Unknown state %s\n", vep->state); | 
| 1028 | 0 |                         WRONG("WRONG ESI PARSER STATE"); | 
| 1029 |  |                 } | 
| 1030 |  |         } | 
| 1031 |  |         /* | 
| 1032 |  |          * We must always mark up the storage we got, try to do so | 
| 1033 |  |          * in the most efficient way, in particular with respect to | 
| 1034 |  |          * minimizing and limiting use of pending. | 
| 1035 |  |          */ | 
| 1036 | 2154839 |         if (p == vep->ver_p) | 
| 1037 |  |                 ; | 
| 1038 | 1075432 |         else if (vep->in_esi_tag) | 
| 1039 | 11867 |                 vep_mark_skip(vep, p); | 
| 1040 | 1063565 |         else if (vep->remove) | 
| 1041 | 1058530 |                 vep_mark_skip(vep, p); | 
| 1042 |  |         else | 
| 1043 | 5035 |                 vep_mark_pending(vep, p); | 
| 1044 | 2154839 | } | 
| 1045 |  |  | 
| 1046 |  | /*--------------------------------------------------------------------- | 
| 1047 |  |  * Default vep_callback_t: adds l to the ssize_t counter that priv points at and returns the new running total. */ | 
| 1048 |  | /* vc is only sanity-checked and flg is ignored; VEP_Init() installs this callback when the caller passes cb == NULL. */ | 
| 1049 |  | static ssize_t v_matchproto_(vep_callback_t) | 
| 1050 | 2656403 | vep_default_cb(struct vfp_ctx *vc, void *priv, ssize_t l, enum vgz_flag flg) | 
| 1051 |  | { | 
| 1052 |  |         ssize_t *s; | 
| 1053 |  |  | 
| 1054 | 2656403 |         CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); | 
| 1055 | 2656403 |         AN(priv); | 
| 1056 | 2656403 |         s = priv; /* priv is the caller-supplied ssize_t accumulator */ | 
| 1057 | 2656403 |         *s += l; /* account for the l bytes just reported */ | 
| 1058 | 2656403 |         (void)flg; /* unused, present only to match the callback prototype */ | 
| 1059 | 2656403 |         return (*s); | 
| 1060 |  | } | 
| 1061 |  |  | 
| 1062 |  | /*--------------------------------------------------------------------- | 
| 1063 |  |  * Allocate and initialise the ESI parser state; returns the new vep_state, or NULL (after logging) if WS_Alloc() fails. */ | 
| 1064 |  | /* cb/cb_priv: optional output callback and its private pointer; when cb is NULL, vep_default_cb counts bytes into vep->cb_x. */ | 
| 1065 |  | struct vep_state * | 
| 1066 | 10760 | VEP_Init(struct vfp_ctx *vc, const struct http *req, vep_callback_t *cb, | 
| 1067 |  |     void *cb_priv) | 
| 1068 |  | { | 
| 1069 |  |         struct vep_state *vep; | 
| 1070 |  |  | 
| 1071 | 10760 |         CHECK_OBJ_NOTNULL(vc, VFP_CTX_MAGIC); | 
| 1072 | 10760 |         CHECK_OBJ_NOTNULL(req, HTTP_MAGIC); | 
| 1073 | 10760 |         vep = WS_Alloc(vc->resp->ws, sizeof *vep); /* state is carved out of the vc->resp->ws workspace */ | 
| 1074 | 10760 |         if (vep == NULL) { | 
| 1075 | 2080 |                 VSLb(vc->wrk->vsl, SLT_VCL_Error, | 
| 1076 |  |                      "VEP_Init() workspace overflow"); | 
| 1077 | 2080 |                 return (NULL); | 
| 1078 |  |         } | 
| 1079 |  | /* Basic setup: magic, request URL, owning context and the VSB that is later returned by VEP_Finish(). */ | 
| 1080 | 8680 |         INIT_OBJ(vep, VEP_MAGIC); | 
| 1081 | 8680 |         vep->url = req->hd[HTTP_HDR_URL].b; | 
| 1082 | 8680 |         vep->vc = vc; | 
| 1083 | 8680 |         vep->vsb = VSB_new_auto(); | 
| 1084 | 8680 |         AN(vep->vsb); | 
| 1085 |  | /* A caller-supplied callback switches on dogzip and puts a VEC_GZ byte at the start of the VSB. */ | 
| 1086 | 8680 |         if (cb != NULL) { | 
| 1087 | 3040 |                 vep->dogzip = 1; | 
| 1088 |  |                 /* XXX */ | 
| 1089 | 3040 |                 VSB_printf(vep->vsb, "%c", VEC_GZ); | 
| 1090 | 3040 |                 vep->cb = cb; | 
| 1091 | 3040 |                 vep->cb_priv = cb_priv; | 
| 1092 | 3040 |         } else { | 
| 1093 | 5640 |                 vep->cb = vep_default_cb; | 
| 1094 | 5640 |                 vep->cb_priv = &vep->cb_x; | 
| 1095 |  |         } | 
| 1096 |  | /* Parser starts in VEP_START with both CRC accumulators at zlib's initial CRC-32 value. */ | 
| 1097 | 8680 |         vep->state = VEP_START; | 
| 1098 | 8680 |         vep->crc = crc32(0L, Z_NULL, 0); | 
| 1099 | 8680 |         vep->crcp = crc32(0L, Z_NULL, 0); | 
| 1100 |  |  | 
| 1101 | 8680 |         vep->startup = 1; /* one-shot flag: tested and reset to 0 by the parser */ | 
| 1102 | 8680 |         return (vep); | 
| 1103 | 10760 | } | 
| 1104 |  |  | 
| 1105 |  | /*--------------------------------------------------------------------- | 
| 1106 |  |  * Wrap up parsing: flush outstanding marks, send the final callback calls and finish the VSB. */ | 
| 1107 |  | /* Returns vep->vsb only if esi_found is set and the VSB is non-empty; otherwise the VSB is destroyed and NULL is returned. */ | 
| 1108 |  | struct vsb * | 
| 1109 | 8680 | VEP_Finish(struct vep_state *vep) | 
| 1110 |  | { | 
| 1111 |  |         ssize_t l, lcb; | 
| 1112 |  |  | 
| 1113 | 8680 |         CHECK_OBJ_NOTNULL(vep, VEP_MAGIC); | 
| 1114 |  | /* Release auxiliary VSBs that are still allocated at this point. */ | 
| 1115 | 8680 |         if (vep->include_src) | 
| 1116 | 0 |                 VSB_destroy(&vep->include_src); | 
| 1117 | 8680 |         if (vep->attr_vsb) | 
| 1118 | 0 |                 VSB_destroy(&vep->attr_vsb); | 
| 1119 |  | /* Only the five states listed below are accepted as end states; any other state is reported via vep_error(). */ | 
| 1120 | 16640 |         if (vep->state != VEP_START && | 
| 1121 | 8520 |             vep->state != VEP_BOM && | 
| 1122 | 8520 |             vep->state != VEP_TESTXML && | 
| 1123 | 8480 |             vep->state != VEP_NOTXML && | 
| 1124 | 7960 |             vep->state != VEP_NEXTTAG) { | 
| 1125 | 145 |                 vep_error(vep, "VEP ended inside a tag"); | 
| 1126 | 145 |         } | 
| 1127 |  | /* Flush outstanding output (o_pending first, then o_wait), both using the last mark type. */ | 
| 1128 | 8680 |         if (vep->o_pending) | 
| 1129 | 0 |                 vep_mark_common(vep, vep->ver_p, vep->last_mark); | 
| 1130 | 8680 |         if (vep->o_wait > 0) { | 
| 1131 | 8520 |                 lcb = vep->cb(vep->vc, vep->cb_priv, 0, VGZ_ALIGN); /* zero-length call; result is the callback's current total */ | 
| 1132 | 8520 |                 vep_emit_common(vep, lcb - vep->o_last, vep->last_mark); | 
| 1133 | 8520 |         } | 
| 1134 |  |         // NB: We don't account for PAD+SUM+LEN in gzipped objects | 
| 1135 | 8680 |         (void)vep->cb(vep->vc, vep->cb_priv, 0, VGZ_FINISH); | 
| 1136 |  | /* Terminate the VSB, then decide whether it is worth handing back. */ | 
| 1137 | 8680 |         AZ(VSB_finish(vep->vsb)); | 
| 1138 | 8680 |         l = VSB_len(vep->vsb); | 
| 1139 | 8680 |         if (vep->esi_found && l > 0) | 
| 1140 | 4680 |                 return (vep->vsb); | 
| 1141 | 4000 |         VSB_destroy(&vep->vsb); | 
| 1142 | 4000 |         return (NULL); | 
| 1143 | 8680 | } | 
| 1144 |  |  | 
| 1145 |  | #if 0 | 
| 1146 |  |  | 
| 1147 |  | digraph xml { // State-transition diagram in Graphviz dot syntax, split into sections A-E | 
| 1148 |  |         rankdir="LR" | 
| 1149 |  |         size="7,10" | 
| 1150 |  | ################################################################# | 
| 1151 |  | # SECTION A: entry states START, TESTXML and NOTXML | 
| 1152 |  | # Hexagon nodes carry the label of an ellipse state declared in another section. | 
| 1153 |  |  | 
| 1154 |  | START           [shape=ellipse] | 
| 1155 |  | TESTXML         [shape=ellipse] | 
| 1156 |  | NOTXML          [shape=ellipse] | 
| 1157 |  | NEXTTAGa        [shape=hexagon, label="NEXTTAG"] | 
| 1158 |  | STARTTAGa       [shape=hexagon, label="STARTTAG"] | 
| 1159 |  | START           -> TESTXML | 
| 1160 |  | START           -> NEXTTAGa     [style=dotted, label="syntax:1"] | 
| 1161 |  | TESTXML         -> TESTXML      [label="lws"] | 
| 1162 |  | TESTXML         -> NOTXML | 
| 1163 |  | TESTXML         -> STARTTAGa    [label="'<'"] | 
| 1164 |  |  | 
| 1165 |  | ################################################################# | 
| 1166 |  | # SECTION B: NOTMYTAG leaves on > to NEXTTAG; NEXTTAG leaves on < to STARTTAG | 
| 1167 |  |  | 
| 1168 |  | NOTMYTAG        [shape=ellipse] | 
| 1169 |  | NEXTTAG         [shape=ellipse] | 
| 1170 |  | NOTMYTAG        -> NEXTTAG      [style=dotted, label="syntax:2"] | 
| 1171 |  | STARTTAGb       [shape=hexagon, label="STARTTAG"] | 
| 1172 |  | NOTMYTAG        -> NEXTTAG      [label="'>'"] | 
| 1173 |  | NOTMYTAG        -> NOTMYTAG     [label="*"] | 
| 1174 |  | NEXTTAG         -> NEXTTAG      [label="'-->'"] | 
| 1175 |  | NEXTTAG         -> NEXTTAG      [label="*"] | 
| 1176 |  | NEXTTAG         -> STARTTAGb    [label="'<'"] | 
| 1177 |  |  | 
| 1178 |  | ################################################################# | 
| 1179 |  | # SECTION C: STARTTAG dispatch to COMMENT, CDATA and the ESI tag states | 
| 1180 |  |  | 
| 1181 |  | STARTTAG        [shape=ellipse] | 
| 1182 |  | COMMENT         [shape=ellipse] | 
| 1183 |  | CDATA           [shape=ellipse] | 
| 1184 |  | ESITAG          [shape=ellipse] | 
| 1185 |  | ESIETAG         [shape=ellipse] | 
| 1186 |  | ESIINCLUDE      [shape=ellipse] | 
| 1187 |  | ESIREMOVE       [shape=ellipse] | 
| 1188 |  | ESICOMMENT      [shape=ellipse] | 
| 1189 |  | ESIBOGON        [shape=ellipse] | 
| 1190 |  | INTAGc          [shape=hexagon, label="INTAG"] | 
| 1191 |  | NOTMYTAGc       [shape=hexagon, label="NOTMYTAG"] | 
| 1192 |  | NEXTTAGc        [shape=hexagon, label="NEXTTAG"] | 
| 1193 |  | TAGERRORc       [shape=hexagon, label="TAGERROR"] | 
| 1194 |  | C1              [shape=circle,label=""] | 
| 1195 |  | STARTTAG        -> COMMENT      [label="'<!--'"] | 
| 1196 |  | STARTTAG        -> ESITAG       [label="'<esi'"] | 
| 1197 |  | STARTTAG        -> CDATA        [label="'<![CDATA['"] | 
| 1198 |  | STARTTAG        -> NOTMYTAGc    [label="'*'"] | 
| 1199 |  | COMMENT         -> NEXTTAGc     [label="'esi'"] | 
| 1200 |  | COMMENT         -> C1           [label="*"] | 
| 1201 |  | C1              -> C1           [label="*"] | 
| 1202 |  | C1              -> NEXTTAGc     [label="-->"] | 
| 1203 |  | CDATA           -> CDATA        [label="*"] | 
| 1204 |  | CDATA           -> NEXTTAGc     [label="]]>"] | 
| 1205 |  | ESITAG          -> ESIINCLUDE   [label="'include'"] | 
| 1206 |  | ESITAG          -> ESIREMOVE    [label="'remove'"] | 
| 1207 |  | ESITAG          -> ESICOMMENT   [label="'comment'"] | 
| 1208 |  | ESITAG          -> ESIBOGON     [label="*"] | 
| 1209 |  | ESICOMMENT      -> INTAGc | 
| 1210 |  | ESICOMMENT      -> TAGERRORc | 
| 1211 |  | ESICOMMENT      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"] | 
| 1212 |  | ESIREMOVE       -> INTAGc | 
| 1213 |  | ESIREMOVE       -> TAGERRORc | 
| 1214 |  | ESIINCLUDE      -> INTAGc | 
| 1215 |  | ESIINCLUDE      -> TAGERRORc | 
| 1216 |  | ESIINCLUDE      -> TAGERRORc    [style=dotted, label="nested\nin\nremove"] | 
| 1217 |  | ESIBOGON        -> TAGERRORc | 
| 1218 |  |  | 
| 1219 |  | ################################################################# | 
| 1220 |  | # SECTION D: INTAG and TAGERROR, both leaving on > to NEXTTAG | 
| 1221 |  |  | 
| 1222 |  | INTAG           [shape=ellipse] | 
| 1223 |  | TAGERROR        [shape=ellipse] | 
| 1224 |  | NEXTTAGd        [shape=hexagon, label="NEXTTAG"] | 
| 1225 |  | ATTRd           [shape=hexagon, label="ATTR"] | 
| 1226 |  | D1              [shape=circle, label=""] | 
| 1227 |  | D2              [shape=circle, label=""] | 
| 1228 |  | INTAG           -> D1           [label="lws"] | 
| 1229 |  | D1              -> D2           [label="/"] | 
| 1230 |  | INTAG           -> D2           [label="/"] | 
| 1231 |  | INTAG           -> NEXTTAGd     [label=">"] | 
| 1232 |  | D1              -> NEXTTAGd     [label=">"] | 
| 1233 |  | D2              -> NEXTTAGd     [label=">"] | 
| 1234 |  | D1              -> ATTRd        [label="XMLstartchar"] | 
| 1235 |  | D1              -> TAGERROR     [label="*"] | 
| 1236 |  | D2              -> TAGERROR     [label="*"] | 
| 1237 |  | TAGERROR        -> TAGERROR     [label="*"] | 
| 1238 |  | TAGERROR        -> NEXTTAGd     [label="'>'"] | 
| 1239 |  |  | 
| 1240 |  | ################################################################# | 
| 1241 |  | # SECTION E: attribute states ATTR, SKIPATTR, ATTRGETVAL, ATTRDELIM and ATTRVAL | 
| 1242 |  |  | 
| 1243 |  | ATTR            [shape=ellipse] | 
| 1244 |  | SKIPATTR        [shape=ellipse] | 
| 1245 |  | ATTRGETVAL      [shape=ellipse] | 
| 1246 |  | ATTRDELIM       [shape=ellipse] | 
| 1247 |  | ATTRVAL         [shape=ellipse] | 
| 1248 |  | TAGERRORe       [shape=hexagon, label="TAGERROR"] | 
| 1249 |  | INTAGe          [shape=hexagon, label="INTAG"] | 
| 1250 |  | ATTR            -> SKIPATTR     [label="*"] | 
| 1251 |  | ATTR            -> ATTRGETVAL   [label="wanted attr"] | 
| 1252 |  | SKIPATTR        -> SKIPATTR     [label="XMLname"] | 
| 1253 |  | SKIPATTR        -> ATTRDELIM    [label="'='"] | 
| 1254 |  | SKIPATTR        -> TAGERRORe    [label="*"] | 
| 1255 |  | ATTRGETVAL      -> ATTRDELIM | 
| 1256 |  | ATTRDELIM       -> ATTRVAL      [label="\""] | 
| 1257 |  | ATTRDELIM       -> ATTRVAL      [label="\'"] | 
| 1258 |  | ATTRDELIM       -> ATTRVAL      [label="*"] | 
| 1259 |  | ATTRDELIM       -> TAGERRORe    [label="lws"] | 
| 1260 |  | ATTRVAL         -> TAGERRORe    [label="'>'"] | 
| 1261 |  | ATTRVAL         -> INTAGe       [label="delim"] | 
| 1262 |  | ATTRVAL         -> ATTRVAL      [label="*"] | 
| 1263 |  |  | 
| 1264 |  | } | 
| 1265 |  |  | 
| 1266 |  | #endif |