1 /* Extended regular expression matching and search library, version
   2    0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
   3    internationalization features.)
   4 
   5    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   6                  2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   7                  Free Software Foundation, Inc.
   8 
   9    This program is free software; you can redistribute it and/or modify
  10    it under the terms of the GNU General Public License as published by
  11    the Free Software Foundation; either version 3, or (at your option)
  12    any later version.
  13 
  14    This program is distributed in the hope that it will be useful,
  15    but WITHOUT ANY WARRANTY; without even the implied warranty of
  16    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17    GNU General Public License for more details.
  18 
  19    You should have received a copy of the GNU General Public License
  20    along with this program; if not, write to the Free Software
  21    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
  22    USA.  */
  23 
  24 /* TODO:
  25    - structure the opcode space into opcode+flag.
  26    - merge with glibc's regex.[ch].
  27    - replace (succeed_n + jump_n + set_number_at) with something that doesn't
  28      need to modify the compiled regexp so that re_match can be reentrant.
  29    - get rid of on_failure_jump_smart by doing the optimization in re_comp
  30      rather than at run-time, so that re_match can be reentrant.
  31 */
  32 
  33 /* AIX requires this to be the first thing in the file. */
  34 #if defined _AIX && !defined REGEX_MALLOC
  35   #pragma alloca
  36 #endif
  37 
  38 #ifdef HAVE_CONFIG_H
  39 # include <config.h>
  40 #endif
  41 
  42 #if defined STDC_HEADERS && !defined emacs
  43 # include <stddef.h>
  44 #else
  45 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
  46 # include <sys/types.h>
  47 #endif
  48 
  49 /* Whether to use ISO C Amendment 1 wide char functions.
  50    Those should not be used for Emacs since it uses its own.  */
  51 #if defined _LIBC
  52 #define WIDE_CHAR_SUPPORT 1
  53 #else
  54 #define WIDE_CHAR_SUPPORT \
  55         (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
  56 #endif
  57 
  58 /* For platform which support the ISO C amendement 1 functionality we
  59    support user defined character classes.  */
  60 #if WIDE_CHAR_SUPPORT
  61 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
  62 # include <wchar.h>
  63 # include <wctype.h>
  64 #endif
  65 
  66 #ifdef _LIBC
  67 /* We have to keep the namespace clean.  */
  68 # define regfree(preg) __regfree (preg)
  69 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
  70 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
  71 # define regerror(err_code, preg, errbuf, errbuf_size) \
  72         __regerror(err_code, preg, errbuf, errbuf_size)
  73 # define re_set_registers(bu, re, nu, st, en) \
  74         __re_set_registers (bu, re, nu, st, en)
  75 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
  76         __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
  77 # define re_match(bufp, string, size, pos, regs) \
  78         __re_match (bufp, string, size, pos, regs)
  79 # define re_search(bufp, string, size, startpos, range, regs) \
  80         __re_search (bufp, string, size, startpos, range, regs)
  81 # define re_compile_pattern(pattern, length, bufp) \
  82         __re_compile_pattern (pattern, length, bufp)
  83 # define re_set_syntax(syntax) __re_set_syntax (syntax)
  84 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
  85         __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
  86 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
  87 
  88 /* Make sure we call libc's function even if the user overrides them.  */
  89 # define btowc __btowc
  90 # define iswctype __iswctype
  91 # define wctype __wctype
  92 
  93 # define WEAK_ALIAS(a,b) weak_alias (a, b)
  94 
  95 /* We are also using some library internals.  */
  96 # include <locale/localeinfo.h>
  97 # include <locale/elem-hash.h>
  98 # include <langinfo.h>
  99 #else
 100 # define WEAK_ALIAS(a,b)
 101 #endif
 102 
 103 /* This is for other GNU distributions with internationalized messages.  */
 104 #if HAVE_LIBINTL_H || defined _LIBC
 105 # include <libintl.h>
 106 #else
 107 # define gettext(msgid) (msgid)
 108 #endif
 109 
 110 #ifndef gettext_noop
 111 /* This define is so xgettext can find the internationalizable
 112    strings.  */
 113 # define gettext_noop(String) String
 114 #endif
 115 
 116 /* The `emacs' switch turns on certain matching commands
 117    that make sense only in Emacs. */
 118 #ifdef emacs
 119 
 120 # include <setjmp.h>
 121 # include "lisp.h"
 122 # include "buffer.h"
 123 
 124 /* Make syntax table lookup grant data in gl_state.  */
 125 # define SYNTAX_ENTRY_VIA_PROPERTY
 126 
 127 # include "syntax.h"
 128 # include "character.h"
 129 # include "category.h"
 130 
 131 # ifdef malloc
 132 #  undef malloc
 133 # endif
 134 # define malloc xmalloc
 135 # ifdef realloc
 136 #  undef realloc
 137 # endif
 138 # define realloc xrealloc
 139 # ifdef free
 140 #  undef free
 141 # endif
 142 # define free xfree
 143 
 144 /* Converts the pointer to the char to BEG-based offset from the start.  */
 145 # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
 146 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
 147 
 148 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
 149 # define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
 150 # define RE_STRING_CHAR(p, multibyte) \
 151   (multibyte ? (STRING_CHAR (p)) : (*(p)))
 152 # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
 153   (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
 154 
 155 # define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
 156 
 157 # define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
 158 
 159 /* Set C a (possibly converted to multibyte) character before P.  P
 160    points into a string which is the virtual concatenation of STR1
 161    (which ends at END1) or STR2 (which ends at END2).  */
 162 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                     \
 163   do {                                                                       \
 164     if (target_multibyte)                                                    \
 165       {                                                                      \
 166         re_char *dtemp = (p) == (str2) ? (end1) : (p);                       \
 167         re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
 168         while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));                   \
 169         c = STRING_CHAR (dtemp);                                             \
 170       }                                                                      \
 171     else                                                                     \
 172       {                                                                      \
 173         (c = ((p) == (str2) ? (end1) : (p))[-1]);                            \
 174         (c) = RE_CHAR_TO_MULTIBYTE (c);                                      \
 175       }                                                                      \
 176   } while (0)
 177 
 178 /* Set C a (possibly converted to multibyte) character at P, and set
 179    LEN to the byte length of that character.  */
 180 # define GET_CHAR_AFTER(c, p, len)              \
 181   do {                                          \
 182     if (target_multibyte)                       \
 183       (c) = STRING_CHAR_AND_LENGTH (p, len);    \
 184     else                                        \
 185       {                                         \
 186         (c) = *p;                               \
 187         len = 1;                                \
 188         (c) = RE_CHAR_TO_MULTIBYTE (c);         \
 189       }                                         \
 190    } while (0)
 191 
 192 #else  /* not emacs */
 193 
 194 /* If we are not linking with Emacs proper,
 195    we can't use the relocating allocator
 196    even if config.h says that we can.  */
 197 # undef REL_ALLOC
 198 
 199 # if defined STDC_HEADERS || defined _LIBC
 200 #  include <stdlib.h>
 201 # else
 202 char *malloc ();
 203 char *realloc ();
 204 # endif
 205 
 206 /* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
 207 
 208 void *
 209 xmalloc (size)
 210      size_t size;
 211 {
 212   register void *val;
 213   val = (void *) malloc (size);
 214   if (!val && size)
 215     {
 216       write (2, "virtual memory exhausted\n", 25);
 217       exit (1);
 218     }
 219   return val;
 220 }
 221 
 222 void *
 223 xrealloc (block, size)
 224      void *block;
 225      size_t size;
 226 {
 227   register void *val;
 228   /* We must call malloc explicitly when BLOCK is 0, since some
 229      reallocs don't do this.  */
 230   if (! block)
 231     val = (void *) malloc (size);
 232   else
 233     val = (void *) realloc (block, size);
 234   if (!val && size)
 235     {
 236       write (2, "virtual memory exhausted\n", 25);
 237       exit (1);
 238     }
 239   return val;
 240 }
 241 
 242 # ifdef malloc
 243 #  undef malloc
 244 # endif
 245 # define malloc xmalloc
 246 # ifdef realloc
 247 #  undef realloc
 248 # endif
 249 # define realloc xrealloc
 250 
 251 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
 252    If nothing else has been done, use the method below.  */
 253 # ifdef INHIBIT_STRING_HEADER
 254 #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
 255 #   if !defined bzero && !defined bcopy
 256 #    undef INHIBIT_STRING_HEADER
 257 #   endif
 258 #  endif
 259 # endif
 260 
 261 /* This is the normal way of making sure we have memcpy, memcmp and bzero.
 262    This is used in most programs--a few other programs avoid this
 263    by defining INHIBIT_STRING_HEADER.  */
 264 # ifndef INHIBIT_STRING_HEADER
 265 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
 266 #   include <string.h>
 267 #   ifndef bzero
 268 #    ifndef _LIBC
 269 #     define bzero(s, n)        (memset (s, '\0', n), (s))
 270 #    else
 271 #     define bzero(s, n)        __bzero (s, n)
 272 #    endif
 273 #   endif
 274 #  else
 275 #   include <strings.h>
 276 #   ifndef memcmp
 277 #    define memcmp(s1, s2, n)   bcmp (s1, s2, n)
 278 #   endif
 279 #   ifndef memcpy
 280 #    define memcpy(d, s, n)     (bcopy (s, d, n), (d))
 281 #   endif
 282 #  endif
 283 # endif
 284 
 285 /* Define the syntax stuff for \<, \>, etc.  */
 286 
 287 /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
 288 enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 289 
 290 #  define SWITCH_ENUM_CAST(x) (x)
 291 
 292 /* Dummy macros for non-Emacs environments.  */
 293 # define BASE_LEADING_CODE_P(c) (0)
 294 # define CHAR_CHARSET(c) 0
 295 # define CHARSET_LEADING_CODE_BASE(c) 0
 296 # define MAX_MULTIBYTE_LENGTH 1
 297 # define RE_MULTIBYTE_P(x) 0
 298 # define RE_TARGET_MULTIBYTE_P(x) 0
 299 # define WORD_BOUNDARY_P(c1, c2) (0)
 300 # define CHAR_HEAD_P(p) (1)
 301 # define SINGLE_BYTE_CHAR_P(c) (1)
 302 # define SAME_CHARSET_P(c1, c2) (1)
 303 # define MULTIBYTE_FORM_LENGTH(p, s) (1)
 304 # define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
 305 # define STRING_CHAR(p) (*(p))
 306 # define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
 307 # define CHAR_STRING(c, s) (*(s) = (c), 1)
 308 # define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
 309 # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
 310 # define RE_CHAR_TO_MULTIBYTE(c) (c)
 311 # define RE_CHAR_TO_UNIBYTE(c) (c)
 312 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
 313   (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
 314 # define GET_CHAR_AFTER(c, p, len)      \
 315   (c = *p, len = 1)
 316 # define MAKE_CHAR(charset, c1, c2) (c1)
 317 # define BYTE8_TO_CHAR(c) (c)
 318 # define CHAR_BYTE8_P(c) (0)
 319 # define CHAR_LEADING_CODE(c) (c)
 320 
 321 #endif /* not emacs */
 322 
 323 #ifndef RE_TRANSLATE
 324 # define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
 325 # define RE_TRANSLATE_P(TBL) (TBL)
 326 #endif
 327 
 328 /* Get the interface, including the syntax bits.  */
 329 #include "regex.h"
 330 
 331 /* isalpha etc. are used for the character classes.  */
 332 #include <ctype.h>
 333 
 334 #ifdef emacs
 335 
 336 /* 1 if C is an ASCII character.  */
 337 # define IS_REAL_ASCII(c) ((c) < 0200)
 338 
 339 /* 1 if C is a unibyte character.  */
 340 # define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
 341 
 342 /* The Emacs definitions should not be directly affected by locales.  */
 343 
 344 /* In Emacs, these are only used for single-byte characters.  */
 345 # define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
 346 # define ISCNTRL(c) ((c) < ' ')
 347 # define ISXDIGIT(c) (((c) >= '0' && (c) <= '9')                \
 348                      || ((c) >= 'a' && (c) <= 'f')      \
 349                      || ((c) >= 'A' && (c) <= 'F'))
 350 
 351 /* This is only used for single-byte characters.  */
 352 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 353 
 354 /* The rest must handle multibyte characters.  */
 355 
 356 # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)                             \
 357                     ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)        \
 358                     : 1)
 359 
 360 # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)                             \
 361                     ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)       \
 362                     : 1)
 363 
 364 # define ISALNUM(c) (IS_REAL_ASCII (c)                  \
 365                     ? (((c) >= 'a' && (c) <= 'z')       \
 366                        || ((c) >= 'A' && (c) <= 'Z')    \
 367                        || ((c) >= '0' && (c) <= '9'))   \
 368                     : SYNTAX (c) == Sword)
 369 
 370 # define ISALPHA(c) (IS_REAL_ASCII (c)                  \
 371                     ? (((c) >= 'a' && (c) <= 'z')       \
 372                        || ((c) >= 'A' && (c) <= 'Z'))   \
 373                     : SYNTAX (c) == Sword)
 374 
 375 # define ISLOWER(c) (LOWERCASEP (c))
 376 
 377 # define ISPUNCT(c) (IS_REAL_ASCII (c)                          \
 378                     ? ((c) > ' ' && (c) < 0177                  \
 379                        && !(((c) >= 'a' && (c) <= 'z')          \
 380                             || ((c) >= 'A' && (c) <= 'Z')       \
 381                             || ((c) >= '0' && (c) <= '9')))     \
 382                     : SYNTAX (c) != Sword)
 383 
 384 # define ISSPACE(c) (SYNTAX (c) == Swhitespace)
 385 
 386 # define ISUPPER(c) (UPPERCASEP (c))
 387 
 388 # define ISWORD(c) (SYNTAX (c) == Sword)
 389 
 390 #else /* not emacs */
 391 
 392 /* Jim Meyering writes:
 393 
 394    "... Some ctype macros are valid only for character codes that
 395    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
 396    using /bin/cc or gcc but without giving an ansi option).  So, all
 397    ctype uses should be through macros like ISPRINT...  If
 398    STDC_HEADERS is defined, then autoconf has verified that the ctype
 399    macros don't need to be guarded with references to isascii. ...
 400    Defining isascii to 1 should let any compiler worth its salt
 401    eliminate the && through constant folding."
 402    Solaris defines some of these symbols so we must undefine them first.  */
 403 
 404 # undef ISASCII
 405 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
 406 #  define ISASCII(c) 1
 407 # else
 408 #  define ISASCII(c) isascii(c)
 409 # endif
 410 
 411 /* 1 if C is an ASCII character.  */
 412 # define IS_REAL_ASCII(c) ((c) < 0200)
 413 
 414 /* This distinction is not meaningful, except in Emacs.  */
 415 # define ISUNIBYTE(c) 1
 416 
 417 # ifdef isblank
 418 #  define ISBLANK(c) (ISASCII (c) && isblank (c))
 419 # else
 420 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
 421 # endif
 422 # ifdef isgraph
 423 #  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
 424 # else
 425 #  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
 426 # endif
 427 
 428 # undef ISPRINT
 429 # define ISPRINT(c) (ISASCII (c) && isprint (c))
 430 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
 431 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
 432 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
 433 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
 434 # define ISLOWER(c) (ISASCII (c) && islower (c))
 435 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
 436 # define ISSPACE(c) (ISASCII (c) && isspace (c))
 437 # define ISUPPER(c) (ISASCII (c) && isupper (c))
 438 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
 439 
 440 # define ISWORD(c) ISALPHA(c)
 441 
 442 # ifdef _tolower
 443 #  define TOLOWER(c) _tolower(c)
 444 # else
 445 #  define TOLOWER(c) tolower(c)
 446 # endif
 447 
 448 /* How many characters in the character set.  */
 449 # define CHAR_SET_SIZE 256
 450 
 451 # ifdef SYNTAX_TABLE
 452 
 453 extern char *re_syntax_table;
 454 
 455 # else /* not SYNTAX_TABLE */
 456 
 457 static char re_syntax_table[CHAR_SET_SIZE];
 458 
 459 static void
 460 init_syntax_once ()
 461 {
 462    register int c;
 463    static int done = 0;
 464 
 465    if (done)
 466      return;
 467 
 468    bzero (re_syntax_table, sizeof re_syntax_table);
 469 
 470    for (c = 0; c < CHAR_SET_SIZE; ++c)
 471      if (ISALNUM (c))
 472         re_syntax_table[c] = Sword;
 473 
 474    re_syntax_table['_'] = Ssymbol;
 475 
 476    done = 1;
 477 }
 478 
 479 # endif /* not SYNTAX_TABLE */
 480 
 481 # define SYNTAX(c) re_syntax_table[(c)]
 482 
 483 #endif /* not emacs */
 484 
 485 #ifndef NULL
 486 # define NULL (void *)0
 487 #endif
 488 
 489 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
 490    since ours (we hope) works properly with all combinations of
 491    machines, compilers, `char' and `unsigned char' argument types.
 492    (Per Bothner suggested the basic approach.)  */
 493 #undef SIGN_EXTEND_CHAR
 494 #if __STDC__
 495 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
 496 #else  /* not __STDC__ */
 497 /* As in Harbison and Steele.  */
 498 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
 499 #endif
 500 
 501 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
 502    use `alloca' instead of `malloc'.  This is because using malloc in
 503    re_search* or re_match* could cause memory leaks when C-g is used in
 504    Emacs; also, malloc is slower and causes storage fragmentation.  On
 505    the other hand, malloc is more portable, and easier to debug.
 506 
 507    Because we sometimes use alloca, some routines have to be macros,
 508    not functions -- `alloca'-allocated space disappears at the end of the
 509    function it is called in.  */
 510 
 511 #ifdef REGEX_MALLOC
 512 
 513 # define REGEX_ALLOCATE malloc
 514 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
 515 # define REGEX_FREE free
 516 
 517 #else /* not REGEX_MALLOC  */
 518 
 519 /* Emacs already defines alloca, sometimes.  */
 520 # ifndef alloca
 521 
 522 /* Make alloca work the best possible way.  */
 523 #  ifdef __GNUC__
 524 #   define alloca __builtin_alloca
 525 #  else /* not __GNUC__ */
 526 #   ifdef HAVE_ALLOCA_H
 527 #    include <alloca.h>
 528 #   endif /* HAVE_ALLOCA_H */
 529 #  endif /* not __GNUC__ */
 530 
 531 # endif /* not alloca */
 532 
 533 # define REGEX_ALLOCATE alloca
 534 
 535 /* Assumes a `char *destination' variable.  */
 536 # define REGEX_REALLOCATE(source, osize, nsize)                         \
 537   (destination = (char *) alloca (nsize),                               \
 538    memcpy (destination, source, osize))
 539 
 540 /* No need to do anything to free, after alloca.  */
 541 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
 542 
 543 #endif /* not REGEX_MALLOC */
 544 
 545 /* Define how to allocate the failure stack.  */
 546 
 547 #if defined REL_ALLOC && defined REGEX_MALLOC
 548 
 549 # define REGEX_ALLOCATE_STACK(size)                             \
 550   r_alloc (&failure_stack_ptr, (size))
 551 # define REGEX_REALLOCATE_STACK(source, osize, nsize)           \
 552   r_re_alloc (&failure_stack_ptr, (nsize))
 553 # define REGEX_FREE_STACK(ptr)                                  \
 554   r_alloc_free (&failure_stack_ptr)
 555 
 556 #else /* not using relocating allocator */
 557 
 558 # ifdef REGEX_MALLOC
 559 
 560 #  define REGEX_ALLOCATE_STACK malloc
 561 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
 562 #  define REGEX_FREE_STACK free
 563 
 564 # else /* not REGEX_MALLOC */
 565 
 566 #  define REGEX_ALLOCATE_STACK alloca
 567 
 568 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)                  \
 569    REGEX_REALLOCATE (source, osize, nsize)
 570 /* No need to explicitly free anything.  */
 571 #  define REGEX_FREE_STACK(arg) ((void)0)
 572 
 573 # endif /* not REGEX_MALLOC */
 574 #endif /* not using relocating allocator */
 575 
 576 
 577 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
 578    `string1' or just past its end.  This works if PTR is NULL, which is
 579    a good thing.  */
 580 #define FIRST_STRING_P(ptr)                                     \
 581   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
 582 
 583 /* (Re)Allocate N items of type T using malloc, or fail.  */
 584 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
 585 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
 586 #define RETALLOC_IF(addr, n, t) \
 587   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
 588 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
 589 
 590 #define BYTEWIDTH 8 /* In bits.  */
 591 
 592 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
 593 
 594 #undef MAX
 595 #undef MIN
 596 #define MAX(a, b) ((a) > (b) ? (a) : (b))
 597 #define MIN(a, b) ((a) < (b) ? (a) : (b))
 598 
 599 /* Type of source-pattern and string chars.  */
 600 typedef const unsigned char re_char;
 601 
 602 typedef char boolean;
 603 #define false 0
 604 #define true 1
 605 
 606 static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
 607                                         re_char *string1, int size1,
 608                                         re_char *string2, int size2,
 609                                         int pos,
 610                                         struct re_registers *regs,
 611                                         int stop));
 612 
 613 /* These are the command codes that appear in compiled regular
 614    expressions.  Some opcodes are followed by argument bytes.  A
 615    command code can specify any interpretation whatsoever for its
 616    arguments.  Zero bytes may appear in the compiled regular expression.  */
 617 
 618 typedef enum
 619 {
 620   no_op = 0,
 621 
 622   /* Succeed right away--no more backtracking.  */
 623   succeed,
 624 
 625         /* Followed by one byte giving n, then by n literal bytes.  */
 626   exactn,
 627 
 628         /* Matches any (more or less) character.  */
 629   anychar,
 630 
 631         /* Matches any one char belonging to specified set.  First
 632            following byte is number of bitmap bytes.  Then come bytes
 633            for a bitmap saying which chars are in.  Bits in each byte
 634            are ordered low-bit-first.  A character is in the set if its
 635            bit is 1.  A character too large to have a bit in the map is
 636            automatically not in the set.
 637 
 638            If the length byte has the 0x80 bit set, then that stuff
 639            is followed by a range table:
 640                2 bytes of flags for character sets (low 8 bits, high 8 bits)
 641                    See RANGE_TABLE_WORK_BITS below.
 642                2 bytes, the number of pairs that follow (upto 32767)
 643                pairs, each 2 multibyte characters,
 644                    each multibyte character represented as 3 bytes.  */
 645   charset,
 646 
 647         /* Same parameters as charset, but match any character that is
 648            not one of those specified.  */
 649   charset_not,
 650 
 651         /* Start remembering the text that is matched, for storing in a
 652            register.  Followed by one byte with the register number, in
 653            the range 0 to one less than the pattern buffer's re_nsub
 654            field.  */
 655   start_memory,
 656 
 657         /* Stop remembering the text that is matched and store it in a
 658            memory register.  Followed by one byte with the register
 659            number, in the range 0 to one less than `re_nsub' in the
 660            pattern buffer.  */
 661   stop_memory,
 662 
 663         /* Match a duplicate of something remembered. Followed by one
 664            byte containing the register number.  */
 665   duplicate,
 666 
 667         /* Fail unless at beginning of line.  */
 668   begline,
 669 
 670         /* Fail unless at end of line.  */
 671   endline,
 672 
 673         /* Succeeds if at beginning of buffer (if emacs) or at beginning
 674            of string to be matched (if not).  */
 675   begbuf,
 676 
 677         /* Analogously, for end of buffer/string.  */
 678   endbuf,
 679 
 680         /* Followed by two byte relative address to which to jump.  */
 681   jump,
 682 
 683         /* Followed by two-byte relative address of place to resume at
 684            in case of failure.  */
 685   on_failure_jump,
 686 
 687         /* Like on_failure_jump, but pushes a placeholder instead of the
 688            current string position when executed.  */
 689   on_failure_keep_string_jump,
 690 
 691         /* Just like `on_failure_jump', except that it checks that we
 692            don't get stuck in an infinite loop (matching an empty string
 693            indefinitely).  */
 694   on_failure_jump_loop,
 695 
 696         /* Just like `on_failure_jump_loop', except that it checks for
 697            a different kind of loop (the kind that shows up with non-greedy
 698            operators).  This operation has to be immediately preceded
 699            by a `no_op'.  */
 700   on_failure_jump_nastyloop,
 701 
 702         /* A smart `on_failure_jump' used for greedy * and + operators.
 703            It analyses the loop before which it is put and if the
 704            loop does not require backtracking, it changes itself to
 705            `on_failure_keep_string_jump' and short-circuits the loop,
 706            else it just defaults to changing itself into `on_failure_jump'.
 707            It assumes that it is pointing to just past a `jump'.  */
 708   on_failure_jump_smart,
 709 
 710         /* Followed by two-byte relative address and two-byte number n.
 711            After matching N times, jump to the address upon failure.
 712            Does not work if N starts at 0: use on_failure_jump_loop
 713            instead.  */
 714   succeed_n,
 715 
 716         /* Followed by two-byte relative address, and two-byte number n.
 717            Jump to the address N times, then fail.  */
 718   jump_n,
 719 
 720         /* Set the following two-byte relative address to the
 721            subsequent two-byte number.  The address *includes* the two
 722            bytes of number.  */
 723   set_number_at,
 724 
 725   wordbeg,      /* Succeeds if at word beginning.  */
 726   wordend,      /* Succeeds if at word end.  */
 727 
 728   wordbound,    /* Succeeds if at a word boundary.  */
 729   notwordbound, /* Succeeds if not at a word boundary.  */
 730 
 731   symbeg,       /* Succeeds if at symbol beginning.  */
 732   symend,       /* Succeeds if at symbol end.  */
 733 
 734         /* Matches any character whose syntax is specified.  Followed by
 735            a byte which contains a syntax code, e.g., Sword.  */
 736   syntaxspec,
 737 
 738         /* Matches any character whose syntax is not that specified.  */
 739   notsyntaxspec
 740 
 741 #ifdef emacs
 742   ,before_dot,  /* Succeeds if before point.  */
 743   at_dot,       /* Succeeds if at point.  */
 744   after_dot,    /* Succeeds if after point.  */
 745 
 746   /* Matches any character whose category-set contains the specified
 747      category.  The operator is followed by a byte which contains a
 748      category code (mnemonic ASCII character).  */
 749   categoryspec,
 750 
 751   /* Matches any character whose category-set does not contain the
 752      specified category.  The operator is followed by a byte which
 753      contains the category code (mnemonic ASCII character).  */
 754   notcategoryspec
 755 #endif /* emacs */
 756 } re_opcode_t;
 757 
 758 /* Common operations on the compiled pattern.  */
 759 
 760 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
 761 
 762 #define STORE_NUMBER(destination, number)                               \
 763   do {                                                                  \
 764     (destination)[0] = (number) & 0377;                                 \
 765     (destination)[1] = (number) >> 8;                                   \
 766   } while (0)
 767 
 768 /* Same as STORE_NUMBER, except increment DESTINATION to
 769    the byte after where the number is stored.  Therefore, DESTINATION
 770    must be an lvalue.  */
 771 
 772 #define STORE_NUMBER_AND_INCR(destination, number)                      \
 773   do {                                                                  \
 774     STORE_NUMBER (destination, number);                                 \
 775     (destination) += 2;                                                 \
 776   } while (0)
 777 
 778 /* Put into DESTINATION a number stored in two contiguous bytes starting
 779    at SOURCE.  */
 780 
 781 #define EXTRACT_NUMBER(destination, source)                             \
 782   do {                                                                  \
 783     (destination) = *(source) & 0377;                                   \
 784     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
 785   } while (0)
 786 
 787 #ifdef DEBUG
 788 static void extract_number _RE_ARGS ((int *dest, re_char *source));
 789 static void
 790 extract_number (dest, source)
 791     int *dest;
 792     re_char *source;
 793 {
 794   int temp = SIGN_EXTEND_CHAR (*(source + 1));
 795   *dest = *source & 0377;
 796   *dest += temp << 8;
 797 }
 798 
 799 # ifndef EXTRACT_MACROS /* To debug the macros.  */
 800 #  undef EXTRACT_NUMBER
 801 #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
 802 # endif /* not EXTRACT_MACROS */
 803 
 804 #endif /* DEBUG */
 805 
 806 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
 807    SOURCE must be an lvalue.  */
 808 
 809 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
 810   do {                                                                  \
 811     EXTRACT_NUMBER (destination, source);                               \
 812     (source) += 2;                                                      \
 813   } while (0)
 814 
 815 #ifdef DEBUG
 816 static void extract_number_and_incr _RE_ARGS ((int *destination,
 817                                                re_char **source));
 818 static void
 819 extract_number_and_incr (destination, source)
 820     int *destination;
 821     re_char **source;
 822 {
 823   extract_number (destination, *source);
 824   *source += 2;
 825 }
 826 
 827 # ifndef EXTRACT_MACROS
 828 #  undef EXTRACT_NUMBER_AND_INCR
 829 #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
 830   extract_number_and_incr (&dest, &src)
 831 # endif /* not EXTRACT_MACROS */
 832 
 833 #endif /* DEBUG */
 834 
 835 /* Store a multibyte character in three contiguous bytes starting
 836    DESTINATION, and increment DESTINATION to the byte after where the
 837    character is stored.  Therefore, DESTINATION must be an lvalue.  */
 838 
 839 #define STORE_CHARACTER_AND_INCR(destination, character)        \
 840   do {                                                          \
 841     (destination)[0] = (character) & 0377;                      \
 842     (destination)[1] = ((character) >> 8) & 0377;               \
 843     (destination)[2] = (character) >> 16;                       \
 844     (destination) += 3;                                         \
 845   } while (0)
 846 
 847 /* Put into DESTINATION a character stored in three contiguous bytes
 848    starting at SOURCE.  */
 849 
 850 #define EXTRACT_CHARACTER(destination, source)  \
 851   do {                                          \
 852     (destination) = ((source)[0]                \
 853                      | ((source)[1] << 8)       \
 854                      | ((source)[2] << 16));    \
 855   } while (0)
 856 
 857 
 858 /* Macros for charset. */
 859 
 860 /* Size of bitmap of charset P in bytes.  P is a start of charset,
 861    i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
 862 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
 863 
 864 /* Nonzero if charset P has range table.  */
 865 #define CHARSET_RANGE_TABLE_EXISTS_P(p)  ((p)[1] & 0x80)
 866 
 867 /* Return the address of range table of charset P.  But not the start
 868    of table itself, but the before where the number of ranges is
 869    stored.  `2 +' means to skip re_opcode_t and size of bitmap,
 870    and the 2 bytes of flags at the start of the range table.  */
 871 #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
 872 
 873 /* Extract the bit flags that start a range table.  */
 874 #define CHARSET_RANGE_TABLE_BITS(p)             \
 875   ((p)[2 + CHARSET_BITMAP_SIZE (p)]             \
 876    + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
 877 
 878 /* Test if C is listed in the bitmap of charset P.  */
 879 #define CHARSET_LOOKUP_BITMAP(p, c)                             \
 880   ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH                    \
 881    && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
 882 
 883 /* Return the address of end of RANGE_TABLE.  COUNT is number of
 884    ranges (which is a pair of (start, end)) in the RANGE_TABLE.  `* 2'
 885    is start of range and end of range.  `* 3' is size of each start
 886    and end.  */
 887 #define CHARSET_RANGE_TABLE_END(range_table, count)     \
 888   ((range_table) + (count) * 2 * 3)
 889 
 890 /* Test if C is in RANGE_TABLE.  A flag NOT is negated if C is in.
 891    COUNT is number of ranges in RANGE_TABLE.  */
 892 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)      \
 893   do                                                                    \
 894     {                                                                   \
 895       re_wchar_t range_start, range_end;                                \
 896       re_char *p;                                                       \
 897       re_char *range_table_end                                          \
 898         = CHARSET_RANGE_TABLE_END ((range_table), (count));             \
 899                                                                         \
 900       for (p = (range_table); p < range_table_end; p += 2 * 3)          \
 901         {                                                               \
 902           EXTRACT_CHARACTER (range_start, p);                           \
 903           EXTRACT_CHARACTER (range_end, p + 3);                         \
 904                                                                         \
 905           if (range_start <= (c) && (c) <= range_end)                   \
 906             {                                                           \
 907               (not) = !(not);                                           \
 908               break;                                                    \
 909             }                                                           \
 910         }                                                               \
 911     }                                                                   \
 912   while (0)
 913 
 914 /* Test if C is in range table of CHARSET.  The flag NOT is negated if
 915    C is listed in it.  */
 916 #define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)                     \
 917   do                                                                    \
 918     {                                                                   \
 919       /* Number of ranges in range table. */                            \
 920       int count;                                                        \
 921       re_char *range_table = CHARSET_RANGE_TABLE (charset);             \
 922                                                                         \
 923       EXTRACT_NUMBER_AND_INCR (count, range_table);                     \
 924       CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);  \
 925     }                                                                   \
 926   while (0)
 927 
 928 /* If DEBUG is defined, Regex prints many voluminous messages about what
 929    it is doing (if the variable `debug' is nonzero).  If linked with the
 930    main program in `iregex.c', you can enter patterns and strings
 931    interactively.  And if linked with the main program in `main.c' and
 932    the other test files, you can run the already-written tests.  */
 933 
 934 #ifdef DEBUG
 935 
 936 /* We use standard I/O for debugging.  */
 937 # include <stdio.h>
 938 
 939 /* It is useful to test things that ``must'' be true when debugging.  */
 940 # include <assert.h>
 941 
 942 static int debug = -100000;
 943 
 944 # define DEBUG_STATEMENT(e) e
 945 # define DEBUG_PRINT1(x) if (debug > 0) printf (x)
 946 # define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
 947 # define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
 948 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
 949 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
 950   if (debug > 0) print_partial_compiled_pattern (s, e)
 951 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
 952   if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
 953 
 954 
 955 /* Print the fastmap in human-readable form.  */
 956 
 957 void
 958 print_fastmap (fastmap)
 959     char *fastmap;
 960 {
 961   unsigned was_a_range = 0;
 962   unsigned i = 0;
 963 
 964   while (i < (1 << BYTEWIDTH))
 965     {
 966       if (fastmap[i++])
 967         {
 968           was_a_range = 0;
 969           putchar (i - 1);
 970           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
 971             {
 972               was_a_range = 1;
 973               i++;
 974             }
 975           if (was_a_range)
 976             {
 977               printf ("-");
 978               putchar (i - 1);
 979             }
 980         }
 981     }
 982   putchar ('\n');
 983 }
 984 
 985 
 986 /* Print a compiled pattern string in human-readable form, starting at
 987    the START pointer into it and ending just before the pointer END.  */
 988 
 989 void
 990 print_partial_compiled_pattern (start, end)
 991     re_char *start;
 992     re_char *end;
 993 {
 994   int mcnt, mcnt2;
 995   re_char *p = start;
 996   re_char *pend = end;
 997 
 998   if (start == NULL)
 999     {
1000       fprintf (stderr, "(null)\n");
1001       return;
1002     }
1003 
1004   /* Loop over pattern commands.  */
1005   while (p < pend)
1006     {
1007       fprintf (stderr, "%d:\t", p - start);
1008 
1009       switch ((re_opcode_t) *p++)
1010         {
1011         case no_op:
1012           fprintf (stderr, "/no_op");
1013           break;
1014 
1015         case succeed:
1016           fprintf (stderr, "/succeed");
1017           break;
1018 
1019         case exactn:
1020           mcnt = *p++;
1021           fprintf (stderr, "/exactn/%d", mcnt);
1022           do
1023             {
1024               fprintf (stderr, "/%c", *p++);
1025             }
1026           while (--mcnt);
1027           break;
1028 
1029         case start_memory:
1030           fprintf (stderr, "/start_memory/%d", *p++);
1031           break;
1032 
1033         case stop_memory:
1034           fprintf (stderr, "/stop_memory/%d", *p++);
1035           break;
1036 
1037         case duplicate:
1038           fprintf (stderr, "/duplicate/%d", *p++);
1039           break;
1040 
1041         case anychar:
1042           fprintf (stderr, "/anychar");
1043           break;
1044 
1045         case charset:
1046         case charset_not:
1047           {
1048             register int c, last = -100;
1049             register int in_range = 0;
1050             int length = CHARSET_BITMAP_SIZE (p - 1);
1051             int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
1052 
1053             fprintf (stderr, "/charset [%s",
1054                      (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
1055 
1056             if (p + *p >= pend)
1057               fprintf (stderr, " !extends past end of pattern! ");
1058 
1059             for (c = 0; c < 256; c++)
1060               if (c / 8 < length
1061                   && (p[1 + (c/8)] & (1 << (c % 8))))
1062                 {
1063                   /* Are we starting a range?  */
1064                   if (last + 1 == c && ! in_range)
1065                     {
1066                       fprintf (stderr, "-");
1067                       in_range = 1;
1068                     }
1069                   /* Have we broken a range?  */
1070                   else if (last + 1 != c && in_range)
1071                     {
1072                       fprintf (stderr, "%c", last);
1073                       in_range = 0;
1074                     }
1075 
1076                   if (! in_range)
1077                     fprintf (stderr, "%c", c);
1078 
1079                   last = c;
1080               }
1081 
1082             if (in_range)
1083               fprintf (stderr, "%c", last);
1084 
1085             fprintf (stderr, "]");
1086 
1087             p += 1 + length;
1088 
1089             if (has_range_table)
1090               {
1091                 int count;
1092                 fprintf (stderr, "has-range-table");
1093 
1094                 /* ??? Should print the range table; for now, just skip it.  */
1095                 p += 2;         /* skip range table bits */
1096                 EXTRACT_NUMBER_AND_INCR (count, p);
1097                 p = CHARSET_RANGE_TABLE_END (p, count);
1098               }
1099           }
1100           break;
1101 
1102         case begline:
1103           fprintf (stderr, "/begline");
1104           break;
1105 
1106         case endline:
1107           fprintf (stderr, "/endline");
1108           break;
1109 
1110         case on_failure_jump:
1111           extract_number_and_incr (&mcnt, &p);
1112           fprintf (stderr, "/on_failure_jump to %d", p + mcnt - start);
1113           break;
1114 
1115         case on_failure_keep_string_jump:
1116           extract_number_and_incr (&mcnt, &p);
1117           fprintf (stderr, "/on_failure_keep_string_jump to %d", p + mcnt - start);
1118           break;
1119 
1120         case on_failure_jump_nastyloop:
1121           extract_number_and_incr (&mcnt, &p);
1122           fprintf (stderr, "/on_failure_jump_nastyloop to %d", p + mcnt - start);
1123           break;
1124 
1125         case on_failure_jump_loop:
1126           extract_number_and_incr (&mcnt, &p);
1127           fprintf (stderr, "/on_failure_jump_loop to %d", p + mcnt - start);
1128           break;
1129 
1130         case on_failure_jump_smart:
1131           extract_number_and_incr (&mcnt, &p);
1132           fprintf (stderr, "/on_failure_jump_smart to %d", p + mcnt - start);
1133           break;
1134 
1135         case jump:
1136           extract_number_and_incr (&mcnt, &p);
1137           fprintf (stderr, "/jump to %d", p + mcnt - start);
1138           break;
1139 
1140         case succeed_n:
1141           extract_number_and_incr (&mcnt, &p);
1142           extract_number_and_incr (&mcnt2, &p);
1143           fprintf (stderr, "/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1144           break;
1145 
1146         case jump_n:
1147           extract_number_and_incr (&mcnt, &p);
1148           extract_number_and_incr (&mcnt2, &p);
1149           fprintf (stderr, "/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1150           break;
1151 
1152         case set_number_at:
1153           extract_number_and_incr (&mcnt, &p);
1154           extract_number_and_incr (&mcnt2, &p);
1155           fprintf (stderr, "/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
1156           break;
1157 
1158         case wordbound:
1159           fprintf (stderr, "/wordbound");
1160           break;
1161 
1162         case notwordbound:
1163           fprintf (stderr, "/notwordbound");
1164           break;
1165 
1166         case wordbeg:
1167           fprintf (stderr, "/wordbeg");
1168           break;
1169 
1170         case wordend:
1171           fprintf (stderr, "/wordend");
1172           break;
1173 
1174         case symbeg:
1175           fprintf (stderr, "/symbeg");
1176           break;
1177 
1178         case symend:
1179           fprintf (stderr, "/symend");
1180           break;
1181 
1182         case syntaxspec:
1183           fprintf (stderr, "/syntaxspec");
1184           mcnt = *p++;
1185           fprintf (stderr, "/%d", mcnt);
1186           break;
1187 
1188         case notsyntaxspec:
1189           fprintf (stderr, "/notsyntaxspec");
1190           mcnt = *p++;
1191           fprintf (stderr, "/%d", mcnt);
1192           break;
1193 
1194 # ifdef emacs
1195         case before_dot:
1196           fprintf (stderr, "/before_dot");
1197           break;
1198 
1199         case at_dot:
1200           fprintf (stderr, "/at_dot");
1201           break;
1202 
1203         case after_dot:
1204           fprintf (stderr, "/after_dot");
1205           break;
1206 
1207         case categoryspec:
1208           fprintf (stderr, "/categoryspec");
1209           mcnt = *p++;
1210           fprintf (stderr, "/%d", mcnt);
1211           break;
1212 
1213         case notcategoryspec:
1214           fprintf (stderr, "/notcategoryspec");
1215           mcnt = *p++;
1216           fprintf (stderr, "/%d", mcnt);
1217           break;
1218 # endif /* emacs */
1219 
1220         case begbuf:
1221           fprintf (stderr, "/begbuf");
1222           break;
1223 
1224         case endbuf:
1225           fprintf (stderr, "/endbuf");
1226           break;
1227 
1228         default:
1229           fprintf (stderr, "?%d", *(p-1));
1230         }
1231 
1232       fprintf (stderr, "\n");
1233     }
1234 
1235   fprintf (stderr, "%d:\tend of pattern.\n", p - start);
1236 }
1237 
1238 
1239 void
1240 print_compiled_pattern (bufp)
1241     struct re_pattern_buffer *bufp;
1242 {
1243   re_char *buffer = bufp->buffer;
1244 
1245   print_partial_compiled_pattern (buffer, buffer + bufp->used);
1246   printf ("%ld bytes used/%ld bytes allocated.\n",
1247           bufp->used, bufp->allocated);
1248 
1249   if (bufp->fastmap_accurate && bufp->fastmap)
1250     {
1251       printf ("fastmap: ");
1252       print_fastmap (bufp->fastmap);
1253     }
1254 
1255   printf ("re_nsub: %d\t", bufp->re_nsub);
1256   printf ("regs_alloc: %d\t", bufp->regs_allocated);
1257   printf ("can_be_null: %d\t", bufp->can_be_null);
1258   printf ("no_sub: %d\t", bufp->no_sub);
1259   printf ("not_bol: %d\t", bufp->not_bol);
1260   printf ("not_eol: %d\t", bufp->not_eol);
1261   printf ("syntax: %lx\n", bufp->syntax);
1262   fflush (stdout);
1263   /* Perhaps we should print the translate table?  */
1264 }
1265 
1266 
1267 void
1268 print_double_string (where, string1, size1, string2, size2)
1269     re_char *where;
1270     re_char *string1;
1271     re_char *string2;
1272     int size1;
1273     int size2;
1274 {
1275   int this_char;
1276 
1277   if (where == NULL)
1278     printf ("(null)");
1279   else
1280     {
1281       if (FIRST_STRING_P (where))
1282         {
1283           for (this_char = where - string1; this_char < size1; this_char++)
1284             putchar (string1[this_char]);
1285 
1286           where = string2;
1287         }
1288 
1289       for (this_char = where - string2; this_char < size2; this_char++)
1290         putchar (string2[this_char]);
1291     }
1292 }
1293 
1294 #else /* not DEBUG */
1295 
1296 # undef assert
1297 # define assert(e)
1298 
1299 # define DEBUG_STATEMENT(e)
1300 # define DEBUG_PRINT1(x)
1301 # define DEBUG_PRINT2(x1, x2)
1302 # define DEBUG_PRINT3(x1, x2, x3)
1303 # define DEBUG_PRINT4(x1, x2, x3, x4)
1304 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1305 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1306 
1307 #endif /* not DEBUG */
1308 
1309 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1310    also be assigned to arbitrarily: each pattern buffer stores its own
1311    syntax, so it can be changed between regex compilations.  */
1312 /* This has no initializer because initialized variables in Emacs
1313    become read-only after dumping.  */
1314 reg_syntax_t re_syntax_options;
1315 
1316 
1317 /* Specify the precise syntax of regexps for compilation.  This provides
1318    for compatibility for various utilities which historically have
1319    different, incompatible syntaxes.
1320 
1321    The argument SYNTAX is a bit mask comprised of the various bits
1322    defined in regex.h.  We return the old syntax.  */
1323 
1324 reg_syntax_t
1325 re_set_syntax (syntax)
1326      reg_syntax_t syntax;
1327 {
1328   reg_syntax_t ret = re_syntax_options;
1329 
1330   re_syntax_options = syntax;
1331   return ret;
1332 }
1333 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1334 
1335 /* Regexp to use to replace spaces, or NULL meaning don't.  */
1336 static re_char *whitespace_regexp;
1337 
1338 void
1339 re_set_whitespace_regexp (regexp)
1340      const char *regexp;
1341 {
1342   whitespace_regexp = (re_char *) regexp;
1343 }
1344 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1345 
1346 /* This table gives an error message for each of the error codes listed
1347    in regex.h.  Obviously the order here has to be same as there.
1348    POSIX doesn't require that we do anything for REG_NOERROR,
1349    but why not be nice?  */
1350 
1351 static const char *re_error_msgid[] =
1352   {
1353     gettext_noop ("Success"),   /* REG_NOERROR */
1354     gettext_noop ("No match"),  /* REG_NOMATCH */
1355     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1356     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1357     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1358     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1359     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1360     gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1361     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1362     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1363     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1364     gettext_noop ("Invalid range end"), /* REG_ERANGE */
1365     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1366     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1367     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1368     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1369     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1370     gettext_noop ("Range striding over charsets") /* REG_ERANGEX  */
1371   };
1372 
1373 /* Avoiding alloca during matching, to placate r_alloc.  */
1374 
1375 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1376    searching and matching functions should not call alloca.  On some
1377    systems, alloca is implemented in terms of malloc, and if we're
1378    using the relocating allocator routines, then malloc could cause a
1379    relocation, which might (if the strings being searched are in the
1380    ralloc heap) shift the data out from underneath the regexp
1381    routines.
1382 
1383    Here's another reason to avoid allocation: Emacs
1384    processes input from X in a signal handler; processing X input may
1385    call malloc; if input arrives while a matching routine is calling
1386    malloc, then we're scrod.  But Emacs can't just block input while
1387    calling matching routines; then we don't notice interrupts when
1388    they come in.  So, Emacs blocks input around all regexp calls
1389    except the matching calls, which it leaves unprotected, in the
1390    faith that they will not malloc.  */
1391 
1392 /* Normally, this is fine.  */
1393 #define MATCH_MAY_ALLOCATE
1394 
1395 /* The match routines may not allocate if (1) they would do it with malloc
1396    and (2) it's not safe for them to use malloc.
1397    Note that if REL_ALLOC is defined, matching would not use malloc for the
1398    failure stack, but we would still use it for the register vectors;
1399    so REL_ALLOC should not affect this.  */
1400 #if defined REGEX_MALLOC && defined emacs
1401 # undef MATCH_MAY_ALLOCATE
1402 #endif
1403 
1404 
1405 /* Failure stack declarations and macros; both re_compile_fastmap and
1406    re_match_2 use a failure stack.  These have to be macros because of
1407    REGEX_ALLOCATE_STACK.  */
1408 
1409 
1410 /* Approximate number of failure points for which to initially allocate space
1411    when matching.  If this number is exceeded, we allocate more
1412    space, so it is not a hard limit.  */
1413 #ifndef INIT_FAILURE_ALLOC
1414 # define INIT_FAILURE_ALLOC 20
1415 #endif
1416 
1417 /* Roughly the maximum number of failure points on the stack.  Would be
1418    exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1419    This is a variable only so users of regex can assign to it; we never
1420    change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
1421    before using it, so it should probably be a byte-count instead.  */
1422 # if defined MATCH_MAY_ALLOCATE
1423 /* Note that 4400 was enough to cause a crash on Alpha OSF/1,
1424    whose default stack limit is 2mb.  In order for a larger
1425    value to work reliably, you have to try to make it accord
1426    with the process stack limit.  */
1427 size_t re_max_failures = 40000;
1428 # else
1429 size_t re_max_failures = 4000;
1430 # endif
1431 
1432 union fail_stack_elt
1433 {
1434   re_char *pointer;
1435   /* This should be the biggest `int' that's no bigger than a pointer.  */
1436   long integer;
1437 };
1438 
1439 typedef union fail_stack_elt fail_stack_elt_t;
1440 
1441 typedef struct
1442 {
1443   fail_stack_elt_t *stack;
1444   size_t size;
1445   size_t avail; /* Offset of next open position.  */
1446   size_t frame; /* Offset of the cur constructed frame.  */
1447 } fail_stack_type;
1448 
1449 #define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
1450 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1451 
1452 
1453 /* Define macros to initialize and free the failure stack.
1454    Do `return -2' if the alloc fails.  */
1455 
1456 #ifdef MATCH_MAY_ALLOCATE
1457 # define INIT_FAIL_STACK()                                              \
1458   do {                                                                  \
1459     fail_stack.stack = (fail_stack_elt_t *)                             \
1460       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE   \
1461                             * sizeof (fail_stack_elt_t));               \
1462                                                                         \
1463     if (fail_stack.stack == NULL)                                       \
1464       return -2;                                                        \
1465                                                                         \
1466     fail_stack.size = INIT_FAILURE_ALLOC;                               \
1467     fail_stack.avail = 0;                                               \
1468     fail_stack.frame = 0;                                               \
1469   } while (0)
1470 
1471 # define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1472 #else
1473 # define INIT_FAIL_STACK()                                              \
1474   do {                                                                  \
1475     fail_stack.avail = 0;                                               \
1476     fail_stack.frame = 0;                                               \
1477   } while (0)
1478 
1479 # define RESET_FAIL_STACK() ((void)0)
1480 #endif
1481 
1482 
1483 /* Double the size of FAIL_STACK, up to a limit
1484    which allows approximately `re_max_failures' items.
1485 
1486    Return 1 if succeeds, and 0 if either ran out of memory
1487    allocating space for it or it was already too large.
1488 
1489    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1490 
1491 /* Factor to increase the failure stack size by
1492    when we increase it.
1493    This used to be 2, but 2 was too wasteful
1494    because the old discarded stacks added up to as much space
1495    were as ultimate, maximum-size stack.  */
1496 #define FAIL_STACK_GROWTH_FACTOR 4
1497 
1498 #define GROW_FAIL_STACK(fail_stack)                                     \
1499   (((fail_stack).size * sizeof (fail_stack_elt_t)                       \
1500     >= re_max_failures * TYPICAL_FAILURE_SIZE)                          \
1501    ? 0                                                                  \
1502    : ((fail_stack).stack                                                \
1503       = (fail_stack_elt_t *)                                            \
1504         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1505           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1506           MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                  \
1507                ((fail_stack).size * sizeof (fail_stack_elt_t)           \
1508                 * FAIL_STACK_GROWTH_FACTOR))),                          \
1509                                                                         \
1510       (fail_stack).stack == NULL                                        \
1511       ? 0                                                               \
1512       : ((fail_stack).size                                              \
1513          = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,                \
1514                  ((fail_stack).size * sizeof (fail_stack_elt_t)         \
1515                   * FAIL_STACK_GROWTH_FACTOR))                          \
1516             / sizeof (fail_stack_elt_t)),                               \
1517          1)))
1518 
1519 
1520 /* Push a pointer value onto the failure stack.
1521    Assumes the variable `fail_stack'.  Probably should only
1522    be called from within `PUSH_FAILURE_POINT'.  */
1523 #define PUSH_FAILURE_POINTER(item)                                      \
1524   fail_stack.stack[fail_stack.avail++].pointer = (item)
1525 
1526 /* This pushes an integer-valued item onto the failure stack.
1527    Assumes the variable `fail_stack'.  Probably should only
1528    be called from within `PUSH_FAILURE_POINT'.  */
1529 #define PUSH_FAILURE_INT(item)                                  \
1530   fail_stack.stack[fail_stack.avail++].integer = (item)
1531 
1532 /* Push a fail_stack_elt_t value onto the failure stack.
1533    Assumes the variable `fail_stack'.  Probably should only
1534    be called from within `PUSH_FAILURE_POINT'.  */
1535 #define PUSH_FAILURE_ELT(item)                                  \
1536   fail_stack.stack[fail_stack.avail++] =  (item)
1537 
1538 /* These three POP... operations complement the three PUSH... operations.
1539    All assume that `fail_stack' is nonempty.  */
1540 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1541 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1542 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1543 
1544 /* Individual items aside from the registers.  */
1545 #define NUM_NONREG_ITEMS 3
1546 
1547 /* Used to examine the stack (to detect infinite loops).  */
1548 #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
1549 #define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
1550 #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1551 #define TOP_FAILURE_HANDLE() fail_stack.frame
1552 
1553 
1554 #define ENSURE_FAIL_STACK(space)                                        \
1555 while (REMAINING_AVAIL_SLOTS <= space) {                                \
1556   if (!GROW_FAIL_STACK (fail_stack))                                    \
1557     return -2;                                                          \
1558   DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n", (fail_stack).size);\
1559   DEBUG_PRINT2 ("        slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1560 }
1561 
1562 /* Push register NUM onto the stack.  */
1563 #define PUSH_FAILURE_REG(num)                                           \
1564 do {                                                                    \
1565   char *destination;                                                    \
1566   ENSURE_FAIL_STACK(3);                                                 \
1567   DEBUG_PRINT4 ("    Push reg %d (spanning %p -> %p)\n",                \
1568                 num, regstart[num], regend[num]);                       \
1569   PUSH_FAILURE_POINTER (regstart[num]);                                 \
1570   PUSH_FAILURE_POINTER (regend[num]);                                   \
1571   PUSH_FAILURE_INT (num);                                               \
1572 } while (0)
1573 
1574 /* Change the counter's value to VAL, but make sure that it will
1575    be reset when backtracking.  */
1576 #define PUSH_NUMBER(ptr,val)                                            \
1577 do {                                                                    \
1578   char *destination;                                                    \
1579   int c;                                                                \
1580   ENSURE_FAIL_STACK(3);                                                 \
1581   EXTRACT_NUMBER (c, ptr);                                              \
1582   DEBUG_PRINT4 ("    Push number %p = %d -> %d\n", ptr, c, val);        \
1583   PUSH_FAILURE_INT (c);                                                 \
1584   PUSH_FAILURE_POINTER (ptr);                                           \
1585   PUSH_FAILURE_INT (-1);                                                \
1586   STORE_NUMBER (ptr, val);                                              \
1587 } while (0)
1588 
1589 /* Pop a saved register off the stack.  */
1590 #define POP_FAILURE_REG_OR_COUNT()                                      \
1591 do {                                                                    \
1592   int reg = POP_FAILURE_INT ();                                         \
1593   if (reg == -1)                                                        \
1594     {                                                                   \
1595       /* It's a counter.  */                                            \
1596       /* Here, we discard `const', making re_match non-reentrant.  */   \
1597       unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER ();     \
1598       reg = POP_FAILURE_INT ();                                         \
1599       STORE_NUMBER (ptr, reg);                                          \
1600       DEBUG_PRINT3 ("     Pop counter %p = %d\n", ptr, reg);            \
1601     }                                                                   \
1602   else                                                                  \
1603     {                                                                   \
1604       regend[reg] = POP_FAILURE_POINTER ();                             \
1605       regstart[reg] = POP_FAILURE_POINTER ();                           \
1606       DEBUG_PRINT4 ("     Pop reg %d (spanning %p -> %p)\n",            \
1607                     reg, regstart[reg], regend[reg]);                   \
1608     }                                                                   \
1609 } while (0)
1610 
1611 /* Check that we are not stuck in an infinite loop.  */
1612 #define CHECK_INFINITE_LOOP(pat_cur, string_place)                      \
1613 do {                                                                    \
1614   int failure = TOP_FAILURE_HANDLE ();                                  \
1615   /* Check for infinite matching loops */                               \
1616   while (failure > 0                                                    \
1617          && (FAILURE_STR (failure) == string_place                      \
1618              || FAILURE_STR (failure) == NULL))                         \
1619     {                                                                   \
1620       assert (FAILURE_PAT (failure) >= bufp->buffer                     \
1621               && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);   \
1622       if (FAILURE_PAT (failure) == pat_cur)                             \
1623         {                                                               \
1624           cycle = 1;                                                    \
1625           break;                                                        \
1626         }                                                               \
1627       DEBUG_PRINT2 ("  Other pattern: %p\n", FAILURE_PAT (failure));    \
1628       failure = NEXT_FAILURE_HANDLE(failure);                           \
1629     }                                                                   \
1630   DEBUG_PRINT2 ("  Other string: %p\n", FAILURE_STR (failure));         \
1631 } while (0)
1632 
1633 /* Push the information about the state we will need
1634    if we ever fail back to it.
1635 
1636    Requires variables fail_stack, regstart, regend and
1637    num_regs be declared.  GROW_FAIL_STACK requires `destination' be
1638    declared.
1639 
1640    Does `return FAILURE_CODE' if runs out of memory.  */
1641 
1642 #define PUSH_FAILURE_POINT(pattern, string_place)                       \
1643 do {                                                                    \
1644   char *destination;                                                    \
1645   /* Must be int, so when we don't save any registers, the arithmetic   \
1646      of 0 + -1 isn't done as unsigned.  */                              \
1647                                                                         \
1648   DEBUG_STATEMENT (nfailure_points_pushed++);                           \
1649   DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n");                             \
1650   DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail); \
1651   DEBUG_PRINT2 ("                       size: %d\n", (fail_stack).size);\
1652                                                                         \
1653   ENSURE_FAIL_STACK (NUM_NONREG_ITEMS);                                 \
1654                                                                         \
1655   DEBUG_PRINT1 ("\n");                                                  \
1656                                                                         \
1657   DEBUG_PRINT2 ("  Push frame index: %d\n", fail_stack.frame);          \
1658   PUSH_FAILURE_INT (fail_stack.frame);                                  \
1659                                                                         \
1660   DEBUG_PRINT2 ("  Push string %p: `", string_place);                   \
1661   DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1662   DEBUG_PRINT1 ("'\n");                                                 \
1663   PUSH_FAILURE_POINTER (string_place);                                  \
1664                                                                         \
1665   DEBUG_PRINT2 ("  Push pattern %p: ", pattern);                        \
1666   DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend);                   \
1667   PUSH_FAILURE_POINTER (pattern);                                       \
1668                                                                         \
1669   /* Close the frame by moving the frame pointer past it.  */           \
1670   fail_stack.frame = fail_stack.avail;                                  \
1671 } while (0)
1672 
1673 /* Estimate the size of data pushed by a typical failure stack entry.
1674    An estimate is all we need, because all we use this for
1675    is to choose a limit for how big to make the failure stack.  */
1676 /* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
1677 #define TYPICAL_FAILURE_SIZE 20
1678 
1679 /* How many items can still be added to the stack without overflowing it.  */
1680 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1681 
1682 
1683 /* Pops what PUSH_FAIL_STACK pushes.
1684 
1685    We restore into the parameters, all of which should be lvalues:
1686      STR -- the saved data position.
1687      PAT -- the saved pattern position.
1688      REGSTART, REGEND -- arrays of string positions.
1689 
1690    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1691    `pend', `string1', `size1', `string2', and `size2'.  */
1692 
1693 #define POP_FAILURE_POINT(str, pat)                                     \
1694 do {                                                                    \
1695   assert (!FAIL_STACK_EMPTY ());                                        \
1696                                                                         \
1697   /* Remove failure points and point to how many regs pushed.  */       \
1698   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
1699   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
1700   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
1701                                                                         \
1702   /* Pop the saved registers.  */                                       \
1703   while (fail_stack.frame < fail_stack.avail)                           \
1704     POP_FAILURE_REG_OR_COUNT ();                                        \
1705                                                                         \
1706   pat = POP_FAILURE_POINTER ();                         \
1707   DEBUG_PRINT2 ("  Popping pattern %p: ", pat);                         \
1708   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
1709                                                                         \
1710   /* If the saved string location is NULL, it came from an              \
1711      on_failure_keep_string_jump opcode, and we want to throw away the  \
1712      saved NULL, thus retaining our current position in the string.  */ \
1713   str = POP_FAILURE_POINTER ();                                         \
1714   DEBUG_PRINT2 ("  Popping string %p: `", str);                         \
1715   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
1716   DEBUG_PRINT1 ("'\n");                                                 \
1717                                                                         \
1718   fail_stack.frame = POP_FAILURE_INT ();                                \
1719   DEBUG_PRINT2 ("  Popping  frame index: %d\n", fail_stack.frame);      \
1720                                                                         \
1721   assert (fail_stack.avail >= 0);                                       \
1722   assert (fail_stack.frame <= fail_stack.avail);                        \
1723                                                                         \
1724   DEBUG_STATEMENT (nfailure_points_popped++);                           \
1725 } while (0) /* POP_FAILURE_POINT */
1726 
1727 
1728 
1729 /* Registers are set to a sentinel when they haven't yet matched.  */
1730 #define REG_UNSET(e) ((e) == NULL)
1731 
1732 /* Subroutine declarations and macros for regex_compile.  */
1733 
1734 static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
1735                                               reg_syntax_t syntax,
1736                                               struct re_pattern_buffer *bufp));
1737 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1738 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1739                                  int arg1, int arg2));
1740 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1741                                   int arg, unsigned char *end));
1742 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1743                                   int arg1, int arg2, unsigned char *end));
1744 static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
1745                                            re_char *p,
1746                                            reg_syntax_t syntax));
1747 static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
1748                                            re_char *pend,
1749                                            reg_syntax_t syntax));
1750 static re_char *skip_one_char _RE_ARGS ((re_char *p));
1751 static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
1752                                     char *fastmap, const int multibyte));
1753 
1754 /* Fetch the next character in the uncompiled pattern, with no
1755    translation.  */
1756 #define PATFETCH(c)                                                     \
1757   do {                                                                  \
1758     int len;                                                            \
1759     if (p == pend) return REG_EEND;                                     \
1760     c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte);                  \
1761     p += len;                                                           \
1762   } while (0)
1763 
1764 
1765 /* If `translate' is non-null, return translate[D], else just D.  We
1766    cast the subscript to translate because some data is declared as
1767    `char *', to avoid warnings when a string constant is passed.  But
1768    when we use a character as a subscript we must make it unsigned.  */
1769 #ifndef TRANSLATE
1770 # define TRANSLATE(d) \
1771   (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1772 #endif
1773 
1774 
1775 /* Macros for outputting the compiled pattern into `buffer'.  */
1776 
1777 /* If the buffer isn't allocated when it comes in, use this.  */
1778 #define INIT_BUF_SIZE  32
1779 
1780 /* Make sure we have at least N more bytes of space in buffer.  */
1781 #define GET_BUFFER_SPACE(n)                                             \
1782     while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated)         \
1783       EXTEND_BUFFER ()
1784 
1785 /* Make sure we have one more byte of buffer space and then add C to it.  */
1786 #define BUF_PUSH(c)                                                     \
1787   do {                                                                  \
1788     GET_BUFFER_SPACE (1);                                               \
1789     *b++ = (unsigned char) (c);                                         \
1790   } while (0)
1791 
1792 
1793 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1794 #define BUF_PUSH_2(c1, c2)                                              \
1795   do {                                                                  \
1796     GET_BUFFER_SPACE (2);                                               \
1797     *b++ = (unsigned char) (c1);                                        \
1798     *b++ = (unsigned char) (c2);                                        \
1799   } while (0)
1800 
1801 
1802 /* As with BUF_PUSH_2, except for three bytes.  */
1803 #define BUF_PUSH_3(c1, c2, c3)                                          \
1804   do {                                                                  \
1805     GET_BUFFER_SPACE (3);                                               \
1806     *b++ = (unsigned char) (c1);                                        \
1807     *b++ = (unsigned char) (c2);                                        \
1808     *b++ = (unsigned char) (c3);                                        \
1809   } while (0)
1810 
1811 
1812 /* Store a jump with opcode OP at LOC to location TO.  We store a
1813    relative address offset by the three bytes the jump itself occupies.  */
1814 #define STORE_JUMP(op, loc, to) \
1815   store_op1 (op, loc, (to) - (loc) - 3)
1816 
1817 /* Likewise, for a two-argument jump.  */
1818 #define STORE_JUMP2(op, loc, to, arg) \
1819   store_op2 (op, loc, (to) - (loc) - 3, arg)
1820 
1821 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1822 #define INSERT_JUMP(op, loc, to) \
1823   insert_op1 (op, loc, (to) - (loc) - 3, b)
1824 
1825 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1826 #define INSERT_JUMP2(op, loc, to, arg) \
1827   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1828 
1829 
1830 /* This is not an arbitrary limit: the arguments which represent offsets
1831    into the pattern are two bytes long.  So if 2^15 bytes turns out to
1832    be too small, many things would have to change.  */
1833 # define MAX_BUF_SIZE (1L << 15)
1834 
1835 #if 0  /* This is when we thought it could be 2^16 bytes.  */
1836 /* Any other compiler which, like MSC, has allocation limit below 2^16
1837    bytes will have to use approach similar to what was done below for
1838    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1839    reallocating to 0 bytes.  Such thing is not going to work too well.
1840    You have been warned!!  */
1841 #if defined _MSC_VER  && !defined WIN32
1842 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.  */
1843 # define MAX_BUF_SIZE  65500L
1844 #else
1845 # define MAX_BUF_SIZE (1L << 16)
1846 #endif
1847 #endif /* 0 */
1848 
1849 /* Extend the buffer by twice its current size via realloc and
1850    reset the pointers that pointed into the old block to point to the
1851    correct places in the new one.  If extending the buffer results in it
1852    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1853 #if __BOUNDED_POINTERS__
1854 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1855 # define MOVE_BUFFER_POINTER(P)                                 \
1856   (__ptrlow (P) = new_buffer + (__ptrlow (P) - old_buffer),     \
1857    SET_HIGH_BOUND (P),                                          \
1858    __ptrvalue (P) = new_buffer + (__ptrvalue (P) - old_buffer))
1859 # define ELSE_EXTEND_BUFFER_HIGH_BOUND          \
1860   else                                          \
1861     {                                           \
1862       SET_HIGH_BOUND (b);                       \
1863       SET_HIGH_BOUND (begalt);                  \
1864       if (fixup_alt_jump)                       \
1865         SET_HIGH_BOUND (fixup_alt_jump);        \
1866       if (laststart)                            \
1867         SET_HIGH_BOUND (laststart);             \
1868       if (pending_exact)                        \
1869         SET_HIGH_BOUND (pending_exact);         \
1870     }
1871 #else
1872 # define MOVE_BUFFER_POINTER(P) ((P) = new_buffer + ((P) - old_buffer))
1873 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
1874 #endif
1875 #define EXTEND_BUFFER()                                                 \
1876   do {                                                                  \
1877     unsigned char *old_buffer = bufp->buffer;                           \
1878     if (bufp->allocated == MAX_BUF_SIZE)                                \
1879       return REG_ESIZE;                                                 \
1880     bufp->allocated <<= 1;                                              \
1881     if (bufp->allocated > MAX_BUF_SIZE)                                 \
1882       bufp->allocated = MAX_BUF_SIZE;                                   \
1883     RETALLOC (bufp->buffer, bufp->allocated, unsigned char);            \
1884     if (bufp->buffer == NULL)                                           \
1885       return REG_ESPACE;                                                \
1886     /* If the buffer moved, move all the pointers into it.  */          \
1887     if (old_buffer != bufp->buffer)                                     \
1888       {                                                                 \
1889         unsigned char *new_buffer = bufp->buffer;                       \
1890         MOVE_BUFFER_POINTER (b);                                        \
1891         MOVE_BUFFER_POINTER (begalt);                                   \
1892         if (fixup_alt_jump)                                             \
1893           MOVE_BUFFER_POINTER (fixup_alt_jump);                         \
1894         if (laststart)                                                  \
1895           MOVE_BUFFER_POINTER (laststart);                              \
1896         if (pending_exact)                                              \
1897           MOVE_BUFFER_POINTER (pending_exact);                          \
1898       }                                                                 \
1899     ELSE_EXTEND_BUFFER_HIGH_BOUND                                       \
1900   } while (0)
1901 
1902 
1903 /* Since we have one byte reserved for the register number argument to
1904    {start,stop}_memory, the maximum number of groups we can report
1905    things about is what fits in that byte.  */
1906 #define MAX_REGNUM 255
1907 
1908 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1909    ignore the excess.  */
1910 typedef int regnum_t;
1911 
1912 
1913 /* Macros for the compile stack.  */
1914 
1915 /* Since offsets can go either forwards or backwards, this type needs to
1916    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1917 /* int may be not enough when sizeof(int) == 2.  */
1918 typedef long pattern_offset_t;
1919 
1920 typedef struct
1921 {
1922   pattern_offset_t begalt_offset;
1923   pattern_offset_t fixup_alt_jump;
1924   pattern_offset_t laststart_offset;
1925   regnum_t regnum;
1926 } compile_stack_elt_t;
1927 
1928 
1929 typedef struct
1930 {
1931   compile_stack_elt_t *stack;
1932   unsigned size;
1933   unsigned avail;                       /* Offset of next open position.  */
1934 } compile_stack_type;
1935 
1936 
1937 #define INIT_COMPILE_STACK_SIZE 32
1938 
1939 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1940 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1941 
1942 /* The next available element.  */
1943 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1944 
1945 /* Explicit quit checking is only used on NTemacs and whenever we
1946    use polling to process input events.  */
1947 #if defined emacs && (defined WINDOWSNT || defined SYNC_INPUT) && defined QUIT
1948 extern int immediate_quit;
1949 # define IMMEDIATE_QUIT_CHECK                   \
1950     do {                                        \
1951       if (immediate_quit) QUIT;                 \
1952     } while (0)
1953 #else
1954 # define IMMEDIATE_QUIT_CHECK    ((void)0)
1955 #endif
1956 
1957 /* Structure to manage work area for range table.  */
1958 struct range_table_work_area
1959 {
1960   int *table;                   /* actual work area.  */
1961   int allocated;                /* allocated size for work area in bytes.  */
1962   int used;                     /* actually used size in words.  */
1963   int bits;                     /* flag to record character classes */
1964 };
1965 
1966 /* Make sure that WORK_AREA can hold more N multibyte characters.
1967    This is used only in set_image_of_range and set_image_of_range_1.
1968    It expects WORK_AREA to be a pointer.
1969    If it can't get the space, it returns from the surrounding function.  */
1970 
1971 #define EXTEND_RANGE_TABLE(work_area, n)                                \
1972   do {                                                                  \
1973     if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
1974       {                                                                 \
1975         extend_range_table_work_area (&work_area);                      \
1976         if ((work_area).table == 0)                                     \
1977           return (REG_ESPACE);                                          \
1978       }                                                                 \
1979   } while (0)
1980 
1981 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)           \
1982   (work_area).bits |= (bit)
1983 
1984 /* Bits used to implement the multibyte-part of the various character classes
1985    such as [:alnum:] in a charset's range table.  */
1986 #define BIT_WORD        0x1
1987 #define BIT_LOWER       0x2
1988 #define BIT_PUNCT       0x4
1989 #define BIT_SPACE       0x8
1990 #define BIT_UPPER       0x10
1991 #define BIT_MULTIBYTE   0x20
1992 
1993 /* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
1994 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)    \
1995   do {                                                                  \
1996     EXTEND_RANGE_TABLE ((work_area), 2);                                \
1997     (work_area).table[(work_area).used++] = (range_start);              \
1998     (work_area).table[(work_area).used++] = (range_end);                \
1999   } while (0)
2000 
2001 /* Free allocated memory for WORK_AREA.  */
2002 #define FREE_RANGE_TABLE_WORK_AREA(work_area)   \
2003   do {                                          \
2004     if ((work_area).table)                      \
2005       free ((work_area).table);                 \
2006   } while (0)
2007 
2008 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
2009 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
2010 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
2011 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
2012 
2013 
2014 /* Set the bit for character C in a list.  */
2015 #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
2016 
2017 
2018 #ifdef emacs
2019 
2020 /* Store characters in the range FROM to TO in the bitmap at B (for
2021    ASCII and unibyte characters) and WORK_AREA (for multibyte
2022    characters) while translating them and paying attention to the
2023    continuity of translated characters.
2024 
2025    Implementation note: It is better to implement these fairly big
2026    macros by a function, but it's not that easy because macros called
2027    in this macro assume various local variables already declared.  */
2028 
2029 /* Both FROM and TO are ASCII characters.  */
2030 
2031 #define SETUP_ASCII_RANGE(work_area, FROM, TO)                  \
2032   do {                                                          \
2033     int C0, C1;                                                 \
2034                                                                 \
2035     for (C0 = (FROM); C0 <= (TO); C0++)                         \
2036       {                                                         \
2037         C1 = TRANSLATE (C0);                                    \
2038         if (! ASCII_CHAR_P (C1))                                \
2039           {                                                     \
2040             SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);    \
2041             if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0)             \
2042               C1 = C0;                                          \
2043           }                                                     \
2044         SET_LIST_BIT (C1);                                      \
2045       }                                                         \
2046   } while (0)
2047 
2048 
2049 /* Both FROM and TO are unibyte characters (0x80..0xFF).  */
2050 
2051 #define SETUP_UNIBYTE_RANGE(work_area, FROM, TO)                               \
2052   do {                                                                         \
2053     int C0, C1, C2, I;                                                         \
2054     int USED = RANGE_TABLE_WORK_USED (work_area);                              \
2055                                                                                \
2056     for (C0 = (FROM); C0 <= (TO); C0++)                                        \
2057       {                                                                        \
2058         C1 = RE_CHAR_TO_MULTIBYTE (C0);                                        \
2059         if (CHAR_BYTE8_P (C1))                                                 \
2060           SET_LIST_BIT (C0);                                                   \
2061         else                                                                   \
2062           {                                                                    \
2063             C2 = TRANSLATE (C1);                                               \
2064             if (C2 == C1                                                       \
2065                 || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0)                         \
2066               C1 = C0;                                                         \
2067             SET_LIST_BIT (C1);                                                 \
2068             for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
2069               {                                                                \
2070                 int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
2071                 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
2072                                                                                \
2073                 if (C2 >= from - 1 && C2 <= to + 1)                            \
2074                   {                                                            \
2075                     if (C2 == from - 1)                                        \
2076                       RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
2077                     else if (C2 == to + 1)                                     \
2078                       RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
2079                     break;                                                     \
2080                   }                                                            \
2081               }                                                                \
2082             if (I < USED)                                                      \
2083               SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2);                 \
2084           }                                                                    \
2085       }                                                                        \
2086   } while (0)
2087 
2088 
2089 /* Both FROM and TO are mulitbyte characters.  */
2090 
2091 #define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO)                         \
2092   do {                                                                     \
2093     int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area);           \
2094                                                                            \
2095     SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO));                 \
2096     for (C0 = (FROM); C0 <= (TO); C0++)                                    \
2097       {                                                                    \
2098         C1 = TRANSLATE (C0);                                               \
2099         if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0                            \
2100             || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0))          \
2101           SET_LIST_BIT (C2);                                               \
2102         if (C1 >= (FROM) && C1 <= (TO))                                    \
2103           continue;                                                        \
2104         for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
2105           {                                                                \
2106             int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
2107             int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
2108                                                                            \
2109             if (C1 >= from - 1 && C1 <= to + 1)                            \
2110               {                                                            \
2111                 if (C1 == from - 1)                                        \
2112                   RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
2113                 else if (C1 == to + 1)                                     \
2114                   RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
2115                 break;                                                     \
2116               }                                                            \
2117           }                                                                \
2118         if (I < USED)                                                      \
2119           SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);                 \
2120       }                                                                    \
2121   } while (0)
2122 
2123 #endif /* emacs */
2124 
2125 /* Get the next unsigned number in the uncompiled pattern.  */
2126 #define GET_UNSIGNED_NUMBER(num)                                        \
2127   do {                                                                  \
2128     if (p == pend)                                                      \
2129       FREE_STACK_RETURN (REG_EBRACE);                                   \
2130     else                                                                \
2131       {                                                                 \
2132         PATFETCH (c);                                                   \
2133         while ('0' <= c && c <= '9')                                    \
2134           {                                                             \
2135             int prev;                                                   \
2136             if (num < 0)                                                \
2137               num = 0;                                                  \
2138             prev = num;                                                 \
2139             num = num * 10 + c - '0';                                   \
2140             if (num / 10 != prev)                                       \
2141               FREE_STACK_RETURN (REG_BADBR);                            \
2142             if (p == pend)                                              \
2143               FREE_STACK_RETURN (REG_EBRACE);                           \
2144             PATFETCH (c);                                               \
2145           }                                                             \
2146       }                                                                 \
2147   } while (0)
2148 
2149 #if ! WIDE_CHAR_SUPPORT
2150 
2151 /* Map a string to the char class it names (if any).  */
2152 re_wctype_t
2153 re_wctype (str)
2154      re_char *str;
2155 {
2156   const char *string = str;
2157   if      (STREQ (string, "alnum"))     return RECC_ALNUM;
2158   else if (STREQ (string, "alpha"))     return RECC_ALPHA;
2159   else if (STREQ (string, "word"))      return RECC_WORD;
2160   else if (STREQ (string, "ascii"))     return RECC_ASCII;
2161   else if (STREQ (string, "nonascii"))  return RECC_NONASCII;
2162   else if (STREQ (string, "graph"))     return RECC_GRAPH;
2163   else if (STREQ (string, "lower"))     return RECC_LOWER;
2164   else if (STREQ (string, "print"))     return RECC_PRINT;
2165   else if (STREQ (string, "punct"))     return RECC_PUNCT;
2166   else if (STREQ (string, "space"))     return RECC_SPACE;
2167   else if (STREQ (string, "upper"))     return RECC_UPPER;
2168   else if (STREQ (string, "unibyte"))   return RECC_UNIBYTE;
2169   else if (STREQ (string, "multibyte")) return RECC_MULTIBYTE;
2170   else if (STREQ (string, "digit"))     return RECC_DIGIT;
2171   else if (STREQ (string, "xdigit"))    return RECC_XDIGIT;
2172   else if (STREQ (string, "cntrl"))     return RECC_CNTRL;
2173   else if (STREQ (string, "blank"))     return RECC_BLANK;
2174   else return 0;
2175 }
2176 
2177 /* True if CH is in the char class CC.  */
2178 boolean
2179 re_iswctype (ch, cc)
2180      int ch;
2181      re_wctype_t cc;
2182 {
2183   switch (cc)
2184     {
2185     case RECC_ALNUM: return ISALNUM (ch);
2186     case RECC_ALPHA: return ISALPHA (ch);
2187     case RECC_BLANK: return ISBLANK (ch);
2188     case RECC_CNTRL: return ISCNTRL (ch);
2189     case RECC_DIGIT: return ISDIGIT (ch);
2190     case RECC_GRAPH: return ISGRAPH (ch);
2191     case RECC_LOWER: return ISLOWER (ch);
2192     case RECC_PRINT: return ISPRINT (ch);
2193     case RECC_PUNCT: return ISPUNCT (ch);
2194     case RECC_SPACE: return ISSPACE (ch);
2195     case RECC_UPPER: return ISUPPER (ch);
2196     case RECC_XDIGIT: return ISXDIGIT (ch);
2197     case RECC_ASCII: return IS_REAL_ASCII (ch);
2198     case RECC_NONASCII: return !IS_REAL_ASCII (ch);
2199     case RECC_UNIBYTE: return ISUNIBYTE (ch);
2200     case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
2201     case RECC_WORD: return ISWORD (ch);
2202     case RECC_ERROR: return false;
2203     default:
2204       abort();
2205     }
2206 }
2207 
2208 /* Return a bit-pattern to use in the range-table bits to match multibyte
2209    chars of class CC.  */
2210 static int
2211 re_wctype_to_bit (cc)
2212      re_wctype_t cc;
2213 {
2214   switch (cc)
2215     {
2216     case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
2217     case RECC_MULTIBYTE: return BIT_MULTIBYTE;
2218     case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
2219     case RECC_LOWER: return BIT_LOWER;
2220     case RECC_UPPER: return BIT_UPPER;
2221     case RECC_PUNCT: return BIT_PUNCT;
2222     case RECC_SPACE: return BIT_SPACE;
2223     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2224     case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
2225     default:
2226       abort();
2227     }
2228 }
2229 #endif
2230 
2231 /* Filling in the work area of a range.  */
2232 
2233 /* Actually extend the space in WORK_AREA.  */
2234 
2235 static void
2236 extend_range_table_work_area (work_area)
2237      struct range_table_work_area *work_area;
2238 {
2239   work_area->allocated += 16 * sizeof (int);
2240   if (work_area->table)
2241     work_area->table
2242       = (int *) realloc (work_area->table, work_area->allocated);
2243   else
2244     work_area->table
2245       = (int *) malloc (work_area->allocated);
2246 }
2247 
2248 #if 0
2249 #ifdef emacs
2250 
2251 /* Carefully find the ranges of codes that are equivalent
2252    under case conversion to the range start..end when passed through
2253    TRANSLATE.  Handle the case where non-letters can come in between
2254    two upper-case letters (which happens in Latin-1).
2255    Also handle the case of groups of more than 2 case-equivalent chars.
2256 
2257    The basic method is to look at consecutive characters and see
2258    if they can form a run that can be handled as one.
2259 
2260    Returns -1 if successful, REG_ESPACE if ran out of space.  */
2261 
2262 static int
2263 set_image_of_range_1 (work_area, start, end, translate)
2264      RE_TRANSLATE_TYPE translate;
2265      struct range_table_work_area *work_area;
2266      re_wchar_t start, end;
2267 {
2268   /* `one_case' indicates a character, or a run of characters,
2269      each of which is an isolate (no case-equivalents).
2270      This includes all ASCII non-letters.
2271 
2272      `two_case' indicates a character, or a run of characters,
2273      each of which has two case-equivalent forms.
2274      This includes all ASCII letters.
2275 
2276      `strange' indicates a character that has more than one
2277      case-equivalent.  */
2278 
2279   enum case_type {one_case, two_case, strange};
2280 
2281   /* Describe the run that is in progress,
2282      which the next character can try to extend.
2283      If run_type is strange, that means there really is no run.
2284      If run_type is one_case, then run_start...run_end is the run.
2285      If run_type is two_case, then the run is run_start...run_end,
2286      and the case-equivalents end at run_eqv_end.  */
2287 
2288   enum case_type run_type = strange;
2289   int run_start, run_end, run_eqv_end;
2290 
2291   Lisp_Object eqv_table;
2292 
2293   if (!RE_TRANSLATE_P (translate))
2294     {
2295       EXTEND_RANGE_TABLE (work_area, 2);
2296       work_area->table[work_area->used++] = (start);
2297       work_area->table[work_area->used++] = (end);
2298       return -1;
2299     }
2300 
2301   eqv_table = XCHAR_TABLE (translate)->extras[2];
2302 
2303   for (; start <= end; start++)
2304     {
2305       enum case_type this_type;
2306       int eqv = RE_TRANSLATE (eqv_table, start);
2307       int minchar, maxchar;
2308 
2309       /* Classify this character */
2310       if (eqv == start)
2311         this_type = one_case;
2312       else if (RE_TRANSLATE (eqv_table, eqv) == start)
2313         this_type = two_case;
2314       else
2315         this_type = strange;
2316 
2317       if (start < eqv)
2318         minchar = start, maxchar = eqv;
2319       else
2320         minchar = eqv, maxchar = start;
2321 
2322       /* Can this character extend the run in progress?  */
2323       if (this_type == strange || this_type != run_type
2324           || !(minchar == run_end + 1
2325                && (run_type == two_case
2326                    ? maxchar == run_eqv_end + 1 : 1)))
2327         {
2328           /* No, end the run.
2329              Record each of its equivalent ranges.  */
2330           if (run_type == one_case)
2331             {
2332               EXTEND_RANGE_TABLE (work_area, 2);
2333               work_area->table[work_area->used++] = run_start;
2334               work_area->table[work_area->used++] = run_end;
2335             }
2336           else if (run_type == two_case)
2337             {
2338               EXTEND_RANGE_TABLE (work_area, 4);
2339               work_area->table[work_area->used++] = run_start;
2340               work_area->table[work_area->used++] = run_end;
2341               work_area->table[work_area->used++]
2342                 = RE_TRANSLATE (eqv_table, run_start);
2343               work_area->table[work_area->used++]
2344                 = RE_TRANSLATE (eqv_table, run_end);
2345             }
2346           run_type = strange;
2347         }
2348 
2349       if (this_type == strange)
2350         {
2351           /* For a strange character, add each of its equivalents, one
2352              by one.  Don't start a range.  */
2353           do
2354             {
2355               EXTEND_RANGE_TABLE (work_area, 2);
2356               work_area->table[work_area->used++] = eqv;
2357               work_area->table[work_area->used++] = eqv;
2358               eqv = RE_TRANSLATE (eqv_table, eqv);
2359             }
2360           while (eqv != start);
2361         }
2362 
2363       /* Add this char to the run, or start a new run.  */
2364       else if (run_type == strange)
2365         {
2366           /* Initialize a new range.  */
2367           run_type = this_type;
2368           run_start = start;
2369           run_end = start;
2370           run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2371         }
2372       else
2373         {
2374           /* Extend a running range.  */
2375           run_end = minchar;
2376           run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2377         }
2378     }
2379 
2380   /* If a run is still in progress at the end, finish it now
2381      by recording its equivalent ranges.  */
2382   if (run_type == one_case)
2383     {
2384       EXTEND_RANGE_TABLE (work_area, 2);
2385       work_area->table[work_area->used++] = run_start;
2386       work_area->table[work_area->used++] = run_end;
2387     }
2388   else if (run_type == two_case)
2389     {
2390       EXTEND_RANGE_TABLE (work_area, 4);
2391       work_area->table[work_area->used++] = run_start;
2392       work_area->table[work_area->used++] = run_end;
2393       work_area->table[work_area->used++]
2394         = RE_TRANSLATE (eqv_table, run_start);
2395       work_area->table[work_area->used++]
2396         = RE_TRANSLATE (eqv_table, run_end);
2397     }
2398 
2399   return -1;
2400 }
2401 
2402 #endif /* emacs */
2403 
2404 /* Record the image of the range start..end when passed through
2405    TRANSLATE.  This is not necessarily TRANSLATE(start)..TRANSLATE(end)
2406    and is not even necessarily contiguous.
2407    Normally we approximate it with the smallest contiguous range that contains
2408    all the chars we need.  However, for Latin-1 we go to extra effort
2409    to do a better job.
2410 
2411    This function is not called for ASCII ranges.
2412 
2413    Returns -1 if successful, REG_ESPACE if ran out of space.  */
2414 
2415 static int
2416 set_image_of_range (work_area, start, end, translate)
2417      RE_TRANSLATE_TYPE translate;
2418      struct range_table_work_area *work_area;
2419      re_wchar_t start, end;
2420 {
2421   re_wchar_t cmin, cmax;
2422 
2423 #ifdef emacs
2424   /* For Latin-1 ranges, use set_image_of_range_1
2425      to get proper handling of ranges that include letters and nonletters.
2426      For a range that includes the whole of Latin-1, this is not necessary.
2427      For other character sets, we don't bother to get this right.  */
2428   if (RE_TRANSLATE_P (translate) && start < 04400
2429       && !(start < 04200 && end >= 04377))
2430     {
2431       int newend;
2432       int tem;
2433       newend = end;
2434       if (newend > 04377)
2435         newend = 04377;
2436       tem = set_image_of_range_1 (work_area, start, newend, translate);
2437       if (tem > 0)
2438         return tem;
2439 
2440       start = 04400;
2441       if (end < 04400)
2442         return -1;
2443     }
2444 #endif
2445 
2446   EXTEND_RANGE_TABLE (work_area, 2);
2447   work_area->table[work_area->used++] = (start);
2448   work_area->table[work_area->used++] = (end);
2449 
2450   cmin = -1, cmax = -1;
2451 
2452   if (RE_TRANSLATE_P (translate))
2453     {
2454       int ch;
2455 
2456       for (ch = start; ch <= end; ch++)
2457         {
2458           re_wchar_t c = TRANSLATE (ch);
2459           if (! (start <= c && c <= end))
2460             {
2461               if (cmin == -1)
2462                 cmin = c, cmax = c;
2463               else
2464                 {
2465                   cmin = MIN (cmin, c);
2466                   cmax = MAX (cmax, c);
2467                 }
2468             }
2469         }
2470 
2471       if (cmin != -1)
2472         {
2473           EXTEND_RANGE_TABLE (work_area, 2);
2474           work_area->table[work_area->used++] = (cmin);
2475           work_area->table[work_area->used++] = (cmax);
2476         }
2477     }
2478 
2479   return -1;
2480 }
2481 #endif  /* 0 */
2482 
2483 #ifndef MATCH_MAY_ALLOCATE
2484 
2485 /* If we cannot allocate large objects within re_match_2_internal,
2486    we make the fail stack and register vectors global.
2487    The fail stack, we grow to the maximum size when a regexp
2488    is compiled.
2489    The register vectors, we adjust in size each time we
2490    compile a regexp, according to the number of registers it needs.  */
2491 
2492 static fail_stack_type fail_stack;
2493 
2494 /* Size with which the following vectors are currently allocated.
2495    That is so we can make them bigger as needed,
2496    but never make them smaller.  */
2497 static int regs_allocated_size;
2498 
2499 static re_char **     regstart, **     regend;
2500 static re_char **best_regstart, **best_regend;
2501 
2502 /* Make the register vectors big enough for NUM_REGS registers,
2503    but don't make them smaller.  */
2504 
2505 static
2506 regex_grow_registers (num_regs)
2507      int num_regs;
2508 {
2509   if (num_regs > regs_allocated_size)
2510     {
2511       RETALLOC_IF (regstart,     num_regs, re_char *);
2512       RETALLOC_IF (regend,       num_regs, re_char *);
2513       RETALLOC_IF (best_regstart, num_regs, re_char *);
2514       RETALLOC_IF (best_regend,  num_regs, re_char *);
2515 
2516       regs_allocated_size = num_regs;
2517     }
2518 }
2519 
2520 #endif /* not MATCH_MAY_ALLOCATE */
2521 
2522 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2523                                                  compile_stack,
2524                                                  regnum_t regnum));
2525 
2526 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2527    Returns one of error codes defined in `regex.h', or zero for success.
2528 
2529    Assumes the `allocated' (and perhaps `buffer') and `translate'
2530    fields are set in BUFP on entry.
2531 
2532    If it succeeds, results are put in BUFP (if it returns an error, the
2533    contents of BUFP are undefined):
2534      `buffer' is the compiled pattern;
2535      `syntax' is set to SYNTAX;
2536      `used' is set to the length of the compiled pattern;
2537      `fastmap_accurate' is zero;
2538      `re_nsub' is the number of subexpressions in PATTERN;
2539      `not_bol' and `not_eol' are zero;
2540 
2541    The `fastmap' field is neither examined nor set.  */
2542 
2543 /* Insert the `jump' from the end of last alternative to "here".
2544    The space for the jump has already been allocated. */
2545 #define FIXUP_ALT_JUMP()                                                \
2546 do {                                                                    \
2547   if (fixup_alt_jump)                                                   \
2548     STORE_JUMP (jump, fixup_alt_jump, b);                               \
2549 } while (0)
2550 
2551 
2552 /* Return, freeing storage we allocated.  */
2553 #define FREE_STACK_RETURN(value)                \
2554   do {                                                  \
2555     FREE_RANGE_TABLE_WORK_AREA (range_table_work);      \
2556     free (compile_stack.stack);                         \
2557     return value;                                       \
2558   } while (0)
2559 
2560 static reg_errcode_t
2561 regex_compile (pattern, size, syntax, bufp)
2562      re_char *pattern;
2563      size_t size;
2564      reg_syntax_t syntax;
2565      struct re_pattern_buffer *bufp;
2566 {
2567   /* We fetch characters from PATTERN here.  */
2568   register re_wchar_t c, c1;
2569 
2570   /* A random temporary spot in PATTERN.  */
2571   re_char *p1;
2572 
2573   /* Points to the end of the buffer, where we should append.  */
2574   register unsigned char *b;
2575 
2576   /* Keeps track of unclosed groups.  */
2577   compile_stack_type compile_stack;
2578 
2579   /* Points to the current (ending) position in the pattern.  */
2580 #ifdef AIX
2581   /* `const' makes AIX compiler fail.  */
2582   unsigned char *p = pattern;
2583 #else
2584   re_char *p = pattern;
2585 #endif
2586   re_char *pend = pattern + size;
2587 
2588   /* How to translate the characters in the pattern.  */
2589   RE_TRANSLATE_TYPE translate = bufp->translate;
2590 
2591   /* Address of the count-byte of the most recently inserted `exactn'
2592      command.  This makes it possible to tell if a new exact-match
2593      character can be added to that command or if the character requires
2594      a new `exactn' command.  */
2595   unsigned char *pending_exact = 0;
2596 
2597   /* Address of start of the most recently finished expression.
2598      This tells, e.g., postfix * where to find the start of its
2599      operand.  Reset at the beginning of groups and alternatives.  */
2600   unsigned char *laststart = 0;
2601 
2602   /* Address of beginning of regexp, or inside of last group.  */
2603   unsigned char *begalt;
2604 
2605   /* Place in the uncompiled pattern (i.e., the {) to
2606      which to go back if the interval is invalid.  */
2607   re_char *beg_interval;
2608 
2609   /* Address of the place where a forward jump should go to the end of
2610      the containing expression.  Each alternative of an `or' -- except the
2611      last -- ends with a forward jump of this sort.  */
2612   unsigned char *fixup_alt_jump = 0;
2613 
2614   /* Work area for range table of charset.  */
2615   struct range_table_work_area range_table_work;
2616 
2617   /* If the object matched can contain multibyte characters.  */
2618   const boolean multibyte = RE_MULTIBYTE_P (bufp);
2619 
2620   /* If a target of matching can contain multibyte characters.  */
2621   const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
2622 
2623   /* Nonzero if we have pushed down into a subpattern.  */
2624   int in_subpattern = 0;
2625 
2626   /* These hold the values of p, pattern, and pend from the main
2627      pattern when we have pushed into a subpattern.  */
2628   re_char *main_p;
2629   re_char *main_pattern;
2630   re_char *main_pend;
2631 
2632 #ifdef DEBUG
2633   debug++;
2634   DEBUG_PRINT1 ("\nCompiling pattern: ");
2635   if (debug > 0)
2636     {
2637       unsigned debug_count;
2638 
2639       for (debug_count = 0; debug_count < size; debug_count++)
2640         putchar (pattern[debug_count]);
2641       putchar ('\n');
2642     }
2643 #endif /* DEBUG */
2644 
2645   /* Initialize the compile stack.  */
2646   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2647   if (compile_stack.stack == NULL)
2648     return REG_ESPACE;
2649 
2650   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2651   compile_stack.avail = 0;
2652 
2653   range_table_work.table = 0;
2654   range_table_work.allocated = 0;
2655 
2656   /* Initialize the pattern buffer.  */
2657   bufp->syntax = syntax;
2658   bufp->fastmap_accurate = 0;
2659   bufp->not_bol = bufp->not_eol = 0;
2660   bufp->used_syntax = 0;
2661 
2662   /* Set `used' to zero, so that if we return an error, the pattern
2663      printer (for debugging) will think there's no pattern.  We reset it
2664      at the end.  */
2665   bufp->used = 0;
2666 
2667   /* Always count groups, whether or not bufp->no_sub is set.  */
2668   bufp->re_nsub = 0;
2669 
2670 #if !defined emacs && !defined SYNTAX_TABLE
2671   /* Initialize the syntax table.  */
2672    init_syntax_once ();
2673 #endif
2674 
2675   if (bufp->allocated == 0)
2676     {
2677       if (bufp->buffer)
2678         { /* If zero allocated, but buffer is non-null, try to realloc
2679              enough space.  This loses if buffer's address is bogus, but
2680              that is the user's responsibility.  */
2681           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2682         }
2683       else
2684         { /* Caller did not allocate a buffer.  Do it for them.  */
2685           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2686         }
2687       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2688 
2689       bufp->allocated = INIT_BUF_SIZE;
2690     }
2691 
2692   begalt = b = bufp->buffer;
2693 
2694   /* Loop through the uncompiled pattern until we're at the end.  */
2695   while (1)
2696     {
2697       if (p == pend)
2698         {
2699           /* If this is the end of an included regexp,
2700              pop back to the main regexp and try again.  */
2701           if (in_subpattern)
2702             {
2703               in_subpattern = 0;
2704               pattern = main_pattern;
2705               p = main_p;
2706               pend = main_pend;
2707               continue;
2708             }
2709           /* If this is the end of the main regexp, we are done.  */
2710           break;
2711         }
2712 
2713       PATFETCH (c);
2714 
2715       switch (c)
2716         {
2717         case ' ':
2718           {
2719             re_char *p1 = p;
2720 
2721             /* If there's no special whitespace regexp, treat
2722                spaces normally.  And don't try to do this recursively.  */
2723             if (!whitespace_regexp || in_subpattern)
2724               goto normal_char;
2725 
2726             /* Peek past following spaces.  */
2727             while (p1 != pend)
2728               {
2729                 if (*p1 != ' ')
2730                   break;
2731                 p1++;
2732               }
2733             /* If the spaces are followed by a repetition op,
2734                treat them normally.  */
2735             if (p1 != pend
2736                 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
2737                     || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
2738               goto normal_char;
2739 
2740             /* Replace the spaces with the whitespace regexp.  */
2741             in_subpattern = 1;
2742             main_p = p1;
2743             main_pend = pend;
2744             main_pattern = pattern;
2745             p = pattern = whitespace_regexp;
2746             pend = p + strlen (p);
2747             break;
2748           }
2749 
2750         case '^':
2751           {
2752             if (   /* If at start of pattern, it's an operator.  */
2753                    p == pattern + 1
2754                    /* If context independent, it's an operator.  */
2755                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2756                    /* Otherwise, depends on what's come before.  */
2757                 || at_begline_loc_p (pattern, p, syntax))
2758               BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2759             else
2760               goto normal_char;
2761           }
2762           break;
2763 
2764 
2765         case '$':
2766           {
2767             if (   /* If at end of pattern, it's an operator.  */
2768                    p == pend
2769                    /* If context independent, it's an operator.  */
2770                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2771                    /* Otherwise, depends on what's next.  */
2772                 || at_endline_loc_p (p, pend, syntax))
2773                BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2774              else
2775                goto normal_char;
2776            }
2777            break;
2778 
2779 
2780         case '+':
2781         case '?':
2782           if ((syntax & RE_BK_PLUS_QM)
2783               || (syntax & RE_LIMITED_OPS))
2784             goto normal_char;
2785         handle_plus:
2786         case '*':
2787           /* If there is no previous pattern... */
2788           if (!laststart)
2789             {
2790               if (syntax & RE_CONTEXT_INVALID_OPS)
2791                 FREE_STACK_RETURN (REG_BADRPT);
2792               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2793                 goto normal_char;
2794             }
2795 
2796           {
2797             /* 1 means zero (many) matches is allowed.  */
2798             boolean zero_times_ok = 0, many_times_ok = 0;
2799             boolean greedy = 1;
2800 
2801             /* If there is a sequence of repetition chars, collapse it
2802                down to just one (the right one).  We can't combine
2803                interval operators with these because of, e.g., `a{2}*',
2804                which should only match an even number of `a's.  */
2805 
2806             for (;;)
2807               {
2808                 if ((syntax & RE_FRUGAL)
2809                     && c == '?' && (zero_times_ok || many_times_ok))
2810                   greedy = 0;
2811                 else
2812                   {
2813                     zero_times_ok |= c != '+';
2814                     many_times_ok |= c != '?';
2815                   }
2816 
2817                 if (p == pend)
2818                   break;
2819                 else if (*p == '*'
2820                          || (!(syntax & RE_BK_PLUS_QM)
2821                              && (*p == '+' || *p == '?')))
2822                   ;
2823                 else if (syntax & RE_BK_PLUS_QM  && *p == '\\')
2824                   {
2825                     if (p+1 == pend)
2826                       FREE_STACK_RETURN (REG_EESCAPE);
2827                     if (p[1] == '+' || p[1] == '?')
2828                       PATFETCH (c); /* Gobble up the backslash.  */
2829                     else
2830                       break;
2831                   }
2832                 else
2833                   break;
2834                 /* If we get here, we found another repeat character.  */
2835                 PATFETCH (c);
2836                }
2837 
2838             /* Star, etc. applied to an empty pattern is equivalent
2839                to an empty pattern.  */
2840             if (!laststart || laststart == b)
2841               break;
2842 
2843             /* Now we know whether or not zero matches is allowed
2844                and also whether or not two or more matches is allowed.  */
2845             if (greedy)
2846               {
2847                 if (many_times_ok)
2848                   {
2849                     boolean simple = skip_one_char (laststart) == b;
2850                     unsigned int startoffset = 0;
2851                     re_opcode_t ofj =
2852                       /* Check if the loop can match the empty string.  */
2853                       (simple || !analyse_first (laststart, b, NULL, 0))
2854                       ? on_failure_jump : on_failure_jump_loop;
2855                     assert (skip_one_char (laststart) <= b);
2856 
2857                     if (!zero_times_ok && simple)
2858                       { /* Since simple * loops can be made faster by using
2859                            on_failure_keep_string_jump, we turn simple P+
2860                            into PP* if P is simple.  */
2861                         unsigned char *p1, *p2;
2862                         startoffset = b - laststart;
2863                         GET_BUFFER_SPACE (startoffset);
2864                         p1 = b; p2 = laststart;
2865                         while (p2 < p1)
2866                           *b++ = *p2++;
2867                         zero_times_ok = 1;
2868                       }
2869 
2870                     GET_BUFFER_SPACE (6);
2871                     if (!zero_times_ok)
2872                       /* A + loop.  */
2873                       STORE_JUMP (ofj, b, b + 6);
2874                     else
2875                       /* Simple * loops can use on_failure_keep_string_jump
2876                          depending on what follows.  But since we don't know
2877                          that yet, we leave the decision up to
2878                          on_failure_jump_smart.  */
2879                       INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
2880                                    laststart + startoffset, b + 6);
2881                     b += 3;
2882                     STORE_JUMP (jump, b, laststart + startoffset);
2883                     b += 3;
2884                   }
2885                 else
2886                   {
2887                     /* A simple ? pattern.  */
2888                     assert (zero_times_ok);
2889                     GET_BUFFER_SPACE (3);
2890                     INSERT_JUMP (on_failure_jump, laststart, b + 3);
2891                     b += 3;
2892                   }
2893               }
2894             else                /* not greedy */
2895               { /* I wish the greedy and non-greedy cases could be merged. */
2896 
2897                 GET_BUFFER_SPACE (7); /* We might use less.  */
2898                 if (many_times_ok)
2899                   {
2900                     boolean emptyp = analyse_first (laststart, b, NULL, 0);
2901 
2902                     /* The non-greedy multiple match looks like
2903                        a repeat..until: we only need a conditional jump
2904                        at the end of the loop.  */
2905                     if (emptyp) BUF_PUSH (no_op);
2906                     STORE_JUMP (emptyp ? on_failure_jump_nastyloop
2907                                 : on_failure_jump, b, laststart);
2908                     b += 3;
2909                     if (zero_times_ok)
2910                       {
2911                         /* The repeat...until naturally matches one or more.
2912                            To also match zero times, we need to first jump to
2913                            the end of the loop (its conditional jump).  */
2914                         INSERT_JUMP (jump, laststart, b);
2915                         b += 3;
2916                       }
2917                   }
2918                 else
2919                   {
2920                     /* non-greedy a?? */
2921                     INSERT_JUMP (jump, laststart, b + 3);
2922                     b += 3;
2923                     INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2924                     b += 3;
2925                   }
2926               }
2927           }
2928           pending_exact = 0;
2929           break;
2930 
2931 
2932         case '.':
2933           laststart = b;
2934           BUF_PUSH (anychar);
2935           break;
2936 
2937 
2938         case '[':
2939           {
2940             CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2941 
2942             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2943 
2944             /* Ensure that we have enough space to push a charset: the
2945                opcode, the length count, and the bitset; 34 bytes in all.  */
2946             GET_BUFFER_SPACE (34);
2947 
2948             laststart = b;
2949 
2950             /* We test `*p == '^' twice, instead of using an if
2951                statement, so we only need one BUF_PUSH.  */
2952             BUF_PUSH (*p == '^' ? charset_not : charset);
2953             if (*p == '^')
2954               p++;
2955 
2956             /* Remember the first position in the bracket expression.  */
2957             p1 = p;
2958 
2959             /* Push the number of bytes in the bitmap.  */
2960             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2961 
2962             /* Clear the whole map.  */
2963             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2964 
2965             /* charset_not matches newline according to a syntax bit.  */
2966             if ((re_opcode_t) b[-2] == charset_not
2967                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2968               SET_LIST_BIT ('\n');
2969 
2970             /* Read in characters and ranges, setting map bits.  */
2971             for (;;)
2972               {
2973                 boolean escaped_char = false;
2974                 const unsigned char *p2 = p;
2975                 re_wchar_t ch, c2;
2976 
2977                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2978 
2979                 /* Don't translate yet.  The range TRANSLATE(X..Y) cannot
2980                    always be determined from TRANSLATE(X) and TRANSLATE(Y)
2981                    So the translation is done later in a loop.  Example:
2982                    (let ((case-fold-search t)) (string-match "[A-_]" "A"))  */
2983                 PATFETCH (c);
2984 
2985                 /* \ might escape characters inside [...] and [^...].  */
2986                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2987                   {
2988                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2989 
2990                     PATFETCH (c);
2991                     escaped_char = true;
2992                   }
2993                 else
2994                   {
2995                     /* Could be the end of the bracket expression.  If it's
2996                        not (i.e., when the bracket expression is `[]' so
2997                        far), the ']' character bit gets set way below.  */
2998                     if (c == ']' && p2 != p1)
2999                       break;
3000                   }
3001 
3002                 /* See if we're at the beginning of a possible character
3003                    class.  */
3004 
3005                 if (!escaped_char &&
3006                     syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3007                   {
3008                     /* Leave room for the null.  */
3009                     unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
3010                     const unsigned char *class_beg;
3011 
3012                     PATFETCH (c);
3013                     c1 = 0;
3014                     class_beg = p;
3015 
3016                     /* If pattern is `[[:'.  */
3017                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3018 
3019                     for (;;)
3020                       {
3021                         PATFETCH (c);
3022                         if ((c == ':' && *p == ']') || p == pend)
3023                           break;
3024                         if (c1 < CHAR_CLASS_MAX_LENGTH)
3025                           str[c1++] = c;
3026                         else
3027                           /* This is in any case an invalid class name.  */
3028                           str[0] = '\0';
3029                       }
3030                     str[c1] = '\0';
3031 
3032                     /* If isn't a word bracketed by `[:' and `:]':
3033                        undo the ending character, the letters, and
3034                        leave the leading `:' and `[' (but set bits for
3035                        them).  */
3036                     if (c == ':' && *p == ']')
3037                       {
3038                         re_wctype_t cc;
3039                         int limit;
3040 
3041                         cc = re_wctype (str);
3042 
3043                         if (cc == 0)
3044                           FREE_STACK_RETURN (REG_ECTYPE);
3045 
3046                         /* Throw away the ] at the end of the character
3047                            class.  */
3048                         PATFETCH (c);
3049 
3050                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3051 
3052 #ifndef emacs
3053                         for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
3054                           if (re_iswctype (btowc (ch), cc))
3055                             {
3056                               c = TRANSLATE (ch);
3057                               if (c < (1 << BYTEWIDTH))
3058                                 SET_LIST_BIT (c);
3059                             }
3060 #else  /* emacs */
3061                         /* Most character classes in a multibyte match
3062                            just set a flag.  Exceptions are is_blank,
3063                            is_digit, is_cntrl, and is_xdigit, since
3064                            they can only match ASCII characters.  We
3065                            don't need to handle them for multibyte.
3066                            They are distinguished by a negative wctype.  */
3067 
3068                         /* Setup the gl_state object to its buffer-defined
3069                            value.  This hardcodes the buffer-global
3070                            syntax-table for ASCII chars, while the other chars
3071                            will obey syntax-table properties.  It's not ideal,
3072                            but it's the way it's been done until now.  */
3073                         SETUP_BUFFER_SYNTAX_TABLE ();
3074 
3075                         for (ch = 0; ch < 256; ++ch)
3076                           {
3077                             c = RE_CHAR_TO_MULTIBYTE (ch);
3078                             if (! CHAR_BYTE8_P (c)
3079                                 && re_iswctype (c, cc))
3080                               {
3081                                 SET_LIST_BIT (ch);
3082                                 c1 = TRANSLATE (c);
3083                                 if (c1 == c)
3084                                   continue;
3085                                 if (ASCII_CHAR_P (c1))
3086                                   SET_LIST_BIT (c1);
3087                                 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
3088                                   SET_LIST_BIT (c1);
3089                               }
3090                           }
3091                         SET_RANGE_TABLE_WORK_AREA_BIT
3092                           (range_table_work, re_wctype_to_bit (cc));
3093 #endif  /* emacs */
3094                         /* In most cases the matching rule for char classes
3095                            only uses the syntax table for multibyte chars,
3096                            so that the content of the syntax-table it is not
3097                            hardcoded in the range_table.  SPACE and WORD are
3098                            the two exceptions.  */
3099                         if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
3100                           bufp->used_syntax = 1;
3101 
3102                         /* Repeat the loop. */
3103                         continue;
3104                       }
3105                     else
3106                       {
3107                         /* Go back to right after the "[:".  */
3108                         p = class_beg;
3109                         SET_LIST_BIT ('[');
3110 
3111                         /* Because the `:' may starts the range, we
3112                            can't simply set bit and repeat the loop.
3113                            Instead, just set it to C and handle below.  */
3114                         c = ':';
3115                       }
3116                   }
3117 
3118                 if (p < pend && p[0] == '-' && p[1] != ']')
3119                   {
3120 
3121                     /* Discard the `-'. */
3122                     PATFETCH (c1);
3123 
3124                     /* Fetch the character which ends the range. */
3125                     PATFETCH (c1);
3126 #ifdef emacs
3127                     if (CHAR_BYTE8_P (c1)
3128                         && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
3129                       /* Treat the range from a multibyte character to
3130                          raw-byte character as empty.  */
3131                       c = c1 + 1;
3132 #endif  /* emacs */
3133                   }
3134                 else
3135                   /* Range from C to C. */
3136                   c1 = c;
3137 
3138                 if (c > c1)
3139                   {
3140                     if (syntax & RE_NO_EMPTY_RANGES)
3141                       FREE_STACK_RETURN (REG_ERANGEX);
3142                     /* Else, repeat the loop.  */
3143                   }
3144                 else
3145                   {
3146 #ifndef emacs
3147                     /* Set the range into bitmap */
3148                     for (; c <= c1; c++)
3149                       {
3150                         ch = TRANSLATE (c);
3151                         if (ch < (1 << BYTEWIDTH))
3152                           SET_LIST_BIT (ch);
3153                       }
3154 #else  /* emacs */
3155                     if (c < 128)
3156                       {
3157                         ch = MIN (127, c1);
3158                         SETUP_ASCII_RANGE (range_table_work, c, ch);
3159                         c = ch + 1;
3160                         if (CHAR_BYTE8_P (c1))
3161                           c = BYTE8_TO_CHAR (128);
3162                       }
3163                     if (c <= c1)
3164                       {
3165                         if (CHAR_BYTE8_P (c))
3166                           {
3167                             c = CHAR_TO_BYTE8 (c);
3168                             c1 = CHAR_TO_BYTE8 (c1);
3169                             for (; c <= c1; c++)
3170                               SET_LIST_BIT (c);
3171                           }
3172                         else if (multibyte)
3173                           {
3174                             SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
3175                           }
3176                         else
3177                           {
3178                             SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
3179                           }
3180                       }
3181 #endif /* emacs */
3182                   }
3183               }
3184 
3185             /* Discard any (non)matching list bytes that are all 0 at the
3186                end of the map.  Decrease the map-length byte too.  */
3187             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3188               b[-1]--;
3189             b += b[-1];
3190 
3191             /* Build real range table from work area.  */
3192             if (RANGE_TABLE_WORK_USED (range_table_work)
3193                 || RANGE_TABLE_WORK_BITS (range_table_work))
3194               {
3195                 int i;
3196                 int used = RANGE_TABLE_WORK_USED (range_table_work);
3197 
3198                 /* Allocate space for COUNT + RANGE_TABLE.  Needs two
3199                    bytes for flags, two for COUNT, and three bytes for
3200                    each character. */
3201                 GET_BUFFER_SPACE (4 + used * 3);
3202 
3203                 /* Indicate the existence of range table.  */
3204                 laststart[1] |= 0x80;
3205 
3206                 /* Store the character class flag bits into the range table.
3207                    If not in emacs, these flag bits are always 0.  */
3208                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
3209                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
3210 
3211                 STORE_NUMBER_AND_INCR (b, used / 2);
3212                 for (i = 0; i < used; i++)
3213                   STORE_CHARACTER_AND_INCR
3214                     (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
3215               }
3216           }
3217           break;
3218 
3219 
3220         case '(':
3221           if (syntax & RE_NO_BK_PARENS)
3222             goto handle_open;
3223           else
3224             goto normal_char;
3225 
3226 
3227         case ')':
3228           if (syntax & RE_NO_BK_PARENS)
3229             goto handle_close;
3230           else
3231             goto normal_char;
3232 
3233 
3234         case '\n':
3235           if (syntax & RE_NEWLINE_ALT)
3236             goto handle_alt;
3237           else
3238             goto normal_char;
3239 
3240 
3241         case '|':
3242           if (syntax & RE_NO_BK_VBAR)
3243             goto handle_alt;
3244           else
3245             goto normal_char;
3246 
3247 
3248         case '{':
3249            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3250              goto handle_interval;
3251            else
3252              goto normal_char;
3253 
3254 
3255         case '\\':
3256           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3257 
3258           /* Do not translate the character after the \, so that we can
3259              distinguish, e.g., \B from \b, even if we normally would
3260              translate, e.g., B to b.  */
3261           PATFETCH (c);
3262 
3263           switch (c)
3264             {
3265             case '(':
3266               if (syntax & RE_NO_BK_PARENS)
3267                 goto normal_backslash;
3268 
3269             handle_open:
3270               {
3271                 int shy = 0;
3272                 regnum_t regnum = 0;
3273                 if (p+1 < pend)
3274                   {
3275                     /* Look for a special (?...) construct */
3276                     if ((syntax & RE_SHY_GROUPS) && *p == '?')
3277                       {
3278                         PATFETCH (c); /* Gobble up the '?'.  */
3279                         while (!shy)
3280                           {
3281                             PATFETCH (c);
3282                             switch (c)
3283                               {
3284                               case ':': shy = 1; break;
3285                               case '0':
3286                                 /* An explicitly specified regnum must start
3287                                    with non-0. */
3288                                 if (regnum == 0)
3289                                   FREE_STACK_RETURN (REG_BADPAT);
3290                               case '1': case '2': case '3': case '4':
3291                               case '5': case '6': case '7': case '8': case '9':
3292                                 regnum = 10*regnum + (c - '0'); break;
3293                               default:
3294                                 /* Only (?:...) is supported right now. */
3295                                 FREE_STACK_RETURN (REG_BADPAT);
3296                               }
3297                           }
3298                       }
3299                   }
3300 
3301                 if (!shy)
3302                   regnum = ++bufp->re_nsub;
3303                 else if (regnum)
3304                   { /* It's actually not shy, but explicitly numbered.  */
3305                     shy = 0;
3306                     if (regnum > bufp->re_nsub)
3307                       bufp->re_nsub = regnum;
3308                     else if (regnum > bufp->re_nsub
3309                              /* Ideally, we'd want to check that the specified
3310                                 group can't have matched (i.e. all subgroups
3311                                 using the same regnum are in other branches of
3312                                 OR patterns), but we don't currently keep track
3313                                 of enough info to do that easily.  */
3314                              || group_in_compile_stack (compile_stack, regnum))
3315                       FREE_STACK_RETURN (REG_BADPAT);
3316                   }
3317                 else
3318                   /* It's really shy.  */
3319                   regnum = - bufp->re_nsub;
3320 
3321                 if (COMPILE_STACK_FULL)
3322                   {
3323                     RETALLOC (compile_stack.stack, compile_stack.size << 1,
3324                               compile_stack_elt_t);
3325                     if (compile_stack.stack == NULL) return REG_ESPACE;
3326 
3327                     compile_stack.size <<= 1;
3328                   }
3329 
3330                 /* These are the values to restore when we hit end of this
3331                    group.  They are all relative offsets, so that if the
3332                    whole pattern moves because of realloc, they will still
3333                    be valid.  */
3334                 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
3335                 COMPILE_STACK_TOP.fixup_alt_jump
3336                   = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
3337                 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
3338                 COMPILE_STACK_TOP.regnum = regnum;
3339 
3340                 /* Do not push a start_memory for groups beyond the last one
3341                    we can represent in the compiled pattern.  */
3342                 if (regnum <= MAX_REGNUM && regnum > 0)
3343                   BUF_PUSH_2 (start_memory, regnum);
3344 
3345                 compile_stack.avail++;
3346 
3347                 fixup_alt_jump = 0;
3348                 laststart = 0;
3349                 begalt = b;
3350                 /* If we've reached MAX_REGNUM groups, then this open
3351                    won't actually generate any code, so we'll have to
3352                    clear pending_exact explicitly.  */
3353                 pending_exact = 0;
3354                 break;
3355               }
3356 
3357             case ')':
3358               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3359 
3360               if (COMPILE_STACK_EMPTY)
3361                 {
3362                   if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3363                     goto normal_backslash;
3364                   else
3365                     FREE_STACK_RETURN (REG_ERPAREN);
3366                 }
3367 
3368             handle_close:
3369               FIXUP_ALT_JUMP ();
3370 
3371               /* See similar code for backslashed left paren above.  */
3372               if (COMPILE_STACK_EMPTY)
3373                 {
3374                   if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3375                     goto normal_char;
3376                   else
3377                     FREE_STACK_RETURN (REG_ERPAREN);
3378                 }
3379 
3380               /* Since we just checked for an empty stack above, this
3381                  ``can't happen''.  */
3382               assert (compile_stack.avail != 0);
3383               {
3384                 /* We don't just want to restore into `regnum', because
3385                    later groups should continue to be numbered higher,
3386                    as in `(ab)c(de)' -- the second group is #2.  */
3387                 regnum_t regnum;
3388 
3389                 compile_stack.avail--;
3390                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
3391                 fixup_alt_jump
3392                   = COMPILE_STACK_TOP.fixup_alt_jump
3393                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
3394                     : 0;
3395                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
3396                 regnum = COMPILE_STACK_TOP.regnum;
3397                 /* If we've reached MAX_REGNUM groups, then this open
3398                    won't actually generate any code, so we'll have to
3399                    clear pending_exact explicitly.  */
3400                 pending_exact = 0;
3401 
3402                 /* We're at the end of the group, so now we know how many
3403                    groups were inside this one.  */
3404                 if (regnum <= MAX_REGNUM && regnum > 0)
3405                   BUF_PUSH_2 (stop_memory, regnum);
3406               }
3407               break;
3408 
3409 
3410             case '|':                                   /* `\|'.  */
3411               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3412                 goto normal_backslash;
3413             handle_alt:
3414               if (syntax & RE_LIMITED_OPS)
3415                 goto normal_char;
3416 
3417               /* Insert before the previous alternative a jump which
3418                  jumps to this alternative if the former fails.  */
3419               GET_BUFFER_SPACE (3);
3420               INSERT_JUMP (on_failure_jump, begalt, b + 6);
3421               pending_exact = 0;
3422               b += 3;
3423 
3424               /* The alternative before this one has a jump after it
3425                  which gets executed if it gets matched.  Adjust that
3426                  jump so it will jump to this alternative's analogous
3427                  jump (put in below, which in turn will jump to the next
3428                  (if any) alternative's such jump, etc.).  The last such
3429                  jump jumps to the correct final destination.  A picture:
3430                           _____ _____
3431                           |   | |   |
3432                           |   v |   v
3433                          a | b   | c
3434 
3435                  If we are at `b', then fixup_alt_jump right now points to a
3436                  three-byte space after `a'.  We'll put in the jump, set
3437                  fixup_alt_jump to right after `b', and leave behind three
3438                  bytes which we'll fill in when we get to after `c'.  */
3439 
3440               FIXUP_ALT_JUMP ();
3441 
3442               /* Mark and leave space for a jump after this alternative,
3443                  to be filled in later either by next alternative or
3444                  when know we're at the end of a series of alternatives.  */
3445               fixup_alt_jump = b;
3446               GET_BUFFER_SPACE (3);
3447               b += 3;
3448 
3449               laststart = 0;
3450               begalt = b;
3451               break;
3452 
3453 
3454             case '{':
3455               /* If \{ is a literal.  */
3456               if (!(syntax & RE_INTERVALS)
3457                      /* If we're at `\{' and it's not the open-interval
3458                         operator.  */
3459                   || (syntax & RE_NO_BK_BRACES))
3460                 goto normal_backslash;
3461 
3462             handle_interval:
3463               {
3464                 /* If got here, then the syntax allows intervals.  */
3465 
3466                 /* At least (most) this many matches must be made.  */
3467                 int lower_bound = 0, upper_bound = -1;
3468 
3469                 beg_interval = p;
3470 
3471                 GET_UNSIGNED_NUMBER (lower_bound);
3472 
3473                 if (c == ',')
3474                   GET_UNSIGNED_NUMBER (upper_bound);
3475                 else
3476                   /* Interval such as `{1}' => match exactly once. */
3477                   upper_bound = lower_bound;
3478 
3479                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
3480                     || (upper_bound >= 0 && lower_bound > upper_bound))
3481                   FREE_STACK_RETURN (REG_BADBR);
3482 
3483                 if (!(syntax & RE_NO_BK_BRACES))
3484                   {
3485                     if (c != '\\')
3486                       FREE_STACK_RETURN (REG_BADBR);
3487                     if (p == pend)
3488                       FREE_STACK_RETURN (REG_EESCAPE);
3489                     PATFETCH (c);
3490                   }
3491 
3492                 if (c != '}')
3493                   FREE_STACK_RETURN (REG_BADBR);
3494 
3495                 /* We just parsed a valid interval.  */
3496 
3497                 /* If it's invalid to have no preceding re.  */
3498                 if (!laststart)
3499                   {
3500                     if (syntax & RE_CONTEXT_INVALID_OPS)
3501                       FREE_STACK_RETURN (REG_BADRPT);
3502                     else if (syntax & RE_CONTEXT_INDEP_OPS)
3503                       laststart = b;
3504                     else
3505                       goto unfetch_interval;
3506                   }
3507 
3508                 if (upper_bound == 0)
3509                   /* If the upper bound is zero, just drop the sub pattern
3510                      altogether.  */
3511                   b = laststart;
3512                 else if (lower_bound == 1 && upper_bound == 1)
3513                   /* Just match it once: nothing to do here.  */
3514                   ;
3515 
3516                 /* Otherwise, we have a nontrivial interval.  When
3517                    we're all done, the pattern will look like:
3518                    set_number_at <jump count> <upper bound>
3519                    set_number_at <succeed_n count> <lower bound>
3520                    succeed_n <after jump addr> <succeed_n count>
3521                    <body of loop>
3522                    jump_n <succeed_n addr> <jump count>
3523                    (The upper bound and `jump_n' are omitted if
3524                    `upper_bound' is 1, though.)  */
3525                 else
3526                   { /* If the upper bound is > 1, we need to insert
3527                        more at the end of the loop.  */
3528                     unsigned int nbytes = (upper_bound < 0 ? 3
3529                                            : upper_bound > 1 ? 5 : 0);
3530                     unsigned int startoffset = 0;
3531 
3532                     GET_BUFFER_SPACE (20); /* We might use less.  */
3533 
3534                     if (lower_bound == 0)
3535                       {
3536                         /* A succeed_n that starts with 0 is really a
3537                            a simple on_failure_jump_loop.  */
3538                         INSERT_JUMP (on_failure_jump_loop, laststart,
3539                                      b + 3 + nbytes);
3540                         b += 3;
3541                       }
3542                     else
3543                       {
3544                         /* Initialize lower bound of the `succeed_n', even
3545                            though it will be set during matching by its
3546                            attendant `set_number_at' (inserted next),
3547                            because `re_compile_fastmap' needs to know.
3548                            Jump to the `jump_n' we might insert below.  */
3549                         INSERT_JUMP2 (succeed_n, laststart,
3550                                       b + 5 + nbytes,
3551                                       lower_bound);
3552                         b += 5;
3553 
3554                         /* Code to initialize the lower bound.  Insert
3555                            before the `succeed_n'.  The `5' is the last two
3556                            bytes of this `set_number_at', plus 3 bytes of
3557                            the following `succeed_n'.  */
3558                         insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3559                         b += 5;
3560                         startoffset += 5;
3561                       }
3562 
3563                     if (upper_bound < 0)
3564                       {
3565                         /* A negative upper bound stands for infinity,
3566                            in which case it degenerates to a plain jump.  */
3567                         STORE_JUMP (jump, b, laststart + startoffset);
3568                         b += 3;
3569                       }
3570                     else if (upper_bound > 1)
3571                       { /* More than one repetition is allowed, so
3572                            append a backward jump to the `succeed_n'
3573                            that starts this interval.
3574 
3575                            When we've reached this during matching,
3576                            we'll have matched the interval once, so
3577                            jump back only `upper_bound - 1' times.  */
3578                         STORE_JUMP2 (jump_n, b, laststart + startoffset,
3579                                      upper_bound - 1);
3580                         b += 5;
3581 
3582                         /* The location we want to set is the second
3583                            parameter of the `jump_n'; that is `b-2' as
3584                            an absolute address.  `laststart' will be
3585                            the `set_number_at' we're about to insert;
3586                            `laststart+3' the number to set, the source
3587                            for the relative address.  But we are
3588                            inserting into the middle of the pattern --
3589                            so everything is getting moved up by 5.
3590                            Conclusion: (b - 2) - (laststart + 3) + 5,
3591                            i.e., b - laststart.
3592 
3593                            We insert this at the beginning of the loop
3594                            so that if we fail during matching, we'll
3595                            reinitialize the bounds.  */
3596                         insert_op2 (set_number_at, laststart, b - laststart,
3597                                     upper_bound - 1, b);
3598                         b += 5;
3599                       }
3600                   }
3601                 pending_exact = 0;
3602                 beg_interval = NULL;
3603               }
3604               break;
3605 
3606             unfetch_interval:
3607               /* If an invalid interval, match the characters as literals.  */
3608                assert (beg_interval);
3609                p = beg_interval;
3610                beg_interval = NULL;
3611 
3612                /* normal_char and normal_backslash need `c'.  */
3613                c = '{';
3614 
3615                if (!(syntax & RE_NO_BK_BRACES))
3616                  {
3617                    assert (p > pattern && p[-1] == '\\');
3618                    goto normal_backslash;
3619                  }
3620                else
3621                  goto normal_char;
3622 
3623 #ifdef emacs
3624             /* There is no way to specify the before_dot and after_dot
3625                operators.  rms says this is ok.  --karl  */
3626             case '=':
3627               BUF_PUSH (at_dot);
3628               break;
3629 
3630             case 's':
3631               laststart = b;
3632               PATFETCH (c);
3633               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3634               break;
3635 
3636             case 'S':
3637               laststart = b;
3638               PATFETCH (c);
3639               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3640               break;
3641 
3642             case 'c':
3643               laststart = b;
3644               PATFETCH (c);
3645               BUF_PUSH_2 (categoryspec, c);
3646               break;
3647 
3648             case 'C':
3649               laststart = b;
3650               PATFETCH (c);
3651               BUF_PUSH_2 (notcategoryspec, c);
3652               break;
3653 #endif /* emacs */
3654 
3655 
3656             case 'w':
3657               if (syntax & RE_NO_GNU_OPS)
3658                 goto normal_char;
3659               laststart = b;
3660               BUF_PUSH_2 (syntaxspec, Sword);
3661               break;
3662 
3663 
3664             case 'W':
3665               if (syntax & RE_NO_GNU_OPS)
3666                 goto normal_char;
3667               laststart = b;
3668               BUF_PUSH_2 (notsyntaxspec, Sword);
3669               break;
3670 
3671 
3672             case '<':
3673               if (syntax & RE_NO_GNU_OPS)
3674                 goto normal_char;
3675               BUF_PUSH (wordbeg);
3676               break;
3677 
3678             case '>':
3679               if (syntax & RE_NO_GNU_OPS)
3680                 goto normal_char;
3681               BUF_PUSH (wordend);
3682               break;
3683 
3684             case '_':
3685               if (syntax & RE_NO_GNU_OPS)
3686                 goto normal_char;
3687               laststart = b;
3688               PATFETCH (c);
3689               if (c == '<')
3690                 BUF_PUSH (symbeg);
3691               else if (c == '>')
3692                 BUF_PUSH (symend);
3693               else
3694                 FREE_STACK_RETURN (REG_BADPAT);
3695               break;
3696 
3697             case 'b':
3698               if (syntax & RE_NO_GNU_OPS)
3699                 goto normal_char;
3700               BUF_PUSH (wordbound);
3701               break;
3702 
3703             case 'B':
3704               if (syntax & RE_NO_GNU_OPS)
3705                 goto normal_char;
3706               BUF_PUSH (notwordbound);
3707               break;
3708 
3709             case '`':
3710               if (syntax & RE_NO_GNU_OPS)
3711                 goto normal_char;
3712               BUF_PUSH (begbuf);
3713               break;
3714 
3715             case '\'':
3716               if (syntax & RE_NO_GNU_OPS)
3717                 goto normal_char;
3718               BUF_PUSH (endbuf);
3719               break;
3720 
3721             case '1': case '2': case '3': case '4': case '5':
3722             case '6': case '7': case '8': case '9':
3723               {
3724                 regnum_t reg;
3725 
3726                 if (syntax & RE_NO_BK_REFS)
3727                   goto normal_backslash;
3728 
3729                 reg = c - '0';
3730 
3731                 if (reg > bufp->re_nsub || reg < 1
3732                     /* Can't back reference to a subexp before its end.  */
3733                     || group_in_compile_stack (compile_stack, reg))
3734                   FREE_STACK_RETURN (REG_ESUBREG);
3735 
3736                 laststart = b;
3737                 BUF_PUSH_2 (duplicate, reg);
3738               }
3739               break;
3740 
3741 
3742             case '+':
3743             case '?':
3744               if (syntax & RE_BK_PLUS_QM)
3745                 goto handle_plus;
3746               else
3747                 goto normal_backslash;
3748 
3749             default:
3750             normal_backslash:
3751               /* You might think it would be useful for \ to mean
3752                  not to translate; but if we don't translate it
3753                  it will never match anything.  */
3754               goto normal_char;
3755             }
3756           break;
3757 
3758 
3759         default:
3760         /* Expects the character in `c'.  */
3761         normal_char:
3762           /* If no exactn currently being built.  */
3763           if (!pending_exact
3764 
3765               /* If last exactn not at current position.  */
3766               || pending_exact + *pending_exact + 1 != b
3767 
3768               /* We have only one byte following the exactn for the count.  */
3769               || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
3770 
3771               /* If followed by a repetition operator.  */
3772               || (p != pend && (*p == '*' || *p == '^'))
3773               || ((syntax & RE_BK_PLUS_QM)
3774                   ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3775                   : p != pend && (*p == '+' || *p == '?'))
3776               || ((syntax & RE_INTERVALS)
3777                   && ((syntax & RE_NO_BK_BRACES)
3778                       ? p != pend && *p == '{'
3779                       : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
3780             {
3781               /* Start building a new exactn.  */
3782 
3783               laststart = b;
3784 
3785               BUF_PUSH_2 (exactn, 0);
3786               pending_exact = b - 1;
3787             }
3788 
3789           GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
3790           {
3791             int len;
3792 
3793             if (multibyte)
3794               {
3795                 c = TRANSLATE (c);
3796                 len = CHAR_STRING (c, b);
3797                 b += len;
3798               }
3799             else
3800               {
3801                 c1 = RE_CHAR_TO_MULTIBYTE (c);
3802                 if (! CHAR_BYTE8_P (c1))
3803                   {
3804                     re_wchar_t c2 = TRANSLATE (c1);
3805 
3806                     if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
3807                       c = c1;
3808                   }                   
3809                 *b++ = c;
3810                 len = 1;
3811               }
3812             (*pending_exact) += len;
3813           }
3814 
3815           break;
3816         } /* switch (c) */
3817     } /* while p != pend */
3818 
3819 
3820   /* Through the pattern now.  */
3821 
3822   FIXUP_ALT_JUMP ();
3823 
3824   if (!COMPILE_STACK_EMPTY)
3825     FREE_STACK_RETURN (REG_EPAREN);
3826 
3827   /* If we don't want backtracking, force success
3828      the first time we reach the end of the compiled pattern.  */
3829   if (syntax & RE_NO_POSIX_BACKTRACKING)
3830     BUF_PUSH (succeed);
3831 
3832   /* We have succeeded; set the length of the buffer.  */
3833   bufp->used = b - bufp->buffer;
3834 
3835 #ifdef DEBUG
3836   if (debug > 0)
3837     {
3838       re_compile_fastmap (bufp);
3839       DEBUG_PRINT1 ("\nCompiled pattern: \n");
3840       print_compiled_pattern (bufp);
3841     }
3842   debug--;
3843 #endif /* DEBUG */
3844 
3845 #ifndef MATCH_MAY_ALLOCATE
3846   /* Initialize the failure stack to the largest possible stack.  This
3847      isn't necessary unless we're trying to avoid calling alloca in
3848      the search and match routines.  */
3849   {
3850     int num_regs = bufp->re_nsub + 1;
3851 
3852     if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
3853       {
3854         fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
3855 
3856         if (! fail_stack.stack)
3857           fail_stack.stack
3858             = (fail_stack_elt_t *) malloc (fail_stack.size
3859                                            * sizeof (fail_stack_elt_t));
3860         else
3861           fail_stack.stack
3862             = (fail_stack_elt_t *) realloc (fail_stack.stack,
3863                                             (fail_stack.size
3864                                              * sizeof (fail_stack_elt_t)));
3865       }
3866 
3867     regex_grow_registers (num_regs);
3868   }
3869 #endif /* not MATCH_MAY_ALLOCATE */
3870 
3871   FREE_STACK_RETURN (REG_NOERROR);
3872 } /* regex_compile */
3873 
3874 /* Subroutines for `regex_compile'.  */
3875 
3876 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3877 
3878 static void
3879 store_op1 (op, loc, arg)
3880     re_opcode_t op;
3881     unsigned char *loc;
3882     int arg;
3883 {
3884   *loc = (unsigned char) op;
3885   STORE_NUMBER (loc + 1, arg);
3886 }
3887 
3888 
3889 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3890 
3891 static void
3892 store_op2 (op, loc, arg1, arg2)
3893     re_opcode_t op;
3894     unsigned char *loc;
3895     int arg1, arg2;
3896 {
3897   *loc = (unsigned char) op;
3898   STORE_NUMBER (loc + 1, arg1);
3899   STORE_NUMBER (loc + 3, arg2);
3900 }
3901 
3902 
3903 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3904    for OP followed by two-byte integer parameter ARG.  */
3905 
3906 static void
3907 insert_op1 (op, loc, arg, end)
3908     re_opcode_t op;
3909     unsigned char *loc;
3910     int arg;
3911     unsigned char *end;
3912 {
3913   register unsigned char *pfrom = end;
3914   register unsigned char *pto = end + 3;
3915 
3916   while (pfrom != loc)
3917     *--pto = *--pfrom;
3918 
3919   store_op1 (op, loc, arg);
3920 }
3921 
3922 
3923 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3924 
3925 static void
3926 insert_op2 (op, loc, arg1, arg2, end)
3927     re_opcode_t op;
3928     unsigned char *loc;
3929     int arg1, arg2;
3930     unsigned char *end;
3931 {
3932   register unsigned char *pfrom = end;
3933   register unsigned char *pto = end + 5;
3934 
3935   while (pfrom != loc)
3936     *--pto = *--pfrom;
3937 
3938   store_op2 (op, loc, arg1, arg2);
3939 }
3940 
3941 
3942 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3943    after an alternative or a begin-subexpression.  We assume there is at
3944    least one character before the ^.  */
3945 
3946 static boolean
3947 at_begline_loc_p (pattern, p, syntax)
3948     re_char *pattern, *p;
3949     reg_syntax_t syntax;
3950 {
3951   re_char *prev = p - 2;
3952   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3953 
3954   return
3955        /* After a subexpression?  */
3956        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3957        /* After an alternative?  */
3958     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
3959        /* After a shy subexpression?  */
3960     || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
3961         && prev[-1] == '?' && prev[-2] == '('
3962         && (syntax & RE_NO_BK_PARENS
3963             || (prev - 3 >= pattern && prev[-3] == '\\')));
3964 }
3965 
3966 
3967 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3968    at least one character after the $, i.e., `P < PEND'.  */
3969 
3970 static boolean
3971 at_endline_loc_p (p, pend, syntax)
3972     re_char *p, *pend;
3973     reg_syntax_t syntax;
3974 {
3975   re_char *next = p;
3976   boolean next_backslash = *next == '\\';
3977   re_char *next_next = p + 1 < pend ? p + 1 : 0;
3978 
3979   return
3980        /* Before a subexpression?  */
3981        (syntax & RE_NO_BK_PARENS ? *next == ')'
3982         : next_backslash && next_next && *next_next == ')')
3983        /* Before an alternative?  */
3984     || (syntax & RE_NO_BK_VBAR ? *next == '|'
3985         : next_backslash && next_next && *next_next == '|');
3986 }
3987 
3988 
3989 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3990    false if it's not.  */
3991 
3992 static boolean
3993 group_in_compile_stack (compile_stack, regnum)
3994     compile_stack_type compile_stack;
3995     regnum_t regnum;
3996 {
3997   int this_element;
3998 
3999   for (this_element = compile_stack.avail - 1;
4000        this_element >= 0;
4001        this_element--)
4002     if (compile_stack.stack[this_element].regnum == regnum)
4003       return true;
4004 
4005   return false;
4006 }
4007 
4008 /* analyse_first.
4009    If fastmap is non-NULL, go through the pattern and fill fastmap
4010    with all the possible leading chars.  If fastmap is NULL, don't
4011    bother filling it up (obviously) and only return whether the
4012    pattern could potentially match the empty string.
4013 
4014    Return 1  if p..pend might match the empty string.
4015    Return 0  if p..pend matches at least one char.
4016    Return -1 if fastmap was not updated accurately.  */
4017 
4018 static int
4019 analyse_first (p, pend, fastmap, multibyte)
4020      re_char *p, *pend;
4021      char *fastmap;
4022      const int multibyte;
4023 {
4024   int j, k;
4025   boolean not;
4026 
4027   /* If all elements for base leading-codes in fastmap is set, this
4028      flag is set true.  */
4029   boolean match_any_multibyte_characters = false;
4030 
4031   assert (p);
4032 
4033   /* The loop below works as follows:
4034      - It has a working-list kept in the PATTERN_STACK and which basically
4035        starts by only containing a pointer to the first operation.
4036      - If the opcode we're looking at is a match against some set of
4037        chars, then we add those chars to the fastmap and go on to the
4038        next work element from the worklist (done via `break').
4039      - If the opcode is a control operator on the other hand, we either
4040        ignore it (if it's meaningless at this point, such as `start_memory')
4041        or execute it (if it's a jump).  If the jump has several destinations
4042        (i.e. `on_failure_jump'), then we push the other destination onto the
4043        worklist.
4044      We guarantee termination by ignoring backward jumps (more or less),
4045      so that `p' is monotonically increasing.  More to the point, we
4046      never set `p' (or push) anything `<= p1'.  */
4047 
4048   while (p < pend)
4049     {
4050       /* `p1' is used as a marker of how far back a `on_failure_jump'
4051          can go without being ignored.  It is normally equal to `p'
4052          (which prevents any backward `on_failure_jump') except right
4053          after a plain `jump', to allow patterns such as:
4054             0: jump 10
4055             3..9: <body>
4056             10: on_failure_jump 3
4057          as used for the *? operator.  */
4058       re_char *p1 = p;
4059 
4060       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4061         {
4062         case succeed:
4063           return 1;
4064           continue;
4065 
4066         case duplicate:
4067           /* If the first character has to match a backreference, that means
4068              that the group was empty (since it already matched).  Since this
4069              is the only case that interests us here, we can assume that the
4070              backreference must match the empty string.  */
4071           p++;
4072           continue;
4073 
4074 
4075       /* Following are the cases which match a character.  These end
4076          with `break'.  */
4077 
4078         case exactn:
4079           if (fastmap)
4080             {
4081               /* If multibyte is nonzero, the first byte of each
4082                  character is an ASCII or a leading code.  Otherwise,
4083                  each byte is a character.  Thus, this works in both
4084                  cases. */
4085               fastmap[p[1]] = 1;
4086               if (! multibyte)
4087                 {
4088                   /* For the case of matching this unibyte regex
4089                      against multibyte, we must set a leading code of
4090                      the corresponding multibyte character.  */
4091                   int c = RE_CHAR_TO_MULTIBYTE (p[1]);
4092 
4093                   fastmap[CHAR_LEADING_CODE (c)] = 1;
4094                 }
4095             }
4096           break;
4097 
4098 
4099         case anychar:
4100           /* We could put all the chars except for \n (and maybe \0)
4101              but we don't bother since it is generally not worth it.  */
4102           if (!fastmap) break;
4103           return -1;
4104 
4105 
4106         case charset_not:
4107           if (!fastmap) break;
4108           {
4109             /* Chars beyond end of bitmap are possible matches.  */
4110             for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
4111                  j < (1 << BYTEWIDTH); j++)
4112               fastmap[j] = 1;
4113           }
4114 
4115           /* Fallthrough */
4116         case charset:
4117           if (!fastmap) break;
4118           not = (re_opcode_t) *(p - 1) == charset_not;
4119           for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
4120                j >= 0; j--)
4121             if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
4122               fastmap[j] = 1;
4123 
4124 #ifdef emacs
4125           if (/* Any leading code can possibly start a character
4126                  which doesn't match the specified set of characters.  */
4127               not
4128               || 
4129               /* If we can match a character class, we can match any
4130                  multibyte characters.  */
4131               (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
4132                && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
4133 
4134             {
4135               if (match_any_multibyte_characters == false)
4136                 {
4137                   for (j = MIN_MULTIBYTE_LEADING_CODE;
4138                        j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4139                     fastmap[j] = 1;
4140                   match_any_multibyte_characters = true;
4141                 }
4142             }
4143 
4144           else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
4145                    && match_any_multibyte_characters == false)
4146             {
4147               /* Set fastmap[I] to 1 where I is a leading code of each
4148                  multibyte characer in the range table. */
4149               int c, count;
4150               unsigned char lc1, lc2;
4151 
4152               /* Make P points the range table.  `+ 2' is to skip flag
4153                  bits for a character class.  */
4154               p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
4155 
4156               /* Extract the number of ranges in range table into COUNT.  */
4157               EXTRACT_NUMBER_AND_INCR (count, p);
4158               for (; count > 0; count--, p += 3)
4159                 {
4160                   /* Extract the start and end of each range.  */
4161                   EXTRACT_CHARACTER (c, p);
4162                   lc1 = CHAR_LEADING_CODE (c);
4163                   p += 3;
4164                   EXTRACT_CHARACTER (c, p);
4165                   lc2 = CHAR_LEADING_CODE (c);
4166                   for (j = lc1; j <= lc2; j++)
4167                     fastmap[j] = 1;
4168                 }
4169             }
4170 #endif
4171           break;
4172 
4173         case syntaxspec:
4174         case notsyntaxspec:
4175           if (!fastmap) break;
4176 #ifndef emacs
4177           not = (re_opcode_t)p[-1] == notsyntaxspec;
4178           k = *p++;
4179           for (j = 0; j < (1 << BYTEWIDTH); j++)
4180             if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
4181               fastmap[j] = 1;
4182           break;
4183 #else  /* emacs */
4184           /* This match depends on text properties.  These end with
4185              aborting optimizations.  */
4186           return -1;
4187 
4188         case categoryspec:
4189         case notcategoryspec:
4190           if (!fastmap) break;
4191           not = (re_opcode_t)p[-1] == notcategoryspec;
4192           k = *p++;
4193           for (j = (1 << BYTEWIDTH); j >= 0; j--)
4194             if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
4195               fastmap[j] = 1;
4196 
4197           /* Any leading code can possibly start a character which
4198              has or doesn't has the specified category.  */
4199           if (match_any_multibyte_characters == false)
4200             {
4201               for (j = MIN_MULTIBYTE_LEADING_CODE;
4202                    j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4203                 fastmap[j] = 1;
4204               match_any_multibyte_characters = true;
4205             }
4206           break;
4207 
4208       /* All cases after this match the empty string.  These end with
4209          `continue'.  */
4210 
4211         case before_dot:
4212         case at_dot:
4213         case after_dot:
4214 #endif /* !emacs */
4215         case no_op:
4216         case begline:
4217         case endline:
4218         case begbuf:
4219         case endbuf:
4220         case wordbound:
4221         case notwordbound:
4222         case wordbeg:
4223         case wordend:
4224         case symbeg:
4225         case symend:
4226           continue;
4227 
4228 
4229         case jump:
4230           EXTRACT_NUMBER_AND_INCR (j, p);
4231           if (j < 0)
4232             /* Backward jumps can only go back to code that we've already
4233                visited.  `re_compile' should make sure this is true.  */
4234             break;
4235           p += j;
4236           switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4237             {
4238             case on_failure_jump:
4239             case on_failure_keep_string_jump:
4240             case on_failure_jump_loop:
4241             case on_failure_jump_nastyloop:
4242             case on_failure_jump_smart:
4243               p++;
4244               break;
4245             default:
4246               continue;
4247             };
4248           /* Keep `p1' to allow the `on_failure_jump' we are jumping to
4249              to jump back to "just after here".  */
4250           /* Fallthrough */
4251 
4252         case on_failure_jump:
4253         case on_failure_keep_string_jump:
4254         case on_failure_jump_nastyloop:
4255         case on_failure_jump_loop:
4256         case on_failure_jump_smart:
4257           EXTRACT_NUMBER_AND_INCR (j, p);
4258           if (p + j <= p1)
4259             ; /* Backward jump to be ignored.  */
4260           else
4261             { /* We have to look down both arms.
4262                  We first go down the "straight" path so as to minimize
4263                  stack usage when going through alternatives.  */
4264               int r = analyse_first (p, pend, fastmap, multibyte);
4265               if (r) return r;
4266               p += j;
4267             }
4268           continue;
4269 
4270 
4271         case jump_n:
4272           /* This code simply does not properly handle forward jump_n.  */
4273           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
4274           p += 4;
4275           /* jump_n can either jump or fall through.  The (backward) jump
4276              case has already been handled, so we only need to look at the
4277              fallthrough case.  */
4278           continue;
4279 
4280         case succeed_n:
4281           /* If N == 0, it should be an on_failure_jump_loop instead.  */
4282           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
4283           p += 4;
4284           /* We only care about one iteration of the loop, so we don't
4285              need to consider the case where this behaves like an
4286              on_failure_jump.  */
4287           continue;
4288 
4289 
4290         case set_number_at:
4291           p += 4;
4292           continue;
4293 
4294 
4295         case start_memory:
4296         case stop_memory:
4297           p += 1;
4298           continue;
4299 
4300 
4301         default:
4302           abort (); /* We have listed all the cases.  */
4303         } /* switch *p++ */
4304 
4305       /* Getting here means we have found the possible starting
4306          characters for one path of the pattern -- and that the empty
4307          string does not match.  We need not follow this path further.  */
4308       return 0;
4309     } /* while p */
4310 
4311   /* We reached the end without matching anything.  */
4312   return 1;
4313 
4314 } /* analyse_first */
4315 
4316 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
4317    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
4318    characters can start a string that matches the pattern.  This fastmap
4319    is used by re_search to skip quickly over impossible starting points.
4320 
4321    Character codes above (1 << BYTEWIDTH) are not represented in the
4322    fastmap, but the leading codes are represented.  Thus, the fastmap
4323    indicates which character sets could start a match.
4324 
4325    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
4326    area as BUFP->fastmap.
4327 
4328    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
4329    the pattern buffer.
4330 
4331    Returns 0 if we succeed, -2 if an internal error.   */
4332 
4333 int
4334 re_compile_fastmap (bufp)
4335      struct re_pattern_buffer *bufp;
4336 {
4337   char *fastmap = bufp->fastmap;
4338   int analysis;
4339 
4340   assert (fastmap && bufp->buffer);
4341 
4342   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
4343   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
4344 
4345   analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
4346                             fastmap, RE_MULTIBYTE_P (bufp));
4347   bufp->can_be_null = (analysis != 0);
4348   return 0;
4349 } /* re_compile_fastmap */
4350 
4351 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
4352    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
4353    this memory for recording register information.  STARTS and ENDS
4354    must be allocated using the malloc library routine, and must each
4355    be at least NUM_REGS * sizeof (regoff_t) bytes long.
4356 
4357    If NUM_REGS == 0, then subsequent matches should allocate their own
4358    register data.
4359 
4360    Unless this function is called, the first search or match using
4361    PATTERN_BUFFER will allocate its own register data, without
4362    freeing the old data.  */
4363 
4364 void
4365 re_set_registers (bufp, regs, num_regs, starts, ends)
4366     struct re_pattern_buffer *bufp;
4367     struct re_registers *regs;
4368     unsigned num_regs;
4369     regoff_t *starts, *ends;
4370 {
4371   if (num_regs)
4372     {
4373       bufp->regs_allocated = REGS_REALLOCATE;
4374       regs->num_regs = num_regs;
4375       regs->start = starts;
4376       regs->end = ends;
4377     }
4378   else
4379     {
4380       bufp->regs_allocated = REGS_UNALLOCATED;
4381       regs->num_regs = 0;
4382       regs->start = regs->end = (regoff_t *) 0;
4383     }
4384 }
4385 WEAK_ALIAS (__re_set_registers, re_set_registers)
4386 
4387 /* Searching routines.  */
4388 
4389 /* Like re_search_2, below, but only one string is specified, and
4390    doesn't let you say where to stop matching. */
4391 
4392 int
4393 re_search (bufp, string, size, startpos, range, regs)
4394      struct re_pattern_buffer *bufp;
4395      const char *string;
4396      int size, startpos, range;
4397      struct re_registers *regs;
4398 {
4399   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
4400                       regs, size);
4401 }
4402 WEAK_ALIAS (__re_search, re_search)
4403 
4404 /* Head address of virtual concatenation of string.  */
4405 #define HEAD_ADDR_VSTRING(P)            \
4406   (((P) >= size1 ? string2 : string1))
4407 
4408 /* End address of virtual concatenation of string.  */
4409 #define STOP_ADDR_VSTRING(P)                            \
4410   (((P) >= size1 ? string2 + size2 : string1 + size1))
4411 
4412 /* Address of POS in the concatenation of virtual string. */
4413 #define POS_ADDR_VSTRING(POS)                                   \
4414   (((POS) >= size1 ? string2 - size1 : string1) + (POS))
4415 
4416 /* Using the compiled pattern in BUFP->buffer, first tries to match the
4417    virtual concatenation of STRING1 and STRING2, starting first at index
4418    STARTPOS, then at STARTPOS + 1, and so on.
4419 
4420    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
4421 
4422    RANGE is how far to scan while trying to match.  RANGE = 0 means try
4423    only at STARTPOS; in general, the last start tried is STARTPOS +
4424    RANGE.
4425 
4426    In REGS, return the indices of the virtual concatenation of STRING1
4427    and STRING2 that matched the entire BUFP->buffer and its contained
4428    subexpressions.
4429 
4430    Do not consider matching one past the index STOP in the virtual
4431    concatenation of STRING1 and STRING2.
4432 
4433    We return either the position in the strings at which the match was
4434    found, -1 if no match, or -2 if error (such as failure
4435    stack overflow).  */
4436 
4437 int
4438 re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
4439      struct re_pattern_buffer *bufp;
4440      const char *str1, *str2;
4441      int size1, size2;
4442      int startpos;
4443      int range;
4444      struct re_registers *regs;
4445      int stop;
4446 {
4447   int val;
4448   re_char *string1 = (re_char*) str1;
4449   re_char *string2 = (re_char*) str2;
4450   register char *fastmap = bufp->fastmap;
4451   register RE_TRANSLATE_TYPE translate = bufp->translate;
4452   int total_size = size1 + size2;
4453   int endpos = startpos + range;
4454   boolean anchored_start;
4455   /* Nonzero if we are searching multibyte string.  */
4456   const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp);
4457 
4458   /* Check for out-of-range STARTPOS.  */
4459   if (startpos < 0 || startpos > total_size)
4460     return -1;
4461 
4462   /* Fix up RANGE if it might eventually take us outside
4463      the virtual concatenation of STRING1 and STRING2.
4464      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
4465   if (endpos < 0)
4466     range = 0 - startpos;
4467   else if (endpos > total_size)
4468     range = total_size - startpos;
4469 
4470   /* If the search isn't to be a backwards one, don't waste time in a
4471      search for a pattern anchored at beginning of buffer.  */
4472   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
4473     {
4474       if (startpos > 0)
4475         return -1;
4476       else
4477         range = 0;
4478     }
4479 
4480 #ifdef emacs
4481   /* In a forward search for something that starts with \=.
4482      don't keep searching past point.  */
4483   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4484     {
4485       range = PT_BYTE - BEGV_BYTE - startpos;
4486       if (range < 0)
4487         return -1;
4488     }
4489 #endif /* emacs */
4490 
4491   /* Update the fastmap now if not correct already.  */
4492   if (fastmap && !bufp->fastmap_accurate)
4493     re_compile_fastmap (bufp);
4494 
4495   /* See whether the pattern is anchored.  */
4496   anchored_start = (bufp->buffer[0] == begline);
4497 
4498 #ifdef emacs
4499   gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
4500   {
4501     int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
4502 
4503     SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4504   }
4505 #endif
4506 
4507   /* Loop through the string, looking for a place to start matching.  */
4508   for (;;)
4509     {
4510       /* If the pattern is anchored,
4511          skip quickly past places we cannot match.
4512          We don't bother to treat startpos == 0 specially
4513          because that case doesn't repeat.  */
4514       if (anchored_start && startpos > 0)
4515         {
4516           if (! ((startpos <= size1 ? string1[startpos - 1]
4517                   : string2[startpos - size1 - 1])
4518                  == '\n'))
4519             goto advance;
4520         }
4521 
4522       /* If a fastmap is supplied, skip quickly over characters that
4523          cannot be the start of a match.  If the pattern can match the
4524          null string, however, we don't need to skip characters; we want
4525          the first null string.  */
4526       if (fastmap && startpos < total_size && !bufp->can_be_null)
4527         {
4528           register re_char *d;
4529           register re_wchar_t buf_ch;
4530 
4531           d = POS_ADDR_VSTRING (startpos);
4532 
4533           if (range > 0)        /* Searching forwards.  */
4534             {
4535               register int lim = 0;
4536               int irange = range;
4537 
4538               if (startpos < size1 && startpos + range >= size1)
4539                 lim = range - (size1 - startpos);
4540 
4541               /* Written out as an if-else to avoid testing `translate'
4542                  inside the loop.  */
4543               if (RE_TRANSLATE_P (translate))
4544                 {
4545                   if (multibyte)
4546                     while (range > lim)
4547                       {
4548                         int buf_charlen;
4549 
4550                         buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
4551                         buf_ch = RE_TRANSLATE (translate, buf_ch);
4552                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
4553                           break;
4554 
4555                         range -= buf_charlen;
4556                         d += buf_charlen;
4557                       }
4558                   else
4559                     while (range > lim)
4560                       {
4561                         register re_wchar_t ch, translated;
4562 
4563                         buf_ch = *d;
4564                         ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4565                         translated = RE_TRANSLATE (translate, ch);
4566                         if (translated != ch
4567                             && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4568                           buf_ch = ch;
4569                         if (fastmap[buf_ch])
4570                           break;
4571                         d++;
4572                         range--;
4573                       }
4574                 }
4575               else
4576                 {
4577                   if (multibyte)
4578                     while (range > lim)
4579                       {
4580                         int buf_charlen;
4581 
4582                         buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
4583                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
4584                           break;
4585                         range -= buf_charlen;
4586                         d += buf_charlen;
4587                       }
4588                   else
4589                     while (range > lim && !fastmap[*d])
4590                       {
4591                         d++;
4592                         range--;
4593                       }
4594                 }
4595               startpos += irange - range;
4596             }
4597           else                          /* Searching backwards.  */
4598             {
4599               if (multibyte)
4600                 {
4601                   buf_ch = STRING_CHAR (d);
4602                   buf_ch = TRANSLATE (buf_ch);
4603                   if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
4604                     goto advance;
4605                 }
4606               else
4607                 {
4608                   register re_wchar_t ch, translated;
4609 
4610                   buf_ch = *d;
4611                   ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4612                   translated = TRANSLATE (ch);
4613                   if (translated != ch
4614                       && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4615                     buf_ch = ch;
4616                   if (! fastmap[TRANSLATE (buf_ch)])
4617                     goto advance;
4618                 }
4619             }
4620         }
4621 
4622       /* If can't match the null string, and that's all we have left, fail.  */
4623       if (range >= 0 && startpos == total_size && fastmap
4624           && !bufp->can_be_null)
4625         return -1;
4626 
4627       val = re_match_2_internal (bufp, string1, size1, string2, size2,
4628                                  startpos, regs, stop);
4629 
4630       if (val >= 0)
4631         return startpos;
4632 
4633       if (val == -2)
4634         return -2;
4635 
4636     advance:
4637       if (!range)
4638         break;
4639       else if (range > 0)
4640         {
4641           /* Update STARTPOS to the next character boundary.  */
4642           if (multibyte)
4643             {
4644               re_char *p = POS_ADDR_VSTRING (startpos);
4645               re_char *pend = STOP_ADDR_VSTRING (startpos);
4646               int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
4647 
4648               range -= len;
4649               if (range < 0)
4650                 break;
4651               startpos += len;
4652             }
4653           else
4654             {
4655               range--;
4656               startpos++;
4657             }
4658         }
4659       else
4660         {
4661           range++;
4662           startpos--;
4663 
4664           /* Update STARTPOS to the previous character boundary.  */
4665           if (multibyte)
4666             {
4667               re_char *p = POS_ADDR_VSTRING (startpos) + 1;
4668               re_char *p0 = p;
4669               re_char *phead = HEAD_ADDR_VSTRING (startpos);
4670 
4671               /* Find the head of multibyte form.  */
4672               PREV_CHAR_BOUNDARY (p, phead);
4673               range += p0 - 1 - p;
4674               if (range > 0)
4675                 break;
4676 
4677               startpos -= p0 - 1 - p;
4678             }
4679         }
4680     }
4681   return -1;
4682 } /* re_search_2 */
4683 WEAK_ALIAS (__re_search_2, re_search_2)
4684 
4685 /* Declarations and macros for re_match_2.  */
4686 
4687 static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4688                                     register int len,
4689                                     RE_TRANSLATE_TYPE translate,
4690                                     const int multibyte));
4691 
4692 /* This converts PTR, a pointer into one of the search strings `string1'
4693    and `string2' into an offset from the beginning of that string.  */
4694 #define POINTER_TO_OFFSET(ptr)                  \
4695   (FIRST_STRING_P (ptr)                         \
4696    ? ((regoff_t) ((ptr) - string1))             \
4697    : ((regoff_t) ((ptr) - string2 + size1)))
4698 
4699 /* Call before fetching a character with *d.  This switches over to
4700    string2 if necessary.
4701    Check re_match_2_internal for a discussion of why end_match_2 might
4702    not be within string2 (but be equal to end_match_1 instead).  */
4703 #define PREFETCH()                                                      \
4704   while (d == dend)                                                     \
4705     {                                                                   \
4706       /* End of string2 => fail.  */                                    \
4707       if (dend == end_match_2)                                          \
4708         goto fail;                                                      \
4709       /* End of string1 => advance to string2.  */                      \
4710       d = string2;                                                      \
4711       dend = end_match_2;                                               \
4712     }
4713 
4714 /* Call before fetching a char with *d if you already checked other limits.
4715    This is meant for use in lookahead operations like wordend, etc..
4716    where we might need to look at parts of the string that might be
4717    outside of the LIMITs (i.e past `stop').  */
4718 #define PREFETCH_NOLIMIT()                                              \
4719   if (d == end1)                                                        \
4720      {                                                                  \
4721        d = string2;                                                     \
4722        dend = end_match_2;                                              \
4723      }                                                                  \
4724 
4725 /* Test if at very beginning or at very end of the virtual concatenation
4726    of `string1' and `string2'.  If only one string, it's `string2'.  */
4727 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4728 #define AT_STRINGS_END(d) ((d) == end2)
4729 
4730 
4731 /* Test if D points to a character which is word-constituent.  We have
4732    two special cases to check for: if past the end of string1, look at
4733    the first character in string2; and if before the beginning of
4734    string2, look at the last character in string1.  */
4735 #define WORDCHAR_P(d)                                                   \
4736   (SYNTAX ((d) == end1 ? *string2                                       \
4737            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
4738    == Sword)
4739 
4740 /* Disabled due to a compiler bug -- see comment at case wordbound */
4741 
4742 /* The comment at case wordbound is following one, but we don't use
4743    AT_WORD_BOUNDARY anymore to support multibyte form.
4744 
4745    The DEC Alpha C compiler 3.x generates incorrect code for the
4746    test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4747    AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4748    macro and introducing temporary variables works around the bug.  */
4749 
4750 #if 0
4751 /* Test if the character before D and the one at D differ with respect
4752    to being word-constituent.  */
4753 #define AT_WORD_BOUNDARY(d)                                             \
4754   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
4755    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4756 #endif
4757 
4758 /* Free everything we malloc.  */
4759 #ifdef MATCH_MAY_ALLOCATE
4760 # define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4761 # define FREE_VARIABLES()                                               \
4762   do {                                                                  \
4763     REGEX_FREE_STACK (fail_stack.stack);                                \
4764     FREE_VAR (regstart);                                                \
4765     FREE_VAR (regend);                                                  \
4766     FREE_VAR (best_regstart);                                           \
4767     FREE_VAR (best_regend);                                             \
4768   } while (0)
4769 #else
4770 # define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
4771 #endif /* not MATCH_MAY_ALLOCATE */
4772 
4773 
4774 /* Optimization routines.  */
4775 
4776 /* If the operation is a match against one or more chars,
4777    return a pointer to the next operation, else return NULL.  */
4778 static re_char *
4779 skip_one_char (p)
4780      re_char *p;
4781 {
4782   switch (SWITCH_ENUM_CAST (*p++))
4783     {
4784     case anychar:
4785       break;
4786 
4787     case exactn:
4788       p += *p + 1;
4789       break;
4790 
4791     case charset_not:
4792     case charset:
4793       if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4794         {
4795           int mcnt;
4796           p = CHARSET_RANGE_TABLE (p - 1);
4797           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4798           p = CHARSET_RANGE_TABLE_END (p, mcnt);
4799         }
4800       else
4801         p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4802       break;
4803 
4804     case syntaxspec:
4805     case notsyntaxspec:
4806 #ifdef emacs
4807     case categoryspec:
4808     case notcategoryspec:
4809 #endif /* emacs */
4810       p++;
4811       break;
4812 
4813     default:
4814       p = NULL;
4815     }
4816   return p;
4817 }
4818 
4819 
4820 /* Jump over non-matching operations.  */
4821 static re_char *
4822 skip_noops (p, pend)
4823      re_char *p, *pend;
4824 {
4825   int mcnt;
4826   while (p < pend)
4827     {
4828       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4829         {
4830         case start_memory:
4831         case stop_memory:
4832           p += 2; break;
4833         case no_op:
4834           p += 1; break;
4835         case jump:
4836           p += 1;
4837           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4838           p += mcnt;
4839           break;
4840         default:
4841           return p;
4842         }
4843     }
4844   assert (p == pend);
4845   return p;
4846 }
4847 
4848 /* Non-zero if "p1 matches something" implies "p2 fails".  */
4849 static int
4850 mutually_exclusive_p (bufp, p1, p2)
4851      struct re_pattern_buffer *bufp;
4852      re_char *p1, *p2;
4853 {
4854   re_opcode_t op2;
4855   const boolean multibyte = RE_MULTIBYTE_P (bufp);
4856   unsigned char *pend = bufp->buffer + bufp->used;
4857 
4858   assert (p1 >= bufp->buffer && p1 < pend
4859           && p2 >= bufp->buffer && p2 <= pend);
4860 
4861   /* Skip over open/close-group commands.
4862      If what follows this loop is a ...+ construct,
4863      look at what begins its body, since we will have to
4864      match at least one of that.  */
4865   p2 = skip_noops (p2, pend);
4866   /* The same skip can be done for p1, except that this function
4867      is only used in the case where p1 is a simple match operator.  */
4868   /* p1 = skip_noops (p1, pend); */
4869 
4870   assert (p1 >= bufp->buffer && p1 < pend
4871           && p2 >= bufp->buffer && p2 <= pend);
4872 
4873   op2 = p2 == pend ? succeed : *p2;
4874 
4875   switch (SWITCH_ENUM_CAST (op2))
4876     {
4877     case succeed:
4878     case endbuf:
4879       /* If we're at the end of the pattern, we can change.  */
4880       if (skip_one_char (p1))
4881         {
4882           DEBUG_PRINT1 ("  End of pattern: fast loop.\n");
4883           return 1;
4884         }
4885       break;
4886 
4887     case endline:
4888     case exactn:
4889       {
4890         register re_wchar_t c
4891           = (re_opcode_t) *p2 == endline ? '\n'
4892           : RE_STRING_CHAR (p2 + 2, multibyte);
4893 
4894         if ((re_opcode_t) *p1 == exactn)
4895           {
4896             if (c != RE_STRING_CHAR (p1 + 2, multibyte))
4897               {
4898                 DEBUG_PRINT3 ("  '%c' != '%c' => fast loop.\n", c, p1[2]);
4899                 return 1;
4900               }
4901           }
4902 
4903         else if ((re_opcode_t) *p1 == charset
4904                  || (re_opcode_t) *p1 == charset_not)
4905           {
4906             int not = (re_opcode_t) *p1 == charset_not;
4907 
4908             /* Test if C is listed in charset (or charset_not)
4909                at `p1'.  */
4910             if (! multibyte || IS_REAL_ASCII (c))
4911               {
4912                 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4913                     && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4914                   not = !not;
4915               }
4916             else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4917               CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
4918 
4919             /* `not' is equal to 1 if c would match, which means
4920                that we can't change to pop_failure_jump.  */
4921             if (!not)
4922               {
4923                 DEBUG_PRINT1 ("  No match => fast loop.\n");
4924                 return 1;
4925               }
4926           }
4927         else if ((re_opcode_t) *p1 == anychar
4928                  && c == '\n')
4929           {
4930             DEBUG_PRINT1 ("   . != \\n => fast loop.\n");
4931             return 1;
4932           }
4933       }
4934       break;
4935 
4936     case charset:
4937       {
4938         if ((re_opcode_t) *p1 == exactn)
4939           /* Reuse the code above.  */
4940           return mutually_exclusive_p (bufp, p2, p1);
4941 
4942       /* It is hard to list up all the character in charset
4943          P2 if it includes multibyte character.  Give up in
4944          such case.  */
4945       else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4946         {
4947           /* Now, we are sure that P2 has no range table.
4948              So, for the size of bitmap in P2, `p2[1]' is
4949              enough.  But P1 may have range table, so the
4950              size of bitmap table of P1 is extracted by
4951              using macro `CHARSET_BITMAP_SIZE'.
4952 
4953              In a multibyte case, we know that all the character
4954              listed in P2 is ASCII.  In a unibyte case, P1 has only a
4955              bitmap table.  So, in both cases, it is enough to test
4956              only the bitmap table of P1.  */
4957 
4958           if ((re_opcode_t) *p1 == charset)
4959             {
4960               int idx;
4961               /* We win if the charset inside the loop
4962                  has no overlap with the one after the loop.  */
4963               for (idx = 0;
4964                    (idx < (int) p2[1]
4965                     && idx < CHARSET_BITMAP_SIZE (p1));
4966                    idx++)
4967                 if ((p2[2 + idx] & p1[2 + idx]) != 0)
4968                   break;
4969 
4970               if (idx == p2[1]
4971                   || idx == CHARSET_BITMAP_SIZE (p1))
4972                 {
4973                   DEBUG_PRINT1 ("        No match => fast loop.\n");
4974                   return 1;
4975                 }
4976             }
4977           else if ((re_opcode_t) *p1 == charset_not)
4978             {
4979               int idx;
4980               /* We win if the charset_not inside the loop lists
4981                  every character listed in the charset after.  */
4982               for (idx = 0; idx < (int) p2[1]; idx++)
4983                 if (! (p2[2 + idx] == 0
4984                        || (idx < CHARSET_BITMAP_SIZE (p1)
4985                            && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4986                   break;
4987 
4988                 if (idx == p2[1])
4989                   {
4990                     DEBUG_PRINT1 ("      No match => fast loop.\n");
4991                     return 1;
4992                   }
4993               }
4994           }
4995       }
4996       break;
4997 
4998     case charset_not:
4999       switch (SWITCH_ENUM_CAST (*p1))
5000         {
5001         case exactn:
5002         case charset:
5003           /* Reuse the code above.  */
5004           return mutually_exclusive_p (bufp, p2, p1);
5005         case charset_not:
5006           /* When we have two charset_not, it's very unlikely that
5007              they don't overlap.  The union of the two sets of excluded
5008              chars should cover all possible chars, which, as a matter of
5009              fact, is virtually impossible in multibyte buffers.  */
5010           break;
5011         }
5012       break;
5013 
5014     case wordend:
5015       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
5016     case symend:
5017       return ((re_opcode_t) *p1 == syntaxspec
5018               && (p1[1] == Ssymbol || p1[1] == Sword));
5019     case notsyntaxspec:
5020       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
5021 
5022     case wordbeg:
5023       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
5024     case symbeg:
5025       return ((re_opcode_t) *p1 == notsyntaxspec
5026               && (p1[1] == Ssymbol || p1[1] == Sword));
5027     case syntaxspec:
5028       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
5029 
5030     case wordbound:
5031       return (((re_opcode_t) *p1 == notsyntaxspec
5032                || (re_opcode_t) *p1 == syntaxspec)
5033               && p1[1] == Sword);
5034 
5035 #ifdef emacs
5036     case categoryspec:
5037       return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
5038     case notcategoryspec:
5039       return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
5040 #endif /* emacs */
5041 
5042     default:
5043       ;
5044     }
5045 
5046   /* Safe default.  */
5047   return 0;
5048 }
5049 
5050 
5051 /* Matching routines.  */
5052 
5053 #ifndef emacs   /* Emacs never uses this.  */
5054 /* re_match is like re_match_2 except it takes only a single string.  */
5055 
5056 int
5057 re_match (bufp, string, size, pos, regs)
5058      struct re_pattern_buffer *bufp;
5059      const char *string;
5060      int size, pos;
5061      struct re_registers *regs;
5062 {
5063   int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
5064                                     pos, regs, size);
5065   return result;
5066 }
5067 WEAK_ALIAS (__re_match, re_match)
5068 #endif /* not emacs */
5069 
5070 #ifdef emacs
5071 /* In Emacs, this is the string or buffer in which we
5072    are matching.  It is used for looking up syntax properties.  */
5073 Lisp_Object re_match_object;
5074 #endif
5075 
5076 /* re_match_2 matches the compiled pattern in BUFP against the
5077    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
5078    and SIZE2, respectively).  We start matching at POS, and stop
5079    matching at STOP.
5080 
5081    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
5082    store offsets for the substring each group matched in REGS.  See the
5083    documentation for exactly how many groups we fill.
5084 
5085    We return -1 if no match, -2 if an internal error (such as the
5086    failure stack overflowing).  Otherwise, we return the length of the
5087    matched substring.  */
5088 
5089 int
5090 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
5091      struct re_pattern_buffer *bufp;
5092      const char *string1, *string2;
5093      int size1, size2;
5094      int pos;
5095      struct re_registers *regs;
5096      int stop;
5097 {
5098   int result;
5099 
5100 #ifdef emacs
5101   int charpos;
5102   gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
5103   charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
5104   SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
5105