1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
23
24 25 26 27 28 29 30 31
32
33
34 #if defined _AIX && !defined REGEX_MALLOC
35 #pragma alloca
36 #endif
37
38 #ifdef HAVE_CONFIG_H
39 # include <config.h>
40 #endif
41
42 #if defined STDC_HEADERS && !defined emacs
43 # include <stddef.h>
44 #else
45
46 # include <sys/types.h>
47 #endif
48
49 50
51 #if defined _LIBC
52 #define WIDE_CHAR_SUPPORT 1
53 #else
54 #define WIDE_CHAR_SUPPORT \
55 (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
56 #endif
57
58 59
60 #if WIDE_CHAR_SUPPORT
61
62 # include <wchar.h>
63 # include <wctype.h>
64 #endif
65
66 #ifdef _LIBC
67
68 # define regfree(preg) __regfree (preg)
69 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
70 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
71 # define regerror(err_code, preg, errbuf, errbuf_size) \
72 __regerror(err_code, preg, errbuf, errbuf_size)
73 # define re_set_registers(bu, re, nu, st, en) \
74 __re_set_registers (bu, re, nu, st, en)
75 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
76 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
77 # define re_match(bufp, string, size, pos, regs) \
78 __re_match (bufp, string, size, pos, regs)
79 # define re_search(bufp, string, size, startpos, range, regs) \
80 __re_search (bufp, string, size, startpos, range, regs)
81 # define re_compile_pattern(pattern, length, bufp) \
82 __re_compile_pattern (pattern, length, bufp)
83 # define re_set_syntax(syntax) __re_set_syntax (syntax)
84 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
85 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
86 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
87
88
89 # define btowc __btowc
90 # define iswctype __iswctype
91 # define wctype __wctype
92
93 # define WEAK_ALIAS(a,b) weak_alias (a, b)
94
95
96 # include <locale/localeinfo.h>
97 # include <locale/elem-hash.h>
98 # include <langinfo.h>
99 #else
100 # define WEAK_ALIAS(a,b)
101 #endif
102
103
104 #if HAVE_LIBINTL_H || defined _LIBC
105 # include <libintl.h>
106 #else
107 # define gettext(msgid) (msgid)
108 #endif
109
110 #ifndef gettext_noop
111 112
113 # define gettext_noop(String) String
114 #endif
115
116 117
118 #ifdef emacs
119
120 # include <setjmp.h>
121 # include "lisp.h"
122 # include "buffer.h"
123
124
125 # define SYNTAX_ENTRY_VIA_PROPERTY
126
127 # include "syntax.h"
128 # include "character.h"
129 # include "category.h"
130
131 # ifdef malloc
132 # undef malloc
133 # endif
134 # define malloc xmalloc
135 # ifdef realloc
136 # undef realloc
137 # endif
138 # define realloc xrealloc
139 # ifdef free
140 # undef free
141 # endif
142 # define free xfree
143
144
145 # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
146 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
147
148 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
149 # define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
150 # define RE_STRING_CHAR(p, multibyte) \
151 (multibyte ? (STRING_CHAR (p)) : (*(p)))
152 # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
153 (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
154
155 # define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
156
157 # define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
158
159 160 161
/* Store in C the character that ends just before position P in a
   subject made of two parts, STR1..END1 followed by STR2..END2.  When
   P is exactly STR2 the character is taken from the end of the first
   part.  Reads `target_multibyte' from the scope of the caller.
   Every argument is parenthesized in the expansion so that arbitrary
   expressions can be passed.  */
# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)		     \
  do {									     \
    if (target_multibyte)						     \
      {									     \
	re_char *dtemp = (p) == (str2) ? (end1) : (p);			     \
	re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
	/* Step back until a byte that starts a character is found.  */     \
	while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));		     \
	(c) = STRING_CHAR (dtemp);					     \
      }									     \
    else								     \
      {									     \
	(c) = ((p) == (str2) ? (end1) : (p))[-1];			     \
	(c) = RE_CHAR_TO_MULTIBYTE (c);					     \
      }									     \
  } while (0)

/* Store in C the character starting at P and in LEN the number of
   bytes it occupies (always 1 when the target is unibyte).  Reads
   `target_multibyte' from the scope of the caller.  P and LEN are
   parenthesized so that expressions such as `d + 1' are dereferenced
   as a whole.  */
# define GET_CHAR_AFTER(c, p, len)		\
  do {						\
    if (target_multibyte)			\
      (c) = STRING_CHAR_AND_LENGTH (p, len);	\
    else					\
      {						\
	(c) = *(p);				\
	(len) = 1;				\
	(c) = RE_CHAR_TO_MULTIBYTE (c);		\
      }						\
  } while (0)
191
192 #else
193
194 195 196
197 # undef REL_ALLOC
198
199 # if defined STDC_HEADERS || defined _LIBC
200 # include <stdlib.h>
201 # else
202 char *malloc ();
203 char *realloc ();
204 # endif
205
206
207
/* Allocate SIZE bytes with malloc.
   If the allocation fails for a non-zero SIZE, write
   "virtual memory exhausted" to file descriptor 2 and exit with
   status 1; this function then does not return.
   Returns the pointer obtained from malloc, which can be NULL only
   when SIZE is 0.  */
void *
xmalloc (size_t size)
{
  static const char oom_msg[] = "virtual memory exhausted\n";
  void *val = malloc (size);

  if (val == NULL && size != 0)
    {
      write (2, oom_msg, sizeof oom_msg - 1);
      exit (1);
    }
  return val;
}
221
/* Resize BLOCK to SIZE bytes, or allocate a fresh block when BLOCK is
   NULL.
   If the allocation fails for a non-zero SIZE, write
   "virtual memory exhausted" to file descriptor 2 and exit with
   status 1; this function then does not return.
   Returns the pointer obtained from malloc/realloc, which can be NULL
   only when SIZE is 0.  */
void *
xrealloc (void *block, size_t size)
{
  static const char oom_msg[] = "virtual memory exhausted\n";
  /* malloc is called explicitly for a null BLOCK, presumably for the
     sake of realloc implementations that do not accept NULL.  */
  void *val = block ? realloc (block, size) : malloc (size);

  if (val == NULL && size != 0)
    {
      write (2, oom_msg, sizeof oom_msg - 1);
      exit (1);
    }
  return val;
}
241
242 # ifdef malloc
243 # undef malloc
244 # endif
245 # define malloc xmalloc
246 # ifdef realloc
247 # undef realloc
248 # endif
249 # define realloc xrealloc
250
251 252
253 # ifdef INHIBIT_STRING_HEADER
254 # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
255 # if !defined bzero && !defined bcopy
256 # undef INHIBIT_STRING_HEADER
257 # endif
258 # endif
259 # endif
260
261 262 263
264 # ifndef INHIBIT_STRING_HEADER
265 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
266 # include <string.h>
267 # ifndef bzero
268 # ifndef _LIBC
269 # define bzero(s, n) (memset (s, '\0', n), (s))
270 # else
271 # define bzero(s, n) __bzero (s, n)
272 # endif
273 # endif
274 # else
275 # include <strings.h>
276 # ifndef memcmp
277 # define memcmp(s1, s2, n) bcmp (s1, s2, n)
278 # endif
279 # ifndef memcpy
280 # define memcpy(d, s, n) (bcopy (s, d, n), (d))
281 # endif
282 # endif
283 # endif
284
285
286
287
288 enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
289
290 # define SWITCH_ENUM_CAST(x) (x)
291
292
293 # define BASE_LEADING_CODE_P(c) (0)
294 # define CHAR_CHARSET(c) 0
295 # define CHARSET_LEADING_CODE_BASE(c) 0
296 # define MAX_MULTIBYTE_LENGTH 1
297 # define RE_MULTIBYTE_P(x) 0
298 # define RE_TARGET_MULTIBYTE_P(x) 0
299 # define WORD_BOUNDARY_P(c1, c2) (0)
300 # define CHAR_HEAD_P(p) (1)
301 # define SINGLE_BYTE_CHAR_P(c) (1)
302 # define SAME_CHARSET_P(c1, c2) (1)
303 # define MULTIBYTE_FORM_LENGTH(p, s) (1)
304 # define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
305 # define STRING_CHAR(p) (*(p))
306 # define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
307 # define CHAR_STRING(c, s) (*(s) = (c), 1)
308 # define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
309 # define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
310 # define RE_CHAR_TO_MULTIBYTE(c) (c)
311 # define RE_CHAR_TO_UNIBYTE(c) (c)
/* Single-byte versions: store in C the byte just before P (taken from
   the end of the first string when P is the start of the second), or
   the byte at P with LEN set to 1.  All arguments are parenthesized
   so that expressions such as `d + 1' are dereferenced as a whole.  */
# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)		\
  ((c) = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
# define GET_CHAR_AFTER(c, p, len)					\
  ((c) = *(p), (len) = 1)
316 # define MAKE_CHAR(charset, c1, c2) (c1)
317 # define BYTE8_TO_CHAR(c) (c)
318 # define CHAR_BYTE8_P(c) (0)
319 # define CHAR_LEADING_CODE(c) (c)
320
321 #endif
322
323 #ifndef RE_TRANSLATE
324 # define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
325 # define RE_TRANSLATE_P(TBL) (TBL)
326 #endif
327
328
329 #include "regex.h"
330
331
332 #include <ctype.h>
333
334 #ifdef emacs
335
336
337 # define IS_REAL_ASCII(c) ((c) < 0200)
338
339
340 # define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
341
342
343
344
345 # define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
346 # define ISCNTRL(c) ((c) < ' ')
347 # define ISXDIGIT(c) (((c) >= '0' && (c) <= '9') \
348 || ((c) >= 'a' && (c) <= 'f') \
349 || ((c) >= 'A' && (c) <= 'F'))
350
351
352 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
353
354
355
356 # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
357 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
358 : 1)
359
360 # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
361 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
362 : 1)
363
364 # define ISALNUM(c) (IS_REAL_ASCII (c) \
365 ? (((c) >= 'a' && (c) <= 'z') \
366 || ((c) >= 'A' && (c) <= 'Z') \
367 || ((c) >= '0' && (c) <= '9')) \
368 : SYNTAX (c) == Sword)
369
370 # define ISALPHA(c) (IS_REAL_ASCII (c) \
371 ? (((c) >= 'a' && (c) <= 'z') \
372 || ((c) >= 'A' && (c) <= 'Z')) \
373 : SYNTAX (c) == Sword)
374
375 # define ISLOWER(c) (LOWERCASEP (c))
376
377 # define ISPUNCT(c) (IS_REAL_ASCII (c) \
378 ? ((c) > ' ' && (c) < 0177 \
379 && !(((c) >= 'a' && (c) <= 'z') \
380 || ((c) >= 'A' && (c) <= 'Z') \
381 || ((c) >= '0' && (c) <= '9'))) \
382 : SYNTAX (c) != Sword)
383
384 # define ISSPACE(c) (SYNTAX (c) == Swhitespace)
385
386 # define ISUPPER(c) (UPPERCASEP (c))
387
388 # define ISWORD(c) (SYNTAX (c) == Sword)
389
390 #else
391
392 393 394 395 396 397 398 399 400 401 402
403
404 # undef ISASCII
405 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
406 # define ISASCII(c) 1
407 # else
408 # define ISASCII(c) isascii(c)
409 # endif
410
411
412 # define IS_REAL_ASCII(c) ((c) < 0200)
413
414
415 # define ISUNIBYTE(c) 1
416
417 # ifdef isblank
418 # define ISBLANK(c) (ISASCII (c) && isblank (c))
419 # else
420 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
421 # endif
422 # ifdef isgraph
423 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
424 # else
425 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
426 # endif
427
428 # undef ISPRINT
429 # define ISPRINT(c) (ISASCII (c) && isprint (c))
430 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
431 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
432 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
433 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
434 # define ISLOWER(c) (ISASCII (c) && islower (c))
435 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
436 # define ISSPACE(c) (ISASCII (c) && isspace (c))
437 # define ISUPPER(c) (ISASCII (c) && isupper (c))
438 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
439
440 # define ISWORD(c) ISALPHA(c)
441
442 # ifdef _tolower
443 # define TOLOWER(c) _tolower(c)
444 # else
445 # define TOLOWER(c) tolower(c)
446 # endif
447
448
449 # define CHAR_SET_SIZE 256
450
451 # ifdef SYNTAX_TABLE
452
453 extern char *re_syntax_table;
454
455 # else
456
457 static char re_syntax_table[CHAR_SET_SIZE];
458
459 static void
460 init_syntax_once ()
461 {
462 register int c;
463 static int done = 0;
464
465 if (done)
466 return;
467
468 bzero (re_syntax_table, sizeof re_syntax_table);
469
470 for (c = 0; c < CHAR_SET_SIZE; ++c)
471 if (ISALNUM (c))
472 re_syntax_table[c] = Sword;
473
474 re_syntax_table['_'] = Ssymbol;
475
476 done = 1;
477 }
478
479 # endif
480
481 # define SYNTAX(c) re_syntax_table[(c)]
482
483 #endif
484
485 #ifndef NULL
486 # define NULL (void *)0
487 #endif
488
489 490 491 492
493 #undef SIGN_EXTEND_CHAR
494 #if __STDC__
495 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
496 #else
497
498 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
499 #endif
500
501 502 503 504 505 506 507 508 509
510
511 #ifdef REGEX_MALLOC
512
513 # define REGEX_ALLOCATE malloc
514 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
515 # define REGEX_FREE free
516
517 #else
518
519
520 # ifndef alloca
521
522
523 # ifdef __GNUC__
524 # define alloca __builtin_alloca
525 # else
526 # ifdef HAVE_ALLOCA_H
527 # include <alloca.h>
528 # endif
529 # endif
530
531 # endif
532
533 # define REGEX_ALLOCATE alloca
534
535
536 # define REGEX_REALLOCATE(source, osize, nsize) \
537 (destination = (char *) alloca (nsize), \
538 memcpy (destination, source, osize))
539
540
541 # define REGEX_FREE(arg) ((void)0)
542
543 #endif
544
545
546
547 #if defined REL_ALLOC && defined REGEX_MALLOC
548
549 # define REGEX_ALLOCATE_STACK(size) \
550 r_alloc (&failure_stack_ptr, (size))
551 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
552 r_re_alloc (&failure_stack_ptr, (nsize))
553 # define REGEX_FREE_STACK(ptr) \
554 r_alloc_free (&failure_stack_ptr)
555
556 #else
557
558 # ifdef REGEX_MALLOC
559
560 # define REGEX_ALLOCATE_STACK malloc
561 # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
562 # define REGEX_FREE_STACK free
563
564 # else
565
566 # define REGEX_ALLOCATE_STACK alloca
567
568 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
569 REGEX_REALLOCATE (source, osize, nsize)
570
571 # define REGEX_FREE_STACK(arg) ((void)0)
572
573 # endif
574 #endif
575
576
577 578 579
580 #define FIRST_STRING_P(ptr) \
581 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
582
583
/* Typed allocation helpers: N objects of type T.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
/* Resize ADDR when it is non-null, otherwise allocate it afresh.
   Wrapped in do/while (0) so that the macro is a single statement and
   can be used as the body of an `if' that has its own `else'.  */
#define RETALLOC_IF(addr, n, t)						\
  do {									\
    if (addr) RETALLOC ((addr), (n), t); else (addr) = TALLOC ((n), t);	\
  } while (0)
588 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
589
590 #define BYTEWIDTH 8
591
592 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
593
594 #undef MAX
595 #undef MIN
596 #define MAX(a, b) ((a) > (b) ? (a) : (b))
597 #define MIN(a, b) ((a) < (b) ? (a) : (b))
598
599
600 typedef const unsigned char re_char;
601
602 typedef char boolean;
603 #define false 0
604 #define true 1
605
606 static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
607 re_char *string1, int size1,
608 re_char *string2, int size2,
609 int pos,
610 struct re_registers *regs,
611 int stop));
612
613 614 615 616
617
/* Opcodes of the compiled pattern.  Each instruction is one of these
   values stored in a single byte of the pattern buffer, followed by
   the operands noted below.  The operand notes describe the layout
   decoded by the DEBUG pattern printer in this file.  The numeric
   values follow from the declaration order, starting at 0.  */
typedef enum
{
  no_op = 0,			/* No operand.  */

  succeed,			/* No operand.  */

  exactn,			/* One count byte N, then N literal bytes.  */

  anychar,			/* No operand.  */

  /* One byte whose low 7 bits give the bitmap length in bytes and
     whose top bit flags a trailing range table, then the bitmap.  */
  charset,

  charset_not,			/* Same layout as charset.  */

  start_memory,			/* One operand byte.  */

  stop_memory,			/* One operand byte.  */

  duplicate,			/* One operand byte.  */

  begline,			/* No operand.  */

  endline,			/* No operand.  */

  begbuf,			/* No operand.  */

  endbuf,			/* No operand.  */

  /* The jump family: one two-byte number, a distance counted from the
     position just after the operand.  */
  jump,

  on_failure_jump,

  on_failure_keep_string_jump,

  on_failure_jump_loop,

  on_failure_jump_nastyloop,

  on_failure_jump_smart,

  /* Two two-byte numbers: a relative distance, then a count.  */
  succeed_n,

  jump_n,

  /* Two two-byte numbers: a relative location, then a value.  */
  set_number_at,

  wordbeg,			/* No operand.  */
  wordend,			/* No operand.  */

  wordbound,			/* No operand.  */
  notwordbound,			/* No operand.  */

  symbeg,			/* No operand.  */
  symend,			/* No operand.  */

  syntaxspec,			/* One operand byte.  */

  notsyntaxspec			/* One operand byte.  */

#ifdef emacs
  ,before_dot,			/* No operand.  */
  at_dot,			/* No operand.  */
  after_dot,			/* No operand.  */

  categoryspec,			/* One operand byte.  */

  notcategoryspec		/* One operand byte.  */
#endif
} re_opcode_t;
757
758
759
760
761
/* Store NUMBER in the two bytes at DESTINATION, low-order byte first.
   Both arguments are evaluated more than once.  */
#define STORE_NUMBER(destination, number)				\
  do {									\
    (destination)[0] = (number) & 0xFF;					\
    (destination)[1] = (number) >> 8;					\
  } while (0)

/* Like STORE_NUMBER, and then advance DESTINATION (which must be an
   lvalue) past the two bytes just written.  */
#define STORE_NUMBER_AND_INCR(destination, number)			\
  do {									\
    (destination)[0] = (number) & 0xFF;					\
    (destination)[1] = (number) >> 8;					\
    (destination) += 2;							\
  } while (0)
777
778 779
780
/* Read into DESTINATION the signed two-byte number stored at SOURCE,
   low-order byte first (the format written by STORE_NUMBER).  The
   high byte is sign-extended and scaled by multiplication rather than
   by `<< 8', because left-shifting a negative value is undefined
   behavior.  */
#define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)

#ifdef DEBUG
/* Function form of EXTRACT_NUMBER: store in *DEST the signed two-byte
   number found at SOURCE.  */
static void
extract_number (int *dest, re_char *source)
{
  int temp = SIGN_EXTEND_CHAR (*(source + 1));
  *dest = *source & 0377;
  *dest += temp * (1 << 8);
}

# ifndef EXTRACT_MACROS /* To debug the macros.  */
#  undef EXTRACT_NUMBER
#  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
# endif /* not EXTRACT_MACROS */

#endif /* DEBUG */

/* Like EXTRACT_NUMBER, and then advance SOURCE (which must be an
   lvalue) past the two bytes just read.  */
#define EXTRACT_NUMBER_AND_INCR(destination, source)			\
  do {									\
    EXTRACT_NUMBER (destination, source);				\
    (source) += 2;							\
  } while (0)

#ifdef DEBUG
/* Function form of EXTRACT_NUMBER_AND_INCR: store the number found at
   *SOURCE in *DESTINATION and advance *SOURCE by two bytes.  */
static void
extract_number_and_incr (int *destination, re_char **source)
{
  extract_number (destination, *source);
  *source += 2;
}

# ifndef EXTRACT_MACROS
#  undef EXTRACT_NUMBER_AND_INCR
#  define EXTRACT_NUMBER_AND_INCR(dest, src) \
  extract_number_and_incr (&dest, &src)
# endif /* not EXTRACT_MACROS */

#endif /* DEBUG */
834
835 836 837
838
/* Store CHARACTER in the three bytes at DESTINATION, low-order byte
   first, then advance DESTINATION (an lvalue) by three.  */
#define STORE_CHARACTER_AND_INCR(destination, character)		\
  do {									\
    (destination)[0] = (character) & 0xFF;				\
    (destination)[1] = ((character) >> 8) & 0xFF;			\
    (destination)[2] = (character) >> 16;				\
    (destination) += 3;							\
  } while (0)

/* Assemble into DESTINATION the three-byte value stored at SOURCE,
   low-order byte first.  SOURCE is not advanced.  */
#define EXTRACT_CHARACTER(destination, source)				\
  do {									\
    (destination) = ((source)[0]					\
		     | ((source)[1] << 8)				\
		     | ((source)[2] << 16));				\
  } while (0)
856
857
858
859
860 861
/* Accessors for a charset instruction; P points at its opcode byte.
   Layout as read by these macros: P[1] holds the bitmap length in its
   low 7 bits and a range-table flag in its top bit, the bitmap starts
   at P[2], two flag bytes follow the bitmap, and the range table
   begins right after them.  */

/* Number of bitmap bytes.  */
#define CHARSET_BITMAP_SIZE(p) ((p)[1] & 127)

/* Nonzero (0x80) if a range table follows the bitmap.  */
#define CHARSET_RANGE_TABLE_EXISTS_P(p) ((p)[1] & 0x80)

/* Address of the range table.  */
#define CHARSET_RANGE_TABLE(p) ((p) + 4 + CHARSET_BITMAP_SIZE (p))

/* The two bytes stored between the bitmap and the range table,
   combined low-order byte first.  */
#define CHARSET_RANGE_TABLE_BITS(p)					\
  ((p)[2 + CHARSET_BITMAP_SIZE (p)]					\
   | ((p)[3 + CHARSET_BITMAP_SIZE (p)] << 8))

/* 1 if C is covered by the bitmap and its bit is set, else 0.  */
#define CHARSET_LOOKUP_BITMAP(p, c)					\
  ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH				\
   && ((p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH))) != 0)

/* Address just past a table of COUNT ranges starting at RANGE_TABLE;
   each range is a pair of three-byte characters.  */
#define CHARSET_RANGE_TABLE_END(range_table, count)			\
  ((range_table) + (count) * 2 * 3)
889
890 891
/* Scan COUNT ranges starting at RANGE_TABLE and invert NOT (an
   lvalue) if C lies inside one of them; scanning stops at the first
   range that contains C.  Each range is two three-byte characters,
   start then end, both inclusive.
   The block-local names carry a trailing underscore so that they
   cannot capture identifiers used in the caller's arguments (a caller
   passing an expression that mentions `p', for instance).  */
#define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
  do									\
    {									\
      re_wchar_t range_start_, range_end_;				\
      re_char *rtp_;							\
      re_char *range_table_end_						\
	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
									\
      for (rtp_ = (range_table); rtp_ < range_table_end_; rtp_ += 2 * 3) \
	{								\
	  EXTRACT_CHARACTER (range_start_, rtp_);			\
	  EXTRACT_CHARACTER (range_end_, rtp_ + 3);			\
									\
	  if (range_start_ <= (c) && (c) <= range_end_)			\
	    {								\
	      (not) = !(not);						\
	      break;							\
	    }								\
	}								\
    }									\
  while (0)

/* Same test for the charset instruction CHARSET (a pointer to its
   opcode byte): locate its range table, read the two-byte range count
   stored at the start of the table, and scan the ranges after it.  */
#define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
  do									\
    {									\
      int count_;							\
      re_char *range_table_ = CHARSET_RANGE_TABLE (charset);		\
									\
      EXTRACT_NUMBER_AND_INCR (count_, range_table_);			\
      CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table_, count_); \
    }									\
  while (0)
927
928 929 930 931 932
933
934 #ifdef DEBUG
935
936
937 # include <stdio.h>
938
939
940 # include <assert.h>
941
942 static int debug = -100000;
943
944 # define DEBUG_STATEMENT(e) e
945 # define DEBUG_PRINT1(x) if (debug > 0) printf (x)
946 # define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
947 # define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
948 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
949 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
950 if (debug > 0) print_partial_compiled_pattern (s, e)
951 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
952 if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
953
954
955
956
957 void
958 print_fastmap (fastmap)
959 char *fastmap;
960 {
961 unsigned was_a_range = 0;
962 unsigned i = 0;
963
964 while (i < (1 << BYTEWIDTH))
965 {
966 if (fastmap[i++])
967 {
968 was_a_range = 0;
969 putchar (i - 1);
970 while (i < (1 << BYTEWIDTH) && fastmap[i])
971 {
972 was_a_range = 1;
973 i++;
974 }
975 if (was_a_range)
976 {
977 printf ("-");
978 putchar (i - 1);
979 }
980 }
981 }
982 putchar ('\n');
983 }
984
985
986 987
988
989 void
990 print_partial_compiled_pattern (start, end)
991 re_char *start;
992 re_char *end;
993 {
994 int mcnt, mcnt2;
995 re_char *p = start;
996 re_char *pend = end;
997
998 if (start == NULL)
999 {
1000 fprintf (stderr, "(null)\n");
1001 return;
1002 }
1003
1004
1005 while (p < pend)
1006 {
1007 fprintf (stderr, "%d:\t", p - start);
1008
1009 switch ((re_opcode_t) *p++)
1010 {
1011 case no_op:
1012 fprintf (stderr, "/no_op");
1013 break;
1014
1015 case succeed:
1016 fprintf (stderr, "/succeed");
1017 break;
1018
1019 case exactn:
1020 mcnt = *p++;
1021 fprintf (stderr, "/exactn/%d", mcnt);
1022 do
1023 {
1024 fprintf (stderr, "/%c", *p++);
1025 }
1026 while (--mcnt);
1027 break;
1028
1029 case start_memory:
1030 fprintf (stderr, "/start_memory/%d", *p++);
1031 break;
1032
1033 case stop_memory:
1034 fprintf (stderr, "/stop_memory/%d", *p++);
1035 break;
1036
1037 case duplicate:
1038 fprintf (stderr, "/duplicate/%d", *p++);
1039 break;
1040
1041 case anychar:
1042 fprintf (stderr, "/anychar");
1043 break;
1044
1045 case charset:
1046 case charset_not:
1047 {
1048 register int c, last = -100;
1049 register int in_range = 0;
1050 int length = CHARSET_BITMAP_SIZE (p - 1);
1051 int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
1052
1053 fprintf (stderr, "/charset [%s",
1054 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
1055
1056 if (p + *p >= pend)
1057 fprintf (stderr, " !extends past end of pattern! ");
1058
1059 for (c = 0; c < 256; c++)
1060 if (c / 8 < length
1061 && (p[1 + (c/8)] & (1 << (c % 8))))
1062 {
1063
1064 if (last + 1 == c && ! in_range)
1065 {
1066 fprintf (stderr, "-");
1067 in_range = 1;
1068 }
1069
1070 else if (last + 1 != c && in_range)
1071 {
1072 fprintf (stderr, "%c", last);
1073 in_range = 0;
1074 }
1075
1076 if (! in_range)
1077 fprintf (stderr, "%c", c);
1078
1079 last = c;
1080 }
1081
1082 if (in_range)
1083 fprintf (stderr, "%c", last);
1084
1085 fprintf (stderr, "]");
1086
1087 p += 1 + length;
1088
1089 if (has_range_table)
1090 {
1091 int count;
1092 fprintf (stderr, "has-range-table");
1093
1094
1095 p += 2;
1096 EXTRACT_NUMBER_AND_INCR (count, p);
1097 p = CHARSET_RANGE_TABLE_END (p, count);
1098 }
1099 }
1100 break;
1101
1102 case begline:
1103 fprintf (stderr, "/begline");
1104 break;
1105
1106 case endline:
1107 fprintf (stderr, "/endline");
1108 break;
1109
1110 case on_failure_jump:
1111 extract_number_and_incr (&mcnt, &p);
1112 fprintf (stderr, "/on_failure_jump to %d", p + mcnt - start);
1113 break;
1114
1115 case on_failure_keep_string_jump:
1116 extract_number_and_incr (&mcnt, &p);
1117 fprintf (stderr, "/on_failure_keep_string_jump to %d", p + mcnt - start);
1118 break;
1119
1120 case on_failure_jump_nastyloop:
1121 extract_number_and_incr (&mcnt, &p);
1122 fprintf (stderr, "/on_failure_jump_nastyloop to %d", p + mcnt - start);
1123 break;
1124
1125 case on_failure_jump_loop:
1126 extract_number_and_incr (&mcnt, &p);
1127 fprintf (stderr, "/on_failure_jump_loop to %d", p + mcnt - start);
1128 break;
1129
1130 case on_failure_jump_smart:
1131 extract_number_and_incr (&mcnt, &p);
1132 fprintf (stderr, "/on_failure_jump_smart to %d", p + mcnt - start);
1133 break;
1134
1135 case jump:
1136 extract_number_and_incr (&mcnt, &p);
1137 fprintf (stderr, "/jump to %d", p + mcnt - start);
1138 break;
1139
1140 case succeed_n:
1141 extract_number_and_incr (&mcnt, &p);
1142 extract_number_and_incr (&mcnt2, &p);
1143 fprintf (stderr, "/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1144 break;
1145
1146 case jump_n:
1147 extract_number_and_incr (&mcnt, &p);
1148 extract_number_and_incr (&mcnt2, &p);
1149 fprintf (stderr, "/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1150 break;
1151
1152 case set_number_at:
1153 extract_number_and_incr (&mcnt, &p);
1154 extract_number_and_incr (&mcnt2, &p);
1155 fprintf (stderr, "/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
1156 break;
1157
1158 case wordbound:
1159 fprintf (stderr, "/wordbound");
1160 break;
1161
1162 case notwordbound:
1163 fprintf (stderr, "/notwordbound");
1164 break;
1165
1166 case wordbeg:
1167 fprintf (stderr, "/wordbeg");
1168 break;
1169
1170 case wordend:
1171 fprintf (stderr, "/wordend");
1172 break;
1173
1174 case symbeg:
1175 fprintf (stderr, "/symbeg");
1176 break;
1177
1178 case symend:
1179 fprintf (stderr, "/symend");
1180 break;
1181
1182 case syntaxspec:
1183 fprintf (stderr, "/syntaxspec");
1184 mcnt = *p++;
1185 fprintf (stderr, "/%d", mcnt);
1186 break;
1187
1188 case notsyntaxspec:
1189 fprintf (stderr, "/notsyntaxspec");
1190 mcnt = *p++;
1191 fprintf (stderr, "/%d", mcnt);
1192 break;
1193
1194 # ifdef emacs
1195 case before_dot:
1196 fprintf (stderr, "/before_dot");
1197 break;
1198
1199 case at_dot:
1200 fprintf (stderr, "/at_dot");
1201 break;
1202
1203 case after_dot:
1204 fprintf (stderr, "/after_dot");
1205 break;
1206
1207 case categoryspec:
1208 fprintf (stderr, "/categoryspec");
1209 mcnt = *p++;
1210 fprintf (stderr, "/%d", mcnt);
1211 break;
1212
1213 case notcategoryspec:
1214 fprintf (stderr, "/notcategoryspec");
1215 mcnt = *p++;
1216 fprintf (stderr, "/%d", mcnt);
1217 break;
1218 # endif
1219
1220 case begbuf:
1221 fprintf (stderr, "/begbuf");
1222 break;
1223
1224 case endbuf:
1225 fprintf (stderr, "/endbuf");
1226 break;
1227
1228 default:
1229 fprintf (stderr, "?%d", *(p-1));
1230 }
1231
1232 fprintf (stderr, "\n");
1233 }
1234
1235 fprintf (stderr, "%d:\tend of pattern.\n", p - start);
1236 }
1237
1238
1239 void
1240 print_compiled_pattern (bufp)
1241 struct re_pattern_buffer *bufp;
1242 {
1243 re_char *buffer = bufp->buffer;
1244
1245 print_partial_compiled_pattern (buffer, buffer + bufp->used);
1246 printf ("%ld bytes used/%ld bytes allocated.\n",
1247 bufp->used, bufp->allocated);
1248
1249 if (bufp->fastmap_accurate && bufp->fastmap)
1250 {
1251 printf ("fastmap: ");
1252 print_fastmap (bufp->fastmap);
1253 }
1254
1255 printf ("re_nsub: %d\t", bufp->re_nsub);
1256 printf ("regs_alloc: %d\t", bufp->regs_allocated);
1257 printf ("can_be_null: %d\t", bufp->can_be_null);
1258 printf ("no_sub: %d\t", bufp->no_sub);
1259 printf ("not_bol: %d\t", bufp->not_bol);
1260 printf ("not_eol: %d\t", bufp->not_eol);
1261 printf ("syntax: %lx\n", bufp->syntax);
1262 fflush (stdout);
1263
1264 }
1265
1266
1267 void
1268 print_double_string (where, string1, size1, string2, size2)
1269 re_char *where;
1270 re_char *string1;
1271 re_char *string2;
1272 int size1;
1273 int size2;
1274 {
1275 int this_char;
1276
1277 if (where == NULL)
1278 printf ("(null)");
1279 else
1280 {
1281 if (FIRST_STRING_P (where))
1282 {
1283 for (this_char = where - string1; this_char < size1; this_char++)
1284 putchar (string1[this_char]);
1285
1286 where = string2;
1287 }
1288
1289 for (this_char = where - string2; this_char < size2; this_char++)
1290 putchar (string2[this_char]);
1291 }
1292 }
1293
1294 #else
1295
1296 # undef assert
1297 # define assert(e)
1298
1299 # define DEBUG_STATEMENT(e)
1300 # define DEBUG_PRINT1(x)
1301 # define DEBUG_PRINT2(x1, x2)
1302 # define DEBUG_PRINT3(x1, x2, x3)
1303 # define DEBUG_PRINT4(x1, x2, x3, x4)
1304 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1305 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1306
1307 #endif
1308
1309 1310 1311
1312 1313
1314 reg_syntax_t re_syntax_options;
1315
1316
1317 1318 1319 1320 1321 1322
1323
1324 reg_syntax_t
1325 re_set_syntax (syntax)
1326 reg_syntax_t syntax;
1327 {
1328 reg_syntax_t ret = re_syntax_options;
1329
1330 re_syntax_options = syntax;
1331 return ret;
1332 }
1333 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1334
1335
1336 static re_char *whitespace_regexp;
1337
1338 void
1339 re_set_whitespace_regexp (regexp)
1340 const char *regexp;
1341 {
1342 whitespace_regexp = (re_char *) regexp;
1343 }
1344 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1345
1346 1347 1348 1349
1350
1351 static const char *re_error_msgid[] =
1352 {
1353 gettext_noop ("Success"),
1354 gettext_noop ("No match"),
1355 gettext_noop ("Invalid regular expression"),
1356 gettext_noop ("Invalid collation character"),
1357 gettext_noop ("Invalid character class name"),
1358 gettext_noop ("Trailing backslash"),
1359 gettext_noop ("Invalid back reference"),
1360 gettext_noop ("Unmatched [ or [^"),
1361 gettext_noop ("Unmatched ( or \\("),
1362 gettext_noop ("Unmatched \\{"),
1363 gettext_noop ("Invalid content of \\{\\}"),
1364 gettext_noop ("Invalid range end"),
1365 gettext_noop ("Memory exhausted"),
1366 gettext_noop ("Invalid preceding regular expression"),
1367 gettext_noop ("Premature end of regular expression"),
1368 gettext_noop ("Regular expression too big"),
1369 gettext_noop ("Unmatched ) or \\)"),
1370 gettext_noop ("Range striding over charsets")
1371 };
1372
1373
1374
1375 1376 1377 1378 1379 1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390
1391
1392
1393 #define MATCH_MAY_ALLOCATE
1394
1395 1396 1397 1398 1399
1400 #if defined REGEX_MALLOC && defined emacs
1401 # undef MATCH_MAY_ALLOCATE
1402 #endif
1403
1404
1405 1406 1407
1408
1409
1410 1411 1412
1413 #ifndef INIT_FAILURE_ALLOC
1414 # define INIT_FAILURE_ALLOC 20
1415 #endif
1416
1417 1418 1419 1420 1421
1422 # if defined MATCH_MAY_ALLOCATE
1423 1424 1425 1426
1427 size_t re_max_failures = 40000;
1428 # else
1429 size_t re_max_failures = 4000;
1430 # endif
1431
1432 union fail_stack_elt
1433 {
1434 re_char *pointer;
1435
1436 long integer;
1437 };
1438
1439 typedef union fail_stack_elt fail_stack_elt_t;
1440
1441 typedef struct
1442 {
1443 fail_stack_elt_t *stack;
1444 size_t size;
1445 size_t avail;
1446 size_t frame;
1447 } fail_stack_type;
1448
1449 #define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
1450 #define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
1451
1452
1453 1454
1455
1456 #ifdef MATCH_MAY_ALLOCATE
1457 # define INIT_FAIL_STACK() \
1458 do { \
1459 fail_stack.stack = (fail_stack_elt_t *) \
1460 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
1461 * sizeof (fail_stack_elt_t)); \
1462 \
1463 if (fail_stack.stack == NULL) \
1464 return -2; \
1465 \
1466 fail_stack.size = INIT_FAILURE_ALLOC; \
1467 fail_stack.avail = 0; \
1468 fail_stack.frame = 0; \
1469 } while (0)
1470
1471 # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
1472 #else
1473 # define INIT_FAIL_STACK() \
1474 do { \
1475 fail_stack.avail = 0; \
1476 fail_stack.frame = 0; \
1477 } while (0)
1478
1479 # define RESET_FAIL_STACK() ((void)0)
1480 #endif
1481
1482
1483 1484 1485 1486 1487 1488 1489
1490
1491 1492 1493 1494 1495
/* Factor by which the failure stack is enlarged on each growth.  */
#define FAIL_STACK_GROWTH_FACTOR 4

/* Byte size requested by one growth step: the current byte size times
   FAIL_STACK_GROWTH_FACTOR, capped at
   re_max_failures * TYPICAL_FAILURE_SIZE.  */
#define FAIL_STACK_GROWN_BYTES(fail_stack)				\
  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,				\
       ((fail_stack).size * sizeof (fail_stack_elt_t)			\
	* FAIL_STACK_GROWTH_FACTOR))

/* Enlarge FAIL_STACK.  This is an expression: it yields 0 if the
   stack has already reached the cap or if reallocation returned NULL,
   and 1 after the stack was reallocated and its `size' updated.  The
   new size is computed from the old `size' both times it is needed,
   because `size' is only assigned at the very end.  */
#define GROW_FAIL_STACK(fail_stack)					\
  (((fail_stack).size * sizeof (fail_stack_elt_t)			\
    >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
   ? 0									\
   : ((fail_stack).stack						\
      = (fail_stack_elt_t *)						\
	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
	  FAIL_STACK_GROWN_BYTES (fail_stack)),				\
									\
      (fail_stack).stack == NULL					\
      ? 0								\
      : ((fail_stack).size						\
	 = (FAIL_STACK_GROWN_BYTES (fail_stack)				\
	    / sizeof (fail_stack_elt_t)),				\
	 1)))
1518
1519
1520 1521 1522
/* All of the macros below operate on a variable named `fail_stack'
   taken from the scope in which they are expanded.  None of them
   checks for room; the caller has to make sure of that first.  */

/* Push ITEM as a pointer, an integer, or a whole stack element.  */
#define PUSH_FAILURE_POINTER(item)					\
  (fail_stack.stack[fail_stack.avail++].pointer = (item))

#define PUSH_FAILURE_INT(item)						\
  (fail_stack.stack[fail_stack.avail++].integer = (item))

#define PUSH_FAILURE_ELT(item)						\
  (fail_stack.stack[fail_stack.avail++] = (item))

/* Pop the top slot and yield it as a pointer, an integer, or a whole
   stack element.  */
#define POP_FAILURE_POINTER() (fail_stack.stack[--fail_stack.avail].pointer)
#define POP_FAILURE_INT() (fail_stack.stack[--fail_stack.avail].integer)
#define POP_FAILURE_ELT() (fail_stack.stack[--fail_stack.avail])

/* Number of slots, read through the three accessors below, that sit
   directly under a failure handle.  */
#define NUM_NONREG_ITEMS 3

/* Fields of the failure point whose handle is H: the slots at H - 1,
   H - 2 and H - 3 respectively.  */
#define FAILURE_PAT(h) (fail_stack.stack[(h) - 1].pointer)
#define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
#define NEXT_FAILURE_HANDLE(h) (fail_stack.stack[(h) - 3].integer)
/* Handle of the most recent failure point.  */
#define TOP_FAILURE_HANDLE() (fail_stack.frame)
1552
1553
1554 #define ENSURE_FAIL_STACK(space) \
1555 while (REMAINING_AVAIL_SLOTS <= space) { \
1556 if (!GROW_FAIL_STACK (fail_stack)) \
1557 return -2; \
1558 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", (fail_stack).size);\
1559 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1560 }
1561
1562
/* Save register NUM on the failure stack as a three-slot record:
   regstart[NUM], then regend[NUM], then NUM itself on top.
   POP_FAILURE_REG_OR_COUNT undoes this.

   `destination' is not referenced directly here.
   NOTE(review): presumably it is kept in scope for the expansion of
   ENSURE_FAIL_STACK / GROW_FAIL_STACK -- confirm before removing.  */
#define PUSH_FAILURE_REG(num) \
  do { \
    char *destination; \
    ENSURE_FAIL_STACK(3); \
    DEBUG_PRINT4 (" Push reg %d (spanning %p -> %p)\n", \
                  num, regstart[num], regend[num]); \
    PUSH_FAILURE_POINTER (regstart[num]); \
    PUSH_FAILURE_POINTER (regend[num]); \
    PUSH_FAILURE_INT (num); \
  } while (0)
1573
1574 1575
/* Overwrite the number stored at PTR with VAL, first saving the old value
   on the failure stack so it can be put back later.  The saved record is
   three slots: the old value, PTR, and a -1 tag on top.  That -1 is what
   POP_FAILURE_REG_OR_COUNT uses to tell a counter record from a register
   record.

   `destination' is not referenced directly here.
   NOTE(review): presumably it is kept in scope for the expansion of
   ENSURE_FAIL_STACK / GROW_FAIL_STACK -- confirm before removing.  */
#define PUSH_NUMBER(ptr,val) \
  do { \
    char *destination; \
    int c; \
    ENSURE_FAIL_STACK(3); \
    EXTRACT_NUMBER (c, ptr); \
    DEBUG_PRINT4 (" Push number %p = %d -> %d\n", ptr, c, val); \
    PUSH_FAILURE_INT (c); \
    PUSH_FAILURE_POINTER (ptr); \
    PUSH_FAILURE_INT (-1); \
    STORE_NUMBER (ptr, val); \
  } while (0)
1588
1589
/* Pop one saved record off the failure stack and restore what it
   describes.  The top slot is a tag:
     - any value other than -1 is a register number; the next two slots
       are that register's end and start pointers, restored into
       regend[] / regstart[];
     - -1 marks a counter record; the next two slots are the counter's
       location and its saved value, which is written back with
       STORE_NUMBER.  */
#define POP_FAILURE_REG_OR_COUNT() \
  do { \
    int tag = POP_FAILURE_INT (); \
    if (tag != -1) \
      { \
        regend[tag] = POP_FAILURE_POINTER (); \
        regstart[tag] = POP_FAILURE_POINTER (); \
        DEBUG_PRINT4 (" Pop reg %d (spanning %p -> %p)\n", \
                      tag, regstart[tag], regend[tag]); \
      } \
    else \
      { \
        unsigned char *counter = (unsigned char*) POP_FAILURE_POINTER (); \
        int saved = POP_FAILURE_INT (); \
        STORE_NUMBER (counter, saved); \
        DEBUG_PRINT3 (" Pop counter %p = %d\n", counter, saved); \
      } \
  } while (0)
1610
1611
/* Walk the chain of failure frames, starting at the top one, for as long
   as each frame's string pointer is either STRING_PLACE or NULL.  If one
   of those frames has PAT_CUR as its pattern pointer, set the variable
   `cycle' (which must be in scope) to 1 and stop.

   The closing "Other string" trace is only emitted while `failure' still
   names a real frame.  When the walk has run off the end of the chain
   (failure <= 0), FAILURE_STR (failure) would index below the start of
   the stack, so it is not evaluated.  PAT_CUR and STRING_PLACE are
   parenthesized so any expression may be passed.  */
#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
  do { \
    int failure = TOP_FAILURE_HANDLE (); \
    while (failure > 0 \
           && (FAILURE_STR (failure) == (string_place) \
               || FAILURE_STR (failure) == NULL)) \
      { \
        assert (FAILURE_PAT (failure) >= bufp->buffer \
                && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
        if (FAILURE_PAT (failure) == (pat_cur)) \
          { \
            cycle = 1; \
            break; \
          } \
        DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
        failure = NEXT_FAILURE_HANDLE(failure); \
      } \
    if (failure > 0) \
      DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \
  } while (0)
1632
1633 1634 1635 1636 1637 1638 1639 1640
1641
/* Open a new failure frame for resuming at PATTERN with the string
   position STRING_PLACE.  After reserving room, three slots are pushed in
   this order: the handle of the previous frame (fail_stack.frame), then
   STRING_PLACE, then PATTERN.  fail_stack.frame is then set to the new
   `avail', so the FAILURE_* accessors can reach those slots at fixed
   negative offsets from the handle.

   May make the enclosing function return, via ENSURE_FAIL_STACK.

   `destination' is not referenced directly here.
   NOTE(review): presumably it is kept in scope for the expansion of
   ENSURE_FAIL_STACK / GROW_FAIL_STACK -- confirm before removing.  */
#define PUSH_FAILURE_POINT(pattern, string_place) \
  do { \
    char *destination; \
    \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n"); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail); \
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size); \
    \
    ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
    \
    DEBUG_PRINT1 ("\n"); \
    \
    /* Slot 1: link to the previous frame.  */ \
    DEBUG_PRINT2 (" Push frame index: %d\n", fail_stack.frame); \
    PUSH_FAILURE_INT (fail_stack.frame); \
    \
    /* Slot 2: where in the string to resume.  */ \
    DEBUG_PRINT2 (" Push string %p: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_POINTER (string_place); \
    \
    /* Slot 3: where in the pattern to resume.  */ \
    DEBUG_PRINT2 (" Push pattern %p: ", pattern); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend); \
    PUSH_FAILURE_POINTER (pattern); \
    \
    /* The new frame's handle is the slot just past its header.  */ \
    fail_stack.frame = fail_stack.avail; \
  } while (0)
1672
1673 1674 1675
1676
/* Multiplier applied to re_max_failures when the fail stack's maximum
   size is computed.  */
#define TYPICAL_FAILURE_SIZE 20

/* Count of still-unused slots on `fail_stack'.  */
#define REMAINING_AVAIL_SLOTS \
  ((fail_stack).size - (fail_stack).avail)
1681
1682
1683 1684 1685 1686 1687 1688 1689 1690 1691
1692
/* Discard the top failure frame and hand its resume positions back to the
   caller.  First every register/counter record pushed since the frame was
   opened is popped and restored.  Then the frame header is popped in the
   reverse of the order PUSH_FAILURE_POINT wrote it: the pattern pointer
   into PAT, the string pointer into STR, and the previous frame handle
   into fail_stack.frame.  STR and PAT must be assignable lvalues.  */
#define POP_FAILURE_POINT(str, pat) \
  do { \
    assert (!FAIL_STACK_EMPTY ()); \
    \
    DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
    DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
    DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
    \
    /* Unwind everything recorded above the frame header.  */ \
    while (fail_stack.frame < fail_stack.avail) \
      POP_FAILURE_REG_OR_COUNT (); \
    \
    pat = POP_FAILURE_POINTER (); \
    DEBUG_PRINT2 (" Popping pattern %p: ", pat); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
    \
    str = POP_FAILURE_POINTER (); \
    DEBUG_PRINT2 (" Popping string %p: `", str); \
    DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
    DEBUG_PRINT1 ("'\n"); \
    \
    fail_stack.frame = POP_FAILURE_INT (); \
    DEBUG_PRINT2 (" Popping frame index: %d\n", fail_stack.frame); \
    \
    assert (fail_stack.avail >= 0); \
    assert (fail_stack.frame <= fail_stack.avail); \
    \
    DEBUG_STATEMENT (nfailure_points_popped++); \
  } while (0)
1726
1727
1728
1729
/* True when register value E is the null pointer.  */
#define REG_UNSET(e) (NULL == (e))
1731
1732
1733
1734 static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
1735 reg_syntax_t syntax,
1736 struct re_pattern_buffer *bufp));
1737 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1738 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1739 int arg1, int arg2));
1740 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1741 int arg, unsigned char *end));
1742 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1743 int arg1, int arg2, unsigned char *end));
1744 static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
1745 re_char *p,
1746 reg_syntax_t syntax));
1747 static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
1748 re_char *pend,
1749 reg_syntax_t syntax));
1750 static re_char *skip_one_char _RE_ARGS ((re_char *p));
1751 static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
1752 char *fastmap, const int multibyte));
1753
1754 1755
/* Read the next pattern character into C and advance `p' past it.
   Uses `p', `pend' and `multibyte' from the enclosing scope.

   NOTE: contains a `return'.  If `p' is already at `pend', the enclosing
   function returns REG_EEND.  */
#define PATFETCH(c) \
  do { \
    int len; \
    if (p == pend) \
      return REG_EEND; \
    c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \
    p += len; \
  } while (0)
1763
1764
1765 1766 1767 1768
/* Map character D through the `translate' table in scope when
   RE_TRANSLATE_P says there is one; otherwise yield D unchanged.  Only
   defined here if no earlier definition exists.  */
#ifndef TRANSLATE
# define TRANSLATE(d) \
  (RE_TRANSLATE_P (translate) \
   ? RE_TRANSLATE (translate, (d)) \
   : (d))
#endif
1773
1774
1775
1776
1777
/* Size, in bytes, given to a pattern buffer that starts out with no
   allocation.  */
#define INIT_BUF_SIZE 32

/* Make sure at least N more bytes fit in the pattern buffer after `b',
   calling EXTEND_BUFFER as many times as needed.  Uses `b' and `bufp'
   from the enclosing scope.  EXTEND_BUFFER may make the enclosing
   function return.

   Wrapped in do { } while (0) so the macro is a single statement and can
   safely be the branch of an unbraced if/else.  */
#define GET_BUFFER_SPACE(n) \
  do { \
    while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated) \
      EXTEND_BUFFER (); \
  } while (0)
1784
1785
/* Append bytes to the compiled pattern at `b', reserving room first.
   Each argument is truncated to unsigned char.  GET_BUFFER_SPACE may
   make the enclosing function return.  */

/* Append one byte.  */
#define BUF_PUSH(c) \
  do { \
    GET_BUFFER_SPACE (1); \
    *b++ = (unsigned char) (c); \
  } while (0)

/* Append two bytes, C1 then C2.  */
#define BUF_PUSH_2(c1, c2) \
  do { \
    GET_BUFFER_SPACE (2); \
    *b++ = (unsigned char) (c1); \
    *b++ = (unsigned char) (c2); \
  } while (0)

/* Append three bytes, C1, C2, then C3.  */
#define BUF_PUSH_3(c1, c2, c3) \
  do { \
    GET_BUFFER_SPACE (3); \
    *b++ = (unsigned char) (c1); \
    *b++ = (unsigned char) (c2); \
    *b++ = (unsigned char) (c3); \
  } while (0)
1810
1811
1812 1813
/* Jump emitters.  Each converts the absolute target TO into a
   displacement relative to LOC, less 3, before calling the matching
   store_/insert_ helper.
   NOTE(review): the 3 is presumably the size of the jump instruction
   itself (opcode plus a two-byte offset) -- confirm against store_op1.  */

/* Write OP at LOC, jumping to TO.  */
#define STORE_JUMP(op, loc, to) \
  store_op1 (op, loc, (to) - (loc) - 3)

/* As STORE_JUMP, with an extra argument ARG.  */
#define STORE_JUMP2(op, loc, to, arg) \
  store_op2 (op, loc, (to) - (loc) - 3, arg)

/* As STORE_JUMP, but via insert_op1, which also receives the current end
   of the buffer `b'.  */
#define INSERT_JUMP(op, loc, to) \
  insert_op1 (op, loc, (to) - (loc) - 3, b)

/* As INSERT_JUMP, with an extra argument ARG.  */
#define INSERT_JUMP2(op, loc, to, arg) \
  insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1828
1829
1830 1831 1832
/* Upper bound on the size of a compiled pattern buffer.  EXTEND_BUFFER
   never grows a buffer past this value.  */
# define MAX_BUF_SIZE (1L << 15)

/* Alternative limits, currently compiled out.  */
#if 0
# if defined _MSC_VER && !defined WIN32
#  define MAX_BUF_SIZE 65500L
# else
#  define MAX_BUF_SIZE (1L << 16)
# endif
#endif
1848
1849 1850 1851 1852
/* Helpers for EXTEND_BUFFER.  They refer to `old_buffer', `new_buffer'
   and the compiler's working pointers by name, so they only make sense
   inside that macro.

   MOVE_BUFFER_POINTER (P) rebases P from the old buffer to the new one.
   Under __BOUNDED_POINTERS__ it adjusts the low bound, high bound and
   value of P separately, and ELSE_EXTEND_BUFFER_HIGH_BOUND supplies an
   `else' branch that refreshes only the high bounds.  Otherwise the
   rebase is a plain pointer assignment and the `else' hook is empty.  */
#if __BOUNDED_POINTERS__
# define SET_HIGH_BOUND(P) \
  (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
# define MOVE_BUFFER_POINTER(P) \
  (__ptrlow (P) = new_buffer + (__ptrlow (P) - old_buffer), \
   SET_HIGH_BOUND (P), \
   __ptrvalue (P) = new_buffer + (__ptrvalue (P) - old_buffer))
# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
  else \
    { \
      SET_HIGH_BOUND (b); \
      SET_HIGH_BOUND (begalt); \
      if (fixup_alt_jump) \
        SET_HIGH_BOUND (fixup_alt_jump); \
      if (laststart) \
        SET_HIGH_BOUND (laststart); \
      if (pending_exact) \
        SET_HIGH_BOUND (pending_exact); \
    }
#else
# define MOVE_BUFFER_POINTER(P) \
  ((P) = new_buffer + ((P) - old_buffer))
# define ELSE_EXTEND_BUFFER_HIGH_BOUND
#endif
/* Double the pattern buffer (capped at MAX_BUF_SIZE) and rebase every
   compiler pointer into it: `b', `begalt', and, when non-null,
   `fixup_alt_jump', `laststart' and `pending_exact'.

   NOTE: contains `return' statements.  The enclosing function returns
   REG_ESIZE if the buffer is already at MAX_BUF_SIZE, and REG_ESPACE if
   reallocation fails.

   On reallocation failure the previous buffer pointer and size are put
   back into BUFP before returning.  Otherwise bufp->buffer would be left
   NULL while bufp->allocated claimed the doubled size, and the old block
   would be unreachable.
   NOTE(review): this assumes RETALLOC has realloc semantics, i.e. that
   the old block stays valid when it yields NULL -- confirm against its
   definition.  */
#define EXTEND_BUFFER() \
  do { \
    unsigned char *old_buffer = bufp->buffer; \
    size_t old_allocated = bufp->allocated; \
    if (bufp->allocated == MAX_BUF_SIZE) \
      return REG_ESIZE; \
    bufp->allocated <<= 1; \
    if (bufp->allocated > MAX_BUF_SIZE) \
      bufp->allocated = MAX_BUF_SIZE; \
    RETALLOC (bufp->buffer, bufp->allocated, unsigned char); \
    if (bufp->buffer == NULL) \
      { \
        /* Leave BUFP describing the still-valid old block.  */ \
        bufp->buffer = old_buffer; \
        bufp->allocated = old_allocated; \
        return REG_ESPACE; \
      } \
    /* If the buffer moved, move all the pointers into it.  */ \
    if (old_buffer != bufp->buffer) \
      { \
        unsigned char *new_buffer = bufp->buffer; \
        MOVE_BUFFER_POINTER (b); \
        MOVE_BUFFER_POINTER (begalt); \
        if (fixup_alt_jump) \
          MOVE_BUFFER_POINTER (fixup_alt_jump); \
        if (laststart) \
          MOVE_BUFFER_POINTER (laststart); \
        if (pending_exact) \
          MOVE_BUFFER_POINTER (pending_exact); \
      } \
    ELSE_EXTEND_BUFFER_HIGH_BOUND \
  } while (0)
1901
1902
1903 1904 1905
/* Largest register (group) number.
   NOTE(review): 255 suggests the number must fit in one byte of the
   compiled pattern -- confirm against the opcode encoding.  */
#define MAX_REGNUM 255

/* Type used for register numbers.  */
typedef int regnum_t;

/* Type used for the offsets kept on the compile stack.  */
typedef long pattern_offset_t;

/* One entry of the compile stack: the compiler state saved for one
   level, three offsets plus a register number.  */
typedef struct
{
  pattern_offset_t begalt_offset;    /* Saved `begalt' position.  */
  pattern_offset_t fixup_alt_jump;   /* Saved `fixup_alt_jump' position.  */
  pattern_offset_t laststart_offset; /* Saved `laststart' position.  */
  regnum_t regnum;                   /* Register number for this level.  */
} compile_stack_elt_t;

/* The compile stack itself: an array of entries with its capacity and
   its current depth.  */
typedef struct
{
  compile_stack_elt_t *stack; /* Storage for the entries.  */
  unsigned int size;          /* Capacity of `stack', in entries.  */
  unsigned int avail;         /* Index of the first unused entry.  */
} compile_stack_type;
1935
1936
/* Number of entries the compile stack is first allocated with.  */
#define INIT_COMPILE_STACK_SIZE 32

/* Depth tests on the variable `compile_stack' in scope.  */
#define COMPILE_STACK_EMPTY (0 == compile_stack.avail)
#define COMPILE_STACK_FULL (compile_stack.size == compile_stack.avail)

/* The entry at index `avail', i.e. the first unused slot rather than the
   most recently pushed one.  */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1944
1945 1946
/* IMMEDIATE_QUIT_CHECK expands to a real check only in an Emacs build
   that defines QUIT together with WINDOWSNT or SYNC_INPUT: there it runs
   QUIT whenever `immediate_quit' is non-zero.  In every other
   configuration it is a no-op expression.  */
#if defined emacs && (defined WINDOWSNT || defined SYNC_INPUT) && defined QUIT
extern int immediate_quit;
# define IMMEDIATE_QUIT_CHECK \
  do { \
    if (immediate_quit) \
      QUIT; \
  } while (0)
#else
# define IMMEDIATE_QUIT_CHECK ((void) 0)
#endif
1956
1957
/* Scratch area in which the ranges of a bracket expression are
   collected while it is compiled.  */
struct range_table_work_area
{
  int *table;    /* Collected values, stored as (start, end) pairs.  */
  int allocated; /* Size of `table' in BYTES, not elements.  */
  int used;      /* Number of ints of `table' currently in use.  */
  int bits;      /* OR of the BIT_* flags recorded so far.  */
};
1965
1966 1967 1968 1969
1970
/* Make sure WORK_AREA (a struct range_table_work_area lvalue, not a
   pointer) has room for N more ints after the `used' ones, growing it
   with extend_range_table_work_area until it does.

   NOTE: contains a `return'.  If the table pointer is null after a
   growth attempt, the enclosing function returns REG_ESPACE.

   Growth is repeated in a loop, so a request larger than a single
   growth step is still satisfied.  */
#define EXTEND_RANGE_TABLE(work_area, n) \
  do { \
    while (((work_area).used + (n)) * sizeof (int) \
           > (work_area).allocated) \
      { \
        extend_range_table_work_area (&(work_area)); \
        if ((work_area).table == 0) \
          return (REG_ESPACE); \
      } \
  } while (0)
1980
/* OR the flag(s) BIT into the `bits' member of WORK_AREA.  */
#define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
  (work_area).bits |= (bit)

/* Flag values stored in the `bits' member; each names a character
   class property.  */
#define BIT_WORD      0x1
#define BIT_LOWER     0x2
#define BIT_PUNCT     0x4
#define BIT_SPACE     0x8
#define BIT_UPPER     0x10
#define BIT_MULTIBYTE 0x20
1992
1993
/* Append the pair (RANGE_START, RANGE_END) to WORK_AREA's table, after
   reserving two slots.  EXTEND_RANGE_TABLE may make the enclosing
   function return.  */
#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
  do { \
    EXTEND_RANGE_TABLE ((work_area), 2); \
    (work_area).table[(work_area).used++] = (range_start); \
    (work_area).table[(work_area).used++] = (range_end); \
  } while (0)
2000
2001
/* Release WORK_AREA's table if one was ever allocated.  The pointer
   itself is left unchanged.  */
#define FREE_RANGE_TABLE_WORK_AREA(work_area) \
  do { \
    if ((work_area).table) \
      free ((work_area).table); \
  } while (0)

/* Forget the collected ranges and flags; the storage is kept.  */
#define CLEAR_RANGE_TABLE_WORK_USED(work_area) \
  ((work_area).used = 0, (work_area).bits = 0)

/* Plain accessors: ints in use, accumulated flag bits, and the I-th
   table element (an lvalue).  */
#define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
#define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
2012
2013
2014
/* Turn on the bit for character C in the bitmap that starts at `b':
   byte C / BYTEWIDTH, bit C % BYTEWIDTH.  */
#define SET_LIST_BIT(c) \
  (b[(c) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
2016
2017
2018 #ifdef emacs
2019
2020 2021 2022 2023 2024 2025 2026 2027
2028
2029
2030
/* For every character C0 in FROM..TO, set the bitmap bit of its
   translation.  When the translation C1 is not ASCII, it is also added
   to WORK_AREA as the one-element range (C1, C1).  The bit that gets set
   is then the unibyte form of C1 or, if RE_CHAR_TO_UNIBYTE yields a
   negative value, C0 itself.  */
#define SETUP_ASCII_RANGE(work_area, FROM, TO) \
  do { \
    int C0; \
    int C1; \
    \
    for (C0 = (FROM); C0 <= (TO); C0++) \
      { \
        C1 = TRANSLATE (C0); \
        if (! ASCII_CHAR_P (C1)) \
          { \
            SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
            if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0) \
              C1 = C0; \
          } \
        SET_LIST_BIT (C1); \
      } \
  } while (0)
2047
2048
2049
2050
/* For every byte C0 in FROM..TO:
     - if its multibyte form C1 satisfies CHAR_BYTE8_P, just set the
       bitmap bit for C0;
     - otherwise translate C1 to C2, set the bitmap bit for the unibyte
       form of C2 (falling back to C0 when the translation is the
       identity or has no unibyte form), and record C2 in WORK_AREA.
   C2 is merged into one of the ranges this invocation has added (those
   at index >= USED) when it lies inside that range or directly next to
   it; otherwise it is appended as the new range (C2, C2).  */
#define SETUP_UNIBYTE_RANGE(work_area, FROM, TO) \
  do { \
    int C0; \
    int C1; \
    int C2; \
    int I; \
    int USED = RANGE_TABLE_WORK_USED (work_area); \
    \
    for (C0 = (FROM); C0 <= (TO); C0++) \
      { \
        C1 = RE_CHAR_TO_MULTIBYTE (C0); \
        if (CHAR_BYTE8_P (C1)) \
          SET_LIST_BIT (C0); \
        else \
          { \
            C2 = TRANSLATE (C1); \
            if (C2 == C1 \
                || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0) \
              C1 = C0; \
            SET_LIST_BIT (C1); \
            /* Scan this invocation's ranges, newest first.  */ \
            for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
              { \
                int from = RANGE_TABLE_WORK_ELT (work_area, I); \
                int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
                \
                if (C2 >= from - 1 && C2 <= to + 1) \
                  { \
                    if (C2 == from - 1) \
                      RANGE_TABLE_WORK_ELT (work_area, I)--; \
                    else if (C2 == to + 1) \
                      RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
                    break; \
                  } \
              } \
            /* No range absorbed C2: start a new one.  */ \
            if (I < USED) \
              SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2); \
          } \
      } \
  } while (0)
2087
2088
2089
2090
/* Record the range FROM..TO in WORK_AREA, then account for translation
   of each character C0 in it:
     - if the translation C1 has a unibyte form, or failing that C1
       differs from C0 and C0 itself has one, set that byte's bitmap bit;
     - if C1 falls outside FROM..TO, merge it into one of the ranges this
       invocation has added (index >= USED) when it lies inside that
       range or directly next to it; otherwise append (C1, C1).  */
#define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \
  do { \
    int C0; \
    int C1; \
    int C2; \
    int I; \
    int USED = RANGE_TABLE_WORK_USED (work_area); \
    \
    SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO)); \
    for (C0 = (FROM); C0 <= (TO); C0++) \
      { \
        C1 = TRANSLATE (C0); \
        if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0 \
            || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0)) \
          SET_LIST_BIT (C2); \
        /* Already covered by the range recorded above.  */ \
        if (C1 >= (FROM) && C1 <= (TO)) \
          continue; \
        for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
          { \
            int from = RANGE_TABLE_WORK_ELT (work_area, I); \
            int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
            \
            if (C1 >= from - 1 && C1 <= to + 1) \
              { \
                if (C1 == from - 1) \
                  RANGE_TABLE_WORK_ELT (work_area, I)--; \
                else if (C1 == to + 1) \
                  RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
                break; \
              } \
          } \
        /* No range absorbed C1: start a new one.  */ \
        if (I < USED) \
          SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
      } \
  } while (0)
2122
2123 #endif
2124
2125
/* Parse a run of decimal digits from the pattern into NUM, an int
   lvalue.  Uses `p', `pend' and `c' from the enclosing scope; on normal
   exit `c' holds the first non-digit character read.  If no digit is
   present NUM is left untouched; a negative NUM is reset to 0 before the
   first digit is accumulated.

   NOTE: every failure path leaves the enclosing function through
   FREE_STACK_RETURN:
     REG_EBRACE  the pattern ends before or right after a digit,
     REG_BADBR   the value does not fit in an int.

   The range check is done BEFORE multiplying, so the accumulation can
   never overflow a signed int (which would be undefined behavior).  The
   largest int is derived from unsigned arithmetic, which assumes int has
   no padding bits.  */
#define GET_UNSIGNED_NUMBER(num) \
  do { \
    if (p == pend) \
      FREE_STACK_RETURN (REG_EBRACE); \
    else \
      { \
        PATFETCH (c); \
        while ('0' <= c && c <= '9') \
          { \
            const int num_limit = (int) (~0u >> 1); \
            const int num_digit = c - '0'; \
            if ((num) < 0) \
              (num) = 0; \
            /* Would (num) * 10 + num_digit exceed num_limit?  */ \
            if ((num) > (num_limit - num_digit) / 10) \
              FREE_STACK_RETURN (REG_BADBR); \
            (num) = (num) * 10 + num_digit; \
            if (p == pend) \
              FREE_STACK_RETURN (REG_EBRACE); \
            PATFETCH (c); \
          } \
      } \
  } while (0)
2148
2149 #if ! WIDE_CHAR_SUPPORT
2150
2151
2152 re_wctype_t
2153 re_wctype (str)
2154 re_char *str;
2155 {
2156 const char *string = str;
2157 if (STREQ (string, "alnum")) return RECC_ALNUM;
2158 else if (STREQ (string, "alpha")) return RECC_ALPHA;
2159 else if (STREQ (string, "word")) return RECC_WORD;
2160 else if (STREQ (string, "ascii")) return RECC_ASCII;
2161 else if (STREQ (string, "nonascii")) return RECC_NONASCII;
2162 else if (STREQ (string, "graph")) return RECC_GRAPH;
2163 else if (STREQ (string, "lower")) return RECC_LOWER;
2164 else if (STREQ (string, "print")) return RECC_PRINT;
2165 else if (STREQ (string, "punct")) return RECC_PUNCT;
2166 else if (STREQ (string, "space")) return RECC_SPACE;
2167 else if (STREQ (string, "upper")) return RECC_UPPER;
2168 else if (STREQ (string, "unibyte")) return RECC_UNIBYTE;
2169 else if (STREQ (string, "multibyte")) return RECC_MULTIBYTE;
2170 else if (STREQ (string, "digit")) return RECC_DIGIT;
2171 else if (STREQ (string, "xdigit")) return RECC_XDIGIT;
2172 else if (STREQ (string, "cntrl")) return RECC_CNTRL;
2173 else if (STREQ (string, "blank")) return RECC_BLANK;
2174 else return 0;
2175 }
2176
2177
2178 boolean
2179 re_iswctype (ch, cc)
2180 int ch;
2181 re_wctype_t cc;
2182 {
2183 switch (cc)
2184 {
2185 case RECC_ALNUM: return ISALNUM (ch);
2186 case RECC_ALPHA: return ISALPHA (ch);
2187 case RECC_BLANK: return ISBLANK (ch);
2188 case RECC_CNTRL: return ISCNTRL (ch);
2189 case RECC_DIGIT: return ISDIGIT (ch);
2190 case RECC_GRAPH: return ISGRAPH (ch);
2191 case RECC_LOWER: return ISLOWER (ch);
2192 case RECC_PRINT: return ISPRINT (ch);
2193 case RECC_PUNCT: return ISPUNCT (ch);
2194 case RECC_SPACE: return ISSPACE (ch);
2195 case RECC_UPPER: return ISUPPER (ch);
2196 case RECC_XDIGIT: return ISXDIGIT (ch);
2197 case RECC_ASCII: return IS_REAL_ASCII (ch);
2198 case RECC_NONASCII: return !IS_REAL_ASCII (ch);
2199 case RECC_UNIBYTE: return ISUNIBYTE (ch);
2200 case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
2201 case RECC_WORD: return ISWORD (ch);
2202 case RECC_ERROR: return false;
2203 default:
2204 abort();
2205 }
2206 }
2207
2208 2209
2210 static int
2211 re_wctype_to_bit (cc)
2212 re_wctype_t cc;
2213 {
2214 switch (cc)
2215 {
2216 case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
2217 case RECC_MULTIBYTE: return BIT_MULTIBYTE;
2218 case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
2219 case RECC_LOWER: return BIT_LOWER;
2220 case RECC_UPPER: return BIT_UPPER;
2221 case RECC_PUNCT: return BIT_PUNCT;
2222 case RECC_SPACE: return BIT_SPACE;
2223 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2224 case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
2225 default:
2226 abort();
2227 }
2228 }
2229 #endif
2230
2231
2232
2233
2234
2235 static void
2236 extend_range_table_work_area (work_area)
2237 struct range_table_work_area *work_area;
2238 {
2239 work_area->allocated += 16 * sizeof (int);
2240 if (work_area->table)
2241 work_area->table
2242 = (int *) realloc (work_area->table, work_area->allocated);
2243 else
2244 work_area->table
2245 = (int *) malloc (work_area->allocated);
2246 }
2247
2248 #if 0
2249 #ifdef emacs
2250
2251 2252 2253 2254 2255 2256 2257 2258 2259 2260
2261
2262 static int
2263 set_image_of_range_1 (work_area, start, end, translate)
2264 RE_TRANSLATE_TYPE translate;
2265 struct range_table_work_area *work_area;
2266 re_wchar_t start, end;
2267 {
2268 2269 2270 2271 2272 2273 2274 2275 2276 2277
2278
2279 enum case_type {one_case, two_case, strange};
2280
2281 2282 2283 2284 2285 2286
2287
2288 enum case_type run_type = strange;
2289 int run_start, run_end, run_eqv_end;
2290
2291 Lisp_Object eqv_table;
2292
2293 if (!RE_TRANSLATE_P (translate))
2294 {
2295 EXTEND_RANGE_TABLE (work_area, 2);
2296 work_area->table[work_area->used++] = (start);
2297 work_area->table[work_area->used++] = (end);
2298 return -1;
2299 }
2300
2301 eqv_table = XCHAR_TABLE (translate)->extras[2];
2302
2303 for (; start <= end; start++)
2304 {
2305 enum case_type this_type;
2306 int eqv = RE_TRANSLATE (eqv_table, start);
2307 int minchar, maxchar;
2308
2309
2310 if (eqv == start)
2311 this_type = one_case;
2312 else if (RE_TRANSLATE (eqv_table, eqv) == start)
2313 this_type = two_case;
2314 else
2315 this_type = strange;
2316
2317 if (start < eqv)
2318 minchar = start, maxchar = eqv;
2319 else
2320 minchar = eqv, maxchar = start;
2321
2322
2323 if (this_type == strange || this_type != run_type
2324 || !(minchar == run_end + 1
2325 && (run_type == two_case
2326 ? maxchar == run_eqv_end + 1 : 1)))
2327 {
2328 2329
2330 if (run_type == one_case)
2331 {
2332 EXTEND_RANGE_TABLE (work_area, 2);
2333 work_area->table[work_area->used++] = run_start;
2334 work_area->table[work_area->used++] = run_end;
2335 }
2336 else if (run_type == two_case)
2337 {
2338 EXTEND_RANGE_TABLE (work_area, 4);
2339 work_area->table[work_area->used++] = run_start;
2340 work_area->table[work_area->used++] = run_end;
2341 work_area->table[work_area->used++]
2342 = RE_TRANSLATE (eqv_table, run_start);
2343 work_area->table[work_area->used++]
2344 = RE_TRANSLATE (eqv_table, run_end);
2345 }
2346 run_type = strange;
2347 }
2348
2349 if (this_type == strange)
2350 {
2351 2352
2353 do
2354 {
2355 EXTEND_RANGE_TABLE (work_area, 2);
2356 work_area->table[work_area->used++] = eqv;
2357 work_area->table[work_area->used++] = eqv;
2358 eqv = RE_TRANSLATE (eqv_table, eqv);
2359 }
2360 while (eqv != start);
2361 }
2362
2363
2364 else if (run_type == strange)
2365 {
2366
2367 run_type = this_type;
2368 run_start = start;
2369 run_end = start;
2370 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2371 }
2372 else
2373 {
2374
2375 run_end = minchar;
2376 run_eqv_end = RE_TRANSLATE (eqv_table, run_end);
2377 }
2378 }
2379
2380 2381
2382 if (run_type == one_case)
2383 {
2384 EXTEND_RANGE_TABLE (work_area, 2);
2385 work_area->table[work_area->used++] = run_start;
2386 work_area->table[work_area->used++] = run_end;
2387 }
2388 else if (run_type == two_case)
2389 {
2390 EXTEND_RANGE_TABLE (work_area, 4);
2391 work_area->table[work_area->used++] = run_start;
2392 work_area->table[work_area->used++] = run_end;
2393 work_area->table[work_area->used++]
2394 = RE_TRANSLATE (eqv_table, run_start);
2395 work_area->table[work_area->used++]
2396 = RE_TRANSLATE (eqv_table, run_end);
2397 }
2398
2399 return -1;
2400 }
2401
2402 #endif
2403
2404 2405 2406 2407 2408 2409 2410 2411 2412 2413
2414
2415 static int
2416 set_image_of_range (work_area, start, end, translate)
2417 RE_TRANSLATE_TYPE translate;
2418 struct range_table_work_area *work_area;
2419 re_wchar_t start, end;
2420 {
2421 re_wchar_t cmin, cmax;
2422
2423 #ifdef emacs
2424 2425 2426 2427
2428 if (RE_TRANSLATE_P (translate) && start < 04400
2429 && !(start < 04200 && end >= 04377))
2430 {
2431 int newend;
2432 int tem;
2433 newend = end;
2434 if (newend > 04377)
2435 newend = 04377;
2436 tem = set_image_of_range_1 (work_area, start, newend, translate);
2437 if (tem > 0)
2438 return tem;
2439
2440 start = 04400;
2441 if (end < 04400)
2442 return -1;
2443 }
2444 #endif
2445
2446 EXTEND_RANGE_TABLE (work_area, 2);
2447 work_area->table[work_area->used++] = (start);
2448 work_area->table[work_area->used++] = (end);
2449
2450 cmin = -1, cmax = -1;
2451
2452 if (RE_TRANSLATE_P (translate))
2453 {
2454 int ch;
2455
2456 for (ch = start; ch <= end; ch++)
2457 {
2458 re_wchar_t c = TRANSLATE (ch);
2459 if (! (start <= c && c <= end))
2460 {
2461 if (cmin == -1)
2462 cmin = c, cmax = c;
2463 else
2464 {
2465 cmin = MIN (cmin, c);
2466 cmax = MAX (cmax, c);
2467 }
2468 }
2469 }
2470
2471 if (cmin != -1)
2472 {
2473 EXTEND_RANGE_TABLE (work_area, 2);
2474 work_area->table[work_area->used++] = (cmin);
2475 work_area->table[work_area->used++] = (cmax);
2476 }
2477 }
2478
2479 return -1;
2480 }
2481 #endif
2482
2483 #ifndef MATCH_MAY_ALLOCATE
2484
2485 2486 2487 2488 2489 2490
2491
2492 static fail_stack_type fail_stack;
2493
2494 2495 2496
2497 static int regs_allocated_size;
2498
2499 static re_char ** regstart, ** regend;
2500 static re_char **best_regstart, **best_regend;
2501
2502 2503
2504
2505 static
2506 regex_grow_registers (num_regs)
2507 int num_regs;
2508 {
2509 if (num_regs > regs_allocated_size)
2510 {
2511 RETALLOC_IF (regstart, num_regs, re_char *);
2512 RETALLOC_IF (regend, num_regs, re_char *);
2513 RETALLOC_IF (best_regstart, num_regs, re_char *);
2514 RETALLOC_IF (best_regend, num_regs, re_char *);
2515
2516 regs_allocated_size = num_regs;
2517 }
2518 }
2519
2520 #endif
2521
2522 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2523 compile_stack,
2524 regnum_t regnum));
2525
2526 2527 2528 2529 2530 2531 2532 2533 2534 2535 2536 2537 2538 2539 2540 2541
2542
2543 2544
/* If an alternation jump is waiting to be fixed up (`fixup_alt_jump' is
   non-null), store a `jump' there that targets the current end of the
   pattern, `b'.  */
#define FIXUP_ALT_JUMP() \
  do { \
    if (fixup_alt_jump) \
      STORE_JUMP (jump, fixup_alt_jump, b); \
  } while (0)

/* Leave the enclosing function with VALUE after releasing its two heap
   work areas: the range table held in `range_table_work' and the storage
   of `compile_stack'.  */
#define FREE_STACK_RETURN(value) \
  do { \
    FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
    free (compile_stack.stack); \
    return value; \
  } while (0)
2559
2560 static reg_errcode_t
2561 regex_compile (pattern, size, syntax, bufp)
2562 re_char *pattern;
2563 size_t size;
2564 reg_syntax_t syntax;
2565 struct re_pattern_buffer *bufp;
2566 {
2567
2568 register re_wchar_t c, c1;
2569
2570
2571 re_char *p1;
2572
2573
2574 register unsigned char *b;
2575
2576
2577 compile_stack_type compile_stack;
2578
2579
2580 #ifdef AIX
2581
2582 unsigned char *p = pattern;
2583 #else
2584 re_char *p = pattern;
2585 #endif
2586 re_char *pend = pattern + size;
2587
2588
2589 RE_TRANSLATE_TYPE translate = bufp->translate;
2590
2591 2592 2593 2594
2595 unsigned char *pending_exact = 0;
2596
2597 2598 2599
2600 unsigned char *laststart = 0;
2601
2602
2603 unsigned char *begalt;
2604
2605 2606
2607 re_char *beg_interval;
2608
2609 2610 2611
2612 unsigned char *fixup_alt_jump = 0;
2613
2614
2615 struct range_table_work_area range_table_work;
2616
2617
2618 const boolean multibyte = RE_MULTIBYTE_P (bufp);
2619
2620
2621 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
2622
2623
2624 int in_subpattern = 0;
2625
2626 2627
2628 re_char *main_p;
2629 re_char *main_pattern;
2630 re_char *main_pend;
2631
2632 #ifdef DEBUG
2633 debug++;
2634 DEBUG_PRINT1 ("\nCompiling pattern: ");
2635 if (debug > 0)
2636 {
2637 unsigned debug_count;
2638
2639 for (debug_count = 0; debug_count < size; debug_count++)
2640 putchar (pattern[debug_count]);
2641 putchar ('\n');
2642 }
2643 #endif
2644
2645
2646 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2647 if (compile_stack.stack == NULL)
2648 return REG_ESPACE;
2649
2650 compile_stack.size = INIT_COMPILE_STACK_SIZE;
2651 compile_stack.avail = 0;
2652
2653 range_table_work.table = 0;
2654 range_table_work.allocated = 0;
2655
2656
2657 bufp->syntax = syntax;
2658 bufp->fastmap_accurate = 0;
2659 bufp->not_bol = bufp->not_eol = 0;
2660 bufp->used_syntax = 0;
2661
2662 2663 2664
2665 bufp->used = 0;
2666
2667
2668 bufp->re_nsub = 0;
2669
2670 #if !defined emacs && !defined SYNTAX_TABLE
2671
2672 init_syntax_once ();
2673 #endif
2674
2675 if (bufp->allocated == 0)
2676 {
2677 if (bufp->buffer)
2678 { 2679 2680
2681 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2682 }
2683 else
2684 {
2685 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2686 }
2687 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2688
2689 bufp->allocated = INIT_BUF_SIZE;
2690 }
2691
2692 begalt = b = bufp->buffer;
2693
2694
2695 while (1)
2696 {
2697 if (p == pend)
2698 {
2699 2700
2701 if (in_subpattern)
2702 {
2703 in_subpattern = 0;
2704 pattern = main_pattern;
2705 p = main_p;
2706 pend = main_pend;
2707 continue;
2708 }
2709
2710 break;
2711 }
2712
2713 PATFETCH (c);
2714
2715 switch (c)
2716 {
2717 case ' ':
2718 {
2719 re_char *p1 = p;
2720
2721 2722
2723 if (!whitespace_regexp || in_subpattern)
2724 goto normal_char;
2725
2726
2727 while (p1 != pend)
2728 {
2729 if (*p1 != ' ')
2730 break;
2731 p1++;
2732 }
2733 2734
2735 if (p1 != pend
2736 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
2737 || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
2738 goto normal_char;
2739
2740
2741 in_subpattern = 1;
2742 main_p = p1;
2743 main_pend = pend;
2744 main_pattern = pattern;
2745 p = pattern = whitespace_regexp;
2746 pend = p + strlen (p);
2747 break;
2748 }
2749
2750 case '^':
2751 {
2752 if (
2753 p == pattern + 1
2754
2755 || syntax & RE_CONTEXT_INDEP_ANCHORS
2756
2757 || at_begline_loc_p (pattern, p, syntax))
2758 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2759 else
2760 goto normal_char;
2761 }
2762 break;
2763
2764
2765 case '$':
2766 {
2767 if (
2768 p == pend
2769
2770 || syntax & RE_CONTEXT_INDEP_ANCHORS
2771
2772 || at_endline_loc_p (p, pend, syntax))
2773 BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2774 else
2775 goto normal_char;
2776 }
2777 break;
2778
2779
2780 case '+':
2781 case '?':
2782 if ((syntax & RE_BK_PLUS_QM)
2783 || (syntax & RE_LIMITED_OPS))
2784 goto normal_char;
2785 handle_plus:
2786 case '*':
2787
2788 if (!laststart)
2789 {
2790 if (syntax & RE_CONTEXT_INVALID_OPS)
2791 FREE_STACK_RETURN (REG_BADRPT);
2792 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2793 goto normal_char;
2794 }
2795
2796 {
2797
2798 boolean zero_times_ok = 0, many_times_ok = 0;
2799 boolean greedy = 1;
2800
2801 2802 2803 2804
2805
2806 for (;;)
2807 {
2808 if ((syntax & RE_FRUGAL)
2809 && c == '?' && (zero_times_ok || many_times_ok))
2810 greedy = 0;
2811 else
2812 {
2813 zero_times_ok |= c != '+';
2814 many_times_ok |= c != '?';
2815 }
2816
2817 if (p == pend)
2818 break;
2819 else if (*p == '*'
2820 || (!(syntax & RE_BK_PLUS_QM)
2821 && (*p == '+' || *p == '?')))
2822 ;
2823 else if (syntax & RE_BK_PLUS_QM && *p == '\\')
2824 {
2825 if (p+1 == pend)
2826 FREE_STACK_RETURN (REG_EESCAPE);
2827 if (p[1] == '+' || p[1] == '?')
2828 PATFETCH (c);
2829 else
2830 break;
2831 }
2832 else
2833 break;
2834
2835 PATFETCH (c);
2836 }
2837
2838 2839
2840 if (!laststart || laststart == b)
2841 break;
2842
2843 2844
2845 if (greedy)
2846 {
2847 if (many_times_ok)
2848 {
2849 boolean simple = skip_one_char (laststart) == b;
2850 unsigned int startoffset = 0;
2851 re_opcode_t ofj =
2852
2853 (simple || !analyse_first (laststart, b, NULL, 0))
2854 ? on_failure_jump : on_failure_jump_loop;
2855 assert (skip_one_char (laststart) <= b);
2856
2857 if (!zero_times_ok && simple)
2858 { 2859 2860
2861 unsigned char *p1, *p2;
2862 startoffset = b - laststart;
2863 GET_BUFFER_SPACE (startoffset);
2864 p1 = b; p2 = laststart;
2865 while (p2 < p1)
2866 *b++ = *p2++;
2867 zero_times_ok = 1;
2868 }
2869
2870 GET_BUFFER_SPACE (6);
2871 if (!zero_times_ok)
2872
2873 STORE_JUMP (ofj, b, b + 6);
2874 else
2875 2876 2877 2878
2879 INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
2880 laststart + startoffset, b + 6);
2881 b += 3;
2882 STORE_JUMP (jump, b, laststart + startoffset);
2883 b += 3;
2884 }
2885 else
2886 {
2887
2888 assert (zero_times_ok);
2889 GET_BUFFER_SPACE (3);
2890 INSERT_JUMP (on_failure_jump, laststart, b + 3);
2891 b += 3;
2892 }
2893 }
2894 else
2895 {
2896
2897 GET_BUFFER_SPACE (7);
2898 if (many_times_ok)
2899 {
2900 boolean emptyp = analyse_first (laststart, b, NULL, 0);
2901
2902 2903 2904
2905 if (emptyp) BUF_PUSH (no_op);
2906 STORE_JUMP (emptyp ? on_failure_jump_nastyloop
2907 : on_failure_jump, b, laststart);
2908 b += 3;
2909 if (zero_times_ok)
2910 {
2911 2912 2913
2914 INSERT_JUMP (jump, laststart, b);
2915 b += 3;
2916 }
2917 }
2918 else
2919 {
2920
2921 INSERT_JUMP (jump, laststart, b + 3);
2922 b += 3;
2923 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2924 b += 3;
2925 }
2926 }
2927 }
2928 pending_exact = 0;
2929 break;
2930
2931
2932 case '.':
2933 laststart = b;
2934 BUF_PUSH (anychar);
2935 break;
2936
2937
2938 case '[':
2939 {
2940 CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2941
2942 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2943
2944 2945
2946 GET_BUFFER_SPACE (34);
2947
2948 laststart = b;
2949
2950 2951
2952 BUF_PUSH (*p == '^' ? charset_not : charset);
2953 if (*p == '^')
2954 p++;
2955
2956
2957 p1 = p;
2958
2959
2960 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2961
2962
2963 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2964
2965
2966 if ((re_opcode_t) b[-2] == charset_not
2967 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2968 SET_LIST_BIT ('\n');
2969
2970
2971 for (;;)
2972 {
2973 boolean escaped_char = false;
2974 const unsigned char *p2 = p;
2975 re_wchar_t ch, c2;
2976
2977 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2978
2979 2980 2981 2982
2983 PATFETCH (c);
2984
2985
2986 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2987 {
2988 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2989
2990 PATFETCH (c);
2991 escaped_char = true;
2992 }
2993 else
2994 {
2995 2996 2997
2998 if (c == ']' && p2 != p1)
2999 break;
3000 }
3001
3002 3003
3004
3005 if (!escaped_char &&
3006 syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
3007 {
3008
3009 unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
3010 const unsigned char *class_beg;
3011
3012 PATFETCH (c);
3013 c1 = 0;
3014 class_beg = p;
3015
3016
3017 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3018
3019 for (;;)
3020 {
3021 PATFETCH (c);
3022 if ((c == ':' && *p == ']') || p == pend)
3023 break;
3024 if (c1 < CHAR_CLASS_MAX_LENGTH)
3025 str[c1++] = c;
3026 else
3027
3028 str[0] = '\0';
3029 }
3030 str[c1] = '\0';
3031
3032 3033 3034 3035
3036 if (c == ':' && *p == ']')
3037 {
3038 re_wctype_t cc;
3039 int limit;
3040
3041 cc = re_wctype (str);
3042
3043 if (cc == 0)
3044 FREE_STACK_RETURN (REG_ECTYPE);
3045
3046 3047
3048 PATFETCH (c);
3049
3050 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
3051
3052 #ifndef emacs
3053 for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
3054 if (re_iswctype (btowc (ch), cc))
3055 {
3056 c = TRANSLATE (ch);
3057 if (c < (1 << BYTEWIDTH))
3058 SET_LIST_BIT (c);
3059 }
3060 #else
3061 3062 3063 3064 3065 3066
3067
3068 3069 3070 3071 3072
3073 SETUP_BUFFER_SYNTAX_TABLE ();
3074
3075 for (ch = 0; ch < 256; ++ch)
3076 {
3077 c = RE_CHAR_TO_MULTIBYTE (ch);
3078 if (! CHAR_BYTE8_P (c)
3079 && re_iswctype (c, cc))
3080 {
3081 SET_LIST_BIT (ch);
3082 c1 = TRANSLATE (c);
3083 if (c1 == c)
3084 continue;
3085 if (ASCII_CHAR_P (c1))
3086 SET_LIST_BIT (c1);
3087 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
3088 SET_LIST_BIT (c1);
3089 }
3090 }
3091 SET_RANGE_TABLE_WORK_AREA_BIT
3092 (range_table_work, re_wctype_to_bit (cc));
3093 #endif
3094 3095 3096 3097 3098
3099 if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
3100 bufp->used_syntax = 1;
3101
3102
3103 continue;
3104 }
3105 else
3106 {
3107
3108 p = class_beg;
3109 SET_LIST_BIT ('[');
3110
3111 3112 3113
3114 c = ':';
3115 }
3116 }
3117
3118 if (p < pend && p[0] == '-' && p[1] != ']')
3119 {
3120
3121
3122 PATFETCH (c1);
3123
3124
3125 PATFETCH (c1);
3126 #ifdef emacs
3127 if (CHAR_BYTE8_P (c1)
3128 && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
3129 3130
3131 c = c1 + 1;
3132 #endif
3133 }
3134 else
3135
3136 c1 = c;
3137
3138 if (c > c1)
3139 {
3140 if (syntax & RE_NO_EMPTY_RANGES)
3141 FREE_STACK_RETURN (REG_ERANGEX);
3142
3143 }
3144 else
3145 {
3146 #ifndef emacs
3147
3148 for (; c <= c1; c++)
3149 {
3150 ch = TRANSLATE (c);
3151 if (ch < (1 << BYTEWIDTH))
3152 SET_LIST_BIT (ch);
3153 }
3154 #else
3155 if (c < 128)
3156 {
3157 ch = MIN (127, c1);
3158 SETUP_ASCII_RANGE (range_table_work, c, ch);
3159 c = ch + 1;
3160 if (CHAR_BYTE8_P (c1))
3161 c = BYTE8_TO_CHAR (128);
3162 }
3163 if (c <= c1)
3164 {
3165 if (CHAR_BYTE8_P (c))
3166 {
3167 c = CHAR_TO_BYTE8 (c);
3168 c1 = CHAR_TO_BYTE8 (c1);
3169 for (; c <= c1; c++)
3170 SET_LIST_BIT (c);
3171 }
3172 else if (multibyte)
3173 {
3174 SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
3175 }
3176 else
3177 {
3178 SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
3179 }
3180 }
3181 #endif
3182 }
3183 }
3184
3185 3186
3187 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
3188 b[-1]--;
3189 b += b[-1];
3190
3191
3192 if (RANGE_TABLE_WORK_USED (range_table_work)
3193 || RANGE_TABLE_WORK_BITS (range_table_work))
3194 {
3195 int i;
3196 int used = RANGE_TABLE_WORK_USED (range_table_work);
3197
3198 3199 3200
3201 GET_BUFFER_SPACE (4 + used * 3);
3202
3203
3204 laststart[1] |= 0x80;
3205
3206 3207
3208 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
3209 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
3210
3211 STORE_NUMBER_AND_INCR (b, used / 2);
3212 for (i = 0; i < used; i++)
3213 STORE_CHARACTER_AND_INCR
3214 (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
3215 }
3216 }
3217 break;
3218
3219
3220 case '(':
3221 if (syntax & RE_NO_BK_PARENS)
3222 goto handle_open;
3223 else
3224 goto normal_char;
3225
3226
3227 case ')':
3228 if (syntax & RE_NO_BK_PARENS)
3229 goto handle_close;
3230 else
3231 goto normal_char;
3232
3233
3234 case '\n':
3235 if (syntax & RE_NEWLINE_ALT)
3236 goto handle_alt;
3237 else
3238 goto normal_char;
3239
3240
3241 case '|':
3242 if (syntax & RE_NO_BK_VBAR)
3243 goto handle_alt;
3244 else
3245 goto normal_char;
3246
3247
3248 case '{':
3249 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
3250 goto handle_interval;
3251 else
3252 goto normal_char;
3253
3254
3255 case '\\':
3256 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
3257
3258 3259 3260
3261 PATFETCH (c);
3262
3263 switch (c)
3264 {
3265 case '(':
3266 if (syntax & RE_NO_BK_PARENS)
3267 goto normal_backslash;
3268
3269 handle_open:
3270 {
3271 int shy = 0;
3272 regnum_t regnum = 0;
3273 if (p+1 < pend)
3274 {
3275
3276 if ((syntax & RE_SHY_GROUPS) && *p == '?')
3277 {
3278 PATFETCH (c);
3279 while (!shy)
3280 {
3281 PATFETCH (c);
3282 switch (c)
3283 {
3284 case ':': shy = 1; break;
3285 case '0':
3286 3287
3288 if (regnum == 0)
3289 FREE_STACK_RETURN (REG_BADPAT);
3290 case '1': case '2': case '3': case '4':
3291 case '5': case '6': case '7': case '8': case '9':
3292 regnum = 10*regnum + (c - '0'); break;
3293 default:
3294
3295 FREE_STACK_RETURN (REG_BADPAT);
3296 }
3297 }
3298 }
3299 }
3300
3301 if (!shy)
3302 regnum = ++bufp->re_nsub;
3303 else if (regnum)
3304 {
3305 shy = 0;
3306 if (regnum > bufp->re_nsub)
3307 bufp->re_nsub = regnum;
3308 else if (regnum > bufp->re_nsub
3309 3310 3311 3312 3313
3314 || group_in_compile_stack (compile_stack, regnum))
3315 FREE_STACK_RETURN (REG_BADPAT);
3316 }
3317 else
3318
3319 regnum = - bufp->re_nsub;
3320
3321 if (COMPILE_STACK_FULL)
3322 {
3323 RETALLOC (compile_stack.stack, compile_stack.size << 1,
3324 compile_stack_elt_t);
3325 if (compile_stack.stack == NULL) return REG_ESPACE;
3326
3327 compile_stack.size <<= 1;
3328 }
3329
3330 3331 3332 3333
3334 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
3335 COMPILE_STACK_TOP.fixup_alt_jump
3336 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
3337 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
3338 COMPILE_STACK_TOP.regnum = regnum;
3339
3340 3341
3342 if (regnum <= MAX_REGNUM && regnum > 0)
3343 BUF_PUSH_2 (start_memory, regnum);
3344
3345 compile_stack.avail++;
3346
3347 fixup_alt_jump = 0;
3348 laststart = 0;
3349 begalt = b;
3350 3351 3352
3353 pending_exact = 0;
3354 break;
3355 }
3356
3357 case ')':
3358 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
3359
3360 if (COMPILE_STACK_EMPTY)
3361 {
3362 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3363 goto normal_backslash;
3364 else
3365 FREE_STACK_RETURN (REG_ERPAREN);
3366 }
3367
3368 handle_close:
3369 FIXUP_ALT_JUMP ();
3370
3371
3372 if (COMPILE_STACK_EMPTY)
3373 {
3374 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
3375 goto normal_char;
3376 else
3377 FREE_STACK_RETURN (REG_ERPAREN);
3378 }
3379
3380 3381
3382 assert (compile_stack.avail != 0);
3383 {
3384 3385 3386
3387 regnum_t regnum;
3388
3389 compile_stack.avail--;
3390 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
3391 fixup_alt_jump
3392 = COMPILE_STACK_TOP.fixup_alt_jump
3393 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
3394 : 0;
3395 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
3396 regnum = COMPILE_STACK_TOP.regnum;
3397 3398 3399
3400 pending_exact = 0;
3401
3402 3403
3404 if (regnum <= MAX_REGNUM && regnum > 0)
3405 BUF_PUSH_2 (stop_memory, regnum);
3406 }
3407 break;
3408
3409
3410 case '|':
3411 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
3412 goto normal_backslash;
3413 handle_alt:
3414 if (syntax & RE_LIMITED_OPS)
3415 goto normal_char;
3416
3417 3418
3419 GET_BUFFER_SPACE (3);
3420 INSERT_JUMP (on_failure_jump, begalt, b + 6);
3421 pending_exact = 0;
3422 b += 3;
3423
3424 3425 3426 3427 3428 3429 3430 3431 3432 3433 3434 3435 3436 3437 3438
3439
3440 FIXUP_ALT_JUMP ();
3441
3442 3443 3444
3445 fixup_alt_jump = b;
3446 GET_BUFFER_SPACE (3);
3447 b += 3;
3448
3449 laststart = 0;
3450 begalt = b;
3451 break;
3452
3453
3454 case '{':
3455
3456 if (!(syntax & RE_INTERVALS)
3457 3458
3459 || (syntax & RE_NO_BK_BRACES))
3460 goto normal_backslash;
3461
3462 handle_interval:
3463 {
3464
3465
3466
3467 int lower_bound = 0, upper_bound = -1;
3468
3469 beg_interval = p;
3470
3471 GET_UNSIGNED_NUMBER (lower_bound);
3472
3473 if (c == ',')
3474 GET_UNSIGNED_NUMBER (upper_bound);
3475 else
3476
3477 upper_bound = lower_bound;
3478
3479 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
3480 || (upper_bound >= 0 && lower_bound > upper_bound))
3481 FREE_STACK_RETURN (REG_BADBR);
3482
3483 if (!(syntax & RE_NO_BK_BRACES))
3484 {
3485 if (c != '\\')
3486 FREE_STACK_RETURN (REG_BADBR);
3487 if (p == pend)
3488 FREE_STACK_RETURN (REG_EESCAPE);
3489 PATFETCH (c);
3490 }
3491
3492 if (c != '}')
3493 FREE_STACK_RETURN (REG_BADBR);
3494
3495
3496
3497
3498 if (!laststart)
3499 {
3500 if (syntax & RE_CONTEXT_INVALID_OPS)
3501 FREE_STACK_RETURN (REG_BADRPT);
3502 else if (syntax & RE_CONTEXT_INDEP_OPS)
3503 laststart = b;
3504 else
3505 goto unfetch_interval;
3506 }
3507
3508 if (upper_bound == 0)
3509 3510
3511 b = laststart;
3512 else if (lower_bound == 1 && upper_bound == 1)
3513
3514 ;
3515
3516 3517 3518 3519 3520 3521 3522 3523 3524
3525 else
3526 { 3527
3528 unsigned int nbytes = (upper_bound < 0 ? 3
3529 : upper_bound > 1 ? 5 : 0);
3530 unsigned int startoffset = 0;
3531
3532 GET_BUFFER_SPACE (20);
3533
3534 if (lower_bound == 0)
3535 {
3536 3537
3538 INSERT_JUMP (on_failure_jump_loop, laststart,
3539 b + 3 + nbytes);
3540 b += 3;
3541 }
3542 else
3543 {
3544 3545 3546 3547 3548
3549 INSERT_JUMP2 (succeed_n, laststart,
3550 b + 5 + nbytes,
3551 lower_bound);
3552 b += 5;
3553
3554 3555 3556 3557
3558 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3559 b += 5;
3560 startoffset += 5;
3561 }
3562
3563 if (upper_bound < 0)
3564 {
3565 3566
3567 STORE_JUMP (jump, b, laststart + startoffset);
3568 b += 3;
3569 }
3570 else if (upper_bound > 1)
3571 { 3572 3573 3574 3575 3576 3577
3578 STORE_JUMP2 (jump_n, b, laststart + startoffset,
3579 upper_bound - 1);
3580 b += 5;
3581
3582 3583 3584 3585 3586 3587 3588 3589 3590 3591 3592 3593 3594 3595
3596 insert_op2 (set_number_at, laststart, b - laststart,
3597 upper_bound - 1, b);
3598 b += 5;
3599 }
3600 }
3601 pending_exact = 0;
3602 beg_interval = NULL;
3603 }
3604 break;
3605
3606 unfetch_interval:
3607
3608 assert (beg_interval);
3609 p = beg_interval;
3610 beg_interval = NULL;
3611
3612
3613 c = '{';
3614
3615 if (!(syntax & RE_NO_BK_BRACES))
3616 {
3617 assert (p > pattern && p[-1] == '\\');
3618 goto normal_backslash;
3619 }
3620 else
3621 goto normal_char;
3622
3623 #ifdef emacs
3624 3625
3626 case '=':
3627 BUF_PUSH (at_dot);
3628 break;
3629
3630 case 's':
3631 laststart = b;
3632 PATFETCH (c);
3633 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3634 break;
3635
3636 case 'S':
3637 laststart = b;
3638 PATFETCH (c);
3639 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3640 break;
3641
3642 case 'c':
3643 laststart = b;
3644 PATFETCH (c);
3645 BUF_PUSH_2 (categoryspec, c);
3646 break;
3647
3648 case 'C':
3649 laststart = b;
3650 PATFETCH (c);
3651 BUF_PUSH_2 (notcategoryspec, c);
3652 break;
3653 #endif
3654
3655
3656 case 'w':
3657 if (syntax & RE_NO_GNU_OPS)
3658 goto normal_char;
3659 laststart = b;
3660 BUF_PUSH_2 (syntaxspec, Sword);
3661 break;
3662
3663
3664 case 'W':
3665 if (syntax & RE_NO_GNU_OPS)
3666 goto normal_char;
3667 laststart = b;
3668 BUF_PUSH_2 (notsyntaxspec, Sword);
3669 break;
3670
3671
3672 case '<':
3673 if (syntax & RE_NO_GNU_OPS)
3674 goto normal_char;
3675 BUF_PUSH (wordbeg);
3676 break;
3677
3678 case '>':
3679 if (syntax & RE_NO_GNU_OPS)
3680 goto normal_char;
3681 BUF_PUSH (wordend);
3682 break;
3683
3684 case '_':
3685 if (syntax & RE_NO_GNU_OPS)
3686 goto normal_char;
3687 laststart = b;
3688 PATFETCH (c);
3689 if (c == '<')
3690 BUF_PUSH (symbeg);
3691 else if (c == '>')
3692 BUF_PUSH (symend);
3693 else
3694 FREE_STACK_RETURN (REG_BADPAT);
3695 break;
3696
3697 case 'b':
3698 if (syntax & RE_NO_GNU_OPS)
3699 goto normal_char;
3700 BUF_PUSH (wordbound);
3701 break;
3702
3703 case 'B':
3704 if (syntax & RE_NO_GNU_OPS)
3705 goto normal_char;
3706 BUF_PUSH (notwordbound);
3707 break;
3708
3709 case '`':
3710 if (syntax & RE_NO_GNU_OPS)
3711 goto normal_char;
3712 BUF_PUSH (begbuf);
3713 break;
3714
3715 case '\'':
3716 if (syntax & RE_NO_GNU_OPS)
3717 goto normal_char;
3718 BUF_PUSH (endbuf);
3719 break;
3720
3721 case '1': case '2': case '3': case '4': case '5':
3722 case '6': case '7': case '8': case '9':
3723 {
3724 regnum_t reg;
3725
3726 if (syntax & RE_NO_BK_REFS)
3727 goto normal_backslash;
3728
3729 reg = c - '0';
3730
3731 if (reg > bufp->re_nsub || reg < 1
3732
3733 || group_in_compile_stack (compile_stack, reg))
3734 FREE_STACK_RETURN (REG_ESUBREG);
3735
3736 laststart = b;
3737 BUF_PUSH_2 (duplicate, reg);
3738 }
3739 break;
3740
3741
3742 case '+':
3743 case '?':
3744 if (syntax & RE_BK_PLUS_QM)
3745 goto handle_plus;
3746 else
3747 goto normal_backslash;
3748
3749 default:
3750 normal_backslash:
3751 3752 3753
3754 goto normal_char;
3755 }
3756 break;
3757
3758
3759 default:
3760
3761 normal_char:
3762
3763 if (!pending_exact
3764
3765
3766 || pending_exact + *pending_exact + 1 != b
3767
3768
3769 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
3770
3771
3772 || (p != pend && (*p == '*' || *p == '^'))
3773 || ((syntax & RE_BK_PLUS_QM)
3774 ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3775 : p != pend && (*p == '+' || *p == '?'))
3776 || ((syntax & RE_INTERVALS)
3777 && ((syntax & RE_NO_BK_BRACES)
3778 ? p != pend && *p == '{'
3779 : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
3780 {
3781
3782
3783 laststart = b;
3784
3785 BUF_PUSH_2 (exactn, 0);
3786 pending_exact = b - 1;
3787 }
3788
3789 GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
3790 {
3791 int len;
3792
3793 if (multibyte)
3794 {
3795 c = TRANSLATE (c);
3796 len = CHAR_STRING (c, b);
3797 b += len;
3798 }
3799 else
3800 {
3801 c1 = RE_CHAR_TO_MULTIBYTE (c);
3802 if (! CHAR_BYTE8_P (c1))
3803 {
3804 re_wchar_t c2 = TRANSLATE (c1);
3805
3806 if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
3807 c = c1;
3808 }
3809 *b++ = c;
3810 len = 1;
3811 }
3812 (*pending_exact) += len;
3813 }
3814
3815 break;
3816 }
3817 }
3818
3819
3820
3821
3822 FIXUP_ALT_JUMP ();
3823
3824 if (!COMPILE_STACK_EMPTY)
3825 FREE_STACK_RETURN (REG_EPAREN);
3826
3827 3828
3829 if (syntax & RE_NO_POSIX_BACKTRACKING)
3830 BUF_PUSH (succeed);
3831
3832
3833 bufp->used = b - bufp->buffer;
3834
3835 #ifdef DEBUG
3836 if (debug > 0)
3837 {
3838 re_compile_fastmap (bufp);
3839 DEBUG_PRINT1 ("\nCompiled pattern: \n");
3840 print_compiled_pattern (bufp);
3841 }
3842 debug--;
3843 #endif
3844
3845 #ifndef MATCH_MAY_ALLOCATE
3846 3847 3848
3849 {
3850 int num_regs = bufp->re_nsub + 1;
3851
3852 if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
3853 {
3854 fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
3855
3856 if (! fail_stack.stack)
3857 fail_stack.stack
3858 = (fail_stack_elt_t *) malloc (fail_stack.size
3859 * sizeof (fail_stack_elt_t));
3860 else
3861 fail_stack.stack
3862 = (fail_stack_elt_t *) realloc (fail_stack.stack,
3863 (fail_stack.size
3864 * sizeof (fail_stack_elt_t)));
3865 }
3866
3867 regex_grow_registers (num_regs);
3868 }
3869 #endif
3870
3871 FREE_STACK_RETURN (REG_NOERROR);
3872 }
3873
3874
3875
3876
3877
3878 static void
3879 store_op1 (op, loc, arg)
3880 re_opcode_t op;
3881 unsigned char *loc;
3882 int arg;
3883 {
3884 *loc = (unsigned char) op;
3885 STORE_NUMBER (loc + 1, arg);
3886 }
3887
3888
3889
3890
3891 static void
3892 store_op2 (op, loc, arg1, arg2)
3893 re_opcode_t op;
3894 unsigned char *loc;
3895 int arg1, arg2;
3896 {
3897 *loc = (unsigned char) op;
3898 STORE_NUMBER (loc + 1, arg1);
3899 STORE_NUMBER (loc + 3, arg2);
3900 }
3901
3902
3903 3904
3905
3906 static void
3907 insert_op1 (op, loc, arg, end)
3908 re_opcode_t op;
3909 unsigned char *loc;
3910 int arg;
3911 unsigned char *end;
3912 {
3913 register unsigned char *pfrom = end;
3914 register unsigned char *pto = end + 3;
3915
3916 while (pfrom != loc)
3917 *--pto = *--pfrom;
3918
3919 store_op1 (op, loc, arg);
3920 }
3921
3922
3923
3924
3925 static void
3926 insert_op2 (op, loc, arg1, arg2, end)
3927 re_opcode_t op;
3928 unsigned char *loc;
3929 int arg1, arg2;
3930 unsigned char *end;
3931 {
3932 register unsigned char *pfrom = end;
3933 register unsigned char *pto = end + 5;
3934
3935 while (pfrom != loc)
3936 *--pto = *--pfrom;
3937
3938 store_op2 (op, loc, arg1, arg2);
3939 }
3940
3941
3942 3943 3944
3945
3946 static boolean
3947 at_begline_loc_p (pattern, p, syntax)
3948 re_char *pattern, *p;
3949 reg_syntax_t syntax;
3950 {
3951 re_char *prev = p - 2;
3952 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3953
3954 return
3955
3956 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3957
3958 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
3959
3960 || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
3961 && prev[-1] == '?' && prev[-2] == '('
3962 && (syntax & RE_NO_BK_PARENS
3963 || (prev - 3 >= pattern && prev[-3] == '\\')));
3964 }
3965
3966
3967 3968
3969
3970 static boolean
3971 at_endline_loc_p (p, pend, syntax)
3972 re_char *p, *pend;
3973 reg_syntax_t syntax;
3974 {
3975 re_char *next = p;
3976 boolean next_backslash = *next == '\\';
3977 re_char *next_next = p + 1 < pend ? p + 1 : 0;
3978
3979 return
3980
3981 (syntax & RE_NO_BK_PARENS ? *next == ')'
3982 : next_backslash && next_next && *next_next == ')')
3983
3984 || (syntax & RE_NO_BK_VBAR ? *next == '|'
3985 : next_backslash && next_next && *next_next == '|');
3986 }
3987
3988
3989 3990
3991
3992 static boolean
3993 group_in_compile_stack (compile_stack, regnum)
3994 compile_stack_type compile_stack;
3995 regnum_t regnum;
3996 {
3997 int this_element;
3998
3999 for (this_element = compile_stack.avail - 1;
4000 this_element >= 0;
4001 this_element--)
4002 if (compile_stack.stack[this_element].regnum == regnum)
4003 return true;
4004
4005 return false;
4006 }
4007
4008 4009 4010 4011 4012 4013 4014 4015 4016
4017
4018 static int
4019 analyse_first (p, pend, fastmap, multibyte)
4020 re_char *p, *pend;
4021 char *fastmap;
4022 const int multibyte;
4023 {
4024 int j, k;
4025 boolean not;
4026
4027 4028
4029 boolean match_any_multibyte_characters = false;
4030
4031 assert (p);
4032
4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046
4047
4048 while (p < pend)
4049 {
4050 4051 4052 4053 4054 4055 4056 4057
4058 re_char *p1 = p;
4059
4060 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4061 {
4062 case succeed:
4063 return 1;
4064 continue;
4065
4066 case duplicate:
4067 4068 4069 4070
4071 p++;
4072 continue;
4073
4074
4075 4076
4077
4078 case exactn:
4079 if (fastmap)
4080 {
4081 4082 4083 4084
4085 fastmap[p[1]] = 1;
4086 if (! multibyte)
4087 {
4088 4089 4090
4091 int c = RE_CHAR_TO_MULTIBYTE (p[1]);
4092
4093 fastmap[CHAR_LEADING_CODE (c)] = 1;
4094 }
4095 }
4096 break;
4097
4098
4099 case anychar:
4100 4101
4102 if (!fastmap) break;
4103 return -1;
4104
4105
4106 case charset_not:
4107 if (!fastmap) break;
4108 {
4109
4110 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
4111 j < (1 << BYTEWIDTH); j++)
4112 fastmap[j] = 1;
4113 }
4114
4115
4116 case charset:
4117 if (!fastmap) break;
4118 not = (re_opcode_t) *(p - 1) == charset_not;
4119 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
4120 j >= 0; j--)
4121 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
4122 fastmap[j] = 1;
4123
4124 #ifdef emacs
4125 if (4126
4127 not
4128 ||
4129 4130
4131 (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
4132 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
4133
4134 {
4135 if (match_any_multibyte_characters == false)
4136 {
4137 for (j = MIN_MULTIBYTE_LEADING_CODE;
4138 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4139 fastmap[j] = 1;
4140 match_any_multibyte_characters = true;
4141 }
4142 }
4143
4144 else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
4145 && match_any_multibyte_characters == false)
4146 {
4147 4148
4149 int c, count;
4150 unsigned char lc1, lc2;
4151
4152 4153
4154 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
4155
4156
4157 EXTRACT_NUMBER_AND_INCR (count, p);
4158 for (; count > 0; count--, p += 3)
4159 {
4160
4161 EXTRACT_CHARACTER (c, p);
4162 lc1 = CHAR_LEADING_CODE (c);
4163 p += 3;
4164 EXTRACT_CHARACTER (c, p);
4165 lc2 = CHAR_LEADING_CODE (c);
4166 for (j = lc1; j <= lc2; j++)
4167 fastmap[j] = 1;
4168 }
4169 }
4170 #endif
4171 break;
4172
4173 case syntaxspec:
4174 case notsyntaxspec:
4175 if (!fastmap) break;
4176 #ifndef emacs
4177 not = (re_opcode_t)p[-1] == notsyntaxspec;
4178 k = *p++;
4179 for (j = 0; j < (1 << BYTEWIDTH); j++)
4180 if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
4181 fastmap[j] = 1;
4182 break;
4183 #else
4184 4185
4186 return -1;
4187
4188 case categoryspec:
4189 case notcategoryspec:
4190 if (!fastmap) break;
4191 not = (re_opcode_t)p[-1] == notcategoryspec;
4192 k = *p++;
4193 for (j = (1 << BYTEWIDTH); j >= 0; j--)
4194 if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
4195 fastmap[j] = 1;
4196
4197 4198
4199 if (match_any_multibyte_characters == false)
4200 {
4201 for (j = MIN_MULTIBYTE_LEADING_CODE;
4202 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
4203 fastmap[j] = 1;
4204 match_any_multibyte_characters = true;
4205 }
4206 break;
4207
4208 4209
4210
4211 case before_dot:
4212 case at_dot:
4213 case after_dot:
4214 #endif
4215 case no_op:
4216 case begline:
4217 case endline:
4218 case begbuf:
4219 case endbuf:
4220 case wordbound:
4221 case notwordbound:
4222 case wordbeg:
4223 case wordend:
4224 case symbeg:
4225 case symend:
4226 continue;
4227
4228
4229 case jump:
4230 EXTRACT_NUMBER_AND_INCR (j, p);
4231 if (j < 0)
4232 4233
4234 break;
4235 p += j;
4236 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4237 {
4238 case on_failure_jump:
4239 case on_failure_keep_string_jump:
4240 case on_failure_jump_loop:
4241 case on_failure_jump_nastyloop:
4242 case on_failure_jump_smart:
4243 p++;
4244 break;
4245 default:
4246 continue;
4247 };
4248 4249
4250
4251
4252 case on_failure_jump:
4253 case on_failure_keep_string_jump:
4254 case on_failure_jump_nastyloop:
4255 case on_failure_jump_loop:
4256 case on_failure_jump_smart:
4257 EXTRACT_NUMBER_AND_INCR (j, p);
4258 if (p + j <= p1)
4259 ;
4260 else
4261 { 4262 4263
4264 int r = analyse_first (p, pend, fastmap, multibyte);
4265 if (r) return r;
4266 p += j;
4267 }
4268 continue;
4269
4270
4271 case jump_n:
4272
4273 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
4274 p += 4;
4275 4276 4277
4278 continue;
4279
4280 case succeed_n:
4281
4282 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
4283 p += 4;
4284 4285 4286
4287 continue;
4288
4289
4290 case set_number_at:
4291 p += 4;
4292 continue;
4293
4294
4295 case start_memory:
4296 case stop_memory:
4297 p += 1;
4298 continue;
4299
4300
4301 default:
4302 abort ();
4303 }
4304
4305 4306 4307
4308 return 0;
4309 }
4310
4311
4312 return 1;
4313
4314 }
4315
4316 4317 4318 4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331
4332
4333 int
4334 re_compile_fastmap (bufp)
4335 struct re_pattern_buffer *bufp;
4336 {
4337 char *fastmap = bufp->fastmap;
4338 int analysis;
4339
4340 assert (fastmap && bufp->buffer);
4341
4342 bzero (fastmap, 1 << BYTEWIDTH);
4343 bufp->fastmap_accurate = 1;
4344
4345 analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
4346 fastmap, RE_MULTIBYTE_P (bufp));
4347 bufp->can_be_null = (analysis != 0);
4348 return 0;
4349 }
4350
4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361 4362
4363
4364 void
4365 re_set_registers (bufp, regs, num_regs, starts, ends)
4366 struct re_pattern_buffer *bufp;
4367 struct re_registers *regs;
4368 unsigned num_regs;
4369 regoff_t *starts, *ends;
4370 {
4371 if (num_regs)
4372 {
4373 bufp->regs_allocated = REGS_REALLOCATE;
4374 regs->num_regs = num_regs;
4375 regs->start = starts;
4376 regs->end = ends;
4377 }
4378 else
4379 {
4380 bufp->regs_allocated = REGS_UNALLOCATED;
4381 regs->num_regs = 0;
4382 regs->start = regs->end = (regoff_t *) 0;
4383 }
4384 }
4385 WEAK_ALIAS (__re_set_registers, re_set_registers)
4386
4387
4388
4389 4390
4391
4392 int
4393 re_search (bufp, string, size, startpos, range, regs)
4394 struct re_pattern_buffer *bufp;
4395 const char *string;
4396 int size, startpos, range;
4397 struct re_registers *regs;
4398 {
4399 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
4400 regs, size);
4401 }
4402 WEAK_ALIAS (__re_search, re_search)
4403
4404
/* The three macros below address the "virtual" string formed by
   string1 (size1 bytes) followed by string2 (size2 bytes).  They
   expand to expressions over the names string1, string2, size1 and
   size2, which must be in scope where the macro is used.  */

/* Start of whichever string contains position P: string2 when P is at
   or past size1, string1 otherwise.  */
4405 #define HEAD_ADDR_VSTRING(P) \
4406 (((P) >= size1 ? string2 : string1))
4407
4408
/* One past the last byte of whichever string contains position P.  */
4409 #define STOP_ADDR_VSTRING(P) \
4410 (((P) >= size1 ? string2 + size2 : string1 + size1))
4411
4412
/* Address of position POS in the virtual string.  For POS >= size1 the
   base is string2 - size1, so adding POS lands inside string2.  */
4413 #define POS_ADDR_VSTRING(POS) \
4414 (((POS) >= size1 ? string2 - size1 : string1) + (POS))
4415
4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435
4436
4437 int
4438 re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
4439 struct re_pattern_buffer *bufp;
4440 const char *str1, *str2;
4441 int size1, size2;
4442 int startpos;
4443 int range;
4444 struct re_registers *regs;
4445 int stop;
4446 {
4447 int val;
4448 re_char *string1 = (re_char*) str1;
4449 re_char *string2 = (re_char*) str2;
4450 register char *fastmap = bufp->fastmap;
4451 register RE_TRANSLATE_TYPE translate = bufp->translate;
4452 int total_size = size1 + size2;
4453 int endpos = startpos + range;
4454 boolean anchored_start;
4455
4456 const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp);
4457
4458
4459 if (startpos < 0 || startpos > total_size)
4460 return -1;
4461
4462 4463 4464
4465 if (endpos < 0)
4466 range = 0 - startpos;
4467 else if (endpos > total_size)
4468 range = total_size - startpos;
4469
4470 4471
4472 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
4473 {
4474 if (startpos > 0)
4475 return -1;
4476 else
4477 range = 0;
4478 }
4479
4480 #ifdef emacs
4481 4482
4483 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
4484 {
4485 range = PT_BYTE - BEGV_BYTE - startpos;
4486 if (range < 0)
4487 return -1;
4488 }
4489 #endif
4490
4491
4492 if (fastmap && !bufp->fastmap_accurate)
4493 re_compile_fastmap (bufp);
4494
4495
4496 anchored_start = (bufp->buffer[0] == begline);
4497
4498 #ifdef emacs
4499 gl_state.object = re_match_object;
4500 {
4501 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
4502
4503 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4504 }
4505 #endif
4506
4507
4508 for (;;)
4509 {
4510 4511 4512 4513
4514 if (anchored_start && startpos > 0)
4515 {
4516 if (! ((startpos <= size1 ? string1[startpos - 1]
4517 : string2[startpos - size1 - 1])
4518 == '\n'))
4519 goto advance;
4520 }
4521
4522 4523 4524 4525
4526 if (fastmap && startpos < total_size && !bufp->can_be_null)
4527 {
4528 register re_char *d;
4529 register re_wchar_t buf_ch;
4530
4531 d = POS_ADDR_VSTRING (startpos);
4532
4533 if (range > 0)
4534 {
4535 register int lim = 0;
4536 int irange = range;
4537
4538 if (startpos < size1 && startpos + range >= size1)
4539 lim = range - (size1 - startpos);
4540
4541 4542
4543 if (RE_TRANSLATE_P (translate))
4544 {
4545 if (multibyte)
4546 while (range > lim)
4547 {
4548 int buf_charlen;
4549
4550 buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
4551 buf_ch = RE_TRANSLATE (translate, buf_ch);
4552 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
4553 break;
4554
4555 range -= buf_charlen;
4556 d += buf_charlen;
4557 }
4558 else
4559 while (range > lim)
4560 {
4561 register re_wchar_t ch, translated;
4562
4563 buf_ch = *d;
4564 ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4565 translated = RE_TRANSLATE (translate, ch);
4566 if (translated != ch
4567 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4568 buf_ch = ch;
4569 if (fastmap[buf_ch])
4570 break;
4571 d++;
4572 range--;
4573 }
4574 }
4575 else
4576 {
4577 if (multibyte)
4578 while (range > lim)
4579 {
4580 int buf_charlen;
4581
4582 buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
4583 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
4584 break;
4585 range -= buf_charlen;
4586 d += buf_charlen;
4587 }
4588 else
4589 while (range > lim && !fastmap[*d])
4590 {
4591 d++;
4592 range--;
4593 }
4594 }
4595 startpos += irange - range;
4596 }
4597 else
4598 {
4599 if (multibyte)
4600 {
4601 buf_ch = STRING_CHAR (d);
4602 buf_ch = TRANSLATE (buf_ch);
4603 if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
4604 goto advance;
4605 }
4606 else
4607 {
4608 register re_wchar_t ch, translated;
4609
4610 buf_ch = *d;
4611 ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
4612 translated = TRANSLATE (ch);
4613 if (translated != ch
4614 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
4615 buf_ch = ch;
4616 if (! fastmap[TRANSLATE (buf_ch)])
4617 goto advance;
4618 }
4619 }
4620 }
4621
4622
4623 if (range >= 0 && startpos == total_size && fastmap
4624 && !bufp->can_be_null)
4625 return -1;
4626
4627 val = re_match_2_internal (bufp, string1, size1, string2, size2,
4628 startpos, regs, stop);
4629
4630 if (val >= 0)
4631 return startpos;
4632
4633 if (val == -2)
4634 return -2;
4635
4636 advance:
4637 if (!range)
4638 break;
4639 else if (range > 0)
4640 {
4641
4642 if (multibyte)
4643 {
4644 re_char *p = POS_ADDR_VSTRING (startpos);
4645 re_char *pend = STOP_ADDR_VSTRING (startpos);
4646 int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
4647
4648 range -= len;
4649 if (range < 0)
4650 break;
4651 startpos += len;
4652 }
4653 else
4654 {
4655 range--;
4656 startpos++;
4657 }
4658 }
4659 else
4660 {
4661 range++;
4662 startpos--;
4663
4664
4665 if (multibyte)
4666 {
4667 re_char *p = POS_ADDR_VSTRING (startpos) + 1;
4668 re_char *p0 = p;
4669 re_char *phead = HEAD_ADDR_VSTRING (startpos);
4670
4671
4672 PREV_CHAR_BOUNDARY (p, phead);
4673 range += p0 - 1 - p;
4674 if (range > 0)
4675 break;
4676
4677 startpos -= p0 - 1 - p;
4678 }
4679 }
4680 }
4681 return -1;
4682 }
4683 WEAK_ALIAS (__re_search_2, re_search_2)
4684
4685
4686
/* Forward declaration of bcmp_translate; its definition is not part of
   this section of the file.  It takes two byte strings S1 and S2, a
   length LEN, a translation table TRANSLATE and a MULTIBYTE flag.
   NOTE(review): presumably it compares the strings after translation
   and returns zero when they are equal -- confirm at the definition.  */
4687 static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4688 register int len,
4689 RE_TRANSLATE_TYPE translate,
4690 const int multibyte));
4691
4692 4693
/* The macros in this group expand to code that uses names which must
   be in scope at the point of use: string1, string2, size1, size2,
   end1, end2, end_match_2, d, dend and the label `fail'.  */

/* Convert the pointer PTR into an offset in the virtual concatenation
   of string1 and string2: PTR - string1 when FIRST_STRING_P (PTR)
   holds, otherwise PTR - string2 + size1.  */
4694 #define POINTER_TO_OFFSET(ptr) \
4695 (FIRST_STRING_P (ptr) \
4696 ? ((regoff_t) ((ptr) - string1)) \
4697 : ((regoff_t) ((ptr) - string2 + size1)))
4698
4699 4700 4701 4702
/* Make sure there is a byte to read at D.  While D has reached DEND:
   if DEND is already end_match_2 there is nothing more, so jump to
   `fail'; otherwise continue in the second string by setting D to
   string2 and DEND to end_match_2.  */
4703 #define PREFETCH() \
4704 while (d == dend) \
4705 { \
4706 \
4707 if (dend == end_match_2) \
4708 goto fail; \
4709 \
4710 d = string2; \
4711 dend = end_match_2; \
4712 }
4713
4714 4715 4716 4717
/* Like PREFETCH, but never fails: if D is exactly at end1, continue in
   the second string.  Note that the definition ends with a trailing
   backslash, so the line after it is still part of the macro.  */
4718 #define PREFETCH_NOLIMIT() \
4719 if (d == end1) \
4720 { \
4721 d = string2; \
4722 dend = end_match_2; \
4723 } \
4724
4725 4726
/* AT_STRINGS_BEG (d) is true when D is the start of string1 (or of
   string2 if size1 is zero), or when size2 is zero.
   AT_STRINGS_END (d) is true when D equals end2.  */
4727 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4728 #define AT_STRINGS_END(d) ((d) == end2)
4729
4730
4731 4732 4733 4734
/* True when the character at D has syntax class Sword.  Two positions
   at the seam between the strings are redirected: D == end1 reads the
   first byte of string2, and D == string2 - 1 reads the last byte of
   string1.  */
4735 #define WORDCHAR_P(d) \
4736 (SYNTAX ((d) == end1 ? *string2 \
4737 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
4738 == Sword)
4739
4740
4741
4742 4743 4744 4745 4746 4747 4748
4749
/* Disabled by `#if 0': a word-boundary test built from WORDCHAR_P and
   the AT_STRINGS_* macros.  Kept for reference only; it is never
   compiled.  */
4750 #if 0
4751 4752
4753 #define AT_WORD_BOUNDARY(d) \
4754 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
4755 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4756 #endif
4757
4758
/* Cleanup macros for the matcher's working storage.

   When MATCH_MAY_ALLOCATE is defined, FREE_VAR (var) releases VAR with
   REGEX_FREE and sets it to NULL if it is non-null; the expansion ends
   in a dangling `else' so that the semicolon written after the macro
   call becomes the (empty) else branch.  FREE_VARIABLES () releases
   fail_stack.stack with REGEX_FREE_STACK and then applies FREE_VAR to
   regstart, regend, best_regstart and best_regend.

   When MATCH_MAY_ALLOCATE is not defined, FREE_VARIABLES () expands to
   a no-op expression.  */
4759 #ifdef MATCH_MAY_ALLOCATE
4760 # define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4761 # define FREE_VARIABLES() \
4762 do { \
4763 REGEX_FREE_STACK (fail_stack.stack); \
4764 FREE_VAR (regstart); \
4765 FREE_VAR (regend); \
4766 FREE_VAR (best_regstart); \
4767 FREE_VAR (best_regend); \
4768 } while (0)
4769 #else
4770 # define FREE_VARIABLES() ((void)0)
4771 #endif
4772
4773
4774
4775
4776 4777
4778 static re_char *
4779 skip_one_char (p)
4780 re_char *p;
4781 {
4782 switch (SWITCH_ENUM_CAST (*p++))
4783 {
4784 case anychar:
4785 break;
4786
4787 case exactn:
4788 p += *p + 1;
4789 break;
4790
4791 case charset_not:
4792 case charset:
4793 if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4794 {
4795 int mcnt;
4796 p = CHARSET_RANGE_TABLE (p - 1);
4797 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4798 p = CHARSET_RANGE_TABLE_END (p, mcnt);
4799 }
4800 else
4801 p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4802 break;
4803
4804 case syntaxspec:
4805 case notsyntaxspec:
4806 #ifdef emacs
4807 case categoryspec:
4808 case notcategoryspec:
4809 #endif
4810 p++;
4811 break;
4812
4813 default:
4814 p = NULL;
4815 }
4816 return p;
4817 }
4818
4819
4820
4821 static re_char *
4822 skip_noops (p, pend)
4823 re_char *p, *pend;
4824 {
4825 int mcnt;
4826 while (p < pend)
4827 {
4828 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4829 {
4830 case start_memory:
4831 case stop_memory:
4832 p += 2; break;
4833 case no_op:
4834 p += 1; break;
4835 case jump:
4836 p += 1;
4837 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4838 p += mcnt;
4839 break;
4840 default:
4841 return p;
4842 }
4843 }
4844 assert (p == pend);
4845 return p;
4846 }
4847
4848
/* Decide whether the single-character matcher at P1 and the code that
   follows at P2 are provably unable to both match the same next
   character.  BUFP is the pattern buffer that contains both positions.

   Returns 1 when exclusivity is established and 0 otherwise; 0 therefore
   also covers every opcode combination this function does not analyse.
   The caller visible in this file (the on_failure_jump_smart case of the
   matcher) uses a 1 result to select on_failure_keep_string_jump.  */
4849 static int
4850 mutually_exclusive_p (bufp, p1, p2)
4851 struct re_pattern_buffer *bufp;
4852 re_char *p1, *p2;
4853 {
4854 re_opcode_t op2;
4855 const boolean multibyte = RE_MULTIBYTE_P (bufp);
4856 unsigned char *pend = bufp->buffer + bufp->used;
4857
/* P1 must be a real instruction; P2 may be the end of the pattern.  */
4858 assert (p1 >= bufp->buffer && p1 < pend
4859 && p2 >= bufp->buffer && p2 <= pend);
4860
4861 4862 4863 4864
/* Step P2 past instructions that consume no text (see skip_noops).  */
4865 p2 = skip_noops (p2, pend);
4866 4867
4868
4869
4870 assert (p1 >= bufp->buffer && p1 < pend
4871 && p2 >= bufp->buffer && p2 <= pend);
4872
/* Running off the end of the pattern is treated like `succeed'.  */
4873 op2 = p2 == pend ? succeed : *p2;
4874
4875 switch (SWITCH_ENUM_CAST (op2))
4876 {
4877 case succeed:
4878 case endbuf:
4879
/* Exclusive whenever P1 is one of the ops skip_one_char recognises.  */
4880 if (skip_one_char (p1))
4881 {
4882 DEBUG_PRINT1 (" End of pattern: fast loop.\n");
4883 return 1;
4884 }
4885 break;
4886
4887 case endline:
4888 case exactn:
4889 {
/* C is the character P2 needs next: newline for endline, otherwise the
   first character of the exactn literal.  */
4890 register re_wchar_t c
4891 = (re_opcode_t) *p2 == endline ? '\n'
4892 : RE_STRING_CHAR (p2 + 2, multibyte);
4893
4894 if ((re_opcode_t) *p1 == exactn)
4895 {
/* Two literals: exclusive when their first characters differ.  */
4896 if (c != RE_STRING_CHAR (p1 + 2, multibyte))
4897 {
4898 DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]);
4899 return 1;
4900 }
4901 }
4902
4903 else if ((re_opcode_t) *p1 == charset
4904 || (re_opcode_t) *p1 == charset_not)
4905 {
/* `not' starts as "P1 is a negated set" and is flipped when C is found in
   the set, so afterwards it is nonzero exactly when P1 matches C.  */
4906 int not = (re_opcode_t) *p1 == charset_not;
4907
4908 4909
4910 if (! multibyte || IS_REAL_ASCII (c))
4911 {
/* Bitmap lookup, guarded by the bitmap's actual size.  */
4912 if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4913 && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4914 not = !not;
4915 }
4916 else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4917 CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
4918
4919 4920
/* P1 cannot match C, so the two are exclusive.  */
4921 if (!not)
4922 {
4923 DEBUG_PRINT1 (" No match => fast loop.\n");
4924 return 1;
4925 }
4926 }
4927 else if ((re_opcode_t) *p1 == anychar
4928 && c == '\n')
4929 {
4930 DEBUG_PRINT1 (" . != \\n => fast loop.\n");
4931 return 1;
4932 }
4933 }
4934 break;
4935
4936 case charset:
4937 {
4938 if ((re_opcode_t) *p1 == exactn)
4939
/* Reuse the exactn-versus-charset analysis above with the roles swapped.  */
4940 return mutually_exclusive_p (bufp, p2, p1);
4941
4942 4943 4944
/* Only the bitmaps are compared below, so this is skipped when P2 has a
   range table in a multibyte pattern.  */
4945 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4946 {
4947 4948 4949 4950 4951 4952 4953 4954 4955 4956
4957
4958 if ((re_opcode_t) *p1 == charset)
4959 {
4960 int idx;
4961 4962
/* Two positive sets: look for a bitmap byte with a bit set in both.  */
4963 for (idx = 0;
4964 (idx < (int) p2[1]
4965 && idx < CHARSET_BITMAP_SIZE (p1));
4966 idx++)
4967 if ((p2[2 + idx] & p1[2 + idx]) != 0)
4968 break;
4969
/* The scan ran to the end of either bitmap: no common bit was found.  */
4970 if (idx == p2[1]
4971 || idx == CHARSET_BITMAP_SIZE (p1))
4972 {
4973 DEBUG_PRINT1 (" No match => fast loop.\n");
4974 return 1;
4975 }
4976 }
4977 else if ((re_opcode_t) *p1 == charset_not)
4978 {
4979 int idx;
4980 4981
/* P1 negated: the scan stops at the first P2 bitmap byte that has a bit
   not also present in P1's bitmap (bytes beyond P1's bitmap must be 0).  */
4982 for (idx = 0; idx < (int) p2[1]; idx++)
4983 if (! (p2[2 + idx] == 0
4984 || (idx < CHARSET_BITMAP_SIZE (p1)
4985 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4986 break;
4987
/* Every bit of P2 is also in P1's bitmap, i.e. excluded by charset_not.  */
4988 if (idx == p2[1])
4989 {
4990 DEBUG_PRINT1 (" No match => fast loop.\n");
4991 return 1;
4992 }
4993 }
4994 }
4995 }
4996 break;
4997
4998 case charset_not:
4999 switch (SWITCH_ENUM_CAST (*p1))
5000 {
5001 case exactn:
5002 case charset:
5003
/* Handled by the cases above once the arguments are swapped.  */
5004 return mutually_exclusive_p (bufp, p2, p1);
5005 case charset_not:
5006 5007 5008 5009
/* Two negated sets are not analysed; fall out and return 0.  */
5010 break;
5011 }
5012 break;
5013
/* The remaining cases compare P1's operand byte (p1[1]) with a fixed
   syntax class or with P2's operand byte.  */
5014 case wordend:
5015 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
5016 case symend:
5017 return ((re_opcode_t) *p1 == syntaxspec
5018 && (p1[1] == Ssymbol || p1[1] == Sword));
5019 case notsyntaxspec:
5020 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
5021
5022 case wordbeg:
5023 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
5024 case symbeg:
5025 return ((re_opcode_t) *p1 == notsyntaxspec
5026 && (p1[1] == Ssymbol || p1[1] == Sword));
5027 case syntaxspec:
5028 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
5029
5030 case wordbound:
5031 return (((re_opcode_t) *p1 == notsyntaxspec
5032 || (re_opcode_t) *p1 == syntaxspec)
5033 && p1[1] == Sword);
5034
5035 #ifdef emacs
5036 case categoryspec:
5037 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
5038 case notcategoryspec:
5039 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
5040 #endif
5041
5042 default:
5043 ;
5044 }
5045
5046
/* Exclusivity could not be established.  */
5047 return 0;
5048 }
5049
5050
5051
5052
5053 #ifndef emacs
5054
5055
5056 int
5057 re_match (bufp, string, size, pos, regs)
5058 struct re_pattern_buffer *bufp;
5059 const char *string;
5060 int size, pos;
5061 struct re_registers *regs;
5062 {
5063 int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
5064 pos, regs, size);
5065 return result;
5066 }
5067 WEAK_ALIAS (__re_match, re_match)
5068 #endif
5069
5070 #ifdef emacs
5071 5072
/* Object that the next match operates on.  re_match_2 copies it into
   gl_state.object and hands it to SETUP_SYNTAX_TABLE_FOR_OBJECT before it
   starts matching, so callers are expected to set it beforehand.  */
5073 Lisp_Object re_match_object;
5074 #endif
5075
5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087
5088
5089 int
5090 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
5091 struct re_pattern_buffer *bufp;
5092 const char *string1, *string2;
5093 int size1, size2;
5094 int pos;
5095 struct re_registers *regs;
5096 int stop;
5097 {
5098 int result;
5099
5100 #ifdef emacs
5101 int charpos;
5102 gl_state.object = re_match_object;
5103 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
5104 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
5105 #endif
5106
5107 result = re_match_2_internal (bufp, (re_char*) string1, size1,
5108 (re_char*) string2, size2,
5109 pos, regs, stop);
5110 return result;
5111 }
5112 WEAK_ALIAS (__re_match_2, re_match_2)
5113
5114
5115 5116
/* The matcher proper: interpret the compiled pattern in BUFP against the
   text formed by STRING1 (SIZE1 bytes) followed by STRING2 (SIZE2 bytes),
   starting at offset POS and not matching beyond offset STOP.

   Returns the length of the match (offset of the match end minus POS) on
   success, -1 when POS is out of range or nothing matches, and -2 when an
   allocation fails.  When REGS is non-null and BUFP->no_sub is clear, the
   group offsets are stored into REGS; depending on BUFP->regs_allocated
   the arrays in REGS are allocated or grown here.

   The function is a backtracking interpreter: `p' walks the pattern, `d'
   walks the text, and alternatives are remembered on a failure stack that
   the `fail' label pops.  */
5117 static int
5118 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
5119 struct re_pattern_buffer *bufp;
5120 re_char *string1, *string2;
5121 int size1, size2;
5122 int pos;
5123 struct re_registers *regs;
5124 int stop;
5125 {
5126
/* Scratch values: MCNT holds operands and counters read from the pattern,
   REG indexes registers, NOT is the negation flag of the wordbound and
   syntaxspec/categoryspec cases.  */
5127 int mcnt;
5128 size_t reg;
5129 boolean not;
5130
5131
/* One past the last byte of STRING1 and of STRING2.  */
5132 re_char *end1, *end2;
5133
5134 5135
/* Where matching must stop inside each string, derived from STOP.  */
5136 re_char *end_match_1, *end_match_2;
5137
5138
/* Current text position, and the limit of the string D is in.  */
5139 re_char *d, *dend;
5140
5141 5142 5143 5144
/* Text position saved at the start of a multi-byte comparison so that D
   can be put back before failing (see exactn and duplicate).  */
5145 re_char *dfail;
5146
5147
/* Current position in, and end of, the compiled pattern.  */
5148 re_char *p = bufp->buffer;
5149 re_char *pend = p + bufp->used;
5150
5151
5152 RE_TRANSLATE_TYPE translate = bufp->translate;
5153
5154
5155 const boolean multibyte = RE_MULTIBYTE_P (bufp);
5156
5157
5158 const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
5159
5160 5161 5162 5163 5164 5165 5166
/* The failure stack is a local only when MATCH_MAY_ALLOCATE is defined.  */
5167 #ifdef MATCH_MAY_ALLOCATE
5168 fail_stack_type fail_stack;
5169 #endif
5170 #ifdef DEBUG
5171 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
5172 #endif
5173
5174 #if defined REL_ALLOC && defined REGEX_MALLOC
5175 5176
5177 fail_stack_elt_t *failure_stack_ptr;
5178 #endif
5179
5180 5181 5182
/* Register 0 is the whole match, hence the + 1.  */
5183 size_t num_regs = bufp->re_nsub + 1;
5184
5185 5186 5187 5188 5189 5190 5191
/* Start and end of each group in the match currently being attempted.  */
5192 #ifdef MATCH_MAY_ALLOCATE
5193 re_char **regstart, **regend;
5194 #endif
5195
5196 5197 5198 5199
/* Set once a complete match has been saved in best_regstart/best_regend
   while the matcher went on to look for a longer one.  */
5200 unsigned best_regs_set = false;
5201 #ifdef MATCH_MAY_ALLOCATE
5202 re_char **best_regstart, **best_regend;
5203 #endif
5204
5205 5206 5207 5208 5209 5210 5211 5212
/* End of the saved best match; only meaningful once best_regs_set.  */
5213 re_char *match_end = NULL;
5214
5215 #ifdef DEBUG
5216
5217 unsigned num_regs_pushed = 0;
5218 #endif
5219
5220 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
5221
5222 INIT_FAIL_STACK ();
5223
5224 #ifdef MATCH_MAY_ALLOCATE
5225 5226 5227 5228 5229
/* Register arrays are only needed when the pattern has groups.  */
5230 if (bufp->re_nsub)
5231 {
5232 regstart = REGEX_TALLOC (num_regs, re_char *);
5233 regend = REGEX_TALLOC (num_regs, re_char *);
5234 best_regstart = REGEX_TALLOC (num_regs, re_char *);
5235 best_regend = REGEX_TALLOC (num_regs, re_char *);
5236
5237 if (!(regstart && regend && best_regstart && best_regend))
5238 {
5239 FREE_VARIABLES ();
5240 return -2;
5241 }
5242 }
5243 else
5244 {
5245 5246
/* Null pointers keep FREE_VARIABLES harmless on every exit path.  */
5247 regstart = regend = best_regstart = best_regend = NULL;
5248 }
5249 #endif
5250
5251
/* Reject a start offset outside the combined text.  */
5252 if (pos < 0 || pos > size1 + size2)
5253 {
5254 FREE_VARIABLES ();
5255 return -1;
5256 }
5257
5258 5259 5260
/* Mark every group as unset; slot 0 is not used by this loop.  */
5261 for (reg = 1; reg < num_regs; reg++)
5262 regstart[reg] = regend[reg] = NULL;
5263
5264 5265
/* With an empty second string, treat the first one as the second so the
   rest of the code only has to deal with text in STRING2.  */
5266 if (size2 == 0 && string1 != NULL)
5267 {
5268 string2 = string1;
5269 size2 = size1;
5270 string1 = 0;
5271 size1 = 0;
5272 }
5273 end1 = string1 + size1;
5274 end2 = string2 + size2;
5275
5276 5277 5278 5279 5280 5281
/* Place D at POS and derive the per-string stop limits from STOP.  */
5282 if (pos >= size1)
5283 {
5284
/* Matching starts inside STRING2.  */
5285 d = string2 + pos - size1;
5286 dend = end_match_2 = string2 + stop - size1;
5287 end_match_1 = end1;
5288 }
5289 else
5290 {
5291 if (stop < size1)
5292 {
5293
/* Both the start and the stop limit lie inside STRING1.  */
5294 end_match_1 = string1 + stop;
5295 5296 5297 5298 5299 5300 5301 5302
5303 end_match_2 = end_match_1;
5304 }
5305 else
5306 { 5307 5308
5309 end_match_1 = end1;
5310 end_match_2 = string2 + stop - size1;
5311 }
5312 d = string1 + pos;
5313 dend = end_match_1;
5314 }
5315
5316 DEBUG_PRINT1 ("The compiled pattern is: ");
5317 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
5318 DEBUG_PRINT1 ("The string to match is: `");
5319 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
5320 DEBUG_PRINT1 ("'\n");
5321
5322 5323 5324
/* Main interpreter loop.  It is left by a `return' or by the `break' in
   the failure handler once the failure stack is empty.  */
5325 for (;;)
5326 {
5327 DEBUG_PRINT2 ("\n%p: ", p);
5328
5329 if (p == pend)
5330 {
5331 DEBUG_PRINT1 ("end of pattern ... ");
5332
5333 5334
/* The pattern is exhausted but the text up to end_match_2 is not: record
   this match if it is the best so far and backtrack for a longer one.  */
5335 if (d != end_match_2)
5336 {
5337 5338
5339 boolean same_str_p = (FIRST_STRING_P (match_end)
5340 == FIRST_STRING_P (d));
5341
5342 boolean best_match_p;
5343
5344 5345
/* The new match is better when it ends later: further along in the same
   string, or in the second string when the saved one ended in the first.  */
5346 if (same_str_p)
5347 best_match_p = d > match_end;
5348 else
5349 best_match_p = !FIRST_STRING_P (d);
5350
5351 DEBUG_PRINT1 ("backtracking.\n");
5352
5353 if (!FAIL_STACK_EMPTY ())
5354 {
5355
5356
5357 if (!best_regs_set || best_match_p)
5358 {
5359 best_regs_set = true;
5360 match_end = d;
5361
5362 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
5363
5364 for (reg = 1; reg < num_regs; reg++)
5365 {
5366 best_regstart[reg] = regstart[reg];
5367 best_regend[reg] = regend[reg];
5368 }
5369 }
5370 goto fail;
5371 }
5372
5373 5374 5375
/* No alternatives are left and an earlier saved match is at least as
   good: restore it.  This label is also the target of the goto after the
   main loop.  */
5376 else if (best_regs_set && !best_match_p)
5377 {
5378 restore_best_regs:
5379 5380 5381 5382 5383
5384 DEBUG_PRINT1 ("Restoring best registers.\n");
5385
5386 d = match_end;
5387 dend = ((d >= string1 && d <= end1)
5388 ? end_match_1 : end_match_2);
5389
5390 for (reg = 1; reg < num_regs; reg++)
5391 {
5392 regstart[reg] = best_regstart[reg];
5393 regend[reg] = best_regend[reg];
5394 }
5395 }
5396 }
5397
/* Report the match that ends at D.  */
5398 succeed_label:
5399 DEBUG_PRINT1 ("Accepting match.\n");
5400
5401
5402 if (regs && !bufp->no_sub)
5403 {
5404
/* First use: allocate the caller-visible register arrays.  */
5405 if (bufp->regs_allocated == REGS_UNALLOCATED)
5406 { 5407 5408
5409 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
5410 regs->start = TALLOC (regs->num_regs, regoff_t);
5411 regs->end = TALLOC (regs->num_regs, regoff_t);
5412 if (regs->start == NULL || regs->end == NULL)
5413 {
5414 FREE_VARIABLES ();
5415 return -2;
5416 }
5417 bufp->regs_allocated = REGS_REALLOCATE;
5418 }
5419 else if (bufp->regs_allocated == REGS_REALLOCATE)
5420 { 5421 5422
/* Grow the arrays when they cannot hold num_regs + 1 entries.
   NOTE(review): if RETALLOC assigns the realloc result directly, a failed
   call loses the old array; confirm against the macro definition.  */
5423 if (regs->num_regs < num_regs + 1)
5424 {
5425 regs->num_regs = num_regs + 1;
5426 RETALLOC (regs->start, regs->num_regs, regoff_t);
5427 RETALLOC (regs->end, regs->num_regs, regoff_t);
5428 if (regs->start == NULL || regs->end == NULL)
5429 {
5430 FREE_VARIABLES ();
5431 return -2;
5432 }
5433 }
5434 }
5435 else
5436 {
5437 5438
/* The caller supplied fixed-size arrays; use them as they are.  */
5439 assert (bufp->regs_allocated == REGS_FIXED);
5440 }
5441
5442 5443 5444
/* Register 0 spans the whole match.  */
5445 if (regs->num_regs > 0)
5446 {
5447 regs->start[0] = pos;
5448 regs->end[0] = POINTER_TO_OFFSET (d);
5449 }
5450
5451 5452
/* Convert the group pointers to offsets; groups that did not take part
   in the match get -1.  */
5453 for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
5454 {
5455 if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
5456 regs->start[reg] = regs->end[reg] = -1;
5457 else
5458 {
5459 regs->start[reg]
5460 = (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
5461 regs->end[reg]
5462 = (regoff_t) POINTER_TO_OFFSET (regend[reg]);
5463 }
5464 }
5465
5466 5467 5468 5469 5470
/* Slots beyond the pattern's groups are filled with -1 as well.  */
5471 for (reg = num_regs; reg < regs->num_regs; reg++)
5472 regs->start[reg] = regs->end[reg] = -1;
5473 }
5474
5475 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
5476 nfailure_points_pushed, nfailure_points_popped,
5477 nfailure_points_pushed - nfailure_points_popped);
5478 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
5479
/* The return value is the match length in bytes.  */
5480 mcnt = POINTER_TO_OFFSET (d) - pos;
5481
5482 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
5483
5484 FREE_VARIABLES ();
5485 return mcnt;
5486 }
5487
5488
/* Dispatch on the next opcode.  P is advanced past the opcode byte, so in
   every case below P points at the first operand.  */
5489 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
5490 {
5491 5492
5493 case no_op:
5494 DEBUG_PRINT1 ("EXECUTING no_op.\n");
5495 break;
5496
5497 case succeed:
5498 DEBUG_PRINT1 ("EXECUTING succeed.\n");
5499 goto succeed_label;
5500
5501 5502 5503
/* exactn: a count byte followed by that many literal bytes which must
   match the text.  On a mismatch D is put back to where it was.  */
5504 case exactn:
5505 mcnt = *p++;
5506 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
5507
5508
5509 dfail = d;
5510
5511 #ifndef emacs
5512 5513
/* Byte-wise comparison, with or without a translate table.  */
5514 if (RE_TRANSLATE_P (translate))
5515 do
5516 {
5517 PREFETCH ();
5518 if (RE_TRANSLATE (translate, *d) != *p++)
5519 {
5520 d = dfail;
5521 goto fail;
5522 }
5523 d++;
5524 }
5525 while (--mcnt);
5526 else
5527 do
5528 {
5529 PREFETCH ();
5530 if (*d++ != *p++)
5531 {
5532 d = dfail;
5533 goto fail;
5534 }
5535 }
5536 while (--mcnt);
5537 #else
5538
/* Emacs build: compare character by character, converting between
   unibyte and multibyte representations of pattern and text as needed.  */
5539 if (target_multibyte)
5540 do
5541 {
5542 int pat_charlen, buf_charlen;
5543 int pat_ch, buf_ch;
5544
5545 PREFETCH ();
5546 if (multibyte)
5547 pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
5548 else
5549 {
5550 pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
5551 pat_charlen = 1;
5552 }
5553 buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
5554
5555 if (TRANSLATE (buf_ch) != pat_ch)
5556 {
5557 d = dfail;
5558 goto fail;
5559 }
5560
5561 p += pat_charlen;
5562 d += buf_charlen;
/* MCNT counts pattern bytes, so subtract the whole character length.  */
5563 mcnt -= pat_charlen;
5564 }
5565 while (mcnt > 0);
5566 else
5567 do
5568 {
5569 int pat_charlen, buf_charlen;
5570 int pat_ch, buf_ch;
5571
5572 PREFETCH ();
5573 if (multibyte)
5574 {
5575 pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
5576 pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
5577 }
5578 else
5579 {
5580 pat_ch = *p;
5581 pat_charlen = 1;
5582 }
5583 buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
5584 if (! CHAR_BYTE8_P (buf_ch))
5585 {
5586 buf_ch = TRANSLATE (buf_ch);
5587 buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
/* The translated character has no unibyte form: compare the raw byte.  */
5588 if (buf_ch < 0)
5589 buf_ch = *d;
5590 }
5591 else
5592 buf_ch = *d;
5593 if (buf_ch != pat_ch)
5594 {
5595 d = dfail;
5596 goto fail;
5597 }
5598 p += pat_charlen;
5599 d++;
5600 }
5601 while (--mcnt);
5602 #endif
5603 break;
5604
5605
5606
/* anychar: accept one character, except newline unless RE_DOT_NEWLINE is
   set in the syntax, and except NUL when RE_DOT_NOT_NULL is set.  */
5607 case anychar:
5608 {
5609 int buf_charlen;
5610 re_wchar_t buf_ch;
5611
5612 DEBUG_PRINT1 ("EXECUTING anychar.\n");
5613
5614 PREFETCH ();
5615 buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
5616 target_multibyte);
5617 buf_ch = TRANSLATE (buf_ch);
5618
5619 if ((!(bufp->syntax & RE_DOT_NEWLINE)
5620 && buf_ch == '\n')
5621 || ((bufp->syntax & RE_DOT_NOT_NULL)
5622 && buf_ch == '\000'))
5623 goto fail;
5624
5625 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
5626 d += buf_charlen;
5627 }
5628 break;
5629
5630
/* charset / charset_not: test the next character against a bitmap and,
   in the Emacs build, against class bits and a range table.  */
5631 case charset:
5632 case charset_not:
5633 {
5634 register unsigned int c;
/* This local `not' shadows the function-level one.  It starts as "the set
   is negated" and is flipped when the character is found in the set, so
   a nonzero value at the end means the instruction matched.  */
5635 boolean not = (re_opcode_t) *(p - 1) == charset_not;
5636 int len;
5637
5638 5639
5640 re_char *range_table;
5641
5642
5643 int range_table_exists;
5644
5645 5646
/* Number of range-table entries, read from the table header.  */
5647 int count = 0;
5648
5649
/* Set when C ended up as a single-byte value that the bitmap can test.  */
5650 boolean unibyte_char = false;
5651
5652 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
5653
5654 range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
5655
5656 if (range_table_exists)
5657 {
5658 range_table = CHARSET_RANGE_TABLE (&p[-1]);
5659 EXTRACT_NUMBER_AND_INCR (count, range_table);
5660 }
5661
5662 PREFETCH ();
5663 c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
5664 if (target_multibyte)
5665 {
5666 int c1;
5667
5668 c = TRANSLATE (c);
5669 c1 = RE_CHAR_TO_UNIBYTE (c);
5670 if (c1 >= 0)
5671 {
5672 unibyte_char = true;
5673 c = c1;
5674 }
5675 }
5676 else
5677 {
5678 int c1 = RE_CHAR_TO_MULTIBYTE (c);
5679
5680 if (! CHAR_BYTE8_P (c1))
5681 {
5682 c1 = TRANSLATE (c1);
5683 c1 = RE_CHAR_TO_UNIBYTE (c1);
5684 if (c1 >= 0)
5685 {
5686 unibyte_char = true;
5687 c = c1;
5688 }
5689 }
5690 else
5691 unibyte_char = true;
5692 }
5693
5694 if (unibyte_char && c < (1 << BYTEWIDTH))
5695 {
5696 5697
/* Bitmap test; characters beyond the stored bitmap count as absent.  */
5698 if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
5699 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5700 not = !not;
5701 }
5702 #ifdef emacs
5703 else if (range_table_exists)
5704 {
5705 int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
5706
/* Character-class bits are checked first, then the explicit ranges.  */
5707 if ( (class_bits & BIT_LOWER && ISLOWER (c))
5708 | (class_bits & BIT_MULTIBYTE)
5709 | (class_bits & BIT_PUNCT && ISPUNCT (c))
5710 | (class_bits & BIT_SPACE && ISSPACE (c))
5711 | (class_bits & BIT_UPPER && ISUPPER (c))
5712 | (class_bits & BIT_WORD && ISWORD (c)))
5713 not = !not;
5714 else
5715 CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
5716 }
5717 #endif
5718
/* Step P over the operands whether or not the character matched.  */
5719 if (range_table_exists)
5720 p = CHARSET_RANGE_TABLE_END (range_table, count);
5721 else
5722 p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
5723
5724 if (!not) goto fail;
5725
5726 d += len;
5727 break;
5728 }
5729
5730
5731 5732 5733 5734
/* start_memory N: a group opens here.  The register is pushed on the
   failure stack first (PUSH_FAILURE_REG), then its start is set to D and
   its end is cleared.  */
5735 case start_memory:
5736 DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p);
5737
5738
5739 PUSH_FAILURE_REG ((unsigned int)*p);
5740
5741 regstart[*p] = d;
5742 regend[*p] = NULL;
5743 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
5744
5745
5746 p += 1;
5747 break;
5748
5749
5750 5751
/* stop_memory N: the group closes at D.  */
5752 case stop_memory:
5753 DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p);
5754
5755 assert (!REG_UNSET (regstart[*p]));
5756 5757 5758 5759 5760 5761 5762 5763 5764 5765 5766 5767 5768
5769
5770 regend[*p] = d;
5771 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
5772
5773
5774 p += 1;
5775 break;
5776
5777
5778 5779
/* duplicate N: the text at D must repeat what group N matched.  */
5780 case duplicate:
5781 {
5782 register re_char *d2, *dend2;
5783 int regno = *p++;
5784 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
5785
5786
/* A group that has not matched cannot be repeated.  */
5787 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5788 goto fail;
5789
5790
5791 d2 = regstart[regno];
5792
5793
5794 dfail = d;
5795
5796 5797 5798 5799
5800
/* If the group spans both strings, first compare up to end_match_1.  */
5801 dend2 = ((FIRST_STRING_P (regstart[regno])
5802 == FIRST_STRING_P (regend[regno]))
5803 ? regend[regno] : end_match_1);
5804 for (;;)
5805 {
5806 5807
/* At the end of the first string, continue with the group's remainder in
   the second string.  */
5808 while (d2 == dend2)
5809 {
5810 if (dend2 == end_match_2) break;
5811 if (dend2 == regend[regno]) break;
5812
5813
5814 d2 = string2;
5815 dend2 = regend[regno];
5816 }
5817
/* The whole group text has been compared.  */
5818 if (d2 == dend2) break;
5819
5820
5821 PREFETCH ();
5822
5823
/* Compare as many bytes as are available on both sides at once.  */
5824 mcnt = dend - d;
5825
5826 5827
5828 if (mcnt > dend2 - d2)
5829 mcnt = dend2 - d2;
5830
5831 5832
5833 if (RE_TRANSLATE_P (translate)
5834 ? bcmp_translate (d, d2, mcnt, translate, target_multibyte)
5835 : memcmp (d, d2, mcnt))
5836 {
5837 d = dfail;
5838 goto fail;
5839 }
5840 d += mcnt, d2 += mcnt;
5841 }
5842 }
5843 break;
5844
5845
5846 5847
/* begline: succeeds at the start of the text unless bufp->not_bol is set,
   or right after a newline.  */
5848 case begline:
5849 DEBUG_PRINT1 ("EXECUTING begline.\n");
5850
5851 if (AT_STRINGS_BEG (d))
5852 {
5853 if (!bufp->not_bol) break;
5854 }
5855 else
5856 {
5857 unsigned c;
5858 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
5859 if (c == '\n')
5860 break;
5861 }
5862
5863 goto fail;
5864
5865
5866
/* endline: succeeds at the end of the text unless bufp->not_eol is set,
   or right before a newline.  */
5867 case endline:
5868 DEBUG_PRINT1 ("EXECUTING endline.\n");
5869
5870 if (AT_STRINGS_END (d))
5871 {
5872 if (!bufp->not_eol) break;
5873 }
5874 else
5875 {
5876 PREFETCH_NOLIMIT ();
5877 if (*d == '\n')
5878 break;
5879 }
5880 goto fail;
5881
5882
5883
/* begbuf / endbuf: succeed only at the very start / end of the text.  */
5884 case begbuf:
5885 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5886 if (AT_STRINGS_BEG (d))
5887 break;
5888 goto fail;
5889
5890
5891
5892 case endbuf:
5893 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
5894 if (AT_STRINGS_END (d))
5895 break;
5896 goto fail;
5897
5898
5899 5900 5901 5902 5903 5904 5905 5906 5907 5908 5909 5910 5911 5912 5913 5914
/* on_failure_keep_string_jump: push a failure point that records no text
   position (NULL), so that resuming from it leaves D where it then is;
   see the matching case in the failure handler.  */
5915 case on_failure_keep_string_jump:
5916 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5917 DEBUG_PRINT3 ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
5918 mcnt, p + mcnt);
5919
/* P - 3 is the address of this opcode; the handler re-reads its operand.  */
5920 PUSH_FAILURE_POINT (p - 3, NULL);
5921 break;
5922
5923 5924 5925 5926 5927 5928 5929 5930 5931 5932 5933 5934 5935 5936
/* on_failure_jump_nastyloop: like on_failure_jump, but the failure point
   is pushed only when CHECK_INFINITE_LOOP leaves `cycle' clear.  The byte
   in front of the opcode is asserted to be a no_op.  */
5937 case on_failure_jump_nastyloop:
5938 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5939 DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
5940 mcnt, p + mcnt);
5941
5942 assert ((re_opcode_t)p[-4] == no_op);
5943 {
5944 int cycle = 0;
5945 CHECK_INFINITE_LOOP (p - 4, d);
5946 if (!cycle)
5947 5948 5949 5950
5951 PUSH_FAILURE_POINT (p - 3, d);
5952 }
5953 break;
5954
5955 5956
/* on_failure_jump_loop: when a cycle is detected, take the jump right
   away instead of pushing another failure point.  succeed_n enters at the
   `on_failure' label once its counter has reached zero.  */
5957 case on_failure_jump_loop:
5958 on_failure:
5959 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5960 DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5961 mcnt, p + mcnt);
5962 {
5963 int cycle = 0;
5964 CHECK_INFINITE_LOOP (p - 3, d);
5965 if (cycle)
5966 5967 5968 5969 5970
5971 p += mcnt;
5972 else
5973 PUSH_FAILURE_POINT (p - 3, d);
5974 }
5975 break;
5976
5977
5978 5979 5980 5981 5982 5983 5984 5985 5986 5987 5988 5989
/* on_failure_jump: remember this instruction and the current text
   position, then continue with the following instruction.  */
5990 case on_failure_jump:
5991 EXTRACT_NUMBER_AND_INCR (mcnt, p);
5992 DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
5993 mcnt, p + mcnt);
5994
5995 PUSH_FAILURE_POINT (p -3, d);
5996 break;
5997
5998 5999 6000 6001 6002 6003 6004
/* on_failure_jump_smart: the first time it is executed, this instruction
   rewrites itself in the pattern buffer into on_failure_keep_string_jump
   or on_failure_jump, depending on mutually_exclusive_p, and is then
   executed again in its new form.  */
6005 case on_failure_jump_smart:
6006 EXTRACT_NUMBER_AND_INCR (mcnt, p);
6007 DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
6008 mcnt, p + mcnt);
6009 {
/* P1: the loop body, directly after this instruction.  P2: the jump
   target, i.e. the code after the loop.  P3: this opcode byte.  */
6010 re_char *p1 = p;
6011
6012 unsigned char *p2 = (unsigned char*) p + mcnt;
6013 unsigned char *p3 = (unsigned char*) p - 3;
6014
/* Back up so that the rewritten opcode is dispatched on the next pass.  */
6015 p -= 3; 6016
6017
/* MCNT now holds the displacement of the jump that ends the loop body.  */
6018 EXTRACT_NUMBER (mcnt, p2 - 2);
6019
6020 6021
/* The body is expected to be one single-character matcher followed by a
   jump back to this instruction.  */
6022 assert (skip_one_char (p1) == p2 - 3);
6023 assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
6024 DEBUG_STATEMENT (debug += 2);
6025 if (mutually_exclusive_p (bufp, p1, p2))
6026 {
6027
6028 DEBUG_PRINT1 (" smart exclusive => fast loop.\n");
6029 *p3 = (unsigned char) on_failure_keep_string_jump;
/* Retarget the closing jump 3 bytes further, i.e. just past this
   instruction, so later iterations do not execute it again.  */
6030 STORE_NUMBER (p2 - 2, mcnt + 3);
6031 }
6032 else
6033 {
6034
6035 DEBUG_PRINT1 (" smart default => slow loop.\n");
6036 *p3 = (unsigned char) on_failure_jump;
6037 }
6038 DEBUG_STATEMENT (debug -= 2);
6039 }
6040 break;
6041
6042
/* jump: unconditional relative jump.  jump_n enters at the label.  */
6043 case jump:
6044 unconditional_jump:
6045 IMMEDIATE_QUIT_CHECK;
6046 EXTRACT_NUMBER_AND_INCR (mcnt, p);
6047 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
6048 p += mcnt;
6049 DEBUG_PRINT2 ("(to %p).\n", p);
6050 break;
6051
6052
6053 6054
/* succeed_n: a displacement followed by a counter.  While the counter is
   nonzero, decrement it in the pattern and fall into the following code;
   once it is zero, behave like on_failure_jump_loop.  */
6055 case succeed_n:
6056
6057 EXTRACT_NUMBER (mcnt, p + 2);
6058 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
6059
6060
6061 if (mcnt != 0)
6062 {
6063
6064 unsigned char *p2 = (unsigned char*) p + 2;
6065 mcnt--;
/* Skip both two-byte operands.  */
6066 p += 4;
/* The new counter is written with PUSH_NUMBER rather than a plain store.  */
6067 PUSH_NUMBER (p2, mcnt);
6068 }
6069 else
6070
6071 goto on_failure;
6072 break;
6073
/* jump_n: a displacement followed by a counter.  While the counter is
   nonzero, decrement it and jump; afterwards skip the operands.  */
6074 case jump_n:
6075
6076 EXTRACT_NUMBER (mcnt, p + 2);
6077 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
6078
6079
6080 if (mcnt != 0)
6081 {
6082
6083 unsigned char *p2 = (unsigned char*) p + 2;
6084 mcnt--;
6085 PUSH_NUMBER (p2, mcnt);
6086 goto unconditional_jump;
6087 }
6088
6089 else
6090 p += 4;
6091 break;
6092
/* set_number_at: operands are a relative offset and a value; the value is
   stored into the pattern at the addressed location.  */
6093 case set_number_at:
6094 {
6095 unsigned char *p2;
6096 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
6097
6098 EXTRACT_NUMBER_AND_INCR (mcnt, p);
6099
/* The offset is relative to the position after the offset operand.  */
6100 p2 = (unsigned char*) p + mcnt;
6101
6102 EXTRACT_NUMBER_AND_INCR (mcnt, p);
6103 DEBUG_PRINT3 (" Setting %p to %d.\n", p2, mcnt);
6104 PUSH_NUMBER (p2, mcnt);
6105 break;
6106 }
6107
/* wordbound / notwordbound: NOT starts as "the opcode is notwordbound"
   and is flipped when D is at a word boundary; the instruction succeeds
   when NOT ends up nonzero.
   NOTE(review): as written, a boundary flips NOT to 1 for wordbound and
   to 0 for notwordbound, which looks inverted relative to the final
   test; verify against the upstream source before relying on this.  */
6108 case wordbound:
6109 case notwordbound:
6110 not = (re_opcode_t) *(p - 1) == notwordbound;
6111 DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
6112
6113
6114
6115
/* Both ends of the text count as boundaries.  */
6116 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
6117 not = !not;
6118 else
6119 {
6120 6121
/* C1/S1: character before D and its syntax; C2/S2: character at D.  */
6122 re_wchar_t c1, c2;
6123 int s1, s2;
6124 int dummy;
6125 #ifdef emacs
6126 int offset = PTR_TO_OFFSET (d - 1);
6127 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6128 UPDATE_SYNTAX_TABLE (charpos);
6129 #endif
6130 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6131 s1 = SYNTAX (c1);
6132 #ifdef emacs
6133 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
6134 #endif
6135 PREFETCH_NOLIMIT ();
6136 GET_CHAR_AFTER (c2, d, dummy);
6137 s2 = SYNTAX (c2);
6138
6139 if (
6140 ((s1 == Sword) != (s2 == Sword))
6141 6142
6143 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
6144 not = !not;
6145 }
6146 if (not)
6147 break;
6148 else
6149 goto fail;
6150
/* wordbeg: the character at D must have word syntax and the character
   before it, if any, must not (unless WORD_BOUNDARY_P separates them).  */
6151 case wordbeg:
6152 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
6153
6154
6155
6156
6157 if (AT_STRINGS_END (d))
6158 goto fail;
6159 else
6160 {
6161 6162
6163 re_wchar_t c1, c2;
6164 int s1, s2;
6165 int dummy;
6166 #ifdef emacs
6167 int offset = PTR_TO_OFFSET (d);
6168 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6169 UPDATE_SYNTAX_TABLE (charpos);
6170 #endif
6171 PREFETCH ();
6172 GET_CHAR_AFTER (c2, d, dummy);
6173 s2 = SYNTAX (c2);
6174
6175
6176 if (s2 != Sword)
6177 goto fail;
6178
6179
6180 if (!AT_STRINGS_BEG (d))
6181 {
6182 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6183 #ifdef emacs
6184 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
6185 #endif
6186 s1 = SYNTAX (c1);
6187
6188 6189
6190 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
6191 goto fail;
6192 }
6193 }
6194 break;
6195
/* wordend: the character before D must have word syntax and the character
   at D, if any, must not (unless WORD_BOUNDARY_P separates them).  */
6196 case wordend:
6197 DEBUG_PRINT1 ("EXECUTING wordend.\n");
6198
6199
6200
6201
6202 if (AT_STRINGS_BEG (d))
6203 goto fail;
6204 else
6205 {
6206 6207
6208 re_wchar_t c1, c2;
6209 int s1, s2;
6210 int dummy;
6211 #ifdef emacs
6212 int offset = PTR_TO_OFFSET (d) - 1;
6213 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6214 UPDATE_SYNTAX_TABLE (charpos);
6215 #endif
6216 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6217 s1 = SYNTAX (c1);
6218
6219
6220 if (s1 != Sword)
6221 goto fail;
6222
6223
6224 if (!AT_STRINGS_END (d))
6225 {
6226 PREFETCH_NOLIMIT ();
6227 GET_CHAR_AFTER (c2, d, dummy);
6228 #ifdef emacs
/* NOTE(review): the otherwise parallel symend case passes charpos + 1
   here; confirm whether charpos is intended in this case.  */
6229 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
6230 #endif
6231 s2 = SYNTAX (c2);
6232
6233 6234
6235 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
6236 goto fail;
6237 }
6238 }
6239 break;
6240
/* symbeg: the character at D must have word or symbol syntax and the
   character before it, if any, must have neither.  */
6241 case symbeg:
6242 DEBUG_PRINT1 ("EXECUTING symbeg.\n");
6243
6244
6245
6246
6247 if (AT_STRINGS_END (d))
6248 goto fail;
6249 else
6250 {
6251 6252
6253 re_wchar_t c1, c2;
6254 int s1, s2;
6255 #ifdef emacs
6256 int offset = PTR_TO_OFFSET (d);
6257 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6258 UPDATE_SYNTAX_TABLE (charpos);
6259 #endif
6260 PREFETCH ();
6261 c2 = RE_STRING_CHAR (d, target_multibyte);
6262 s2 = SYNTAX (c2);
6263
6264
6265 if (s2 != Sword && s2 != Ssymbol)
6266 goto fail;
6267
6268
6269 if (!AT_STRINGS_BEG (d))
6270 {
6271 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6272 #ifdef emacs
6273 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
6274 #endif
6275 s1 = SYNTAX (c1);
6276
6277
6278 if (s1 == Sword || s1 == Ssymbol)
6279 goto fail;
6280 }
6281 }
6282 break;
6283
/* symend: the character before D must have word or symbol syntax and the
   character at D, if any, must have neither.  */
6284 case symend:
6285 DEBUG_PRINT1 ("EXECUTING symend.\n");
6286
6287
6288
6289
6290 if (AT_STRINGS_BEG (d))
6291 goto fail;
6292 else
6293 {
6294 6295
6296 re_wchar_t c1, c2;
6297 int s1, s2;
6298 #ifdef emacs
6299 int offset = PTR_TO_OFFSET (d) - 1;
6300 int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6301 UPDATE_SYNTAX_TABLE (charpos);
6302 #endif
6303 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
6304 s1 = SYNTAX (c1);
6305
6306
6307 if (s1 != Sword && s1 != Ssymbol)
6308 goto fail;
6309
6310
6311 if (!AT_STRINGS_END (d))
6312 {
6313 PREFETCH_NOLIMIT ();
6314 c2 = RE_STRING_CHAR (d, target_multibyte);
6315 #ifdef emacs
6316 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
6317 #endif
6318 s2 = SYNTAX (c2);
6319
6320
6321 if (s2 == Sword || s2 == Ssymbol)
6322 goto fail;
6323 }
6324 }
6325 break;
6326
/* syntaxspec / notsyntaxspec: the operand byte is a syntax code; the next
   character must (or, for the `not' form, must not) have that syntax.  */
6327 case syntaxspec:
6328 case notsyntaxspec:
6329 not = (re_opcode_t) *(p - 1) == notsyntaxspec;
6330 mcnt = *p++;
6331 DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
6332 PREFETCH ();
6333 #ifdef emacs
6334 {
6335 int offset = PTR_TO_OFFSET (d);
6336 int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
6337 UPDATE_SYNTAX_TABLE (pos1);
6338 }
6339 #endif
6340 {
6341 int len;
6342 re_wchar_t c;
6343
6344 GET_CHAR_AFTER (c, d, len);
/* Fail when "syntax differs" and NOT disagree.  */
6345 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
6346 goto fail;
6347 d += len;
6348 }
6349 break;
6350
6351 #ifdef emacs
/* before_dot / at_dot / after_dot: compare the byte position of D with
   PT_BYTE; before_dot requires it to be smaller, at_dot equal, after_dot
   larger.  */
6352 case before_dot:
6353 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
6354 if (PTR_BYTE_POS (d) >= PT_BYTE)
6355 goto fail;
6356 break;
6357
6358 case at_dot:
6359 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
6360 if (PTR_BYTE_POS (d) != PT_BYTE)
6361 goto fail;
6362 break;
6363
6364 case after_dot:
6365 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
6366 if (PTR_BYTE_POS (d) <= PT_BYTE)
6367 goto fail;
6368 break;
6369
/* categoryspec / notcategoryspec: as syntaxspec, but testing
   CHAR_HAS_CATEGORY against the operand byte.  */
6370 case categoryspec:
6371 case notcategoryspec:
6372 not = (re_opcode_t) *(p - 1) == notcategoryspec;
6373 mcnt = *p++;
6374 DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
6375 PREFETCH ();
6376 {
6377 int len;
6378 re_wchar_t c;
6379
6380 GET_CHAR_AFTER (c, d, len);
6381 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
6382 goto fail;
6383 d += len;
6384 }
6385 break;
6386
6387 #endif
6388
/* An opcode this interpreter does not know.  */
6389 default:
6390 abort ();
6391 }
/* The instruction succeeded: go on to the next one.  */
6392 continue;
6393
6394
6395
/* Failure handler: resume at the most recent failure point, or leave the
   loop when none is left.  */
6396 fail:
6397 IMMEDIATE_QUIT_CHECK;
6398 if (!FAIL_STACK_EMPTY ())
6399 {
6400 re_char *str, *pat;
6401
6402 DEBUG_PRINT1 ("\nFAIL:\n");
6403 POP_FAILURE_POINT (str, pat);
/* PAT points at the instruction that pushed the failure point.  */
6404 switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++))
6405 {
6406 case on_failure_keep_string_jump:
/* No text position was recorded, so D stays where it is.  */
6407 assert (str == NULL);
6408 goto continue_failure_jump;
6409
6410 case on_failure_jump_nastyloop:
6411 assert ((re_opcode_t)pat[-2] == no_op);
/* Push a failure point at the no_op in front of the instruction, then
   deliberately fall through to take the jump.  */
6412 PUSH_FAILURE_POINT (pat - 2, str);
6413
6414
6415 case on_failure_jump_loop:
6416 case on_failure_jump:
6417 case succeed_n:
/* Restore the text position and take the jump stored in the operand.  */
6418 d = str;
6419 continue_failure_jump:
6420 EXTRACT_NUMBER_AND_INCR (mcnt, pat);
6421 p = pat + mcnt;
6422 break;
6423
6424 case no_op:
6425
/* A failure point at a no_op (see the nastyloop case) only pops again.  */
6426 goto fail;
6427
6428 default:
6429 abort();
6430 }
6431
6432 assert (p >= bufp->buffer && p <= pend);
6433
/* D may have moved back into the first string; use its limit again.  */
6434 if (d >= string1 && d <= end1)
6435 dend = end_match_1;
6436 }
6437 else
6438 break;
6439 }
6440
/* Every alternative failed.  If a complete match was saved earlier,
   report that one through restore_best_regs.  */
6441 if (best_regs_set)
6442 goto restore_best_regs;
6443
6444 FREE_VARIABLES ();
6445
6446 return -1;
6447 }
6448
6449
6450
6451 6452
6453
6454 static int
6455 bcmp_translate (s1, s2, len, translate, target_multibyte)
6456 re_char *s1, *s2;
6457 register int len;
6458 RE_TRANSLATE_TYPE translate;
6459 const int target_multibyte;
6460 {
6461 register re_char *p1 = s1, *p2 = s2;
6462 re_char *p1_end = s1 + len;
6463 re_char *p2_end = s2 + len;
6464
6465 6466
6467 while (p1 < p1_end && p2 < p2_end)
6468 {
6469 int p1_charlen, p2_charlen;
6470 re_wchar_t p1_ch, p2_ch;
6471
6472 GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
6473 GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
6474
6475 if (RE_TRANSLATE (translate, p1_ch)
6476 != RE_TRANSLATE (translate, p2_ch))
6477 return 1;
6478
6479 p1 += p1_charlen, p2 += p2_charlen;
6480 }
6481
6482 if (p1 != p1_end || p2 != p2_end)
6483 return 1;
6484
6485 return 0;
6486 }
6487
6488
6489
6490 6491 6492 6493 6494 6495 6496 6497
6498
6499 const char *
6500 re_compile_pattern (pattern, length, bufp)
6501 const char *pattern;
6502 size_t length;
6503 struct re_pattern_buffer *bufp;
6504 {
6505 reg_errcode_t ret;
6506
6507 6508
6509 bufp->regs_allocated = REGS_UNALLOCATED;
6510
6511 6512 6513
6514 bufp->no_sub = 0;
6515
6516 ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
6517
6518 if (!ret)
6519 return NULL;
6520 return gettext (re_error_msgid[(int) ret]);
6521 }
6522 WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
6523
6524 6525
6526
6527 #if defined _REGEX_RE_COMP || defined _LIBC
6528
6529
6530 static struct re_pattern_buffer re_comp_buf;
6531
6532 char *
6533 # ifdef _LIBC
6534 6535 6536
6537 weak_function
6538 # endif
6539 re_comp (s)
6540 const char *s;
6541 {
6542 reg_errcode_t ret;
6543
6544 if (!s)
6545 {
6546 if (!re_comp_buf.buffer)
6547
6548 return (char *) gettext ("No previous regular expression");
6549 return 0;
6550 }
6551
6552 if (!re_comp_buf.buffer)
6553 {
6554 re_comp_buf.buffer = (unsigned char *) malloc (200);
6555 if (re_comp_buf.buffer == NULL)
6556
6557 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6558 re_comp_buf.allocated = 200;
6559
6560 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
6561 if (re_comp_buf.fastmap == NULL)
6562
6563 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
6564 }
6565
6566 6567
6568
6569 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6570
6571 if (!ret)
6572 return NULL;
6573
6574
6575 return (char *) gettext (re_error_msgid[(int) ret]);
6576 }
6577
6578
6579 int
6580 # ifdef _LIBC
6581 weak_function
6582 # endif
6583 re_exec (s)
6584 const char *s;
6585 {
6586 const int len = strlen (s);
6587 return
6588 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6589 }
6590 #endif
6591
6592
6593
6594 #ifndef emacs
6595
6596 6597 6598 6599 6600 6601 6602 6603 6604 6605 6606 6607 6608 6609 6610 6611 6612 6613 6614 6615 6616 6617 6618 6619 6620 6621 6622 6623 6624 6625 6626 6627 6628
6629
6630 int
6631 regcomp (preg, pattern, cflags)
6632 regex_t *__restrict preg;
6633 const char *__restrict pattern;
6634 int cflags;
6635 {
6636 reg_errcode_t ret;
6637 reg_syntax_t syntax
6638 = (cflags & REG_EXTENDED) ?
6639 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6640
6641
6642 preg->buffer = 0;
6643 preg->allocated = 0;
6644 preg->used = 0;
6645
6646
6647 preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
6648
6649 if (cflags & REG_ICASE)
6650 {
6651 unsigned i;
6652
6653 preg->translate
6654 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
6655 * sizeof (*(RE_TRANSLATE_TYPE)0));
6656 if (preg->translate == NULL)
6657 return (int) REG_ESPACE;
6658
6659
6660 for (i = 0; i < CHAR_SET_SIZE; i++)
6661 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
6662 }
6663 else
6664 preg->translate = NULL;
6665
6666
6667 if (cflags & REG_NEWLINE)
6668 {
6669 syntax &= ~RE_DOT_NEWLINE;
6670 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6671 }
6672 else
6673 syntax |= RE_NO_NEWLINE_ANCHOR;
6674
6675 preg->no_sub = !!(cflags & REG_NOSUB);
6676
6677 6678
6679 ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
6680
6681 6682
6683 if (ret == REG_ERPAREN)
6684 ret = REG_EPAREN;
6685
6686 if (ret == REG_NOERROR && preg->fastmap)
6687 { 6688
6689 re_compile_fastmap (preg);
6690 if (preg->can_be_null)
6691 {
6692 free (preg->fastmap);
6693 preg->fastmap = NULL;
6694 }
6695 }
6696 return (int) ret;
6697 }
6698 WEAK_ALIAS (__regcomp, regcomp)
6699
6700
6701 6702 6703 6704 6705 6706 6707 6708 6709 6710 6711 6712 6713
6714
6715 int
6716 regexec (preg, string, nmatch, pmatch, eflags)
6717 const regex_t *__restrict preg;
6718 const char *__restrict string;
6719 size_t nmatch;
6720 regmatch_t pmatch[__restrict_arr];
6721 int eflags;
6722 {
6723 int ret;
6724 struct re_registers regs;
6725 regex_t private_preg;
6726 int len = strlen (string);
6727 boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
6728
6729 private_preg = *preg;
6730
6731 private_preg.not_bol = !!(eflags & REG_NOTBOL);
6732 private_preg.not_eol = !!(eflags & REG_NOTEOL);
6733
6734 6735 6736
6737 private_preg.regs_allocated = REGS_FIXED;
6738
6739 if (want_reg_info)
6740 {
6741 regs.num_regs = nmatch;
6742 regs.start = TALLOC (nmatch * 2, regoff_t);
6743 if (regs.start == NULL)
6744 return (int) REG_NOMATCH;
6745 regs.end = regs.start + nmatch;
6746 }
6747
6748 6749 6750 6751 6752 6753 6754 6755
6756
6757
6758 ret = re_search (&private_preg, string, len,
6759 0, len,
6760 want_reg_info ? ®s : (struct re_registers *) 0);
6761
6762
6763 if (want_reg_info)
6764 {
6765 if (ret >= 0)
6766 {
6767 unsigned r;
6768
6769 for (r = 0; r < nmatch; r++)
6770 {
6771 pmatch[r].rm_so = regs.start[r];
6772 pmatch[r].rm_eo = regs.end[r];
6773 }
6774 }
6775
6776
6777 free (regs.start);
6778 }
6779
6780
6781 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6782 }
6783 WEAK_ALIAS (__regexec, regexec)
6784
6785
6786 6787 6788 6789 6790
6791
6792 size_t
6793 regerror (err_code, preg, errbuf, errbuf_size)
6794 int err_code;
6795 const regex_t *preg;
6796 char *errbuf;
6797 size_t errbuf_size;
6798 {
6799 const char *msg;
6800 size_t msg_size;
6801
6802 if (err_code < 0
6803 || err_code >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6804 6805 6806 6807
6808 abort ();
6809
6810 msg = gettext (re_error_msgid[err_code]);
6811
6812 msg_size = strlen (msg) + 1;
6813
6814 if (errbuf_size != 0)
6815 {
6816 if (msg_size > errbuf_size)
6817 {
6818 strncpy (errbuf, msg, errbuf_size - 1);
6819 errbuf[errbuf_size - 1] = 0;
6820 }
6821 else
6822 strcpy (errbuf, msg);
6823 }
6824
6825 return msg_size;
6826 }
6827 WEAK_ALIAS (__regerror, regerror)
6828
6829
6830
6831
6832 void
6833 regfree (preg)
6834 regex_t *preg;
6835 {
6836 free (preg->buffer);
6837 preg->buffer = NULL;
6838
6839 preg->allocated = 0;
6840 preg->used = 0;
6841
6842 free (preg->fastmap);
6843 preg->fastmap = NULL;
6844 preg->fastmap_accurate = 0;
6845
6846 free (preg->translate);
6847 preg->translate = NULL;
6848 }
6849 WEAK_ALIAS (__regfree, regfree)
6850
6851 #endif
6852
6853 6854