1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
   2    Copyright (C) 1985, 1987, 1993, 1994, 1995, 1997, 1998, 1999, 2001,
   3                  2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5 
   6 This file is part of GNU Emacs.
   7 
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12 
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17 
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20 
  21 
  22 #include <config.h>
  23 #include <ctype.h>
  24 #include <setjmp.h>
  25 #include "lisp.h"
  26 #include "commands.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "keymap.h"
  30 #include "regex.h"
  31 
  32 /* Make syntax table lookup grant data in gl_state.  */
  33 #define SYNTAX_ENTRY_VIA_PROPERTY
  34 
  35 #include "syntax.h"
  36 #include "intervals.h"
  37 
  38 /* We use these constants in place for comment-style and
  39    string-ender-char to distinguish  comments/strings started by
  40    comment_fence and string_fence codes.  */
  41 
  42 #define ST_COMMENT_STYLE (256 + 1)
  43 #define ST_STRING_STYLE (256 + 2)
  44 #include "category.h"
  45 
  46 Lisp_Object Qsyntax_table_p, Qsyntax_table, Qscan_error;
  47 
  48 int words_include_escapes;
  49 int parse_sexp_lookup_properties;
  50 
  51 /* Nonzero means `scan-sexps' treat all multibyte characters as symbol.  */
  52 int multibyte_syntax_as_symbol;
  53 
  54 /* Used as a temporary in SYNTAX_ENTRY and other macros in syntax.h,
  55    if not compiled with GCC.  No need to mark it, since it is used
  56    only very temporarily.  */
  57 Lisp_Object syntax_temp;
  58 
  59 /* Non-zero means an open parenthesis in column 0 is always considered
  60    to be the start of a defun.  Zero means an open parenthesis in
  61    column 0 has no special meaning.  */
  62 
  63 int open_paren_in_column_0_is_defun_start;
  64 
  65 /* This is the internal form of the parse state used in parse-partial-sexp.  */
  66 
  67 struct lisp_parse_state
  68   {
  69     int depth;     /* Depth at end of parsing.  */
  70     int instring;  /* -1 if not within string, else desired terminator.  */
  71     int incomment; /* -1 if in unnestable comment else comment nesting */
  72     int comstyle;  /* comment style a=0, or b=1, or ST_COMMENT_STYLE.  */
  73     int quoted;    /* Nonzero if just after an escape char at end of parsing */
  74     int mindepth;  /* Minimum depth seen while scanning.  */
  75     /* Char number of most recent start-of-expression at current level */
  76     EMACS_INT thislevelstart;
  77     /* Char number of start of containing expression */
  78     EMACS_INT prevlevelstart;
  79     EMACS_INT location;      /* Char number at which parsing stopped.  */
  80     EMACS_INT comstr_start;  /* Position of last comment/string starter.  */
  81     Lisp_Object levelstarts; /* Char numbers of starts-of-expression
  82                                 of levels (starting from outermost).  */
  83   };
  84 
  85 /* These variables are a cache for finding the start of a defun.
  86    find_start_pos is the place for which the defun start was found.
  87    find_start_value is the defun start position found for it.
  88    find_start_value_byte is the corresponding byte position.
  89    find_start_buffer is the buffer it was found in.
  90    find_start_begv is the BEGV value when it was found.
  91    find_start_modiff is the value of MODIFF when it was found.  */
  92 
  93 static EMACS_INT find_start_pos;
  94 static EMACS_INT find_start_value;
  95 static EMACS_INT find_start_value_byte;
  96 static struct buffer *find_start_buffer;
  97 static EMACS_INT find_start_begv;
  98 static int find_start_modiff;
  99 
 100 
 101 static Lisp_Object skip_chars P_ ((int, Lisp_Object, Lisp_Object, int));
 102 static Lisp_Object skip_syntaxes P_ ((int, Lisp_Object, Lisp_Object));
 103 static Lisp_Object scan_lists P_ ((EMACS_INT, EMACS_INT, EMACS_INT, int));
 104 static void scan_sexps_forward P_ ((struct lisp_parse_state *,
 105                                     EMACS_INT, EMACS_INT, EMACS_INT, int,
 106                                     int, Lisp_Object, int));
 107 static int in_classes P_ ((int, Lisp_Object));
 108 
 109 
 110 struct gl_state_s gl_state;             /* Global state of syntax parser.  */
 111 
 112 INTERVAL interval_of ();
 113 #define INTERVALS_AT_ONCE 10            /* 1 + max-number of intervals
 114                                            to scan to property-change.  */
 115 
 116 /* Update gl_state to an appropriate interval which contains CHARPOS.  The
 117    sign of COUNT give the relative position of CHARPOS wrt the previously
 118    valid interval.  If INIT, only [be]_property fields of gl_state are
 119    valid at start, the rest is filled basing on OBJECT.
 120 
 121    `gl_state.*_i' are the intervals, and CHARPOS is further in the search
 122    direction than the intervals - or in an interval.  We update the
 123    current syntax-table basing on the property of this interval, and
 124    update the interval to start further than CHARPOS - or be
 125    NULL_INTERVAL.  We also update lim_property to be the next value of
 126    charpos to call this subroutine again - or be before/after the
 127    start/end of OBJECT.  */
 128 
 129 void
 130 update_syntax_table (charpos, count, init, object)
 131      int charpos, count, init;
 132      Lisp_Object object;
 133 {
 134   Lisp_Object tmp_table;
 135   int cnt = 0, invalidate = 1;
 136   INTERVAL i;
 137 
 138   if (init)
 139     {
 140       gl_state.old_prop = Qnil;
 141       gl_state.start = gl_state.b_property;
 142       gl_state.stop = gl_state.e_property;
 143       i = interval_of (charpos, object);
 144       gl_state.backward_i = gl_state.forward_i = i;
 145       invalidate = 0;
 146       if (NULL_INTERVAL_P (i))
 147         return;
 148       /* interval_of updates only ->position of the return value, so
 149          update the parents manually to speed up update_interval.  */
 150       while (!NULL_PARENT (i))
 151         {
 152           if (AM_RIGHT_CHILD (i))
 153             INTERVAL_PARENT (i)->position = i->position
 154               - LEFT_TOTAL_LENGTH (i) + TOTAL_LENGTH (i) /* right end */
 155               - TOTAL_LENGTH (INTERVAL_PARENT (i))
 156               + LEFT_TOTAL_LENGTH (INTERVAL_PARENT (i));
 157           else
 158             INTERVAL_PARENT (i)->position = i->position - LEFT_TOTAL_LENGTH (i)
 159               + TOTAL_LENGTH (i);
 160           i = INTERVAL_PARENT (i);
 161         }
 162       i = gl_state.forward_i;
 163       gl_state.b_property = i->position - gl_state.offset;
 164       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 165       goto update;
 166     }
 167   i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
 168 
 169   /* We are guaranteed to be called with CHARPOS either in i,
 170      or further off.  */
 171   if (NULL_INTERVAL_P (i))
 172     error ("Error in syntax_table logic for to-the-end intervals");
 173   else if (charpos < i->position)               /* Move left.  */
 174     {
 175       if (count > 0)
 176         error ("Error in syntax_table logic for intervals <-");
 177       /* Update the interval.  */
 178       i = update_interval (i, charpos);
 179       if (INTERVAL_LAST_POS (i) != gl_state.b_property)
 180         {
 181           invalidate = 0;
 182           gl_state.forward_i = i;
 183           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 184         }
 185     }
 186   else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
 187     {
 188       if (count < 0)
 189         error ("Error in syntax_table logic for intervals ->");
 190       /* Update the interval.  */
 191       i = update_interval (i, charpos);
 192       if (i->position != gl_state.e_property)
 193         {
 194           invalidate = 0;
 195           gl_state.backward_i = i;
 196           gl_state.b_property = i->position - gl_state.offset;
 197         }
 198     }
 199 
 200   update:
 201   tmp_table = textget (i->plist, Qsyntax_table);
 202 
 203   if (invalidate)
 204     invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
 205 
 206   if (invalidate)               /* Did not get to adjacent interval.  */
 207     {                           /* with the same table => */
 208                                 /* invalidate the old range.  */
 209       if (count > 0)
 210         {
 211           gl_state.backward_i = i;
 212           gl_state.b_property = i->position - gl_state.offset;
 213         }
 214       else
 215         {
 216           gl_state.forward_i = i;
 217           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
 218         }
 219     }
 220 
 221   if (!EQ (tmp_table, gl_state.old_prop))
 222     {
 223       gl_state.current_syntax_table = tmp_table;
 224       gl_state.old_prop = tmp_table;
 225       if (EQ (Fsyntax_table_p (tmp_table), Qt))
 226         {
 227           gl_state.use_global = 0;
 228         }
 229       else if (CONSP (tmp_table))
 230         {
 231           gl_state.use_global = 1;
 232           gl_state.global_code = tmp_table;
 233         }
 234       else
 235         {
 236           gl_state.use_global = 0;
 237           gl_state.current_syntax_table = current_buffer->syntax_table;
 238         }
 239     }
 240 
 241   while (!NULL_INTERVAL_P (i))
 242     {
 243       if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
 244         {
 245           if (count > 0)
 246             {
 247               gl_state.e_property = i->position - gl_state.offset;
 248               gl_state.forward_i = i;
 249             }
 250           else
 251             {
 252               gl_state.b_property
 253                 = i->position + LENGTH (i) - gl_state.offset;
 254               gl_state.backward_i = i;
 255             }
 256           return;
 257         }
 258       else if (cnt == INTERVALS_AT_ONCE)
 259         {
 260           if (count > 0)
 261             {
 262               gl_state.e_property
 263                 = i->position + LENGTH (i) - gl_state.offset
 264                 /* e_property at EOB is not set to ZV but to ZV+1, so that
 265                    we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
 266                    having to check eob between the two.  */
 267                 + (NULL_INTERVAL_P (next_interval (i)) ? 1 : 0);
 268               gl_state.forward_i = i;
 269             }
 270           else
 271             {
 272               gl_state.b_property = i->position - gl_state.offset;
 273               gl_state.backward_i = i;
 274             }
 275           return;
 276         }
 277       cnt++;
 278       i = count > 0 ? next_interval (i) : previous_interval (i);
 279     }
 280   eassert (NULL_INTERVAL_P (i)); /* This property goes to the end.  */
 281   if (count > 0)
 282     gl_state.e_property = gl_state.stop;
 283   else
 284     gl_state.b_property = gl_state.start;
 285 }
 286 
 287 /* Returns TRUE if char at CHARPOS is quoted.
 288    Global syntax-table data should be set up already to be good at CHARPOS
 289    or after.  On return global syntax data is good for lookup at CHARPOS. */
 290 
 291 static int
 292 char_quoted (EMACS_INT charpos, EMACS_INT bytepos)
 293 {
 294   register enum syntaxcode code;
 295   register EMACS_INT beg = BEGV;
 296   register int quoted = 0;
 297   EMACS_INT orig = charpos;
 298 
 299   while (charpos > beg)
 300     {
 301       int c;
 302       DEC_BOTH (charpos, bytepos);
 303 
 304       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
 305       c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
 306       code = SYNTAX (c);
 307       if (! (code == Scharquote || code == Sescape))
 308         break;
 309 
 310       quoted = !quoted;
 311     }
 312 
 313   UPDATE_SYNTAX_TABLE (orig);
 314   return quoted;
 315 }
 316 
 317 /* Return the bytepos one character after BYTEPOS.
 318    We assume that BYTEPOS is not at the end of the buffer.  */
 319 
 320 INLINE EMACS_INT
 321 inc_bytepos (bytepos)
 322      EMACS_INT bytepos;
 323 {
 324   if (NILP (current_buffer->enable_multibyte_characters))
 325     return bytepos + 1;
 326 
 327   INC_POS (bytepos);
 328   return bytepos;
 329 }
 330 
 331 /* Return the bytepos one character before BYTEPOS.
 332    We assume that BYTEPOS is not at the start of the buffer.  */
 333 
 334 INLINE EMACS_INT
 335 dec_bytepos (bytepos)
 336      EMACS_INT bytepos;
 337 {
 338   if (NILP (current_buffer->enable_multibyte_characters))
 339     return bytepos - 1;
 340 
 341   DEC_POS (bytepos);
 342   return bytepos;
 343 }
 344 
 345 /* Return a defun-start position before POS and not too far before.
 346    It should be the last one before POS, or nearly the last.
 347 
 348    When open_paren_in_column_0_is_defun_start is nonzero,
 349    only the beginning of the buffer is treated as a defun-start.
 350 
 351    We record the information about where the scan started
 352    and what its result was, so that another call in the same area
 353    can return the same value very quickly.
 354 
 355    There is no promise at which position the global syntax data is
 356    valid on return from the subroutine, so the caller should explicitly
 357    update the global data.  */
 358 
 359 static EMACS_INT
 360 find_defun_start (pos, pos_byte)
 361      EMACS_INT pos, pos_byte;
 362 {
 363   EMACS_INT opoint = PT, opoint_byte = PT_BYTE;
 364 
 365   if (!open_paren_in_column_0_is_defun_start)
 366     {
 367       find_start_value_byte = BEGV_BYTE;
 368       return BEGV;
 369     }
 370 
 371   /* Use previous finding, if it's valid and applies to this inquiry.  */
 372   if (current_buffer == find_start_buffer
 373       /* Reuse the defun-start even if POS is a little farther on.
 374          POS might be in the next defun, but that's ok.
 375          Our value may not be the best possible, but will still be usable.  */
 376       && pos <= find_start_pos + 1000
 377       && pos >= find_start_value
 378       && BEGV == find_start_begv
 379       && MODIFF == find_start_modiff)
 380     return find_start_value;
 381 
 382   /* Back up to start of line.  */
 383   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
 384 
 385   /* We optimize syntax-table lookup for rare updates.  Thus we accept
 386      only those `^\s(' which are good in global _and_ text-property
 387      syntax-tables.  */
 388   SETUP_BUFFER_SYNTAX_TABLE ();
 389   while (PT > BEGV)
 390     {
 391       int c;
 392 
 393       /* Open-paren at start of line means we may have found our
 394          defun-start.  */
 395       c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 396       if (SYNTAX (c) == Sopen)
 397         {
 398           SETUP_SYNTAX_TABLE (PT + 1, -1);      /* Try again... */
 399           c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
 400           if (SYNTAX (c) == Sopen)
 401             break;
 402           /* Now fallback to the default value.  */
 403           SETUP_BUFFER_SYNTAX_TABLE ();
 404         }
 405       /* Move to beg of previous line.  */
 406       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
 407     }
 408 
 409   /* Record what we found, for the next try.  */
 410   find_start_value = PT;
 411   find_start_value_byte = PT_BYTE;
 412   find_start_buffer = current_buffer;
 413   find_start_modiff = MODIFF;
 414   find_start_begv = BEGV;
 415   find_start_pos = pos;
 416 
 417   TEMP_SET_PT_BOTH (opoint, opoint_byte);
 418 
 419   return find_start_value;
 420 }
 421 
 422 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.  */
 423 
 424 static int
 425 prev_char_comend_first (pos, pos_byte)
 426      int pos, pos_byte;
 427 {
 428   int c, val;
 429 
 430   DEC_BOTH (pos, pos_byte);
 431   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 432   c = FETCH_CHAR (pos_byte);
 433   val = SYNTAX_COMEND_FIRST (c);
 434   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 435   return val;
 436 }
 437 
 438 /* Return the SYNTAX_COMSTART_FIRST of the character before POS, POS_BYTE.  */
 439 
 440 /* static int
 441  * prev_char_comstart_first (pos, pos_byte)
 442  *      int pos, pos_byte;
 443  * {
 444  *   int c, val;
 445  *
 446  *   DEC_BOTH (pos, pos_byte);
 447  *   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
 448  *   c = FETCH_CHAR (pos_byte);
 449  *   val = SYNTAX_COMSTART_FIRST (c);
 450  *   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
 451  *   return val;
 452  * } */
 453 
 454 /* Checks whether charpos FROM is at the end of a comment.
 455    FROM_BYTE is the bytepos corresponding to FROM.
 456    Do not move back before STOP.
 457 
 458    Return a positive value if we find a comment ending at FROM/FROM_BYTE;
 459    return -1 otherwise.
 460 
 461    If successful, store the charpos of the comment's beginning
 462    into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
 463 
 464    Global syntax data remains valid for backward search starting at
 465    the returned value (or at FROM, if the search was not successful).  */
 466 
 467 static int
 468 back_comment (from, from_byte, stop, comnested, comstyle, charpos_ptr, bytepos_ptr)
 469      EMACS_INT from, from_byte, stop;
 470      int comnested, comstyle;
 471      EMACS_INT *charpos_ptr, *bytepos_ptr;
 472 {
 473   /* Look back, counting the parity of string-quotes,
 474      and recording the comment-starters seen.
 475      When we reach a safe place, assume that's not in a string;
 476      then step the main scan to the earliest comment-starter seen
 477      an even number of string quotes away from the safe place.
 478 
 479      OFROM[I] is position of the earliest comment-starter seen
 480      which is I+2X quotes from the comment-end.
 481      PARITY is current parity of quotes from the comment end.  */
 482   int string_style = -1;        /* Presumed outside of any string. */
 483   int string_lossage = 0;
 484   /* Not a real lossage: indicates that we have passed a matching comment
 485      starter plus a non-matching comment-ender, meaning that any matching
 486      comment-starter we might see later could be a false positive (hidden
 487      inside another comment).
 488      Test case:  { a (* b } c (* d *) */
 489   int comment_lossage = 0;
 490   EMACS_INT comment_end = from;
 491   EMACS_INT comment_end_byte = from_byte;
 492   EMACS_INT comstart_pos = 0;
 493   EMACS_INT comstart_byte;
 494   /* Place where the containing defun starts,
 495      or 0 if we didn't come across it yet.  */
 496   EMACS_INT defun_start = 0;
 497   EMACS_INT defun_start_byte = 0;
 498   register enum syntaxcode code;
 499   int nesting = 1;              /* current comment nesting */
 500   int c;
 501   int syntax = 0;
 502 
 503   /* FIXME: A }} comment-ender style leads to incorrect behavior
 504      in the case of {{ c }}} because we ignore the last two chars which are
 505      assumed to be comment-enders although they aren't.  */
 506 
 507   /* At beginning of range to scan, we're outside of strings;
 508      that determines quote parity to the comment-end.  */
 509   while (from != stop)
 510     {
 511       int temp_byte, prev_syntax;
 512       int com2start, com2end;
 513 
 514       /* Move back and examine a character.  */
 515       DEC_BOTH (from, from_byte);
 516       UPDATE_SYNTAX_TABLE_BACKWARD (from);
 517 
 518       prev_syntax = syntax;
 519       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
 520       syntax = SYNTAX_WITH_FLAGS (c);
 521       code = SYNTAX (c);
 522 
 523       /* Check for 2-char comment markers.  */
 524       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
 525                    && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
 526                    && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax)
 527                    && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
 528                        || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
 529       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
 530                  && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
 531 
 532       /* Nasty cases with overlapping 2-char comment markers:
 533          - snmp-mode: -- c -- foo -- c --
 534                       --- c --
 535                       ------ c --
 536          - c-mode:    *||*
 537                       |* *|* *|
 538                       |*| |* |*|
 539                       ///   */
 540 
 541       /* If a 2-char comment sequence partly overlaps with another,
 542          we don't try to be clever.  */
 543       if (from > stop && (com2end || com2start))
 544         {
 545           int next = from, next_byte = from_byte, next_c, next_syntax;
 546           DEC_BOTH (next, next_byte);
 547           UPDATE_SYNTAX_TABLE_BACKWARD (next);
 548           next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
 549           next_syntax = SYNTAX_WITH_FLAGS (next_c);
 550           if (((com2start || comnested)
 551                && SYNTAX_FLAGS_COMEND_SECOND (syntax)
 552                && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
 553               || ((com2end || comnested)
 554                   && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
 555                   && comstyle == SYNTAX_FLAGS_COMMENT_STYLE (syntax)
 556                   && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
 557             goto lossage;
 558           /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
 559         }
 560 
 561       if (com2start && comstart_pos == 0)
 562         /* We're looking at a comment starter.  But it might be a comment
 563            ender as well (see snmp-mode).  The first time we see one, we
 564            need to consider it as a comment starter,
 565            and the subsequent times as a comment ender.  */
 566         com2end = 0;
 567 
 568       /* Turn a 2-char comment sequences into the appropriate syntax.  */
 569       if (com2end)
 570         code = Sendcomment;
 571       else if (com2start)
 572         code = Scomment;
 573       /* Ignore comment starters of a different style.  */
 574       else if (code == Scomment
 575                && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax)
 576                    || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
 577         continue;
 578 
 579       /* Ignore escaped characters, except comment-enders.  */
 580       if (code != Sendcomment && char_quoted (from, from_byte))
 581         continue;
 582 
 583       switch (code)
 584         {
 585         case Sstring_fence:
 586         case Scomment_fence:
 587           c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
 588         case Sstring:
 589           /* Track parity of quotes.  */
 590           if (string_style == -1)
 591             /* Entering a string.  */
 592             string_style = c;
 593           else if (string_style == c)
 594             /* Leaving the string.  */
 595             string_style = -1;
 596           else
 597             /* If we have two kinds of string delimiters.
 598                There's no way to grok this scanning backwards.  */
 599             string_lossage = 1;
 600           break;
 601 
 602         case Scomment:
 603           /* We've already checked that it is the relevant comstyle.  */
 604           if (string_style != -1 || comment_lossage || string_lossage)
 605             /* There are odd string quotes involved, so let's be careful.
 606                Test case in Pascal: " { " a { " } */
 607             goto lossage;
 608 
 609           if (!comnested)
 610             {
 611               /* Record best comment-starter so far.  */
 612               comstart_pos = from;
 613               comstart_byte = from_byte;
 614             }
 615           else if (--nesting <= 0)
 616             /* nested comments have to be balanced, so we don't need to
 617                keep looking for earlier ones.  We use here the same (slightly
 618                incorrect) reasoning as below:  since it is followed by uniform
 619                paired string quotes, this comment-start has to be outside of
 620                strings, else the comment-end itself would be inside a string. */
 621             goto done;
 622           break;
 623 
 624         case Sendcomment:
 625           if (SYNTAX_FLAGS_COMMENT_STYLE (syntax) == comstyle
 626               && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
 627                   || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
 628             /* This is the same style of comment ender as ours. */
 629             {
 630               if (comnested)
 631                 nesting++;
 632               else
 633                 /* Anything before that can't count because it would match
 634                    this comment-ender rather than ours.  */
 635                 from = stop;    /* Break out of the loop.  */
 636             }
 637           else if (comstart_pos != 0 || c != '\n')
 638             /* We're mixing comment styles here, so we'd better be careful.
 639                The (comstart_pos != 0 || c != '\n') check is not quite correct
 640                (we should just always set comment_lossage), but removing it
 641                would imply that any multiline comment in C would go through
 642                lossage, which seems overkill.
 643                The failure should only happen in the rare cases such as
 644                  { (* } *)   */
 645             comment_lossage = 1;
 646           break;
 647 
 648         case Sopen:
 649           /* Assume a defun-start point is outside of strings.  */
 650           if (open_paren_in_column_0_is_defun_start
 651               && (from == stop
 652                   || (temp_byte = dec_bytepos (from_byte),
 653                       FETCH_CHAR (temp_byte) == '\n')))
 654             {
 655               defun_start = from;
 656               defun_start_byte = from_byte;
 657               from = stop;      /* Break out of the loop.  */
 658             }
 659           break;
 660 
 661         default:
 662           break;
 663         }
 664     }
 665 
 666   if (comstart_pos == 0)
 667     {
 668       from = comment_end;
 669       from_byte = comment_end_byte;
 670       UPDATE_SYNTAX_TABLE_FORWARD (comment_end - 1);
 671     }
 672   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
 673      or `done'), then we've found the beginning of the non-nested comment.  */
 674   else if (1)   /* !comnested */
 675     {
 676       from = comstart_pos;
 677       from_byte = comstart_byte;
 678       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 679     }
 680   else
 681     {
 682       struct lisp_parse_state state;
 683     lossage:
 684       /* We had two kinds of string delimiters mixed up
 685          together.  Decode this going forwards.
 686          Scan fwd from a known safe place (beginning-of-defun)
 687          to the one in question; this records where we
 688          last passed a comment starter.  */
 689       /* If we did not already find the defun start, find it now.  */
 690       if (defun_start == 0)
 691         {
 692           defun_start = find_defun_start (comment_end, comment_end_byte);
 693           defun_start_byte = find_start_value_byte;
 694         }
 695       do
 696         {
 697           scan_sexps_forward (&state,
 698                               defun_start, defun_start_byte,
 699                               comment_end, -10000, 0, Qnil, 0);
 700           defun_start = comment_end;
 701           if (state.incomment == (comnested ? 1 : -1)
 702               && state.comstyle == comstyle)
 703             from = state.comstr_start;
 704           else
 705             {
 706               from = comment_end;
 707               if (state.incomment)
 708                 /* If comment_end is inside some other comment, maybe ours
 709                    is nested, so we need to try again from within the
 710                    surrounding comment.  Example: { a (* " *)  */
 711                 {
 712                   /* FIXME: We should advance by one or two chars. */
 713                   defun_start = state.comstr_start + 2;
 714                   defun_start_byte = CHAR_TO_BYTE (defun_start);
 715                 }
 716             }
 717         } while (defun_start < comment_end);
 718 
 719       from_byte = CHAR_TO_BYTE (from);
 720       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
 721     }
 722 
 723  done:
 724   *charpos_ptr = from;
 725   *bytepos_ptr = from_byte;
 726 
 727   return (from == comment_end) ? -1 : from;
 728 }
 729 
 730 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
 731        doc: /* Return t if OBJECT is a syntax table.
 732 Currently, any char-table counts as a syntax table.  */)
 733      (object)
 734      Lisp_Object object;
 735 {
 736   if (CHAR_TABLE_P (object)
 737       && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
 738     return Qt;
 739   return Qnil;
 740 }
 741 
 742 static void
 743 check_syntax_table (obj)
 744      Lisp_Object obj;
 745 {
 746   CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
 747               Qsyntax_table_p, obj);
 748 }
 749 
 750 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
 751        doc: /* Return the current syntax table.
 752 This is the one specified by the current buffer.  */)
 753      ()
 754 {
 755   return current_buffer->syntax_table;
 756 }
 757 
 758 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
 759    Sstandard_syntax_table, 0, 0, 0,
 760        doc: /* Return the standard syntax table.
 761 This is the one used for new buffers.  */)
 762      ()
 763 {
 764   return Vstandard_syntax_table;
 765 }
 766 
 767 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
 768        doc: /* Construct a new syntax table and return it.
 769 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
 770      (table)
 771      Lisp_Object table;
 772 {
 773   Lisp_Object copy;
 774 
 775   if (!NILP (table))
 776     check_syntax_table (table);
 777   else
 778     table = Vstandard_syntax_table;
 779 
 780   copy = Fcopy_sequence (table);
 781 
 782   /* Only the standard syntax table should have a default element.
 783      Other syntax tables should inherit from parents instead.  */
 784   XCHAR_TABLE (copy)->defalt = Qnil;
 785 
 786   /* Copied syntax tables should all have parents.
 787      If we copied one with no parent, such as the standard syntax table,
 788      use the standard syntax table as the copy's parent.  */
 789   if (NILP (XCHAR_TABLE (copy)->parent))
 790     Fset_char_table_parent (copy, Vstandard_syntax_table);
 791   return copy;
 792 }
 793 
 794 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
 795        doc: /* Select a new syntax table for the current buffer.
 796 One argument, a syntax table.  */)
 797      (table)
 798      Lisp_Object table;
 799 {
 800   int idx;
 801   check_syntax_table (table);
 802   current_buffer->syntax_table = table;
 803   /* Indicate that this buffer now has a specified syntax table.  */
 804   idx = PER_BUFFER_VAR_IDX (syntax_table);
 805   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
 806   return table;
 807 }
 808 
 809 /* Convert a letter which signifies a syntax code
 810  into the code it signifies.
 811  This is used by modify-syntax-entry, and other things.  */
 812 
 813 unsigned char syntax_spec_code[0400] =
 814   { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 815     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 816     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 817     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 818     (char) Swhitespace, (char) Scomment_fence, (char) Sstring, 0377,
 819         (char) Smath, 0377, 0377, (char) Squote,
 820     (char) Sopen, (char) Sclose, 0377, 0377,
 821         0377, (char) Swhitespace, (char) Spunct, (char) Scharquote,
 822     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 823     0377, 0377, 0377, 0377,
 824         (char) Scomment, 0377, (char) Sendcomment, 0377,
 825     (char) Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
 826     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 827     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 828     0377, 0377, 0377, 0377, (char) Sescape, 0377, 0377, (char) Ssymbol,
 829     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
 830     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
 831     0377, 0377, 0377, 0377, 0377, 0377, 0377, (char) Sword,
 832     0377, 0377, 0377, 0377, (char) Sstring_fence, 0377, 0377, 0377
 833   };
 834 
 835 /* Indexed by syntax code, give the letter that describes it.  */
 836 
 837 char syntax_code_spec[16] =
 838   {
 839     ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
 840     '!', '|'
 841   };
 842 
 843 /* Indexed by syntax code, give the object (cons of syntax code and
 844    nil) to be stored in syntax table.  Since these objects can be
 845    shared among syntax tables, we generate them in advance.  By
 846    sharing objects, the function `describe-syntax' can give a more
 847    compact listing.  */
 848 static Lisp_Object Vsyntax_code_object;
 849 
 850 
 851 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
 852        doc: /* Return the syntax code of CHARACTER, described by a character.
 853 For example, if CHARACTER is a word constituent, the
 854 character `w' (119) is returned.
 855 The characters that correspond to various syntax codes
 856 are listed in the documentation of `modify-syntax-entry'.  */)
 857      (character)
 858      Lisp_Object character;
 859 {
 860   int char_int;
 861   CHECK_CHARACTER (character);
 862   char_int = XINT (character);
 863   SETUP_BUFFER_SYNTAX_TABLE ();
 864   return make_number (syntax_code_spec[(int) SYNTAX (char_int)]);
 865 }
 866 
 867 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
 868        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
 869      (character)
 870      Lisp_Object character;
 871 {
 872   int char_int, code;
 873   CHECK_NUMBER (character);
 874   char_int = XINT (character);
 875   SETUP_BUFFER_SYNTAX_TABLE ();
 876   code = SYNTAX (char_int);
 877   if (code == Sopen || code == Sclose)
 878     return SYNTAX_MATCH (char_int);
 879   return Qnil;
 880 }
 881 
 882 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
 883        doc: /* Convert a syntax specification STRING into syntax cell form.
 884 STRING should be a string as it is allowed as argument of
 885 `modify-syntax-entry'.  Value is the equivalent cons cell
 886 \(CODE . MATCHING-CHAR) that can be used as value of a `syntax-table'
 887 text property.  */)
 888      (string)
 889      Lisp_Object string;
 890 {
 891   register const unsigned char *p;
 892   register enum syntaxcode code;
 893   int val;
 894   Lisp_Object match;
 895 
 896   CHECK_STRING (string);
 897 
 898   p = SDATA (string);
 899   code = (enum syntaxcode) syntax_spec_code[*p++];
 900   if (((int) code & 0377) == 0377)
 901     error ("Invalid syntax description letter: %c", p[-1]);
 902 
 903   if (code == Sinherit)
 904     return Qnil;
 905 
 906   if (*p)
 907     {
 908       int len;
 909       int character = STRING_CHAR_AND_LENGTH (p, len);
 910       XSETINT (match, character);
 911       if (XFASTINT (match) == ' ')
 912         match = Qnil;
 913       p += len;
 914     }
 915   else
 916     match = Qnil;
 917 
 918   val = (int) code;
 919   while (*p)
 920     switch (*p++)
 921       {
 922       case '1':
 923         val |= 1 << 16;
 924         break;
 925 
 926       case '2':
 927         val |= 1 << 17;
 928         break;
 929 
 930       case '3':
 931         val |= 1 << 18;
 932         break;
 933 
 934       case '4':
 935         val |= 1 << 19;
 936         break;
 937 
 938       case 'p':
 939         val |= 1 << 20;
 940         break;
 941 
 942       case 'b':
 943         val |= 1 << 21;
 944         break;
 945 
 946       case 'n':
 947         val |= 1 << 22;
 948         break;
 949       }
 950 
 951   if (val < XVECTOR (Vsyntax_code_object)->size && NILP (match))
 952     return XVECTOR (Vsyntax_code_object)->contents[val];
 953   else
 954     /* Since we can't use a shared object, let's make a new one.  */
 955     return Fcons (make_number (val), match);
 956 }
 957 
 958 /* I really don't know why this is interactive
 959    help-form should at least be made useful whilst reading the second arg.  */
 960 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
 961   "cSet syntax for character: \nsSet syntax for %s to: ",
 962        doc: /* Set syntax for character CHAR according to string NEWENTRY.
 963 The syntax is changed only for table SYNTAX-TABLE, which defaults to
 964  the current buffer's syntax table.
 965 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
 966 in the range MIN to MAX are changed.
 967 The first character of NEWENTRY should be one of the following:
 968   Space or -  whitespace syntax.    w   word constituent.
 969   _           symbol constituent.   .   punctuation.
 970   (           open-parenthesis.     )   close-parenthesis.
 971   "           string quote.         \\   escape.
 972   $           paired delimiter.     '   expression quote or prefix operator.
 973   <           comment starter.      >   comment ender.
 974   /           character-quote.      @   inherit from `standard-syntax-table'.
 975   |           generic string fence. !   generic comment fence.
 976 
 977 Only single-character comment start and end sequences are represented thus.
 978 Two-character sequences are represented as described below.
 979 The second character of NEWENTRY is the matching parenthesis,
 980  used only if the first character is `(' or `)'.
 981 Any additional characters are flags.
 982 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
 983  1 means CHAR is the start of a two-char comment start sequence.
 984  2 means CHAR is the second character of such a sequence.
 985  3 means CHAR is the start of a two-char comment end sequence.
 986  4 means CHAR is the second character of such a sequence.
 987 
 988 There can be up to two orthogonal comment sequences.  This is to support
 989 language modes such as C++.  By default, all comment sequences are of style
 990 a, but you can set the comment sequence style to b (on the second character
 991 of a comment-start, or the first character of a comment-end sequence) using
 992 this flag:
 993  b means CHAR is part of comment sequence b.
 994  n means CHAR is part of a nestable comment sequence.
 995 
 996  p means CHAR is a prefix character for `backward-prefix-chars';
 997    such characters are treated as whitespace when they occur
 998    between expressions.
 999 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
1000      (c, newentry, syntax_table)
1001      Lisp_Object c, newentry, syntax_table;
1002 {
1003   if (CONSP (c))
1004     {
1005       CHECK_CHARACTER_CAR (c);
1006       CHECK_CHARACTER_CDR (c);
1007     }
1008   else
1009     CHECK_CHARACTER (c);
1010 
1011   if (NILP (syntax_table))
1012     syntax_table = current_buffer->syntax_table;
1013   else
1014     check_syntax_table (syntax_table);
1015 
1016   newentry = Fstring_to_syntax (newentry);
1017   if (CONSP (c))
1018     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1019   else
1020     SET_RAW_SYNTAX_ENTRY (syntax_table, XINT (c), newentry);
1021 
1022   /* We clear the regexp cache, since character classes can now have
1023      different values from those in the compiled regexps.*/
1024   clear_regexp_cache ();
1025 
1026   return Qnil;
1027 }
1028 
1029 /* Dump syntax table to buffer in human-readable format */
1030 
1031 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1032        Sinternal_describe_syntax_value, 1, 1, 0,
1033        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1034      (syntax)
1035      Lisp_Object syntax;
1036 {
1037   register enum syntaxcode code;
1038   char desc, start1, start2, end1, end2, prefix, comstyle, comnested;
1039   char str[2];
1040   Lisp_Object first, match_lisp, value = syntax;
1041 
1042   if (NILP (value))
1043     {
1044       insert_string ("default");
1045       return syntax;
1046     }
1047 
1048   if (CHAR_TABLE_P (value))
1049     {
1050       insert_string ("deeper char-table ...");
1051       return syntax;
1052     }
1053 
1054   if (!CONSP (value))
1055     {
1056       insert_string ("invalid");
1057       return syntax;
1058     }
1059 
1060   first = XCAR (value);
1061   match_lisp = XCDR (value);
1062 
1063   if (!INTEGERP (first) || !(NILP (match_lisp) || INTEGERP (match_lisp)))
1064     {
1065       insert_string ("invalid");
1066       return syntax;
1067     }
1068 
1069   code = (enum syntaxcode) (XINT (first) & 0377);
1070   start1 = (XINT (first) >> 16) & 1;
1071   start2 = (XINT (first) >> 17) & 1;
1072   end1 = (XINT (first) >> 18) & 1;
1073   end2 = (XINT (first) >> 19) & 1;
1074   prefix = (XINT (first) >> 20) & 1;
1075   comstyle = (XINT (first) >> 21) & 1;
1076   comnested = (XINT (first) >> 22) & 1;
1077 
1078   if ((int) code < 0 || (int) code >= (int) Smax)
1079     {
1080       insert_string ("invalid");
1081       return syntax;
1082     }
1083   desc = syntax_code_spec[(int) code];
1084 
1085   str[0] = desc, str[1] = 0;
1086   insert (str, 1);
1087 
1088   if (NILP (match_lisp))
1089     insert (" ", 1);
1090   else
1091     insert_char (XINT (match_lisp));
1092 
1093   if (start1)
1094     insert ("1", 1);
1095   if (start2)
1096     insert ("2", 1);
1097 
1098   if (end1)
1099     insert ("3", 1);
1100   if (end2)
1101     insert ("4", 1);
1102 
1103   if (prefix)
1104     insert ("p", 1);
1105   if (comstyle)
1106     insert ("b", 1);
1107   if (comnested)
1108     insert ("n", 1);
1109 
1110   insert_string ("\twhich means: ");
1111 
1112   switch (SWITCH_ENUM_CAST (code))
1113     {
1114     case Swhitespace:
1115       insert_string ("whitespace"); break;
1116     case Spunct:
1117       insert_string ("punctuation"); break;
1118     case Sword:
1119       insert_string ("word"); break;
1120     case Ssymbol:
1121       insert_string ("symbol"); break;
1122     case Sopen:
1123       insert_string ("open"); break;
1124     case Sclose:
1125       insert_string ("close"); break;
1126     case Squote:
1127       insert_string ("prefix"); break;
1128     case Sstring:
1129       insert_string ("string"); break;
1130     case Smath:
1131       insert_string ("math"); break;
1132     case Sescape:
1133       insert_string ("escape"); break;
1134     case Scharquote:
1135       insert_string ("charquote"); break;
1136     case Scomment:
1137       insert_string ("comment"); break;
1138     case Sendcomment:
1139       insert_string ("endcomment"); break;
1140     case Sinherit:
1141       insert_string ("inherit"); break;
1142     case Scomment_fence:
1143       insert_string ("comment fence"); break;
1144     case Sstring_fence:
1145       insert_string ("string fence"); break;
1146     default:
1147       insert_string ("invalid");
1148       return syntax;
1149     }
1150 
1151   if (!NILP (match_lisp))
1152     {
1153       insert_string (", matches ");
1154       insert_char (XINT (match_lisp));
1155     }
1156 
1157   if (start1)
1158     insert_string (",\n\t  is the first character of a comment-start sequence");
1159   if (start2)
1160     insert_string (",\n\t  is the second character of a comment-start sequence");
1161 
1162   if (end1)
1163     insert_string (",\n\t  is the first character of a comment-end sequence");
1164   if (end2)
1165     insert_string (",\n\t  is the second character of a comment-end sequence");
1166   if (comstyle)
1167     insert_string (" (comment style b)");
1168   if (comnested)
1169     insert_string (" (nestable)");
1170 
1171   if (prefix)
1172     insert_string (",\n\t  is a prefix character for `backward-prefix-chars'");
1173 
1174   return syntax;
1175 }
1176 
1177 int parse_sexp_ignore_comments;
1178 
1179 /* Char-table of functions that find the next or previous word
1180    boundary.  */
1181 Lisp_Object Vfind_word_boundary_function_table;
1182 
1183 /* Return the position across COUNT words from FROM.
1184    If that many words cannot be found before the end of the buffer, return 0.
1185    COUNT negative means scan backward and stop at word beginning.  */
1186 
1187 int
1188 scan_words (from, count)
1189      register int from, count;
1190 {
1191   register int beg = BEGV;
1192   register int end = ZV;
1193   register int from_byte = CHAR_TO_BYTE (from);
1194   register enum syntaxcode code;
1195   int ch0, ch1;
1196   Lisp_Object func, script, pos;
1197 
1198   immediate_quit = 1;
1199   QUIT;
1200 
1201   SETUP_SYNTAX_TABLE (from, count);
1202 
1203   while (count > 0)
1204     {
1205       while (1)
1206         {
1207           if (from == end)
1208             {
1209               immediate_quit = 0;
1210               return 0;
1211             }
1212           UPDATE_SYNTAX_TABLE_FORWARD (from);
1213           ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1214           code = SYNTAX (ch0);
1215           INC_BOTH (from, from_byte);
1216           if (words_include_escapes
1217               && (code == Sescape || code == Scharquote))
1218             break;
1219           if (code == Sword)
1220             break;
1221         }
1222       /* Now CH0 is a character which begins a word and FROM is the
1223          position of the next character.  */
1224       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1225       if (! NILP (Ffboundp (func)))
1226         {
1227           pos = call2 (func, make_number (from - 1), make_number (end));
1228           if (INTEGERP (pos) && XINT (pos) > from)
1229             {
1230               from = XINT (pos);
1231               from_byte = CHAR_TO_BYTE (from);
1232             }
1233         }
1234       else
1235         {
1236           script = CHAR_TABLE_REF (Vchar_script_table, ch0);
1237           while (1)
1238             {
1239               if (from == end) break;
1240               UPDATE_SYNTAX_TABLE_FORWARD (from);
1241               ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1242               code = SYNTAX (ch1);
1243               if ((code != Sword
1244                    && (! words_include_escapes
1245                        || (code != Sescape && code != Scharquote)))
1246                   || word_boundary_p (ch0, ch1))
1247                 break;
1248               INC_BOTH (from, from_byte);
1249               ch0 = ch1;
1250             }
1251         }
1252       count--;
1253     }
1254   while (count < 0)
1255     {
1256       while (1)
1257         {
1258           if (from == beg)
1259             {
1260               immediate_quit = 0;
1261               return 0;
1262             }
1263           DEC_BOTH (from, from_byte);
1264           UPDATE_SYNTAX_TABLE_BACKWARD (from);
1265           ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1266           code = SYNTAX (ch1);
1267           if (words_include_escapes
1268               && (code == Sescape || code == Scharquote))
1269             break;
1270           if (code == Sword)
1271             break;
1272         }
1273       /* Now CH1 is a character which ends a word and FROM is the
1274          position of it.  */
1275       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1276       if (! NILP (Ffboundp (func)))
1277         {
1278           pos = call2 (func, make_number (from), make_number (beg));
1279           if (INTEGERP (pos) && XINT (pos) < from)
1280             {
1281               from = XINT (pos);
1282               from_byte = CHAR_TO_BYTE (from);
1283             }
1284         }
1285       else
1286         {
1287           script = CHAR_TABLE_REF (Vchar_script_table, ch1);
1288           while (1)
1289             {
1290               if (from == beg)
1291                 break;
1292               DEC_BOTH (from, from_byte);
1293               UPDATE_SYNTAX_TABLE_BACKWARD (from);
1294               ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1295               code = SYNTAX (ch0);
1296               if ((code != Sword
1297                    && (! words_include_escapes
1298                        || (code != Sescape && code != Scharquote)))
1299                   || word_boundary_p (ch0, ch1))
1300                 {
1301                   INC_BOTH (from, from_byte);
1302                   break;
1303                 }
1304               ch1 = ch0;
1305             }
1306         }
1307       count++;
1308     }
1309 
1310   immediate_quit = 0;
1311 
1312   return from;
1313 }
1314 
1315 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1316        doc: /* Move point forward ARG words (backward if ARG is negative).
1317 Normally returns t.
1318 If an edge of the buffer or a field boundary is reached, point is left there
1319 and the function returns nil.  Field boundaries are not noticed if
1320 `inhibit-field-text-motion' is non-nil.  */)
1321      (arg)
1322      Lisp_Object arg;
1323 {
1324   Lisp_Object tmp;
1325   int orig_val, val;
1326 
1327   if (NILP (arg))
1328     XSETFASTINT (arg, 1);
1329   else
1330     CHECK_NUMBER (arg);
1331 
1332   val = orig_val = scan_words (PT, XINT (arg));
1333   if (! orig_val)
1334     val = XINT (arg) > 0 ? ZV : BEGV;
1335 
1336   /* Avoid jumping out of an input field.  */
1337   tmp = Fconstrain_to_field (make_number (val), make_number (PT),
1338                              Qt, Qnil, Qnil);
1339   val = XFASTINT (tmp);
1340 
1341   SET_PT (val);
1342   return val == orig_val ? Qt : Qnil;
1343 }
1344 
1345 Lisp_Object skip_chars ();
1346 
1347 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1348        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1349 STRING is like the inside of a `[...]' in a regular expression
1350 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1351  (but not at the end of a range; quoting is never needed there).
1352 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1353 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1354 Char classes, e.g. `[:alpha:]', are supported.
1355 
1356 Returns the distance traveled, either zero or positive.  */)
1357      (string, lim)
1358      Lisp_Object string, lim;
1359 {
1360   return skip_chars (1, string, lim, 1);
1361 }
1362 
1363 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1364        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1365 See `skip-chars-forward' for details.
1366 Returns the distance traveled, either zero or negative.  */)
1367      (string, lim)
1368      Lisp_Object string, lim;
1369 {
1370   return skip_chars (0, string, lim, 1);
1371 }
1372 
1373 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1374        doc: /* Move point forward across chars in specified syntax classes.
1375 SYNTAX is a string of syntax code characters.
1376 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1377 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1378 This function returns the distance traveled, either zero or positive.  */)
1379      (syntax, lim)
1380      Lisp_Object syntax, lim;
1381 {
1382   return skip_syntaxes (1, syntax, lim);
1383 }
1384 
1385 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1386        doc: /* Move point backward across chars in specified syntax classes.
1387 SYNTAX is a string of syntax code characters.
1388 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1389 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1390 This function returns the distance traveled, either zero or negative.  */)
1391      (syntax, lim)
1392      Lisp_Object syntax, lim;
1393 {
1394   return skip_syntaxes (0, syntax, lim);
1395 }
1396 
1397 static Lisp_Object
1398 skip_chars (forwardp, string, lim, handle_iso_classes)
1399      int forwardp;
1400      Lisp_Object string, lim;
1401      int handle_iso_classes;
1402 {
1403   register unsigned int c;
1404   unsigned char fastmap[0400];
1405   /* Store the ranges of non-ASCII characters.  */
1406   int *char_ranges;
1407   int n_char_ranges = 0;
1408   int negate = 0;
1409   register int i, i_byte;
1410   /* Set to 1 if the current buffer is multibyte and the region
1411      contains non-ASCII chars.  */
1412   int multibyte;
1413   /* Set to 1 if STRING is multibyte and it contains non-ASCII
1414      chars.  */
1415   int string_multibyte;
1416   int size_byte;
1417   const unsigned char *str;
1418   int len;
1419   Lisp_Object iso_classes;
1420 
1421   CHECK_STRING (string);
1422   iso_classes = Qnil;
1423 
1424   if (NILP (lim))
1425     XSETINT (lim, forwardp ? ZV : BEGV);
1426   else
1427     CHECK_NUMBER_COERCE_MARKER (lim);
1428 
1429   /* In any case, don't allow scan outside bounds of buffer.  */
1430   if (XINT (lim) > ZV)
1431     XSETFASTINT (lim, ZV);
1432   if (XINT (lim) < BEGV)
1433     XSETFASTINT (lim, BEGV);
1434 
1435   multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1436                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1437   string_multibyte = SBYTES (string) > SCHARS (string);
1438 
1439   bzero (fastmap, sizeof fastmap);
1440 
1441   str = SDATA (string);
1442   size_byte = SBYTES (string);
1443 
1444   i_byte = 0;
1445   if (i_byte < size_byte
1446       && SREF (string, 0) == '^')
1447     {
1448       negate = 1; i_byte++;
1449     }
1450 
1451   /* Find the characters specified and set their elements of fastmap.
1452      Handle backslashes and ranges specially.
1453 
1454      If STRING contains non-ASCII characters, setup char_ranges for
1455      them and use fastmap only for their leading codes.  */
1456 
1457   if (! string_multibyte)
1458     {
1459       int string_has_eight_bit = 0;
1460 
1461       /* At first setup fastmap.  */
1462       while (i_byte < size_byte)
1463         {
1464           c = str[i_byte++];
1465 
1466           if (handle_iso_classes && c == '['
1467               && i_byte < size_byte
1468               && str[i_byte] == ':')
1469             {
1470               const unsigned char *class_beg = str + i_byte + 1;
1471               const unsigned char *class_end = class_beg;
1472               const unsigned char *class_limit = str + size_byte - 2;
1473               /* Leave room for the null.  */
1474               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1475               re_wctype_t cc;
1476 
1477               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1478                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1479 
1480               while (class_end < class_limit
1481                      && *class_end >= 'a' && *class_end <= 'z')
1482                 class_end++;
1483 
1484               if (class_end == class_beg
1485                   || *class_end != ':' || class_end[1] != ']')
1486                 goto not_a_class_name;
1487 
1488               bcopy (class_beg, class_name, class_end - class_beg);
1489               class_name[class_end - class_beg] = 0;
1490 
1491               cc = re_wctype (class_name);
1492               if (cc == 0)
1493                 error ("Invalid ISO C character class");
1494 
1495               iso_classes = Fcons (make_number (cc), iso_classes);
1496 
1497               i_byte = class_end + 2 - str;
1498               continue;
1499             }
1500 
1501         not_a_class_name:
1502           if (c == '\\')
1503             {
1504               if (i_byte == size_byte)
1505                 break;
1506 
1507               c = str[i_byte++];
1508             }
1509           /* Treat `-' as range character only if another character
1510              follows.  */
1511           if (i_byte + 1 < size_byte
1512               && str[i_byte] == '-')
1513             {
1514               unsigned int c2;
1515 
1516               /* Skip over the dash.  */
1517               i_byte++;
1518 
1519               /* Get the end of the range.  */
1520               c2 = str[i_byte++];
1521               if (c2 == '\\'
1522                   && i_byte < size_byte)
1523                 c2 = str[i_byte++];
1524 
1525               if (c <= c2)
1526                 {
1527                   while (c <= c2)
1528                     fastmap[c++] = 1;
1529                   if (! ASCII_CHAR_P (c2))
1530                     string_has_eight_bit = 1;
1531                 }
1532             }
1533           else
1534             {
1535               fastmap[c] = 1;
1536               if (! ASCII_CHAR_P (c))
1537                 string_has_eight_bit = 1;
1538             }
1539         }
1540 
1541       /* If the current range is multibyte and STRING contains
1542          eight-bit chars, arrange fastmap and setup char_ranges for
1543          the corresponding multibyte chars.  */
1544       if (multibyte && string_has_eight_bit)
1545         {
1546           unsigned char fastmap2[0400];
1547           int range_start_byte, range_start_char;
1548 
1549           bcopy (fastmap2 + 0200, fastmap + 0200, 0200);
1550           bzero (fastmap + 0200, 0200);
1551           /* We are sure that this loop stops.  */
1552           for (i = 0200; ! fastmap2[i]; i++);
1553           c = BYTE8_TO_CHAR (i);
1554           fastmap[CHAR_LEADING_CODE (c)] = 1;
1555           range_start_byte = i;
1556           range_start_char = c;
1557           char_ranges = (int *) alloca (sizeof (int) * 128 * 2);
1558           for (i = 129; i < 0400; i++)
1559             {
1560               c = BYTE8_TO_CHAR (i);
1561               fastmap[CHAR_LEADING_CODE (c)] = 1;
1562               if (i - range_start_byte != c - range_start_char)
1563                 {
1564                   char_ranges[n_char_ranges++] = range_start_char;
1565                   char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1566                                                   + range_start_char);
1567                   range_start_byte = i;
1568                   range_start_char = c;
1569                 }
1570             }
1571           char_ranges[n_char_ranges++] = range_start_char;
1572           char_ranges[n_char_ranges++] = ((i - 1 - range_start_byte)
1573                                           + range_start_char);
1574         }
1575     }
1576   else                          /* STRING is multibyte */
1577     {
1578       char_ranges = (int *) alloca (sizeof (int) * SCHARS (string) * 2);
1579 
1580       while (i_byte < size_byte)
1581         {
1582           unsigned char leading_code;
1583 
1584           leading_code = str[i_byte];
1585           c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1586           i_byte += len;
1587 
1588           if (handle_iso_classes && c == '['
1589               && i_byte < size_byte
1590               && STRING_CHAR (str + i_byte) == ':')
1591             {
1592               const unsigned char *class_beg = str + i_byte + 1;
1593               const unsigned char *class_end = class_beg;
1594               const unsigned char *class_limit = str + size_byte - 2;
1595               /* Leave room for the null.        */
1596               unsigned char class_name[CHAR_CLASS_MAX_LENGTH + 1];
1597               re_wctype_t cc;
1598 
1599               if (class_limit - class_beg > CHAR_CLASS_MAX_LENGTH)
1600                 class_limit = class_beg + CHAR_CLASS_MAX_LENGTH;
1601 
1602               while (class_end < class_limit
1603                      && *class_end >= 'a' && *class_end <= 'z')
1604                 class_end++;
1605 
1606               if (class_end == class_beg
1607                   || *class_end != ':' || class_end[1] != ']')
1608                 goto not_a_class_name_multibyte;
1609 
1610               bcopy (class_beg, class_name, class_end - class_beg);
1611               class_name[class_end - class_beg] = 0;
1612 
1613               cc = re_wctype (class_name);
1614               if (cc == 0)
1615                 error ("Invalid ISO C character class");
1616 
1617               iso_classes = Fcons (make_number (cc), iso_classes);
1618 
1619               i_byte = class_end + 2 - str;
1620               continue;
1621             }
1622 
1623         not_a_class_name_multibyte:
1624           if (c == '\\')
1625             {
1626               if (i_byte == size_byte)
1627                 break;
1628 
1629               leading_code = str[i_byte];
1630               c = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1631               i_byte += len;
1632             }
1633           /* Treat `-' as range character only if another character
1634              follows.  */
1635           if (i_byte + 1 < size_byte
1636               && str[i_byte] == '-')
1637             {
1638               unsigned int c2;
1639               unsigned char leading_code2;
1640 
1641               /* Skip over the dash.  */
1642               i_byte++;
1643 
1644               /* Get the end of the range.  */
1645               leading_code2 = str[i_byte];
1646               c2 = STRING_CHAR_AND_LENGTH (str + i_byte, len);
1647               i_byte += len;
1648 
1649               if (c2 == '\\'
1650                   && i_byte < size_byte)
1651                 {
1652                   leading_code2 = str[i_byte];
1653                   c2 =STRING_CHAR_AND_LENGTH (str + i_byte, len);
1654                   i_byte += len;
1655                 }
1656 
1657               if (c > c2)
1658                 continue;
1659               if (ASCII_CHAR_P (c))
1660                 {
1661                   while (c <= c2 && c < 0x80)
1662                     fastmap[c++] = 1;
1663                   leading_code = CHAR_LEADING_CODE (c);
1664                 }
1665               if (! ASCII_CHAR_P (c))
1666                 {
1667                   while (leading_code <= leading_code2)
1668                     fastmap[leading_code++] = 1;
1669                   if (c <= c2)
1670                     {
1671                       char_ranges[n_char_ranges++] = c;
1672                       char_ranges[n_char_ranges++] = c2;
1673                     }
1674                 }
1675             }
1676           else
1677             {
1678               if (ASCII_CHAR_P (c))
1679                 fastmap[c] = 1;
1680               else
1681                 {
1682                   fastmap[leading_code] = 1;
1683                   char_ranges[n_char_ranges++] = c;
1684                   char_ranges[n_char_ranges++] = c;
1685                 }
1686             }
1687         }
1688 
1689       /* If the current range is unibyte and STRING contains non-ASCII
1690          chars, arrange fastmap for the corresponding unibyte
1691          chars.  */
1692 
1693       if (! multibyte && n_char_ranges > 0)
1694         {
1695           bzero (fastmap + 0200, 0200);
1696           for (i = 0; i < n_char_ranges; i += 2)
1697             {
1698               int c1 = char_ranges[i];
1699               int c2 = char_ranges[i + 1];
1700 
1701               for (; c1 <= c2; c1++)
1702                 {
1703                   int b = CHAR_TO_BYTE_SAFE (c1);
1704                   if (b >= 0)
1705                     fastmap[b] = 1;
1706                 }
1707             }
1708         }
1709     }
1710 
1711   /* If ^ was the first character, complement the fastmap.  */
1712   if (negate)
1713     {
1714       if (! multibyte)
1715         for (i = 0; i < sizeof fastmap; i++)
1716           fastmap[i] ^= 1;
1717       else
1718         {
1719           for (i = 0; i < 0200; i++)
1720             fastmap[i] ^= 1;
1721           /* All non-ASCII chars possibly match.  */
1722           for (; i < sizeof fastmap; i++)
1723             fastmap[i] = 1;
1724         }
1725     }
1726 
1727   {
1728     int start_point = PT;
1729     int pos = PT;
1730     int pos_byte = PT_BYTE;
1731     unsigned char *p = PT_ADDR, *endp, *stop;
1732 
1733     if (forwardp)
1734       {
1735         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1736         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1737       }
1738     else
1739       {
1740         endp = CHAR_POS_ADDR (XINT (lim));
1741         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1742       }
1743 
1744     immediate_quit = 1;
1745     /* This code may look up syntax tables using macros that rely on the
1746        gl_state object.  To make sure this object is not out of date,
1747        let's initialize it manually.
1748        We ignore syntax-table text-properties for now, since that's
1749        what we've done in the past.  */
1750     SETUP_BUFFER_SYNTAX_TABLE ();
1751     if (forwardp)
1752       {
1753         if (multibyte)
1754           while (1)
1755             {
1756               int nbytes;
1757 
1758               if (p >= stop)
1759                 {
1760                   if (p >= endp)
1761                     break;
1762                   p = GAP_END_ADDR;
1763                   stop = endp;
1764                 }
1765               c = STRING_CHAR_AND_LENGTH (p, nbytes);
1766               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1767                 {
1768                   if (negate)
1769                     break;
1770                   else
1771                     goto fwd_ok;
1772                 }
1773 
1774               if (! fastmap[*p])
1775                 break;
1776               if (! ASCII_CHAR_P (c))
1777                 {
1778                   /* As we are looking at a multibyte character, we
1779                      must look up the character in the table
1780                      CHAR_RANGES.  If there's no data in the table,
1781                      that character is not what we want to skip.  */
1782 
1783                   /* The following code do the right thing even if
1784                      n_char_ranges is zero (i.e. no data in
1785                      CHAR_RANGES).  */
1786                   for (i = 0; i < n_char_ranges; i += 2)
1787                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1788                       break;
1789                   if (!(negate ^ (i < n_char_ranges)))
1790                     break;
1791                 }
1792             fwd_ok:
1793               p += nbytes, pos++, pos_byte += nbytes;
1794             }
1795         else
1796           while (1)
1797             {
1798               if (p >= stop)
1799                 {
1800                   if (p >= endp)
1801                     break;
1802                   p = GAP_END_ADDR;
1803                   stop = endp;
1804                 }
1805 
1806               if (!NILP (iso_classes) && in_classes (*p, iso_classes))
1807                 {
1808                   if (negate)
1809                     break;
1810                   else
1811                     goto fwd_unibyte_ok;
1812                 }
1813 
1814               if (!fastmap[*p])
1815                 break;
1816             fwd_unibyte_ok:
1817               p++, pos++, pos_byte++;
1818             }
1819       }
1820     else
1821       {
1822         if (multibyte)
1823           while (1)
1824             {
1825               unsigned char *prev_p;
1826 
1827               if (p <= stop)
1828                 {
1829                   if (p <= endp)
1830                     break;
1831                   p = GPT_ADDR;
1832                   stop = endp;
1833                 }
1834               prev_p = p;
1835               while (--p >= stop && ! CHAR_HEAD_P (*p));
1836               c = STRING_CHAR (p);
1837 
1838               if (! NILP (iso_classes) && in_classes (c, iso_classes))
1839                 {
1840                   if (negate)
1841                     break;
1842                   else
1843                     goto back_ok;
1844                 }
1845 
1846               if (! fastmap[*p])
1847                 break;
1848               if (! ASCII_CHAR_P (c))
1849                 {
1850                   /* See the comment in the previous similar code.  */
1851                   for (i = 0; i < n_char_ranges; i += 2)
1852                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1853                       break;
1854                   if (!(negate ^ (i < n_char_ranges)))
1855                     break;
1856                 }
1857             back_ok:
1858               pos--, pos_byte -= prev_p - p;
1859             }
1860         else
1861           while (1)
1862             {
1863               if (p <= stop)
1864                 {
1865                   if (p <= endp)
1866                     break;
1867                   p = GPT_ADDR;
1868                   stop = endp;
1869                 }
1870 
1871               if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
1872                 {
1873                   if (negate)
1874                     break;
1875                   else
1876                     goto back_unibyte_ok;
1877                 }
1878 
1879               if (!fastmap[p[-1]])
1880                 break;
1881             back_unibyte_ok:
1882               p--, pos--, pos_byte--;
1883             }
1884       }
1885 
1886     SET_PT_BOTH (pos, pos_byte);
1887     immediate_quit = 0;
1888 
1889     return make_number (PT - start_point);
1890   }
1891 }
1892 
1893 
1894 static Lisp_Object
1895 skip_syntaxes (forwardp, string, lim)
1896      int forwardp;
1897      Lisp_Object string, lim;
1898 {
1899   register unsigned int c;
1900   unsigned char fastmap[0400];
1901   int negate = 0;
1902   register int i, i_byte;
1903   int multibyte;
1904   int size_byte;
1905   unsigned char *str;
1906 
1907   CHECK_STRING (string);
1908 
1909   if (NILP (lim))
1910     XSETINT (lim, forwardp ? ZV : BEGV);
1911   else
1912     CHECK_NUMBER_COERCE_MARKER (lim);
1913 
1914   /* In any case, don't allow scan outside bounds of buffer.  */
1915   if (XINT (lim) > ZV)
1916     XSETFASTINT (lim, ZV);
1917   if (XINT (lim) < BEGV)
1918     XSETFASTINT (lim, BEGV);
1919 
1920   if (forwardp ? (PT >= XFASTINT (lim)) : (PT <= XFASTINT (lim)))
1921     return make_number (0);
1922 
1923   multibyte = (!NILP (current_buffer->enable_multibyte_characters)
1924                && (XINT (lim) - PT != CHAR_TO_BYTE (XINT (lim)) - PT_BYTE));
1925 
1926   bzero (fastmap, sizeof fastmap);
1927 
1928   if (SBYTES (string) > SCHARS (string))
1929     /* As this is very rare case (syntax spec is ASCII only), don't
1930        consider efficiency.  */
1931     string = string_make_unibyte (string);
1932 
1933   str = SDATA (string);
1934   size_byte = SBYTES (string);
1935 
1936   i_byte = 0;
1937   if (i_byte < size_byte
1938       && SREF (string, 0) == '^')
1939     {
1940       negate = 1; i_byte++;
1941     }
1942 
1943   /* Find the syntaxes specified and set their elements of fastmap.  */
1944 
1945   while (i_byte < size_byte)
1946     {
1947       c = str[i_byte++];
1948       fastmap[syntax_spec_code[c]] = 1;
1949     }
1950 
1951   /* If ^ was the first character, complement the fastmap.  */
1952   if (negate)
1953     for (i = 0; i < sizeof fastmap; i++)
1954       fastmap[i] ^= 1;
1955 
1956   {
1957     int start_point = PT;
1958     int pos = PT;
1959     int pos_byte = PT_BYTE;
1960     unsigned char *p = PT_ADDR, *endp, *stop;
1961 
1962     if (forwardp)
1963       {
1964         endp = (XINT (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XINT (lim));
1965         stop = (pos < GPT && GPT < XINT (lim)) ? GPT_ADDR : endp;
1966       }
1967     else
1968       {
1969         endp = CHAR_POS_ADDR (XINT (lim));
1970         stop = (pos >= GPT && GPT > XINT (lim)) ? GAP_END_ADDR : endp;
1971       }
1972 
1973     immediate_quit = 1;
1974     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
1975     if (forwardp)
1976       {
1977         if (multibyte)
1978           {
1979             while (1)
1980               {
1981                 int nbytes;
1982 
1983                 if (p >= stop)
1984                   {
1985                     if (p >= endp)
1986                       break;
1987                     p = GAP_END_ADDR;
1988                     stop = endp;
1989                   }
1990                 c = STRING_CHAR_AND_LENGTH (p, nbytes);
1991                 if (! fastmap[(int) SYNTAX (c)])
1992                   break;
1993                 p += nbytes, pos++, pos_byte += nbytes;
1994                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
1995               }
1996           }
1997         else
1998           {
1999             while (1)
2000               {
2001                 if (p >= stop)
2002                   {
2003                     if (p >= endp)
2004                       break;
2005                     p = GAP_END_ADDR;
2006                     stop = endp;
2007                   }
2008                 if (! fastmap[(int) SYNTAX (*p)])
2009                   break;
2010                 p++, pos++, pos_byte++;
2011                 UPDATE_SYNTAX_TABLE_FORWARD (pos);
2012               }
2013           }
2014       }
2015     else
2016       {
2017         if (multibyte)
2018           {
2019             while (1)
2020               {
2021                 unsigned char *prev_p;
2022 
2023                 if (p <= stop)
2024                   {
2025                     if (p <= endp)
2026                       break;
2027                     p = GPT_ADDR;
2028                     stop = endp;
2029                   }
2030                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2031                 prev_p = p;
2032                 while (--p >= stop && ! CHAR_HEAD_P (*p));
2033                 c = STRING_CHAR (p);
2034                 if (! fastmap[(int) SYNTAX (c)])
2035                   break;
2036                 pos--, pos_byte -= prev_p - p;
2037               }
2038           }
2039         else
2040           {
2041             while (1)
2042               {
2043                 if (p <= stop)
2044                   {
2045                     if (p <= endp)
2046                       break;
2047                     p = GPT_ADDR;
2048                     stop = endp;
2049                   }
2050                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2051                 if (! fastmap[(int) SYNTAX (p[-1])])
2052                   break;
2053                 p--, pos--, pos_byte--;
2054               }
2055           }
2056       }
2057 
2058     SET_PT_BOTH (pos, pos_byte);
2059     immediate_quit = 0;
2060 
2061     return make_number (PT - start_point);
2062   }
2063 }
2064 
2065 /* Return 1 if character C belongs to one of the ISO classes
2066    in the list ISO_CLASSES.  Each class is represented by an
2067    integer which is its type according to re_wctype.  */
2068 
2069 static int
2070 in_classes (c, iso_classes)
2071      int c;
2072      Lisp_Object iso_classes;
2073 {
2074   int fits_class = 0;
2075 
2076   while (CONSP (iso_classes))
2077     {
2078       Lisp_Object elt;
2079       elt = XCAR (iso_classes);
2080       iso_classes = XCDR (iso_classes);
2081 
2082       if (re_iswctype (c, XFASTINT (elt)))
2083         fits_class = 1;
2084     }
2085 
2086   return fits_class;
2087 }
2088 
2089 /* Jump over a comment, assuming we are at the beginning of one.
2090    FROM is the current position.
2091    FROM_BYTE is the bytepos corresponding to FROM.
2092    Do not move past STOP (a charpos).
2093    The comment over which we have to jump is of style STYLE
2094      (either SYNTAX_COMMENT_STYLE(foo) or ST_COMMENT_STYLE).
2095    NESTING should be positive to indicate the nesting at the beginning
2096      for nested comments and should be zero or negative else.
2097      ST_COMMENT_STYLE cannot be nested.
2098    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2099      (or 0 If the search cannot start in the middle of a two-character).
2100 
2101    If successful, return 1 and store the charpos of the comment's end
2102    into *CHARPOS_PTR and the corresponding bytepos into *BYTEPOS_PTR.
2103    Else, return 0 and store the charpos STOP into *CHARPOS_PTR, the
2104    corresponding bytepos into *BYTEPOS_PTR and the current nesting
2105    (as defined for state.incomment) in *INCOMMENT_PTR.
2106 
2107    The comment end is the last character of the comment rather than the
2108      character just after the comment.
2109 
2110    Global syntax data is assumed to initially be valid for FROM and
2111    remains valid for forward search starting at the returned position. */
2112 
2113 static int
2114 forw_comment (from, from_byte, stop, nesting, style, prev_syntax,
2115               charpos_ptr, bytepos_ptr, incomment_ptr)
2116      EMACS_INT from, from_byte, stop;
2117      int nesting, style, prev_syntax;
2118      EMACS_INT *charpos_ptr, *bytepos_ptr;
2119      int *incomment_ptr;
2120 {
2121   register int c, c1;
2122   register enum syntaxcode code;
2123   register int syntax;
2124 
2125   if (nesting <= 0) nesting = -1;
2126 
2127   /* Enter the loop in the middle so that we find
2128      a 2-char comment ender if we start in the middle of it.  */
2129   syntax = prev_syntax;
2130   if (syntax != 0) goto forw_incomment;
2131 
2132   while (1)
2133     {
2134       if (from == stop)
2135         {
2136           *incomment_ptr = nesting;
2137           *charpos_ptr = from;
2138           *bytepos_ptr = from_byte;
2139           return 0;
2140         }
2141       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2142       syntax = SYNTAX_WITH_FLAGS (c);
2143       code = syntax & 0xff;
2144       if (code == Sendcomment
2145           && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
2146           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2147               (nesting > 0 && --nesting == 0) : nesting < 0))
2148         /* we have encountered a comment end of the same style
2149            as the comment sequence which began this comment
2150            section */
2151         break;
2152       if (code == Scomment_fence
2153           && style == ST_COMMENT_STYLE)
2154         /* we have encountered a comment end of the same style
2155            as the comment sequence which began this comment
2156            section.  */
2157         break;
2158       if (nesting > 0
2159           && code == Scomment
2160           && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2161           && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style)
2162         /* we have encountered a nested comment of the same style
2163            as the comment sequence which began this comment section */
2164         nesting++;
2165       INC_BOTH (from, from_byte);
2166       UPDATE_SYNTAX_TABLE_FORWARD (from);
2167 
2168     forw_incomment:
2169       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2170           && SYNTAX_FLAGS_COMMENT_STYLE (syntax) == style
2171           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2172               SYNTAX_COMEND_SECOND (c1))
2173           && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2174                SYNTAX_COMMENT_NESTED (c1)) ? nesting > 0 : nesting < 0))
2175         {
2176           if (--nesting <= 0)
2177             /* we have encountered a comment end of the same style
2178                as the comment sequence which began this comment
2179                section */
2180             break;
2181           else
2182             {
2183               INC_BOTH (from, from_byte);
2184               UPDATE_SYNTAX_TABLE_FORWARD (from);
2185             }
2186         }
2187       if (nesting > 0
2188           && from < stop
2189           && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2190           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2191               SYNTAX_COMMENT_STYLE (c1) == style
2192               && SYNTAX_COMSTART_SECOND (c1))
2193           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2194               SYNTAX_COMMENT_NESTED (c1)))
2195         /* we have encountered a nested comment of the same style
2196            as the comment sequence which began this comment
2197            section */
2198         {
2199           INC_BOTH (from, from_byte);
2200           UPDATE_SYNTAX_TABLE_FORWARD (from);
2201           nesting++;
2202         }
2203     }
2204   *charpos_ptr = from;
2205   *bytepos_ptr = from_byte;
2206   return 1;
2207 }
2208 
2209 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2210        doc: /*
2211 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
2212 Stop scanning if we find something other than a comment or whitespace.
2213 Set point to where scanning stops.
2214 If COUNT comments are found as expected, with nothing except whitespace
2215 between them, return t; otherwise return nil.  */)
2216      (count)
2217      Lisp_Object count;
2218 {
2219   register EMACS_INT from;
2220   EMACS_INT from_byte;
2221   register EMACS_INT stop;
2222   register int c, c1;
2223   register enum syntaxcode code;
2224   int comstyle = 0;         /* style of comment encountered */
2225   int comnested = 0;        /* whether the comment is nestable or not */
2226   int found;
2227   EMACS_INT count1;
2228   EMACS_INT out_charpos, out_bytepos;
2229   int dummy;
2230 
2231   CHECK_NUMBER (count);
2232   count1 = XINT (count);
2233   stop = count1 > 0 ? ZV : BEGV;
2234 
2235   immediate_quit = 1;
2236   QUIT;
2237 
2238   from = PT;
2239   from_byte = PT_BYTE;
2240 
2241   SETUP_SYNTAX_TABLE (from, count1);
2242   while (count1 > 0)
2243     {
2244       do
2245         {
2246           int comstart_first;
2247 
2248           if (from == stop)
2249             {
2250               SET_PT_BOTH (from, from_byte);
2251               immediate_quit = 0;
2252               return Qnil;
2253             }
2254           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2255           code = SYNTAX (c);
2256           comstart_first = SYNTAX_COMSTART_FIRST (c);
2257           comnested = SYNTAX_COMMENT_NESTED (c);
2258           comstyle = SYNTAX_COMMENT_STYLE (c);
2259           INC_BOTH (from, from_byte);
2260           UPDATE_SYNTAX_TABLE_FORWARD (from);
2261           if (from < stop && comstart_first
2262               && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2263                   SYNTAX_COMSTART_SECOND (c1)))
2264             {
2265               /* We have encountered a comment start sequence and we
2266                  are ignoring all text inside comments.  We must record
2267                  the comment style this sequence begins so that later,
2268                  only a comment end of the same style actually ends
2269                  the comment section.  */
2270               code = Scomment;
2271               comstyle = SYNTAX_COMMENT_STYLE (c1);
2272               comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2273               INC_BOTH (from, from_byte);
2274               UPDATE_SYNTAX_TABLE_FORWARD (from);
2275             }
2276         }
2277       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2278 
2279       if (code == Scomment_fence)
2280         comstyle = ST_COMMENT_STYLE;
2281       else if (code != Scomment)
2282         {
2283           immediate_quit = 0;
2284           DEC_BOTH (from, from_byte);
2285           SET_PT_BOTH (from, from_byte);
2286           return Qnil;
2287         }
2288       /* We're at the start of a comment.  */
2289       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2290                             &out_charpos, &out_bytepos, &dummy);
2291       from = out_charpos; from_byte = out_bytepos;
2292       if (!found)
2293         {
2294           immediate_quit = 0;
2295           SET_PT_BOTH (from, from_byte);
2296           return Qnil;
2297         }
2298       INC_BOTH (from, from_byte);
2299       UPDATE_SYNTAX_TABLE_FORWARD (from);
2300       /* We have skipped one comment.  */
2301       count1--;
2302     }
2303 
2304   while (count1 < 0)
2305     {
2306       while (1)
2307         {
2308           int quoted;
2309 
2310           if (from <= stop)
2311             {
2312               SET_PT_BOTH (BEGV, BEGV_BYTE);
2313               immediate_quit = 0;
2314               return Qnil;
2315             }
2316 
2317           DEC_BOTH (from, from_byte);
2318           /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2319           quoted = char_quoted (from, from_byte);
2320           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2321           code = SYNTAX (c);
2322           comstyle = 0;
2323           comnested = SYNTAX_COMMENT_NESTED (c);
2324           if (code == Sendcomment)
2325             comstyle = SYNTAX_COMMENT_STYLE (c);
2326           if (from > stop && SYNTAX_COMEND_SECOND (c)
2327               && prev_char_comend_first (from, from_byte)
2328               && !char_quoted (from - 1, dec_bytepos (from_byte)))
2329             {
2330               /* We must record the comment style encountered so that
2331                  later, we can match only the proper comment begin
2332                  sequence of the same style.  */
2333               DEC_BOTH (from, from_byte);
2334               code = Sendcomment;
2335               /* Calling char_quoted, above, set up global syntax position
2336                  at the new value of FROM.  */
2337               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2338               comstyle = SYNTAX_COMMENT_STYLE (c1);
2339               comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2340             }
2341 
2342           if (code == Scomment_fence)
2343             {
2344               /* Skip until first preceding unquoted comment_fence.  */
2345               int found = 0, ini = from, ini_byte = from_byte;
2346 
2347               while (1)
2348                 {
2349                   DEC_BOTH (from, from_byte);
2350                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2351                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2352                   if (SYNTAX (c) == Scomment_fence
2353                       && !char_quoted (from, from_byte))
2354                     {
2355                       found = 1;
2356                       break;
2357                     }
2358                   else if (from == stop)
2359                     break;
2360                 }
2361               if (found == 0)
2362                 {
2363                   from = ini;           /* Set point to ini + 1.  */
2364                   from_byte = ini_byte;
2365                   goto leave;
2366                 }
2367               else
2368                 /* We have skipped one comment.  */
2369                 break;
2370             }
2371           else if (code == Sendcomment)
2372             {
2373               found = back_comment (from, from_byte, stop, comnested, comstyle,
2374                                     &out_charpos, &out_bytepos);
2375               if (found == -1)
2376                 {
2377                   if (c == '\n')
2378                     /* This end-of-line is not an end-of-comment.
2379                        Treat it like a whitespace.
2380                        CC-mode (and maybe others) relies on this behavior.  */
2381                     ;
2382                   else
2383                     {
2384                       /* Failure: we should go back to the end of this
2385                          not-quite-endcomment.  */
2386                       if (SYNTAX(c) != code)
2387                         /* It was a two-char Sendcomment.  */
2388                         INC_BOTH (from, from_byte);
2389                       goto leave;
2390                     }
2391                 }
2392               else
2393                 {
2394                   /* We have skipped one comment.  */
2395                   from = out_charpos, from_byte = out_bytepos;
2396                   break;
2397                 }
2398             }
2399           else if (code != Swhitespace || quoted)
2400             {
2401             leave:
2402               immediate_quit = 0;
2403               INC_BOTH (from, from_byte);
2404               SET_PT_BOTH (from, from_byte);
2405               return Qnil;
2406             }
2407         }
2408 
2409       count1++;
2410     }
2411 
2412   SET_PT_BOTH (from, from_byte);
2413   immediate_quit = 0;
2414   return Qt;
2415 }
2416 
2417 /* Return syntax code of character C if C is an ASCII character
2418    or `multibyte_symbol_p' is zero.  Otherwise, return Ssymbol.  */
2419 
2420 #define SYNTAX_WITH_MULTIBYTE_CHECK(c)          \
2421   ((ASCII_CHAR_P (c) || !multibyte_symbol_p)    \
2422    ? SYNTAX (c) : Ssymbol)
2423 
2424 static Lisp_Object
2425 scan_lists (from, count, depth, sexpflag)
2426      register EMACS_INT from;
2427      EMACS_INT count, depth;
2428      int sexpflag;
2429 {
2430   Lisp_Object val;
2431   register EMACS_INT stop = count > 0 ? ZV : BEGV;
2432   register int c, c1;
2433   int stringterm;
2434   int quoted;
2435   int mathexit = 0;
2436   register enum syntaxcode code, temp_code;
2437   int min_depth = depth;    /* Err out if depth gets less than this.  */
2438   int comstyle = 0;         /* style of comment encountered */
2439   int comnested = 0;        /* whether the comment is nestable or not */
2440   EMACS_INT temp_pos;
2441   EMACS_INT last_good = from;
2442   int found;
2443   EMACS_INT from_byte;
2444   EMACS_INT out_bytepos, out_charpos;
2445   int temp, dummy;
2446   int multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2447 
2448   if (depth > 0) min_depth = 0;
2449 
2450   if (from > ZV) from = ZV;
2451   if (from < BEGV) from = BEGV;
2452 
2453   from_byte = CHAR_TO_BYTE (from);
2454 
2455   immediate_quit = 1;
2456   QUIT;
2457 
2458   SETUP_SYNTAX_TABLE (from, count);
2459   while (count > 0)
2460     {
2461       while (from < stop)
2462         {
2463           int comstart_first, prefix;
2464           UPDATE_SYNTAX_TABLE_FORWARD (from);
2465           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2466           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2467           comstart_first = SYNTAX_COMSTART_FIRST (c);
2468           comnested = SYNTAX_COMMENT_NESTED (c);
2469           comstyle = SYNTAX_COMMENT_STYLE (c);
2470           prefix = SYNTAX_PREFIX (c);
2471           if (depth == min_depth)
2472             last_good = from;
2473           INC_BOTH (from, from_byte);
2474           UPDATE_SYNTAX_TABLE_FORWARD (from);
2475           if (from < stop && comstart_first
2476               && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2477                   SYNTAX_COMSTART_SECOND (c))
2478               && parse_sexp_ignore_comments)
2479             {
2480               /* we have encountered a comment start sequence and we
2481                  are ignoring all text inside comments.  We must record
2482                  the comment style this sequence begins so that later,
2483                  only a comment end of the same style actually ends
2484                  the comment section */
2485               code = Scomment;
2486               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2487               comstyle = SYNTAX_COMMENT_STYLE (c1);
2488               comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2489               INC_BOTH (from, from_byte);
2490               UPDATE_SYNTAX_TABLE_FORWARD (from);
2491             }
2492 
2493           if (prefix)
2494             continue;
2495 
2496           switch (SWITCH_ENUM_CAST (code))
2497             {
2498             case Sescape:
2499             case Scharquote:
2500               if (from == stop)
2501                 goto lose;
2502               INC_BOTH (from, from_byte);
2503               /* treat following character as a word constituent */
2504             case Sword:
2505             case Ssymbol:
2506               if (depth || !sexpflag) break;
2507               /* This word counts as a sexp; return at end of it.  */
2508               while (from < stop)
2509                 {
2510                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2511 
2512                   /* Some compilers can't handle this inside the switch.  */
2513                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2514                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2515                   switch (temp)
2516                     {
2517                     case Scharquote:
2518                     case Sescape:
2519                       INC_BOTH (from, from_byte);
2520                       if (from == stop)
2521                         goto lose;
2522                       break;
2523                     case Sword:
2524                     case Ssymbol:
2525                     case Squote:
2526                       break;
2527                     default:
2528                       goto done;
2529                     }
2530                   INC_BOTH (from, from_byte);
2531                 }
2532               goto done;
2533 
2534             case Scomment_fence:
2535               comstyle = ST_COMMENT_STYLE;
2536               /* FALLTHROUGH */
2537             case Scomment:
2538               if (!parse_sexp_ignore_comments) break;
2539               UPDATE_SYNTAX_TABLE_FORWARD (from);
2540               found = forw_comment (from, from_byte, stop,
2541                                     comnested, comstyle, 0,
2542                                     &out_charpos, &out_bytepos, &dummy);
2543               from = out_charpos, from_byte = out_bytepos;
2544               if (!found)
2545                 {
2546                   if (depth == 0)
2547                     goto done;
2548                   goto lose;
2549                 }
2550               INC_BOTH (from, from_byte);
2551               UPDATE_SYNTAX_TABLE_FORWARD (from);
2552               break;
2553 
2554             case Smath:
2555               if (!sexpflag)
2556                 break;
2557               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2558                 {
2559                   INC_BOTH (from, from_byte);
2560                 }
2561               if (mathexit)
2562                 {
2563                   mathexit = 0;
2564                   goto close1;
2565                 }
2566               mathexit = 1;
2567 
2568             case Sopen:
2569               if (!++depth) goto done;
2570               break;
2571 
2572             case Sclose:
2573             close1:
2574               if (!--depth) goto done;
2575               if (depth < min_depth)
2576                 xsignal3 (Qscan_error,
2577                           build_string ("Containing expression ends prematurely"),
2578                           make_number (last_good), make_number (from));
2579               break;
2580 
2581             case Sstring:
2582             case Sstring_fence:
2583               temp_pos = dec_bytepos (from_byte);
2584               stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2585               while (1)
2586                 {
2587                   if (from >= stop)
2588                     goto lose;
2589                   UPDATE_SYNTAX_TABLE_FORWARD (from);
2590                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2591                   if (code == Sstring
2592                       ? (c == stringterm
2593                          && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2594                       : SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring_fence)
2595                     break;
2596 
2597                   /* Some compilers can't handle this inside the switch.  */
2598                   temp = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2599                   switch (temp)
2600                     {
2601                     case Scharquote:
2602                     case Sescape:
2603                       INC_BOTH (from, from_byte);
2604                     }
2605                   INC_BOTH (from, from_byte);
2606                 }
2607               INC_BOTH (from, from_byte);
2608               if (!depth && sexpflag) goto done;
2609               break;
2610             default:
2611               /* Ignore whitespace, punctuation, quote, endcomment.  */
2612               break;
2613             }
2614         }
2615 
2616       /* Reached end of buffer.  Error if within object, return nil if between */
2617       if (depth)
2618         goto lose;
2619 
2620       immediate_quit = 0;
2621       return Qnil;
2622 
2623       /* End of object reached */
2624     done:
2625       count--;
2626     }
2627 
2628   while (count < 0)
2629     {
2630       while (from > stop)
2631         {
2632           DEC_BOTH (from, from_byte);
2633           UPDATE_SYNTAX_TABLE_BACKWARD (from);
2634           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2635           code = SYNTAX_WITH_MULTIBYTE_CHECK (c);
2636           if (depth == min_depth)
2637             last_good = from;
2638           comstyle = 0;
2639           comnested = SYNTAX_COMMENT_NESTED (c);
2640           if (code == Sendcomment)
2641             comstyle = SYNTAX_COMMENT_STYLE (c);
2642           if (from > stop && SYNTAX_COMEND_SECOND (c)
2643               && prev_char_comend_first (from, from_byte)
2644               && parse_sexp_ignore_comments)
2645             {
2646               /* We must record the comment style encountered so that
2647                  later, we can match only the proper comment begin
2648                  sequence of the same style.  */
2649               DEC_BOTH (from, from_byte);
2650               UPDATE_SYNTAX_TABLE_BACKWARD (from);
2651               code = Sendcomment;
2652               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2653               comstyle = SYNTAX_COMMENT_STYLE (c1);
2654               comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
2655             }
2656 
2657           /* Quoting turns anything except a comment-ender
2658              into a word character.  Note that this cannot be true
2659              if we decremented FROM in the if-statement above.  */
2660           if (code != Sendcomment && char_quoted (from, from_byte))
2661             {
2662               DEC_BOTH (from, from_byte);
2663               code = Sword;
2664             }
2665           else if (SYNTAX_PREFIX (c))
2666             continue;
2667 
2668           switch (SWITCH_ENUM_CAST (code))
2669             {
2670             case Sword:
2671             case Ssymbol:
2672             case Sescape:
2673             case Scharquote:
2674               if (depth || !sexpflag) break;
2675               /* This word counts as a sexp; count object finished
2676                  after passing it.  */
2677               while (from > stop)
2678                 {
2679                   temp_pos = from_byte;
2680                   if (! NILP (current_buffer->enable_multibyte_characters))
2681                     DEC_POS (temp_pos);
2682                   else
2683                     temp_pos--;
2684                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2685                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2686                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2687                   /* Don't allow comment-end to be quoted.  */
2688                   if (temp_code == Sendcomment)
2689                     goto done2;
2690                   quoted = char_quoted (from - 1, temp_pos);
2691                   if (quoted)
2692                     {
2693                       DEC_BOTH (from, from_byte);
2694                       temp_pos = dec_bytepos (temp_pos);
2695                       UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2696                     }
2697                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2698                   temp_code = SYNTAX_WITH_MULTIBYTE_CHECK (c1);
2699                   if (! (quoted || temp_code == Sword
2700                          || temp_code == Ssymbol
2701                          || temp_code == Squote))
2702                     goto done2;
2703                   DEC_BOTH (from, from_byte);
2704                 }
2705               goto done2;
2706 
2707             case Smath:
2708               if (!sexpflag)
2709                 break;
2710               temp_pos = dec_bytepos (from_byte);
2711               UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2712               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2713                 DEC_BOTH (from, from_byte);
2714               if (mathexit)
2715                 {
2716                   mathexit = 0;
2717                   goto open2;
2718                 }
2719               mathexit = 1;
2720 
2721             case Sclose:
2722               if (!++depth) goto done2;
2723               break;
2724 
2725             case Sopen:
2726             open2:
2727               if (!--depth) goto done2;
2728               if (depth < min_depth)
2729                 xsignal3 (Qscan_error,
2730                           build_string ("Containing expression ends prematurely"),
2731                           make_number (last_good), make_number (from));
2732               break;
2733 
2734             case Sendcomment:
2735               if (!parse_sexp_ignore_comments)
2736                 break;
2737               found = back_comment (from, from_byte, stop, comnested, comstyle,
2738                                     &out_charpos, &out_bytepos);
2739               /* FIXME:  if found == -1, then it really wasn't a comment-end.
2740                  For single-char Sendcomment, we can't do much about it apart
2741                  from skipping the char.
2742                  For 2-char endcomments, we could try again, taking both
2743                  chars as separate entities, but it's a lot of trouble
2744                  for very little gain, so we don't bother either.  -sm */
2745               if (found != -1)
2746                 from = out_charpos, from_byte = out_bytepos;
2747               break;
2748 
2749             case Scomment_fence:
2750             case Sstring_fence:
2751               while (1)
2752                 {
2753                   if (from == stop)
2754                     goto lose;
2755                   DEC_BOTH (from, from_byte);
2756                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2757                   if (!char_quoted (from, from_byte)
2758                       && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2759                           SYNTAX_WITH_MULTIBYTE_CHECK (c) == code))
2760                     break;
2761                 }
2762               if (code == Sstring_fence && !depth && sexpflag) goto done2;
2763               break;
2764 
2765             case Sstring:
2766               stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2767               while (1)
2768                 {
2769                   if (from == stop)
2770                     goto lose;
2771                   DEC_BOTH (from, from_byte);
2772                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
2773                   if (!char_quoted (from, from_byte)
2774                       && (stringterm
2775                           == (c = FETCH_CHAR_AS_MULTIBYTE (from_byte)))
2776                       && SYNTAX_WITH_MULTIBYTE_CHECK (c) == Sstring)
2777                     break;
2778                 }
2779               if (!depth && sexpflag) goto done2;
2780               break;
2781             default:
2782               /* Ignore whitespace, punctuation, quote, endcomment.  */
2783               break;
2784             }
2785         }
2786 
2787       /* Reached start of buffer.  Error if within object, return nil if between */
2788       if (depth)
2789         goto lose;
2790 
2791       immediate_quit = 0;
2792       return Qnil;
2793 
2794     done2:
2795       count++;
2796     }
2797 
2798 
2799   immediate_quit = 0;
2800   XSETFASTINT (val, from);
2801   return val;
2802 
2803  lose:
2804   xsignal3 (Qscan_error,
2805             build_string ("Unbalanced parentheses"),
2806             make_number (last_good), make_number (from));
2807 }
2808 
2809 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
2810        doc: /* Scan from character number FROM by COUNT lists.
2811 Returns the character number of the position thus found.
2812 
2813 If DEPTH is nonzero, paren depth begins counting from that value,
2814 only places where the depth in parentheses becomes zero
2815 are candidates for stopping; COUNT such places are counted.
2816 Thus, a positive value for DEPTH means go out levels.
2817 
2818 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2819 
2820 If the beginning or end of (the accessible part of) the buffer is reached
2821 and the depth is wrong, an error is signaled.
2822 If the depth is right but the count is not used up, nil is returned.  */)
2823      (from, count, depth)
2824      Lisp_Object from, count, depth;
2825 {
2826   CHECK_NUMBER (from);
2827   CHECK_NUMBER (count);
2828   CHECK_NUMBER (depth);
2829 
2830   return scan_lists (XINT (from), XINT (count), XINT (depth), 0);
2831 }
2832 
2833 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
2834        doc: /* Scan from character number FROM by COUNT balanced expressions.
2835 If COUNT is negative, scan backwards.
2836 Returns the character number of the position thus found.
2837 
2838 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
2839 
2840 If the beginning or end of (the accessible part of) the buffer is reached
2841 in the middle of a parenthetical grouping, an error is signaled.
2842 If the beginning or end is reached between groupings
2843 but before count is used up, nil is returned.  */)
2844      (from, count)
2845      Lisp_Object from, count;
2846 {
2847   CHECK_NUMBER (from);
2848   CHECK_NUMBER (count);
2849 
2850   return scan_lists (XINT (from), XINT (count), 0, 1);
2851 }
2852 
2853 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
2854        0, 0, 0,
2855        doc: /* Move point backward over any number of chars with prefix syntax.
2856 This includes chars with "quote" or "prefix" syntax (' or p).  */)
2857      ()
2858 {
2859   int beg = BEGV;
2860   int opoint = PT;
2861   int opoint_byte = PT_BYTE;
2862   int pos = PT;
2863   int pos_byte = PT_BYTE;
2864   int c;
2865 
2866   if (pos <= beg)
2867     {
2868       SET_PT_BOTH (opoint, opoint_byte);
2869 
2870       return Qnil;
2871     }
2872 
2873   SETUP_SYNTAX_TABLE (pos, -1);
2874 
2875   DEC_BOTH (pos, pos_byte);
2876 
2877   while (!char_quoted (pos, pos_byte)
2878          /* Previous statement updates syntax table.  */
2879          && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
2880              || SYNTAX_PREFIX (c)))
2881     {
2882       opoint = pos;
2883       opoint_byte = pos_byte;
2884 
2885       if (pos + 1 > beg)
2886         DEC_BOTH (pos, pos_byte);
2887     }
2888 
2889   SET_PT_BOTH (opoint, opoint_byte);
2890 
2891   return Qnil;
2892 }
2893 
2894 /* Parse forward from FROM / FROM_BYTE to END,
2895    assuming that FROM has state OLDSTATE (nil means FROM is start of function),
2896    and return a description of the state of the parse at END.
2897    If STOPBEFORE is nonzero, stop at the start of an atom.
2898    If COMMENTSTOP is 1, stop at the start of a comment.
2899    If COMMENTSTOP is -1, stop at the start or end of a comment,
2900    after the beginning of a string, or after the end of a string.  */
2901 
2902 static void
2903 scan_sexps_forward (stateptr, from, from_byte, end, targetdepth,
2904                     stopbefore, oldstate, commentstop)
2905      struct lisp_parse_state *stateptr;
2906      register EMACS_INT from;
2907      EMACS_INT from_byte, end;
2908      int targetdepth, stopbefore;
2909      Lisp_Object oldstate;
2910      int commentstop;
2911 {
2912   struct lisp_parse_state state;
2913 
2914   register enum syntaxcode code;
2915   int c1;
2916   int comnested;
2917   struct level { int last, prev; };
2918   struct level levelstart[100];
2919   register struct level *curlevel = levelstart;
2920   struct level *endlevel = levelstart + 100;
2921   register int depth;   /* Paren depth of current scanning location.
2922                            level - levelstart equals this except
2923                            when the depth becomes negative.  */
2924   int mindepth;         /* Lowest DEPTH value seen.  */
2925   int start_quoted = 0;         /* Nonzero means starting after a char quote */
2926   Lisp_Object tem;
2927   EMACS_INT prev_from;          /* Keep one character before FROM.  */
2928   EMACS_INT prev_from_byte;
2929   int prev_from_syntax;
2930   int boundary_stop = commentstop == -1;
2931   int nofence;
2932   int found;
2933   EMACS_INT out_bytepos, out_charpos;
2934   int temp;
2935 
2936   prev_from = from;
2937   prev_from_byte = from_byte;
2938   if (from != BEGV)
2939     DEC_BOTH (prev_from, prev_from_byte);
2940 
2941   /* Use this macro instead of `from++'.  */
2942 #define INC_FROM                                \
2943 do { prev_from = from;                          \
2944      prev_from_byte = from_byte;                \
2945      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);   \
2946      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
2947      INC_BOTH (from, from_byte);                \
2948      if (from < end)                            \
2949        UPDATE_SYNTAX_TABLE_FORWARD (from);      \
2950   } while (0)
2951 
2952   immediate_quit = 1;
2953   QUIT;
2954 
2955   if (NILP (oldstate))
2956     {
2957       depth = 0;
2958       state.instring = -1;
2959       state.incomment = 0;
2960       state.comstyle = 0;       /* comment style a by default.  */
2961       state.comstr_start = -1;  /* no comment/string seen.  */
2962     }
2963   else
2964     {
2965       tem = Fcar (oldstate);
2966       if (!NILP (tem))
2967         depth = XINT (tem);
2968       else
2969         depth = 0;
2970 
2971       oldstate = Fcdr (oldstate);
2972       oldstate = Fcdr (oldstate);
2973       oldstate = Fcdr (oldstate);
2974       tem = Fcar (oldstate);
2975       /* Check whether we are inside string_fence-style string: */
2976       state.instring = (!NILP (tem)
2977                         ? (INTEGERP (tem) ? XINT (tem) : ST_STRING_STYLE)
2978                         : -1);
2979 
2980       oldstate = Fcdr (oldstate);
2981       tem = Fcar (oldstate);
2982       state.incomment = (!NILP (tem)
2983                          ? (INTEGERP (tem) ? XINT (tem) : -1)
2984                          : 0);
2985 
2986       oldstate = Fcdr (oldstate);
2987       tem = Fcar (oldstate);
2988       start_quoted = !NILP (tem);
2989 
2990       /* if the eighth element of the list is nil, we are in comment
2991          style a.  If it is non-nil, we are in comment style b */
2992       oldstate = Fcdr (oldstate);
2993       oldstate = Fcdr (oldstate);
2994       tem = Fcar (oldstate);
2995       state.comstyle = NILP (tem) ? 0 : (EQ (tem, Qsyntax_table)
2996                                          ? ST_COMMENT_STYLE : 1);
2997 
2998       oldstate = Fcdr (oldstate);
2999       tem = Fcar (oldstate);
3000       state.comstr_start = NILP (tem) ? -1 : XINT (tem) ;
3001       oldstate = Fcdr (oldstate);
3002       tem = Fcar (oldstate);
3003       while (!NILP (tem))               /* >= second enclosing sexps.  */
3004         {
3005           /* curlevel++->last ran into compiler bug on Apollo */
3006           curlevel->last = XINT (Fcar (tem));
3007           if (++curlevel == endlevel)
3008             curlevel--; /* error ("Nesting too deep for parser"); */
3009           curlevel->prev = -1;
3010           curlevel->last = -1;
3011           tem = Fcdr (tem);
3012         }
3013     }
3014   state.quoted = 0;
3015   mindepth = depth;
3016 
3017   curlevel->prev = -1;
3018   curlevel->last = -1;
3019 
3020   SETUP_SYNTAX_TABLE (prev_from, 1);
3021   temp = FETCH_CHAR (prev_from_byte);
3022   prev_from_syntax = SYNTAX_WITH_FLAGS (temp);
3023   UPDATE_SYNTAX_TABLE_FORWARD (from);
3024 
3025   /* Enter the loop at a place appropriate for initial state.  */
3026 
3027   if (state.incomment)
3028     goto startincomment;
3029   if (state.instring >= 0)
3030     {
3031       nofence = state.instring != ST_STRING_STYLE;
3032       if (start_quoted)
3033         goto startquotedinstring;
3034       goto startinstring;
3035     }
3036   else if (start_quoted)
3037     goto startquoted;
3038 
3039   while (from < end)
3040     {
3041       INC_FROM;
3042       code = prev_from_syntax & 0xff;
3043 
3044       if (from < end
3045           && SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3046           && (c1 = FETCH_CHAR (from_byte),
3047               SYNTAX_COMSTART_SECOND (c1)))
3048         /* Duplicate code to avoid a complex if-expression
3049            which causes trouble for the SGI compiler.  */
3050         {
3051           /* Record the comment style we have entered so that only
3052              the comment-end sequence of the same style actually
3053              terminates the comment section.  */
3054           state.comstyle = SYNTAX_COMMENT_STYLE (c1);
3055           comnested = SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax);
3056           comnested = comnested || SYNTAX_COMMENT_NESTED (c1);
3057           state.incomment = comnested ? 1 : -1;
3058           state.comstr_start = prev_from;
3059           INC_FROM;
3060           code = Scomment;
3061         }
3062       else if (code == Scomment_fence)
3063         {
3064           /* Record the comment style we have entered so that only
3065              the comment-end sequence of the same style actually
3066              terminates the comment section.  */
3067           state.comstyle = ST_COMMENT_STYLE;
3068           state.incomment = -1;
3069           state.comstr_start = prev_from;
3070           code = Scomment;
3071         }
3072       else if (code == Scomment)
3073         {
3074           state.comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax);
3075           state.incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3076                              1 : -1);
3077           state.comstr_start = prev_from;
3078         }
3079 
3080       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3081         continue;
3082       switch (SWITCH_ENUM_CAST (code))
3083         {
3084         case Sescape:
3085         case Scharquote:
3086           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3087           curlevel->last = prev_from;
3088         startquoted:
3089           if (from == end) goto endquoted;
3090           INC_FROM;
3091           goto symstarted;
3092           /* treat following character as a word constituent */
3093         case Sword:
3094         case Ssymbol:
3095           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3096           curlevel->last = prev_from;
3097         symstarted:
3098           while (from < end)
3099             {
3100               /* Some compilers can't handle this inside the switch.  */
3101               temp = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3102               temp = SYNTAX (temp);
3103               switch (temp)
3104                 {
3105                 case Scharquote:
3106                 case Sescape:
3107                   INC_FROM;
3108                   if (from == end) goto endquoted;
3109                   break;
3110                 case Sword:
3111                 case Ssymbol:
3112                 case Squote:
3113                   break;
3114                 default:
3115                   goto symdone;
3116                 }
3117               INC_FROM;
3118             }
3119         symdone:
3120           curlevel->prev = curlevel->last;
3121           break;
3122 
3123         case Scomment_fence: /* Can't happen because it's handled above.  */
3124         case Scomment:
3125           if (commentstop || boundary_stop) goto done;
3126         startincomment:
3127           /* The (from == BEGV) test was to enter the loop in the middle so
3128              that we find a 2-char comment ender even if we start in the
3129              middle of it.  We don't want to do that if we're just at the
3130              beginning of the comment (think of (*) ... (*)).  */
3131           found = forw_comment (from, from_byte, end,
3132                                 state.incomment, state.comstyle,
3133                                 (from == BEGV || from < state.comstr_start + 3)
3134                                 ? 0 : prev_from_syntax,
3135                                 &out_charpos, &out_bytepos, &state.incomment);
3136           from = out_charpos; from_byte = out_bytepos;
3137           /* Beware!  prev_from and friends are invalid now.
3138              Luckily, the `done' doesn't use them and the INC_FROM
3139              sets them to a sane value without looking at them. */
3140           if (!found) goto done;
3141           INC_FROM;
3142           state.incomment = 0;
3143           state.comstyle = 0;   /* reset the comment style */
3144           if (boundary_stop) goto done;
3145           break;
3146 
3147         case Sopen:
3148           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3149           depth++;
3150           /* curlevel++->last ran into compiler bug on Apollo */
3151           curlevel->last = prev_from;
3152           if (++curlevel == endlevel)
3153             curlevel--; /* error ("Nesting too deep for parser"); */
3154           curlevel->prev = -1;
3155           curlevel->last = -1;
3156           if (targetdepth == depth) goto done;
3157           break;
3158 
3159         case Sclose:
3160           depth--;
3161           if (depth < mindepth)
3162             mindepth = depth;
3163           if (curlevel != levelstart)
3164             curlevel--;
3165           curlevel->prev = curlevel->last;
3166           if (targetdepth == depth) goto done;
3167           break;
3168 
3169         case Sstring:
3170         case Sstring_fence:
3171           state.comstr_start = from - 1;
3172           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3173           curlevel->last = prev_from;
3174           state.instring = (code == Sstring
3175                             ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3176                             : ST_STRING_STYLE);
3177           if (boundary_stop) goto done;
3178         startinstring:
3179           {
3180             nofence = state.instring != ST_STRING_STYLE;
3181 
3182             while (1)
3183               {
3184                 int c;
3185 
3186                 if (from >= end) goto done;
3187                 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3188                 /* Some compilers can't handle this inside the switch.  */
3189                 temp = SYNTAX (c);
3190 
3191                 /* Check TEMP here so that if the char has
3192                    a syntax-table property which says it is NOT
3193                    a string character, it does not end the string.  */
3194                 if (nofence && c == state.instring && temp == Sstring)
3195                   break;
3196 
3197                 switch (temp)
3198                   {
3199                   case Sstring_fence:
3200                     if (!nofence) goto string_end;
3201                     break;
3202                   case Scharquote:
3203                   case Sescape:
3204                     INC_FROM;
3205                   startquotedinstring:
3206                     if (from >= end) goto endquoted;
3207                   }
3208                 INC_FROM;
3209               }
3210           }
3211         string_end:
3212           state.instring = -1;
3213           curlevel->prev = curlevel->last;
3214           INC_FROM;
3215           if (boundary_stop) goto done;
3216           break;
3217 
3218         case Smath:
3219           /* FIXME: We should do something with it.  */
3220           break;
3221         default:
3222           /* Ignore whitespace, punctuation, quote, endcomment.  */
3223           break;
3224         }
3225     }
3226   goto done;
3227 
3228  stop:   /* Here if stopping before start of sexp. */
3229   from = prev_from;    /* We have just fetched the char that starts it; */
3230   goto done; /* but return the position before it. */
3231 
3232  endquoted:
3233   state.quoted = 1;
3234  done:
3235   state.depth = depth;
3236   state.mindepth = mindepth;
3237   state.thislevelstart = curlevel->prev;
3238   state.prevlevelstart
3239     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3240   state.location = from;
3241   state.levelstarts = Qnil;
3242   while (--curlevel >= levelstart)
3243       state.levelstarts = Fcons (make_number (curlevel->last),
3244                                  state.levelstarts);
3245   immediate_quit = 0;
3246 
3247   *stateptr = state;
3248 }
3249 
3250 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3251        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3252 Parsing stops at TO or when certain criteria are met;
3253  point is set to where parsing stops.
3254 If fifth arg OLDSTATE is omitted or nil,
3255  parsing assumes that FROM is the beginning of a function.
3256 Value is a list of elements describing final state of parsing:
3257  0. depth in parens.
3258  1. character address of start of innermost containing list; nil if none.
3259  2. character address of start of last complete sexp terminated.
3260  3. non-nil if inside a string.
3261     (it is the character that will terminate the string,
3262      or t if the string should be terminated by a generic string delimiter.)
3263  4. nil if outside a comment, t if inside a non-nestable comment,
3264     else an integer (the current comment nesting).
3265  5. t if following a quote character.
3266  6. the minimum paren-depth encountered during this scan.
3267  7. t if in a comment of style b; symbol `syntax-table' if the comment
3268     should be terminated by a generic comment delimiter.
3269  8. character address of start of comment or string; nil if not in one.
3270  9. Intermediate data for continuation of parsing (subject to change).
3271 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3272 in parentheses becomes equal to TARGETDEPTH.
3273 Fourth arg STOPBEFORE non-nil means stop when come to
3274  any character that starts a sexp.
3275 Fifth arg OLDSTATE is a list like what this function returns.
3276  It is used to initialize the state of the parse.  Elements number 1, 2, 6
3277  and 8 are ignored.
3278 Sixth arg COMMENTSTOP non-nil means stop at the start of a comment.
3279  If it is symbol `syntax-table', stop after the start of a comment or a
3280  string, or after end of a comment or a string.  */)
3281      (from, to, targetdepth, stopbefore, oldstate, commentstop)
3282      Lisp_Object from, to, targetdepth, stopbefore, oldstate, commentstop;
3283 {
3284   struct lisp_parse_state state;
3285   int target;
3286 
3287   if (!NILP (targetdepth))
3288     {
3289       CHECK_NUMBER (targetdepth);
3290       target = XINT (targetdepth);
3291     }
3292   else
3293     target = -100000;           /* We won't reach this depth */
3294 
3295   validate_region (&from, &to);
3296   scan_sexps_forward (&state, XINT (from), CHAR_TO_BYTE (XINT (from)),
3297                       XINT (to),
3298                       target, !NILP (stopbefore), oldstate,
3299                       (NILP (commentstop)
3300                        ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3301 
3302   SET_PT (state.location);
3303 
3304   return Fcons (make_number (state.depth),
3305            Fcons (state.prevlevelstart < 0 ? Qnil : make_number (state.prevlevelstart),
3306              Fcons (state.thislevelstart < 0 ? Qnil : make_number (state.thislevelstart),
3307                Fcons (state.instring >= 0
3308                       ? (state.instring == ST_STRING_STYLE
3309                          ? Qt : make_number (state.instring)) : Qnil,
3310                  Fcons (state.incomment < 0 ? Qt :
3311                         (state.incomment == 0 ? Qnil :
3312                          make_number (state.incomment)),
3313                    Fcons (state.quoted ? Qt : Qnil,
3314                      Fcons (make_number (state.mindepth),
3315                        Fcons ((state.comstyle
3316                                ? (state.comstyle == ST_COMMENT_STYLE
3317                                   ? Qsyntax_table : Qt) :
3318                                Qnil),
3319                               Fcons (((state.incomment
3320                                        || (state.instring >= 0))
3321                                       ? make_number (state.comstr_start)
3322                                       : Qnil),
3323                                      Fcons (state.levelstarts, Qnil))))))))));
3324 }
3325 
3326 void
3327 init_syntax_once ()
3328 {
3329   register int i, c;
3330   Lisp_Object temp;
3331 
3332   /* This has to be done here, before we call Fmake_char_table.  */
3333   Qsyntax_table = intern_c_string ("syntax-table");
3334   staticpro (&Qsyntax_table);
3335 
3336   /* Intern_C_String this now in case it isn't already done.
3337      Setting this variable twice is harmless.
3338      But don't staticpro it here--that is done in alloc.c.  */
3339   Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
3340 
3341   /* Create objects which can be shared among syntax tables.  */
3342   Vsyntax_code_object = Fmake_vector (make_number (Smax), Qnil);
3343   for (i = 0; i < XVECTOR (Vsyntax_code_object)->size; i++)
3344     XVECTOR (Vsyntax_code_object)->contents[i]
3345       = Fcons (make_number (i), Qnil);
3346 
3347   /* Now we are ready to set up this property, so we can
3348      create syntax tables.  */
3349   Fput (Qsyntax_table, Qchar_table_extra_slots, make_number (0));
3350 
3351   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3352 
3353   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3354 
3355   /* Control characters should not be whitespace.  */
3356   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3357   for (i = 0; i <= ' ' - 1; i++)
3358     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3359   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3360 
3361   /* Except that a few really are whitespace.  */
3362   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Swhitespace];
3363   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3364   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3365   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3366   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3367   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3368 
3369   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3370   for (i = 'a'; i <= 'z'; i++)
3371     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3372   for (i = 'A'; i <= 'Z'; i++)
3373     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3374   for (i = '0'; i <= '9'; i++)
3375     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3376 
3377   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3378   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3379 
3380   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3381                         Fcons (make_number (Sopen), make_number (')')));
3382   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3383                         Fcons (make_number (Sclose), make_number ('(')));
3384   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3385                         Fcons (make_number (Sopen), make_number (']')));
3386   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3387                         Fcons (make_number (Sclose), make_number ('[')));
3388   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3389                         Fcons (make_number (Sopen), make_number ('}')));
3390   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3391                         Fcons (make_number (Sclose), make_number ('{')));
3392   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3393                         Fcons (make_number ((int) Sstring), Qnil));
3394   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3395                         Fcons (make_number ((int) Sescape), Qnil));
3396 
3397   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Ssymbol];
3398   for (i = 0; i < 10; i++)
3399     {
3400       c = "_-+*/&|<>="[i];
3401       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3402     }
3403 
3404   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Spunct];
3405   for (i = 0; i < 12; i++)
3406     {
3407       c = ".,;:?!#@~^'`"[i];
3408       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3409     }
3410 
3411   /* All multibyte characters have syntax `word' by default.  */
3412   temp = XVECTOR (Vsyntax_code_object)->contents[(int) Sword];
3413   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3414 }
3415 
3416 void
3417 syms_of_syntax ()
3418 {
3419   Qsyntax_table_p = intern_c_string ("syntax-table-p");
3420   staticpro (&Qsyntax_table_p);
3421 
3422   staticpro (&Vsyntax_code_object);
3423 
3424   staticpro (&gl_state.object);
3425   staticpro (&gl_state.global_code);
3426   staticpro (&gl_state.current_syntax_table);
3427   staticpro (&gl_state.old_prop);
3428 
3429   /* Defined in regex.c */
3430   staticpro (&re_match_object);
3431 
3432   Qscan_error = intern_c_string ("scan-error");
3433   staticpro (&Qscan_error);
3434   Fput (Qscan_error, Qerror_conditions,
3435         pure_cons (Qscan_error, pure_cons (Qerror, Qnil)));
3436   Fput (Qscan_error, Qerror_message,
3437         make_pure_c_string ("Scan error"));
3438 
3439   DEFVAR_BOOL ("parse-sexp-ignore-comments", &parse_sexp_ignore_comments,
3440                doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3441 
3442   DEFVAR_BOOL ("parse-sexp-lookup-properties", &parse_sexp_lookup_properties,
3443                doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3444 Otherwise, that text property is simply ignored.
3445 See the info node `(elisp)Syntax Properties' for a description of the
3446 `syntax-table' property.  */);
3447 
3448   words_include_escapes = 0;
3449   DEFVAR_BOOL ("words-include-escapes", &words_include_escapes,
3450                doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3451 
3452   DEFVAR_BOOL ("multibyte-syntax-as-symbol", &multibyte_syntax_as_symbol,
3453                doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3454   multibyte_syntax_as_symbol = 0;
3455 
3456   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3457                &open_paren_in_column_0_is_defun_start,
3458                doc: /* *Non-nil means an open paren in column 0 denotes the start of a defun.  */);
3459   open_paren_in_column_0_is_defun_start = 1;
3460 
3461 
3462   DEFVAR_LISP ("find-word-boundary-function-table",
3463                &Vfind_word_boundary_function_table,
3464                doc: /*
3465 Char table of functions to search for the word boundary.
3466 Each function is called with two arguments; POS and LIMIT.
3467 POS and LIMIT are character positions in the current buffer.
3468 
3469 If POS is less than LIMIT, POS is at the first character of a word,
3470 and the return value of a function is a position after the last
3471 character of that word.
3472 
3473 If POS is not less than LIMIT, POS is at the last character of a word,
3474 and the return value of a function is a position at the first
3475 character of that word.
3476 
3477 In both cases, LIMIT bounds the search. */);
3478   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3479 
3480   defsubr (&Ssyntax_table_p);
3481   defsubr (&Ssyntax_table);
3482   defsubr (&Sstandard_syntax_table);
3483   defsubr (&Scopy_syntax_table);
3484   defsubr (&Sset_syntax_table);
3485   defsubr (&Schar_syntax);
3486   defsubr (&Smatching_paren);
3487   defsubr (&Sstring_to_syntax);
3488   defsubr (&Smodify_syntax_entry);
3489   defsubr (&Sinternal_describe_syntax_value);
3490 
3491   defsubr (&Sforward_word);
3492 
3493   defsubr (&Sskip_chars_forward);
3494   defsubr (&Sskip_chars_backward);
3495   defsubr (&Sskip_syntax_forward);
3496   defsubr (&Sskip_syntax_backward);
3497 
3498   defsubr (&Sforward_comment);
3499   defsubr (&Sscan_lists);
3500   defsubr (&Sscan_sexps);
3501   defsubr (&Sbackward_prefix_chars);
3502   defsubr (&Sparse_partial_sexp);
3503 }
3504 
3505 /* arch-tag: 3e297b9f-088e-4b64-8f4c-fb0b3443e412
3506    (do not change this comment) */