1 /* Basic character set support.
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
   3      2008, 2009, 2010  Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008, 2009, 2010
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8 
   9    Copyright (C) 2003, 2004
  10      National Institute of Advanced Industrial Science and Technology (AIST)
  11      Registration Number H13PRO009
  12 
  13 This file is part of GNU Emacs.
  14 
  15 GNU Emacs is free software: you can redistribute it and/or modify
  16 it under the terms of the GNU General Public License as published by
  17 the Free Software Foundation, either version 3 of the License, or
  18 (at your option) any later version.
  19 
  20 GNU Emacs is distributed in the hope that it will be useful,
  21 but WITHOUT ANY WARRANTY; without even the implied warranty of
  22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  23 GNU General Public License for more details.
  24 
  25 You should have received a copy of the GNU General Public License
  26 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  27 
  28 #include <config.h>
  29 
  30 #include <stdio.h>
  31 #include <unistd.h>
  32 #include <ctype.h>
  33 #include <sys/types.h>
  34 #include <setjmp.h>
  35 #include "lisp.h"
  36 #include "character.h"
  37 #include "charset.h"
  38 #include "coding.h"
  39 #include "disptab.h"
  40 #include "buffer.h"
  41 
  42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
  43 
  44   A coded character set ("charset" hereafter) is a meaningful
  45   collection (i.e. language, culture, functionality, etc.) of
  46   characters.  Emacs handles multiple charsets at once.  In Emacs Lisp
  47   code, a charset is represented by a symbol.  In C code, a charset is
  48   represented by its ID number or by a pointer to a struct charset.
  49 
  50   The actual information about each charset is stored in two places.
  51   Lispy information is stored in the hash table Vcharset_hash_table as
  52   a vector (charset attributes).  The other information is stored in
  53   charset_table as a struct charset.
  54 
  55 */
  56 
  57 /* List of all charsets.  This variable is used only from Emacs
  58    Lisp.  */
  59 Lisp_Object Vcharset_list;
  60 
  61 /* Hash table that contains attributes of each charset.  Keys are
  62    charset symbols, and values are vectors of charset attributes.  */
  63 Lisp_Object Vcharset_hash_table;
  64 
  65 /* Table of struct charset.  */
  66 struct charset *charset_table;
  67 
  68 static int charset_table_size;
  69 static int charset_table_used;
  70 
  71 Lisp_Object Qcharsetp;
  72 
  73 /* Special charset symbols.  */
  74 Lisp_Object Qascii;
  75 Lisp_Object Qeight_bit;
  76 Lisp_Object Qiso_8859_1;
  77 Lisp_Object Qunicode;
  78 Lisp_Object Qemacs;
  79 
  80 /* The corresponding charsets.  */
  81 int charset_ascii;
  82 int charset_eight_bit;
  83 int charset_iso_8859_1;
  84 int charset_unicode;
  85 int charset_emacs;
  86 
  87 /* The other special charsets.  */
  88 int charset_jisx0201_roman;
  89 int charset_jisx0208_1978;
  90 int charset_jisx0208;
  91 int charset_ksc5601;
  92 
  93 /* Value of charset attribute `charset-iso-plane'.  */
  94 Lisp_Object Qgl, Qgr;
  95 
  96 /* Charset of unibyte characters.  */
  97 int charset_unibyte;
  98 
  99 /* List of charsets ordered by the priority.  */
 100 Lisp_Object Vcharset_ordered_list;
 101 
 102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
 103    charsets.  */
 104 Lisp_Object Vcharset_non_preferred_head;
 105 
 106 /* Incremented everytime we change Vcharset_ordered_list.  This is
 107    unsigned short so that it fits in Lisp_Int and never matches
 108    -1.  */
 109 unsigned short charset_ordered_list_tick;
 110 
 111 /* List of iso-2022 charsets.  */
 112 Lisp_Object Viso_2022_charset_list;
 113 
 114 /* List of emacs-mule charsets.  */
 115 Lisp_Object Vemacs_mule_charset_list;
 116 
 117 struct charset *emacs_mule_charset[256];
 118 
 119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
 120    CHARS, and FINAL-CHAR) to Emacs' charset.  */
 121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
 122 
 123 Lisp_Object Vcharset_map_path;
 124 
 125 /* If nonzero, don't load charset maps.  */
 126 int inhibit_load_charset_map;
 127 
 128 Lisp_Object Vcurrent_iso639_language;
 129 
 130 /* Defined in chartab.c */
 131 extern void
 132 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
 133                                 Lisp_Object function, Lisp_Object table,
 134                                 Lisp_Object arg, struct charset *charset,
 135                                 unsigned from, unsigned to));
 136 
 137 #define CODE_POINT_TO_INDEX(charset, code)                              \
 138   ((charset)->code_linear_p                                             \
 139    ? (code) - (charset)->min_code                                       \
 140    : (((charset)->code_space_mask[(code) >> 24] & 0x8)                  \
 141       && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4)      \
 142       && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2)       \
 143       && ((charset)->code_space_mask[(code) & 0xFF] & 0x1))             \
 144    ? (((((code) >> 24) - (charset)->code_space[12])                     \
 145        * (charset)->code_space[11])                                     \
 146       + (((((code) >> 16) & 0xFF) - (charset)->code_space[8])           \
 147          * (charset)->code_space[7])                                    \
 148       + (((((code) >> 8) & 0xFF) - (charset)->code_space[4])            \
 149          * (charset)->code_space[3])                                    \
 150       + (((code) & 0xFF) - (charset)->code_space[0])                    \
 151       - ((charset)->char_index_offset))                                 \
 152    : -1)
 153 
 154 
 155 /* Convert the character index IDX to code-point CODE for CHARSET.
 156    It is assumed that IDX is in a valid range.  */
 157 
 158 #define INDEX_TO_CODE_POINT(charset, idx)                                    \
 159   ((charset)->code_linear_p                                                  \
 160    ? (idx) + (charset)->min_code                                             \
 161    : (idx += (charset)->char_index_offset,                                   \
 162       (((charset)->code_space[0] + (idx) % (charset)->code_space[2])         \
 163        | (((charset)->code_space[4]                                          \
 164            + ((idx) / (charset)->code_space[3] % (charset)->code_space[6]))  \
 165           << 8)                                                              \
 166        | (((charset)->code_space[8]                                          \
 167            + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
 168           << 16)                                                             \
 169        | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11]))  \
 170           << 24))))
 171 
 172 /* Structure to hold mapping tables for a charset.  Used by temacs
 173    invoked for dumping.  */
 174 
 175 static struct
 176 {
 177   /* The current charset for which the following tables are setup.  */
 178   struct charset *current;
 179 
 180   /* 1 iff the following table is used for encoder.  */
 181   short for_encoder;
 182 
 183   /* When the following table is used for encoding, mininum and
 184      maxinum character of the current charset.  */
 185   int min_char, max_char;
 186 
 187   /* A Unicode character correspoinding to the code indice 0 (i.e. the
 188      minimum code-point) of the current charset, or -1 if the code
 189      indice 0 is not a Unicode character.  This is checked when
 190      table.encoder[CHAR] is zero.  */
 191   int zero_index_char;
 192 
 193   union {
 194     /* Table mapping code-indices (not code-points) of the current
 195        charset to Unicode characters.  If decoder[CHAR] is -1, CHAR
 196        doesn't belong to the current charset.  */
 197     int decoder[0x10000];
 198     /* Table mapping Unicode characters to code-indices of the current
 199        charset.  The first 0x10000 elements are for BMP (0..0xFFFF),
 200        and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
 201        (0x20000..0x2FFFF).  Note that there is no charset map that
 202        uses both SMP and SIP.  */
 203     unsigned short encoder[0x20000];
 204   } table;
 205 } *temp_charset_work;
 206 
 207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE)                  \
 208   do {                                                          \
 209     if ((CODE) == 0)                                            \
 210       temp_charset_work->zero_index_char = (C);                 \
 211     else if ((C) < 0x20000)                                     \
 212       temp_charset_work->table.encoder[(C)] = (CODE);           \
 213     else                                                        \
 214       temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
 215   } while (0)
 216 
 217 #define GET_TEMP_CHARSET_WORK_ENCODER(C)                                  \
 218   ((C) == temp_charset_work->zero_index_char ? 0                          \
 219    : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)]               \
 220                       ? (int) temp_charset_work->table.encoder[(C)] : -1) \
 221    : temp_charset_work->table.encoder[(C) - 0x10000]                      \
 222    ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
 223 
 224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE)  \
 225   (temp_charset_work->table.decoder[(CODE)] = (C))
 226 
 227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE)     \
 228   (temp_charset_work->table.decoder[(CODE)])
 229 
 230 
 231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
 232    text and a string data may be relocated.  */
 233 int charset_map_loaded;
 234 
 235 struct charset_map_entries
 236 {
 237   struct {
 238     unsigned from, to;
 239     int c;
 240   } entry[0x10000];
 241   struct charset_map_entries *next;
 242 };
 243 
 244 /* Load the mapping information of CHARSET from ENTRIES for
 245    initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
 246    encoding (CONTROL_FLAG == 2).
 247 
 248    If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
 249    and CHARSET->fast_map.
 250 
 251    If CONTROL_FLAG is 1, setup the following tables according to
 252    CHARSET->method and inhibit_load_charset_map.
 253 
 254    CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
 255    ----------------------+--------------------+---------------------------
 256    CHARSET_METHOD_MAP    | CHARSET->decoder   | temp_charset_work->decoder
 257    ----------------------+--------------------+---------------------------
 258    CHARSET_METHOD_OFFSET | Vchar_unify_table  | temp_charset_work->decoder
 259 
 260    If CONTROL_FLAG is 2, setup the following tables.
 261 
 262    CHARSET->method       | inhibit_lcm == 0   | inhibit_lcm == 1
 263    ----------------------+--------------------+---------------------------
 264    CHARSET_METHOD_MAP    | CHARSET->encoder   | temp_charset_work->encoder
 265    ----------------------+--------------------+--------------------------
 266    CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
 267 */
 268 
 269 static void
 270 load_charset_map (charset, entries, n_entries, control_flag)
 271   struct charset *charset;
 272   struct charset_map_entries *entries;
 273   int n_entries;
 274   int control_flag;
 275 {
 276   Lisp_Object vec, table;
 277   unsigned max_code = CHARSET_MAX_CODE (charset);
 278   int ascii_compatible_p = charset->ascii_compatible_p;
 279   int min_char, max_char, nonascii_min_char;
 280   int i;
 281   unsigned char *fast_map = charset->fast_map;
 282 
 283   if (n_entries <= 0)
 284     return;
 285 
 286   if (control_flag)
 287     {
 288       if (! inhibit_load_charset_map)
 289         {
 290           if (control_flag == 1)
 291             {
 292               if (charset->method == CHARSET_METHOD_MAP)
 293                 {
 294                   int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
 295 
 296                   vec = CHARSET_DECODER (charset)
 297                     = Fmake_vector (make_number (n), make_number (-1));
 298                 }
 299               else
 300                 {
 301                   char_table_set_range (Vchar_unify_table,
 302                                         charset->min_char, charset->max_char,
 303                                         Qnil);
 304                 }
 305             }
 306           else
 307             {
 308               table = Fmake_char_table (Qnil, Qnil);
 309               if (charset->method == CHARSET_METHOD_MAP)
 310                 CHARSET_ENCODER (charset) = table;
 311               else
 312                 CHARSET_DEUNIFIER (charset) = table;
 313             }
 314         }
 315       else
 316         {
 317           if (! temp_charset_work)
 318             temp_charset_work = malloc (sizeof (*temp_charset_work));
 319           if (control_flag == 1)
 320             {
 321               memset (temp_charset_work->table.decoder, -1,
 322                       sizeof (int) * 0x10000);
 323             }
 324           else
 325             {
 326               memset (temp_charset_work->table.encoder, 0,
 327                       sizeof (unsigned short) * 0x20000);
 328               temp_charset_work->zero_index_char = -1;
 329             }
 330           temp_charset_work->current = charset;
 331           temp_charset_work->for_encoder = (control_flag == 2);
 332           control_flag += 2;
 333         }
 334       charset_map_loaded = 1;
 335     }
 336 
 337   min_char = max_char = entries->entry[0].c;
 338   nonascii_min_char = MAX_CHAR;
 339   for (i = 0; i < n_entries; i++)
 340     {
 341       unsigned from, to;
 342       int from_index, to_index;
 343       int from_c, to_c;
 344       int idx = i % 0x10000;
 345 
 346       if (i > 0 && idx == 0)
 347         entries = entries->next;
 348       from = entries->entry[idx].from;
 349       to = entries->entry[idx].to;
 350       from_c = entries->entry[idx].c;
 351       from_index = CODE_POINT_TO_INDEX (charset, from);
 352       if (from == to)
 353         {
 354           to_index = from_index;
 355           to_c = from_c;
 356         }
 357       else
 358         {
 359           to_index = CODE_POINT_TO_INDEX (charset, to);
 360           to_c = from_c + (to_index - from_index);
 361         }
 362       if (from_index < 0 || to_index < 0)
 363         continue;
 364 
 365       if (to_c > max_char)
 366         max_char = to_c;
 367       else if (from_c < min_char)
 368         min_char = from_c;
 369 
 370       if (control_flag == 1)
 371         {
 372           if (charset->method == CHARSET_METHOD_MAP)
 373             for (; from_index <= to_index; from_index++, from_c++)
 374               ASET (vec, from_index, make_number (from_c));
 375           else
 376             for (; from_index <= to_index; from_index++, from_c++)
 377               CHAR_TABLE_SET (Vchar_unify_table,
 378                               CHARSET_CODE_OFFSET (charset) + from_index,
 379                               make_number (from_c));
 380         }
 381       else if (control_flag == 2)
 382         {
 383           if (charset->method == CHARSET_METHOD_MAP
 384               && CHARSET_COMPACT_CODES_P (charset))
 385             for (; from_index <= to_index; from_index++, from_c++)
 386               {
 387                 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
 388 
 389                 if (NILP (CHAR_TABLE_REF (table, from_c)))
 390                   CHAR_TABLE_SET (table, from_c, make_number (code));
 391               }
 392           else
 393             for (; from_index <= to_index; from_index++, from_c++)
 394               {
 395                 if (NILP (CHAR_TABLE_REF (table, from_c)))
 396                   CHAR_TABLE_SET (table, from_c, make_number (from_index));
 397               }
 398         }
 399       else if (control_flag == 3)
 400         for (; from_index <= to_index; from_index++, from_c++)
 401           SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
 402       else if (control_flag == 4)
 403         for (; from_index <= to_index; from_index++, from_c++)
 404           SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
 405       else                      /* control_flag == 0 */
 406         {
 407           if (ascii_compatible_p)
 408             {
 409               if (! ASCII_BYTE_P (from_c))
 410                 {
 411                   if (from_c < nonascii_min_char)
 412                     nonascii_min_char = from_c;
 413                 }
 414               else if (! ASCII_BYTE_P (to_c))
 415                 {
 416                   nonascii_min_char = 0x80;
 417                 }
 418             }
 419 
 420           for (; from_c <= to_c; from_c++)
 421             CHARSET_FAST_MAP_SET (from_c, fast_map);
 422         }
 423     }
 424 
 425   if (control_flag == 0)
 426     {
 427       CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
 428                                     ? nonascii_min_char : min_char);
 429       CHARSET_MAX_CHAR (charset) = max_char;
 430     }
 431   else if (control_flag == 4)
 432     {
 433       temp_charset_work->min_char = min_char;
 434       temp_charset_work->max_char = max_char;
 435     }
 436 }
 437 
 438 
 439 /* Read a hexadecimal number (preceded by "0x") from the file FP while
 440    paying attention to comment charcter '#'.  */
 441 
 442 static INLINE unsigned
 443 read_hex (fp, eof)
 444      FILE *fp;
 445      int *eof;
 446 {
 447   int c;
 448   unsigned n;
 449 
 450   while ((c = getc (fp)) != EOF)
 451     {
 452       if (c == '#')
 453         {
 454           while ((c = getc (fp)) != EOF && c != '\n');
 455         }
 456       else if (c == '0')
 457         {
 458           if ((c = getc (fp)) == EOF || c == 'x')
 459             break;
 460         }
 461     }
 462   if (c == EOF)
 463     {
 464       *eof = 1;
 465       return 0;
 466     }
 467   *eof = 0;
 468   n = 0;
 469   if (c == 'x')
 470     while ((c = getc (fp)) != EOF && isxdigit (c))
 471       n = ((n << 4)
 472            | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
 473   else
 474     while ((c = getc (fp)) != EOF && isdigit (c))
 475       n = (n * 10) + c - '0';
 476   if (c != EOF)
 477     ungetc (c, fp);
 478   return n;
 479 }
 480 
 481 extern Lisp_Object Qfile_name_handler_alist;
 482 
 483 /* Return a mapping vector for CHARSET loaded from MAPFILE.
 484    Each line of MAPFILE has this form
 485         0xAAAA 0xCCCC
 486    where 0xAAAA is a code-point and 0xCCCC is the corresponding
 487    character code, or this form
 488         0xAAAA-0xBBBB 0xCCCC
 489    where 0xAAAA and 0xBBBB are code-points specifying a range, and
 490    0xCCCC is the first character code of the range.
 491 
 492    The returned vector has this form:
 493         [ CODE1 CHAR1 CODE2 CHAR2 .... ]
 494    where CODE1 is a code-point or a cons of code-points specifying a
 495    range.
 496 
 497    Note that this function uses `openp' to open MAPFILE but ignores
 498    `file-name-handler-alist' to avoid running any Lisp code.  */
 499 
 500 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
 501 
 502 static void
 503 load_charset_map_from_file (charset, mapfile, control_flag)
 504      struct charset *charset;
 505      Lisp_Object mapfile;
 506      int control_flag;
 507 {
 508   unsigned min_code = CHARSET_MIN_CODE (charset);
 509   unsigned max_code = CHARSET_MAX_CODE (charset);
 510   int fd;
 511   FILE *fp;
 512   int eof;
 513   Lisp_Object suffixes;
 514   struct charset_map_entries *head, *entries;
 515   int n_entries, count;
 516   USE_SAFE_ALLOCA;
 517 
 518   suffixes = Fcons (build_string (".map"),
 519                     Fcons (build_string (".TXT"), Qnil));
 520 
 521   count = SPECPDL_INDEX ();
 522   specbind (Qfile_name_handler_alist, Qnil);
 523   fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
 524   unbind_to (count, Qnil);
 525   if (fd < 0
 526       || ! (fp = fdopen (fd, "r")))
 527     error ("Failure in loading charset map: %S", SDATA (mapfile));
 528 
 529   /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
 530      large (larger than MAX_ALLOCA).  */
 531   SAFE_ALLOCA (head, struct charset_map_entries *,
 532                sizeof (struct charset_map_entries));
 533   entries = head;
 534   bzero (entries, sizeof (struct charset_map_entries));
 535 
 536   n_entries = 0;
 537   eof = 0;
 538   while (1)
 539     {
 540       unsigned from, to;
 541       int c;
 542       int idx;
 543 
 544       from = read_hex (fp, &eof);
 545       if (eof)
 546         break;
 547       if (getc (fp) == '-')
 548         to = read_hex (fp, &eof);
 549       else
 550         to = from;
 551       c = (int) read_hex (fp, &eof);
 552 
 553       if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
 554         continue;
 555 
 556       if (n_entries > 0 && (n_entries % 0x10000) == 0)
 557         {
 558           SAFE_ALLOCA (entries->next, struct charset_map_entries *,
 559                        sizeof (struct charset_map_entries));
 560           entries = entries->next;
 561           bzero (entries, sizeof (struct charset_map_entries));
 562         }
 563       idx = n_entries % 0x10000;
 564       entries->entry[idx].from = from;
 565       entries->entry[idx].to = to;
 566       entries->entry[idx].c = c;
 567       n_entries++;
 568     }
 569   fclose (fp);
 570 
 571   load_charset_map (charset, head, n_entries, control_flag);
 572   SAFE_FREE ();
 573 }
 574 
 575 static void
 576 load_charset_map_from_vector (charset, vec, control_flag)
 577      struct charset *charset;
 578      Lisp_Object vec;
 579      int control_flag;
 580 {
 581   unsigned min_code = CHARSET_MIN_CODE (charset);
 582   unsigned max_code = CHARSET_MAX_CODE (charset);
 583   struct charset_map_entries *head, *entries;
 584   int n_entries;
 585   int len = ASIZE (vec);
 586   int i;
 587   USE_SAFE_ALLOCA;
 588 
 589   if (len % 2 == 1)
 590     {
 591       add_to_log ("Failure in loading charset map: %V", vec, Qnil);
 592       return;
 593     }
 594 
 595   /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
 596      large (larger than MAX_ALLOCA).  */
 597   SAFE_ALLOCA (head, struct charset_map_entries *,
 598                sizeof (struct charset_map_entries));
 599   entries = head;
 600   bzero (entries, sizeof (struct charset_map_entries));
 601 
 602   n_entries = 0;
 603   for (i = 0; i < len; i += 2)
 604     {
 605       Lisp_Object val, val2;
 606       unsigned from, to;
 607       int c;
 608       int idx;
 609 
 610       val = AREF (vec, i);
 611       if (CONSP (val))
 612         {
 613           val2 = XCDR (val);
 614           val = XCAR (val);
 615           CHECK_NATNUM (val);
 616           CHECK_NATNUM (val2);
 617           from = XFASTINT (val);
 618           to = XFASTINT (val2);
 619         }
 620       else
 621         {
 622           CHECK_NATNUM (val);
 623           from = to = XFASTINT (val);
 624         }
 625       val = AREF (vec, i + 1);
 626       CHECK_NATNUM (val);
 627       c = XFASTINT (val);
 628 
 629       if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
 630         continue;
 631 
 632       if (n_entries > 0 && (n_entries % 0x10000) == 0)
 633         {
 634           SAFE_ALLOCA (entries->next, struct charset_map_entries *,
 635                        sizeof (struct charset_map_entries));
 636           entries = entries->next;
 637           bzero (entries, sizeof (struct charset_map_entries));
 638         }
 639       idx = n_entries % 0x10000;
 640       entries->entry[idx].from = from;
 641       entries->entry[idx].to = to;
 642       entries->entry[idx].c = c;
 643       n_entries++;
 644     }
 645 
 646   load_charset_map (charset, head, n_entries, control_flag);
 647   SAFE_FREE ();
 648 }
 649 
 650 
 651 /* Load a mapping table for CHARSET.  CONTROL-FLAG tells what kind of
 652    map it is (see the comment of load_charset_map for the detail).  */
 653 
 654 static void
 655 load_charset (charset, control_flag)
 656      struct charset *charset;
 657      int control_flag;
 658 {
 659   Lisp_Object map;
 660 
 661   if (inhibit_load_charset_map
 662       && temp_charset_work
 663       && charset == temp_charset_work->current
 664       && ((control_flag == 2) == temp_charset_work->for_encoder))
 665     return;
 666 
 667   if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
 668     map = CHARSET_MAP (charset);
 669   else if (CHARSET_UNIFIED_P (charset))
 670     map = CHARSET_UNIFY_MAP (charset);
 671   if (STRINGP (map))
 672     load_charset_map_from_file (charset, map, control_flag);
 673   else
 674     load_charset_map_from_vector (charset, map, control_flag);
 675 }
 676 
 677 
 678 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
 679        doc: /* Return non-nil if and only if OBJECT is a charset.*/)
 680      (object)
 681      Lisp_Object object;
 682 {
 683   return (CHARSETP (object) ? Qt : Qnil);
 684 }
 685 
 686 
 687 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
 688                                Lisp_Object function, Lisp_Object arg,
 689                                unsigned from, unsigned to));
 690 
 691 void
 692 map_charset_for_dump (c_function, function, arg, from, to)
 693      void (*c_function) (Lisp_Object, Lisp_Object);
 694      Lisp_Object function, arg;
 695      unsigned from, to;
 696 {
 697   int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
 698   int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
 699   Lisp_Object range;
 700   int c, stop;
 701   struct gcpro gcpro1;
 702 
 703   range = Fcons (Qnil, Qnil);
 704   GCPRO1 (range);
 705 
 706   c = temp_charset_work->min_char;
 707   stop = (temp_charset_work->max_char < 0x20000
 708           ? temp_charset_work->max_char : 0xFFFF);
 709           
 710   while (1)
 711     {
 712       int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
 713 
 714       if (index >= from_idx && index <= to_idx)
 715         {
 716           if (NILP (XCAR (range)))
 717             XSETCAR (range, make_number (c));
 718         }
 719       else if (! NILP (XCAR (range)))
 720         {
 721           XSETCDR (range, make_number (c - 1));
 722           if (c_function)
 723             (*c_function) (arg, range);
 724           else
 725             call2 (function, range, arg);
 726           XSETCAR (range, Qnil);
 727         }
 728       if (c == stop)
 729         {
 730           if (c == temp_charset_work->max_char)
 731             {
 732               if (! NILP (XCAR (range)))
 733                 {
 734                   XSETCDR (range, make_number (c));
 735                   if (c_function)
 736                     (*c_function) (arg, range);
 737                   else
 738                     call2 (function, range, arg);
 739                 }
 740               break;
 741             }
 742           c = 0x1FFFF;
 743           stop = temp_charset_work->max_char;
 744         }
 745       c++;
 746     }
 747   UNGCPRO;
 748 }
 749 
 750 void
 751 map_charset_chars (c_function, function, arg,
 752                    charset, from, to)
 753      void (*c_function) P_ ((Lisp_Object, Lisp_Object));
 754      Lisp_Object function, arg;
 755      struct charset *charset;
 756      unsigned from, to;
 757 {
 758   Lisp_Object range;
 759   int partial;
 760 
 761   partial = (from > CHARSET_MIN_CODE (charset)
 762              || to < CHARSET_MAX_CODE (charset));
 763 
 764   if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
 765     {
 766       int from_idx = CODE_POINT_TO_INDEX (charset, from);
 767       int to_idx = CODE_POINT_TO_INDEX (charset, to);
 768       int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
 769       int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
 770 
 771       if (CHARSET_UNIFIED_P (charset))
 772         {
 773           if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
 774             load_charset (charset, 2);
 775           if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
 776             map_char_table_for_charset (c_function, function,
 777                                         CHARSET_DEUNIFIER (charset), arg,
 778                                         partial ? charset : NULL, from, to);
 779           else
 780             map_charset_for_dump (c_function, function, arg, from, to);
 781         }
 782 
 783       range = Fcons (make_number (from_c), make_number (to_c));
 784       if (NILP (function))
 785         (*c_function) (arg, range);
 786       else
 787         call2 (function, range, arg);
 788     }
 789   else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
 790     {
 791       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
 792         load_charset (charset, 2);
 793       if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
 794         map_char_table_for_charset (c_function, function,
 795                                     CHARSET_ENCODER (charset), arg,
 796                                     partial ? charset : NULL, from, to);
 797       else
 798         map_charset_for_dump (c_function, function, arg, from, to);
 799     }
 800   else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
 801     {
 802       Lisp_Object subset_info;
 803       int offset;
 804 
 805       subset_info = CHARSET_SUBSET (charset);
 806       charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
 807       offset = XINT (AREF (subset_info, 3));
 808       from -= offset;
 809       if (from < XFASTINT (AREF (subset_info, 1)))
 810         from = XFASTINT (AREF (subset_info, 1));
 811       to -= offset;
 812       if (to > XFASTINT (AREF (subset_info, 2)))
 813         to = XFASTINT (AREF (subset_info, 2));
 814       map_charset_chars (c_function, function, arg, charset, from, to);
 815     }
 816   else                          /* i.e. CHARSET_METHOD_SUPERSET */
 817     {
 818       Lisp_Object parents;
 819 
 820       for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
 821            parents = XCDR (parents))
 822         {
 823           int offset;
 824           unsigned this_from, this_to;
 825 
 826           charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
 827           offset = XINT (XCDR (XCAR (parents)));
 828           this_from = from > offset ? from - offset : 0;
 829           this_to = to > offset ? to - offset : 0;
 830           if (this_from < CHARSET_MIN_CODE (charset))
 831             this_from = CHARSET_MIN_CODE (charset);
 832           if (this_to > CHARSET_MAX_CODE (charset))
 833             this_to = CHARSET_MAX_CODE (charset);
 834           map_charset_chars (c_function, function, arg, charset,
 835                              this_from, this_to);
 836         }
 837     }
 838 }
 839 
 840 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
 841        doc: /* Call FUNCTION for all characters in CHARSET.
 842 FUNCTION is called with an argument RANGE and the optional 3rd
 843 argument ARG.
 844 
 845 RANGE is a cons (FROM .  TO), where FROM and TO indicate a range of
 846 characters contained in CHARSET.
 847 
 848 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
 849 range of code points (in CHARSET) of target characters.  */)
 850      (function, charset, arg, from_code, to_code)
 851        Lisp_Object function, charset, arg, from_code, to_code;
 852 {
 853   struct charset *cs;
 854   unsigned from, to;
 855 
 856   CHECK_CHARSET_GET_CHARSET (charset, cs);
 857   if (NILP (from_code))
 858     from = CHARSET_MIN_CODE (cs);
 859   else
 860     {
 861       CHECK_NATNUM (from_code);
 862       from = XINT (from_code);
 863       if (from < CHARSET_MIN_CODE (cs))
 864         from = CHARSET_MIN_CODE (cs);
 865     }
 866   if (NILP (to_code))
 867     to = CHARSET_MAX_CODE (cs);
 868   else
 869     {
 870       CHECK_NATNUM (to_code);
 871       to = XINT (to_code);
 872       if (to > CHARSET_MAX_CODE (cs))
 873         to = CHARSET_MAX_CODE (cs);
 874     }
 875   map_charset_chars (NULL, function, arg, cs, from, to);
 876   return Qnil;
 877 }
 878 
 879 
 880 /* Define a charset according to the arguments.  The Nth argument is
 881    the Nth attribute of the charset (the last attribute `charset-id'
 882    is not included).  See the docstring of `define-charset' for the
 883    detail.  */
 884 
 885 DEFUN ("define-charset-internal", Fdefine_charset_internal,
 886        Sdefine_charset_internal, charset_arg_max, MANY, 0,
 887        doc: /* For internal use only.
 888 usage: (define-charset-internal ...)  */)
 889      (nargs, args)
 890      int nargs;
 891      Lisp_Object *args;
 892 {
 893   /* Charset attr vector.  */
 894   Lisp_Object attrs;
 895   Lisp_Object val;
 896   unsigned hash_code;
 897   struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
 898   int i, j;
 899   struct charset charset;
 900   int id;
 901   int dimension;
 902   int new_definition_p;
 903   int nchars;
 904 
 905   if (nargs != charset_arg_max)
 906     return Fsignal (Qwrong_number_of_arguments,
 907                     Fcons (intern ("define-charset-internal"),
 908                            make_number (nargs)));
 909 
 910   attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
 911 
 912   CHECK_SYMBOL (args[charset_arg_name]);
 913   ASET (attrs, charset_name, args[charset_arg_name]);
 914 
 915   val = args[charset_arg_code_space];
 916   for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
 917     {
 918       int min_byte, max_byte;
 919 
 920       min_byte = XINT (Faref (val, make_number (i * 2)));
 921       max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
 922       if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
 923         error ("Invalid :code-space value");
 924       charset.code_space[i * 4] = min_byte;
 925       charset.code_space[i * 4 + 1] = max_byte;
 926       charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
 927       nchars *= charset.code_space[i * 4 + 2];
 928       charset.code_space[i * 4 + 3] = nchars;
 929       if (max_byte > 0)
 930         dimension = i + 1;
 931     }
 932 
 933   val = args[charset_arg_dimension];
 934   if (NILP (val))
 935     charset.dimension = dimension;
 936   else
 937     {
 938       CHECK_NATNUM (val);
 939       charset.dimension = XINT (val);
 940       if (charset.dimension < 1 || charset.dimension > 4)
 941         args_out_of_range_3 (val, make_number (1), make_number (4));
 942     }
 943 
 944   charset.code_linear_p
 945     = (charset.dimension == 1
 946        || (charset.code_space[2] == 256
 947            && (charset.dimension == 2
 948                || (charset.code_space[6] == 256
 949                    && (charset.dimension == 3
 950                        || charset.code_space[10] == 256)))));
 951 
 952   if (! charset.code_linear_p)
 953     {
 954       charset.code_space_mask = (unsigned char *) xmalloc (256);
 955       bzero (charset.code_space_mask, 256);
 956       for (i = 0; i < 4; i++)
 957         for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
 958              j++)
 959           charset.code_space_mask[j] |= (1 << i);
 960     }
 961 
 962   charset.iso_chars_96 = charset.code_space[2] == 96;
 963 
 964   charset.min_code = (charset.code_space[0]
 965                       | (charset.code_space[4] << 8)
 966                       | (charset.code_space[8] << 16)
 967                       | (charset.code_space[12] << 24));
 968   charset.max_code = (charset.code_space[1]
 969                       | (charset.code_space[5] << 8)
 970                       | (charset.code_space[9] << 16)
 971                       | (charset.code_space[13] << 24));
 972   charset.char_index_offset = 0;
 973 
 974   val = args[charset_arg_min_code];
 975   if (! NILP (val))
 976     {
 977       unsigned code;
 978 
 979       if (INTEGERP (val))
 980         code = XINT (val);
 981       else
 982         {
 983           CHECK_CONS (val);
 984           CHECK_NUMBER_CAR (val);
 985           CHECK_NUMBER_CDR (val);
 986           code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
 987         }
 988       if (code < charset.min_code
 989           || code > charset.max_code)
 990         args_out_of_range_3 (make_number (charset.min_code),
 991                              make_number (charset.max_code), val);
 992       charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
 993       charset.min_code = code;
 994     }
 995 
 996   val = args[charset_arg_max_code];
 997   if (! NILP (val))
 998     {
 999       unsigned code;
1000 
1001       if (INTEGERP (val))
1002         code = XINT (val);
1003       else
1004         {
1005           CHECK_CONS (val);
1006           CHECK_NUMBER_CAR (val);
1007           CHECK_NUMBER_CDR (val);
1008           code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
1009         }
1010       if (code < charset.min_code
1011           || code > charset.max_code)
1012         args_out_of_range_3 (make_number (charset.min_code),
1013                              make_number (charset.max_code), val);
1014       charset.max_code = code;
1015     }
1016 
1017   charset.compact_codes_p = charset.max_code < 0x10000;
1018 
1019   val = args[charset_arg_invalid_code];
1020   if (NILP (val))
1021     {
1022       if (charset.min_code > 0)
1023         charset.invalid_code = 0;
1024       else
1025         {
1026           XSETINT (val, charset.max_code + 1);
1027           if (XINT (val) == charset.max_code + 1)
1028             charset.invalid_code = charset.max_code + 1;
1029           else
1030             error ("Attribute :invalid-code must be specified");
1031         }
1032     }
1033   else
1034     {
1035       CHECK_NATNUM (val);
1036       charset.invalid_code = XFASTINT (val);
1037     }
1038 
1039   val = args[charset_arg_iso_final];
1040   if (NILP (val))
1041     charset.iso_final = -1;
1042   else
1043     {
1044       CHECK_NUMBER (val);
1045       if (XINT (val) < '0' || XINT (val) > 127)
1046         error ("Invalid iso-final-char: %d", XINT (val));
1047       charset.iso_final = XINT (val);
1048     }
1049 
1050   val = args[charset_arg_iso_revision];
1051   if (NILP (val))
1052     charset.iso_revision = -1;
1053   else
1054     {
1055       CHECK_NUMBER (val);
1056       if (XINT (val) > 63)
1057         args_out_of_range (make_number (63), val);
1058       charset.iso_revision = XINT (val);
1059     }
1060 
1061   val = args[charset_arg_emacs_mule_id];
1062   if (NILP (val))
1063     charset.emacs_mule_id = -1;
1064   else
1065     {
1066       CHECK_NATNUM (val);
1067       if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1068         error ("Invalid emacs-mule-id: %d", XINT (val));
1069       charset.emacs_mule_id = XINT (val);
1070     }
1071 
1072   charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1073 
1074   charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1075 
1076   charset.unified_p = 0;
1077 
1078   bzero (charset.fast_map, sizeof (charset.fast_map));
1079 
1080   if (! NILP (args[charset_arg_code_offset]))
1081     {
1082       val = args[charset_arg_code_offset];
1083       CHECK_NUMBER (val);
1084 
1085       charset.method = CHARSET_METHOD_OFFSET;
1086       charset.code_offset = XINT (val);
1087 
1088       i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1089       charset.min_char = i + charset.code_offset;
1090       i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1091       charset.max_char = i + charset.code_offset;
1092       if (charset.max_char > MAX_CHAR)
1093         error ("Unsupported max char: %d", charset.max_char);
1094 
1095       i = (charset.min_char >> 7) << 7;
1096       for (; i < 0x10000 && i <= charset.max_char; i += 128)
1097         CHARSET_FAST_MAP_SET (i, charset.fast_map);
1098       i = (i >> 12) << 12;
1099       for (; i <= charset.max_char; i += 0x1000)
1100         CHARSET_FAST_MAP_SET (i, charset.fast_map);
1101       if (charset.code_offset == 0 && charset.max_char >= 0x80)
1102         charset.ascii_compatible_p = 1;
1103     }
1104   else if (! NILP (args[charset_arg_map]))
1105     {
1106       val = args[charset_arg_map];
1107       ASET (attrs, charset_map, val);
1108       charset.method = CHARSET_METHOD_MAP;
1109     }
1110   else if (! NILP (args[charset_arg_subset]))
1111     {
1112       Lisp_Object parent;
1113       Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1114       struct charset *parent_charset;
1115 
1116       val = args[charset_arg_subset];
1117       parent = Fcar (val);
1118       CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1119       parent_min_code = Fnth (make_number (1), val);
1120       CHECK_NATNUM (parent_min_code);
1121       parent_max_code = Fnth (make_number (2), val);
1122       CHECK_NATNUM (parent_max_code);
1123       parent_code_offset = Fnth (make_number (3), val);
1124       CHECK_NUMBER (parent_code_offset);
1125       val = Fmake_vector (make_number (4), Qnil);
1126       ASET (val, 0, make_number (parent_charset->id));
1127       ASET (val, 1, parent_min_code);
1128       ASET (val, 2, parent_max_code);
1129       ASET (val, 3, parent_code_offset);
1130       ASET (attrs, charset_subset, val);
1131 
1132       charset.method = CHARSET_METHOD_SUBSET;
1133       /* Here, we just copy the parent's fast_map.  It's not accurate,
1134          but at least it works for quickly detecting which character
1135          DOESN'T belong to this charset.  */
1136       for (i = 0; i < 190; i++)
1137         charset.fast_map[i] = parent_charset->fast_map[i];
1138 
1139       /* We also copy these for parents.  */
1140       charset.min_char = parent_charset->min_char;
1141       charset.max_char = parent_charset->max_char;
1142     }
1143   else if (! NILP (args[charset_arg_superset]))
1144     {
1145       val = args[charset_arg_superset];
1146       charset.method = CHARSET_METHOD_SUPERSET;
1147       val = Fcopy_sequence (val);
1148       ASET (attrs, charset_superset, val);
1149 
1150       charset.min_char = MAX_CHAR;
1151       charset.max_char = 0;
1152       for (; ! NILP (val); val = Fcdr (val))
1153         {
1154           Lisp_Object elt, car_part, cdr_part;
1155           int this_id, offset;
1156           struct charset *this_charset;
1157 
1158           elt = Fcar (val);
1159           if (CONSP (elt))
1160             {
1161               car_part = XCAR (elt);
1162               cdr_part = XCDR (elt);
1163               CHECK_CHARSET_GET_ID (car_part, this_id);
1164               CHECK_NUMBER (cdr_part);
1165               offset = XINT (cdr_part);
1166             }
1167           else
1168             {
1169               CHECK_CHARSET_GET_ID (elt, this_id);
1170               offset = 0;
1171             }
1172           XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1173 
1174           this_charset = CHARSET_FROM_ID (this_id);
1175           if (charset.min_char > this_charset->min_char)
1176             charset.min_char = this_charset->min_char;
1177           if (charset.max_char < this_charset->max_char)
1178             charset.max_char = this_charset->max_char;
1179           for (i = 0; i < 190; i++)
1180             charset.fast_map[i] |= this_charset->fast_map[i];
1181         }
1182     }
1183   else
1184     error ("None of :code-offset, :map, :parents are specified");
1185 
1186   val = args[charset_arg_unify_map];
1187   if (! NILP (val) && !STRINGP (val))
1188     CHECK_VECTOR (val);
1189   ASET (attrs, charset_unify_map, val);
1190 
1191   CHECK_LIST (args[charset_arg_plist]);
1192   ASET (attrs, charset_plist, args[charset_arg_plist]);
1193 
1194   charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1195                                     &hash_code);
1196   if (charset.hash_index >= 0)
1197     {
1198       new_definition_p = 0;
1199       id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1200       HASH_VALUE (hash_table, charset.hash_index) = attrs;
1201     }
1202   else
1203     {
1204       charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1205                                      hash_code);
1206       if (charset_table_used == charset_table_size)
1207         {
1208           struct charset *new_table
1209             = (struct charset *) xmalloc (sizeof (struct charset)
1210                                           * (charset_table_size + 16));
1211           bcopy (charset_table, new_table,
1212                  sizeof (struct charset) * charset_table_size);
1213           charset_table_size += 16;
1214           charset_table = new_table;
1215         }
1216       id = charset_table_used++;
1217       new_definition_p = 1;
1218     }
1219 
1220   ASET (attrs, charset_id, make_number (id));
1221   charset.id = id;
1222   charset_table[id] = charset;
1223 
1224   if (charset.method == CHARSET_METHOD_MAP)
1225     {
1226       load_charset (&charset, 0);
1227       charset_table[id] = charset;
1228     }
1229 
1230   if (charset.iso_final >= 0)
1231     {
1232       ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1233                          charset.iso_final) = id;
1234       if (new_definition_p)
1235         Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1236                                          Fcons (make_number (id), Qnil));
1237       if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1238         charset_jisx0201_roman = id;
1239       else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1240         charset_jisx0208_1978 = id;
1241       else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1242         charset_jisx0208 = id;
1243       else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1244         charset_ksc5601 = id;
1245     }
1246 
1247   if (charset.emacs_mule_id >= 0)
1248     {
1249       emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1250       if (charset.emacs_mule_id < 0xA0)
1251         emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1252       else
1253         emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1254       if (new_definition_p)
1255         Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1256                                            Fcons (make_number (id), Qnil));
1257     }
1258 
1259   if (new_definition_p)
1260     {
1261       Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1262       if (charset.supplementary_p)
1263         Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1264                                         Fcons (make_number (id), Qnil));
1265       else
1266         {
1267           Lisp_Object tail;
1268 
1269           for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1270             {
1271               struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1272 
1273               if (cs->supplementary_p)
1274                 break;
1275             }
1276           if (EQ (tail, Vcharset_ordered_list))
1277             Vcharset_ordered_list = Fcons (make_number (id),
1278                                            Vcharset_ordered_list);
1279           else if (NILP (tail))
1280             Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1281                                             Fcons (make_number (id), Qnil));
1282           else
1283             {
1284               val = Fcons (XCAR (tail), XCDR (tail));
1285               XSETCDR (tail, val);
1286               XSETCAR (tail, make_number (id));
1287             }
1288         }
1289       charset_ordered_list_tick++;
1290     }
1291 
1292   return Qnil;
1293 }
1294 
1295 
1296 /* Same as Fdefine_charset_internal but arguments are more convenient
1297    to call from C (typically in syms_of_charset).  This can define a
1298    charset of `offset' method only.  Return the ID of the new
1299    charset.  */
1300 
1301 static int
1302 define_charset_internal (name, dimension, code_space, min_code, max_code,
1303                          iso_final, iso_revision, emacs_mule_id,
1304                          ascii_compatible, supplementary,
1305                          code_offset)
1306      Lisp_Object name;
1307      int dimension;
1308      unsigned char *code_space;
1309      unsigned min_code, max_code;
1310      int iso_final, iso_revision, emacs_mule_id;
1311      int ascii_compatible, supplementary;
1312      int code_offset;
1313 {
1314   Lisp_Object args[charset_arg_max];
1315   Lisp_Object plist[14];
1316   Lisp_Object val;
1317   int i;
1318 
1319   args[charset_arg_name] = name;
1320   args[charset_arg_dimension] = make_number (dimension);
1321   val = Fmake_vector (make_number (8), make_number (0));
1322   for (i = 0; i < 8; i++)
1323     ASET (val, i, make_number (code_space[i]));
1324   args[charset_arg_code_space] = val;
1325   args[charset_arg_min_code] = make_number (min_code);
1326   args[charset_arg_max_code] = make_number (max_code);
1327   args[charset_arg_iso_final]
1328     = (iso_final < 0 ? Qnil : make_number (iso_final));
1329   args[charset_arg_iso_revision] = make_number (iso_revision);
1330   args[charset_arg_emacs_mule_id]
1331     = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1332   args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1333   args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1334   args[charset_arg_invalid_code] = Qnil;
1335   args[charset_arg_code_offset] = make_number (code_offset);
1336   args[charset_arg_map] = Qnil;
1337   args[charset_arg_subset] = Qnil;
1338   args[charset_arg_superset] = Qnil;
1339   args[charset_arg_unify_map] = Qnil;
1340 
1341   plist[0] = intern_c_string (":name");
1342   plist[1] = args[charset_arg_name];
1343   plist[2] = intern_c_string (":dimension");
1344   plist[3] = args[charset_arg_dimension];
1345   plist[4] = intern_c_string (":code-space");
1346   plist[5] = args[charset_arg_code_space];
1347   plist[6] = intern_c_string (":iso-final-char");
1348   plist[7] = args[charset_arg_iso_final];
1349   plist[8] = intern_c_string (":emacs-mule-id");
1350   plist[9] = args[charset_arg_emacs_mule_id];
1351   plist[10] = intern_c_string (":ascii-compatible-p");
1352   plist[11] = args[charset_arg_ascii_compatible_p];
1353   plist[12] = intern_c_string (":code-offset");
1354   plist[13] = args[charset_arg_code_offset];
1355 
1356   args[charset_arg_plist] = Flist (14, plist);
1357   Fdefine_charset_internal (charset_arg_max, args);
1358 
1359   return XINT (CHARSET_SYMBOL_ID (name));
1360 }
1361 
1362 
1363 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1364        Sdefine_charset_alias, 2, 2, 0,
1365        doc: /* Define ALIAS as an alias for charset CHARSET.  */)
1366      (alias, charset)
1367      Lisp_Object alias, charset;
1368 {
1369   Lisp_Object attr;
1370 
1371   CHECK_CHARSET_GET_ATTR (charset, attr);
1372   Fputhash (alias, attr, Vcharset_hash_table);
1373   Vcharset_list = Fcons (alias, Vcharset_list);
1374   return Qnil;
1375 }
1376 
1377 
1378 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1379        doc: /* Return the property list of CHARSET.  */)
1380      (charset)
1381      Lisp_Object charset;
1382 {
1383   Lisp_Object attrs;
1384 
1385   CHECK_CHARSET_GET_ATTR (charset, attrs);
1386   return CHARSET_ATTR_PLIST (attrs);
1387 }
1388 
1389 
1390 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1391        doc: /* Set CHARSET's property list to PLIST.  */)
1392      (charset, plist)
1393      Lisp_Object charset, plist;
1394 {
1395   Lisp_Object attrs;
1396 
1397   CHECK_CHARSET_GET_ATTR (charset, attrs);
1398   CHARSET_ATTR_PLIST (attrs) = plist;
1399   return plist;
1400 }
1401 
1402 
1403 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1404        doc: /* Unify characters of CHARSET with Unicode.
1405 This means reading the relevant file and installing the table defined
1406 by CHARSET's `:unify-map' property.
1407 
1408 Optional second arg UNIFY-MAP is a file name string or a vector.  It has
1409 the same meaning as the `:unify-map' attribute in the function
1410 `define-charset' (which see).
1411 
1412 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET.  */)
1413      (charset, unify_map, deunify)
1414      Lisp_Object charset, unify_map, deunify;
1415 {
1416   int id;
1417   struct charset *cs;
1418 
1419   CHECK_CHARSET_GET_ID (charset, id);
1420   cs = CHARSET_FROM_ID (id);
1421   if (NILP (deunify)
1422       ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1423       : ! CHARSET_UNIFIED_P (cs))
1424     return Qnil;
1425 
1426   CHARSET_UNIFIED_P (cs) = 0;
1427   if (NILP (deunify))
1428     {
1429       if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1430           || CHARSET_CODE_OFFSET (cs) < 0x110000)
1431         error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1432       if (NILP (unify_map))
1433         unify_map = CHARSET_UNIFY_MAP (cs);
1434       else
1435         {
1436           if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1437             signal_error ("Bad unify-map", unify_map);
1438           CHARSET_UNIFY_MAP (cs) = unify_map;
1439         }
1440       if (NILP (Vchar_unify_table))
1441         Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1442       char_table_set_range (Vchar_unify_table,
1443                             cs->min_char, cs->max_char, charset);
1444       CHARSET_UNIFIED_P (cs) = 1;
1445     }
1446   else if (CHAR_TABLE_P (Vchar_unify_table))
1447     {
1448       int min_code = CHARSET_MIN_CODE (cs);
1449       int max_code = CHARSET_MAX_CODE (cs);
1450       int min_char = DECODE_CHAR (cs, min_code);
1451       int max_char = DECODE_CHAR (cs, max_code);
1452 
1453       char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1454     }
1455 
1456   return Qnil;
1457 }
1458 
1459 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1460        Sget_unused_iso_final_char, 2, 2, 0,
1461        doc: /*
1462 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1463 DIMENSION is the number of bytes to represent a character: 1 or 2.
1464 CHARS is the number of characters in a dimension: 94 or 96.
1465 
1466 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1467 If there's no unused final char for the specified kind of charset,
1468 return nil.  */)
1469      (dimension, chars)
1470      Lisp_Object dimension, chars;
1471 {
1472   int final_char;
1473 
1474   CHECK_NUMBER (dimension);
1475   CHECK_NUMBER (chars);
1476   if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1477     args_out_of_range_3 (dimension, make_number (1), make_number (3));
1478   if (XINT (chars) != 94 && XINT (chars) != 96)
1479     args_out_of_range_3 (chars, make_number (94), make_number (96));
1480   for (final_char = '0'; final_char <= '?'; final_char++)
1481     if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1482       break;
1483   return (final_char <= '?' ? make_number (final_char) : Qnil);
1484 }
1485 
1486 static void
1487 check_iso_charset_parameter (dimension, chars, final_char)
1488      Lisp_Object dimension, chars, final_char;
1489 {
1490   CHECK_NATNUM (dimension);
1491   CHECK_NATNUM (chars);
1492   CHECK_NATNUM (final_char);
1493 
1494   if (XINT (dimension) > 3)
1495     error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1496   if (XINT (chars) != 94 && XINT (chars) != 96)
1497     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1498   if (XINT (final_char) < '0' || XINT (final_char) > '~')
1499     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1500 }
1501 
1502 
1503 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1504        4, 4, 0,
1505        doc: /* Declare an equivalent charset for ISO-2022 decoding.
1506 
1507 On decoding by an ISO-2022 base coding system, when a charset
1508 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1509 if CHARSET is designated instead.  */)
1510      (dimension, chars, final_char, charset)
1511      Lisp_Object dimension, chars, final_char, charset;
1512 {
1513   int id;
1514   int chars_flag;
1515 
1516   CHECK_CHARSET_GET_ID (charset, id);
1517   check_iso_charset_parameter (dimension, chars, final_char);
1518   chars_flag = XINT (chars) == 96;
1519   ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1520   return Qnil;
1521 }
1522 
1523 
1524 /* Return information about charsets in the text at PTR of NBYTES
1525    bytes, which are NCHARS characters.  The value is:
1526 
1527         0: Each character is represented by one byte.  This is always
1528            true for a unibyte string.  For a multibyte string, true if
1529            it contains only ASCII characters.
1530 
1531         1: No charsets other than ascii, control-1, and latin-1 are
1532            found.
1533 
1534         2: Otherwise.
1535 */
1536 
1537 int
1538 string_xstring_p (string)
1539      Lisp_Object string;
1540 {
1541   const unsigned char *p = SDATA (string);
1542   const unsigned char *endp = p + SBYTES (string);
1543 
1544   if (SCHARS (string) == SBYTES (string))
1545     return 0;
1546 
1547   while (p < endp)
1548     {
1549       int c = STRING_CHAR_ADVANCE (p);
1550 
1551       if (c >= 0x100)
1552         return 2;
1553     }
1554   return 1;
1555 }
1556 
1557 
1558 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1559 
1560    CHARSETS is a vector.  If Nth element is non-nil, it means the
1561    charset whose id is N is already found.
1562 
1563    It may lookup a translation table TABLE if supplied.  */
1564 
1565 static void
1566 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1567      const unsigned char *ptr;
1568      EMACS_INT nchars, nbytes;
1569      Lisp_Object charsets, table;
1570      int multibyte;
1571 {
1572   const unsigned char *pend = ptr + nbytes;
1573 
1574   if (nchars == nbytes)
1575     {
1576       if (multibyte)
1577         ASET (charsets, charset_ascii, Qt);
1578       else
1579         while (ptr < pend)
1580           {
1581             int c = *ptr++;
1582 
1583             if (!NILP (table))
1584               c = translate_char (table, c);
1585             if (ASCII_BYTE_P (c))
1586               ASET (charsets, charset_ascii, Qt);
1587             else
1588               ASET (charsets, charset_eight_bit, Qt);
1589           }
1590     }
1591   else
1592     {
1593       while (ptr < pend)
1594         {
1595           int c = STRING_CHAR_ADVANCE (ptr);
1596           struct charset *charset;
1597 
1598           if (!NILP (table))
1599             c = translate_char (table, c);
1600           charset = CHAR_CHARSET (c);
1601           ASET (charsets, CHARSET_ID (charset), Qt);
1602         }
1603     }
1604 }
1605 
1606 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1607        2, 3, 0,
1608        doc: /* Return a list of charsets in the region between BEG and END.
1609 BEG and END are buffer positions.
1610 Optional arg TABLE if non-nil is a translation table to look up.
1611 
1612 If the current buffer is unibyte, the returned list may contain
1613 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
1614      (beg, end, table)
1615      Lisp_Object beg, end, table;
1616 {
1617   Lisp_Object charsets;
1618   EMACS_INT from, from_byte, to, stop, stop_byte;
1619   int i;
1620   Lisp_Object val;
1621   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1622 
1623   validate_region (&beg, &end);
1624   from = XFASTINT (beg);
1625   stop = to = XFASTINT (end);
1626 
1627   if (from < GPT && GPT < to)
1628     {
1629       stop = GPT;
1630       stop_byte = GPT_BYTE;
1631     }
1632   else
1633     stop_byte = CHAR_TO_BYTE (stop);
1634 
1635   from_byte = CHAR_TO_BYTE (from);
1636 
1637   charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1638   while (1)
1639     {
1640       find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1641                              stop_byte - from_byte, charsets, table,
1642                              multibyte);
1643       if (stop < to)
1644         {
1645           from = stop, from_byte = stop_byte;
1646           stop = to, stop_byte = CHAR_TO_BYTE (stop);
1647         }
1648       else
1649         break;
1650     }
1651 
1652   val = Qnil;
1653   for (i = charset_table_used - 1; i >= 0; i--)
1654     if (!NILP (AREF (charsets, i)))
1655       val = Fcons (CHARSET_NAME (charset_table + i), val);
1656   return val;
1657 }
1658 
1659 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1660        1, 2, 0,
1661        doc: /* Return a list of charsets in STR.
1662 Optional arg TABLE if non-nil is a translation table to look up.
1663 
1664 If STR is unibyte, the returned list may contain
1665 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1666      (str, table)
1667      Lisp_Object str, table;
1668 {
1669   Lisp_Object charsets;
1670   int i;
1671   Lisp_Object val;
1672 
1673   CHECK_STRING (str);
1674 
1675   charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1676   find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1677                          charsets, table,
1678                          STRING_MULTIBYTE (str));
1679   val = Qnil;
1680   for (i = charset_table_used - 1; i >= 0; i--)
1681     if (!NILP (AREF (charsets, i)))
1682       val = Fcons (CHARSET_NAME (charset_table + i), val);
1683   return val;
1684 }
1685 
1686 
1687 
1688 /* Return a unified character code for C (>= 0x110000).  VAL is a
1689    value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1690    charset symbol.  */
1691 int
1692 maybe_unify_char (c, val)
1693      int c;
1694      Lisp_Object val;
1695 {
1696   struct charset *charset;
1697 
1698   if (INTEGERP (val))
1699     return XINT (val);
1700   if (NILP (val))
1701     return c;
1702 
1703   CHECK_CHARSET_GET_CHARSET (val, charset);
1704   load_charset (charset, 1);
1705   if (! inhibit_load_charset_map)
1706     {
1707       val = CHAR_TABLE_REF (Vchar_unify_table, c);
1708       if (! NILP (val))
1709         c = XINT (val);
1710     }
1711   else
1712     {
1713       int code_index = c - CHARSET_CODE_OFFSET (charset);
1714       int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1715 
1716       if (unified > 0)
1717         c = unified;
1718     }
1719   return c;
1720 }
1721 
1722 
1723 /* Return a character correponding to the code-point CODE of
1724    CHARSET.  */
1725 
1726 int
1727 decode_char (charset, code)
1728      struct charset *charset;
1729      unsigned code;
1730 {
1731   int c, char_index;
1732   enum charset_method method = CHARSET_METHOD (charset);
1733 
1734   if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1735     return -1;
1736 
1737   if (method == CHARSET_METHOD_SUBSET)
1738     {
1739       Lisp_Object subset_info;
1740 
1741       subset_info = CHARSET_SUBSET (charset);
1742       charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1743       code -= XINT (AREF (subset_info, 3));
1744       if (code < XFASTINT (AREF (subset_info, 1))
1745           || code > XFASTINT (AREF (subset_info, 2)))
1746         c = -1;
1747       else
1748         c = DECODE_CHAR (charset, code);
1749     }
1750   else if (method == CHARSET_METHOD_SUPERSET)
1751     {
1752       Lisp_Object parents;
1753 
1754       parents = CHARSET_SUPERSET (charset);
1755       c = -1;
1756       for (; CONSP (parents); parents = XCDR (parents))
1757         {
1758           int id = XINT (XCAR (XCAR (parents)));
1759           int code_offset = XINT (XCDR (XCAR (parents)));
1760           unsigned this_code = code - code_offset;
1761 
1762           charset = CHARSET_FROM_ID (id);
1763           if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1764             break;
1765         }
1766     }
1767   else
1768     {
1769       char_index = CODE_POINT_TO_INDEX (charset, code);
1770       if (char_index < 0)
1771         return -1;
1772 
1773       if (method == CHARSET_METHOD_MAP)
1774         {
1775           Lisp_Object decoder;
1776 
1777           decoder = CHARSET_DECODER (charset);
1778           if (! VECTORP (decoder))
1779             {
1780               load_charset (charset, 1);
1781               decoder = CHARSET_DECODER (charset);
1782             }
1783           if (VECTORP (decoder))
1784             c = XINT (AREF (decoder, char_index));
1785           else
1786             c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1787         }
1788       else                      /* method == CHARSET_METHOD_OFFSET */
1789         {
1790           c = char_index + CHARSET_CODE_OFFSET (charset);
1791           if (CHARSET_UNIFIED_P (charset)
1792               && c > MAX_UNICODE_CHAR)
1793             MAYBE_UNIFY_CHAR (c);
1794         }
1795     }
1796 
1797   return c;
1798 }
1799 
1800 /* Variable used temporarily by the macro ENCODE_CHAR.  */
1801 Lisp_Object charset_work;
1802 
1803 /* Return a code-point of CHAR in CHARSET.  If CHAR doesn't belong to
1804    CHARSET, return CHARSET_INVALID_CODE (CHARSET).  If STRICT is true,
1805    use CHARSET's strict_max_char instead of max_char.  */
1806 
1807 unsigned
1808 encode_char (charset, c)
1809      struct charset *charset;
1810      int c;
1811 {
1812   unsigned code;
1813   enum charset_method method = CHARSET_METHOD (charset);
1814 
1815   if (CHARSET_UNIFIED_P (charset))
1816     {
1817       Lisp_Object deunifier;
1818       int code_index = -1;
1819 
1820       deunifier = CHARSET_DEUNIFIER (charset);
1821       if (! CHAR_TABLE_P (deunifier))
1822         {
1823           load_charset (charset, 2);
1824           deunifier = CHARSET_DEUNIFIER (charset);
1825         }
1826       if (CHAR_TABLE_P (deunifier))
1827         {
1828           Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1829 
1830           if (INTEGERP (deunified))
1831             code_index = XINT (deunified);
1832         }
1833       else
1834         {
1835           code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1836         }
1837       if (code_index >= 0)
1838         c = CHARSET_CODE_OFFSET (charset) + code_index;
1839     }
1840 
1841   if (method == CHARSET_METHOD_SUBSET)
1842     {
1843       Lisp_Object subset_info;
1844       struct charset *this_charset;
1845 
1846       subset_info = CHARSET_SUBSET (charset);
1847       this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1848       code = ENCODE_CHAR (this_charset, c);
1849       if (code == CHARSET_INVALID_CODE (this_charset)
1850           || code < XFASTINT (AREF (subset_info, 1))
1851           || code > XFASTINT (AREF (subset_info, 2)))
1852         return CHARSET_INVALID_CODE (charset);
1853       code += XINT (AREF (subset_info, 3));
1854       return code;
1855     }
1856 
1857   if (method == CHARSET_METHOD_SUPERSET)
1858     {
1859       Lisp_Object parents;
1860 
1861       parents = CHARSET_SUPERSET (charset);
1862       for (; CONSP (parents); parents = XCDR (parents))
1863         {
1864           int id = XINT (XCAR (XCAR (parents)));
1865           int code_offset = XINT (XCDR (XCAR (parents)));
1866           struct charset *this_charset = CHARSET_FROM_ID (id);
1867 
1868           code = ENCODE_CHAR (this_charset, c);
1869           if (code != CHARSET_INVALID_CODE (this_charset))
1870             return code + code_offset;
1871         }
1872       return CHARSET_INVALID_CODE (charset);
1873     }
1874 
1875   if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1876       || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1877     return CHARSET_INVALID_CODE (charset);
1878 
1879   if (method == CHARSET_METHOD_MAP)
1880     {
1881       Lisp_Object encoder;
1882       Lisp_Object val;
1883 
1884       encoder = CHARSET_ENCODER (charset);
1885       if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1886         {
1887           load_charset (charset, 2);
1888           encoder = CHARSET_ENCODER (charset);
1889         }
1890       if (CHAR_TABLE_P (encoder))
1891         {
1892           val = CHAR_TABLE_REF (encoder, c);
1893           if (NILP (val))
1894             return CHARSET_INVALID_CODE (charset);
1895           code = XINT (val);
1896           if (! CHARSET_COMPACT_CODES_P (charset))
1897             code = INDEX_TO_CODE_POINT (charset, code);
1898         }
1899       else
1900         {
1901           code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1902           code = INDEX_TO_CODE_POINT (charset, code);     
1903         }
1904     }
1905   else                          /* method == CHARSET_METHOD_OFFSET */
1906     {
1907       int code_index = c - CHARSET_CODE_OFFSET (charset);
1908 
1909       code = INDEX_TO_CODE_POINT (charset, code_index);
1910     }
1911 
1912   return code;
1913 }
1914 
1915 
1916 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1917        doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1918 Return nil if CODE-POINT is not valid in CHARSET.
1919 
1920 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1921 
1922 Optional argument RESTRICTION specifies a way to map the pair of CCS
1923 and CODE-POINT to a character.  Currently not supported and just ignored.  */)
1924   (charset, code_point, restriction)
1925      Lisp_Object charset, code_point, restriction;
1926 {
1927   int c, id;
1928   unsigned code;
1929   struct charset *charsetp;
1930 
1931   CHECK_CHARSET_GET_ID (charset, id);
1932   if (CONSP (code_point))
1933     {
1934       CHECK_NATNUM_CAR (code_point);
1935       CHECK_NATNUM_CDR (code_point);
1936       code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1937     }
1938   else
1939     {
1940       CHECK_NATNUM (code_point);
1941       code = XINT (code_point);
1942     }
1943   charsetp = CHARSET_FROM_ID (id);
1944   c = DECODE_CHAR (charsetp, code);
1945   return (c >= 0 ? make_number (c) : Qnil);
1946 }
1947 
1948 
1949 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1950        doc: /* Encode the character CH into a code-point of CHARSET.
1951 Return nil if CHARSET doesn't include CH.
1952 
1953 Optional argument RESTRICTION specifies a way to map CH to a
1954 code-point in CCS.  Currently not supported and just ignored.  */)
1955      (ch, charset, restriction)
1956      Lisp_Object ch, charset, restriction;
1957 {
1958   int id;
1959   unsigned code;
1960   struct charset *charsetp;
1961 
1962   CHECK_CHARSET_GET_ID (charset, id);
1963   CHECK_NATNUM (ch);
1964   charsetp = CHARSET_FROM_ID (id);
1965   code = ENCODE_CHAR (charsetp, XINT (ch));
1966   if (code == CHARSET_INVALID_CODE (charsetp))
1967     return Qnil;
1968   if (code > 0x7FFFFFF)
1969     return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1970   return make_number (code);
1971 }
1972 
1973 
1974 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1975        doc:
1976        /* Return a character of CHARSET whose position codes are CODEn.
1977 
1978 CODE1 through CODE4 are optional, but if you don't supply sufficient
1979 position codes, it is assumed that the minimum code in each dimension
1980 is specified.  */)
1981      (charset, code1, code2, code3, code4)
1982      Lisp_Object charset, code1, code2, code3, code4;
1983 {
1984   int id, dimension;
1985   struct charset *charsetp;
1986   unsigned code;
1987   int c;
1988 
1989   CHECK_CHARSET_GET_ID (charset, id);
1990   charsetp = CHARSET_FROM_ID (id);
1991 
1992   dimension = CHARSET_DIMENSION (charsetp);
1993   if (NILP (code1))
1994     code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1995             ? 0 : CHARSET_MIN_CODE (charsetp));
1996   else
1997     {
1998       CHECK_NATNUM (code1);
1999       if (XFASTINT (code1) >= 0x100)
2000         args_out_of_range (make_number (0xFF), code1);
2001       code = XFASTINT (code1);
2002 
2003       if (dimension > 1)
2004         {
2005           code <<= 8;
2006           if (NILP (code2))
2007             code |= charsetp->code_space[(dimension - 2) * 4];
2008           else
2009             {
2010               CHECK_NATNUM (code2);
2011               if (XFASTINT (code2) >= 0x100)
2012                 args_out_of_range (make_number (0xFF), code2);
2013               code |= XFASTINT (code2);
2014             }
2015 
2016           if (dimension > 2)
2017             {
2018               code <<= 8;
2019               if (NILP (code3))
2020                 code |= charsetp->code_space[(dimension - 3) * 4];
2021               else
2022                 {
2023                   CHECK_NATNUM (code3);
2024                   if (XFASTINT (code3) >= 0x100)
2025                     args_out_of_range (make_number (0xFF), code3);
2026                   code |= XFASTINT (code3);
2027                 }
2028 
2029               if (dimension > 3)
2030                 {
2031                   code <<= 8;
2032                   if (NILP (code4))
2033                     code |= charsetp->code_space[0];
2034                   else
2035                     {
2036                       CHECK_NATNUM (code4);
2037                       if (XFASTINT (code4) >= 0x100)
2038                         args_out_of_range (make_number (0xFF), code4);
2039                       code |= XFASTINT (code4);
2040                     }
2041                 }
2042             }
2043         }
2044     }
2045 
2046   if (CHARSET_ISO_FINAL (charsetp) >= 0)
2047     code &= 0x7F7F7F7F;
2048   c = DECODE_CHAR (charsetp, code);
2049   if (c < 0)
2050     error ("Invalid code(s)");
2051   return make_number (c);
2052 }
2053 
2054 
2055 /* Return the first charset in CHARSET_LIST that contains C.
2056    CHARSET_LIST is a list of charset IDs.  If it is nil, use
2057    Vcharset_ordered_list.  */
2058 
2059 struct charset *
2060 char_charset (c, charset_list, code_return)
2061      int c;
2062      Lisp_Object charset_list;
2063      unsigned *code_return;
2064 {
2065   int maybe_null = 0;
2066 
2067   if (NILP (charset_list))
2068     charset_list = Vcharset_ordered_list;
2069   else
2070     maybe_null = 1;
2071 
2072   while (CONSP (charset_list))
2073     {
2074       struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2075       unsigned code = ENCODE_CHAR (charset, c);
2076 
2077       if (code != CHARSET_INVALID_CODE (charset))
2078         {
2079           if (code_return)
2080             *code_return = code;
2081           return charset;
2082         }
2083       charset_list = XCDR (charset_list);
2084       if (! maybe_null
2085           && c <= MAX_UNICODE_CHAR
2086           && EQ (charset_list, Vcharset_non_preferred_head))
2087         return CHARSET_FROM_ID (charset_unicode);
2088     }
2089   return (maybe_null ? NULL
2090           : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2091           : CHARSET_FROM_ID (charset_eight_bit));
2092 }
2093 
2094 
2095 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2096        doc:
2097        /*Return list of charset and one to four position-codes of CH.
2098 The charset is decided by the current priority order of charsets.
2099 A position-code is a byte value of each dimension of the code-point of
2100 CH in the charset.  */)
2101      (ch)
2102      Lisp_Object ch;
2103 {
2104   struct charset *charset;
2105   int c, dimension;
2106   unsigned code;
2107   Lisp_Object val;
2108 
2109   CHECK_CHARACTER (ch);
2110   c = XFASTINT (ch);
2111   charset = CHAR_CHARSET (c);
2112   if (! charset)
2113     abort ();
2114   code = ENCODE_CHAR (charset, c);
2115   if (code == CHARSET_INVALID_CODE (charset))
2116     abort ();
2117   dimension = CHARSET_DIMENSION (charset);
2118   for (val = Qnil; dimension > 0; dimension--)
2119     {
2120       val = Fcons (make_number (code & 0xFF), val);
2121       code >>= 8;
2122     }
2123   return Fcons (CHARSET_NAME (charset), val);
2124 }
2125 
2126 
2127 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2128        doc: /* Return the charset of highest priority that contains CH.
2129 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2130 from which to find the charset.  It may also be a coding system.  In
2131 that case, find the charset from what supported by that coding system.  */)
2132      (ch, restriction)
2133      Lisp_Object ch, restriction;
2134 {
2135   struct charset *charset;
2136 
2137   CHECK_CHARACTER (ch);
2138   if (NILP (restriction))
2139     charset = CHAR_CHARSET (XINT (ch));
2140   else
2141     {
2142       Lisp_Object charset_list;
2143 
2144       if (CONSP (restriction))
2145         {
2146           for (charset_list = Qnil; CONSP (restriction);
2147                restriction = XCDR (restriction))
2148             {
2149               int id;
2150 
2151               CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2152               charset_list = Fcons (make_number (id), charset_list);
2153             }
2154           charset_list = Fnreverse (charset_list);
2155         }
2156       else
2157         charset_list = coding_system_charset_list (restriction);
2158       charset = char_charset (XINT (ch), charset_list, NULL);
2159       if (! charset)
2160         return Qnil;
2161     }
2162   return (CHARSET_NAME (charset));
2163 }
2164 
2165 
2166 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2167        doc: /*
2168 Return charset of a character in the current buffer at position POS.
2169 If POS is nil, it defauls to the current point.
2170 If POS is out of range, the value is nil.  */)
2171      (pos)
2172      Lisp_Object pos;
2173 {
2174   Lisp_Object ch;
2175   struct charset *charset;
2176 
2177   ch = Fchar_after (pos);
2178   if (! INTEGERP (ch))
2179     return ch;
2180   charset = CHAR_CHARSET (XINT (ch));
2181   return (CHARSET_NAME (charset));
2182 }
2183 
2184 
2185 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2186        doc: /*
2187 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2188 
2189 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2190 by their DIMENSION, CHARS, and FINAL-CHAR,
2191 whereas Emacs distinguishes them by charset symbol.
2192 See the documentation of the function `charset-info' for the meanings of
2193 DIMENSION, CHARS, and FINAL-CHAR.  */)
2194      (dimension, chars, final_char)
2195      Lisp_Object dimension, chars, final_char;
2196 {
2197   int id;
2198   int chars_flag;
2199 
2200   check_iso_charset_parameter (dimension, chars, final_char);
2201   chars_flag = XFASTINT (chars) == 96;
2202   id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2203                           XFASTINT (final_char));
2204   return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2205 }
2206 
2207 
2208 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2209        0, 0, 0,
2210        doc: /*
2211 Internal use only.
2212 Clear temporary charset mapping tables.
2213 It should be called only from temacs invoked for dumping.  */)
2214      ()
2215 {
2216   if (temp_charset_work)
2217     {
2218       free (temp_charset_work);
2219       temp_charset_work = NULL;
2220     }
2221 
2222   if (CHAR_TABLE_P (Vchar_unify_table))
2223     Foptimize_char_table (Vchar_unify_table, Qnil);
2224 
2225   return Qnil;
2226 }
2227 
2228 DEFUN ("charset-priority-list", Fcharset_priority_list,
2229        Scharset_priority_list, 0, 1, 0,
2230        doc: /* Return the list of charsets ordered by priority.
2231 HIGHESTP non-nil means just return the highest priority one.  */)
2232      (highestp)
2233      Lisp_Object highestp;
2234 {
2235   Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2236 
2237   if (!NILP (highestp))
2238     return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2239 
2240   while (!NILP (list))
2241     {
2242       val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2243       list = XCDR (list);
2244     }
2245   return Fnreverse (val);
2246 }
2247 
2248 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2249        1, MANY, 0,
2250        doc: /* Assign higher priority to the charsets given as arguments.
2251 usage: (set-charset-priority &rest charsets)  */)
2252        (nargs, args)
2253      int nargs;
2254      Lisp_Object *args;
2255 {
2256   Lisp_Object new_head, old_list, arglist[2];
2257   Lisp_Object list_2022, list_emacs_mule;
2258   int i, id;
2259 
2260   old_list = Fcopy_sequence (Vcharset_ordered_list);
2261   new_head = Qnil;
2262   for (i = 0; i < nargs; i++)
2263     {
2264       CHECK_CHARSET_GET_ID (args[i], id);
2265       if (! NILP (Fmemq (make_number (id), old_list)))
2266         {
2267           old_list = Fdelq (make_number (id), old_list);
2268           new_head = Fcons (make_number (id), new_head);
2269         }
2270     }
2271   arglist[0] = Fnreverse (new_head);
2272   arglist[1] = Vcharset_non_preferred_head = old_list;
2273   Vcharset_ordered_list = Fnconc (2, arglist);
2274   charset_ordered_list_tick++;
2275 
2276   charset_unibyte = -1;
2277   for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2278        CONSP (old_list); old_list = XCDR (old_list))
2279     {
2280       if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2281         list_2022 = Fcons (XCAR (old_list), list_2022);
2282       if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2283         list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2284       if (charset_unibyte < 0)
2285         {
2286           struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2287 
2288           if (CHARSET_DIMENSION (charset) == 1
2289               && CHARSET_ASCII_COMPATIBLE_P (charset)
2290               && CHARSET_MAX_CHAR (charset) >= 0x80)
2291             charset_unibyte = CHARSET_ID (charset);
2292         }
2293     }
2294   Viso_2022_charset_list = Fnreverse (list_2022);
2295   Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2296   if (charset_unibyte < 0)
2297     charset_unibyte = charset_iso_8859_1;
2298 
2299   return Qnil;
2300 }
2301 
2302 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2303        0, 1, 0,
2304        doc: /* Internal use only.
2305 Return charset identification number of CHARSET.  */)
2306      (charset)
2307      Lisp_Object charset;
2308 {
2309   int id;
2310 
2311   CHECK_CHARSET_GET_ID (charset, id);
2312   return make_number (id);
2313 }
2314 
2315 
2316 void
2317 init_charset ()
2318 {
2319   Lisp_Object tempdir;
2320   tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2321   if (access ((char *) SDATA (tempdir), 0) < 0)
2322     {
2323       dir_warning ("Error: charsets directory (%s) does not exist.\n\
2324 Emacs will not function correctly without the character map files.\n\
2325 Please check your installation!\n",
2326                    tempdir);
2327       /* TODO should this be a fatal error?  (Bug#909)  */
2328     }
2329 
2330   Vcharset_map_path = Fcons (tempdir, Qnil);
2331 }
2332 
2333 
2334 void
2335 init_charset_once ()
2336 {
2337   int i, j, k;
2338 
2339   for (i = 0; i < ISO_MAX_DIMENSION; i++)
2340     for (j = 0; j < ISO_MAX_CHARS; j++)
2341       for (k = 0; k < ISO_MAX_FINAL; k++)
2342         iso_charset_table[i][j][k] = -1;
2343 
2344   for (i = 0; i < 256; i++)
2345     emacs_mule_charset[i] = NULL;
2346 
2347   charset_jisx0201_roman = -1;
2348   charset_jisx0208_1978 = -1;
2349   charset_jisx0208 = -1;
2350   charset_ksc5601 = -1;
2351 }
2352 
2353 #ifdef emacs
2354 
2355 void
2356 syms_of_charset ()
2357 {
2358   DEFSYM (Qcharsetp, "charsetp");
2359 
2360   DEFSYM (Qascii, "ascii");
2361   DEFSYM (Qunicode, "unicode");
2362   DEFSYM (Qemacs, "emacs");
2363   DEFSYM (Qeight_bit, "eight-bit");
2364   DEFSYM (Qiso_8859_1, "iso-8859-1");
2365 
2366   DEFSYM (Qgl, "gl");
2367   DEFSYM (Qgr, "gr");
2368 
2369   staticpro (&Vcharset_ordered_list);
2370   Vcharset_ordered_list = Qnil;
2371 
2372   staticpro (&Viso_2022_charset_list);
2373   Viso_2022_charset_list = Qnil;
2374 
2375   staticpro (&Vemacs_mule_charset_list);
2376   Vemacs_mule_charset_list = Qnil;
2377 
2378   /* Don't staticpro them here.  It's done in syms_of_fns.  */
2379   QCtest = intern (":test");
2380   Qeq = intern ("eq");
2381 
2382   staticpro (&Vcharset_hash_table);
2383   {
2384     Lisp_Object args[2];
2385     args[0] = QCtest;
2386     args[1] = Qeq;
2387     Vcharset_hash_table = Fmake_hash_table (2, args);
2388   }
2389 
2390   charset_table_size = 128;
2391   charset_table = ((struct charset *)
2392                    xmalloc (sizeof (struct charset) * charset_table_size));
2393   charset_table_used = 0;
2394 
2395   defsubr (&Scharsetp);
2396   defsubr (&Smap_charset_chars);
2397   defsubr (&Sdefine_charset_internal);
2398   defsubr (&Sdefine_charset_alias);
2399   defsubr (&Scharset_plist);
2400   defsubr (&Sset_charset_plist);
2401   defsubr (&Sunify_charset);
2402   defsubr (&Sget_unused_iso_final_char);
2403   defsubr (&Sdeclare_equiv_charset);
2404   defsubr (&Sfind_charset_region);
2405   defsubr (&Sfind_charset_string);
2406   defsubr (&Sdecode_char);
2407   defsubr (&Sencode_char);
2408   defsubr (&Ssplit_char);
2409   defsubr (&Smake_char);
2410   defsubr (&Schar_charset);
2411   defsubr (&Scharset_after);
2412   defsubr (&Siso_charset);
2413   defsubr (&Sclear_charset_maps);
2414   defsubr (&Scharset_priority_list);
2415   defsubr (&Sset_charset_priority);
2416   defsubr (&Scharset_id_internal);
2417 
2418   DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2419                doc: /* *List of directories to search for charset map files.  */);
2420   Vcharset_map_path = Qnil;
2421 
2422   DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2423                doc: /* Inhibit loading of charset maps.  Used when dumping Emacs.  */);
2424   inhibit_load_charset_map = 0;
2425 
2426   DEFVAR_LISP ("charset-list", &Vcharset_list,
2427                doc: /* List of all charsets ever defined.  */);
2428   Vcharset_list = Qnil;
2429 
2430   DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2431                doc: /* ISO639 language mnemonic symbol for the current language environment.
2432 If the current language environment is for multiple languages (e.g. "Latin-1"),
2433 the value may be a list of mnemonics.  */);
2434   Vcurrent_iso639_language = Qnil;
2435 
2436   charset_ascii
2437     = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2438                                0, 127, 'B', -1, 0, 1, 0, 0);
2439   charset_iso_8859_1
2440     = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2441                                0, 255, -1, -1, -1, 1, 0, 0);
2442   charset_unicode
2443     = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2444                                0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2445   charset_emacs
2446     = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2447                                0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2448   charset_eight_bit
2449     = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2450                                128, 255, -1, 0, -1, 0, 1,
2451                                MAX_5_BYTE_CHAR + 1);
2452   charset_unibyte = charset_iso_8859_1;
2453 }
2454 
2455 #endif /* emacs */
2456 
2457 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2458    (do not change this comment) */