1 /* GNU Emacs case conversion functions.
  2    Copyright (C) 1985, 1994, 1997, 1998, 1999, 2001, 2002, 2003, 2004,
  3                  2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
  4 
  5 This file is part of GNU Emacs.
  6 
  7 GNU Emacs is free software: you can redistribute it and/or modify
  8 it under the terms of the GNU General Public License as published by
  9 the Free Software Foundation, either version 3 of the License, or
 10 (at your option) any later version.
 11 
 12 GNU Emacs is distributed in the hope that it will be useful,
 13 but WITHOUT ANY WARRANTY; without even the implied warranty of
 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 15 GNU General Public License for more details.
 16 
 17 You should have received a copy of the GNU General Public License
 18 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 19 
 20 
 21 #include <config.h>
 22 #include <setjmp.h>
 23 #include "lisp.h"
 24 #include "buffer.h"
 25 #include "character.h"
 26 #include "commands.h"
 27 #include "syntax.h"
 28 #include "composite.h"
 29 #include "keymap.h"
 30 
 31 enum case_action {CASE_UP, CASE_DOWN, CASE_CAPITALIZE, CASE_CAPITALIZE_UP};
 32 
 33 Lisp_Object Qidentity;
 34 
 35 Lisp_Object
 36 casify_object (flag, obj)
 37      enum case_action flag;
 38      Lisp_Object obj;
 39 {
 40   register int c, c1;
 41   register int inword = flag == CASE_DOWN;
 42 
 43   /* If the case table is flagged as modified, rescan it.  */
 44   if (NILP (XCHAR_TABLE (current_buffer->downcase_table)->extras[1]))
 45     Fset_case_table (current_buffer->downcase_table);
 46 
 47   if (INTEGERP (obj))
 48     {
 49       int flagbits = (CHAR_ALT | CHAR_SUPER | CHAR_HYPER
 50                       | CHAR_SHIFT | CHAR_CTL | CHAR_META);
 51       int flags = XINT (obj) & flagbits;
 52       int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 53 
 54       /* If the character has higher bits set
 55          above the flags, return it unchanged.
 56          It is not a real character.  */
 57       if ((unsigned) XFASTINT (obj) > (unsigned) flagbits)
 58         return obj;
 59 
 60       c1 = XFASTINT (obj) & ~flagbits;
 61       /* FIXME: Even if enable-multibyte-characters is nil, we may
 62          manipulate multibyte chars.  This means we have a bug for latin-1
 63          chars since when we receive an int 128-255 we can't tell whether
 64          it's an eight-bit byte or a latin-1 char.  */
 65       if (c1 >= 256)
 66         multibyte = 1;
 67       if (! multibyte)
 68         MAKE_CHAR_MULTIBYTE (c1);
 69       c = DOWNCASE (c1);
 70       if (inword)
 71         XSETFASTINT (obj, c | flags);
 72       else if (c == (XFASTINT (obj) & ~flagbits))
 73         {
 74           if (! inword)
 75             c = UPCASE1 (c1);
 76           if (! multibyte)
 77             MAKE_CHAR_UNIBYTE (c);
 78           XSETFASTINT (obj, c | flags);
 79         }
 80       return obj;
 81     }
 82 
 83   if (!STRINGP (obj))
 84     wrong_type_argument (Qchar_or_string_p, obj);
 85   else if (!STRING_MULTIBYTE (obj))
 86     {
 87       EMACS_INT i;
 88       EMACS_INT size = SCHARS (obj);
 89 
 90       obj = Fcopy_sequence (obj);
 91       for (i = 0; i < size; i++)
 92         {
 93           c = SREF (obj, i);
 94           MAKE_CHAR_MULTIBYTE (c);
 95           c1 = c;
 96           if (inword && flag != CASE_CAPITALIZE_UP)
 97             c = DOWNCASE (c);
 98           else if (!UPPERCASEP (c)
 99                    && (!inword || flag != CASE_CAPITALIZE_UP))
100             c = UPCASE1 (c1);
101           if ((int) flag >= (int) CASE_CAPITALIZE)
102             inword = (SYNTAX (c) == Sword);
103           if (c != c1)
104             {
105                   MAKE_CHAR_UNIBYTE (c);
106               /* If the char can't be converted to a valid byte, just don't
107                  change it.  */
108               if (c >= 0 && c < 256)
109                 SSET (obj, i, c);
110             }
111         }
112       return obj;
113     }
114   else
115     {
116       EMACS_INT i, i_byte, size = SCHARS (obj);
117       int len;
118       USE_SAFE_ALLOCA;
119       unsigned char *dst, *o;
120       /* Over-allocate by 12%: this is a minor overhead, but should be
121          sufficient in 99.999% of the cases to avoid a reallocation.  */
122       EMACS_INT o_size = SBYTES (obj) + SBYTES (obj) / 8 + MAX_MULTIBYTE_LENGTH;
123       SAFE_ALLOCA (dst, void *, o_size);
124       o = dst;
125 
126       for (i = i_byte = 0; i < size; i++, i_byte += len)
127         {
128           if ((o - dst) + MAX_MULTIBYTE_LENGTH > o_size)
129             { /* Not enough space for the next char: grow the destination.  */
130               unsigned char *old_dst = dst;
131               o_size += o_size; /* Probably overkill, but extremely rare.  */
132               SAFE_ALLOCA (dst, void *, o_size);
133               bcopy (old_dst, dst, o - old_dst);
134               o = dst + (o - old_dst);
135             }
136           c = STRING_CHAR_AND_LENGTH (SDATA (obj) + i_byte, len);
137           if (inword && flag != CASE_CAPITALIZE_UP)
138             c = DOWNCASE (c);
139           else if (!UPPERCASEP (c)
140                    && (!inword || flag != CASE_CAPITALIZE_UP))
141             c = UPCASE1 (c);
142           if ((int) flag >= (int) CASE_CAPITALIZE)
143             inword = (SYNTAX (c) == Sword);
144           o += CHAR_STRING (c, o);
145         }
146       eassert (o - dst <= o_size);
147       obj = make_multibyte_string (dst, size, o - dst);
148       SAFE_FREE ();
149       return obj;
150     }
151 }
152 
153 DEFUN ("upcase", Fupcase, Supcase, 1, 1, 0,
154        doc: /* Convert argument to upper case and return that.
155 The argument may be a character or string.  The result has the same type.
156 The argument object is not altered--the value is a copy.
157 See also `capitalize', `downcase' and `upcase-initials'.  */)
158      (obj)
159      Lisp_Object obj;
160 {
161   return casify_object (CASE_UP, obj);
162 }
163 
164 DEFUN ("downcase", Fdowncase, Sdowncase, 1, 1, 0,
165        doc: /* Convert argument to lower case and return that.
166 The argument may be a character or string.  The result has the same type.
167 The argument object is not altered--the value is a copy.  */)
168      (obj)
169      Lisp_Object obj;
170 {
171   return casify_object (CASE_DOWN, obj);
172 }
173 
174 DEFUN ("capitalize", Fcapitalize, Scapitalize, 1, 1, 0,
175        doc: /* Convert argument to capitalized form and return that.
176 This means that each word's first character is upper case
177 and the rest is lower case.
178 The argument may be a character or string.  The result has the same type.
179 The argument object is not altered--the value is a copy.  */)
180      (obj)
181      Lisp_Object obj;
182 {
183   return casify_object (CASE_CAPITALIZE, obj);
184 }
185 
186 /* Like Fcapitalize but change only the initials.  */
187 
188 DEFUN ("upcase-initials", Fupcase_initials, Supcase_initials, 1, 1, 0,
189        doc: /* Convert the initial of each word in the argument to upper case.
190 Do not change the other letters of each word.
191 The argument may be a character or string.  The result has the same type.
192 The argument object is not altered--the value is a copy.  */)
193      (obj)
194      Lisp_Object obj;
195 {
196   return casify_object (CASE_CAPITALIZE_UP, obj);
197 }
198 
199 /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
200    b and e specify range of buffer to operate on. */
201 
202 void
203 casify_region (flag, b, e)
204      enum case_action flag;
205      Lisp_Object b, e;
206 {
207   register int c;
208   register int inword = flag == CASE_DOWN;
209   register int multibyte = !NILP (current_buffer->enable_multibyte_characters);
210   EMACS_INT start, end;
211   EMACS_INT start_byte, end_byte;
212   EMACS_INT first = -1, last;   /* Position of first and last changes.  */
213   EMACS_INT opoint = PT;
214   EMACS_INT opoint_byte = PT_BYTE;
215 
216   if (EQ (b, e))
217     /* Not modifying because nothing marked */
218     return;
219 
220   /* If the case table is flagged as modified, rescan it.  */
221   if (NILP (XCHAR_TABLE (current_buffer->downcase_table)->extras[1]))
222     Fset_case_table (current_buffer->downcase_table);
223 
224   validate_region (&b, &e);
225   start = XFASTINT (b);
226   end = XFASTINT (e);
227   modify_region (current_buffer, start, end, 0);
228   record_change (start, end - start);
229   start_byte = CHAR_TO_BYTE (start);
230   end_byte = CHAR_TO_BYTE (end);
231 
232   while (start < end)
233     {
234       int c2, len;
235 
236       if (multibyte)
237         {
238           c = FETCH_MULTIBYTE_CHAR (start_byte);
239           len = CHAR_BYTES (c);
240         }
241       else
242         {
243           c = FETCH_BYTE (start_byte);
244           MAKE_CHAR_MULTIBYTE (c);
245           len = 1;
246         }
247       c2 = c;
248       if (inword && flag != CASE_CAPITALIZE_UP)
249         c = DOWNCASE (c);
250       else if (!UPPERCASEP (c)
251                && (!inword || flag != CASE_CAPITALIZE_UP))
252         c = UPCASE1 (c);
253       if ((int) flag >= (int) CASE_CAPITALIZE)
254         inword = ((SYNTAX (c) == Sword) && (inword || !SYNTAX_PREFIX (c)));
255       if (c != c2)
256         {
257           last = start;
258           if (first < 0)
259             first = start;
260 
261           if (! multibyte)
262             {
263               MAKE_CHAR_UNIBYTE (c);
264               FETCH_BYTE (start_byte) = c;
265             }
266           else if (ASCII_CHAR_P (c2) && ASCII_CHAR_P (c))
267             FETCH_BYTE (start_byte) = c;
268           else
269             {
270               int tolen = CHAR_BYTES (c);
271               int j;
272               unsigned char str[MAX_MULTIBYTE_LENGTH];
273 
274               CHAR_STRING (c, str);
275               if (len == tolen)
276                 {
277                   /* Length is unchanged.  */
278                   for (j = 0; j < len; ++j)
279                     FETCH_BYTE (start_byte + j) = str[j];
280                 }
281               else
282                 {
283                   /* Replace one character with the other,
284                      keeping text properties the same.  */
285                   replace_range_2 (start, start_byte,
286                                    start + 1, start_byte + len,
287                                    str, 1, tolen,
288                                    0);
289                   len = tolen;
290                 }
291             }
292         }
293       start++;
294       start_byte += len;
295     }
296 
297   if (PT != opoint)
298     TEMP_SET_PT_BOTH (opoint, opoint_byte);
299 
300   if (first >= 0)
301     {
302       signal_after_change (first, last + 1 - first, last + 1 - first);
303       update_compositions (first, last + 1, CHECK_ALL);
304     }
305 }
306 
307 DEFUN ("upcase-region", Fupcase_region, Supcase_region, 2, 2, "r",
308        doc: /* Convert the region to upper case.  In programs, wants two arguments.
309 These arguments specify the starting and ending character numbers of
310 the region to operate on.  When used as a command, the text between
311 point and the mark is operated on.
312 See also `capitalize-region'.  */)
313      (beg, end)
314      Lisp_Object beg, end;
315 {
316   casify_region (CASE_UP, beg, end);
317   return Qnil;
318 }
319 
320 DEFUN ("downcase-region", Fdowncase_region, Sdowncase_region, 2, 2, "r",
321        doc: /* Convert the region to lower case.  In programs, wants two arguments.
322 These arguments specify the starting and ending character numbers of
323 the region to operate on.  When used as a command, the text between
324 point and the mark is operated on.  */)
325      (beg, end)
326      Lisp_Object beg, end;
327 {
328   casify_region (CASE_DOWN, beg, end);
329   return Qnil;
330 }
331 
332 DEFUN ("capitalize-region", Fcapitalize_region, Scapitalize_region, 2, 2, "r",
333        doc: /* Convert the region to capitalized form.
334 Capitalized form means each word's first character is upper case
335 and the rest of it is lower case.
336 In programs, give two arguments, the starting and ending
337 character positions to operate on.  */)
338      (beg, end)
339      Lisp_Object beg, end;
340 {
341   casify_region (CASE_CAPITALIZE, beg, end);
342   return Qnil;
343 }
344 
345 /* Like Fcapitalize_region but change only the initials.  */
346 
347 DEFUN ("upcase-initials-region", Fupcase_initials_region,
348        Supcase_initials_region, 2, 2, "r",
349        doc: /* Upcase the initial of each word in the region.
350 Subsequent letters of each word are not changed.
351 In programs, give two arguments, the starting and ending
352 character positions to operate on.  */)
353      (beg, end)
354      Lisp_Object beg, end;
355 {
356   casify_region (CASE_CAPITALIZE_UP, beg, end);
357   return Qnil;
358 }
359 
360 static Lisp_Object
361 operate_on_word (arg, newpoint)
362      Lisp_Object arg;
363      EMACS_INT *newpoint;
364 {
365   Lisp_Object val;
366   int farend;
367   int iarg;
368 
369   CHECK_NUMBER (arg);
370   iarg = XINT (arg);
371   farend = scan_words (PT, iarg);
372   if (!farend)
373     farend = iarg > 0 ? ZV : BEGV;
374 
375   *newpoint = PT > farend ? PT : farend;
376   XSETFASTINT (val, farend);
377 
378   return val;
379 }
380 
381 DEFUN ("upcase-word", Fupcase_word, Supcase_word, 1, 1, "p",
382        doc: /* Convert following word (or ARG words) to upper case, moving over.
383 With negative argument, convert previous words but do not move.
384 See also `capitalize-word'.  */)
385      (arg)
386      Lisp_Object arg;
387 {
388   Lisp_Object beg, end;
389   EMACS_INT newpoint;
390   XSETFASTINT (beg, PT);
391   end = operate_on_word (arg, &newpoint);
392   casify_region (CASE_UP, beg, end);
393   SET_PT (newpoint);
394   return Qnil;
395 }
396 
397 DEFUN ("downcase-word", Fdowncase_word, Sdowncase_word, 1, 1, "p",
398        doc: /* Convert following word (or ARG words) to lower case, moving over.
399 With negative argument, convert previous words but do not move.  */)
400      (arg)
401      Lisp_Object arg;
402 {
403   Lisp_Object beg, end;
404   EMACS_INT newpoint;
405   XSETFASTINT (beg, PT);
406   end = operate_on_word (arg, &newpoint);
407   casify_region (CASE_DOWN, beg, end);
408   SET_PT (newpoint);
409   return Qnil;
410 }
411 
412 DEFUN ("capitalize-word", Fcapitalize_word, Scapitalize_word, 1, 1, "p",
413        doc: /* Capitalize the following word (or ARG words), moving over.
414 This gives the word(s) a first character in upper case
415 and the rest lower case.
416 With negative argument, capitalize previous words but do not move.  */)
417      (arg)
418      Lisp_Object arg;
419 {
420   Lisp_Object beg, end;
421   EMACS_INT newpoint;
422   XSETFASTINT (beg, PT);
423   end = operate_on_word (arg, &newpoint);
424   casify_region (CASE_CAPITALIZE, beg, end);
425   SET_PT (newpoint);
426   return Qnil;
427 }
428 
429 void
430 syms_of_casefiddle ()
431 {
432   Qidentity = intern_c_string ("identity");
433   staticpro (&Qidentity);
434   defsubr (&Supcase);
435   defsubr (&Sdowncase);
436   defsubr (&Scapitalize);
437   defsubr (&Supcase_initials);
438   defsubr (&Supcase_region);
439   defsubr (&Sdowncase_region);
440   defsubr (&Scapitalize_region);
441   defsubr (&Supcase_initials_region);
442   defsubr (&Supcase_word);
443   defsubr (&Sdowncase_word);
444   defsubr (&Scapitalize_word);
445 }
446 
447 void
448 keys_of_casefiddle ()
449 {
450   initial_define_key (control_x_map, Ctl('U'), "upcase-region");
451   Fput (intern ("upcase-region"), Qdisabled, Qt);
452   initial_define_key (control_x_map, Ctl('L'), "downcase-region");
453   Fput (intern ("downcase-region"), Qdisabled, Qt);
454 
455   initial_define_key (meta_map, 'u', "upcase-word");
456   initial_define_key (meta_map, 'l', "downcase-word");
457   initial_define_key (meta_map, 'c', "capitalize-word");
458 }
459 
460 /* arch-tag: 60a73c66-5489-47e7-a81f-cead4057c526
461    (do not change this comment) */