]> ncurses.scripts.mit.edu Git - ncurses.git/blob - ncurses/comp_scan.c
ncurses 4.1
[ncurses.git] / ncurses / comp_scan.c
1 /***************************************************************************
2 *                            COPYRIGHT NOTICE                              *
3 ****************************************************************************
4 *                ncurses is copyright (C) 1992-1995                        *
5 *                          Zeyd M. Ben-Halim                               *
6 *                          zmbenhal@netcom.com                             *
7 *                          Eric S. Raymond                                 *
8 *                          esr@snark.thyrsus.com                           *
9 *                                                                          *
10 *        Permission is hereby granted to reproduce and distribute ncurses  *
11 *        by any means and for any fee, whether alone or as part of a       *
12 *        larger distribution, in source or in binary form, PROVIDED        *
13 *        this notice is included with any such distribution, and is not    *
14 *        removed from any of its header files. Mention of ncurses in any   *
15 *        applications linked with it is highly appreciated.                *
16 *                                                                          *
17 *        ncurses comes AS IS with no warranty, implied or expressed.       *
18 *                                                                          *
19 ***************************************************************************/
20
21 /*
22  *      comp_scan.c --- Lexical scanner for terminfo compiler.
23  *
24  *      _nc_reset_input()
25  *      _nc_get_token()
26  *      _nc_panic_mode()
27  *      int _nc_syntax;
28  *      int _nc_curr_line;
29  *      long _nc_curr_file_pos;
30  *      long _nc_comment_start;
31  *      long _nc_comment_end;
32  */
33
34 #include <curses.priv.h>
35
36 #include <ctype.h>
37 #include <tic.h>
38
39 MODULE_ID("$Id: comp_scan.c,v 1.21 1997/04/24 10:37:34 tom Exp $")
40
41 /*
42  * Maximum length of string capability we'll accept before raising an error.
43  * Yes, there is a real capability in /etc/termcap this long, an "is".
44  */
45 #define MAXCAPLEN       600
46
/*
 * True if ch is a blank or a tab.  The argument is parenthesized so that
 * expressions containing low-precedence operators (?:, =, ||) expand
 * correctly.  Note that the argument is still evaluated twice, so do not
 * pass an expression with side effects.
 */
#define iswhite(ch)     ((ch) == ' '  ||  (ch) == '\t')
48
49 int     _nc_syntax;             /* termcap or terminfo? */
50 int     _nc_curr_line;          /* current line # in input */
51 int     _nc_curr_col;           /* current column # in input */
52 long    _nc_curr_file_pos;      /* file offset of current line */
53 long    _nc_comment_start;      /* start of comment range before name */
54 long    _nc_comment_end;        /* end of comment range before name */
55 long    _nc_start_line;         /* start line of current entry */
56
57 /*****************************************************************************
58  *
59  * Token-grabbing machinery
60  *
61  *****************************************************************************/
62
63 static bool first_column;       /* See 'next_char()' below */
64 static char separator;          /* capability separator */
65 static int pushtype;            /* type of pushback token */
66 static char pushname[MAX_NAME_SIZE+1];
67
68 static int  next_char(void);
69 static long stream_pos(void);
70 static bool end_of_stream(void);
71 static char trans_string(char *);
72 static void push_back(char c);
73
/*
 * Handle a possible termcap-style continuation.  If ch is a backslash,
 * discard it together with every newline, blank and tab that follows, and
 * return the first character after that run.  Any other character is
 * returned unchanged.
 */
static inline int eat_escaped_newline(int ch)
{
        if (ch != '\\')
                return ch;

        do {
                ch = next_char();
        } while (ch == '\n'  ||  iswhite(ch));

        return ch;
}
82
83 /*
84  *      int
85  *      get_token()
86  *
87  *      Scans the input for the next token, storing the specifics in the
88  *      global structure 'curr_token' and returning one of the following:
89  *
90  *              NAMES           A line beginning in column 1.  'name'
91  *                              will be set to point to everything up to but
92  *                              not including the first separator on the line.
93  *              BOOLEAN         An entry consisting of a name followed by
94  *                              a separator.  'name' will be set to point to
95  *                              the name of the capability.
96  *              NUMBER          An entry of the form
97  *                                      name#digits,
98  *                              'name' will be set to point to the capability
99  *                              name and 'valnumber' to the number given.
100  *              STRING          An entry of the form
101  *                                      name=characters,
102  *                              'name' is set to the capability name and
103  *                              'valstring' to the string of characters, with
104  *                              input translations done.
105  *              CANCEL          An entry of the form
106  *                                      name@,
107  *                              'name' is set to the capability name and
108  *                              'valnumber' to -1.
109  *              EOF             The end of the file has been reached.
110  *
111  *      A `separator' is either a comma or a semicolon, depending on whether
112  *      we are in termcap or terminfo mode.
113  *
114  */
115
116 int _nc_get_token(void)
117 {
118 static const char terminfo_punct[] = "@%&*!#";
119 long            number;
120 int             type;
121 int             ch;
122 bool            found;
123 static char     buffer[MAX_ENTRY_SIZE];
124 char            *ptr;
125 int             dot_flag = FALSE;
126 long            token_start;
127
128         if (pushtype != NO_PUSHBACK)
129         {
130             int retval = pushtype;
131
132             _nc_set_type(pushname);
133             DEBUG(3, ("pushed-back token: `%s', class %d",
134                       _nc_curr_token.tk_name, pushtype));
135
136             pushtype = NO_PUSHBACK;
137             pushname[0] = '\0';
138
139             /* currtok wasn't altered by _nc_push_token() */
140             return(retval);
141         }
142
143         if (end_of_stream())
144             return(EOF);
145
146 start_token:
147         token_start = stream_pos();
148         while ((ch = next_char()) == '\n'  ||  iswhite(ch))
149             continue;
150
151         ch = eat_escaped_newline(ch);
152
153         if (ch == EOF)
154             type = EOF;
155         else {
156             /* if this is a termcap entry, skip a leading separator */
157             if (separator == ':' && ch == ':')
158                 ch = next_char();
159
160             if (ch == '.') {
161                         dot_flag = TRUE;
162                         DEBUG(8, ("dot-flag set"));
163
164                         while ((ch = next_char())=='.' || iswhite(ch))
165                             continue;
166             }
167
168             if (ch == EOF) {
169                 type = EOF;
170                 goto end_of_token;
171             }
172
173             /* have to make some punctuation chars legal for terminfo */
174             if (!isalnum(ch) && !strchr(terminfo_punct, (char)ch)) {
175                  _nc_warning("Illegal character (expected alphanumeric or %s) - %s",
176                         terminfo_punct, _tracechar((chtype)ch));
177                  _nc_panic_mode(separator);
178                  goto start_token;
179             }
180
181             ptr = buffer;
182             *(ptr++) = ch;
183
184             if (first_column) {
185                         char    *desc;
186
187                         _nc_comment_start = token_start;
188                         _nc_comment_end = _nc_curr_file_pos;
189                         _nc_start_line = _nc_curr_line;
190
191                         _nc_syntax = ERR;
192                         while ((ch = next_char()) != '\n')
193                         {
194                             if (ch == EOF)
195                                 _nc_err_abort("premature EOF");
196                             else if (ch == ':')
197                             {
198                                 _nc_syntax = SYN_TERMCAP;
199                                 separator = ':';
200                                 break;
201                             }
202                             else if (ch == ',')
203                             {
204                                 _nc_syntax = SYN_TERMINFO;
205                                 separator = ',';
206                                 /*
207                                  * Fall-through here is not an accident.
208                                  * The idea is that if we see a comma, we
209                                  * figure this is terminfo unless we
210                                  * subsequently run into a colon -- but
211                                  * we don't stop looking for that colon until
212                                  * hitting a newline.  This allows commas to
213                                  * be embedded in description fields of
214                                  * either syntax.
215                                  */
216                                 /* FALLTHRU */
217                             }
218                             else
219                                 ch = eat_escaped_newline(ch);
220
221                             *ptr++ = ch;
222                         }
223                         ptr[0] = '\0';
224                         if (_nc_syntax == ERR)
225                         {
226                             /*
227                              * Grrr...what we ought to do here is barf,
228                              * complaining that the entry is malformed.
229                              * But because a couple of name fields in the
230                              * 8.2 termcap file end with |\, we just have
231                              * to assume it's termcap syntax.
232                              */
233                             _nc_syntax = SYN_TERMCAP;
234                             separator = ':';
235                         }
236                         else if (_nc_syntax == SYN_TERMINFO)
237                         {
238                             /* throw away trailing /, *$/ */
239                             for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--)
240                                 continue;
241                             ptr[1] = '\0';
242                         }
243
244                         /*
245                          * This is the soonest we have the terminal name
246                          * fetched.  Set up for following warning messages.
247                          */
248                         ptr = strchr(buffer, '|');
249                         if (ptr == (char *)NULL)
250                             ptr = buffer + strlen(buffer);
251                         ch = *ptr;
252                         *ptr = '\0';
253                         _nc_set_type(buffer);
254                         *ptr = ch;
255
256                         /*
257                          * Compute the boundary between the aliases and the
258                          * description field for syntax-checking purposes.
259                          */
260                         desc = strrchr(buffer, '|');
261                         if (desc)
262                             if (*desc == '\0')
263                                 _nc_warning("empty longname field");
264                             else if (strchr(desc, ' ') == (char *)NULL)
265                                 _nc_warning("older tic versions may treat the description field as an alias");
266                         if (!desc)
267                             desc = buffer + strlen(buffer);
268
269                         /*
270                          * Whitespace in a name field other than the long name
271                          * can confuse rdist and some termcap tools.  Slashes
272                          * are a no-no.  Other special characters can be
273                          * dangerous due to shell expansion.
274                          */
275                         for (ptr = buffer; ptr < desc; ptr++)
276                         {
277                             if (isspace(*ptr))
278                             {
279                                 _nc_warning("whitespace in name or alias field");
280                                 break;
281                             }
282                             else if (*ptr == '/')
283                             {
284                                 _nc_warning("slashes aren't allowed in names or aliases");
285                                 break;
286                             }
287                             else if (strchr("$[]!*?", *ptr))
288                             {
289                                 _nc_warning("dubious character `%c' in name or alias field", *ptr);
290                                 break;
291                             }
292                         }
293
294                         ptr = buffer;
295
296                         _nc_curr_token.tk_name = buffer;
297                         type = NAMES;
298             } else {
299                         while ((ch = next_char()) != EOF) {
300                                 if (!isalnum(ch)) {
301                                         if (_nc_syntax == SYN_TERMINFO) {
302                                                 if (ch != '_')
303                                                         break;
304                                         } else { /* allow ';' for "k;" */
305                                                 if (ch != ';')
306                                                         break;
307                                         }
308                                 }
309                                 *(ptr++) = ch;
310                         }
311
312                         *ptr++ = '\0';
313                         switch (ch) {
314                         case ',':
315                         case ':':
316                                 if (ch != separator)
317                                         _nc_err_abort("Separator inconsistent with syntax");
318                                 _nc_curr_token.tk_name = buffer;
319                                 type = BOOLEAN;
320                                 break;
321                         case '@':
322                                 if ((ch = next_char()) != separator)
323                                         _nc_warning("Missing separator after `%s', have %s",
324                                                 buffer, _tracechar((chtype)ch));
325                                 _nc_curr_token.tk_name = buffer;
326                                 type = CANCEL;
327                                 break;
328
329                         case '#':
330                                 number = 0;
331                                 found  = FALSE;
332                                 while (isdigit(ch = next_char())) {
333                                         number = number * 10 + ch - '0';
334                                         found  = TRUE;
335                                 }
336                                 if (found == FALSE)
337                                         _nc_warning("no value given for `%s'", buffer);
338                                 if (ch != separator)
339                                         _nc_warning("Missing separator");
340                                 _nc_curr_token.tk_name = buffer;
341                                 _nc_curr_token.tk_valnumber = number;
342                                 type = NUMBER;
343                                 break;
344
345                         case '=':
346                                 ch = trans_string(ptr);
347                                 if (ch != separator)
348                                         _nc_warning("Missing separator");
349                                 _nc_curr_token.tk_name = buffer;
350                                 _nc_curr_token.tk_valstring = ptr;
351                                 type = STRING;
352                                 break;
353
354                         case EOF:
355                                 type = EOF;
356                                 break;
357                         default:
358                                 /* just to get rid of the compiler warning */
359                                 type = UNDEF;
360                                 _nc_warning("Illegal character - %s",
361                                         _tracechar((chtype)ch));
362                         }
363                 } /* end else (first_column == FALSE) */
364         } /* end else (ch != EOF) */
365
366 end_of_token:
367         if (dot_flag == TRUE)
368             DEBUG(8, ("Commented out "));
369
370         if (_nc_tracing & TRACE_IEVENT)
371         {
372             fprintf(stderr, "Token: ");
373             switch (type)
374             {
375                 case BOOLEAN:
376                     fprintf(stderr, "Boolean; name='%s'\n",
377                             _nc_curr_token.tk_name);
378                     break;
379
380                 case NUMBER:
381                     fprintf(stderr, "Number;  name='%s', value=%d\n",
382                             _nc_curr_token.tk_name,
383                             _nc_curr_token.tk_valnumber);
384                     break;
385
386                 case STRING:
387                     fprintf(stderr, "String;  name='%s', value=%s\n",
388                             _nc_curr_token.tk_name,
389                             _nc_visbuf(_nc_curr_token.tk_valstring));
390                     break;
391
392                 case CANCEL:
393                     fprintf(stderr, "Cancel; name='%s'\n",
394                             _nc_curr_token.tk_name);
395                     break;
396
397                 case NAMES:
398
399                     fprintf(stderr, "Names; value='%s'\n",
400                             _nc_curr_token.tk_name);
401                     break;
402
403                 case EOF:
404                     fprintf(stderr, "End of file\n");
405                     break;
406
407                 default:
408                     _nc_warning("Bad token type");
409             }
410         }
411
412         if (dot_flag == TRUE)           /* if commented out, use the next one */
413             type = _nc_get_token();
414
415         DEBUG(3, ("token: `%s', class %d", _nc_curr_token.tk_name, type));
416
417         return(type);
418 }
419
420 /*
421  *      char
422  *      trans_string(ptr)
423  *
424  *      Reads characters using next_char() until encountering a separator, nl,
425  *      or end-of-file.  The returned value is the character which caused
426  *      reading to stop.  The following translations are done on the input:
427  *
428  *              ^X  goes to  ctrl-X (i.e. X & 037)
429  *              {\E,\n,\r,\b,\t,\f}  go to
430  *                      {ESCAPE,newline,carriage-return,backspace,tab,formfeed}
431  *              {\^,\\}  go to  {carat,backslash}
432  *              \ddd (for ddd = up to three octal digits)  goes to the character ddd
433  *
434  *              \e == \E
435  *              \0 == \200
436  *
437  */
438
439 static char
440 trans_string(char *ptr)
441 {
442 int     count = 0;
443 int     number;
444 int     i, c;
445 chtype  ch, last_ch = '\0';
446
447         while ((ch = c = next_char()) != (chtype)separator && c != EOF) {
448             if ((_nc_syntax == SYN_TERMCAP) && c == '\n')
449                 break;
450             if (ch == '^' && last_ch != '%') {
451                 ch = c = next_char();
452                 if (c == EOF)
453                     _nc_err_abort("Premature EOF");
454
455                 if (! (is7bits(ch) && isprint(ch))) {
456                     _nc_warning("Illegal ^ character - %s",
457                         _tracechar((unsigned char)ch));
458                 }
459                 if (ch == '?')
460                     *(ptr++) = '\177';
461                 else
462                     *(ptr++) = (char)(ch & 037);
463             }
464             else if (ch == '\\') {
465                 ch = c = next_char();
466                 if (c == EOF)
467                     _nc_err_abort("Premature EOF");
468
469                 if (ch >= '0'  &&  ch <= '7') {
470                     number = ch - '0';
471                     for (i=0; i < 2; i++) {
472                         ch = c = next_char();
473                         if (c == EOF)
474                             _nc_err_abort("Premature EOF");
475
476                         if (c < '0'  ||  c > '7') {
477                             if (isdigit(c)) {
478                                 _nc_warning("Non-octal digit `%c' in \\ sequence", c);
479                                 /* allow the digit; it'll do less harm */
480                             } else {
481                                 push_back((char)c);
482                                 break;
483                             }
484                         }
485
486                         number = number * 8 + c - '0';
487                     }
488
489                     if (number == 0)
490                         number = 0200;
491                     *(ptr++) = (char) number;
492                 } else {
493                     switch (c) {
494                         case 'E':
495                         case 'e':       *(ptr++) = '\033';      break;
496
497                         case 'l':
498                         case 'n':       *(ptr++) = '\n';        break;
499
500                         case 'r':       *(ptr++) = '\r';        break;
501
502                         case 'b':       *(ptr++) = '\010';      break;
503
504                         case 's':       *(ptr++) = ' ';         break;
505
506                         case 'f':       *(ptr++) = '\014';      break;
507
508                         case 't':       *(ptr++) = '\t';        break;
509
510                         case '\\':      *(ptr++) = '\\';        break;
511
512                         case '^':       *(ptr++) = '^';         break;
513
514                         case ',':       *(ptr++) = ',';         break;
515
516                         case ':':       *(ptr++) = ':';         break;
517
518                         case '\n':
519                             continue;
520
521                         default:
522                             _nc_warning("Illegal character %s in \\ sequence",
523                                     _tracechar((unsigned char)ch));
524                             *(ptr++) = (char)ch;
525                     } /* endswitch (ch) */
526                 } /* endelse (ch < '0' ||  ch > '7') */
527             } /* end else if (ch == '\\') */
528             else {
529                 *(ptr++) = (char)ch;
530             }
531
532             count ++;
533
534             last_ch = ch;
535
536             if (count > MAXCAPLEN)
537                 _nc_warning("Very long string found.  Missing separator?");
538         } /* end while */
539
540         *ptr = '\0';
541
542         return(ch);
543 }
544
545 /*
546  *      _nc_push_token()
547  *
548  *      Push a token of given type so that it will be reread by the next
549  *      get_token() call.
550  */
551
552 void _nc_push_token(int class)
553 {
554     /*
555      * This implementation is kind of bogus, it will fail if we ever do
556      * more than one pushback at a time between get_token() calls.  It
557      * relies on the fact that curr_tok is static storage that nothing
558      * but get_token() touches.
559      */
560     pushtype = class;
561     _nc_get_type(pushname);
562
563     DEBUG(3, ("pushing token: `%s', class %d",
564               _nc_curr_token.tk_name, pushtype));
565 }
566
/*
 * Panic mode error recovery - discard input up to and including the next
 * occurrence of "ch", or until end of input, whichever comes first.
 */
void _nc_panic_mode(char ch)
{
        int c;

        do {
                c = next_char();
        } while (c != ch  &&  c != EOF);
}
582
583 /*****************************************************************************
584  *
585  * Character-stream handling
586  *
587  *****************************************************************************/
588
589 #define LEXBUFSIZ       1024
590
591 static char *bufptr;            /* otherwise, the input buffer pointer */
592 static char *bufstart;          /* start of buffer so we can compute offsets */
593 static FILE *yyin;              /* scanner's input file descriptor */
594
595 /*
596  *      _nc_reset_input()
597  *
598  *      Resets the input-reading routines.  Used on initialization,
599  *      or after a seek has been done.  Exactly one argument must be
600  *      non-null.
601  */
602
603 void _nc_reset_input(FILE *fp, char *buf)
604 {
605         pushtype = NO_PUSHBACK;
606         pushname[0] = '\0';
607         yyin = fp;
608         bufstart = bufptr = buf;
609         _nc_curr_file_pos = 0L;
610         if (fp != 0)
611                 _nc_curr_line = 0;
612         _nc_curr_col = 0;
613 }
614
615 /*
616  *      int next_char()
617  *
618  *      Returns the next character in the input stream.  Comments and leading
619  *      white space are stripped.
620  *
621  *      The global state variable 'firstcolumn' is set TRUE if the character
622  *      returned is from the first column of the input line.
623  *
624  *      The global variable _nc_curr_line is incremented for each new line.
625  *      The global variable _nc_curr_file_pos is set to the file offset of the
626  *      beginning of each line.
627  */
628
629 static int
630 next_char(void)
631 {
632     if (!yyin)
633     {
634         if (*bufptr == '\0')
635             return(EOF);
636         if (*bufptr == '\n') {
637             _nc_curr_line++;
638             _nc_curr_col = 0;
639         }
640     }
641     else if (!bufptr || !*bufptr)
642     {
643         /*
644          * In theory this could be recoded to do its I/O one
645          * character at a time, saving the buffer space.  In
646          * practice, this turns out to be quite hard to get
647          * completely right.  Try it and see.  If you succeed,
648          * don't forget to hack push_back() correspondingly.
649          */
650         static char line[LEXBUFSIZ];
651
652         do {
653                _nc_curr_file_pos = ftell(yyin);
654
655                if ((bufstart = fgets(line, LEXBUFSIZ, yyin)) != NULL) {
656                    _nc_curr_line++;
657                    _nc_curr_col = 0;
658                }
659                bufptr = bufstart;
660            } while
661                (bufstart != NULL && line[0] == '#');
662
663         if (bufstart == NULL)
664             return (EOF);
665
666         while (iswhite(*bufptr))
667             bufptr++;
668     }
669
670     first_column = (bufptr == bufstart);
671
672     _nc_curr_col++;
673     return(*bufptr++);
674 }
675
676 static void push_back(char c)
677 /* push a character back onto the input stream */
678 {
679     if (bufptr == bufstart)
680             _nc_syserr_abort("Can't backspace off beginning of line");
681     *--bufptr = c;
682 }
683
684 static long stream_pos(void)
685 /* return our current character position in the input stream */
686 {
687     return (yyin ? ftell(yyin) : (bufptr ? bufptr - bufstart : 0));
688 }
689
690 static bool end_of_stream(void)
691 /* are we at end of input? */
692 {
693     return (yyin ? feof(yyin) : (bufptr && *bufptr == '\0'));
694 }
695
696 /* comp_scan.c ends here */