ncurses/comp_scan.c

   1 /***************************************************************************
   2 *                            COPYRIGHT NOTICE                              *
   3 ****************************************************************************
   4 *                ncurses is copyright (C) 1992-1995                        *
   5 *                          Zeyd M. Ben-Halim                               *
   6 *                          zmbenhal@netcom.com                             *
   7 *                          Eric S. Raymond                                 *
   8 *                          esr@snark.thyrsus.com                           *
   9 *                                                                          *
  10 *        Permission is hereby granted to reproduce and distribute ncurses  *
  11 *        by any means and for any fee, whether alone or as part of a       *
  12 *        larger distribution, in source or in binary form, PROVIDED        *
  13 *        this notice is included with any such distribution, and is not    *
  14 *        removed from any of its header files. Mention of ncurses in any   *
  15 *        applications linked with it is highly appreciated.                *
  16 *                                                                          *
  17 *        ncurses comes AS IS with no warranty, implied or expressed.       *
  18 *                                                                          *
  19 ***************************************************************************/
  20
/*
 *      comp_scan.c --- Lexical scanner for terminfo compiler.
 *
 *      _nc_reset_input()
 *      _nc_get_token()
 *      _nc_push_token()
 *      _nc_panic_mode()
 *      int _nc_syntax;
 *      int _nc_curr_line;
 *      int _nc_curr_col;
 *      long _nc_curr_file_pos;
 *      long _nc_comment_start;
 *      long _nc_comment_end;
 *      long _nc_start_line;
 */
  33
  34 #include <curses.priv.h>
  35
  36 #include <ctype.h>
  37 #include <tic.h>
  38
  39 MODULE_ID("$Id: comp_scan.c,v 1.21 1997/04/24 10:37:34 tom Exp $")
  40
  41 /*
  42  * Maximum length of string capability we'll accept before raising an error.
  43  * Yes, there is a real capability in /etc/termcap this long, an "is".
  44  */
  45 #define MAXCAPLEN       600
  46
  47 #define iswhite(ch)     (ch == ' '  ||  ch == '\t')
  48
  49 int     _nc_syntax;             /* termcap or terminfo? */
  50 int     _nc_curr_line;          /* current line # in input */
  51 int     _nc_curr_col;           /* current column # in input */
  52 long    _nc_curr_file_pos;      /* file offset of current line */
  53 long    _nc_comment_start;      /* start of comment range before name */
  54 long    _nc_comment_end;        /* end of comment range before name */
  55 long    _nc_start_line;         /* start line of current entry */
  56
  57 /*****************************************************************************
  58  *
  59  * Token-grabbing machinery
  60  *
  61  *****************************************************************************/
  62
/* set by next_char(): TRUE when the character just returned sat at the very
 * start of the current input buffer/line */
static bool first_column;       /* See 'next_char()' below */
/* ':' once an entry has been recognized as termcap, ',' for terminfo */
static char separator;          /* capability separator */
/* NO_PUSHBACK when nothing is pending, otherwise the class given to
 * _nc_push_token() */
static int pushtype;            /* type of pushback token */
/* name saved by _nc_push_token() and restored by _nc_get_token() */
static char pushname[MAX_NAME_SIZE+1];

/* forward declarations of the character-stream helpers defined further down */
static int  next_char(void);
static long stream_pos(void);
static bool end_of_stream(void);
static char trans_string(char *);
static void push_back(char c);
  73
  74 /* Assume we may be looking at a termcap-style continuation */
  75 static inline int eat_escaped_newline(int ch)
  76 {
  77         if (ch == '\\')
  78                 while ((ch = next_char()) == '\n'  ||  iswhite(ch))
  79                         continue;
  80         return ch;
  81 }
  82
  83 /*
  84  *      int
  85  *      get_token()
  86  *
  87  *      Scans the input for the next token, storing the specifics in the
  88  *      global structure 'curr_token' and returning one of the following:
  89  *
  90  *              NAMES           A line beginning in column 1.  'name'
  91  *                              will be set to point to everything up to but
  92  *                              not including the first separator on the line.
  93  *              BOOLEAN         An entry consisting of a name followed by
  94  *                              a separator.  'name' will be set to point to
  95  *                              the name of the capability.
  96  *              NUMBER          An entry of the form
  97  *                                      name#digits,
  98  *                              'name' will be set to point to the capability
  99  *                              name and 'valnumber' to the number given.
 100  *              STRING          An entry of the form
 101  *                                      name=characters,
 102  *                              'name' is set to the capability name and
 103  *                              'valstring' to the string of characters, with
 104  *                              input translations done.
 105  *              CANCEL          An entry of the form
 106  *                                      name@,
 107  *                              'name' is set to the capability name and
 108  *                              'valnumber' to -1.
 109  *              EOF             The end of the file has been reached.
 110  *
 111  *      A `separator' is either a comma or a semicolon, depending on whether
 112  *      we are in termcap or terminfo mode.
 113  *
 114  */
 115
 116 int _nc_get_token(void)
 117 {
 118 static const char terminfo_punct[] = "@%&*!#";
 119 long            number;
 120 int             type;
 121 int             ch;
 122 bool            found;
 123 static char     buffer[MAX_ENTRY_SIZE];
 124 char            *ptr;
 125 int             dot_flag = FALSE;
 126 long            token_start;
 127
 128         if (pushtype != NO_PUSHBACK)
 129         {
 130             int retval = pushtype;
 131
 132             _nc_set_type(pushname);
 133             DEBUG(3, ("pushed-back token: `%s', class %d",
 134                       _nc_curr_token.tk_name, pushtype));
 135
 136             pushtype = NO_PUSHBACK;
 137             pushname[0] = '\0';
 138
 139             /* currtok wasn't altered by _nc_push_token() */
 140             return(retval);
 141         }
 142
 143         if (end_of_stream())
 144             return(EOF);
 145
 146 start_token:
 147         token_start = stream_pos();
 148         while ((ch = next_char()) == '\n'  ||  iswhite(ch))
 149             continue;
 150
 151         ch = eat_escaped_newline(ch);
 152
 153         if (ch == EOF)
 154             type = EOF;
 155         else {
 156             /* if this is a termcap entry, skip a leading separator */
 157             if (separator == ':' && ch == ':')
 158                 ch = next_char();
 159
 160             if (ch == '.') {
 161                         dot_flag = TRUE;
 162                         DEBUG(8, ("dot-flag set"));
 163
 164                         while ((ch = next_char())=='.' || iswhite(ch))
 165                             continue;
 166             }
 167
 168             if (ch == EOF) {
 169                 type = EOF;
 170                 goto end_of_token;
 171             }
 172
 173             /* have to make some punctuation chars legal for terminfo */
 174             if (!isalnum(ch) && !strchr(terminfo_punct, (char)ch)) {
 175                  _nc_warning("Illegal character (expected alphanumeric or %s) - %s",
 176                         terminfo_punct, _tracechar((chtype)ch));
 177                  _nc_panic_mode(separator);
 178                  goto start_token;
 179             }
 180
 181             ptr = buffer;
 182             *(ptr++) = ch;
 183
 184             if (first_column) {
 185                         char    *desc;
 186
 187                         _nc_comment_start = token_start;
 188                         _nc_comment_end = _nc_curr_file_pos;
 189                         _nc_start_line = _nc_curr_line;
 190
 191                         _nc_syntax = ERR;
 192                         while ((ch = next_char()) != '\n')
 193                         {
 194                             if (ch == EOF)
 195                                 _nc_err_abort("premature EOF");
 196                             else if (ch == ':')
 197                             {
 198                                 _nc_syntax = SYN_TERMCAP;
 199                                 separator = ':';
 200                                 break;
 201                             }
 202                             else if (ch == ',')
 203                             {
 204                                 _nc_syntax = SYN_TERMINFO;
 205                                 separator = ',';
 206                                 /*
 207                                  * Fall-through here is not an accident.
 208                                  * The idea is that if we see a comma, we
 209                                  * figure this is terminfo unless we
 210                                  * subsequently run into a colon -- but
 211                                  * we don't stop looking for that colon until
 212                                  * hitting a newline.  This allows commas to
 213                                  * be embedded in description fields of
 214                                  * either syntax.
 215                                  */
 216                                 /* FALLTHRU */
 217                             }
 218                             else
 219                                 ch = eat_escaped_newline(ch);
 220
 221                             *ptr++ = ch;
 222                         }
 223                         ptr[0] = '\0';
 224                         if (_nc_syntax == ERR)
 225                         {
 226                             /*
 227                              * Grrr...what we ought to do here is barf,
 228                              * complaining that the entry is malformed.
 229                              * But because a couple of name fields in the
 230                              * 8.2 termcap file end with |\, we just have
 231                              * to assume it's termcap syntax.
 232                              */
 233                             _nc_syntax = SYN_TERMCAP;
 234                             separator = ':';
 235                         }
 236                         else if (_nc_syntax == SYN_TERMINFO)
 237                         {
 238                             /* throw away trailing /, *$/ */
 239                             for (--ptr; iswhite(*ptr) || *ptr == ','; ptr--)
 240                                 continue;
 241                             ptr[1] = '\0';
 242                         }
 243
 244                         /*
 245                          * This is the soonest we have the terminal name
 246                          * fetched.  Set up for following warning messages.
 247                          */
 248                         ptr = strchr(buffer, '|');
 249                         if (ptr == (char *)NULL)
 250                             ptr = buffer + strlen(buffer);
 251                         ch = *ptr;
 252                         *ptr = '\0';
 253                         _nc_set_type(buffer);
 254                         *ptr = ch;
 255
 256                         /*
 257                          * Compute the boundary between the aliases and the
 258                          * description field for syntax-checking purposes.
 259                          */
 260                         desc = strrchr(buffer, '|');
 261                         if (desc)
 262                             if (*desc == '\0')
 263                                 _nc_warning("empty longname field");
 264                             else if (strchr(desc, ' ') == (char *)NULL)
 265                                 _nc_warning("older tic versions may treat the description field as an alias");
 266                         if (!desc)
 267                             desc = buffer + strlen(buffer);
 268
 269                         /*
 270                          * Whitespace in a name field other than the long name
 271                          * can confuse rdist and some termcap tools.  Slashes
 272                          * are a no-no.  Other special characters can be
 273                          * dangerous due to shell expansion.
 274                          */
 275                         for (ptr = buffer; ptr < desc; ptr++)
 276                         {
 277                             if (isspace(*ptr))
 278                             {
 279                                 _nc_warning("whitespace in name or alias field");
 280                                 break;
 281                             }
 282                             else if (*ptr == '/')
 283                             {
 284                                 _nc_warning("slashes aren't allowed in names or aliases");
 285                                 break;
 286                             }
 287                             else if (strchr("$[]!*?", *ptr))
 288                             {
 289                                 _nc_warning("dubious character `%c' in name or alias field", *ptr);
 290                                 break;
 291                             }
 292                         }
 293
 294                         ptr = buffer;
 295
 296                         _nc_curr_token.tk_name = buffer;
 297                         type = NAMES;
 298             } else {
 299                         while ((ch = next_char()) != EOF) {
 300                                 if (!isalnum(ch)) {
 301                                         if (_nc_syntax == SYN_TERMINFO) {
 302                                                 if (ch != '_')
 303                                                         break;
 304                                         } else { /* allow ';' for "k;" */
 305                                                 if (ch != ';')
 306                                                         break;
 307                                         }
 308                                 }
 309                                 *(ptr++) = ch;
 310                         }
 311
 312                         *ptr++ = '\0';
 313                         switch (ch) {
 314                         case ',':
 315                         case ':':
 316                                 if (ch != separator)
 317                                         _nc_err_abort("Separator inconsistent with syntax");
 318                                 _nc_curr_token.tk_name = buffer;
 319                                 type = BOOLEAN;
 320                                 break;
 321                         case '@':
 322                                 if ((ch = next_char()) != separator)
 323                                         _nc_warning("Missing separator after `%s', have %s",
 324                                                 buffer, _tracechar((chtype)ch));
 325                                 _nc_curr_token.tk_name = buffer;
 326                                 type = CANCEL;
 327                                 break;
 328
 329                         case '#':
 330                                 number = 0;
 331                                 found  = FALSE;
 332                                 while (isdigit(ch = next_char())) {
 333                                         number = number * 10 + ch - '0';
 334                                         found  = TRUE;
 335                                 }
 336                                 if (found == FALSE)
 337                                         _nc_warning("no value given for `%s'", buffer);
 338                                 if (ch != separator)
 339                                         _nc_warning("Missing separator");
 340                                 _nc_curr_token.tk_name = buffer;
 341                                 _nc_curr_token.tk_valnumber = number;
 342                                 type = NUMBER;
 343                                 break;
 344
 345                         case '=':
 346                                 ch = trans_string(ptr);
 347                                 if (ch != separator)
 348                                         _nc_warning("Missing separator");
 349                                 _nc_curr_token.tk_name = buffer;
 350                                 _nc_curr_token.tk_valstring = ptr;
 351                                 type = STRING;
 352                                 break;
 353
 354                         case EOF:
 355                                 type = EOF;
 356                                 break;
 357                         default:
 358                                 /* just to get rid of the compiler warning */
 359                                 type = UNDEF;
 360                                 _nc_warning("Illegal character - %s",
 361                                         _tracechar((chtype)ch));
 362                         }
 363                 } /* end else (first_column == FALSE) */
 364         } /* end else (ch != EOF) */
 365
 366 end_of_token:
 367         if (dot_flag == TRUE)
 368             DEBUG(8, ("Commented out "));
 369
 370         if (_nc_tracing & TRACE_IEVENT)
 371         {
 372             fprintf(stderr, "Token: ");
 373             switch (type)
 374             {
 375                 case BOOLEAN:
 376                     fprintf(stderr, "Boolean; name='%s'\n",
 377                             _nc_curr_token.tk_name);
 378                     break;
 379
 380                 case NUMBER:
 381                     fprintf(stderr, "Number;  name='%s', value=%d\n",
 382                             _nc_curr_token.tk_name,
 383                             _nc_curr_token.tk_valnumber);
 384                     break;
 385
 386                 case STRING:
 387                     fprintf(stderr, "String;  name='%s', value=%s\n",
 388                             _nc_curr_token.tk_name,
 389                             _nc_visbuf(_nc_curr_token.tk_valstring));
 390                     break;
 391
 392                 case CANCEL:
 393                     fprintf(stderr, "Cancel; name='%s'\n",
 394                             _nc_curr_token.tk_name);
 395                     break;
 396
 397                 case NAMES:
 398
 399                     fprintf(stderr, "Names; value='%s'\n",
 400                             _nc_curr_token.tk_name);
 401                     break;
 402
 403                 case EOF:
 404                     fprintf(stderr, "End of file\n");
 405                     break;
 406
 407                 default:
 408                     _nc_warning("Bad token type");
 409             }
 410         }
 411
 412         if (dot_flag == TRUE)           /* if commented out, use the next one */
 413             type = _nc_get_token();
 414
 415         DEBUG(3, ("token: `%s', class %d", _nc_curr_token.tk_name, type));
 416
 417         return(type);
 418 }
 419
 420 /*
 421  *      char
 422  *      trans_string(ptr)
 423  *
 424  *      Reads characters using next_char() until encountering a separator, nl,
 425  *      or end-of-file.  The returned value is the character which caused
 426  *      reading to stop.  The following translations are done on the input:
 427  *
 428  *              ^X  goes to  ctrl-X (i.e. X & 037)
 429  *              {\E,\n,\r,\b,\t,\f}  go to
 430  *                      {ESCAPE,newline,carriage-return,backspace,tab,formfeed}
 431  *              {\^,\\}  go to  {carat,backslash}
 432  *              \ddd (for ddd = up to three octal digits)  goes to the character ddd
 433  *
 434  *              \e == \E
 435  *              \0 == \200
 436  *
 437  */
 438
 439 static char
 440 trans_string(char *ptr)
 441 {
 442 int     count = 0;
 443 int     number;
 444 int     i, c;
 445 chtype  ch, last_ch = '\0';
 446
 447         while ((ch = c = next_char()) != (chtype)separator && c != EOF) {
 448             if ((_nc_syntax == SYN_TERMCAP) && c == '\n')
 449                 break;
 450             if (ch == '^' && last_ch != '%') {
 451                 ch = c = next_char();
 452                 if (c == EOF)
 453                     _nc_err_abort("Premature EOF");
 454
 455                 if (! (is7bits(ch) && isprint(ch))) {
 456                     _nc_warning("Illegal ^ character - %s",
 457                         _tracechar((unsigned char)ch));
 458                 }
 459                 if (ch == '?')
 460                     *(ptr++) = '\177';
 461                 else
 462                     *(ptr++) = (char)(ch & 037);
 463             }
 464             else if (ch == '\\') {
 465                 ch = c = next_char();
 466                 if (c == EOF)
 467                     _nc_err_abort("Premature EOF");
 468
 469                 if (ch >= '0'  &&  ch <= '7') {
 470                     number = ch - '0';
 471                     for (i=0; i < 2; i++) {
 472                         ch = c = next_char();
 473                         if (c == EOF)
 474                             _nc_err_abort("Premature EOF");
 475
 476                         if (c < '0'  ||  c > '7') {
 477                             if (isdigit(c)) {
 478                                 _nc_warning("Non-octal digit `%c' in \\ sequence", c);
 479                                 /* allow the digit; it'll do less harm */
 480                             } else {
 481                                 push_back((char)c);
 482                                 break;
 483                             }
 484                         }
 485
 486                         number = number * 8 + c - '0';
 487                     }
 488
 489                     if (number == 0)
 490                         number = 0200;
 491                     *(ptr++) = (char) number;
 492                 } else {
 493                     switch (c) {
 494                         case 'E':
 495                         case 'e':       *(ptr++) = '\033';      break;
 496
 497                         case 'l':
 498                         case 'n':       *(ptr++) = '\n';        break;
 499
 500                         case 'r':       *(ptr++) = '\r';        break;
 501
 502                         case 'b':       *(ptr++) = '\010';      break;
 503
 504                         case 's':       *(ptr++) = ' ';         break;
 505
 506                         case 'f':       *(ptr++) = '\014';      break;
 507
 508                         case 't':       *(ptr++) = '\t';        break;
 509
 510                         case '\\':      *(ptr++) = '\\';        break;
 511
 512                         case '^':       *(ptr++) = '^';         break;
 513
 514                         case ',':       *(ptr++) = ',';         break;
 515
 516                         case ':':       *(ptr++) = ':';         break;
 517
 518                         case '\n':
 519                             continue;
 520
 521                         default:
 522                             _nc_warning("Illegal character %s in \\ sequence",
 523                                     _tracechar((unsigned char)ch));
 524                             *(ptr++) = (char)ch;
 525                     } /* endswitch (ch) */
 526                 } /* endelse (ch < '0' ||  ch > '7') */
 527             } /* end else if (ch == '\\') */
 528             else {
 529                 *(ptr++) = (char)ch;
 530             }
 531
 532             count ++;
 533
 534             last_ch = ch;
 535
 536             if (count > MAXCAPLEN)
 537                 _nc_warning("Very long string found.  Missing separator?");
 538         } /* end while */
 539
 540         *ptr = '\0';
 541
 542         return(ch);
 543 }
 544
 545 /*
 546  *      _nc_push_token()
 547  *
 548  *      Push a token of given type so that it will be reread by the next
 549  *      get_token() call.
 550  */
 551
 552 void _nc_push_token(int class)
 553 {
 554     /*
 555      * This implementation is kind of bogus, it will fail if we ever do
 556      * more than one pushback at a time between get_token() calls.  It
 557      * relies on the fact that curr_tok is static storage that nothing
 558      * but get_token() touches.
 559      */
 560     pushtype = class;
 561     _nc_get_type(pushname);
 562
 563     DEBUG(3, ("pushing token: `%s', class %d",
 564               _nc_curr_token.tk_name, pushtype));
 565 }
 566
 567 /*
 568  * Panic mode error recovery - skip everything until a "ch" is found.
 569  */
 570 void _nc_panic_mode(char ch)
 571 {
 572         int c;
 573
 574         for (;;) {
 575                 c = next_char();
 576                 if (c == ch)
 577                         return;
 578                 if (c == EOF)
 579                         return;
 580         }
 581 }
 582
 583 /*****************************************************************************
 584  *
 585  * Character-stream handling
 586  *
 587  *****************************************************************************/
 588
/* size of the line buffer next_char() fills with fgets() when reading a file */
#define LEXBUFSIZ       1024

/* next character to be returned by next_char(); walks either the caller's
 * string or the current line read from yyin */
static char *bufptr;            /* otherwise, the input buffer pointer */
static char *bufstart;          /* start of buffer so we can compute offsets */
/* stdio stream being scanned; null when input comes from a string buffer */
static FILE *yyin;              /* scanner's input file descriptor */
 594
 595 /*
 596  *      _nc_reset_input()
 597  *
 598  *      Resets the input-reading routines.  Used on initialization,
 599  *      or after a seek has been done.  Exactly one argument must be
 600  *      non-null.
 601  */
 602
 603 void _nc_reset_input(FILE *fp, char *buf)
 604 {
 605         pushtype = NO_PUSHBACK;
 606         pushname[0] = '\0';
 607         yyin = fp;
 608         bufstart = bufptr = buf;
 609         _nc_curr_file_pos = 0L;
 610         if (fp != 0)
 611                 _nc_curr_line = 0;
 612         _nc_curr_col = 0;
 613 }
 614
 615 /*
 616  *      int next_char()
 617  *
 618  *      Returns the next character in the input stream.  Comments and leading
 619  *      white space are stripped.
 620  *
 621  *      The global state variable 'firstcolumn' is set TRUE if the character
 622  *      returned is from the first column of the input line.
 623  *
 624  *      The global variable _nc_curr_line is incremented for each new line.
 625  *      The global variable _nc_curr_file_pos is set to the file offset of the
 626  *      beginning of each line.
 627  */
 628
 629 static int
 630 next_char(void)
 631 {
 632     if (!yyin)
 633     {
 634         if (*bufptr == '\0')
 635             return(EOF);
 636         if (*bufptr == '\n') {
 637             _nc_curr_line++;
 638             _nc_curr_col = 0;
 639         }
 640     }
 641     else if (!bufptr || !*bufptr)
 642     {
 643         /*
 644          * In theory this could be recoded to do its I/O one
 645          * character at a time, saving the buffer space.  In
 646          * practice, this turns out to be quite hard to get
 647          * completely right.  Try it and see.  If you succeed,
 648          * don't forget to hack push_back() correspondingly.
 649          */
 650         static char line[LEXBUFSIZ];
 651
 652         do {
 653                _nc_curr_file_pos = ftell(yyin);
 654
 655                if ((bufstart = fgets(line, LEXBUFSIZ, yyin)) != NULL) {
 656                    _nc_curr_line++;
 657                    _nc_curr_col = 0;
 658                }
 659                bufptr = bufstart;
 660            } while
 661                (bufstart != NULL && line[0] == '#');
 662
 663         if (bufstart == NULL)
 664             return (EOF);
 665
 666         while (iswhite(*bufptr))
 667             bufptr++;
 668     }
 669
 670     first_column = (bufptr == bufstart);
 671
 672     _nc_curr_col++;
 673     return(*bufptr++);
 674 }
 675
 676 static void push_back(char c)
 677 /* push a character back onto the input stream */
 678 {
 679     if (bufptr == bufstart)
 680             _nc_syserr_abort("Can't backspace off beginning of line");
 681     *--bufptr = c;
 682 }
 683
 684 static long stream_pos(void)
 685 /* return our current character position in the input stream */
 686 {
 687     return (yyin ? ftell(yyin) : (bufptr ? bufptr - bufstart : 0));
 688 }
 689
 690 static bool end_of_stream(void)
 691 /* are we at end of input? */
 692 {
 693     return (yyin ? feof(yyin) : (bufptr && *bufptr == '\0'));
 694 }
 695
 696 /* comp_scan.c ends here */