static char RCSid[] = "$Id: cagt_lex.c,v 1.10 1992/08/17 13:36:56 waite Exp $";
/* Copyright, 1989, The Regents of the University of Colorado */

/* Modified to work with YACC				June 1986
 *	- renamed lexical to yylex
 *	- removed the symbol table parameter from lexical (and from the
 *	  lexical analysis module altogether).
 *	- removed the pointer to a user supplied token struct parameter from
 *        lexical for the same reason as above. Made token a global variable
 *	  (lex_curtok) available only to the lex module.
 *	- Removed terminal/non-terminal determination from the
 *	  lexical analysis module and transferred it to gram_parse.
 *
 * NOTE: This module modified from code by
 *	 W.M. Waite
 *	 Department of Electrical and Computer Engineering
 *	 University of Colorado, Boulder
 *
 *    This module performs the lexical analysis task for cagt.
 * As originally written, cagt did not use a parser to aid in input
 * analysis, so the lexical analyzer assumed the task of determining
 * whether an identifier is a terminal or nonterminal. (a parser would
 * normally perform this function.)  Per the modification notes above,
 * that determination has since been moved to gram_parse.
 *
 * Routines:
 *	1) prtoken - Print the specification of a basic symbol
 *	             (compiled only when UNUSED is defined).
 *	2) endline - Advance to the next source text line.
 *	3) expand -  Expand tabs in a string (compiled only when TABS is
 *	             defined).
 *	4) yylex -   Obtain the next basic symbol of the source text
 *	             (formerly named lexical).
 *	5) is_idnt - Check if string represents valid identifier.
 *	6) lexinit - Initialize the lexical analysis module.
*/



#include <stdio.h>
#include "cagt_config.h"
#include "support.h"
#include "gram.h"
#include "lextbl.h"
#include "cagt_lex.h"

/* When TABS is defined, expand() is compiled and used for literals, and
 * yylex accepts tab characters, adjusting column positions as if tab
 * stops were set every 8 columns. */
#define TABS                                    /* Allow tabs in user input */



/* The following variable is READONLY global outside of this module */
/* yylex stores the position (pos.line, pos.col), the token code and, for
 * identifiers and literals, the subrosa value of the symbol here. */
public TOKEN lex_curtok;       /* Token information from last call to yylex */

/* The following variables constitute the internal state of this module */
/* line is set by lexinit and endline.  When TABS is defined it is also
 * moved backwards at every tab so that (p - line) yields a column number
 * with the tab expanded. */
private char *line;     /* Pointer to the beginning of the current line */
/* linenum is set to 1 by lexinit and incremented by endline. */
private int linenum;	/* Index of the current line in the input text */



#ifdef UNUSED
public prtoken(d, t)
   FILE *d;
   TOKEN *t;
/*
 * Print the specification of a basic symbol
 *
 * On entry:
 *	d is the stream that receives the description
 *	t addresses the token to be described
 *
 * On exit:
 *	t has been added to the current line of d: its position, its class
 *	name and, for identifiers and literals, its symbol value and text
*/
   {
   prtpos(d,&(t->pos));
   (void) fprintf(d," %s",class_names[t->code]);
   switch (t->code) {
	case IDNT:
		{
                /* The terminal/nonterminal tag belongs on d with the rest
                 * of the description, not on stdout. */
                if (t->subrosa.IDSYMB->non_term)
                      (void) fprintf(d,"(NT)");
                   else
                      (void) fprintf(d,"(T)");
                /* NOTE(review): IDSYMB is dereferenced above, so it is a
                 * pointer; printing it with %d is not portable.  Left
                 * unchanged to keep the output format - confirm whether
                 * %p would be acceptable. */
                (void) fprintf(d,"(IDSYM:%d)",t->subrosa.IDSYMB);
                (void) fprintf(d," ->");
                (void) prtsym(d,t->subrosa.IDSYMB);
                (void) fprintf(d,"<-"); break;
                }
	case LITT:
		{
                /* NOTE(review): LITVAL is handed to prtsym just like
                 * IDSYMB, so it is presumably a pointer as well and the
                 * %d below has the same portability problem - confirm. */
                (void) fprintf(d,"(LITVAL:%d)",t->subrosa.LITVAL);
                (void) fprintf(d," '");
                (void) prtsym(d,t->subrosa.LITVAL);
                (void) fprintf(d,"'"); break;
		}
        }
   }
#endif







private  endline()
/*
 * Make the next line of the source text the current line
 *
 *    On entry-
 *       TokenEnd points to the character position following the
 *          terminator of the line just finished.
 *
 *    On exit-
 *       refillBuf has been called if TokenEnd addressed a NUL character
 *          (presumably to obtain more source text - confirm in the
 *          source module).
 *       line addresses the first character position of the new current
 *          line, and the line count has been advanced by one.
*/
   {
   if (!*TokenEnd)
      {
      refillBuf(TokenEnd);
      }

   linenum += 1;
   line = TokenEnd;
   }







#ifdef TABS

private char *expand(p, t)
   char *p;
   TOKEN *t;
/*
 * Expand tabs in a string
 *
 * On entry:
 *      p addresses the first character of the string denotation.
 *      t addresses the token being built.
 *
 * On Exit:
 *      expand addresses the character beyond the closing delimiter
 *         (or newline, if there is no delimiter).
 *      The subrosa information has been set in the token.
*/
   {
   TABLE
   register char *q;
   register int c, j;
   char expanded[BUFSIZ];
   int toktyp = LITT;
   char dummy_nt = FALSE;	                          /* Dummy variable */

   q = expanded;
   do {
top:  while (LIT(c = *p++)) *q++ = c;
      if (c == '\'' && *p == '\'') { *q++ = c; ++p; goto top; }
      if (c == '\t')
	    {
            line -= j = 7 - (p - line - 1) % 8;
            *q++ = ' ';
            while (j--) *q++ = ' ';
            }
         else
	    if (c == '\n') message(FATAL,"Illegal character",0,&t->pos);
      } while (c == '\t');

/* Usually, literals are put directly into the string table, however,
 * cagt puts them through the symbol table so as to make literal comparisons
 * in the EBNF grammar easier.
*/
/*   mkstr(expanded, q - expanded, &t->subrosa.STRVAL);            */
   getsymb(expanded,q - expanded, &toktyp,&t->subrosa.LITVAL,&dummy_nt);

   return(p);
   }

#endif







public int yylex()		        /* Formerly lexical(table,t) */
/*
 * Obtain the next basic symbol of the source text
 *
 * Blanks, newlines, comments and (when TABS is defined) tabs produce the
 * internal class NULT and are skipped; scanning repeats until some other
 * token class has been found.
 *
 * On Entry:
 *	The string, symbol, and error modules have been initialized
 *		to a valid state.
 *
 * On Exit:
 *	yylex returns the token code obtained
 *	lex_curtok describes the basic symbol obtained
 *	TokenEnd addresses the character following the basic symbol
*/
   {
   /* NOTE(review): TABLE, chtbl, IDN, LIT, CMT, NOTLIT and INLIT are
    * presumably supplied by lextbl.h; their exact expansions are not
    * visible here. */
   TABLE
   register char *p;
   register int c;
   int toktyp;
   char non_term = FALSE;	                     /* Used to call getsym */

   do {
      /* Skip blanks.  Afterwards p and TokenEnd address the character
       * following the first non-blank one, which is p[-1]. */
      p = TokenEnd; while (*p++ == ' ') ; TokenEnd = p;
      lex_curtok.pos.line = linenum;
      /* p is one past the first character, so the column counts from 1 */
      lex_curtok.pos.col = (p - line);
      /* Classify by the first character.  Classes without a case below
       * are returned unchanged as one-character tokens. */
      switch (toktyp = chtbl[c = p[-1]]) {
	    case CMNT:
                  /* Comment to end of line: skip through the newline, then
                   * fall into NULT with c == '\n' so that endline() runs. */
		  while ((c = *p++) != '\n') ;
		  TokenEnd = p; toktyp = NULT;
            case NULT:
#ifdef TABS
                  /* Account for the columns the tab occupies beyond one by
                   * moving the line origin back (tab stops every 8). */
                  if (c == '\t') {line -= 7-(p-line-1)%8; break;}
#endif
                  if (c == '\n') {endline(); break;}
                  message(FATAL,"Illegal character",0,&lex_curtok.pos);
                  break;
	    case IDNT:
                  /* An identifier continues over IDN characters; a '_' is
                   * part of it only when an IDN character follows. */
                  for (;;)
		        {
                        while (IDN(c = *p++));
                        if (c != '_' || !IDN(*p))
			      {
                              /* The text starts at TokenEnd-1 (the first
                               * character) and is p-TokenEnd long; getsymb
                               * receives the token code and symbol slot
                               * by address. */
                              getsymb(TokenEnd-1, p-TokenEnd,&toktyp,
                                      &lex_curtok.subrosa.IDSYMB,&non_term);
                              break;
                              }
                        }
                  /* Resume at the character that ended the identifier */
                  TokenEnd = p - 1;
                  break;
            case LITT:
                     {
                     char delimiter;

                     /* NOTE(review): presumably NOTLIT makes the opening
                      * delimiter stop the LIT scan and INLIT restores its
                      * previous status afterwards - confirm in lextbl.h. */
                     NOTLIT(delimiter = c);
#ifdef TABS
                     p = expand(p, &lex_curtok);
#else
                     /* Scan to the end of the literal; a doubled quote is
                      * skipped as part of the literal text. */
                top: while (LIT(c = *p++)) ;
                     if (c == '\'' && *p == '\'') { ++p; goto top; }
                     if (c == '\n') message(FATAL,"Illegal character",
					    0,&lex_curtok.pos);
                     /* See discussion of LITT in routine expand above... */
                     /* mkstr(TokenEnd, p-1-TokenEnd, &lex_curtok.subrosa.STRVAL);*/
                     getsymb(TokenEnd,p-1-TokenEnd, &toktyp,
                                        &lex_curtok.subrosa.LITVAL,&non_term);
#endif
                     INLIT(delimiter);
                     }
                  TokenEnd = p;
                  break;
            case CLNT:
                  /* A lone ':' and the three-character form "::=" both
                   * yield IST; for the latter the two further characters
                   * are consumed as well. */
                  if ( (*p++ == ':') && (*p == '=') ) TokenEnd += 2;
                  toktyp = IST;
                  break;
            case DOTT:
                  break;
            case BART:
                  /* "||" is the separator SEPT, a single '|' stays BART */
                  if (*p == '|') { toktyp = SEPT; TokenEnd++; }
		  break;
            case SLHT:				       /* PGS compatibility */
                  /* "//" is SEPT, a '/' not followed by '/' or '*' is
                   * BART, and slash-star opens a bracketed comment. */
                  if (*p == '/') { toktyp = SEPT; TokenEnd++; break; }
                  if (*p != '*') { toktyp = BART; break; }
                  p++; toktyp = NULT;
                  /* Skip the comment body.  CMT stops the inner scan at
                   * characters needing attention: newline, tab, NUL, or
                   * (presumably) a '*' that may start the closing
                   * star-slash. */
                  for (;;)
		        {
                        while (CMT(c = *p++)) ;
                        if (c == '\n')
			      {
                              /* endline() works from TokenEnd and may
                               * change it, so pass p through TokenEnd. */
			      TokenEnd = p;
			      endline();
			      p =TokenEnd;
			      }
#ifdef TABS
                           else
			      if (c == '\t')
				    line -= 7-(p-line-1)%8;
#endif
                                 else
				    if (c == '\0')
					  {
                                          /* NUL inside the comment: step
                                           * back onto it and report an
                                           * error at the comment start. */
                                          p--;
                                          message(FATAL,
                                                  "Illegal character",0,
						  &lex_curtok.pos);
                                          break;
					  }
                                       else
                                          /* The comment ends when a '/'
                                           * follows the stopping char. */
					  if (*p == '/') {p++; break;}
                        }
                  TokenEnd = p;
                  break;
            }
      } while (toktyp == NULT);

   lex_curtok.code = toktyp;
   return (int) toktyp;

   }







public int is_idnt(s)
   char *s;
/*
 * On Entry:
 *      s points at a null terminated character string.
 *
 * On Exit:
 *      is_idnt returns TRUE is the string pointed at by s is a
 *         valid identifier (Token class IDNT), FALSE otherwise
*/
   {
   TABLE
   int ret_val;
   char c;
   char under_score = FALSE;		     /* True if last char. is a '_' */

   ret_val = (chtbl[*s++] == IDNT);
   if (ret_val)
      {
      while (c = *s++)
         if ( !(under_score = (c == '_')) && !IDN(c) )
            {
            ret_val = FALSE;
            break;
            }
      if (under_score) ret_val = FALSE;             /* Trailing underscores */
      }

   return(ret_val);
   }







public lexinit()
/*
 * Put the lexical analysis module into its starting state
 *
 * On entry:
 *	The source module has been initialized
 *
 * On exit:
 *	The current line begins at TokenEnd and carries line number 1
*/
   {
   linenum = 1;
   line = TokenEnd;
   }
