static char RCSid[] = "$Id: gram_fileio.c,v 1.18 1992/07/26 23:21:07 waite Exp $";
/* Copyright, 1989, The Regents of the University of Colorado */

/* This module contains routines to handle file input and output for
 * CAGT.
 *
 * Routines:
 *      1) output_left_node - Outputs the contents of an existing left node.
 *      2) output_right_side - Outputs the contents of an existing right node.
 *      3) output_ebnf_grammar - Outputs an existing EBNF grammar structure.
 *      4) output_rel_file - Outputs the relationship file that contains
 *                           the information required for cagt to perform
 *                           the reverse transformation from decorated
 *                           abstract grammar to parsing grammar.
 *	5) attach_stored_rel_chains - Read the relationship chains out of
 *				      the rel file and attach them to a
 *				      given EBNF grammar structure.
 *	6) output_symbol_assoc - For every symbol that has an
 *		association group, write the group into a file. The
 *		identifier in the group that is currently visible
 *		(there can be only one such symbol per group) is written
 *		first, followed by the other identifiers in that group.
 *		The group is terminated by a period.
 *	7) process_sym_file -  Read the symbol equivalence file and rename
 *		all identifiers in a group to be the first identifier
 *		in the group. Each line is terminated by a DOTT.
 *	8) process_cull_file - Read the rule cull file and remove any
 *		rules in the grammar that are in the cull file.
*/



#include <stdio.h>
#include <stdlib.h>
#include <sys/file.h>
#include "cagt_config.h"
#include "cagt_usr_err.h"
#include "support.h"
#include "queue.h"
#include "gram.h"
#include "cagt.h"







/* class_names contains descriptive names of the token types, one
 * four-character name per token code.  It is compiled only when UNUSED
 * is defined. */
#ifdef UNUSED
public char class_names[][5] ={"EOPT","NULT","IDNT","LITT","LPNT","RPNT",
			"LBKT","RBKT","CLNT"," IST","BART","ASTT","PLST",
			"DOTT","SEPT","SLHT","AMPT","GTRT","LSST","ATT",
			"DOLT","CMNT"};
#endif

/* token_symb contains the visible representations of the token types.
 * The output routines in this file index it with a node's code
 * (e.g. token_symb[IST], token_symb[DOTT]).  The first four entries are
 * empty strings. */
public char token_symb[][4] = { ""	,""    ,""    ,""    ,"("   ,")"   ,
       		                "["   ,"]"   ,":"   ,"::=" ,"|"   ,
				 "*"   ,"+"   ,"."   ,"||"  ,"/"   ,
				 "&"   ,">"   ,"<"   ,"@"   ,"$"   ,  "%" };

/* token_symb_len[i] is the length in characters of token_symb[i]; it is
 * used to advance the output column after a token has been written. */
public short token_symb_len[] = {0,0,0,0,1,1,1,1,1,3,1,1,1,1,2,1,1,1,1,1,1,1};








/*
 * The following are local to the gram_fileio module, and are used
 * to handle the formatting of output lines.
 *
 * R_THRESHOLD is the column limit: before writing a symbol the output
 *    routines start a new, indented line if the symbol would carry the
 *    current column past this value.
 * cur_margin is the running count of characters written on the current
 *    output line; it is reset whenever a routine starts a new line.
*/
#define R_THRESHOLD 75
private int cur_margin = 0;








public void output_left_node(d,node)
   FILE *d;
   LEFT_NODE_PTR node;
/*
 * On Entry:
 *      d points at a file descriptor on an open file
 *      node points at the left side of a rule in the EBNF internal structure
 * On Exit:
 *      If the left side is an IDNT:
 *         The identifier is placed on the file pointed at by d
 *      Otherwise:
 *         An error is signalled and execution of CAGT is terminated.
*/
   {
   if (node->code != IDNT)
	cagt_msg(0, MSG_EXIT, (cagt_msg_text, msg_arr[-(CAGT_INVLDLFTSD)],
           "output_left_side"))
   (void) prtsym(d,node->text->symbol_ptr);
   }







public void output_right_side(d,node,space)
   FILE *d;
   RIGHT_NODE_PTR node;
   char space;
/*
 * Write a chain of right-side nodes to d, recursing into nested
 * constructs and along the next links.
 *
 * On Entry:
 *      d is an open stdio stream to write to.
 *      node points at a node on the right side of a rule in the EBNF
 *         internal structure; a null pointer writes nothing.
 *      space is true if a blank should separate the previously written
 *         node from the next symbol (used for formatting).
 * On Exit:
 *      The right side of the rule, starting from the node pointed at
 *         by node, has been written to d.
 *      cur_margin has been advanced by the characters written on the
 *         current line.  A new line indented by 7 blanks is started
 *         after the IST symbol, and whenever an identifier, literal or
 *         prefixed symbol would carry the line past R_THRESHOLD.
 *      A DOTT, RPNT or RBKT node, or a node whose code is not listed
 *         in the switch, ends the traversal: its next link is not
 *         followed.
*/
   {
   if (node == (RIGHT_NODE_PTR)0) return;

   switch (node->code) {
        case NULT :                                         /* Deleted node */
                {
                /* Write nothing; the pending blank is passed on unchanged */
                output_right_side(d,node->next,space);
                break;
                }
        case DOTT :
                {
                (void) fprintf(d,"%s",token_symb[node->code]);
                break;
                }
        case  IST :
                {
                /* " ::=" ends the first line; the body starts on a new,
                 * indented line */
                (void) fprintf(d," ");
                (void) fprintf(d,"%s",token_symb[node->code]);
                (void) fprintf(d,"\n       ");
                cur_margin = 7;
                output_right_side(d,node->next,FALSE);
                break;
                }
        case GTRT : 
        case LSST :
                {
                if (space) { (void) fprintf(d," "); cur_margin++; }
                (void) fprintf(d,"%s",token_symb[node->code]);
                cur_margin += token_symb_len[node->code];
                output_right_side(d,node->next,TRUE);
                break;
                }
        case AMPT : 
        case ATT  :
        case DOLT :
                {
                /* Prefix marker followed by a nested symbol: keep the two
                 * on one line.  The 2 presumably allows for the separating
                 * blank and the one-character marker. */
                if ((cur_margin + 2 + node->x.nest->x.text->symbol_ptr->l) >
                                                                  R_THRESHOLD)
                        {
                        (void) fprintf(d,"\n       ");
                        cur_margin = 7;
                        }
                   else
                        if (space) { (void) fprintf(d," "); cur_margin++; }
                (void) fprintf(d,"%s",token_symb[node->code]);
                cur_margin += token_symb_len[node->code];
                output_right_side(d,node->x.nest,FALSE);
                output_right_side(d,node->next,TRUE);
                break;
                }
        case ASTT : 
        case PLST : 
                {
                /* Postfix operator: operand first, then the operator */
                output_right_side(d,node->x.nest,space);
                (void) fprintf(d,"%s",token_symb[node->code]);
                cur_margin += token_symb_len[node->code];
                output_right_side(d,node->next,TRUE);
                break;
                }
        case BART : 
        case SEPT : 
                {
                output_right_side(d,node->x.infix->left,space);
                (void) fprintf(d," %s ",token_symb[node->code]);
                /* The operator is written with a blank on each side, so
                 * count those two blanks as well as the operator itself;
                 * otherwise later wrap decisions undercount the column. */
                cur_margin += token_symb_len[node->code] + 2;
                output_right_side(d,node->x.infix->right,FALSE);
                output_right_side(d,node->next,TRUE);
                break;
                }
        case LITT :
        case IDNT:
                {
                if ((cur_margin + node->x.text->symbol_ptr->l) > R_THRESHOLD)
                        {
                        (void) fprintf(d,"\n       ");
                        cur_margin = 7;
                        }
                   else
                        if (space) { (void) fprintf(d," "); cur_margin++; }
                /* prtsym's return value is used as the number of columns
                 * the symbol occupied */
                cur_margin += prtsym(d,node->x.text->symbol_ptr);
                output_right_side(d,node->next,TRUE);
                break;
                }
        case LPNT :
        case LBKT :
                {
                /* Opening bracket, then the nested chain (which writes its
                 * own closing bracket), then the rest of this chain */
                if (space) { (void) fprintf(d," "); cur_margin++; }
                (void) fprintf(d,"%s",token_symb[node->code]);
                cur_margin += token_symb_len[node->code];
                output_right_side(d,node->x.nest,FALSE);
                output_right_side(d,node->next,TRUE);
                break;
                }
        case RPNT:
        case RBKT:
                {
                (void) fprintf(d,"%s",token_symb[node->code]);
                cur_margin += token_symb_len[node->code];
                break;
                }
        }

   }








public void output_ebnf_grammar(d,start)
   FILE  *d;
   LEFT_NODE_PTR start;
/*
 * output_ebnf_grammar places the internal EBNF grammar structure in
 * file d in human readable form.
 *
*/
   {
   LEFT_NODE_PTR left;

   /* Output the structure */
   for (left = start ; left != (LEFT_NODE_PTR)0 ; left = left->next_rule)
      if (left->code != NULT)                         /* Not a deleted rule */
         {
         output_left_node(d,left);
         output_right_side(d,left->right_side,FALSE);
         (void) fprintf(d,"\n\n");
         };

   }






public void output_rel_file(d,start)
   FILE *d;
   LEFT_NODE_PTR start;
/*
 * Write the relationship chain of every rule to the relationship file.
 *
 * On Entry:
 *      d is a file pointer to a file open for writing.
 *      start points at the chain of left sides in an EBNF internal
 *         grammar structure representing the abstract grammar.
 *
 * On Exit:
 *      For each rule in the grammar whose code is not NULT, the
 *         contents of its rel_chain have been written out, one line
 *         per rule.  The first integer on each line gives the number
 *         of relation nodes following.
*/
   {
   LEFT_NODE_PTR rule;
   REL_CHAIN_ELT_PTR elt;
   int n_elts;

   for (rule = start; rule; rule = rule->next_rule)
      {
      if (rule->code == NULT) continue;       /* Deleted rule: no line */

      /* First pass over the chain: the count leads the line */
      n_elts = 0;
      for (elt = rule->rel_ptr; elt; elt = elt->next)
         n_elts++;
      (void) fprintf(d,"%d",n_elts);

      /* Second pass: the rule numbers themselves */
      for (elt = rule->rel_ptr; elt; elt = elt->next)
         (void) fprintf(d," %d",elt->rule_num);

      (void) fprintf(d,"\n");
      }
   }







public void attach_stored_rel_chains(rel,start)
   FILE *rel;
   LEFT_NODE_PTR start;
/*
 * On Entry:
 *	rel is a file pointer to an open relationship file.
 *	start points at the left side chain of an abstract grammar structure
 *	   without any relationship chains attached.
 *	The rel_chains stored in file rel belong to the grammar pointed
 *	   at by start on a one-to-one basis.
 *	
 * On Exit:
 *	The chains in rel have been attached to the grammar pointed
 *	   at by start.
 *	rel is at EOF.
*/
   {
   REL_CHAIN_ELT_PTR top_rel_chain = (REL_CHAIN_ELT_PTR)0,
   		     cur_rel_chain = (REL_CHAIN_ELT_PTR)0;
   int rel_num;
   int i;


   while (start)
      {
      if ( fscanf(rel,"%d",&rel_num) == EOF)
	    cagt_msg(0, MSG_EXIT, (cagt_msg_text, msg_arr[-(CAGT_RELFILEOF)], 1))
	 else
	    {
	    top_rel_chain = (REL_CHAIN_ELT_PTR)0;
            for (i=0; i<rel_num; i++)
	       {
	       if ( i == 0 )
		     GET_MEMORY(top_rel_chain = cur_rel_chain,
				REL_CHAIN_ELT_PTR, 1, REL_CHAIN_ELT,
				"attach_stored_rel_chains", 1)
	          else
		     {
		     GET_MEMORY(cur_rel_chain->next, REL_CHAIN_ELT_PTR, 1,
				REL_CHAIN_ELT,"attach_stored_rel_chains",2)
		     cur_rel_chain = cur_rel_chain->next;
		     }
	       cur_rel_chain->next = (REL_CHAIN_ELT_PTR)0;
	       if ( fscanf(rel,"%d",&(cur_rel_chain->rule_num)) == EOF)
		  cagt_msg(0, MSG_EXIT, (cagt_msg_text, msg_arr[-(CAGT_RELFILEOF)], 2))
	       }
            start->rel_ptr = top_rel_chain;
            }
      start = start->next_rule;
      }
   }







public void output_symbol_assoc(d)
   FILE *d;
/*
 * Write symbol association circles to the file d.
 *
 * On entry:
 *	A current symbol table exists.
 *	d is an open stdio stream to write to.
 *
 * On exit:
 *	For every symbol delivered by get_symbol_queue() one group has
 *	been written: the symbol itself, the IST token, then on the
 *	following line(s) each further symbol reached through the assoc
 *	links until the circle returns to the first symbol, and finally
 *	the DOTT token and a blank line.
 *	NOTE(review): the queue is presumably restricted to visible
 *	symbols whose circle has more than one member -- confirm against
 *	get_symbol_queue().
*/
{
    QUEUE_PTR pending;
    SYMBOL head;
    SYMBOL member;

    pending = get_symbol_queue(TRUE, TRUE, TRUE, TRUE, TRUE);

    while (!empty(pending))
	{
	/* The dequeued symbol leads the group */
	head = (SYMBOL) dequeue(pending);
	(void) prtsym(d,head);
	(void) fprintf(d, " %s\n      ", token_symb[IST]);
	cur_margin = 6;

	/* Walk the circle once, stopping when it closes on head */
	for (member = head->assoc; member != head; member = member->assoc)
	    {
	    if ((cur_margin + member->l) <= R_THRESHOLD)
		    {
		    /* Still fits: separate from the previous symbol */
		    (void) putc(' ', d);
		    cur_margin++;
		    }
		else
		    {
		    /* Would overrun the line: continue on a new one */
		    (void) fprintf(d,"\n       ");
		    cur_margin = 7;
		    }
	    cur_margin += prtsym(d,member);
	    }

	(void) fprintf(d, "%s\n\n", token_symb[DOTT]);
	}

    delete_queue(pending);
}







/*
 * Read the symbol equivalence file and rename all identifiers on the
 * line to be the first identifier. The format is defined by
 * output_symbol_assoc(), and is intended to look like EBNF.
*/
public void process_sym_file(grammar)
	LEFT_NODE_PTR	grammar;
{
	int fd;
	SYMBOL newsymb;

	if ((fd = open(sym_fname,O_RDONLY)) < 0) return;
	if (cagt_verbose)
	  cagt_msg(0, MSG_RET, (cagt_msg_text, msg_arr[-(CAGT_VERBOSE)],
				"Processing symbol equivalence file."))

	initBuf(sym_fname,fd);
	lexinit();

	while (yylex() == IDNT) {
		newsymb = lex_curtok.subrosa.IDSYMB;
		lex_curtok.subrosa.IDSYMB->non_term = TRUE;
		(void) yylex();		/* Skip the IST */
		while (yylex() == IDNT) {
			lex_curtok.subrosa.IDSYMB->non_term = TRUE;
			rename_identifier(lex_curtok.subrosa.IDSYMB,newsymb);
		}
	(void) NULT_redundant_rules(grammar);
	}
	(void) close(fd);
}







/*
 * Read the cull file and remove any rules found in grammar that
 * are found in the cull file.
 *
 * NOTE: Syntactic errors in the cull file will cause
 * cagt to exit before writing the output. This can cause you
 * to loose alot of work if you ran the forward pass interactivly.
 * I think this is OK, because culling rules is something that only
 * happens in the later stages of cagt use, and the user will have
 * graduated to using batch mode by then. If I'm proven wrong, I'll
 * have to modify get_ebnf_grammar to return a bad status instead of
 * simply aborting internally.
*/
public void process_cull_file(grammar)
	LEFT_NODE_PTR	grammar;
{
  int cullfd;
  LEFT_NODE_PTR	cull_gram, cur_cull;
  LEFT_NODE_PTR cur_gram;
  int cull_cnt;

  if ((cullfd = open(cull_fname, O_RDONLY)) < 0) return;
  if (cagt_verbose)
    cagt_msg(0, MSG_RET, (cagt_msg_text, msg_arr[-(CAGT_VERBOSE)],
			  "Processing rule cull file."))

  cull_gram = get_ebnf_grammar(cull_fname, cullfd, FALSE);

  /* NOTE: In the next 2 statements, order is important */
  add_rel_chain(cull_gram);
  remove_pgs_nodes(cull_gram); /*Remove PGS related nodes*/

  if (close(cullfd) <= SYS_ERR)
    cagt_msg(errno, MSG_EXIT, (cagt_msg_text, msg_arr[-(CAGT_CLOSERR)],
			       "Abstract Rule Cull Grammar", cull_fname))

  /* Compare each grammar rule to each cull rule. If there is a match,
   * then remove the rule from the grammar.
   */
  cull_cnt = 0;
  for (cur_cull = cull_gram; cur_cull; cur_cull = cur_cull->next_rule)
    {
      cull_cnt++;
      if (cur_cull->code == NULT) continue;   /* Nothing to try */
      for (cur_gram = grammar; cur_gram; cur_gram = cur_gram->next_rule)
	{
	  if ((cur_gram->code != NULT) && same_rule(cur_gram, cur_cull))
	    {
	      NULT_rule(cur_gram);
	      break;		/* No point in checking further */
	    }
	}
      /* If no match occurred, issue warning. */
      if (!cur_gram && (cur_cull->code != NULT))
	cagt_msg(errno, MSG_RET,
		 (cagt_msg_text, msg_arr[-(CAGT_CULLNOTMATCHED)], cull_cnt,
		  cull_fname))
    }
  
  /* The cull grammar is no longer useful, wipe it out */
  while (cull_gram)
    {
      cur_cull = cull_gram;
      cull_gram = cull_gram->next_rule;
      delete_rule(cur_cull);
    }

}

