/* lex.c: rc's lexical analyzer */

#include <limits.h>

#include "rc.h"
#include "lex.h"
#include "y.tab.h"
#include "nalloc.h"
#include "input.h"
#include "utils.h"
#include "hash.h"
#include "heredoc.h"

/*
	Special characters (i.e., "non-word") in rc:
		\t \n # ; & | ^ $ = ~ ` ' { } @ ! ( ) < > \

	The lexical analyzer is fairly straightforward. The only really unclean part
	concerns backslash continuation and "double backslashes". A backslash followed by
	a newline is treated as a space; otherwise backslash is not a special character
	(i.e., it can be part of a word).  This introduces a host of unwanted special
	cases. In our case, \ cannot be a word character, since we wish to read in all
	word characters in a tight loop.

	Note: to save the trouble of declaring these arrays with TRUEs and FALSEs, I am assuming
	that FALSE = 0, TRUE = 1. (and so is it declared in rc.h)
*/

#define BUFSIZE 1000		/*	malloc hates power of 2 buffers? */
#define BUFMAX (8 * BUFSIZE)	/* 	How big the buffer can get before we re-allocate the
					space at BUFSIZE again. Premature optimization? Maybe.
				*/

enum wordstates { NW, RW, KW }; /* "nonword", "realword", "keyword" */

static int getrestnum(void);
static void print_ps2(void);

int lineno = 0;

/*
	nw ("non-word"): nw[ch] is 1 when ch terminates an ordinary word, 0 when ch may be
	part of one. The table has one entry per value 0..255, sixteen entries per row;
	the comment at the end of a row names the characters that are set in it.
*/
char nw[] = {
	/* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0,	/* \t \n */
	/* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x20 */ 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0,	/* space ! # $ & ' ( ) */
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,	/* ; < = > */
	/* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* @ */
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0,	/* backslash and ^ */
	/* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* ` */
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,	/* { | } ~ */
	/* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xa0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xb0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xc0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xd0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xe0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	/* 0xf0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};

/*
	dnw ("dollar non-word"): the table yylex() switches to for the token right after
	a '$'. dnw[ch] is 0 only for the characters allowed in a variable name (letters,
	digits, '*' and '_') and 1 for every other value 0..255. Sixteen entries per row;
	the comment at the end of a row names the characters that are clear in it.
*/
char dnw[] = {
	/* 0x00 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x10 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x20 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1,	/* the star character */
	/* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1,	/* 0-9 */
	/* 0x40 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* A-O */
	/* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,	/* P-Z and _ */
	/* 0x60 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,	/* a-o */
	/* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,	/* p-z */
	/* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xa0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xb0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xc0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xd0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xe0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	/* 0xf0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
};

/* Scanner state shared by the functions in this file. */
static int bufsize = BUFSIZE;		/* current capacity of buf, in bytes */
static char *buf = NULL;		/* text of the word being scanned; allocated by inityy() */
static boolean newline = FALSE;		/* TRUE when the last token yylex() returned was '\n' */
static enum wordstates word = NW;	/* class of the previous token */
static int c;				/* most recently read input character (or EOF) */

/*
	checkfreecaret: used by yylex() just before it starts a word-like token. If the
	previous token was a word or keyword (word != NW), push the current character
	back with ugchar(), reset word, and return '^' from the calling function, so
	the parser sees a concatenation operator between the two adjacent tokens.

	Wrapped in do { } while (0) so that "checkfreecaret;" is a single statement and
	cannot capture a following else.
*/
#define checkfreecaret do { if (word != NW) { word = NW; ugchar(c); return '^'; } } while (0)

/*
	yylex: scan the next token from the input (read through gchar()) and return its
	token code. Single-character tokens are returned as the character itself; the
	other codes (WORD, IF, PIPE, REDIR, DUP, END, ...) come from y.tab.h. Attributes
	of WORD, PIPE, REDIR and DUP tokens are left in yylval.

	State that survives between calls:
		dollar	set after '$', so that the next token is scanned with the stricter
			dnw table (variable names)
		newline	set after a '\n' token; makes the next call invoke print_ps2()
		word	class of the previous token (NW, RW or KW). When a word-like token
			directly follows a word or keyword, the character is pushed back and
			'^' is returned first (see checkfreecaret); '(' directly after a real
			word is returned as SUB.

	Returns END at end of input.
*/
int yylex(void) {
	static boolean dollar = FALSE;
	boolean saw_meta = FALSE;
	char *meta;
	int i;
	char *r,*s;

	/* rc variable-names may contain only alnum, '*' and '_', so use dnw if we are scanning one.*/
	if (dollar) {
		meta = dnw;
		dollar = FALSE;
	} else
		meta = nw;

	if (newline) {
		--lineno; /* slight space optimization; print_ps2() always increments lineno */
		print_ps2();
		newline = FALSE;
	}

	/* Skip blanks. Whitespace separates tokens, so it also cancels any pending free caret. */
top:	while ((c = gchar()) == ' ' || c == '\t')
		word = NW;

	if (c == EOF)
		return END;

	if (!meta[c]) {	/* it's a word or keyword. */
		checkfreecaret;
		word = RW;
		i = 0;
		/* Tight loop over word characters; the check after each store keeps i < bufsize. */
	read:	do {
			buf[i++] = c;
			if (c == '?' || c == '[' || c == '*')
				saw_meta = TRUE;
			if (i >= bufsize)
				buf = erealloc((void *)buf, bufsize *= 2);
		} while ((c = gchar()) != EOF && !meta[c]);
		if (c == '\\') {
			if ((c = gchar()) == '\n') {
				print_ps2();
				c = ' '; /* Pretend a space was read */
			} else {
				/* A backslash not followed by newline is an ordinary word character.
				   Also entered from "case '\\'" below with i == 0. */
	bs:			buf[i++] = '\\';
				/* Restore i < bufsize: both the read loop and the terminator
				   below store at buf[i] before any other capacity check. */
				if (i >= bufsize)
					buf = erealloc((void *)buf, bufsize *= 2);
				/* c may be EOF here, which must not be used as a table index. */
				if (c != EOF && (!meta[c] || c == '\\'))
					goto read;
			}
		}
		ugchar(c);
		buf[i] = '\0';
		/* Keywords leave word == KW so that a following '(' is not taken as SUB. */
		word = KW;
		if (i == 2) {
			if (*buf == 'i' && buf[1] == 'f') return IF;
			if (*buf == 'f' && buf[1] == 'n') return FN;
			if (*buf == 'i' && buf[1] == 'n') return IN;
		}
		if (streq(buf,"for")) return FOR;
		if (streq(buf,"else")) return ELSE;
		if (streq(buf,"switch")) return SWITCH;
		if (streq(buf,"while")) return WHILE;
		word = RW;
		yylval.word.w = nalloc(strlen(buf) + 1);
		strcpy(yylval.word.w, buf);
		if (saw_meta) {
			/* word.m parallels word.w: 1 where the character is one of ? [ *, else 0.
			   Only the bytes corresponding to characters of buf are written. */
			yylval.word.m = nalloc(strlen(buf) + 1);
			for (r = buf, s = yylval.word.m; *r != '\0'; r++, s++)
				*s = (*r == '?' || *r == '[' || *r == '*');
		} else {
			yylval.word.m = NULL;
		}
		return WORD;
	}

	/* These characters begin something word-like, so a free caret may be due first. */
	if (c == '`' || c == '!' || c == '@' || c == '~' || c == '$' || c == '\'') {
		checkfreecaret;
		if (c == '!' || c == '@' || c == '~')
			word = KW;
	}

	switch (c) { /* No-man's-land characters like ( and ` */
	case '`':
		return '`';
	case '!':
		return BANG;
	case '@':
		return SUBSHELL;
	case '~':
		return TWIDDLE;
	case '$':
		dollar = TRUE;	/* scan the next token with dnw */
		c = gchar();
		if (c == '#')
			return COUNT;
		if (c == '^')
			return FLAT;
		ugchar(c);
		return '$';
	case '\'':
		/* Quoted string. Note that the first pass through the loop stores the
		   opening quote itself in buf[0]. */
		word = RW;
		i = 0;
		do {
			buf[i++] = c;
	loop:		if (c == '\n')
				print_ps2();
			if (i >= bufsize)
				buf = erealloc((void *)buf, bufsize *= 2);
		} while ((c = gchar()) != EOF && c != '\'');
		if (c == EOF)
			scanerror("eof in quoted string");
		if ((c = gchar()) == '\'') { /* Quote quotes thus: 'Hi, how''s it going?' */
			buf[i++] = c;
			goto loop;
		}
		ugchar(c);
		buf[i] = '\0';
		yylval.word.w = ncpy(buf);
		yylval.word.m = NULL;
		return WORD;
	case '\\':
		if ((c = gchar()) == '\n') {
			print_ps2();
			goto top; /* Pretend it was just another space. */
		}
		/* A backslash that starts a word: undo the lookahead so that checkfreecaret
		   sees (and, if needed, pushes back) the backslash itself, then re-read the
		   lookahead and join the word-scanning code above. */
		ugchar(c);
		c = '\\';
		checkfreecaret;
		c = gchar();
		i = 0;
		goto bs;
	case '(': /* Must check for SUB, but do the right thing for if () while () etc. */
		if (word == RW) {
			word = NW;
			return SUB;
		} else {
			word = NW;
			return '(';
		}
	}

	word = NW;

	switch (c) {	/* Other random characters. */
	case '\n':
		lineno++;
		newline = TRUE;
		/* fallthrough */
	case ';':
	case '^':
	case ')':
	case '=':
	case '{': case '}':
		return c;
	case '#':
		/* Comment: discard up to end of line and return the newline token. */
		while ((c = gchar()) != EOF && c != '\n')
			;
		if (c == EOF)
			return END;
		lineno++;
		return '\n';
	case '&':
		c = gchar();
		if (c == '&')
			return ANDAND;
		ugchar(c);
		return '&';
	case '|':
		c = gchar();
		if (c == '|')
			return OROR;
		/* Plain pipe defaults: left fd 1, right fd 0. */
		yylval.pipe.left = 1;
		yylval.pipe.right = 0;
		if (c != '[') {
			ugchar(c);
			return PIPE;
		}
		/* |[left] or |[left=right] */
		c = gchar();
		if (c < '0' || c > '9')	/* getrestnum() expects c to hold a digit */
			scanerror("expected digit after '[' in pipe");
		yylval.pipe.left = getrestnum();
		if (c == '=') {
			c = gchar();
			if (c < '0' || c > '9')
				scanerror("expected digit after '=' in pipe");
			yylval.pipe.right = getrestnum();
		}
		if (c != ']') {
			fprint(2,"expected ']', saw '%c' in pipe\n",c);
			scanerror(NULL);
		}
		return PIPE;
	case '>':
		c = gchar();
		if (c == '>') {
			c = gchar();
			yylval.redir.type = APPEND;
		} else
			yylval.redir.type = CREATE;
		yylval.redir.fd = 1;
		goto common;
	case '<':
		c = gchar();
		if (c == '<') {
			c = gchar();
			if (c == '<') {
				c = gchar();
				yylval.redir.type = HERESTRING;
			} else {
				yylval.redir.type = HEREDOC;
			}
		} else
			yylval.redir.type = FROM;
		yylval.redir.fd = 0;
		/* Shared tail of '<' and '>': optional [fd], [fd=fd] or [fd=] suffix. */
	common:	if (c != '[') {
			ugchar(c);
			return REDIR;
		}
		c = gchar();
		if (c < '0' || c > '9')	/* getrestnum() expects c to hold a digit */
			scanerror("bad character in redirection");
		yylval.redir.fd = getrestnum();
		if (c != '=') {
			if (c == ']') {
				return REDIR;
			} else
				scanerror("bad character in redirection");
		}
		/* Okay, we have a dup. Recast yylval. */
		c = yylval.redir.fd;
		yylval.dup.type = yylval.redir.type;
		yylval.dup.left = c;
		c = gchar();
		if (c < '0' || c > '9') {
			if (c == ']') {
				yylval.dup.right = -1;	/* close fd is difft from fd = 0. */
				return DUP;
			} else
				scanerror("bad character in redirection");
		}
		yylval.dup.right = getrestnum();
		if (c != ']')
			scanerror("bad character in redirection");
		return DUP;
	default:
		return c; /* don't know what it is, let yacc barf on it */
	}
}

/*
	skipnl: consume any run of blanks, tabs, newlines and '#' comments from the
	input. print_ps2() is called once for every newline consumed (including the one
	that ends a comment). The first character that is none of these is pushed back
	with ugchar(); if the input ends, EOF is pushed back instead.
*/
void skipnl(void) {
	int ch;

	for (;;) {
		ch = gchar();
		switch (ch) {
		case ' ':
		case '\t':
			continue;
		case '#':
			/* discard the rest of the comment line */
			do
				ch = gchar();
			while (ch != '\n' && ch != EOF);
			if (ch == EOF) {
				ugchar(ch);
				return;
			}
			/* fallthrough: the comment ended in a newline */
		case '\n':
			print_ps2();
			continue;
		default:
			ugchar(ch);
			return;
		}
	}
}

/*
	yyerror: print the parser's error message s on file descriptor 2. When rc is
	interactive only the message is printed; otherwise it is prefixed with the
	current line number and followed by the offending text: the word buffer if the
	last token was a word or keyword, else the last character read.
*/
void yyerror(const char *s) {
	if (interactive) {
		fprint(2,"%s\n",s);
		return;
	}

	fprint(2,"line %d: %s near ", lineno, s);
	if (word != NW)
		fprint(2,"'%s'\n",buf);
	else
		fprint(2,"'%c'\n",c);
}

/*
	scanerror: report an error detected while scanning. Calls flushu() and then
	hands the message s to rc_error(). yylex() passes NULL for s when it has
	already printed its own message.
	NOTE(review): flushu() presumably discards the rest of the buffered input, and
	rc_error() presumably does not return to the caller -- confirm against their
	definitions.
*/
void scanerror(const char *s) {
	flushu();
	rc_error(s);
}

/*
	getrestnum: read the remainder of an unsigned decimal number.

	On entry the file-scope variable c must hold the first digit, which the caller
	has already read. Further digits are read with gchar(); on return c holds the
	first character after the number (possibly EOF).

	Returns the value of the number. A value too large for an int is clamped to
	INT_MAX instead of overflowing (signed overflow is undefined behaviour); the
	remaining digits are still consumed so the caller's check of the following
	character is unaffected.
*/
static int getrestnum(void) {
	int num = c - '0';

	while ((c = gchar()) >= '0' && c <= '9') {
		int digit = c - '0';

		if (num > (INT_MAX - digit) / 10)
			num = INT_MAX;	/* 10 * num + digit would not fit */
		else
			num = 10 * num + digit;
	}

	return num;
}

/*
	inityy: reset the scanner's state (newline flag, word class, and hq) and make
	sure the word buffer exists. A buffer that has grown beyond BUFMAX bytes is
	released and replaced by a fresh one of BUFSIZE bytes; a smaller existing
	buffer is kept as it is.
*/
void inityy(void) {
	hq = NULL;
	word = NW;
	newline = FALSE;

	if (buf != NULL) {
		if (bufsize <= BUFMAX)
			return;		/* current buffer is fine */
		efree(buf);
		bufsize = BUFSIZE;
	}
	buf = ealloc(bufsize);
}

/*
	print_ps2: account for one more input line and, when rc is interactive, write
	the PS2 string to file descriptor 2.
*/
static void print_ps2(void) {
	++lineno;
	if (!interactive)
		return;
	fprint(2,"%s",PS2);
}
