commit 283ecfcc4c23b0196e824239c103fae99b67ef29
parent 049ae43efd3e365198d94cbc8e92688372e468c0
Author: m21c <ho*******@gmail.com>
Date: Sat, 15 Jan 2022 22:33:34 +0100
cleaned up code, especially in the tokenizer
Diffstat:
| M | compiler.c | | | 233 | +++++++++++++++++++++++++++++++++++++++++++++++-------------------------------- |
1 file changed, 139 insertions(+), 94 deletions(-)
diff --git a/compiler.c b/compiler.c
@@ -951,13 +951,22 @@ error(SrcLoc *loc, const char *fmt, ...)
#define nextindent(source, indent) \
((indent) + (source)->tabwidth - ((indent) % (source)->tabwidth))
+/*
+ * Character-access helpers for the tokenizer cursor.  Each one indexes
+ * (source)->line relative to (source)->currloc.column and yields the byte
+ * converted to unsigned char.  The conversion matters because the results
+ * are fed to the <ctype.h> classifiers (isalnum, isspace, isdigit,
+ * tolower), whose behavior is undefined for negative arguments other
+ * than EOF; a plain char holding a byte >= 0x80 may be negative.
+ * The helpers produce values, not lvalues.
+ */
+
+/* Character under the cursor; the cursor is not moved. */
+#define peekchar(source) \
+ ((unsigned char) (source)->line[(source)->currloc.column])
+
+/* Character one column past the cursor; the cursor is not moved. */
+#define peeknextchar(source) \
+ ((unsigned char) (source)->line[(source)->currloc.column + 1])
+
+/* Advance the cursor by one column, then yield the character there. */
+#define nextchar(source) \
+ ((unsigned char) (source)->line[++(source)->currloc.column])
+
static int
-tokenizealphanumeric(Source *source, register int c0)
+tokenizealphanumeric(Source *source, register int ch)
{
int keyword;
- while (isalnum(c0) || c0 == '_')
- c0 = source->line[++source->currloc.column];
+ while (isalnum(ch) || ch == '_')
+ ch = nextchar(source);
keyword = getkeyword(
source->line + source->tok.loc.column,
@@ -1009,6 +1018,7 @@ suffixfloattype(Source *source, const char *end)
if (end[1])
goto errorfloat;
+
} else if (!mystrcasecmp(end, "f32") || !mystrcasecmp(end, "r32")) {
ty = prim + TF32;
@@ -1035,10 +1045,12 @@ suffixinttype(Source *source, const char *end)
case 's': case 'S': case 'i': case 'I':
typeid = 0;
+ /* fallthrough */
case 'u': case 'U':
++end;
if (*end == 0) {
return prim + (typeid + TINFER);
+
} else if (*end == '8') {
typeid += TS8;
@@ -1046,19 +1058,25 @@ suffixinttype(Source *source, const char *end)
goto errorint;
return prim + typeid;
+
} else if (!strcmp(end, "16")) {
return prim + (typeid + TS16);
+
} else if (!strcmp(end, "32")) {
return prim + (typeid + TS32);
+
} else if (!strcmp(end, "64")) {
return prim + (typeid + TS64);
+
} else if (!mystrcasecmp(end, "sz")) {
return prim + (typeid + TSSIZE);
}
+ /* fallthrough */
default:
if (!mystrcasecmp(end, "ll")) {
return prim + (typeid + TLLONG);
+
} else if (*end == 'l' || *end == 'L') {
typeid += TLONG;
@@ -1075,30 +1093,30 @@ errorint:
}
static int
-tokenizenumber(Source *source, register int c0)
+tokenizenumber(Source *source, register int ch)
{
- int l = c0, t = source->line[source->currloc.column+1], i, j;
+ int l = ch, t = peeknextchar(source), i, j;
bool hasdec = false, hasexp = false;
char *end;
advancenum:
- while (isalnum(c0) || c0 == '_' || (c0 == '.' &&
- source->line[source->currloc.column+1] != '.' && !hasdec))
+ while (isalnum(ch) || ch == '_' ||
+ (ch == '.' && peeknextchar(source) != '.' && !hasdec))
{
- if (c0 != '_')
- l = c0;
- if (c0 == '.')
+ if (ch != '_')
+ l = ch;
+ if (ch == '.')
hasdec = true;
- c0 = source->line[++source->currloc.column];
+ ch = nextchar(source);
}
- if (hasdec && !hasexp && (c0 == '+' || c0 == '-')) {
+ if (hasdec && !hasexp && (ch == '+' || ch == '-')) {
t = tolower(t);
l = tolower(l);
if ((l == 'e' && t != 'x') || (l == 'p' && t == 'x')) {
- c0 = source->line[++source->currloc.column];
+ ch = nextchar(source);
hasexp = true;
goto advancenum;
@@ -1134,6 +1152,7 @@ advancenum:
{
source->tok.u.d = strtod(source->stringbuf, &end);
source->tok.type = suffixfloattype(source, end);
+
} else {
if (mystrncasecmp(source->stringbuf, "0b", 2) == 0) {
source->tok.u.u = strtoull(
@@ -1156,41 +1175,41 @@ advancenum:
}
static int
-tokenizestring(Source *source, register int c0)
+tokenizestring(Source *source, register int ch)
{
- int delim = c0, j;
+ int delim = ch, j;
- c0 = source->line[++source->currloc.column];
+ ch = nextchar(source);
source->tok.loc.column = source->currloc.column;
j = source->currloc.column;
- while (c0 != delim && c0 != 0) {
- if (c0 == '\\') {
- c0 = source->line[++source->currloc.column];
+ while (ch != delim && ch != 0) {
+ if (ch == '\\') {
+ ch = nextchar(source);
- switch (c0) {
+ switch (ch) {
case '\\':
- c0 = '\\';
+ ch = '\\';
break;
case 'n':
- c0 = '\n';
+ ch = '\n';
break;
case 'r':
- c0 = '\r';
+ ch = '\r';
break;
case 't':
- c0 = '\t';
+ ch = '\t';
break;
case '\'':
- c0 = '\'';
+ ch = '\'';
break;
case '"':
- c0 = '"';
+ ch = '"';
break;
/* TODO(m21c): read more escape sequences */
@@ -1199,18 +1218,18 @@ tokenizestring(Source *source, register int c0)
default:
error(&source->currloc,
- "invalid escape sequence '\\%c'", c0);
+ "invalid escape sequence '\\%c'", ch);
}
}
- source->line[j++] = c0;
- c0 = source->line[++source->currloc.column];
+ source->line[j++] = ch;
+ ch = nextchar(source);
}
++source->currloc.column;
source->line[j++] = 0;
- if (c0 == 0) {
+ if (ch == 0) {
stringeol:
error(&source->currloc, "unexpected end-of-line");
@@ -1231,7 +1250,7 @@ tokenizestring(Source *source, register int c0)
static int
gettok(Source *source)
{
- register int c0 = (uchar) source->line[source->currloc.column];
+ register int ch = (uchar) peekchar(source);
static bool hasnewline = false;
source->lastkind = source->tok.kind;
@@ -1243,17 +1262,18 @@ skipwhite:
return source->tok.kind = 0;
}
- c0 = source->line[(source->currloc.column = 0)];
+ source->currloc.column = 0;
+ ch = peekchar(source);
}
if (source->currloc.column) {
- while (isspace(c0))
- c0 = source->line[++source->currloc.column];
+ while (isspace(ch))
+ ch = nextchar(source);
} else {
source->lastindent = 0;
- while (isspace(c0)) {
- if (c0 == '\t') {
+ while (isspace(ch)) {
+ if (ch == '\t') {
source->lastindent = nextindent(
source,
source->lastindent
@@ -1262,7 +1282,7 @@ skipwhite:
++source->lastindent;
}
- c0 = source->line[++source->currloc.column];
+ ch = nextchar(source);
}
}
@@ -1273,7 +1293,7 @@ skipwhite:
source->tok.loc.column = source->currloc.column;
/* get line */
- if (!c0 || c0 == '#') {
+ if (!ch || ch == '#') {
if (hasnewline) {
goto skipwhite;
} else {
@@ -1285,109 +1305,107 @@ skipwhite:
hasnewline = false;
/* identifier or keyword */
- if (isalpha(c0) || c0 == '_') {
- return tokenizealphanumeric(source, c0);
- }
+ if (isalpha(ch) || ch == '_')
+ return tokenizealphanumeric(source, ch);
/* number literal */
- if (isdigit(c0) || (c0 == '.' &&
- isdigit(source->line[source->currloc.column+1])))
- {
- return tokenizenumber(source, c0);
- }
+ if (isdigit(ch) || (ch == '.' && isdigit(peeknextchar(source))))
+ return tokenizenumber(source, ch);
/* string & character-literal */
- if (c0 == '"' || c0 == '\'') {
- return tokenizestring(source, c0);
- }
+ if (ch == '"' || ch == '\'')
+ return tokenizestring(source, ch);
/* delimiters */
- switch (c0) {
+ switch (ch) {
case ',': case ';': case '@': case ':':
case '{': case '}':
case ']': case '[':
case '(': case ')':
++source->currloc.column;
- return source->tok.kind = c0;
+ return source->tok.kind = ch;
}
/* operators */
#define select(ch, then, otherwise) ( \
- source->line[source->currloc.column] == (ch) ? \
+ peekchar(source) == (ch) ? \
++source->currloc.column, (then) : \
(otherwise) \
)
- switch (source->line[source->currloc.column++]) {
+
+ ++source->currloc.column;
+ switch (ch) {
case '.':
/* tok.kind = select('.', ORANGE, ODISP); */
- c0 = ODISP;
- goto joinop;
+ ch = ODISP;
+ break;
case '*':
- c0 = select('=', OMULA, OMUL);
- goto joinop;
+ ch = select('=', OMULA, OMUL);
+ break;
case '/':
- c0 = select('=', ODIVA, ODIV);
- goto joinop;
+ ch = select('=', ODIVA, ODIV);
+ break;
case '%':
- c0 = select('=', OMODA, OMOD);
- goto joinop;
+ ch = select('=', OMODA, OMOD);
+ break;
case '<':
- c0 = select('=', OLEQ,
+ ch = select('=', OLEQ,
select('<',
select('=', OLSHA, OLSH),
OLET));
- goto joinop;
+ break;
case '>':
- c0 = select('=', OGEQ,
+ ch = select('=', OGEQ,
select('>',
select('>',
select('=', OARSHA, OARSH),
select('=', ORSHA, ORSH)),
OGRT));
- goto joinop;
+ break;
case '&':
- c0 = select('=', OANDA, select('&', OLAND, OBAND));
- goto joinop;
+ ch = select('=', OANDA, select('&', OLAND, OBAND));
+ break;
case '+':
- c0 = select('=', OADDA, select('+', OSUFINC, OADD));
- goto joinop;
+ ch = select('=', OADDA, select('+', OSUFINC, OADD));
+ break;
case '-':
- c0 = select('=', OSUBA, select('-', OSUFDEC, OSUB));
- goto joinop;
+ ch = select('=', OSUBA, select('-', OSUFDEC, OSUB));
+ break;
case '|':
- c0 = select('=', OORA, select('|', OLOR, OBOR));
- goto joinop;
+ ch = select('=', OORA, select('|', OLOR, OBOR));
+ break;
case '^':
- c0 = select('=', OXORA, OXOR);
- goto joinop;
+ ch = select('=', OXORA, OXOR);
+ break;
case '!':
- c0 = select('=', ONEQ, OLNOT);
- goto joinop;
+ ch = select('=', ONEQ, OLNOT);
+ break;
case '~':
- c0 = select('=', OFLIP, OBNOT);
- goto joinop;
+ ch = select('=', OFLIP, OBNOT);
+ break;
case '=':
- c0 = select('=', select('=', OIDENT, OEQU), OASS);
- joinop:
- return source->tok.kind = c0;
+ ch = select('=', select('=', OIDENT, OEQU), OASS);
+ break;
default:
- error(&source->currloc, "invalid input character '%c'", c0);
+ error(&source->currloc, "invalid input character '%c'", ch);
return 'Z';
}
+
+ return source->tok.kind = ch;
#undef select
}
@@ -1423,7 +1441,8 @@ getunary(Kind kind)
case OSUB: return OMINUS;
case OSUFINC: return OINC;
case OSUFDEC: return ODEC;
- default: return 0;
+ default:
+ return 0;
}
}
@@ -1458,7 +1477,8 @@ getunarysuffix(Source *source)
switch (kind) {
case '(': return OCALL;
case '[': return OARRAY;
- default: return 0;
+ default:
+ return 0;
}
}
@@ -1521,6 +1541,7 @@ deletenode(Node *node)
} else if (node->kind == ASTMT) {
if (node->lhs)
deletenode(node->lhs);
+
} else {
if (node->rhs)
deletenode(node->rhs);
@@ -1712,6 +1733,7 @@ deferfuncenv(Source *source, int keydeclinfunc)
if (funcenv) {
if (!funcenv->pending) {
funcenv->pending = true;
+
if (!source->pendingenvhead) {
source->pendingenvtail = funcenv;
source->pendingenvhead = funcenv;
@@ -1965,6 +1987,7 @@ checkend(Source *source, bool hastail, int needindent,
{
if (getkind(source) == '\n') {
gettok(source);
+
if (getkind(source) == ';') {
error(getloc(source), expecterrmsg);
gettok(source);
@@ -2017,12 +2040,10 @@ stmtlist(Source *source, int indent, EnvKind envkind,
for (;;) {
Node *stmt;
- if (checkend(source, !!tail, needindent,
- "expected expression"))
+ if (checkend(source, !!tail, needindent, "expected expression"))
break;
stmt = exprlist(source, false, NULL);
-
stmt = tokennode(source, stmt);
stmt->kind = ASTMT;
@@ -2139,6 +2160,7 @@ redodeclaration:
gettok(source);
if (tryreadtype && (envkind == SSTRUCT || envkind == SUNION)) {
+
if (!isbasicdelimiter(getkind(source)) &&
getkind(source) != '(')
{
@@ -2202,6 +2224,7 @@ redodeclaration:
result = tokennode(source, NULL);
result->kind = 'T';
result->type = ty;
+
return result;
}
@@ -2520,6 +2543,7 @@ readatom(Source *source, int flags)
gettok(source);
lhs = declaration(source, gettype(source, type), false);
} while (0);
+
break;
case 'N':
@@ -2531,6 +2555,7 @@ readatom(Source *source, int flags)
if (flags & QCONST) {
/* TODO(m21c): const - conversion */
}
+
break;
case KVAR:
@@ -2579,6 +2604,7 @@ readatom(Source *source, int flags)
} else {
lhs->lhs = readatom(source, 0);
}
+
break;
case KBITCAST:
@@ -2627,6 +2653,7 @@ readatom(Source *source, int flags)
/* if is atom */
if (!isdelimiter(source->tok.kind))
lhs->rhs = exprlist(source, false, NULL);
+
break;
case KDO:
@@ -2659,7 +2686,6 @@ readatom(Source *source, int flags)
gettok(source);
lhs->u.payload = readexpr(source, POR);
lhs->lhs = stmtlist(source, indent, SWHILE, NULL, false);
-
goto joinelse;
case KIF:
@@ -2718,6 +2744,7 @@ readatom(Source *source, int flags)
error(getloc(source), "expected identifier");
lhs->rhs = tokennode(source, NULL);
+
} else if (getkind(source) == '(') {
gettok(source);
@@ -2728,6 +2755,7 @@ readatom(Source *source, int flags)
expect(source, ')', "expected ')'");
continue;
+
} else if (getkind(source) == '[') {
gettok(source);
@@ -3097,6 +3125,7 @@ conv(Node *node)
if (ty->kind == TINFER)
return wrap(prim + TINT, node);
+
if (ty->kind == TUINFER)
return wrap(prim + TUINT, node);
@@ -3179,12 +3208,14 @@ resolvepending(Env *env, Node *expr)
if (!decl) {
error(&expr->loc, "'%s' undeclared",
getstring(idents, expr->u.key));
+
return expr;
}
if (decl->kind != DVAR && decl->kind != DFUNCTION) {
error(&expr->loc, "'%s' is not a variable nor a function",
getstring(idents, expr->u.key));
+
return expr;
}
@@ -3462,11 +3493,13 @@ typecheck(Env *env, Node *expr)
advancestmt:
lhs = typecheck(env, lhs);
rhs->lhs = lhs;
+
if (rhs->rhs) {
assert(rhs->rhs->kind == ASTMT);
rhs = rhs->rhs, lhs = rhs->lhs;
goto advancestmt;
}
+
return expr;
case ADECL:
@@ -3721,7 +3754,6 @@ foldexpr(Env *env, Node *expr)
assert(expr->u.env);
expr->lhs = foldexpr(expr->u.env, expr->lhs);
-
return expr;
case ASTMT:
@@ -3729,11 +3761,13 @@ foldexpr(Env *env, Node *expr)
advancestmt:
lhs = foldexpr(env, lhs);
rhs->lhs = lhs;
+
if (rhs->rhs) {
assert(rhs->rhs->kind == ASTMT);
rhs = rhs->rhs, lhs = rhs->lhs;
goto advancestmt;
}
+
return expr;
case ACOMMA:
@@ -3767,7 +3801,6 @@ foldexpr(Env *env, Node *expr)
deletenode(lhs);
expr->kind = 'N';
-
return expr;
case ACONV:
@@ -3836,7 +3869,7 @@ promptenvpath(Env* currenv)
envstring = getstring(idents, key);
}
- fprintf(stdout, "%s/", envstring);
+ fprintf(stdout, "# scope: %s/", envstring);
}
}
@@ -3845,13 +3878,15 @@ tryprompt(Source *source, const char ch)
{
if (source->handlereplprompt) {
Env *currenv = source->currenv;
+
if (ch == '.' && currenv && currenv->kind != STOPLEVEL) {
- fputs("\e[34m", stdout);
+ fputs("\e[1;30m", stdout);
promptenvpath(currenv);
fprintf(stdout, "\n\e[35m%c \e[0m", ch);
} else {
fprintf(stdout, "\e[35m%c \e[0m", ch);
}
+
} else if (source->filein == stdin) {
source->handlereplprompt = true;
}
@@ -4025,6 +4060,7 @@ printtypetail(FILE *out, Type *type, int indent)
} else {
n += printtypetail(out, type->u.rtarget, indent);
}
+
break;
#define typecase(type, str) \
@@ -4341,6 +4377,7 @@ printexpr(FILE *out, Node *expr, int indent)
putc(' ', out), ++n;
}
}
+
n += printoperant(out, expr->lhs, PUNARY, false, indent);
}
} else {
@@ -4381,6 +4418,7 @@ printexpr(FILE *out, Node *expr, int indent)
n += fprintf(out, "true");
else
n += fprintf(out, "0x%016lx", expr->u.u);
+
break;
case TPTR:
@@ -4396,6 +4434,7 @@ printexpr(FILE *out, Node *expr, int indent)
break;
}
+
break;
case 'S':
@@ -4499,6 +4538,7 @@ printexpr(FILE *out, Node *expr, int indent)
if (expr->rhs)
n += printclause(out, expr->rhs, indent);
+
break;
case ASTMT:
@@ -4518,6 +4558,7 @@ printexpr(FILE *out, Node *expr, int indent)
expr = expr->rhs;
goto advancestmt;
}
+
break;
case ASCOPE:
@@ -4553,6 +4594,7 @@ printexpr(FILE *out, Node *expr, int indent)
default:
n += highlight(out, HLINFO);
n += fprintf(out, "node(%u)", expr->kind);
+
if (expr->lhs) {
n += fprintf(out, " -> ");
n += printsubexpr(out, expr->lhs, true, indent);
@@ -4764,8 +4806,11 @@ main(int argc, char **argv)
if (source->lastkind != ';' && source->lastkind != '\n') {
error(getloc(source), "expected new line");
- while (getkind(source) != ';' && getkind(source) != '\n' && getkind(source) != 0)
+ while (getkind(source) != ';' &&
+ getkind(source) != '\n' && getkind(source) != 0)
+ {
gettok(source);
+ }
if (source->filein == stdin) {
highlight(stdout, HLPROMPT);