commit 8480c7bcaea20aae2597b6a20387405ceba7d4b3
parent 0b7f291572407fee68bac011a6f4a2c509a55f21
Author: m21c <ho*******@gmail.com>
Date: Sat, 2 Oct 2021 14:16:41 +0200
extracted tokenize functions from gettok() for better code organization
Diffstat:
| M | compiler.c | | | 372 | +++++++++++++++++++++++++++++++++++++++---------------------------------------- |
1 file changed, 185 insertions(+), 187 deletions(-)
diff --git a/compiler.c b/compiler.c
@@ -943,63 +943,8 @@ error(SrcLoc *loc, const char *fmt, ...)
((indent) + (source)->tabwidth - ((indent) % (source)->tabwidth))
static int
-gettok(Source *source)
+tokenizealphanumeric(Source *source, register int c0)
{
- register int c0 = (uchar) source->line[source->currloc.column];
- static bool hasnewline = false;
-
- source->lastkind = source->tok.kind;
-
-skipwhite:
- if (hasnewline) {
- if (!mygetline(source)) {
- source->lastindent = 0;
- return source->tok.kind = 0;
- }
-
- c0 = source->line[(source->currloc.column = 0)];
- }
-
- if (source->currloc.column) {
- while (isspace(c0))
- c0 = source->line[++source->currloc.column];
-
- } else {
- source->lastindent = 0;
- while (isspace(c0)) {
- if (c0 == '\t') {
- source->lastindent = nextindent(
- source,
- source->lastindent
- );
- } else {
- ++source->lastindent;
- }
-
- c0 = source->line[++source->currloc.column];
- }
- }
-
- source->tok.type = prim + TUNDEFINED;
- source->tok.u.u = 0;
- source->tok.lhs = NULL;
- source->tok.rhs = NULL;
- source->tok.loc.column = source->currloc.column;
-
- /* get line */
- if (!c0 || c0 == '#') {
- if (hasnewline) {
- goto skipwhite;
- } else {
- hasnewline = true;
- return source->tok.kind = '\n';
- }
- }
-
- hasnewline = false;
-
- /* identifier or keyword */
- if (isalpha(c0) || c0 == '_') {
int keyword;
while (isalnum(c0) || c0 == '_')
@@ -1039,9 +984,90 @@ skipwhite:
return source->tok.kind = 'I';
}
- /* number literal */
- if (isdigit(c0) || (c0 == '.' &&
- isdigit(source->line[source->currloc.column+1])))
+static Type * /* Picks the Type of a floating-point literal from `end`, the text strtod() left unparsed. */
+suffixfloattype(Source *source, const char *end)
+{
+Type *ty = prim + TDOUBLE; /* result for an empty suffix; also what is returned after an error, if error() returns */
+
+if (*end == 0)
+return ty;
+
+/* FIXME(m21c): r-suffix might conflict with radix */
+if ((*end == 'f' || *end == 'F') && !end[1]) { /* lone f/F only; "f32"/"f64" are matched further down */
+ty = prim + TFLOAT;
+
+} else if (*end == 'l' || *end == 'L') {
+ty = prim + TDOUBLE;
+
+if (end[1]) /* any character after the l/L is rejected */
+goto errorfloat;
+} else if (!mystrcasecmp(end, "f32") || !mystrcasecmp(end, "r32")) { /* presumably case-insensitive compare; verify mystrcasecmp */
+ty = prim + TF32;
+
+} else if (!mystrcasecmp(end, "f64") || !mystrcasecmp(end, "r64")) {
+ty = prim + TF64;
+
+} else {
+errorfloat: /* also the target of the goto in the l/L branch above */
+error(&source->currloc, "invalid floating-point format");
+}
+
+return ty;
+}
+
+static Type * /* Picks the Type of an integer literal from `end`, the suffix text after its digits. */
+suffixinttype(Source *source, const char *end)
+{
+int typeid = TUINT - TINT; /* offset added to the TS8/TLONG/... ids below; presumably the signed-to-unsigned distance in prim, TODO confirm */
+
+switch (*end) {
+case 0: /* no suffix at all */
+return prim + TINFER;
+
+case 's': case 'S': case 'i': case 'I':
+typeid = 0; /* s/i prefix drops the offset; fallthrough to share the width parsing with u/U */
+
+case 'u': case 'U':
+++end; /* step over the s/i/u letter; the rest selects the width */
+if (*end == 0) {
+return prim + (typeid + TINFER);
+} else if (*end == '8') {
+typeid += TS8;
+
+if (end[1]) /* "8" must be the last character */
+goto errorint;
+
+return prim + typeid;
+} else if (!strcmp(end, "16")) {
+return prim + (typeid + TS16);
+} else if (!strcmp(end, "32")) {
+return prim + (typeid + TS32);
+} else if (!strcmp(end, "64")) {
+return prim + (typeid + TS64);
+} else if (!mystrcasecmp(end, "sz")) {
+return prim + (typeid + TSSIZE);
+} /* no width matched: fallthrough so forms such as "ul"/"ull" reach the l/ll checks */
+
+default: /* NOTE(review): reached without a prefix letter, typeid is still TUINT - TINT, so a bare "l"/"ll" gets that offset; confirm intended */
+if (!mystrcasecmp(end, "ll")) {
+return prim + (typeid + TLLONG);
+} else if (*end == 'l' || *end == 'L') {
+typeid += TLONG;
+
+if (end[1]) /* a single l/L must be the last character */
+goto errorint;
+
+return prim + typeid;
+}
+}
+
+errorint: /* reached by goto or by leaving the switch with nothing matched */
+error(&source->currloc, "invalid integer format");
+return prim + TINT;
+}
+
+static int
+tokenizenumber(Source *source, register int c0)
{
int l = c0, t = source->line[source->currloc.column+1], i, j;
bool hasdec = false, hasexp = false;
@@ -1100,39 +1126,8 @@ skipwhite:
strpbrk(source->stringbuf, "eEfF")))
{
source->tok.u.d = strtod(source->stringbuf, &end);
- source->tok.type = prim + TDOUBLE;
-
- if (*end != 0) {
- /* FIXME(m21c): r-suffix might conflict with radix */
- if ((*end == 'f' || *end == 'F') && !end[1]) {
- source->tok.type = prim + TFLOAT;
-
- } else if (*end == 'l' || *end == 'L') {
- source->tok.type = prim + TDOUBLE;
-
- if (end[1])
- goto errorfloat;
- } else if (!mystrcasecmp(end, "f32") ||
- !mystrcasecmp(end, "r32"))
- {
- source->tok.type = prim + TF32;
-
- } else if (!mystrcasecmp(end, "f64") ||
- !mystrcasecmp(end, "r64"))
- {
- source->tok.type = prim + TF64;
-
+ source->tok.type = suffixfloattype(source, end);
} else {
- errorfloat:
- error(
- &source->currloc,
- "invalid floating-point format"
- );
- }
- }
- } else {
- int typeid = TUINT - TINT;
-
if (mystrncasecmp(source->stringbuf, "0b", 2) == 0) {
source->tok.u.u = strtoull(
source->stringbuf + 2,
@@ -1147,73 +1142,15 @@ skipwhite:
}
- switch (*end) {
- case 0:
- typeid = TINFER;
- break;
-
- case 's': case 'S':
- case 'i': case 'I':
- typeid = 0;
-
- case 'u': case 'U':
- ++end;
- if (*end == 0) {
- typeid += TINFER;
-
- break;
- } else if (*end == '8') {
- typeid += TS8;
-
- if (end[1])
- goto errorint;
- break;
- } else if (!strcmp(end, "16")) {
- typeid += TS16;
-
- break;
- } else if (!strcmp(end, "32")) {
- typeid += TS32;
-
- break;
- } else if (!strcmp(end, "64")) {
- typeid += TS64;
-
- break;
- } else if (!mystrcasecmp(end, "sz")) {
- typeid += TSSIZE;
-
- break;
- }
-
- default:
- if (!mystrcasecmp(end, "ll")) {
- typeid += TLLONG;
-
- } else if (*end == 'l' || *end == 'L') {
- typeid += TLONG;
-
- if (end[1])
- goto errorint;
- } else {
- errorint:
- error(
- &source->currloc,
- "invalid integer format"
- );
-
- typeid = TINT;
- }
- }
-
- source->tok.type = prim + typeid;
+ source->tok.type = suffixinttype(source, end);
}
return source->tok.kind = 'N';
}
- /* string & character-literal */
- if (c0 == '"' || c0 == '\'') {
+static int
+tokenizestring(Source *source, register int c0)
+{
int delim = c0, j;
c0 = source->line[++source->currloc.column];
@@ -1254,11 +1191,8 @@ skipwhite:
goto stringeol;
default:
- error(
- &source->currloc,
- "invalid escape sequence '\\%c'",
- c0
- );
+ error(&source->currloc,
+ "invalid escape sequence '\\%c'", c0);
}
}
@@ -1271,10 +1205,7 @@ skipwhite:
if (c0 == 0) {
stringeol:
- error(
- &source->currloc,
- "unexpected end-of-line"
- );
+ error(&source->currloc, "unexpected end-of-line");
return source->tok.kind = '\n';
}
@@ -1288,20 +1219,87 @@ skipwhite:
);
return source->tok.kind = 'S';
+}
+
+static int
+gettok(Source *source)
+{
+ register int c0 = (uchar) source->line[source->currloc.column];
+ static bool hasnewline = false;
+
+ source->lastkind = source->tok.kind;
+
+skipwhite:
+ if (hasnewline) {
+ if (!mygetline(source)) {
+ source->lastindent = 0;
+ return source->tok.kind = 0;
+ }
+
+ c0 = source->line[(source->currloc.column = 0)];
+ }
+
+ if (source->currloc.column) {
+ while (isspace(c0))
+ c0 = source->line[++source->currloc.column];
+
+ } else {
+ source->lastindent = 0;
+ while (isspace(c0)) {
+ if (c0 == '\t') {
+ source->lastindent = nextindent(
+ source,
+ source->lastindent
+ );
+ } else {
+ ++source->lastindent;
+ }
+
+ c0 = source->line[++source->currloc.column];
+ }
+ }
+
+ source->tok.type = prim + TUNDEFINED;
+ source->tok.u.u = 0;
+ source->tok.lhs = NULL;
+ source->tok.rhs = NULL;
+ source->tok.loc.column = source->currloc.column;
+
+ /* get line */
+ if (!c0 || c0 == '#') {
+ if (hasnewline) {
+ goto skipwhite;
+ } else {
+ hasnewline = true;
+ return source->tok.kind = '\n';
+ }
+ }
+
+ hasnewline = false;
+
+ /* identifier or keyword */
+ if (isalpha(c0) || c0 == '_') {
+ return tokenizealphanumeric(source, c0);
+ }
+
+ /* number literal */
+ if (isdigit(c0) || (c0 == '.' &&
+ isdigit(source->line[source->currloc.column+1])))
+ {
+ return tokenizenumber(source, c0);
+ }
+
+ /* string & character-literal */
+ if (c0 == '"' || c0 == '\'') {
+ return tokenizestring(source, c0);
}
/* delimiters */
switch (c0) {
- case ',':
- case ';':
- case '@':
- case ':':
- case '{':
- case '}':
- case ']':
- case ')':
- case '[':
- case '(':
+ case ',': case ';': case '@': case ':':
+ case '{': case '}':
+ case ']': case '[':
+ case '(': case ')':
++source->currloc.column;
return source->tok.kind = c0;
}
@@ -1315,30 +1313,30 @@ skipwhite:
switch (source->line[source->currloc.column++]) {
case '.':
/* tok.kind = select('.', ORANGE, ODISP); */
- source->tok.kind = ODISP;
+ c0 = ODISP;
goto joinop;
case '*':
- source->tok.kind = select('=', OMULA, OMUL);
+ c0 = select('=', OMULA, OMUL);
goto joinop;
case '/':
- source->tok.kind = select('=', ODIVA, ODIV);
+ c0 = select('=', ODIVA, ODIV);
goto joinop;
case '%':
- source->tok.kind = select('=', OMODA, OMOD);
+ c0 = select('=', OMODA, OMOD);
goto joinop;
case '<':
- source->tok.kind = select('=', OLEQ,
+ c0 = select('=', OLEQ,
select('<',
select('=', OLSHA, OLSH),
OLET));
goto joinop;
case '>':
- source->tok.kind = select('=', OGEQ,
+ c0 = select('=', OGEQ,
select('>',
select('>',
select('=', OARSHA, OARSH),
@@ -1347,37 +1345,37 @@ skipwhite:
goto joinop;
case '&':
- source->tok.kind = select('=', OANDA, select('&', OLAND, OBAND));
+ c0 = select('=', OANDA, select('&', OLAND, OBAND));
goto joinop;
case '+':
- source->tok.kind = select('=', OADDA, select('+', OSUFINC, OADD));
+ c0 = select('=', OADDA, select('+', OSUFINC, OADD));
goto joinop;
case '-':
- source->tok.kind = select('=', OSUBA, select('-', OSUFDEC, OSUB));
+ c0 = select('=', OSUBA, select('-', OSUFDEC, OSUB));
goto joinop;
case '|':
- source->tok.kind = select('=', OORA, select('|', OLOR, OBOR));
+ c0 = select('=', OORA, select('|', OLOR, OBOR));
goto joinop;
case '^':
- source->tok.kind = select('=', OXORA, OXOR);
+ c0 = select('=', OXORA, OXOR);
goto joinop;
case '!':
- source->tok.kind = select('=', ONEQ, OLNOT);
+ c0 = select('=', ONEQ, OLNOT);
goto joinop;
case '~':
- source->tok.kind = select('=', OFLIP, OBNOT);
+ c0 = select('=', OFLIP, OBNOT);
goto joinop;
case '=':
- source->tok.kind = select('=', select('=', OIDENT, OEQU), OASS);
+ c0 = select('=', select('=', OIDENT, OEQU), OASS);
joinop:
- return source->tok.kind;
+ return source->tok.kind = c0;
default:
error(&source->currloc, "invalid input character '%c'", c0);