Aria

A low-level systems programming language
git clone git://git.m21c.me/Aria.git
Log | Files | Refs | LICENSE

commit 5ebf14dd5565d7acaa934e6e9d7dc6dbec90fc16
parent 39c53cc7fa3a88942d83ca9852340efd78d7225b
Author: m21c  <ho*******@gmail.com>
Date:   Thu, 16 Sep 2021 20:05:34 +0200

worked on record parsing + implicit forward declarations + keyword constants + refactoring

Diffstat:
M .gitignore | 10 ++++++++--
M compiler.c | 1058 +++++++++++++++++++++++++++++++++++++++++++------------------------------------
2 files changed, 579 insertions(+), 489 deletions(-)

diff --git a/.gitignore b/.gitignore @@ -1,13 +1,19 @@ .oldcode/* .vscode/* +.kdev4/* bin/* spec/* test/* debug*.cmd debug*.sh +functions.sh +cscope.* *.txt *.todo -*.exe -\ No newline at end of file +*.exe +*.kdev4 +*.kate-swp +*.json +\ No newline at end of file diff --git a/compiler.c b/compiler.c @@ -7,10 +7,10 @@ #include <stdbool.h> #include <string.h> +typedef unsigned char uchar; typedef unsigned int uint; - /* - forward declarations - */ typedef @@ -70,6 +70,7 @@ typedef enum Kind { KULONG, KLONG, KULLONG, KLLONG, KFLOAT, KDOUBLE, KLDOUBLE, KUSIZE, KSSIZE, + KFALSE, KTRUE, KNULL, KUSE, KNOT, KAND, KOR, KIS, KEXTERN, KINTERN, KSTATIC, KCONST, KVAR, KBREAK, KCONTINUE, KGOTO, KRETURN, @@ -176,6 +177,7 @@ enum EnvKind { SPARAMLIST, SFUNCTION, SSCOPE, + SRECORD, /* SUNION, SSTRUCT, @@ -341,6 +343,8 @@ typedef struct Source { Env *headenv, *currenv; Env *pendingenvhead, *pendingenvtail; + Env *implicitenv; + /* parser state */ Node *lastis; @@ -426,7 +430,7 @@ const int keywordtypeids[] = { [OSTART] = 0 }; -const char *nodestrings[] = { +const char *const nodestrings[] = { /* Keywords */ [KVOID] = "void", [KBOOL] = "bool", [KU8] = "u8", [KS8] = "s8", @@ -442,6 +446,8 @@ const char *nodestrings[] = { [KFLOAT] = "float", [KDOUBLE] = "double", [KLDOUBLE] = "ldouble", [KUSIZE] = "usize", [KSSIZE] = "ssize", + [KFALSE] = "false", [KTRUE] = "true", + [KNULL] = "null", [KUSE] = "use", [KNOT] = "not", [KAND] = "and", [KOR] = "or", [KIS] = "is", @@ -640,7 +646,7 @@ mystrcasecmp(const char *str1, const char *str2) { /* - pre-lexer - */ -void +static void tryprompt(Source *source, const char ch) { if (source->handlereplprompt) { fprintf(stdout, "\e[35m%c \e[0m", ch); @@ -649,7 +655,7 @@ tryprompt(Source *source, const char ch) { } } -bool +static bool mygetline(Source *source) { int i, l, c; FILE *in = source->filein; @@ -726,7 +732,7 @@ advance: const char *keywordkeys[KEYWORD_MAP_SIZE]; int keywordvals[KEYWORD_MAP_SIZE]; -int +static int strnhash(const char *str, int n) 
{ int hash = 5381, i; for (i = 0; i < n && str[i]; ++i) @@ -734,7 +740,7 @@ strnhash(const char *str, int n) { return hash; } -void +static void initkeywords(void) { int i, j, h; for (i = 0; i < lengthof(keywordlengths); ++i) { @@ -762,7 +768,7 @@ initkeywords(void) { */ } -int +static int getkeyword(const char *str, int n) { int i, h = strnhash(str, n) & (lengthof(keywordkeys) - 8); @@ -799,7 +805,7 @@ struct StringMap { StringMap idents; StringMap strings; -void +static void initstrmap(StringMap *map) { map->keys = calloc(32, sizeof(int)); map->keyscap = 32; @@ -811,7 +817,7 @@ initstrmap(StringMap *map) { assert(map->vals); } -void +static void disposestrmap(StringMap *map) { int i; for (i = map->valslen - 1; i >= 0; --i) { @@ -851,7 +857,7 @@ redo: int auxthen; -int +static int getstringkey(StringMap *map, const char *str, int n) { int *keys = map->keys; StringEntry *vals = map->vals; @@ -884,7 +890,7 @@ getstringkey(StringMap *map, const char *str, int n) { map->valscap = cap; } - newstr = calloc(n + 1, sizeof(char*)); + newstr = calloc(n + 1, sizeof(char*)); /* TODO(m21c): sizeof(char*) --> sizeof(char) ? */ assert(newstr); memcpy(newstr, str, n); @@ -902,17 +908,17 @@ getstringkey(StringMap *map, const char *str, int n) { /* - error reporting - */ -int +static int warn(SrcLoc *loc, const char *fmt, ...) { va_list ap; int n; const char *filename = loc ? loc->filename : "<unknown-source>"; - int line = loc ? loc->line : 1; - int column = loc ? loc->column : 0; + uint line = loc ? loc->line : 1; + uint column = loc ? loc->column : 0; va_start(ap, fmt); - n = fprintf(stderr, "%s:%i:%i: warning: ", + n = fprintf(stderr, "%s:%u:%u: warning: ", filename, line, column + 1); n += vfprintf(stderr, fmt, ap); n += fprintf(stderr, "\n"); @@ -921,17 +927,17 @@ warn(SrcLoc *loc, const char *fmt, ...) { return n; } -int +static int error(SrcLoc *loc, const char *fmt, ...) { va_list ap; int n; const char *filename = loc ? 
loc->filename : "<unknown-source>"; - int line = loc ? loc->line : 1; - int column = loc ? loc->column : 0; + uint line = loc ? loc->line : 1; + uint column = loc ? loc->column : 0; va_start(ap, fmt); - n = fprintf(stderr, "%s:%i:%i: error: ", + n = fprintf(stderr, "%s:%u:%u: error: ", filename, line, column + 1); n += vfprintf(stderr, fmt, ap); n += fprintf(stderr, "\n"); @@ -947,9 +953,9 @@ error(SrcLoc *loc, const char *fmt, ...) { #define nextindent(source, indent) \ ((indent) + (source)->tabwidth - ((indent) % (source)->tabwidth)) -int +static int gettok(Source *source, bool haslhs) { - register int c0 = source->line[source->currloc.column]; + register int c0 = (uchar) source->line[source->currloc.column]; static bool hasnewline = false; source->lastkind = source->tok.kind; @@ -1044,19 +1050,17 @@ skipwhite: } /* number literal */ - if (isdigit(c0) || - c0 == '.' && - isdigit(source->line[source->currloc.column+1])) + if (isdigit(c0) || (c0 == '.' && + isdigit(source->line[source->currloc.column+1]))) { int l = c0, t = source->line[source->currloc.column+1], i, j; bool hasdec = false, hasexp = false; char *end; advancenum: - while (isalnum(c0) || c0 == '_' || - c0 == '.' && + while (isalnum(c0) || c0 == '_' || (c0 == '.' && source->line[source->currloc.column+1] != '.' 
&& - !hasdec) + !hasdec)) { if (c0 != '_') l = c0; @@ -1070,7 +1074,7 @@ skipwhite: t = tolower(t); l = tolower(l); - if (l == 'e' && t != 'x' || l == 'p' && t == 'x') { + if ((l == 'e' && t != 'x') || (l == 'p' && t == 'x')) { c0 = source->line[++source->currloc.column]; hasexp = true; @@ -1102,8 +1106,8 @@ skipwhite: source->stringbuf[j] = 0; if (strpbrk(source->stringbuf, ".pPrR") || - !strpbrk(source->stringbuf, "xX") && - strpbrk(source->stringbuf, "eEfF")) + (!strpbrk(source->stringbuf, "xX") && + strpbrk(source->stringbuf, "eEfF"))) { source->tok.u.d = strtod(source->stringbuf, &end); source->tok.type = prim + TDOUBLE; @@ -1416,7 +1420,7 @@ skipwhite: Node nodebuf[4096]; int nodetop; -Node * +static Node * makenode(Node *tok, Node *lhs) { Node *node = nodebuf + nodetop++; @@ -1434,7 +1438,7 @@ makenode(Node *tok, Node *lhs) { Type typebuf[4096]; int typetop; -Type * +static Type * maketype(SrcLoc *loc, Type *orig, Type *target) { Type *ty = typebuf + typetop++; @@ -1452,7 +1456,7 @@ maketype(SrcLoc *loc, Type *orig, Type *target) { Env envbuf[4096]; int envtop; -Decl * +static Decl * finddeclinenv(int key, Env *env) { const int cacheindex = (key >> 3) & 0x3f; const int cachebit = 1 << (key & 0x03); @@ -1470,16 +1474,15 @@ finddeclinenv(int key, Env *env) { return NULL; } -Decl * -finddeclaration(Env *startenv, int key) { +static Decl * +finddeclaration(Source *source, Env *startenv, int key) { const int cacheindex = (key >> 3) & 0x3f; const int cachebit = 1 << (key & 0x03); Env *env; + Decl *decl; for (env = startenv; env; env = env->below) { - Decl *decl; - /* NOTE(m21c): look-up exclusion list first. 
* If found: only lookup in found env */ /* FIXME(m21c): make a separate list, and not use excludehead, @@ -1501,12 +1504,26 @@ finddeclaration(Env *startenv, int key) { } } + if (!source) + return NULL; + + env = source->implicitenv; + + if ((env->keycache[cacheindex] & cachebit) == 0) + return NULL; + + for (decl = env->head; decl; decl = decl->next) { + if (decl->key == key) + return decl; + } + return NULL; } -Env * +static Env * setheadenv(Source *source, EnvKind kind) { - /* NOTE(m21c): this might only be useful for parameter=>function env translation */ + /* NOTE(m21c): this might only be useful for parameter => function env + * translation */ Env *env = envbuf + envtop++; env->kind = kind; @@ -1525,8 +1542,10 @@ setheadenv(Source *source, EnvKind kind) { return env; } -Env * +static Env * pushenv(Source *source, EnvKind kind) { + Env *env; + if (source->headenv) { source->headenv->kind = kind; @@ -1534,25 +1553,24 @@ pushenv(Source *source, EnvKind kind) { source->headenv = NULL; return source->currenv; - } else { - Env *env = envbuf + envtop++; + } - env->kind = kind; - /* TODO(m21c): make sure that source->tok.loc is the correct - * source-location. */ - /* TODO(m21c): maybe use getloc(source) instead of - * &source->tok.loc and move the declaration of - * getloc() up in the source-code. */ - env->loc = source->tok.loc; - env->below = source->currenv; + env = envbuf + envtop++; + env->kind = kind; + /* TODO(m21c): make sure that source->tok.loc is the correct + * source-location. */ + /* TODO(m21c): maybe use getloc(source) instead of + * &source->tok.loc and move the declaration of + * getloc() up in the source-code. 
*/ + env->loc = source->tok.loc; + env->below = source->currenv; - source->currenv = env; + source->currenv = env; - return env; - } + return env; } -Env * +static Env * popenv(Source *source) { Env *currenv = source->currenv; Env *env = currenv; @@ -1563,7 +1581,7 @@ popenv(Source *source) { return env; } -void +static void deferfuncenv(Source *source, int keydeclinfunc) { Env *env, *funcenv = NULL; @@ -1606,30 +1624,63 @@ deferfuncenv(Source *source, int keydeclinfunc) Decl declbuf[4096]; int decltop; -Decl * -makedecl(Source *source, int key, DeclKind kind) { +static void +appenddecltoenv(Decl *decl, Env *targetenv) { + const int key = decl->key; const int cacheindex = (key >> 3) & 0x3f; const int cachebit = 1 << (key & 0x03); - Env *currenv = source->currenv; - Decl *probe, *decl = declbuf + decltop++; + targetenv->keycache[cacheindex] |= cachebit; - decl->kind = kind; - /* TODO(m21c): make sure that source->tok.loc is the correct - * source-location. */ - /* TODO(m21c): maybe use getloc(source) instead of - * &source->tok.loc and move the declaration of - * getloc() up in the source-code. 
*/ - decl->loc = source->tok.loc; - decl->key = key; - decl->type = prim + TVOID; - decl->contentenv = NULL; + decl->parentenv = targetenv; + + if (targetenv->tail) { + targetenv->tail->next = decl; + decl->prev = targetenv->tail; + } else { + assert(targetenv->head == NULL); + targetenv->head = decl; + } + + targetenv->tail = decl; + +} + +static void +removedeclfromenv(Decl *decl) { + Env *sourceenv = decl->parentenv; + + if (decl->prev) + decl->prev->next = decl->next; + else + sourceenv->head = decl->next; + + if (decl->next) + decl->next->prev = decl->prev; + else + sourceenv->tail = decl->prev; + + decl->parentenv = NULL; + decl->next = decl->prev = NULL; +} + +static Decl * +makedecl(Source *source, int key, DeclKind kind) { + Env *currenv = source->currenv; + Decl *decl; assert(currenv); - probe = finddeclinenv(key, currenv); + decl = finddeclinenv(key, currenv); + + if (decl) { + if (decl->parentenv == source->implicitenv) { + removedeclfromenv(decl); + appenddecltoenv(decl, currenv); + + return decl; + } - if (probe) { /* TODO(m21c): make sure that source->tok.loc is the correct * source-location. */ /* TODO(m21c): maybe use getloc(source) instead of @@ -1642,19 +1693,32 @@ makedecl(Source *source, int key, DeclKind kind) { ); } - currenv->keycache[cacheindex] |= cachebit; + decl = declbuf + decltop++; + + decl->kind = kind; + /* TODO(m21c): make sure that source->tok.loc is the correct + * source-location. */ + /* TODO(m21c): maybe use getloc(source) instead of + * &source->tok.loc and move the declaration of + * getloc() up in the source-code. 
*/ + decl->loc = source->tok.loc; + decl->key = key; + decl->type = prim + TVOID; + decl->contentenv = NULL; + + appenddecltoenv(decl, currenv); - decl->parentenv = currenv; + return decl; +} - if (currenv->tail) { - currenv->tail->next = decl; - decl->prev = currenv->tail; - } else { - assert(currenv->head == NULL); - currenv->head = decl; - } +static Decl * +defertypedeclaration(Source *source, int key) { + Env *savedcurrenv = source->currenv; + Decl *decl; - currenv->tail = decl; + source->currenv = source->implicitenv; + decl = makedecl(source, key, DTYPE); + source->currenv = savedcurrenv; return decl; } @@ -1669,7 +1733,7 @@ makedecl(Source *source, int key, DeclKind kind) { #define getloc(source) \ (&(source)->tok.loc) -bool +static bool expect(Source *source, int kind, bool nexthaslhs, const char *fmt, ...) { va_list ap; @@ -1692,7 +1756,7 @@ expect(Source *source, int kind, bool nexthaslhs, const char *fmt, ...) { return true; } -int +static int qualifiers(Source *source, int allowmask) { int flags = 0, mask = allowmask; @@ -1713,7 +1777,7 @@ qualifiers(Source *source, int allowmask) { break; case KCONST: - f = QCONST; + f = QCONST, m = 0; break; case KVAR: @@ -1747,27 +1811,127 @@ finish: return flags; } -Node * -expr(Source *source, int minprec); +static bool +isnotatom(Source *source) { + switch ((int) getkind(source)) { + case 0: + case '\n': case ',': case ';': + case ':': + case ')': case ']': case '}': + case KELSE: + case KUNTIL: + return true; + } + + if (getnumops(getkind(source)) && getprec(getkind(source)) != PUNARY) + return true; -Type * -getbasetype(Source *source, int flags) { - Type *result; + return false; +} - if (getkind(source) == 'I') { - /* TODO(m21c): check/read type identifier */ - return NULL; - } else if (getkind(source) != 'T') { - return NULL; +static bool +checkend(Source *source, bool hastail, int needindent, + const char *expecterrmsg) +{ + if (getkind(source) == '\n') { + gettok(source, false); + if (getkind(source) == ';') 
+ error(getloc(source), expecterrmsg); } - result = source->tok.type; + if (source->lastkind == '\n' && source->lastindent < needindent) + return true; - gettok(source, false); - return result; + if (getkind(source) == ';') { + gettok(source, false); + + /* NOTE(m21c): used for REPL. it allows having + * semicolons on line-endings and nultiple + * adjacent semecolons in REPL-mode. */ + if (getkind(source) == ';' || getkind(source) == '\n') { + /* TODO(m21c): output an error-message if not in REPL-mode */ + } + } + + if (isnotatom(source)) + return true; + + if (hastail && source->lastkind != '\n' && source->lastkind != ';') + error(getloc(source), "expected line delimiter"); + + return false; +} + +static Node * +exprlist(Source *source, bool isparam, Type *paramtype); + +static Node * +stmtlist(Source *source, int indent, EnvKind envkind) { + Node *head = NULL, *tail = NULL; + int needindent = nextindent(source, indent); + + Env *env = NULL; + /* printf("needident: %d, currindent: %d, lastindent: %d\n", needindent, currindent, lastindent); */ + + for (;;) { + Node *stmt; + + if (checkend(source, !!tail, needindent, + "expected expression")) + break; + + if (!env && + (envkind != SFUNCTION || !source->currenv || + source->currenv->kind != SPARAMLIST)) + { + /* NOTE(m21c): if there already is a + * paramlist-environment and we want a + * function-environment, we just use + * paramlist as our function-environment. 
+ * Else, we push a new environment */ + env = pushenv(source, envkind); + } + + stmt = exprlist(source, false, NULL); + + stmt = makenode(&source->tok, stmt); + stmt->kind = ASTMT; + + if (!tail) { + head = tail = stmt; + } else { + tail->rhs = stmt; + tail = stmt; + } + } + + /* NOTE(m21c): function: paramlist --> function, see NOTE above */ + if (envkind == SFUNCTION && + source->currenv && + source->currenv->kind == SPARAMLIST) + { + assert(env == NULL); + source->currenv->kind = SFUNCTION; + env = source->currenv; + + popenv(source); + } else if (env) { + head = makenode(&source->tok, head); + head->kind = ASCOPE; + head->u.env = env; + env->stmts = head; + + popenv(source); + } + + + return head; } -Type * +static Node * +readexpr(Source *source, int minprec); + +static Type * gettype(Source *source, Type *basetype) { int flags; @@ -1777,19 +1941,19 @@ gettype(Source *source, Type *basetype) { advance: flags = qualifiers(source, QTYPE); - if (getkind(source) == '[') { + if (getkind(source) == '[' || getkind(source) == OARRAY) { Type *tmp = maketype(getloc(source), prim + TARRAY, basetype); basetype = tmp; gettok(source, false); if (source->tok.kind != ']') - basetype->u.val = expr(source, PASSIGN); + basetype->u.val = readexpr(source, PASSIGN); expect(source, ']', false, "expect ']'"); goto advance; } - if (getkind(source) == OLPTR) { + if (getkind(source) == OLPTR || getkind(source) == OMUL) { Type *tmp = maketype(getloc(source), prim + TPTR, basetype); basetype = tmp; @@ -1800,16 +1964,11 @@ advance: return basetype; } -Node * -stmtlist(Source *source, int indent, EnvKind kind); - -void -paramlist(Source *source, Type *selftype); - -Decl * +static Node * declaration(Source *source, Type *ty) { bool selfparam = false; Decl *decl = NULL; + Node *result = NULL; /* EnvKind context; @@ -1826,6 +1985,8 @@ declaration(Source *source, Type *ty) { context = source->currenv->kind; */ + skipnewline(source); + /* variable name */ if (getkind(source) == 'I') { decl = 
makedecl(source, source->tok.u.key, DVAR); @@ -1834,7 +1995,7 @@ declaration(Source *source, Type *ty) { /* module for variable */ } else if (getkind(source) == 'T') { - Type *module = gettype(source, getbasetype(source, 0)); + Type *module = gettype(source, source->tok.type); if (getkind(source) == ODISP || getkind(source) == ':') { selfparam = getkind(source) == ':'; @@ -1855,7 +2016,10 @@ declaration(Source *source, Type *ty) { } } else { - return NULL; + result = makenode(&source->tok, NULL); + result->kind = 'T'; + result->type = ty; + return result; } /* function declaration */ @@ -1867,9 +2031,13 @@ declaration(Source *source, Type *ty) { gettok(source, false); if (getkind(source) != ')') { Decl *param; + Node *paramlist; + int savedtop; functionenv = pushenv(source, SPARAMLIST); - paramlist(source, NULL); + + paramlist = exprlist(source, true, NULL); + /* deletenode(paramlist); */ for (param = functionenv->head; param; param = param->next) @@ -1889,7 +2057,7 @@ declaration(Source *source, Type *ty) { gettok(source, false); functionenv->kind = SFUNCTION; - body = expr(source, PASSIGN); + body = readexpr(source, PASSIGN); popenv(source); @@ -1915,14 +2083,14 @@ declaration(Source *source, Type *ty) { /* TODO(m21c): store the params-node (its initializations) * somewhere */ - return decl; + goto finish; } /* variable init */ if (getkind(source) == OASS) { gettok(source, false); assert(decl); - decl->content = expr(source, PASSIGN); + decl->content = readexpr(source, PASSIGN); /* no init */ } else { @@ -1930,268 +2098,89 @@ declaration(Source *source, Type *ty) { decl->content = NULL; } - return decl; -} - -Type * -gettypetuple(Source *source) { - if (getkind(source) == LPARDELIM) { - Type *ty; - - gettok(source, false); +finish: + result = makenode(&source->tok, decl->content); + result->type = decl->type; + result->u.declref = decl; + result->loc = decl->loc; + result->kind = ADECL; - ty = gettypetuple(source); + return result; +} - if (!ty) { - 
error(&source->tok.loc, "expected type"); - ty = maketype(&source->tok.loc, prim + TERRTYPE, NULL); +static Node * +readident(Source *source, int flags) { + Node *lhs = NULL; + Decl *decl = NULL; + SrcLoc loc = source->tok.loc; + int key = source->tok.u.key; + + decl = finddeclaration(source, source->currenv, source->tok.u.key); + gettok(source, true); + + if (source->currenv->kind == SRECORD) { + int kind = getkind(source); + if (!decl && kind != ',' && kind != ';' && kind != '\n' && + kind != '\0' && kind != ')' && kind != ']' && kind != '}') { + decl = defertypedeclaration(source, key); + decl->loc = loc; } + } - while (getkind(source) == ',') { - ty = maketype(&source->tok.loc, prim + TTUPLE, ty); - gettok(source, false); - ty->u.rtarget = gettypetuple(source); + if (decl && decl->kind == DTYPE) { + lhs = declaration(source, gettype(source, decl->type)); + return lhs; + } - if (!ty->u.rtarget) { - error(&source->tok.loc, "expected type"); - ty->u.rtarget = maketype(&source->tok.loc, prim + TERRTYPE, NULL); - } - } + lhs = makenode(&source->tok, NULL); + lhs->loc = loc; - expect(source, ')', true, "expected ')'"); - return ty; + if (decl) { + lhs->kind = ADECLREF; + lhs->type = decl->type; + lhs->u.declref = decl; + } else { + deferfuncenv(source, key); + lhs->kind = 'I'; + lhs->type = prim + TERRTYPE; + lhs->u.key = key; } - return gettype(source, getbasetype(source, 0)); + if (flags & QCONST) { + /* TODO(m21c): const - conversion */ + } + + return lhs; } -void -paramlist(Source *source, Type *selftype) { - Type *ty = gettypetuple(source); - Decl *decl; +static Node * +readrecord(Source *source, int indent, bool isunion) { + Node *recordnode; + Decl *module; + int key = 0; + + indent = source->lastindent; + recordnode = makenode(&source->tok, NULL); + recordnode->kind = getkind(source); + gettok(source, false); - if (!selftype && !ty) { - error(&source->tok.loc, "expected type"); - return; + if (getkind(source) == 'I') { + recordnode->lhs = 
makenode(&source->tok, NULL); + gettok(source, false); + } else { + error(getloc(source), "expected identifier"); } + /* recordnode->rhs = recordbody(source, indent, SSCOPE); */ + recordnode->rhs = stmtlist(source, indent, SRECORD); - if (!ty) - ty = selftype; + module = makedecl(source, recordnode->lhs->u.key, DTYPE); + module->type = prim + TINT; - decl = declaration(source, ty); + return recordnode; +} - if (!decl) - error(&source->tok.loc, "expected declaration"); - - while (getkind(source) == ',') { - gettok(source, false); - - ty = gettypetuple(source); - - if (!ty && decl) - ty = decl->type; - - decl = declaration(source, ty); - - if (!decl) - error(&source->tok.loc, "expected declaration"); - } -} - -Node * -declarationnode(Source *source, Type *ty) { - Node *result = NULL; - Decl *decl = declaration(source, ty); - - if (decl) { - result = makenode(&source->tok, decl->content); - result->type = decl->type; - result->u.declref = decl; - result->loc = decl->loc; - result->kind = ADECL; - } else if (ty) { - result = makenode(&source->tok, NULL); - result->kind = 'T'; - } - - return result; -} - -bool -isnotatom(Source *source) { - switch ((int) getkind(source)) { - case 0: - case '\n': case ',': case ';': - case ':': - case ')': case ']': case '}': - case KELSE: - case KUNTIL: - return true; - } - - if (getnumops(getkind(source)) && getprec(getkind(source)) != PUNARY) - return true; - - return false; -} - -bool -checkend(Source *source, bool hastail, int needindent, - const char *expecterrmsg) -{ - if (getkind(source) == '\n') { - gettok(source, false); - if (getkind(source) == ';') - error(getloc(source), expecterrmsg); - } - - if (source->lastkind == '\n' && source->lastindent < needindent) - return true; - - if (getkind(source) == ';') { - gettok(source, false); - - /* NOTE(m21c): used for REPL. it allows having - * semicolons on line-endings and nultiple - * adjacent semecolons in REPL-mode. 
*/ - if (getkind(source) == ';' || getkind(source) == '\n') { - /* TODO(m21c): output an error-message if not in REPL-mode */ - } - } - - if (isnotatom(source)) - return true; - - if (hastail && source->lastkind != '\n' && source->lastkind != ';') - error(getloc(source), "expected line delimiter"); - - return false; -} - -Node * -recordbody(Source *source, int indent, EnvKind envkind) { - Node *head = NULL, *tail = NULL; - int needindent = nextindent(source, indent); - - Env *env = NULL; - Type *type; - - for (;;) { - Node *declstmt; - - if (checkend(source, !!tail, needindent, - "expected declaration")) - break; - - if (!env) - env = pushenv(source, envkind); - - /* parse default values */ - if (getkind(source) == ODISP) { - gettok(source, false); - - /* parse field-declaration */ - } else { - if (getkind(source) == KUSE) { - gettok(source, false); - } - - type = getbasetype(source, 0); - type = gettype(source, type); - declstmt = declarationnode(source, type); - - declstmt = makenode(declstmt, declstmt); - declstmt->kind = ASTMT; - } - - if (!tail) { - head = tail = declstmt; - } else { - tail->rhs = declstmt; - tail = declstmt; - } - } - - if (env) { - head = makenode(&source->tok, head); - head->kind = ASCOPE; - head->u.env = env; - env->stmts = head; - - popenv(source); - } - - return head; -} - -Node * -exprlist(Source *source, bool isparam, Type *paramtype); - -Node * -stmtlist(Source *source, int indent, EnvKind envkind) { - Node *head = NULL, *tail = NULL; - int needindent = nextindent(source, indent); - - Env *env = NULL; - /* printf("needident: %d, currindent: %d, lastindent: %d\n", needindent, currindent, lastindent); */ - - for (;;) { - Node *stmt; - - if (checkend(source, !!tail, needindent, - "expected expression")) - break; - - if (!env && - (envkind != SFUNCTION || !source->currenv || - source->currenv->kind != SPARAMLIST)) - { - /* NOTE(m21c): if there already is a - * paramlist-environment and we want a - * function-environment, we just use - * 
paramlist as our function-environment. - * Else, we push a new environment */ - env = pushenv(source, envkind); - } - - stmt = exprlist(source, false, NULL); - - stmt = makenode(&source->tok, stmt); - stmt->kind = ASTMT; - - if (!tail) { - head = tail = stmt; - } else { - tail->rhs = stmt; - tail = stmt; - } - } - - /* NOTE(m21c): function: paramlist --> function, see NOTE above */ - if (envkind == SFUNCTION && - source->currenv && - source->currenv->kind == SPARAMLIST) - { - assert(env == NULL); - source->currenv->kind = SFUNCTION; - env = source->currenv; - - popenv(source); - } else if (env) { - head = makenode(&source->tok, head); - head->kind = ASCOPE; - head->u.env = env; - env->stmts = head; - - popenv(source); - } - - - return head; -} - -Node * -atom(Source *source, int flags) { +static Node * +readatom(Source *source, int flags) { Node *lhs = NULL, *savedis = source->lastis; int indent; @@ -2215,8 +2204,8 @@ atom(Source *source, int flags) { gettok(source, false), lhs->kind = ONEQ; else lhs->kind = OEQU; - lhs->rhs = expr(source, PRELAT); + lhs->rhs = readexpr(source, PRELAT); return lhs; } @@ -2224,49 +2213,29 @@ atom(Source *source, int flags) { if (getprec(getkind(source)) == PUNARY) { lhs = makenode(&source->tok, NULL); gettok(source, false); - lhs->lhs = atom(source, 0); - + lhs->lhs = readatom(source, 0); return lhs; } - lhs = declarationnode(source, gettype(source, getbasetype(source, 0))); - if (lhs) - return lhs; - if (flags & ~(QINFER | QCONST)) { error(getloc(source), "invalid use of qualifiers"); flags = flags & (QINFER | QCONST); } if (flags) { - lhs = atom(source, flags); + lhs = readatom(source, flags); return lhs; } /* actual atom */ switch (getkind(source)) { + case OCALL: case '(': - #if 0 - gettok(source, false); - skipnewline(source); - lhs = exprlist(source, false, NULL), source->lastis = savedis; - - if (lhs->kind == 'T') { - /* NOTE(m21c): expecting that the type is also set in lhs->type */ - lhs->kind = OCAST; - 
skipnewline(source); - expect(source, ')', true, "expected ')'"); - - lhs->lhs = atom(source, 0); - break; - } - - skipnewline(source); - expect(source, ')', true, "expected ')'"); - #else gettok(source, false); if (getkind(source) == '\n') { + /* FIXME(m21c): stmtlist should ignore indentation in + * this case! */ lhs = stmtlist(source, source->lastindent, SSCOPE); source->lastis = savedis; } else { @@ -2279,40 +2248,46 @@ atom(Source *source, int flags) { skipnewline(source); expect(source, ')', true, "expected ')'"); - lhs->lhs = atom(source, 0); + lhs->lhs = readatom(source, 0); break; } + if (lhs->kind == ACOMMA && + lhs->lhs->kind == 'T' && + lhs->rhs->kind == 'T') + { + Type *ty = maketype(&lhs->loc, prim + TTUPLE, NULL); + ty->target = lhs->lhs->type; + ty->u.rtarget = lhs->rhs->type; + /* deletenode(lhs); */ + + skipnewline(source); + expect(source, ')', true, "expected ')'"); + + lhs = declaration(source, gettype(source, ty)); + + assert(lhs); + return lhs; + } + skipnewline(source); } expect(source, ')', true, "expected ')'"); - #endif break; case 'I': - lhs = makenode(&source->tok, NULL); - lhs->u.declref = finddeclaration( - source->currenv, - source->tok.u.key - ); - - if (lhs->u.declref) { - lhs->kind = ADECLREF; - lhs->type = lhs->u.declref->type; - } else { - deferfuncenv(source, source->tok.u.key); - lhs->u.key = source->tok.u.key; - } - gettok(source, true); - - if (flags & QCONST) { - /* TODO(m21c): const - conversion */ - } - + lhs = readident(source, flags); break; case 'T': + do { + Type *type = source->tok.type; + gettok(source, false); + lhs = declaration(source, gettype(source, type)); + } while (0); + break; + case 'N': case 'S': case 'C': @@ -2325,27 +2300,35 @@ atom(Source *source, int flags) { break; - case KSTRUCT: - case KUNION: - indent = source->lastindent; + case KFALSE: + case KTRUE: lhs = makenode(&source->tok, NULL); - lhs->kind = getkind(source); - gettok(source, false); - if (getkind(source) == 'I') { - lhs->lhs = 
makenode(&source->tok, NULL); - gettok(source, false); - } else { - error(getloc(source), "expected identifier"); - } - lhs->rhs = recordbody(source, indent, SSCOPE); + lhs->kind = 'N'; + lhs->type = prim + TBOOL; + lhs->u.u = (uint64_t) (getkind(source) == KTRUE); + gettok(source, true); break; + case KNULL: + lhs = makenode(&source->tok, NULL); + lhs->kind = 'N'; + lhs->type = maketype(&source->tok.loc, prim + TPTR, prim + TVOID); + lhs->u.u = (uint64_t) (getkind(source) == KTRUE); + gettok(source, true); + + break; + + case KSTRUCT: + case KUNION: + lhs = readrecord(source, indent, source->tok.kind == KUNION); + break; + case KNOT: lhs = makenode(&source->tok, NULL); gettok(source, false); lhs->kind = OLNOT; - lhs->lhs = expr(source, PRELAT); + lhs->lhs = readexpr(source, PRELAT); break; @@ -2408,7 +2391,7 @@ atom(Source *source, int flags) { if (getkind(source) == KUNTIL && source->lastindent >= indent) { lhs->kind = ALOOPUNTIL; gettok(source, false); - lhs->u.payload = expr(source, POR); + lhs->u.payload = readexpr(source, POR); } if (lhs->kind != ALOOP) @@ -2421,7 +2404,7 @@ atom(Source *source, int flags) { lhs = makenode(&source->tok, NULL); gettok(source, false); lhs->kind = AWHILE; - lhs->u.payload = expr(source, POR); + lhs->u.payload = readexpr(source, POR); lhs->lhs = stmtlist(source, indent, SSCOPE); goto joinelse; @@ -2431,7 +2414,7 @@ atom(Source *source, int flags) { lhs = makenode(&source->tok, NULL); gettok(source, false); lhs->kind = AIF; - lhs->u.payload = expr(source, POR); + lhs->u.payload = readexpr(source, POR); skipnewline(source); if (getkind(source) == 'I' && source->tok.u.key == auxthen) @@ -2500,15 +2483,15 @@ atom(Source *source, int flags) { lhs->kind = OEQU; source->lastis = lhs; - lhs->rhs = expr(source, PRELAT); + lhs->rhs = readexpr(source, PRELAT); } return lhs; } -Node * -expr(Source *source, int minprec) { - Node *lhs = atom(source, 0), *last = NULL; +static Node * +readexpr(Source *source, int minprec) { + Node *lhs = 
readatom(source, 0), *last = NULL; /* only binary expr */ while (getprec(getkind(source)) >= minprec) { @@ -2516,7 +2499,7 @@ expr(Source *source, int minprec) { gettok(source, false); skipnewline(source); - lhs->rhs = expr( + lhs->rhs = readexpr( source, getprec(lhs->kind) + !israssoc(lhs->kind) ); @@ -2547,7 +2530,7 @@ expr(Source *source, int minprec) { return lhs; } -Node * +static Node * todeclaration(Node *curr, Node **ty) { if (*ty) { if (curr->kind == 'I') { @@ -2572,7 +2555,7 @@ todeclaration(Node *curr, Node **ty) { return curr; } -Node * +static Node * exprlist(Source *source, bool isparam, Type *paramtype) { Node *lhs; bool isdeclaration, typetuple; @@ -2580,9 +2563,9 @@ exprlist(Source *source, bool isparam, Type *paramtype) { /* tail = todeclaration(tail, &paramtype); */ if (paramtype && getkind(source) == 'I') { - lhs = declarationnode(source, paramtype); + lhs = declaration(source, paramtype); } else { - lhs = expr(source, PASSIGN); + lhs = readexpr(source, PASSIGN); } if (isparam && lhs->kind != ADECL) @@ -2596,16 +2579,35 @@ exprlist(Source *source, bool isparam, Type *paramtype) { while (getkind(source) == ',') { Node *rhs = NULL; + + if (lhs->kind == ACOMMA && + lhs->lhs->kind == 'T' && + lhs->rhs->kind == 'T') + { + lhs->type = maketype(&lhs->loc, prim + TTUPLE, + lhs->lhs->type); + lhs->type->u.rtarget = lhs->rhs->type; + + lhs->lhs->type = NULL; + lhs->rhs->type = NULL; + /* deletenode(lhs->lhs); */ + /* deletenode(lhs->rhs) */ + + lhs->lhs = NULL; + lhs->rhs = NULL; + lhs->kind = 'T'; + } + lhs = makenode(&source->tok, lhs); lhs->kind = ACOMMA; gettok(source, false); if (getkind(source) == 'I' && isdeclaration) { assert(paramtype); - rhs = declarationnode(source, paramtype); + rhs = declaration(source, paramtype); typetuple = false; } else { - rhs = expr(source, PASSIGN); + rhs = readexpr(source, PASSIGN); typetuple &= rhs->kind == 'T'; /* rhs = todeclaration(curr, &paramtype); */ } @@ -2629,7 +2631,7 @@ exprlist(Source *source, bool 
isparam, Type *paramtype) { /* - type-checking & folding - */ -bool +static bool isinttype(Type *ty) { switch (ty->kind) { case TINFER: case TUINFER: @@ -2644,7 +2646,7 @@ isinttype(Type *ty) { } } -bool +static bool isfloattype(Type *ty) { switch (ty->kind) { case TF32: case TF64: @@ -2655,7 +2657,7 @@ isfloattype(Type *ty) { } } -bool +static bool isarithtype(Type *ty) { switch (ty->kind) { case TBOOL: @@ -2672,7 +2674,7 @@ isarithtype(Type *ty) { } } -bool +static bool isunsignedtype(Type *ty) { switch (ty->kind) { case TBOOL: case TUINFER: @@ -2686,7 +2688,7 @@ isunsignedtype(Type *ty) { } /* TODO(m21c): also mask int/float values in the tokenizer */ -uint64_t +static uint64_t maskint(int size, uint64_t value) { if (size == 1) return value & 0xfful; if (size == 2) return value & 0xfffful; @@ -2695,14 +2697,14 @@ maskint(int size, uint64_t value) { return value; } -double +static double maskfloat(int size, double value) { if (size == 4) return (double) (float) value; return value; } -uint64_t +static uint64_t convint(int srcsize, bool srcsigned, uint64_t value) { if (!srcsigned) return value; if (srcsize == 1) return (uint64_t) (int8_t ) value; @@ -2712,10 +2714,10 @@ convint(int srcsize, bool srcsigned, uint64_t value) { return value; } -Node * +static Node * conv(Node *node); -Node * +static Node * autoref(Type *ty, Node *node) { int numderefs = 0, i; @@ -2783,7 +2785,7 @@ autoref(Type *ty, Node *node) return node; } -Node * +static Node * wrap(Type *ty, Node *node) { assert(ty); assert(node->type); @@ -2841,7 +2843,7 @@ wrap(Type *ty, Node *node) { return node; } -Node * +static Node * conv(Node *node) { Type *ty = node->type; @@ -2856,10 +2858,10 @@ conv(Node *node) { } -Node * +static Node * typecheck(Env *env, Node *expr); -bool +static bool arithtuplereorder(Env *env, Node *expr, int numops) { Node *tmp; @@ -2901,7 +2903,7 @@ arithtuplereorder(Env *env, Node *expr, int numops) { return false; } -Node * +static Node * typecheck(Env *env, Node *expr) { Node 
*lhs = expr->lhs, *rhs = expr->rhs; @@ -3202,7 +3204,7 @@ joinbinarywrap: return expr; } -Node * +static Node * foldexpr(Env *env, Node *expr) { Node *lhs = expr->lhs, *rhs = expr->rhs; Type *ty = expr->type; @@ -3239,7 +3241,7 @@ foldexpr(Env *env, Node *expr) { switch ((int) expr->kind) { case IDENT: do { - Decl *declref = finddeclaration(env, expr->u.key); + Decl *declref = finddeclaration(NULL, env, expr->u.key); if (declref) { expr->kind = ADECLREF; @@ -3395,6 +3397,8 @@ foldexpr(Env *env, Node *expr) { if (lhs->type->kind == expr->type->kind) *expr = *lhs /*, delete(lhs) */; + case 'T': + error(&expr->loc, "exptected expression, not type"); default: return expr; @@ -3435,7 +3439,7 @@ enum Highlight { Highlight lasthighlight; -int +static int highlight(FILE *out, Highlight kind) { int n = 0; @@ -3513,17 +3517,19 @@ highlight(FILE *out, Highlight kind) { case HLPROMPT: n += fprintf(out, "\e[35m"); break; + default: + break; } lasthighlight = kind; return n; } -int +static int printexpr(FILE *out, Node *expr, int indent); -int -printtype(FILE *out, Type *type, int indent) { +static int +printtypetail(FILE *out, Type *type, int indent) { int n = 0; if (!type) @@ -3534,10 +3540,10 @@ printtype(FILE *out, Type *type, int indent) { type->target->kind == TTUPLE) { n += fprintf(out, "("); - n += printtype(out, type->target, indent); + n += printtypetail(out, type->target, indent); n += fprintf(out, ")"); } else { - n += printtype(out, type->target, indent); + n += printtypetail(out, type->target, indent); } switch (type->kind) { @@ -3549,7 +3555,13 @@ printtype(FILE *out, Type *type, int indent) { case TTUPLE: n += fprintf(out, ", "); - n += printtype(out, type->u.rtarget, indent); + if (type->u.rtarget && type->u.rtarget->kind == TTUPLE) { + n += fprintf(out, "("); + n += printtypetail(out, type->u.rtarget, indent); + n += fprintf(out, ")"); + } else { + n += printtypetail(out, type->u.rtarget, indent); + } break; #define typecase(type, str) \ @@ -3573,7 +3585,21 @@ 
printtype(FILE *out, Type *type, int indent) { return n; } -int +static int +printtype(FILE *out, Type *type, int indent) { + if (type && type->kind == TTUPLE) { + int n = 0; + + n += fprintf(out, "("); + n += printtypetail(out, type, indent); + + return n + fprintf(out, ")"); + } + + return printtypetail(out, type, indent); +} + +static int printtypesuffix(FILE *out, Type *type, int indent) { int n = 0; @@ -3598,7 +3624,7 @@ printtypesuffix(FILE *out, Type *type, int indent) { return n; } -bool +static bool isclauseorempty(Node *expr) { Kind kind; @@ -3613,7 +3639,7 @@ isclauseorempty(Node *expr) { kind == ARETURN || kind == ABREAK || kind == ACONTINUE; } -int +static int printclause(FILE *out, Node *expr, int indent) { int n = 0; @@ -3628,7 +3654,7 @@ printclause(FILE *out, Node *expr, int indent) { return n; } -int +static int printstring(FILE *out, Node *string) { const char *str = getstring(strings, string->u.key); int len = getlength(strings, string->u.key); @@ -3673,7 +3699,7 @@ printstring(FILE *out, Node *string) { return n + fprintf(out, "\""); } -int +static int printoperant(FILE *out, Node *expr, int opprec, bool braceequalprec, int indent) { int prec, n = 0; @@ -3699,14 +3725,14 @@ printoperant(FILE *out, Node *expr, int opprec, bool braceequalprec, int indent) return n; } -int +static int printsubexpr(FILE *out, Node *expr, bool islhs, int indent) { int prec, n = 0; if (!expr) return 0; - if (islhs && expr->kind == ACOMMA || expr->kind == ADECL) + if ((islhs && expr->kind == ACOMMA) || expr->kind == ADECL) n += printexpr(out, expr, indent); else n += printoperant(out, expr, PSTART, !islhs, indent); @@ -3714,7 +3740,7 @@ printsubexpr(FILE *out, Node *expr, bool islhs, int indent) { return n; } -int +static int printdeclaration(FILE *out, Decl *decl, int indent) { int n = 0; @@ -3763,9 +3789,11 @@ printdeclaration(FILE *out, Decl *decl, int indent) { n += fprintf(out, " = "); n += printoperant(out, decl->content, PASSIGN, false, indent); } + + return n; 
} -int +static int printexpr(FILE *out, Node *expr, int indent) { int n = 0; @@ -3831,15 +3859,48 @@ printexpr(FILE *out, Node *expr, int indent) { case 'N': n += highlight(out, HLNUMBER); - if (expr->type->kind == TFLOAT || - expr->type->kind == TDOUBLE || - expr->type->kind == TLDOUBLE) + + switch (expr->type->kind) { + case TF32: case TF64: + /* case TLDOUBLE: */ n += fprintf(out, "%f", expr->u.d); - else if (expr->type->kind & (TUINT - TINT)) + n += printtypesuffix(out, expr->type, indent); + break; + + case TINFER: + case TS8: case TS16: case TS32: case TS64: n += fprintf(out, "%li", expr->u.s); - else - n += fprintf(out, "%lu", expr->u.u); - n += printtypesuffix(out, expr->type, indent); + n += printtypesuffix(out, expr->type, indent); + break; + + case TUINFER: + case TU8: case TU16: case TU32: case TU64: + n += fprintf(out, "%lu", expr->u.s); + n += printtypesuffix(out, expr->type, indent); + break; + + case TBOOL: + if (expr->u.u == 0) + n += fprintf(out, "false"); + else if (expr->u.u == 1) + n += fprintf(out, "true"); + else + n += fprintf(out, "0x%016lx", expr->u.u); + break; + + case TPTR: + if (expr->u.u == 0) + n += fprintf(out, "null"); + else + n += fprintf(out, "0x%016lx", expr->u.u); + break; + + case TVOID: + default: + n += fprintf(out, "---"); + break; + + } break; case 'S': @@ -3906,6 +3967,19 @@ printexpr(FILE *out, Node *expr, int indent) { n += printclause(out, expr->lhs, indent); break; + case KUNION: + case KSTRUCT: + n += highlight(out, HLKEYWORD); + n += fprintf(out, expr->kind == KSTRUCT ? 
"struct" : "union"); + if (expr->lhs && expr->lhs->kind == IDENT) { + n += highlight(out, HLTYPE); + n += fprintf(out, " %s", getstring(idents, expr->lhs->u.key)); + } + + if (expr->rhs) + n += printclause(out, expr->rhs, indent); + break; + case ASTMT: advancestmt: do { @@ -3986,6 +4060,24 @@ printexpr(FILE *out, Node *expr, int indent) { +/* - init source - */ + +static void +initsource(Source *source, const char *filename, FILE *file) { + source->filein = file; + source->currloc.filename = filename; + source->tok.loc.filename = filename; + source->handlereplprompt = false; + + source->implicitenv = envbuf + envtop++; + + gettok(source, false); + if (getkind(source) == '\n') + gettok(source, false); +} + + + /* - main-routine - */ int @@ -4002,32 +4094,22 @@ main(int argc, char **argv) { auxthen = getstringkey(&idents, "then", 4); if (argc >= 2) { - source->filein = fopen(argv[1], "rb"); - source->currloc.filename = argv[1]; - source->tok.loc.filename = argv[1]; + initsource(source, argv[1], fopen(argv[1], "rb")); assert(source->filein); } else { - source->filein = stdin; - source->currloc.filename = "<stdin>"; - source->tok.loc.filename = "<stdin>"; - highlight(stdout, HLPROMPT); printf("> "); highlight(stdout, HLNONE); - source->handlereplprompt = false; + initsource(source, "<stdin>", stdin); } - gettok(source, false); - if (getkind(source) == '\n') - gettok(source, false); - pushenv(source, STOPLEVEL); while (getkind(source) != 0) { /* printf("token:%i:%i: %c '%.*s'\n", lastline, lastcol + 1, tok.u.id, currcol - lastcol, line + lastcol);*/ Node *ast; ast = exprlist(source, false, NULL); - /* ast = expr(source, PSTART); */ + /* ast = readexpr(source, PSTART); */ /* printast(ast, 0); printf("\n"); @@ -4092,4 +4174,6 @@ main(int argc, char **argv) { /* fclose(source->filein); */ /* disposestrmap(&strings); */ /* disposestrmap(&idents); */ + + return 0; }