Aria

A low-level systems programming language
git clone git://git.m21c.me/Aria.git
Log | Files | Refs | LICENSE

commit 9ef7e9b804bfb233b8613f26e710a7f034cb31c5
parent 169777f038533a9820dc94dfcb664a2ab53068df
Author: m21c <ho*******@gmail.com>
Date:   Fri, 27 Jun 2025 21:21:31 +0200

worked on codegen

Diffstat:
M compiler.c | 884 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 884 insertions(+), 0 deletions(-)

diff --git a/compiler.c b/compiler.c @@ -5694,6 +5694,890 @@ extractnestedfunctions(Env *env, Node *expr) // }}} +// @section c code generation {{{ + +typedef struct CodeGen CodeGen; + +struct CodeGen { + FILE *out; + + Env *env; + int indent, commacount; + bool needsvalue, hasclause; + const char *valuename; +}; + +static void +codegen(CodeGen *cg, Node *expr); + +static void +cgindent(CodeGen *cg) +{ + int i; + + for (i = 0; i < cg->indent; ++i) { + fprintf(cg->out, "\t"); + } +} + +static void +cgprintf(CodeGen *cg, const char *fmt, ...) +{ + va_list ap; + + (void) cg; + va_start(ap, fmt); + vfprintf(cg->out, fmt, ap); + va_end(ap); +} + +static void +cginit(CodeGen *cg, Env *toplevel, FILE *out) +{ + cg->out = out; + cg->env = toplevel; + cg->indent = 0; + cg->commacount = 0; + cg->needsvalue = false; + cg->hasclause = false; + cg->valuename = NULL; + + assert(cg->out); + assert(cg->env); + + cgprintf(cg, "#include <assert.h>\n"); + cgprintf(cg, "#include <stdarg.h>\n"); + cgprintf(cg, "#include <stdbool.h>\n"); + cgprintf(cg, "#include <stdint.h>\n"); + cgprintf(cg, "#include <stdio.h>\n"); + cgprintf(cg, "#include <stdlib.h>\n"); + cgprintf(cg, "#include <string.h>\n"); + + cgprintf(cg, "\n"); +} + +#if 0 +static void +cgtypetail(CodeGen *cg, Type *type) +{ + if (!type) + return; + + if (type->kind == TFUNCTION) { + cgtypetail(cg, type->u.rtarget); + cgprintf(cg, " function("); + if (type->target) + cgtypetail(cg, type->target); + cgprintf(cg, ")"); + return; + } + + if (type->kind != TTUPLE && type->target && + type->target->kind == TTUPLE) + { + cgprintf(cg, "("); + cgtypetail(cg, type->target); + cgprintf(cg, ")"); + } else { + cgtypetail(cg, type->target); + } + + if (type->module) { + cgprintf(cg, "%s", getstring(idents, type->module->key)); + return; + } + + switch (type->kind) { + case TARRAY: + cgprintf(cg, "["); + + /* NOTE(m21c): the value may be always set in the future */ + if (type->u.val) + codegen(cg, type->u.val); + + cgprintf(cg, "]"); + 
break; + + case TTUPLE: + cgprintf(cg, ", "); + if (type->u.rtarget && type->u.rtarget->kind == TTUPLE) { + cgprintf(cg, "("); + cgtypetail(cg, type->u.rtarget); + cgprintf(cg, ")"); + } else { + cgtypetail(cg, type->u.rtarget); + } + + break; + + default:; + } + + return; +} + +static void +cgbasetype(CodeGen *cg, Type *type) +{ + while (type->target) + type = type->target; + + switch (type->kind) { + #define typecase(type, str) \ + case type: cgprintf(cg, str); break + typecase(TERRTYPE, "<error-type>"); + typecase(TUNDEFINED, "<undefined-type>"); + typecase(TPTR, "*"); + typecase(TVOID, "void" ); typecase(TBOOL, "bool" ); + typecase(TINFER, "infer"); typecase(TUINFER, "uinfer"); + typecase(TS8, "char" ); typecase(TU8, "uchar" ); + typecase(TS16, "s16" ); typecase(TU16, "u16" ); + typecase(TS32, "int" ); typecase(TU32, "uint" ); + typecase(TS64, "s64" ); typecase(TU64, "u64" ); + typecase(TF32, "float"); typecase(TF64, "double"); + #undef typecase + default: + cgprintf(cg, "<unknown-type-%d>", type->kind); + } +} +#endif + +static void +cgtype(CodeGen *cg, Type *type, Decl *decl); + +static void +cgnamedparams(CodeGen *cg, Decl *decl) +{ + Decl *param, *head = NULL; + + cgprintf(cg, "("); + + if (decl->contentenv) { + head = decl->contentenv->head; + } + + for (param = head; param; param = param->next) { + if (param->kind != DPARAM) + break; + + if (param != head) { + cgprintf(cg, ", "); + } + + ++cg->indent; + cgtype(cg, param->type, param); + --cg->indent; + } + + cgprintf(cg, ")"); +} + +static void +cgdeclmodule(CodeGen *cg, Decl *module) +{ + if (module->module) + cgdeclmodule(cg, module->module); + + cgprintf(cg, "%s_", getstring(idents, module->key)); +} + +static void +cgdeclname(CodeGen *cg, Decl *decl) +{ + if (decl->module) + cgdeclmodule(cg, decl->module); + + cgprintf(cg, "%s", getstring(idents, decl->key)); +} + +static void +cgbasetype(CodeGen *cg, Type *type) +{ + switch (type->kind) { + #define typecase(type, str) \ + case type: cgprintf(cg, 
str); break + typecase(TERRTYPE, "<error-type>"); + typecase(TUNDEFINED, "<undefined-type>"); + typecase(TPTR, "*"); + typecase(TVOID, "void" ); typecase(TBOOL, "bool" ); + typecase(TINFER, "infer"); typecase(TUINFER, "uinfer"); + typecase(TS8, "char" ); typecase(TU8, "uchar" ); + typecase(TS16, "s16" ); typecase(TU16, "u16" ); + typecase(TS32, "int" ); typecase(TU32, "uint" ); + typecase(TS64, "s64" ); typecase(TU64, "u64" ); + typecase(TF32, "float"); typecase(TF64, "double"); + #undef typecase + + case TUNION: + cgprintf(cg, "union "); + goto joinstruct; + + case TSTRUCT: + cgprintf(cg, "struct "); + joinstruct: + assert(type->module); + cgdeclname(cg, type->module); + break; + + default: + cgprintf(cg, "<unknown-type-%d>", type->kind); + } +} + +static void +cgtype(CodeGen *cg, Type *type, Decl *decl) +{ + Type *stack[64]; + Type *post[64]; + int top = 0, pcount = 0; + + while (type->target || type->kind == TFUNCTION) { + assert(top < lengthof(stack)); + stack[top++] = type; + if (type->kind == TFUNCTION) { + assert(type->u.rtarget); + type = type->u.rtarget; + } else { + type = type->target; + } + } + + if (cg->commacount) + goto decorate; + + cgbasetype(cg, type); + cgprintf(cg, " "); + +decorate: + while (top > 0) { + Type *curr = stack[--top]; + if (curr->kind == TPTR) { + Type *target = curr->target; + if (target->kind != TPTR && target->target) { + cgprintf(cg, "(*"); + assert(pcount < lengthof(post)); + post[pcount++] = curr; + } else { + cgprintf(cg, "*"); + } +#if 0 + if (target->isconst) + cgprintf(cg, "const "); +#endif + } else { + assert(pcount < lengthof(post)); + post[pcount++] = curr; + } + } + + if (decl) + cgdeclname(cg, decl); + + + while (pcount > 0) { + Type *curr = post[--pcount]; + switch (curr->kind) { + case TPTR: + cgprintf(cg, ")"); + continue; + case TARRAY: + cgprintf(cg, "["); + codegen(cg, curr->u.val); + cgprintf(cg, "]"); + continue; + case TFUNCTION: + if (decl && decl->kind == DFUNCTION) { + /* print named or empty parameter 
list */ + cgnamedparams(cg, decl); + /* Only the innermost function is referred + * by decl. So we can set decl to NULL. */ + decl = NULL; + } else if (curr->target + && curr->target->kind == TTUPLE) { + Type *tuple = curr->target; + /* print anonymous parameter list */ + cgprintf(cg, "("); + for (;;) { + assert(tuple->target); + cgtype(cg, tuple->target, NULL); + cgprintf(cg, ", "); + + assert(tuple->u.rtarget); + tuple = tuple->u.rtarget; + if (tuple->kind != TTUPLE) { + cgtype(cg, tuple, NULL); + break; + } + } + cgprintf(cg, ")"); + } else { + /* print empty parameter list */ + cgprintf(cg, "()"); + } + default: + continue; + } + } +} + +static void +cgnumber(CodeGen *cg, Node *expr) +{ + switch (expr->type->kind) { + case TF32: case TF64: + /* case TLDOUBLE: */ + cgprintf(cg, "%f", expr->u.d); + if (expr->type->kind == TF32) + cgprintf(cg, "f"); + break; + + case TINFER: + case TS8: case TS16: case TS32: case TS64: + cgprintf(cg, "%lli", expr->u.s); + break; + + case TUINFER: + case TU8: case TU16: case TU32: case TU64: + cgprintf(cg, "%llu", expr->u.s); + break; + + case TBOOL: + if (expr->u.u == 0) + cgprintf(cg, "false"); + else if (expr->u.u == 1) + cgprintf(cg, "true"); + else + cgprintf(cg, "((bool) 0x%016llx)", expr->u.u); + break; + + case TPTR: + if (expr->u.u == 0) + cgprintf(cg, "NULL"); + else + cgprintf(cg, "((void *) 0x%016llx)", expr->u.u); + break; + + case TVOID: + default: + cgprintf(cg, "---"); + break; + + } + +} + +static void +cgdeclaration(CodeGen *cg, Node *expr) +{ + Decl *decl = expr->u.declref; + + assert(decl); + assert(expr->type); + + cgtype(cg, expr->type, decl); + + if (decl->kind == DFUNCTION) { + if (!decl->u.content) + return; + + cgprintf(cg, "\n"); + cgindent(cg); + cgprintf(cg, "{\n"); + + ++cg->indent; + codegen(cg, decl->contentenv->stmts); + --cg->indent; + + cgindent(cg); + cgprintf(cg, "}\n"); + cg->hasclause = true; + } else if (decl->kind == DPARAM || decl->kind == DVAR) { + if (decl->parentenv->kind == SUNION + || 
decl->parentenv->kind == SSTRUCT) + return; + cgprintf(cg, " = "); + + /* @todo remove condition. it is only for testing structs. + * content may not be NULL otherwise (needs validation) */ + if (decl->u.content) + codegen(cg, decl->u.content); + } +} + +static void +cgsubexpr(CodeGen *cg, Node *expr) +{ + if (isatomnode(expr->kind)) { + codegen(cg, expr); + return; + } + + cgprintf(cg, "("); + codegen(cg, expr); + cgprintf(cg, ")"); +} + +static void +cgunaryprefixop(CodeGen *cg, Node *expr, const char *op) +{ + cgprintf(cg, "%s", op); + cgsubexpr(cg, expr->lhs); +} + +static void +cgbinaryop(CodeGen *cg, Node *expr, const char *op) +{ + cgsubexpr(cg, expr->lhs); + cgprintf(cg, " %s ", op); + cgsubexpr(cg, expr->rhs); +} + +static void +cgprintclause(CodeGen *cg, Node *clause) +{ + cgprintf(cg, " {\n"); + ++cg->indent; + codegen(cg, clause); + --cg->indent; + cgindent(cg); + cgprintf(cg, "}"); + cg->hasclause = true; +} + +static void +codegen(CodeGen *cg, Node *expr) +{ + assert(expr); + + switch (expr->kind) { + case IDENT: + cgprintf(cg, "%s", getstring(idents, expr->u.key)); + break; + case CHAR: + /* @todo print char value properly */ + cgprintf(cg, "'%c'", (uchar) expr->u.u); + break; + case TYPE: + break; + case NUMBER: + cgnumber(cg, expr); + break; + case STRING: + cgprintf(cg, "\""); + do { + const int length = getlength(strings, expr->u.key); + const char *string = getstring(strings, expr->u.key); + + int i; + + /* @note string must have at least one char + * (null-char at the end) which is not printed */ + assert(length); + for (i = 0; i < length - 1; ++i) { + switch (string[i]) { + case '\\': + cgprintf(cg, "\\\\"); + break; + case '"': + cgprintf(cg, "\\\""); + break; + case '\n': + cgprintf(cg, "\\n"); + break; + case '\t': + cgprintf(cg, "\\t"); + break; + case '\r': + cgprintf(cg, "\\r"); + break; + case '\b': + cgprintf(cg, "\\b"); + break; + case '\f': + cgprintf(cg, "\\f"); + break; + case '\v': + cgprintf(cg, "\\v"); + break; + case '\0': + 
cgprintf(cg, "\\0"); + break; + default: + cgprintf(cg, "%c", string[i]); + break; + } + } + } while (0); + cgprintf(cg, "\""); + break; + case ACOMPOUND: + cgprintf(cg, "(("); + cgbasetype(cg, expr->type); + cgprintf(cg, ") {"); + ++cg->indent; + codegen(cg, expr->rhs); + --cg->indent; + cgindent(cg); + cgprintf(cg, "})"); + break; + case AFIELDINIT: + codegen(cg, expr->rhs); + break; + case KTRUE: + cgprintf(cg, "true"); + break; + case KFALSE: + cgprintf(cg, "false"); + break; + case KNULL: + cgprintf(cg, "NULL"); + break; + case KSIZEOF: + break; + case KALIGNOF: + break; + case KLENGTHOF: + break; + case ADECLREF: + cgdeclname(cg, expr->u.declref); + break; + case ALOOPUNTIL: + case ALABEL: + case ASWITCH: + case ACASE: + break; + case ACONV: + codegen(cg, expr->lhs); + break; + case ASCOPE: + do { + Node *curr = expr->lhs; + while (curr) { + cgindent(cg); + codegen(cg, curr); + if (curr->kind == ASTMT) + curr = curr->rhs; + else + break; + } + } while (0); + break; + case AENV: + assert(expr->lhs); + codegen(cg, expr->lhs); + break; + case ASTMT: + cg->hasclause = false; + codegen(cg, expr->lhs); + if (!cg->hasclause) + cgprintf(cg, "; /* statement */\n"); + cg->hasclause = false; + break; + case ADECL: + cgdeclaration(cg, expr); + break; + case OSUFINC: + cgsubexpr(cg, expr->lhs); + cgprintf(cg, "++"); + break; + case OSUFDEC: + cgsubexpr(cg, expr->lhs); + cgprintf(cg, "++"); + break; + case OARRAY: + cgsubexpr(cg, expr->lhs); + cgprintf(cg, "["); + codegen(cg, expr->rhs); + cgprintf(cg, "]"); + break; + case ODISP: + cgsubexpr(cg, expr->lhs); + cgprintf(cg, "."); + codegen(cg, expr->rhs); + break; + case OCALL: + cgsubexpr(cg, expr->lhs); + cgprintf(cg, "("); + if (expr->rhs) + codegen(cg, expr->rhs); + cgprintf(cg, ")"); + break; + case OADDR: + case AADDR: + cgunaryprefixop(cg, expr, "&"); + break; + case ODEREF: + case ADEREF: + cgunaryprefixop(cg, expr, "*"); + break; + case OINC: + cgunaryprefixop(cg, expr, "++"); + break; + case ODEC: + 
cgunaryprefixop(cg, expr, "--"); + break; + case OBNOT: + cgunaryprefixop(cg, expr, "~"); + break; + case OLNOT: + cgunaryprefixop(cg, expr, "!"); + break; + case OPLUS: + cgunaryprefixop(cg, expr, "+"); + break; + case OMINUS: + cgunaryprefixop(cg, expr, "-"); + break; + case OCAST: + /* @todo implement c version correctly */ + cgprintf(cg, "("); + codegen(cg, expr->lhs); + cgprintf(cg, ")"); + break; + case OMUL: + cgbinaryop(cg, expr, "*"); + break; + case ODIV: + cgbinaryop(cg, expr, "/"); + break; + case OMOD: + cgbinaryop(cg, expr, "%"); + break; + case OBAND: + cgbinaryop(cg, expr, "&"); + break; + case OLSH: + cgbinaryop(cg, expr, "<<"); + break; + case OARSH: + /* @todo implement c version correctly */ + cgbinaryop(cg, expr, ">>>"); + break; + case ORSH: + cgbinaryop(cg, expr, ">>"); + break; + case OADD: + cgbinaryop(cg, expr, "+"); + break; + case OSUB: + cgbinaryop(cg, expr, "-"); + break; + case OBOR: + cgbinaryop(cg, expr, "|"); + break; + case OXOR: + cgbinaryop(cg, expr, "^"); + break; + case OFLIP: + case ORANGE: + /* @todo implement c version correctly */ + cgbinaryop(cg, expr, "~"); + break; + case OLEQ: + cgbinaryop(cg, expr, "<="); + break; + case OLET: + cgbinaryop(cg, expr, "<"); + break; + case OGEQ: + cgbinaryop(cg, expr, ">="); + break; + case OGRT: + cgbinaryop(cg, expr, ">"); + break; + case ONEQ: + cgbinaryop(cg, expr, "!="); + break; + case OEQU: + cgbinaryop(cg, expr, "=="); + break; + case OIDENT: + /* @todo implement c version correctly */ + cgbinaryop(cg, expr, "=="); + break; + case OLAND: + cgbinaryop(cg, expr, "&&"); + break; + case OLOR: + cgbinaryop(cg, expr, "||"); + break; + case OASS: + cgbinaryop(cg, expr, "="); + break; + case OMULA: + cgbinaryop(cg, expr, "*="); + break; + case ODIVA: + cgbinaryop(cg, expr, "/="); + break; + case OMODA: + cgbinaryop(cg, expr, "%="); + break; + case OLSHA: + cgbinaryop(cg, expr, "<<="); + break; + case OARSHA: + /* @todo implement c version correctly */ + cgbinaryop(cg, expr, ">>>="); + 
break; + case ORSHA: + cgbinaryop(cg, expr, ">>="); + break; + case OANDA: + cgbinaryop(cg, expr, "&="); + break; + case OADDA: + cgbinaryop(cg, expr, "+="); + break; + case OSUBA: + cgbinaryop(cg, expr, "-="); + break; + case OXORA: + cgbinaryop(cg, expr, "^="); + break; + case OORA: + cgbinaryop(cg, expr, "|="); + break; + case KBREAK: + cgprintf(cg, "break"); + break; + case KCONTINUE: + cgprintf(cg, "continue"); + break; + case KGOTO: + cgprintf(cg, "goto %s", getstring(idents, expr->u.key)); + break; + case KRETURN: + cgprintf(cg, "return "); + if (expr->rhs) + codegen(cg, expr->rhs); + break; + case KWHILE: + cgprintf(cg, "while ("); + goto joinif; + case KFOR: + cgprintf(cg, "for ("); + goto joinif; + case KIF: + cgprintf(cg, "if ("); + joinif: + if (expr->u.payload) + codegen(cg, expr->u.payload); + cgprintf(cg, ") "); + if (expr->lhs) + cgprintclause(cg, expr->lhs); + if (expr->rhs) { + cgprintf(cg, " else "); + cgprintclause(cg, expr->rhs); + } + cgprintf(cg, "\n"); + break; + case AFORSTEP: + assert(expr->lhs); + assert(expr->rhs); + assert(expr->u.payload); + assert(expr->lhs->kind == ADECL || expr->lhs->kind == ADECLREF); + codegen(cg, expr->lhs); + cgprintf(cg, "; %s < (", getstring(idents, expr->lhs->u.declref->key)); + codegen(cg, expr->rhs); + cgprintf(cg, "); %s += (", getstring(idents, expr->lhs->u.declref->key)); + codegen(cg, expr->u.payload); + cgprintf(cg, ")"); + break; + case AFOREACH: + case KCASE: + case KOF: + case KDO: + case KLOOP: + case KUNTIL: + /* @todo implement c version correctly */ + break; + case ACOMMA: + codegen(cg, expr->lhs); + cgprintf(cg, ", "); + ++cg->commacount; + codegen(cg, expr->rhs); + --cg->commacount; + break; + case KSTRUCT: + case KUNION: + cgprintf(cg, "struct "); + + assert(expr->type); + if (expr->type->module) + cgdeclname(cg, expr->type->module); + + cgprintf(cg, " {\n"); + ++cg->indent; + if (expr->rhs) + codegen(cg, expr->rhs); + --cg->indent; + cgindent(cg); + cgprintf(cg, "}"); + break; + case 
INVALID: + case LINEDELIM: + case SEMIDELIM: + case COMMADELIM: + case COLONDELIM: + case LCURLDELIM: + case LSQRDELIM: + case LPARDELIM: + case RCURLDELIM: + case RSQRDELIM: + case RPARDELIM: + case ANNOT: + case KVOID: + case KBOOL: + case KU8: + case KS8: + case KU16: + case KS16: + case KU32: + case KS32: + case KU64: + case KS64: + case KF32: + case KF64: + case KUCHAR: + case KCHAR: + case KUSHORT: + case KSHORT: + case KUINT: + case KINT: + case KULONG: + case KLONG: + case KULLONG: + case KLLONG: + case KFLOAT: + case KDOUBLE: + case KLDOUBLE: + case KUSIZE: + case KSSIZE: + case KUSE: + case KNOT: + case KAND: + case KOR: + case KIS: + case KBITCAST: + case KEXTERN: + case KINTERN: + case KSTATIC: + case KCONST: + case KVAR: + case KELSE: + case MAXKINDS: + case ENDOFFILE: + cgprintf(cg, "<not implemented: '%s' ", nodestrings[expr->kind]); + if (expr->lhs) + codegen(cg, expr->lhs); + cgprintf(cg, " "); + if (expr->rhs) + codegen(cg, expr->rhs); + cgprintf(cg, ">"); + break; + } +}