From 1052452235cf0f4c77f0b9bd35830d1409a0652d Mon Sep 17 00:00:00 2001 From: Sam Anthony Date: Fri, 25 Apr 2025 20:57:51 -0400 Subject: toml library --- toml.c | 1983 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1983 insertions(+) create mode 100644 toml.c (limited to 'toml.c') diff --git a/toml.c b/toml.c new file mode 100644 index 0000000..7f3ac61 --- /dev/null +++ b/toml.c @@ -0,0 +1,1983 @@ +#define _POSIX_C_SOURCE 200809L +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "toml.h" + +#define ALIGN8(sz) (((sz) + 7) & ~7) +#define calloc(x, y) error - forbidden - use CALLOC instead +static void* CALLOC(size_t nmemb, size_t sz) { + int nb = ALIGN8(sz) * nmemb; + void* p = malloc(nb); + if (p) { + memset(p, 0, nb); + } + return p; +} + +// some old platforms define strdup macro -- drop it. +#undef strdup +#define strdup(x) error - forbidden - use STRDUP instead +static char* STRDUP(const char* s) { + int len = strlen(s); + char* p = malloc(len + 1); + if (p) { + memcpy(p, s, len); + p[len] = 0; + } + return p; +} + +// some old platforms define strndup macro -- drop it. +#undef strndup +#define strndup(x) error - forbidden - use STRNDUP instead +static char* STRNDUP(const char* s, size_t n) { + size_t len = strnlen(s, n); + char* p = malloc(len + 1); + if (p) { + memcpy(p, s, len); + p[len] = 0; + } + return p; +} + +// Unparsed values. 
+typedef const char* toml_unparsed_t; +toml_unparsed_t toml_table_unparsed(const toml_table_t* table, const char* key); +toml_unparsed_t toml_array_unparsed(const toml_array_t* array, int idx); +int toml_value_string(toml_unparsed_t s, char** ret, int* len); +int toml_value_bool(toml_unparsed_t s, bool* ret); +int toml_value_int(toml_unparsed_t s, int64_t* ret); +int toml_value_double(toml_unparsed_t s, double* ret); +int toml_value_timestamp(toml_unparsed_t s, toml_timestamp_t* ret); + +// Convert escape to UTF-8; return #bytes used in buf to encode the char, or -1 +// on error. +// http://stackoverflow.com/questions/6240055/manually-converting-unicode-codepoints-into-utf-8-and-utf-16 +int read_unicode_escape(uint64_t code, char buf[6]) { + if (0xd800 <= code && code <= 0xdfff) /// UTF-16 surrogates + return -1; + if (0x10FFFF < code) + return -1; + if (code <= 0x7F) { /// 0x00000000 - 0x0000007F: 0xxxxxxx + buf[0] = (unsigned char)code; + return 1; + } + if (code <= 0x000007FF) { /// 0x00000080 - 0x000007FF: 110xxxxx 10xxxxxx + buf[0] = (unsigned char)(0xc0 | (code >> 6)); + buf[1] = (unsigned char)(0x80 | (code & 0x3f)); + return 2; + } + if (code <= 0x0000FFFF) { /// 0x00000800 - 0x0000FFFF: 1110xxxx 10xxxxxx 10xxxxxx + buf[0] = (unsigned char)(0xe0 | (code >> 12)); + buf[1] = (unsigned char)(0x80 | ((code >> 6) & 0x3f)); + buf[2] = (unsigned char)(0x80 | (code & 0x3f)); + return 3; + } + if (code <= 0x001FFFFF) { /// 0x00010000 - 0x001FFFFF: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx + buf[0] = (unsigned char)(0xf0 | (code >> 18)); + buf[1] = (unsigned char)(0x80 | ((code >> 12) & 0x3f)); + buf[2] = (unsigned char)(0x80 | ((code >> 6) & 0x3f)); + buf[3] = (unsigned char)(0x80 | (code & 0x3f)); + return 4; + } + return -1; +} + +static inline void xfree(const void* x) { + if (x) + free((void*)(intptr_t)x); +} + +enum tokentype_t { INVALID, DOT, COMMA, EQUAL, LBRACE, RBRACE, NEWLINE, LBRACKET, RBRACKET, STRING, MSTRING }; +typedef enum tokentype_t tokentype_t; + 
+typedef struct token_t token_t; +struct token_t { + tokentype_t tok; + toml_pos_t pos; + char* ptr; // points into context->start + int len; + int eof; +}; + +typedef struct context_t context_t; +struct context_t { + char* start; + char* stop; + char* errbuf; + int errbufsz; + + token_t tok; + toml_table_t* root; + toml_table_t* curtbl; + + struct { + int top; + char* key[10]; + int keylen[10]; + token_t tok[10]; + } tpath; +}; + +#define STRINGIFY(x) #x +#define TOSTRING(x) STRINGIFY(x) +#define FLINE __FILE__ ":" TOSTRING(__LINE__) + +static int next_token(context_t* ctx, bool dotisspecial); + +// Error reporting. Call when an error is detected. Always return -1. +static int e_outofmemory(context_t* ctx, const char* fline) { + snprintf(ctx->errbuf, ctx->errbufsz, "ERROR: out of memory (%s)", fline); + return -1; +} + +static int e_internal(context_t* ctx, const char* fline) { + snprintf(ctx->errbuf, ctx->errbufsz, "internal error (%s)", fline); + return -1; +} + +static int e_syntax(context_t* ctx, toml_pos_t pos, const char* msg) { + snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: %s", pos.line, pos.col, msg); + return -1; +} + +static int e_keyexists(context_t* ctx, toml_pos_t pos) { + snprintf(ctx->errbuf, ctx->errbufsz, "at %d:%d: key already defined", pos.line, pos.col); + return -1; +} + +static void* expand(void* p, int sz, int newsz) { + void* s = malloc(newsz); + if (!s) + return 0; + + if (p) { + memcpy(s, p, sz); + free(p); + } + return s; +} + +static void** expand_ptrarr(void** p, int n) { + void** s = malloc((n + 1) * sizeof(void*)); + if (!s) + return 0; + + s[n] = 0; + if (p) { + memcpy(s, p, n * sizeof(void*)); + free(p); + } + return s; +} + +static toml_arritem_t* expand_arritem(toml_arritem_t* p, int n) { + toml_arritem_t* pp = expand(p, n * sizeof(*p), (n + 1) * sizeof(*p)); + if (!pp) + return 0; + + memset(&pp[n], 0, sizeof(pp[n])); + return pp; +} + +static uint8_t const u8_length[] = {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 3, 4}; 
+#define u8length(s) u8_length[(((uint8_t*)(s))[0] & 0xFF) >> 4]; + +static char* norm_lit_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) { + const char* sp = src; + const char* sq = src + srclen; + char* dst = 0; /// will write to dst[] and return it + int max = 0; /// max size of dst[] + int off = 0; /// cur offset in dst[] + + for (;;) { /// scan forward on src + if (off >= max - 10) { /// have some slack for misc stuff + int newmax = max + 50; + char* x = expand(dst, max, newmax); + if (!x) { + xfree(dst); + snprintf(errbuf, errbufsz, "out of memory"); + return 0; + } + dst = x; + max = newmax; + } + + if (sp >= sq) /// finished? + break; + + uint8_t l = u8length(sp); + if (l == 0) { + xfree(dst); + snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); + return 0; + } + if (l > 1) { + for (int i = 0; i < l; i++) { + char ch = *sp++; + if ((ch & 0x80) != 0x80) { + xfree(dst); + snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); + return 0; + } + dst[off++] = ch; + } + continue; + } + + /// control characters other than Tab are not allowed + char ch = *sp++; + if ((0 <= ch && ch <= 0x08) || (0x0a <= ch && ch <= 0x1f) || ch == 0x7f) { + if (!(multiline && (ch == '\r' || ch == '\n'))) { + xfree(dst); + snprintf(errbuf, errbufsz, "invalid char U+%04x", ch); + return 0; + } + } + + dst[off++] = ch; /// a plain copy suffice + } + + *len = off; + dst[off++] = 0; + return dst; +} + +// Convert src to raw unescaped utf-8 string. Returns NULL if error with errmsg +// in errbuf. 
+static char* norm_basic_str(const char* src, int srclen, int* len, bool multiline, char* errbuf, int errbufsz) { + const char* sp = src; + const char* sq = src + srclen; + char* dst = 0; /// will write to dst[] and return it + int max = 0; /// max size of dst[] + int off = 0; /// cur offset in dst[] + + /// scan forward on src + for (;;) { + if (off >= max - 10) { /// have some slack for misc stuff + int newmax = max + 50; + char* x = expand(dst, max, newmax); + if (!x) { + xfree(dst); + snprintf(errbuf, errbufsz, "out of memory"); + return 0; + } + dst = x; + max = newmax; + } + + if (sp >= sq) /// finished? + break; + + uint8_t l = u8length(sp); + if (l == 0) { + xfree(dst); + snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); + return 0; + } + if (l > 1) { + for (int i = 0; i < l; i++) { + char ch = *sp++; + if ((ch & 0x80) != 0x80) { + xfree(dst); + snprintf(errbuf, errbufsz, "invalid UTF-8 at byte pos %d", off); + return 0; + } + dst[off++] = ch; + } + continue; + } + + char ch = *sp++; + if (ch != '\\') { + /// must be escaped: U+0000 to U+0008, U+000A to U+001F, U+007F + if ((ch >= 0 && ch <= 0x08) || (ch >= 0x0a && ch <= 0x1f) || ch == 0x7f) { + if (!(multiline && (ch == '\r' || ch == '\n'))) { + xfree(dst); + snprintf(errbuf, errbufsz, "invalid char U+%04x", ch); + return 0; + } + } + + dst[off++] = ch; /// a plain copy suffice + continue; + } + + // TODO: unreachable, I think? + if (sp >= sq) { /// ch was backslash. we expect the escape char. + snprintf(errbuf, errbufsz, "last backslash is invalid"); + xfree(dst); + return 0; + } + + if (multiline) { /// for multi-line, we want to kill line-ending-backslash. + if (sp[strspn(sp, " \t\r")] == '\n') { /// if there is only whitespace after the backslash ... + sp += strspn(sp, " \t\r\n"); /// skip all the following whitespaces + continue; + } + } + + ch = *sp++; /// get the escaped char + switch (ch) { + case 'u': + case 'U': { + uint64_t ucs = 0; + int nhex = (ch == 'u' ? 
4 : 8); + for (int i = 0; i < nhex; i++) { + // TODO: unreachable I think, as scan_string() already + // guarantees exactly 4 or 8 hex chars. + if (sp >= sq) { + snprintf(errbuf, errbufsz, "\\%c expected %d hex chars", ch, nhex); + xfree(dst); + return 0; + } + ch = *sp++; + int v = -1; + if ('0' <= ch && ch <= '9') + v = ch - '0'; + else if ('A' <= ch && ch <= 'F') + v = ch - 'A' + 10; + else if ('a' <= ch && ch <= 'f') + v = (ch ^ 0x20) - 'A' + 10; + // TODO: also unrechable, as per above. + if (v == -1) { + snprintf(errbuf, errbufsz, "invalid hex chars for \\u or \\U"); + xfree(dst); + return 0; + } + ucs = ucs * 16 + v; + } + int n = read_unicode_escape(ucs, &dst[off]); + if (n == -1) { + snprintf(errbuf, errbufsz, "illegal ucs code in \\u or \\U"); + xfree(dst); + return 0; + } + off += n; + }; + continue; + case 'b': ch = '\b'; break; + case 't': ch = '\t'; break; + case 'n': ch = '\n'; break; + case 'f': ch = '\f'; break; + case 'r': ch = '\r'; break; + case '"': ch = '"'; break; + case '\\': ch = '\\'; break; + default: + // TODO: unrechable, I think, as scan_string() already + // guarantees correct char. + snprintf(errbuf, errbufsz, "illegal escape char \\%c", ch); + xfree(dst); + return 0; + } + + dst[off++] = ch; + } + + *len = off; + dst[off++] = 0; /// Cap with NUL and return it. + return dst; +} + +// Normalize a key. Convert all special chars to raw unescaped utf-8 chars. +static char* normalize_key(context_t* ctx, token_t strtok, int* keylen) { + const char* sp = strtok.ptr; + const char* sq = strtok.ptr + strtok.len; + int ch = *sp; + char* ret; + + // Quoted string + if (ch == '\'' || ch == '\"') { + /// Take " or ' off from and back. 
+ sp++, sq--; + + char ebuf[80]; + if (ch == '\'') + ret = norm_lit_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf)); + else + ret = norm_basic_str(sp, sq - sp, keylen, false, ebuf, sizeof(ebuf)); + if (!ret) { + e_syntax(ctx, strtok.pos, ebuf); + return 0; + } + return ret; + } + + *keylen = 0; + for (const char* c = sp; c != sq; c++) { /// Bare key: allow: [A-Za-z0-9_-]+ + *keylen = *keylen + 1; + if (isalnum(*c) || *c == '_' || *c == '-') + continue; + // TODO: never triggered? When reading the file it already validates + // this, so seems redundant? Need to double-check. + e_syntax(ctx, ctx->tok.pos, "invalid key"); + return 0; + } + + if (!(ret = STRNDUP(sp, sq - sp))) { /// dup and return + e_outofmemory(ctx, FLINE); + return 0; + } + return ret; +} + +// Look up key in tbl. Return 0 if not found, or 'v'alue, 'a'rray or 't'able +// depending on the element. +static int check_key(toml_table_t* tbl, const char* key, toml_keyval_t** ret_val, toml_array_t** ret_arr, toml_table_t** ret_tbl) { + int i; + void* dummy; + + if (!ret_tbl) + ret_tbl = (toml_table_t**)&dummy; + if (!ret_arr) + ret_arr = (toml_array_t**)&dummy; + if (!ret_val) + ret_val = (toml_keyval_t**)&dummy; + + *ret_tbl = 0; + *ret_arr = 0; + *ret_val = 0; + + for (i = 0; i < tbl->nkval; i++) { + if (strcmp(key, tbl->kval[i]->key) == 0) { + *ret_val = tbl->kval[i]; + return 'v'; + } + } + for (i = 0; i < tbl->narr; i++) { + if (strcmp(key, tbl->arr[i]->key) == 0) { + *ret_arr = tbl->arr[i]; + return 'a'; + } + } + for (i = 0; i < tbl->ntbl; i++) { + if (strcmp(key, tbl->tbl[i]->key) == 0) { + *ret_tbl = tbl->tbl[i]; + return 't'; + } + } + return 0; +} + +static int key_kind(toml_table_t* tbl, const char* key) { + return check_key(tbl, key, 0, 0, 0); +} + +// Create a keyval in the table. 
+static toml_keyval_t* create_keyval_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) { + int keylen; + char* newkey = normalize_key(ctx, keytok, &keylen); + if (!newkey) + return 0; + + toml_keyval_t* dest = 0; + if (key_kind(tbl, newkey)) { + xfree(newkey); + e_keyexists(ctx, keytok.pos); + return 0; + } + + int n = tbl->nkval; + toml_keyval_t** base; + if ((base = (toml_keyval_t**)expand_ptrarr((void**)tbl->kval, n)) == 0) { + xfree(newkey); + e_outofmemory(ctx, FLINE); + return 0; + } + tbl->kval = base; + + if ((base[n] = (toml_keyval_t*)CALLOC(1, sizeof(*base[n]))) == 0) { + xfree(newkey); + e_outofmemory(ctx, FLINE); + return 0; + } + + dest = tbl->kval[tbl->nkval++]; + dest->key = newkey; + dest->keylen = keylen; + return dest; +} + +// Create a table in the table. +static toml_table_t* create_keytable_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok) { + int keylen; + char* newkey = normalize_key(ctx, keytok, &keylen); + if (!newkey) + return 0; + + toml_table_t* dest = 0; + // TODO: need to check all parts for: + // + // [a] + // [a.c] # checks of "a.c" is defined, which is false. + if (check_key(tbl, newkey, 0, 0, &dest)) { + xfree(newkey); + + /// Special case: make explicit if table exists and was created + /// implicitly. + if (dest && dest->implicit) { + dest->implicit = false; + return dest; + } + e_keyexists(ctx, keytok.pos); + return 0; + } + + int n = tbl->ntbl; + toml_table_t** base; + if ((base = (toml_table_t**)expand_ptrarr((void**)tbl->tbl, n)) == 0) { + xfree(newkey); + e_outofmemory(ctx, FLINE); + return 0; + } + tbl->tbl = base; + + if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0) { + xfree(newkey); + e_outofmemory(ctx, FLINE); + return 0; + } + + dest = tbl->tbl[tbl->ntbl++]; + dest->key = newkey; + dest->keylen = keylen; + return dest; +} + +// Create an array in the table. 
+static toml_array_t* create_keyarray_in_table(context_t* ctx, toml_table_t* tbl, token_t keytok, char kind) { + int keylen; + char* newkey = normalize_key(ctx, keytok, &keylen); + if (!newkey) + return 0; + + if (key_kind(tbl, newkey)) { + xfree(newkey); + e_keyexists(ctx, keytok.pos); + return 0; + } + + int n = tbl->narr; + toml_array_t** base; + if ((base = (toml_array_t**)expand_ptrarr((void**)tbl->arr, n)) == 0) { + xfree(newkey); + e_outofmemory(ctx, FLINE); + return 0; + } + tbl->arr = base; + + if ((base[n] = (toml_array_t*)CALLOC(1, sizeof(*base[n]))) == 0) { + xfree(newkey); + e_outofmemory(ctx, FLINE); + return 0; + } + toml_array_t* dest = tbl->arr[tbl->narr++]; + + dest->keylen = keylen; + dest->key = newkey; + dest->kind = kind; + return dest; +} + +static toml_arritem_t* create_value_in_array(context_t* ctx, toml_array_t* parent) { + const int n = parent->nitem; + toml_arritem_t* base = expand_arritem(parent->item, n); + if (!base) { + e_outofmemory(ctx, FLINE); + return 0; + } + parent->item = base; + parent->nitem++; + return &parent->item[n]; +} + +// Create an array in an array. 
+static toml_array_t* create_array_in_array(context_t* ctx, toml_array_t* parent) { + const int n = parent->nitem; + toml_arritem_t* base = expand_arritem(parent->item, n); + if (!base) { + e_outofmemory(ctx, FLINE); + return 0; + } + toml_array_t* ret = (toml_array_t*)CALLOC(1, sizeof(toml_array_t)); + if (!ret) { + e_outofmemory(ctx, FLINE); + return 0; + } + base[n].arr = ret; + parent->item = base; + parent->nitem++; + return ret; +} + +// Create a table in an array +static toml_table_t* create_table_in_array(context_t* ctx, toml_array_t* parent) { + int n = parent->nitem; + toml_arritem_t* base = expand_arritem(parent->item, n); + if (!base) { + e_outofmemory(ctx, FLINE); + return 0; + } + toml_table_t* ret = (toml_table_t*)CALLOC(1, sizeof(toml_table_t)); + if (!ret) { + e_outofmemory(ctx, FLINE); + return 0; + } + base[n].tbl = ret; + parent->item = base; + parent->nitem++; + return ret; +} + +static bool skip_newlines(context_t* ctx, bool isdotspecial) { + while (ctx->tok.tok == NEWLINE) { + if (next_token(ctx, isdotspecial)) + return false; + if (ctx->tok.eof) + break; + } + return true; +} + +static int parse_keyval(context_t* ctx, toml_table_t* tbl); + +static inline int eat_token(context_t* ctx, tokentype_t typ, bool isdotspecial, const char* fline) { + if (ctx->tok.tok != typ) + return e_internal(ctx, fline); + if (next_token(ctx, isdotspecial)) + return -1; + return 0; +} + +// We are at '{ ... }'; parse the table. 
// parse_inline_table: the current token must be LBRACE. Parses
// `key = value` pairs separated by commas into tbl until the closing brace,
// rejecting any NEWLINE token met between pairs. On success the closing
// brace is consumed and tbl->readonly is set, which makes parse_keyval
// refuse further additions to this table. Returns 0 on success, -1 on error
// (message recorded in ctx).
static int parse_inline_table(context_t* ctx, toml_table_t* tbl) {
    if (eat_token(ctx, LBRACE, 1, FLINE))
        return -1;

    for (;;) {
        if (ctx->tok.tok == NEWLINE)
            return e_syntax(ctx, ctx->tok.pos, "newline not allowed in inline table");

        if (ctx->tok.tok == RBRACE) // until closing brace
            break;

        if (ctx->tok.tok != STRING)
            return e_syntax(ctx, ctx->tok.pos, "expected a string");

        if (parse_keyval(ctx, tbl))
            return -1;

        if (ctx->tok.tok == NEWLINE)
            return e_syntax(ctx, ctx->tok.pos, "newline not allowed in inline table");

        // On comma, continue to scan for next keyval.
        if (ctx->tok.tok == COMMA) {
            if (eat_token(ctx, COMMA, 1, FLINE))
                return -1;
            continue;
        }
        break;
    }

    /// Anything other than RBRACE here makes eat_token report an internal
    /// error rather than a syntax error.
    if (eat_token(ctx, RBRACE, 1, FLINE))
        return -1;
    tbl->readonly = 1;
    return 0;
}

// Classify an unparsed value. Returns 's' (string), 'b' (bool), 'i' (int),
// 'd' (double), 'T' (timestamp), 'D' (date), 't' (time) or 'u' (unknown).
// The toml_value_* converters are tried in that order; the first that
// returns 0 decides the type.
static int valtype(const char* val) {
    toml_timestamp_t ts;
    if (*val == '\'' || *val == '"')
        return 's';
    /// NOTE(review): `false` and `0` are passed as the output pointers here,
    /// which presumably means "validate only" -- confirm that the converters
    /// accept a null output pointer.
    if (toml_value_bool(val, false) == 0)
        return 'b';
    if (toml_value_int(val, 0) == 0)
        return 'i';
    if (toml_value_double(val, 0) == 0)
        return 'd';
    if (toml_value_timestamp(val, &ts) == 0) {
        /// NOTE(review): the meaning of these tests depends on how
        /// toml_timestamp_t declares year/hour (not visible here); if they
        /// are plain ints, a value at hour 0 would not be seen as 'T'.
        if (ts.year && ts.hour)
            return 'T'; /// timestamp
        if (ts.year)    // TODO: never reached?
            return 'D'; /// date
        return 't';     /// time
    }
    return 'u'; /// unknown
}

// We are at '[...]'
//
// Parses the elements of an array into arr, recursing for nested arrays and
// inline tables, and consumes the closing bracket. While parsing it keeps
// arr->kind up to date ('v' values, 'a' arrays, 't' tables, 'm' mixed) and,
// for value elements, arr->type (the valtype() of the elements, or 'm' when
// they differ). Returns 0 on success, -1 on error.
static int parse_array(context_t* ctx, toml_array_t* arr) {
    if (eat_token(ctx, LBRACKET, 0, FLINE))
        return -1;

    for (;;) {
        if (!skip_newlines(ctx, 0))
            return -1;

        if (ctx->tok.tok == RBRACKET) /// until ]
            break;

        switch (ctx->tok.tok) {
        case MSTRING:
        case STRING: {
            /// set array kind if this will be the first entry
            if (arr->kind == 0)
                arr->kind = 'v';
            else if (arr->kind != 'v')
                arr->kind = 'm';

            char* val = ctx->tok.ptr;
            int vlen = ctx->tok.len;

            /// make a new value in array
            toml_arritem_t* newval = create_value_in_array(ctx, arr);
            if (!newval)
                return e_outofmemory(ctx, FLINE);

            /// the item keeps its own copy of the raw token text
            if (!(newval->val = STRNDUP(val, vlen)))
                return e_outofmemory(ctx, FLINE);

            newval->valtype = valtype(newval->val);

            /// set array type if this is the first entry
            if (arr->nitem == 1)
                arr->type = newval->valtype;
            else if (arr->type != newval->valtype)
                arr->type = 'm'; /// mixed

            if (eat_token(ctx, ctx->tok.tok, 0, FLINE))
                return -1;
            break;
        }
        case LBRACKET: { // [ [array], [array] ... ]
            // set the array kind if this will be the first entry.
            if (arr->kind == 0)
                arr->kind = 'a';
            else if (arr->kind != 'a')
                arr->kind = 'm';

            toml_array_t* subarr = create_array_in_array(ctx, arr);
            if (!subarr)
                return -1;
            if (parse_array(ctx, subarr))
                return -1;
            break;
        }
        case LBRACE: { // [ {table}, {table} ... ]
            // set the array kind if this will be the first entry.
            if (arr->kind == 0)
                arr->kind = 't';
            else if (arr->kind != 't')
                arr->kind = 'm';

            toml_table_t* subtbl = create_table_in_array(ctx, arr);
            if (!subtbl)
                return -1;
            if (parse_inline_table(ctx, subtbl))
                return -1;
            break;
        }
        default: return e_syntax(ctx, ctx->tok.pos, "syntax error");
        }

        if (!skip_newlines(ctx, 0))
            return -1;

        // on comma, continue to scan for next element
        if (ctx->tok.tok == COMMA) {
            if (eat_token(ctx, COMMA, 0, FLINE))
                return -1;
            continue;
        }
        break;
    }

    /// Anything other than RBRACKET here makes eat_token report an internal
    /// error rather than a syntax error.
    if (eat_token(ctx, RBRACKET, 1, FLINE))
        return -1;
    return 0;
}

// Handle lines like:
//   key = "value"
//   key = [ array ]
//   key = { table }
//
// The current token must be the key (STRING). Dotted keys (a.b = ...) are
// handled by finding or creating the sub-table for the first component and
// recursing on the remainder. Fails with "key already defined" when tbl is
// read-only, i.e. was produced by an inline table. Returns 0 on success,
// -1 on error.
static int parse_keyval(context_t* ctx, toml_table_t* tbl) {
    if (tbl->readonly)
        return e_keyexists(ctx, ctx->tok.pos);

    token_t key = ctx->tok;
    if (eat_token(ctx, STRING, 1, FLINE))
        return -1;

    if (ctx->tok.tok == DOT) {
        // Handle inline dotted key:
        //   physical.color = "orange"
        //   physical.shape = "round"
        toml_table_t* subtbl = 0;
        {
            int keylen;
            char* subtblstr = normalize_key(ctx, key, &keylen);
            if (!subtblstr)
                return -1;

            /// toml_table_table() is defined outside this view; it is used
            /// here to look up an existing sub-table by name.
            subtbl = toml_table_table(tbl, subtblstr);
            if (subtbl)
                subtbl->keylen = keylen;
            xfree(subtblstr);
        }
        if (!subtbl) {
            subtbl = create_keytable_in_table(ctx, tbl, key);
            if (!subtbl)
                return -1;
        }
        /// step over the '.' and parse the rest of the key inside subtbl
        if (next_token(ctx, true))
            return -1;
        if (parse_keyval(ctx, subtbl))
            return -1;
        return 0;
    }

    if (ctx->tok.tok != EQUAL)
        return e_syntax(ctx, ctx->tok.pos, "missing '='"); 

    /// the value is scanned with dotisspecial = false
    if (next_token(ctx, false))
        return -1;

    switch (ctx->tok.tok) {
    case MSTRING:
    case STRING: { // key = "value"
        toml_keyval_t* keyval = create_keyval_in_table(ctx, tbl, key);
        if (!keyval)
            return -1;
        token_t val = ctx->tok;

        /// the entry keeps its own copy of the raw (unparsed) token text
        assert(keyval->val == 0);
        if (!(keyval->val = STRNDUP(val.ptr, val.len)))
            return e_outofmemory(ctx, FLINE);

        if (next_token(ctx, true))
            return -1;

        return 0;
    }
    case LBRACKET: { // key = [ array ]
        toml_array_t* arr = create_keyarray_in_table(ctx, tbl, key, 0);
        if (!arr)
            return -1;
        if (parse_array(ctx, arr))
            return -1;
        return 0;
    }
    case LBRACE: { // key = { table }
        toml_table_t* nexttbl = create_keytable_in_table(ctx, tbl, key);
        if (!nexttbl)
            return -1;
        if (parse_inline_table(ctx, nexttbl))
            return -1;
        return 0;
    }
    default: return e_syntax(ctx, ctx->tok.pos, "syntax error");
    }
    return 0;
}

// A table path as a list of key tokens.
// NOTE(review): nothing in the visible part of the file uses this type;
// fill_tblpath and walk_tabpath work on ctx->tpath instead.
typedef struct tabpath_t tabpath_t;
struct tabpath_t {
    int cnt;
    token_t key[10];
};

// At [x.y.z] or [[x.y.z]]
// Scan forward and fill tblpath until it enters ] or ]]
// There will be at least one entry on return.
//
// The keys previously stored in ctx->tpath are freed first. Every component
// is normalized and stored together with its token and length. At most 10
// components are accepted. Returns 0 on success (the current token is then
// RBRACKET), -1 on error.
static int fill_tblpath(context_t* ctx) {
    // clear tpath
    for (int i = 0; i < ctx->tpath.top; i++) {
        char** p = &ctx->tpath.key[i];
        xfree(*p);
        *p = 0;
    }
    ctx->tpath.top = 0;

    for (;;) {
        if (ctx->tpath.top >= 10)
            return e_syntax(ctx, ctx->tok.pos, "table path is too deep; max allowed is 10.");
        if (ctx->tok.tok != STRING)
            return e_syntax(ctx, ctx->tok.pos, "invalid or missing key");

        int keylen;
        char* key = normalize_key(ctx, ctx->tok, &keylen);
        if (!key)
            return -1;
        /// ctx->tpath now owns `key`
        ctx->tpath.tok[ctx->tpath.top] = ctx->tok;
        ctx->tpath.key[ctx->tpath.top] = key;
        ctx->tpath.keylen[ctx->tpath.top] = keylen;
        ctx->tpath.top++;

        if (next_token(ctx, true))
            return -1;

        if (ctx->tok.tok == RBRACKET)
            break;
        if (ctx->tok.tok != DOT)
            return e_syntax(ctx, ctx->tok.pos, "invalid key");
        if (next_token(ctx, true))
            return -1;
    }

    if (ctx->tpath.top <= 0) // TODO: never reached?
        return e_syntax(ctx, ctx->tok.pos, "empty table selector");
    return 0;
}

// Walk tblpath from the root, and create new tables on the way. Sets
// ctx->curtbl to the final table.
+static int walk_tabpath(context_t* ctx) { + toml_table_t* curtbl = ctx->root; /// start from root + + for (int i = 0; i < ctx->tpath.top; i++) { + const char* key = ctx->tpath.key[i]; + int keylen = ctx->tpath.keylen[i]; + + toml_keyval_t* nextval = 0; + toml_array_t* nextarr = 0; + toml_table_t* nexttbl = 0; + switch (check_key(curtbl, key, &nextval, &nextarr, &nexttbl)) { + case 't': /// found a table. nexttbl is where we will go next. + break; + case 'a': /// found an array. nexttbl is the last table in the array. + if (nextarr->kind != 't') + return e_internal(ctx, FLINE); + + if (nextarr->nitem == 0) + return e_internal(ctx, FLINE); + + nexttbl = nextarr->item[nextarr->nitem - 1].tbl; + break; + case 'v': return e_keyexists(ctx, ctx->tpath.tok[i].pos); + default: { /// Not found. Let's create an implicit table. + int n = curtbl->ntbl; + toml_table_t** base = (toml_table_t**)expand_ptrarr((void**)curtbl->tbl, n); + if (base == 0) + return e_outofmemory(ctx, FLINE); + + curtbl->tbl = base; + + if ((base[n] = (toml_table_t*)CALLOC(1, sizeof(*base[n]))) == 0) + return e_outofmemory(ctx, FLINE); + + if ((base[n]->key = STRDUP(key)) == 0) + return e_outofmemory(ctx, FLINE); + base[n]->keylen = keylen; + + nexttbl = curtbl->tbl[curtbl->ntbl++]; + + /// tabs created by walk_tabpath are considered implicit + nexttbl->implicit = true; + }; break; + } + curtbl = nexttbl; /// switch to next tbl + } + + ctx->curtbl = curtbl; /// save it + return 0; +} + +// handle lines like [x.y.z] or [[x.y.z]] +static int parse_select(context_t* ctx) { + assert(ctx->tok.tok == LBRACKET); + + // true if [[ + bool aot = (ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == '['); + + // Need to detect '[[' on our own because next_token() will skip whitespace, + // and '[ [' would be taken as '[[', which is wrong. 
+ + // eat [ or [[ + if (eat_token(ctx, LBRACKET, 1, FLINE)) + return -1; + if (aot) { + assert(ctx->tok.tok == LBRACKET); + if (eat_token(ctx, LBRACKET, 1, FLINE)) + return -1; + } + + if (fill_tblpath(ctx)) + return -1; + + // For [x.y.z] or [[x.y.z]], remove z from tpath. + token_t z = ctx->tpath.tok[ctx->tpath.top - 1]; + xfree(ctx->tpath.key[ctx->tpath.top - 1]); + ctx->tpath.top--; + + // Set up ctx->curtbl. + if (walk_tabpath(ctx)) + return -1; + + if (!aot) { + // [x.y.z] -> create z = {} in x.y + toml_table_t* curtbl = create_keytable_in_table(ctx, ctx->curtbl, z); + if (!curtbl) + return -1; + ctx->curtbl = curtbl; + } else { + // [[x.y.z]] -> create z = [] in x.y + toml_array_t* arr = 0; + { + int keylen; + char* zstr = normalize_key(ctx, z, &keylen); + if (!zstr) + return -1; + arr = toml_table_array(ctx->curtbl, zstr); + if (arr) + arr->keylen = keylen; + xfree(zstr); + } + if (!arr) { + arr = create_keyarray_in_table(ctx, ctx->curtbl, z, 't'); + if (!arr) + return -1; + } + if (arr->kind != 't') + return e_syntax(ctx, z.pos, "array mismatch"); + + // add to z[] + toml_table_t* dest; + { + toml_table_t* t = create_table_in_array(ctx, arr); + if (!t) + return -1; + + if ((t->key = STRDUP("__anon__")) == 0) + return e_outofmemory(ctx, FLINE); + dest = t; + } + + ctx->curtbl = dest; + } + + if (ctx->tok.tok != RBRACKET) // TODO: never reached + return e_syntax(ctx, ctx->tok.pos, "expected ']'"); + if (aot) { + if (!(ctx->tok.ptr + 1 < ctx->stop && ctx->tok.ptr[1] == ']')) + return e_syntax(ctx, ctx->tok.pos, "expected ']]'"); + if (eat_token(ctx, RBRACKET, 1, FLINE)) + return -1; + } + + if (eat_token(ctx, RBRACKET, 1, FLINE)) + return -1; + if (ctx->tok.tok != NEWLINE) + return e_syntax(ctx, ctx->tok.pos, "extra chars after ] or ]]"); + return 0; +} + +toml_table_t* toml_parse(char* toml, char* errbuf, int errbufsz) { + context_t ctx; + + /// clear errbuf + if (errbufsz <= 0) + errbufsz = 0; + if (errbufsz > 0) + errbuf[0] = 0; + + // init context + 
memset(&ctx, 0, sizeof(ctx)); + ctx.start = toml; + ctx.stop = ctx.start + strlen(toml); + ctx.errbuf = errbuf; + ctx.errbufsz = errbufsz; + + // start with an artificial newline of length 0 + ctx.tok.tok = NEWLINE; + ctx.tok.pos.line = 1; + ctx.tok.pos.col = 1; + ctx.tok.ptr = toml; + ctx.tok.len = 0; + + // make a root table + if ((ctx.root = CALLOC(1, sizeof(*ctx.root))) == 0) { + e_outofmemory(&ctx, FLINE); + return 0; // Do not goto fail, root table not set up yet + } + + // set root as default table + ctx.curtbl = ctx.root; + + // Scan forward until EOF + for (token_t tok = ctx.tok; !tok.eof; tok = ctx.tok) { + switch (tok.tok) { + case NEWLINE: + if (next_token(&ctx, true)) + goto fail; + break; + + case STRING: + if (parse_keyval(&ctx, ctx.curtbl)) + goto fail; + + if (ctx.tok.tok != NEWLINE) { + e_syntax(&ctx, ctx.tok.pos, "extra chars after value"); + goto fail; + } + + if (eat_token(&ctx, NEWLINE, 1, FLINE)) + goto fail; + break; + + case LBRACKET: // [ x.y.z ] or [[ x.y.z ]] + if (parse_select(&ctx)) + goto fail; + break; + + default: e_syntax(&ctx, tok.pos, "syntax error"); goto fail; + } + } + + /// success + for (int i = 0; i < ctx.tpath.top; i++) + xfree(ctx.tpath.key[i]); + return ctx.root; + +fail: + // Something bad has happened. Free resources and return error. + for (int i = 0; i < ctx.tpath.top; i++) + xfree(ctx.tpath.key[i]); + toml_free(ctx.root); + return 0; +} + +toml_table_t* toml_parse_file(FILE* fp, char* errbuf, int errbufsz) { + int bufsz = 0; + char* buf = 0; + int off = 0; + int inc = 1024; + + while (!feof(fp)) { + if (bufsz == 1024 * 20) /// Increment buffer by 20k after 20k. + inc = 1024 * 20; + if (off == bufsz) { + int xsz = bufsz + inc; + char* x = expand(buf, bufsz, xsz); + if (!x) { + snprintf(errbuf, errbufsz, "out of memory"); + xfree(buf); + return 0; + } + buf = x; + bufsz = xsz; + } + + errno = 0; + int n = fread(buf + off, 1, bufsz - off, fp); + if (ferror(fp)) { + snprintf(errbuf, errbufsz, "%s", (errno ? 
strerror(errno) : "Error reading file")); + xfree(buf); + return 0; + } + off += n; + } + + /// tag on a NUL to cap the string + if (off == bufsz) { + int xsz = bufsz + 1; + char* x = expand(buf, bufsz, xsz); + if (!x) { + snprintf(errbuf, errbufsz, "out of memory"); + xfree(buf); + return 0; + } + buf = x; + bufsz = xsz; + } + buf[off] = 0; + + /// parse it, cleanup and finish. + toml_table_t* ret = toml_parse(buf, errbuf, errbufsz); + xfree(buf); + return ret; +} + +static void xfree_kval(toml_keyval_t* p) { + if (!p) + return; + xfree(p->key); + xfree(p->val); + xfree(p); +} + +static void xfree_tbl(toml_table_t* p); + +static void xfree_arr(toml_array_t* p) { + if (!p) + return; + + xfree(p->key); + const int n = p->nitem; + for (int i = 0; i < n; i++) { + toml_arritem_t* a = &p->item[i]; + if (a->val) + xfree(a->val); + else if (a->arr) + xfree_arr(a->arr); + else if (a->tbl) + xfree_tbl(a->tbl); + } + xfree(p->item); + xfree(p); +} + +static void xfree_tbl(toml_table_t* p) { + if (!p) + return; + + xfree(p->key); + + for (int i = 0; i < p->nkval; i++) + xfree_kval(p->kval[i]); + xfree(p->kval); + + for (int i = 0; i < p->narr; i++) + xfree_arr(p->arr[i]); + xfree(p->arr); + + for (int i = 0; i < p->ntbl; i++) + xfree_tbl(p->tbl[i]); + xfree(p->tbl); + + xfree(p); +} + +void toml_free(toml_table_t* tbl) { + xfree_tbl(tbl); +} + +static void set_token(context_t* ctx, tokentype_t tok, toml_pos_t pos, char* ptr, int len) { + token_t t; + t.tok = tok; + t.pos = pos; + t.ptr = ptr; + t.len = len; + t.eof = 0; + ctx->tok = t; +} + +static void set_eof(context_t* ctx, toml_pos_t pos) { + set_token(ctx, NEWLINE, pos, ctx->stop, 0); + ctx->tok.eof = 1; +} + +// Scan p for n digits compositing entirely of [0-9] +static int scan_digits(const char* p, int n) { + int ret = 0; + for (; n > 0 && isdigit(*p); n--, p++) + ret = 10 * ret + (*p - '0'); + return n ? 
-1 : ret; +} + +static bool scan_date(const char* p, int* YY, int* MM, int* DD) { + int year = scan_digits(p, 4); + int month = (year >= 0 && p[4] == '-') ? scan_digits(p + 5, 2) : -1; + int day = (month >= 0 && p[7] == '-') ? scan_digits(p + 8, 2) : -1; + if (YY) + *YY = year; + if (MM) + *MM = month; + if (DD) + *DD = day; + return (year >= 0 && month >= 0 && day >= 0); +} + +static bool scan_time(const char* p, int* hh, int* mm, int* ss) { + int hour = scan_digits(p, 2); + int minute = (hour >= 0 && p[2] == ':') ? scan_digits(p + 3, 2) : -1; + int second = (minute >= 0 && p[5] == ':') ? scan_digits(p + 6, 2) : -1; + if (hh) + *hh = hour; + if (mm) + *mm = minute; + if (ss) + *ss = second; + return (hour >= 0 && minute >= 0 && second >= 0); +} + +static int parse_millisec(const char* p, const char** endp) { + int ret = 0; + int unit = 100; /// unit in millisec + for (; '0' <= *p && *p <= '9'; p++, unit /= 10) + ret += (*p - '0') * unit; + *endp = p; + return ret; +} + +static bool scan_offset(const char* p, int* tz) { + int sign = p[0]; + int hour = scan_digits(p + 1, 2); + int minute = (hour >= 0 && p[3] == ':') ? scan_digits(p + 4, 2) : -1; + if (hour < -12 || hour > 14 || minute < 0 || minute > 59) + return false; + if (tz) { + *tz = hour * 60 + minute; + if (sign == '-') + *tz = -(*tz); + } + return true; +} + +static int scan_string(context_t* ctx, char* p, toml_pos_t* pos, bool dotisspecial) { + char* orig = p; + + // Literal multiline. + if (strncmp(p, "'''", 3) == 0) { + char* q = p + 3; + pos->col += 3; + while (true) { + q = strstr(q, "'''"); + if (q == 0) + return e_syntax(ctx, *pos, "unterminated triple quote (''')"); + int i = 0; + while (q[3] == '\'') { + i++; + if (i >= 3) + return e_syntax(ctx, *pos, "too many ''' in triple-s-quote"); + q++; + } + break; + } + set_token(ctx, MSTRING, *pos, orig, q + 3 - orig); + return 0; + } + + // Multiline. 
+ if (strncmp(p, "\"\"\"", 3) == 0) { + char* q = p + 3; + pos->col += 3; + while (true) { + q = strstr(q, "\"\"\""); + if (q == 0) + return e_syntax(ctx, *pos, "unterminated triple quote (\"\"\")"); + if (q[-1] == '\\') { + q++; + continue; + } + int i = 0; + while (q[3] == '\"') { + i++; + if (i >= 3) + return e_syntax(ctx, *pos, "too many \"\"\" in triple-d-quote"); + q++; + } + break; + } + + /// the string is [p+3, q-1] + int hexreq = 0; /// #hex required + bool escape = false; + for (p += 3; p < q; p++) { + if (escape) { + escape = false; + if (strchr("btnfr\"\\", *p)) + continue; + if (*p == 'u') { + hexreq = 4; + continue; + } + if (*p == 'U') { + hexreq = 8; + continue; + } + if (p[strspn(p, " \t\r")] == '\n') + continue; // allow for line ending backslash + return e_syntax(ctx, *pos, "bad escape char"); + } + if (hexreq) { + hexreq--; + if (strchr("0123456789ABCDEFabcdef", *p)) + continue; + return e_syntax(ctx, *pos, "expected hex char"); + } + if (*p == '\\') { + escape = true; + continue; + } + } + if (escape) // TODO: unreachable, I think? + return e_syntax(ctx, *pos, "expected an escape char"); + if (hexreq) + return e_syntax(ctx, *pos, "expected more hex char"); + + set_token(ctx, MSTRING, *pos, orig, q + 3 - orig); + return 0; + } + + // Literal string. + if (*p == '\'') { + for (p++; *p && *p != '\n' && *p != '\''; p++) + pos->col++; + if (*p != '\'') + return e_syntax(ctx, *pos, "unterminated quote (')"); + set_token(ctx, STRING, *pos, orig, p + 1 - orig); + return 0; + } + + // Basic String. 
+ if (*p == '\"') { + int hexreq = 0; /// #hex required + bool escape = false; + for (p++; *p; p++) { + pos->col++; + if (escape) { + escape = false; + if (strchr("btnfr\"\\", *p)) + continue; + if (*p == 'u') { + hexreq = 4; + continue; + } + if (*p == 'U') { + hexreq = 8; + continue; + } + return e_syntax(ctx, *pos, "bad escape char"); + } + if (hexreq) { + hexreq--; + if (strchr("0123456789ABCDEFabcdef", *p)) + continue; + return e_syntax(ctx, *pos, "expected hex char"); + } + if (*p == '\\') { + escape = true; + continue; + } + if (*p == '\n') + break; + if (*p == '"') + break; + } + if (*p != '"') + return e_syntax(ctx, *pos, "unterminated quote (\")"); + + set_token(ctx, STRING, *pos, orig, p + 1 - orig); + return 0; + } + + // Time + if (!dotisspecial && scan_time(p, 0, 0, 0)) { + p += strspn(p, "0123456789:"); /// forward thru the time. + if (p[0] == '.') { /// Subseconds + int n = strspn(++p, "0123456789"); + if (n == 0) + return e_syntax(ctx, *pos, "extra chars after '.'"); + p += n; + } + for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string + ; + set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize + return 0; + } + + // Datetime + if (!dotisspecial && scan_date(p, 0, 0, 0)) { + p += strspn(p, "0123456789-"); /// forward thru the date + if (p[0] == ' ' || p[0] == 't' || p[0] == 'T') { /// forward thru the time + p++; + p += strspn(p, "0123456789:"); + if (p[0] == '.') { /// Subseconds + int n = strspn(++p, "0123456789"); + if (n == 0) + return e_syntax(ctx, *pos, "extra chars after '.'"); + p += n; + } + } + + // Offset + if (p[0] == 'Z' || p[0] == 'z') { + p++; + } else if (p[0] == '+' || p[0] == '-') { + if (!scan_offset(p, 0)) + return e_syntax(ctx, *pos, "invalid offset"); + p += 6; + } + + for (; p[-1] == ' '; p--) /// squeeze out any spaces at end of string + ; + set_token(ctx, STRING, *pos, orig, p - orig); /// tokenize + return 0; + } + + // Literals + for (; *p && *p != '\n'; p++) { + int ch = *p; + if (ch == '.' 
&& dotisspecial) + break; + if ('A' <= ch && ch <= 'Z') + continue; + if ('a' <= ch && ch <= 'z') + continue; + if (strchr("0123456789+-_.", ch)) + continue; + break; + } + + set_token(ctx, STRING, *pos, orig, p - orig); + return 0; +} + +static int next_token(context_t* ctx, bool dotisspecial) { + // Eat this tok. + char* p = ctx->tok.ptr; + toml_pos_t pos = ctx->tok.pos; + for (int i = 0; i < ctx->tok.len; i++) { + pos.col++; + if (*p++ == '\n') { + pos.line++; + pos.col = 1; + } + } + + /// Make next tok + while (p < ctx->stop) { + if (*p == '#') { /// Skip comment. stop just before the \n. + for (p++; p < ctx->stop && *p != '\n'; p++) { + pos.col++; + if ((*p != '\t' && *p != '\r' && *p != '\n') && ((*p >= 0x00 && *p <= 0x1f) || *p == 0x7f)) + return e_syntax(ctx, pos, "invalid control character"); + if (*p == '\r' && p < ctx->stop + 1 && *(p + 1) != '\n') + return e_syntax(ctx, pos, "invalid control character"); + } + continue; + } + + if (dotisspecial && *p == '.') { + set_token(ctx, DOT, pos, p, 1); + return 0; + } + + switch (*p) { + case ',': set_token(ctx, COMMA, pos, p, 1); return 0; + case '=': set_token(ctx, EQUAL, pos, p, 1); return 0; + case '{': set_token(ctx, LBRACE, pos, p, 1); return 0; + case '}': set_token(ctx, RBRACE, pos, p, 1); return 0; + case '[': set_token(ctx, LBRACKET, pos, p, 1); return 0; + case ']': set_token(ctx, RBRACKET, pos, p, 1); return 0; + case '\n': set_token(ctx, NEWLINE, pos, p, 1); return 0; + case '\r': + case ' ': + case '\t': /// ignore white spaces + p++; + pos.col++; + continue; + } + + return scan_string(ctx, p, &pos, dotisspecial); + } + + set_eof(ctx, pos); + return 0; +} + +const char* toml_table_key(const toml_table_t* tbl, int keyidx, int* keylen) { + if (keyidx < tbl->nkval) { + *keylen = tbl->kval[keyidx]->keylen; + return tbl->kval[keyidx]->key; + } + if ((keyidx -= tbl->nkval) < tbl->narr) { + *keylen = tbl->arr[keyidx]->keylen; + return tbl->arr[keyidx]->key; + } + if ((keyidx -= tbl->narr) < tbl->ntbl) { 
+ *keylen = tbl->tbl[keyidx]->keylen; + return tbl->tbl[keyidx]->key; + } + *keylen = 0; + return 0; +} + +toml_unparsed_t toml_table_unparsed(const toml_table_t* tbl, const char* key) { + for (int i = 0; i < tbl->nkval; i++) + if (strcmp(key, tbl->kval[i]->key) == 0) + return tbl->kval[i]->val; + return 0; +} + +toml_array_t* toml_table_array(const toml_table_t* tbl, const char* key) { + for (int i = 0; i < tbl->narr; i++) + if (strcmp(key, tbl->arr[i]->key) == 0) + return tbl->arr[i]; + return 0; +} + +toml_table_t* toml_table_table(const toml_table_t* tbl, const char* key) { + for (int i = 0; i < tbl->ntbl; i++) + if (strcmp(key, tbl->tbl[i]->key) == 0) + return tbl->tbl[i]; + return 0; +} + +toml_unparsed_t toml_array_unparsed(const toml_array_t* arr, int idx) { + return (0 <= idx && idx < arr->nitem) ? arr->item[idx].val : 0; +} + +int toml_table_len(const toml_table_t* tbl) { + return tbl->nkval + tbl->narr + tbl->ntbl; +} + +int toml_array_len(const toml_array_t* arr) { + return arr->nitem; +} + +toml_array_t* toml_array_array(const toml_array_t* arr, int idx) { + return (0 <= idx && idx < arr->nitem) ? arr->item[idx].arr : 0; +} + +toml_table_t* toml_array_table(const toml_array_t* arr, int idx) { + return (0 <= idx && idx < arr->nitem) ? arr->item[idx].tbl : 0; +} + +bool is_leap(int y) { + return y % 4 == 0 && (y % 100 != 0 || y % 400 == 0); +} + +int toml_value_timestamp(toml_unparsed_t src_, toml_timestamp_t* ret) { + if (!src_) + return -1; + + const char* p = src_; + bool must_parse_time = false; + + memset(ret, 0, sizeof(*ret)); + + /// YYYY-MM-DD + if (scan_date(p, &ret->year, &ret->month, &ret->day)) { + if (ret->month < 1 || ret->day < 1 || ret->month > 12 || ret->day > 31) + return -1; + if (ret->month == 2 && ret->day > (is_leap(ret->year) ? 
29 : 28)) + return -1; + ret->kind = 'D'; + + p += 10; + if (*p) { + if (*p != 'T' && *p != 't' && *p != ' ') /// T or space + return -1; + must_parse_time = true; + p++; + } + } + + /// HH:MM:SS + if (scan_time(p, &ret->hour, &ret->minute, &ret->second)) { + if (ret->second < 0 || ret->minute < 0 || ret->hour < 0 || ret->hour > 23 || ret->minute > 59 || ret->second > 60) + return -1; + ret->kind = (ret->kind == 'D' ? 'l' : 't'); + + p += 8; + if (*p == '.') { /// optionally, parse millisec + p++; /// skip '.' + const char* qq; + ret->millisec = parse_millisec(p, &qq); + p = qq; + } + + if (*p) { /// parse and copy Z + ret->kind = 'd'; + if (*p == 'Z' || *p == 'z') + p++; + else if (*p == '+' || *p == '-') { + if (!scan_offset(p, &ret->tz)) + return -1; + p += 6; + } + } + } + if (*p != 0) + return -1; + if (must_parse_time && ret->kind == 'D') + return -1; + return 0; +} + +// Raw to boolean +int toml_value_bool(toml_unparsed_t src, bool* ret_) { + if (!src) + return -1; + bool dummy = false; + bool* ret = ret_ ? ret_ : &dummy; + + if (strcmp(src, "true") == 0) { + *ret = true; + return 0; + } + if (strcmp(src, "false") == 0) { + *ret = false; + return 0; + } + return -1; +} + +// Raw to integer +int toml_value_int(toml_unparsed_t src, int64_t* ret_) { + if (!src) + return -1; + + char buf[100]; + char* p = buf; + char* q = p + sizeof(buf); + const char* s = src; + int64_t dummy = 0; + int64_t* ret = ret_ ? ret_ : &dummy; + bool have_sign = false; + + if (s[0] == '+' || s[0] == '-') { /// allow +/- + have_sign = true; + *p++ = *s++; + } + + if (s[0] == '_') /// disallow +_100 + return -1; + + int base = 0; + if (s[0] == '0') { /// if 0* ... 
+ switch (s[1]) { + case 'x': + base = 16; + s += 2; + break; + case 'o': + base = 8; + s += 2; + break; + case 'b': + base = 2; + s += 2; + break; + case '\0': return *ret = 0, 0; + default: + if (s[1]) /// ensure no other digits after it + return -1; + } + if (!*s) + return -1; + if (have_sign) /// disallow +0xff, -0xff + return -1; + if (s[0] == '_') /// disallow 0x_, 0o_, 0b_ + return -1; + if (s[0] == '+' || s[0] == '-') /// disallow 0x+10, 0x-10 + return -1; + } + + while (*s && p < q) { /// just strip underscores and pass to strtoll + int ch = *s++; + if (ch == '_') { + if (s[0] == '_') /// disallow '__' + return -1; + if (s[0] == '\0') /// numbers cannot end with '_' + return -1; + continue; /// skip _ + } + *p++ = ch; + } + + if (*s || p == q) /// if not at end-of-string or we ran out of buffer ... + return -1; + + *p = 0; /// cap with NUL + + /// Run strtoll on buf to get the integer + char* endp; + errno = 0; + *ret = strtoll(buf, &endp, base); + return (errno || *endp) ? -1 : 0; +} + +int toml_value_double(toml_unparsed_t src, double* ret_) { + if (!src) + return -1; + + char buf[100]; + char* p = buf; + char* q = p + sizeof(buf); + const char* s = src; + double dummy = 0.0; + double* ret = ret_ ? ret_ : &dummy; + + if (s[0] == '+' || s[0] == '-') /// allow +/- + *p++ = *s++; + + if (s[0] == '_') /// disallow +_1.00 + return -1; + + { /// decimal point, if used, must be surrounded by at least one digit on each side + char* dot = strchr(s, '.'); + if (dot) { + if (dot == s || !isdigit(dot[-1]) || !isdigit(dot[1])) + return -1; + } + } + + /// zero must be followed by . 
or 'e', or NUL + if (s[0] == '0' && s[1] && !strchr("eE.", s[1])) + return -1; + + /// Just strip underscores and pass to strtod + bool have_us = false; + while (*s && p < q) { + int ch = *s++; + if (ch == '_') { + have_us = true; + if (s[0] == '_') /// disallow '__' + return -1; + if (s[0] == 'e') /// disallow _e + return -1; + if (s[0] == 0) /// disallow last char '_' + return -1; + continue; /// skip _ + } + if (ch == 'I' || ch == 'N' || ch == 'F' || ch == 'A') /// inf and nan are case-sensitive. + return -1; + if (ch == 'e' && s[0] == '_') /// disallow e_ + return -1; + *p++ = ch; + } + if (*s || p == q) + return -1; /// reached end of string or buffer is full? + + *p = 0; /// cap with NUL + + /// Run strtod on buf to get the value + char* endp; + errno = 0; + *ret = strtod(buf, &endp); + if (errno || *endp) + return -1; + if (have_us && (isnan(*ret) || isinf(*ret))) + return -1; + return 0; +} + +int toml_value_string(toml_unparsed_t src, char** ret, int* len) { + bool multiline = false; + const char* sp; + const char* sq; + + *ret = 0; + if (!src) + return -1; + + /// First char must be a s-quote or d-quote + int qchar = src[0]; + int srclen = strlen(src); + if (!(qchar == '\'' || qchar == '"')) { + return -1; + } + + /// triple quotes? + if (qchar == src[1] && qchar == src[2]) { + multiline = true; /// triple-quote implies multiline + sp = src + 3; /// first char after quote + sq = src + srclen - 3; /// first char of ending quote + + if (!(sp <= sq && sq[0] == qchar && sq[1] == qchar && sq[2] == qchar)) + return -1; /// last 3 chars in src must be qchar + + if (sp[0] == '\n') /// skip new line immediate after qchar + sp++; + else if (sp[0] == '\r' && sp[1] == '\n') + sp += 2; + } else { + sp = src + 1; /// first char after quote + sq = src + srclen - 1; /// ending quote + if (!(sp <= sq && *sq == qchar)) /// last char in src must be qchar + return -1; + } + + /// at this point: + /// sp points to first valid char after quote. 
+ /// sq points to one char beyond last valid char. + /// string len is (sq - sp). + if (qchar == '\'') + *ret = norm_lit_str(sp, sq - sp, len, multiline, 0, 0); + else + *ret = norm_basic_str(sp, sq - sp, len, multiline, 0, 0); + return *ret ? 0 : -1; +} + +toml_value_t toml_array_string(const toml_array_t* arr, int idx) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_string(toml_array_unparsed(arr, idx), &ret.u.s, &ret.u.sl) == 0); + return ret; +} + +toml_value_t toml_array_bool(const toml_array_t* arr, int idx) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_bool(toml_array_unparsed(arr, idx), &ret.u.b) == 0); + return ret; +} + +toml_value_t toml_array_int(const toml_array_t* arr, int idx) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_int(toml_array_unparsed(arr, idx), &ret.u.i) == 0); + return ret; +} + +toml_value_t toml_array_double(const toml_array_t* arr, int idx) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_double(toml_array_unparsed(arr, idx), &ret.u.d) == 0); + return ret; +} + +toml_value_t toml_array_timestamp(const toml_array_t* arr, int idx) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_timestamp(toml_array_unparsed(arr, idx), &ret.u.ts) == 0); + return ret; +} + +toml_value_t toml_table_string(const toml_table_t* tbl, const char* key) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + toml_unparsed_t raw = toml_table_unparsed(tbl, key); + if (raw) + ret.ok = (toml_value_string(raw, &ret.u.s, &ret.u.sl) == 0); + return ret; +} + +toml_value_t toml_table_bool(const toml_table_t* tbl, const char* key) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_bool(toml_table_unparsed(tbl, key), &ret.u.b) == 0); + return ret; +} + +toml_value_t toml_table_int(const toml_table_t* tbl, const char* key) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = 
(toml_value_int(toml_table_unparsed(tbl, key), &ret.u.i) == 0); + return ret; +} + +toml_value_t toml_table_double(const toml_table_t* tbl, const char* key) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_double(toml_table_unparsed(tbl, key), &ret.u.d) == 0); + return ret; +} + +toml_value_t toml_table_timestamp(const toml_table_t* tbl, const char* key) { + toml_value_t ret; + memset(&ret, 0, sizeof(ret)); + ret.ok = (toml_value_timestamp(toml_table_unparsed(tbl, key), &ret.u.ts) == 0); + return ret; +} -- cgit v1.2.3