diff --git a/build.c b/build.c index 843c4e7..08735c4 100644 --- a/build.c +++ b/build.c @@ -9,5 +9,9 @@ #include "arena.c" #include "str.c" #include "parsers.c" +#include "pretty_print.c" +#include "darr.h" + +#if !COLLA_NO_NET #include "net.c" -#include "darr.h" \ No newline at end of file +#endif diff --git a/core.c b/core.c index fd25d3a..aa165ac 100644 --- a/core.c +++ b/core.c @@ -19,9 +19,11 @@ colla_modules_e colla__initialised_modules = 0; extern void os_init(void); -extern void net_init(void); extern void os_cleanup(void); +#if !COLLA_NO_NET +extern void net_init(void); extern void net_cleanup(void); +#endif static char *colla_fmt__stb_callback(const char *buf, void *ud, int len) { fflush(stdout); @@ -34,9 +36,11 @@ void colla_init(colla_modules_e modules) { if (modules & COLLA_OS) { os_init(); } +#if !COLLA_NO_NET if (modules & COLLA_NET) { net_init(); } +#endif } void colla_cleanup(void) { @@ -44,9 +48,11 @@ void colla_cleanup(void) { if (modules & COLLA_OS) { os_cleanup(); } +#if !COLLA_NO_NET if (modules & COLLA_NET) { net_cleanup(); } +#endif } int fmt_print(const char *fmt, ...) { diff --git a/core.h b/core.h index 30db78f..dd33237 100644 --- a/core.h +++ b/core.h @@ -37,22 +37,35 @@ void colla_cleanup(void); // LINKED LISTS ///////////////////////////////// -#define list_push_n(list, item, next) ((item)->next=(list), (list)=(item)) -#define list_pop_n(list, next) ((list) = (list) ? (list)->next : NULL) +// singly linked list +#define list_push(list, item) ((item)->next=(list), (list)=(item)) +#define list_pop(list) ((list) = (list) ? 
(list)->next : NULL) -#define list_push(list, item) list_push_n(list, item, next) -#define list_pop(list) list_pop_n(list, next) +// double linked list +#define dlist_push(list, item) do { \ + if (item) (item)->next = (list); \ + if (list) (list)->prev = (item); \ + (list) = (item); \ + } while (0) -#define dlist_push_pn(list, item, next, prev) if (item) (item)->next = (list); if (list) (list)->prev = (item); (list) = (item) -#define dlist_pop_pn(list, item, next, prev) do { \ - if (!(item)) break; \ - if ((item)->prev) (item)->prev->next = (item)->next; \ - if ((item)->next) (item)->next->prev = (item)->prev; \ - if((item) == (list)) (list) = (item)->next; \ -} while (0) +#define dlist_pop(list, item) do { \ + if (!(item)) break; \ + if ((item)->prev) (item)->prev->next = (item)->next; \ + if ((item)->next) (item)->next->prev = (item)->prev; \ + if ((item) == (list)) (list) = (item)->next; \ + } while (0) -#define dlist_push(list, item) dlist_push_pn(list, item, next, prev) -#define dlist_pop(list, item) dlist_pop_pn(list, item, next, prev) +// ordered linked list + +#define olist_push(head, tail, item) do { \ + if (tail) { \ + (tail)->next = (item); \ + (tail) = (item); \ + } \ + else { \ + (head) = (tail) = (item); \ + } \ + } while (0) #define for_each(it, list) for (typeof(list) it = list; it; it = it->next) @@ -196,4 +209,4 @@ int fmt_bufferv(char *buf, usize len, const char *fmt, va_list args); ///////////////////////////////////////////////// -#endif \ No newline at end of file +#endif diff --git a/highlight.c b/highlight.c new file mode 100644 index 0000000..6619fd6 --- /dev/null +++ b/highlight.c @@ -0,0 +1,629 @@ +#include "highlight.h" + +// based on https://github.com/Theldus/kat + +#include "arena.h" +#include "str.h" +#include "os.h" + +typedef enum { + HL_STATE_DEFAULT, + HL_STATE_KEYWORD, + HL_STATE_NUMBER, + HL_STATE_CHAR, + HL_STATE_STRING, + HL_STATE_COMMENT_MULTI, + HL_STATE_PREPROCESSOR, + HL_STATE_PREPROCESSOR_INCLUDE, + 
HL_STATE_PREPROCESSOR_INCLUDE_STRING, +} hl_state_e; + +typedef enum { + HL_HTABLE_FAILED, + HL_HTABLE_REPLACED, + HL_HTABLE_ADDED, +} hl_htable_result_e; + +typedef struct hl_node_t { + strview_t key; + hl_color_e value; + struct hl_node_t *next; +} hl_node_t; + +typedef struct { + hl_node_t **buckets; + uint count; + uint used; + uint collisions; +} hl_hashtable_t; + +static hl_hashtable_t hl_htable_init(arena_t *arena, uint pow2_exp); +static hl_htable_result_e hl_htable_add(arena_t *arena, hl_hashtable_t *table, strview_t key, hl_color_e value); +static hl_node_t *hl_htable_get(hl_hashtable_t *table, strview_t key); +static u64 hl_htable_hash(const void *bytes, usize count); + +typedef struct hl_ctx_t { + hl_state_e state; + hl_flags_e flags; + usize kw_beg; + strview_t colors[HL_COLOR__COUNT]; // todo: maybe should be str_t? + outstream_t ostr; + hl_hashtable_t kw_htable; + bool symbol_table[256]; +} hl_ctx_t; + +#define KW(str, col) { { str, sizeof(str)-1 }, HL_COLOR_##col } + +static hl_keyword_t hl_c_cpp_kwrds[] = { + /* C Types. */ + KW("double", TYPES), + KW("int", TYPES), + KW("long", TYPES), + KW("char", TYPES), + KW("float", TYPES), + KW("short", TYPES), + KW("unsigned", TYPES), + KW("signed", TYPES), + KW("bool", TYPES), + + /* Common typedefs. */ + KW("int8_t", TYPES), KW("uint8_t", TYPES), + KW("int16_t", TYPES), KW("uint16_t", TYPES), + KW("int32_t", TYPES), KW("uint32_t", TYPES), + KW("int64_t", TYPES), KW("uint64_t", TYPES), + KW("int8", TYPES), KW("uint8", TYPES), + KW("int16", TYPES), KW("uint16", TYPES), + KW("int32", TYPES), KW("uint32", TYPES), + KW("int64", TYPES), KW("uint64", TYPES), + KW("i8", TYPES), KW("u8", TYPES), + KW("i16", TYPES), KW("u16", TYPES), + KW("i32", TYPES), KW("u32", TYPES), + KW("i64", TYPES), KW("u64", TYPES), + + + /* Colla keywords */ + KW("uchar", TYPES), + KW("ushort", TYPES), + KW("uint", TYPES), + KW("usize", TYPES), + KW("isize", TYPES), + KW("byte", TYPES), + + /* Other keywords. 
*/ + KW("auto", KEYWORDS), KW("struct", KEYWORDS), KW("break", KEYWORDS), + KW("else", KEYWORDS), KW("switch", KEYWORDS), KW("case", KEYWORDS), + KW("enum", KEYWORDS), KW("register", KEYWORDS), KW("typedef", KEYWORDS), + KW("extern", KEYWORDS), KW("return", KEYWORDS), KW("union", KEYWORDS), + KW("const", KEYWORDS), KW("continue", KEYWORDS), KW("for", KEYWORDS), + KW("void", KEYWORDS), KW("default", KEYWORDS), KW("goto", KEYWORDS), + KW("sizeof", KEYWORDS), KW("volatile", KEYWORDS), KW("do", KEYWORDS), + KW("if", KEYWORDS), KW("static", KEYWORDS), KW("inline", KEYWORDS), + KW("while", KEYWORDS), +}; + +#undef KW + +static bool hl_default_symbols_table[256] = { + ['['] = true, [']'] = true, ['('] = true, + [')'] = true, ['{'] = true, ['}'] = true, + ['*'] = true, [':'] = true, ['='] = true, + [';'] = true, ['-'] = true, ['>'] = true, + ['&'] = true, ['+'] = true, ['~'] = true, + ['!'] = true, ['/'] = true, ['%'] = true, + ['<'] = true, ['^'] = true, ['|'] = true, + ['?'] = true, ['#'] = true, +}; + +static void hl_write_char(hl_ctx_t *ctx, char c); +static void hl_write(hl_ctx_t *ctx, strview_t v); +static bool hl_is_char_keyword(char c); +static bool hl_highlight_symbol(hl_ctx_t *ctx, char c); +static hl_color_e hl_get_keyword_color(hl_ctx_t *ctx, strview_t keyword); +static bool hl_is_capitalised(strview_t string); +static strview_t hl_finish_keyword(hl_ctx_t *ctx, usize beg, instream_t *in); +static void hl_print_keyword(hl_ctx_t *ctx, strview_t keyword, hl_color_e color); + +hl_ctx_t *hl_init(arena_t *arena, hl_config_t *config) { + if (!config) { + err(" cannot be null"); + return NULL; + } + + hl_ctx_t *out = alloc(arena, hl_ctx_t); + + out->flags = config->flags; + + memcpy(out->symbol_table, hl_default_symbols_table, sizeof(hl_default_symbols_table)); + memcpy(out->colors, config->colors, sizeof(config->colors)); + + int kw_count = arrlen(hl_c_cpp_kwrds); + + out->kw_htable = hl_htable_init(arena, 8); + + for (int i = 0; i < kw_count; ++i) { + hl_keyword_t 
*kw = &hl_c_cpp_kwrds[i]; + hl_htable_add(arena, &out->kw_htable, kw->keyword, kw->color); + } + + for (int i = 0; i < config->kwrds_count; ++i) { + hl_keyword_t *kw = &config->extra_kwrds[i]; + hl_htable_add(arena, &out->kw_htable, kw->keyword, kw->color); + } + + return out; +} + +void hl_next_char(hl_ctx_t *ctx, instream_t *in) { + char cur = istr_get(in); + bool is_last = istr_is_finished(in); + + switch (ctx->state) { + case HL_STATE_DEFAULT: + { + /* + * If potential keyword. + * + * A valid C keyword may contain numbers, but *not* + * as a suffix. + */ + if (hl_is_char_keyword(cur) && !char_is_num(cur)) { + ctx->kw_beg = istr_tell(in); + ctx->state = HL_STATE_KEYWORD; + } + + // potential number + else if (char_is_num(cur)) { + ctx->kw_beg = istr_tell(in); + ctx->state = HL_STATE_NUMBER; + } + + // potential char + else if (cur == '\'') { + ctx->kw_beg = istr_tell(in); + ctx->state = HL_STATE_CHAR; + } + + // potential string + else if (cur == '"') { + ctx->kw_beg = istr_tell(in); + ctx->state = HL_STATE_STRING; + } + + // line or multiline comment + else if (cur == '/') { + // single line comment + if (istr_peek(in) == '/') { + // rewind before comment begins + istr_rewind_n(in, 1); + + // comment until the end of line + hl_print_keyword(ctx, istr_get_line(in), HL_COLOR_COMMENT); + } + + // multiline comment + else if (istr_peek(in) == '*') { + ctx->state = HL_STATE_COMMENT_MULTI; + ctx->kw_beg = istr_tell(in); + istr_skip(in, 1); // skip * + } + + else { + // maybe a symbol? 
+ hl_highlight_symbol(ctx, cur); + } + } + + // preprocessor + else if (cur == '#') { + // print the # as a symbol + hl_highlight_symbol(ctx, cur); + ctx->kw_beg = istr_tell(in); + ctx->state = HL_STATE_PREPROCESSOR; + } + + // other suppored symbols + else if (hl_highlight_symbol(ctx, cur)) { + // noop + } + + else { + hl_write_char(ctx, cur); + } + + break; + } + + case HL_STATE_KEYWORD: + { + // end of keyword, check if it really is a valid keyword + if (!hl_is_char_keyword(cur)) { + strview_t keyword = hl_finish_keyword(ctx, ctx->kw_beg, in); + hl_color_e kw_color = hl_get_keyword_color(ctx, keyword); + + if (kw_color != HL_COLOR__COUNT) { + hl_print_keyword(ctx, keyword, kw_color); + + // maybe we should highlight this remaining char. + if (!hl_highlight_symbol(ctx, cur)) { + hl_write_char(ctx, cur); + } + } + + /* + * If not keyword, maybe its a function call. + * + * Important to note that this is hacky and will only work + * if there is no space between keyword and '('. + */ + else if (cur == '(') { + hl_print_keyword(ctx, keyword, HL_COLOR_FUNC); + + // Opening parenthesis will always be highlighted + hl_highlight_symbol(ctx, cur); + } + else { + if (hl_is_capitalised(keyword)) { + hl_print_keyword(ctx, keyword, HL_COLOR_MACRO); + } + else { + hl_write(ctx, keyword); + } + if (!hl_highlight_symbol(ctx, cur)) { + hl_write_char(ctx, cur); + } + } + } + break; + } + + case HL_STATE_NUMBER: + { + char c = char_lower(cur); + + /* + * Should we end the state?. + * + * Very important observation: + * Although the number highlight works fine for most (if not all) + * of the possible cases, it also assumes that the code is written + * correctly and the source is able to compile, meaning that: + * + * Numbers like: 123, 0xABC123, 12.3e4f, 123ULL.... + * will be correctly identified and highlighted + * + * But, 'numbers' like: 123ABC, 0xxxxABCxx123, 123UUUUU.... + * will also be highlighted. 
+ * + * It also assumes that no keyword will start with a number + * and everything starting with a number (except inside strings or + * comments) will be a number. + */ + if (!char_is_num(c) && + (c < 'a' || c > 'f') && + c != 'b' && c != 'x' && + c != 'u' && c != 'l' && + c != '.' + ) { + strview_t keyword = hl_finish_keyword(ctx, ctx->kw_beg, in); + + // if not a valid char keyword: valid number + if (!hl_is_char_keyword(cur)) { + hl_print_keyword(ctx, keyword, HL_COLOR_NUMBER); + } + else { + hl_write(ctx, keyword); + } + + // maybe we should highlight this remaining char. + if (!hl_highlight_symbol(ctx, cur)) { + hl_write_char(ctx, cur); + } + } + + break; + } + + case HL_STATE_CHAR: + { + if (is_last || (cur == '\'' && istr_peek(in) != '\'')) { + strview_t keyword = hl_finish_keyword(ctx, ctx->kw_beg, in); + keyword.len++; + + hl_print_keyword(ctx, keyword, HL_COLOR_STRING); + } + break; + } + + case HL_STATE_STRING: + { + if (is_last || (cur == '"' && istr_prev_prev(in) != '\\')) { + strview_t keyword = hl_finish_keyword(ctx, ctx->kw_beg, in); + keyword.len++; + + hl_print_keyword(ctx, keyword, HL_COLOR_STRING); + } + break; + } + + case HL_STATE_COMMENT_MULTI: + { + /* + * If we are at the end of line _or_ have identified + * an end of comment... 
+ */ + if (is_last || (cur == '*' && istr_peek(in) == '/')) { + strview_t keyword = hl_finish_keyword(ctx, ctx->kw_beg, in); + + hl_print_keyword(ctx, keyword, HL_COLOR_COMMENT); + } + break; + } + + case HL_STATE_PREPROCESSOR: + { + + if (!hl_is_char_keyword(cur)) { + hl_write_char(ctx, cur); + break; + } + +#define hl_check(str, new_state) \ + if (cur == str[0]) { \ + instream_t temp = *in; \ + strview_t a = { &(str[1]), sizeof(str) - 2 }; \ + strview_t b = istr_get_view_len(&temp, a.len); \ + if (strv_equals(a, b)) { \ + *in = temp; \ + hl_print_keyword(ctx, (strview_t){ str, sizeof(str) - 1 }, HL_COLOR_PREPROC); \ + ctx->state = new_state; \ + break; \ + } \ + } + if (is_last) { + strview_t keyword = hl_finish_keyword(ctx, ctx->kw_beg, in); + hl_print_keyword(ctx, keyword, HL_COLOR_PREPROC); + break; + } + + hl_check("include", HL_STATE_PREPROCESSOR_INCLUDE) + hl_check("define", HL_STATE_DEFAULT) + hl_check("undef", HL_STATE_DEFAULT) + hl_check("ifdef", HL_STATE_DEFAULT) + hl_check("ifndef", HL_STATE_DEFAULT) + hl_check("if", HL_STATE_DEFAULT) + hl_check("endif", HL_STATE_DEFAULT) + hl_check("pragma", HL_STATE_DEFAULT) + +#undef hl_check + break; + } + + + /* + * Preprocessor/Preprocessor include + * + * This is a 'dumb' preprocessor highlighter: + * it highlights everything with the same color + * and if and only if an '#include' is detected + * the included header will be handled as string + * and thus, will have the same color as the string. + * + * In fact, it is somehow similar to what GtkSourceView + * does (Mousepad, Gedit...) but with one silly difference: + * single-line/multi-line comments will not be handled + * while inside the preprocessor state, meaning that + * comments will also have the same color as the remaining + * of the line, yeah, ugly. 
+ */ + case HL_STATE_PREPROCESSOR_INCLUDE: + { + if (cur == '<' || cur == '"' || is_last) { + ctx->kw_beg = istr_tell(in); + ctx->state = HL_STATE_PREPROCESSOR_INCLUDE_STRING; + } + else { + hl_write_char(ctx, cur); + } + break; + } + case HL_STATE_PREPROCESSOR_INCLUDE_STRING: + { + if (cur == '>' || cur == '"' || is_last) { + strview_t keyword = hl_finish_keyword(ctx, ctx->kw_beg, in); + keyword.len += 1; + hl_print_keyword(ctx, keyword, HL_COLOR_STRING); + } + break; + } + } +} + +str_t hl_highlight(arena_t *arena, hl_ctx_t *ctx, strview_t data) { + ctx->ostr = ostr_init(arena); + + ctx->state = HL_STATE_DEFAULT; + ctx->kw_beg = 0; + + instream_t in = istr_init(data); + + while (!istr_is_finished(&in)) { + hl_next_char(ctx, &in); + } + + hl_next_char(ctx, &in); + + return ostr_to_str(&ctx->ostr); +} + +void hl_set_symbol_in_table(hl_ctx_t *ctx, char symbol, bool value) { + if (!ctx) return; + ctx->symbol_table[(unsigned char)symbol] = value; +} + +void hl_add_keyword(arena_t *arena, hl_ctx_t *ctx, hl_keyword_t *keyword) { + hl_htable_add(arena, &ctx->kw_htable, keyword->keyword, keyword->color); +} + +//// HASH TABLE /////////////////////////////////////////////////// + +static hl_hashtable_t hl_htable_init(arena_t *arena, uint pow2_exp) { + uint count = 1 << pow2_exp; + return (hl_hashtable_t) { + .count = count, + .buckets = alloc(arena, hl_node_t*, count), + }; +} + +static hl_htable_result_e hl_htable_add(arena_t *arena, hl_hashtable_t *table, strview_t key, hl_color_e value) { + if (!table) { + return HL_HTABLE_FAILED; + } + + if ((float)table->used >= table->count * 0.6f) { + warn("more than 60%% of the arena is being used: %d/%d", table->used, table->count); + } + + u64 hash = hl_htable_hash(key.buf, key.len); + usize index = hash & (table->count - 1); + hl_node_t *bucket = table->buckets[index]; + if (bucket) table->collisions++; + while (bucket) { + // already exists + if (strv_equals(bucket->key, key)) { + bucket->value = value; + return 
HL_HTABLE_REPLACED; + } + bucket = bucket->next; + } + + bucket = alloc(arena, hl_node_t); + + bucket->key = key; + bucket->value = value; + bucket->next = table->buckets[index]; + + table->buckets[index] = bucket; + table->used++; + + return HL_HTABLE_ADDED; +} + +static hl_node_t *hl_htable_get(hl_hashtable_t *table, strview_t key) { + if (!table || table->count == 0) { + return NULL; + } + + u64 hash = hl_htable_hash(key.buf, key.len); + usize index = hash & (table->count - 1); + hl_node_t *bucket = table->buckets[index]; + while (bucket) { + if (strv_equals(bucket->key, key)) { + return bucket; + } + bucket = bucket->next; + } + + return NULL; +} + +// uses the sdbm algorithm +static u64 hl_htable_hash(const void *bytes, usize count) { + const u8 *data = bytes; + u64 hash = 0; + + for (usize i = 0; i < count; ++i) { + hash = data[i] + (hash << 6) + (hash << 16) - hash; + } + + return hash; +} + +//// STATIC FUNCTIONS ///////////////////////////////////////////// + +static inline void hl_escape_html(outstream_t *out, char c) { + switch (c) { + case '&': + ostr_puts(out, strv("&")); + break; + case '<': + ostr_puts(out, strv("<")); + break; + case '>': + ostr_puts(out, strv(">")); + break; + default: + ostr_putc(out, c); + break; + } +} + +static void hl_write_char(hl_ctx_t *ctx, char c) { + if (ctx->flags & HL_FLAG_HTML) { + hl_escape_html(&ctx->ostr, c); + } + else { + ostr_putc(&ctx->ostr, c); + } +} + +static void hl_write(hl_ctx_t *ctx, strview_t v) { + if (ctx->flags & HL_FLAG_HTML) { + for (usize i = 0; i < v.len; ++i) { + hl_escape_html(&ctx->ostr, v.buf[i]); + } + } + else { + ostr_puts(&ctx->ostr, v); + } +} + +static bool hl_is_char_keyword(char c) { + return char_is_alpha(c) || char_is_num(c) || c == '_'; +} + +static bool hl_highlight_symbol(hl_ctx_t *ctx, char c) { + if (!ctx->symbol_table[(unsigned char)c]) { + return false; + } + + ostr_puts(&ctx->ostr, ctx->colors[HL_COLOR_SYMBOL]); + hl_write_char(ctx, c); + ostr_puts(&ctx->ostr, 
ctx->colors[HL_COLOR_NORMAL]); + + return true; +} + +static hl_color_e hl_get_keyword_color(hl_ctx_t *ctx, strview_t keyword) { + // todo: make this an option? + if (strv_ends_with_view(keyword, strv("_t"))) { + return HL_COLOR_CUSTOM_TYPES; + } + + hl_node_t *node = hl_htable_get(&ctx->kw_htable, keyword); + return node ? node->value : HL_COLOR__COUNT; +} + +static bool hl_is_capitalised(strview_t string) { + for (usize i = 0; i < string.len; ++i) { + char c = string.buf[i]; + if (!char_is_num(c) && c != '_' && (c < 'A' || c > 'Z')) { + return false; + } + } + return true; +} + +static strview_t hl_finish_keyword(hl_ctx_t *ctx, usize beg, instream_t *in) { + ctx->state = HL_STATE_DEFAULT; + beg -= 1; + usize end = istr_tell(in) - 1; + + return strv(in->beg + beg, end - beg); +} + +static void hl_print_keyword(hl_ctx_t *ctx, strview_t keyword, hl_color_e color) { + ostr_puts(&ctx->ostr, ctx->colors[color]); + hl_write(ctx, keyword); + ostr_puts(&ctx->ostr, ctx->colors[HL_COLOR_NORMAL]); +} + diff --git a/highlight.h b/highlight.h new file mode 100644 index 0000000..f095d9e --- /dev/null +++ b/highlight.h @@ -0,0 +1,49 @@ +#pragma once + +#include "str.h" + +typedef enum { + HL_COLOR_NORMAL, + HL_COLOR_PREPROC, + HL_COLOR_TYPES, + HL_COLOR_CUSTOM_TYPES, + HL_COLOR_KEYWORDS, + HL_COLOR_NUMBER, + HL_COLOR_STRING, + HL_COLOR_COMMENT, + HL_COLOR_FUNC, + HL_COLOR_SYMBOL, + HL_COLOR_MACRO, + + HL_COLOR__COUNT, +} hl_color_e; + +typedef enum { + HL_FLAG_NONE = 0, + HL_FLAG_HTML = 1 << 0, +} hl_flags_e; + +typedef struct { + strview_t keyword; + hl_color_e color; +} hl_keyword_t; + +typedef struct { + usize idx; + usize size; +} hl_line_t; + +typedef struct { + strview_t colors[HL_COLOR__COUNT]; + hl_keyword_t *extra_kwrds; + int kwrds_count; + hl_flags_e flags; +} hl_config_t; + +typedef struct hl_ctx_t hl_ctx_t; + +hl_ctx_t *hl_init(arena_t *arena, hl_config_t *config); +str_t hl_highlight(arena_t *arena, hl_ctx_t *ctx, strview_t str); + +void 
hl_set_symbol_in_table(hl_ctx_t *ctx, char symbol, bool value); +void hl_add_keyword(arena_t *arena, hl_ctx_t *ctx, hl_keyword_t *keyword); diff --git a/net.h b/net.h index 0cfd9f0..7734463 100644 --- a/net.h +++ b/net.h @@ -14,7 +14,8 @@ typedef enum http_method_e { HTTP_POST, HTTP_HEAD, HTTP_PUT, - HTTP_DELETE + HTTP_DELETE, + HTTP_METHOD__COUNT, } http_method_e; const char *http_get_method_string(http_method_e method); @@ -78,14 +79,17 @@ typedef struct { http_version_t version; // 1.1 by default http_method_e request_type; http_header_t *headers; - int header_count; + int header_count; // optional, if set to 0 it traverses headers using h->next strview_t body; } http_request_desc_t; +typedef void (*http_request_callback_fn)(strview_t chunk, void *udata); + // arena_t *arena, strview_t url, [ http_header_t *headers, int header_count, strview_t body ] #define http_get(arena, url, ...) http_request(&(http_request_desc_t){ arena, url, .request_type = HTTP_GET, .version = { 1, 1 }, __VA_ARGS__ }) http_res_t http_request(http_request_desc_t *request); +http_res_t http_request_cb(http_request_desc_t *request, http_request_callback_fn callback, void *userdata); // SOCKETS ////////////////////////// @@ -187,4 +191,4 @@ str_t sha1_str(arena_t *arena, sha1_t *ctx, const void *buf, usize len); buffer_t base64_encode(arena_t *arena, buffer_t buffer); buffer_t base64_decode(arena_t *arena, buffer_t buffer); -#endif \ No newline at end of file +#endif diff --git a/os.h b/os.h index 95dafa8..0488fc1 100644 --- a/os.h +++ b/os.h @@ -63,20 +63,35 @@ typedef enum os_log_level_e { } os_log_level_e; typedef enum os_log_colour_e { + LOG_COL_BLACK = 0, + LOG_COL_BLUE = 1, + LOG_COL_GREEN = 2, + LOG_COL_CYAN = LOG_COL_BLUE | LOG_COL_GREEN, + LOG_COL_RED = 4, + LOG_COL_MAGENTA = LOG_COL_RED | LOG_COL_BLUE, + LOG_COL_YELLOW = LOG_COL_RED | LOG_COL_GREEN, + LOG_COL_GREY = LOG_COL_RED | LOG_COL_BLUE | LOG_COL_GREEN, + + LOG_COL_LIGHT = 8, + + LOG_COL_DARK_GREY = LOG_COL_BLACK | 
LOG_COL_LIGHT, + LOG_COL_LIGHT_BLUE = LOG_COL_BLUE | LOG_COL_LIGHT, + LOG_COL_LIGHT_GREEN = LOG_COL_GREEN | LOG_COL_LIGHT, + LOG_COL_LIGHT_CYAN = LOG_COL_CYAN | LOG_COL_LIGHT, + LOG_COL_LIGHT_RED = LOG_COL_RED | LOG_COL_LIGHT, + LOG_COL_LIGHT_MAGENTA = LOG_COL_MAGENTA | LOG_COL_LIGHT, + LOG_COL_LIGHT_YELLOW = LOG_COL_YELLOW | LOG_COL_LIGHT, + LOG_COL_WHITE = LOG_COL_GREY | LOG_COL_LIGHT, + LOG_COL_RESET, - LOG_COL_BLACK, - LOG_COL_BLUE, - LOG_COL_GREEN, - LOG_COL_CYAN, - LOG_COL_RED, - LOG_COL_MAGENTA, - LOG_COL_YELLOW, - LOG_COL_WHITE, + + LOG_COL__COUNT, } os_log_colour_e; void os_log_print(os_log_level_e level, const char *fmt, ...); void os_log_printv(os_log_level_e level, const char *fmt, va_list args); void os_log_set_colour(os_log_colour_e colour); +void os_log_set_colour_bg(os_log_colour_e foreground, os_log_colour_e background); oshandle_t os_stdout(void); oshandle_t os_stdin(void); @@ -96,7 +111,10 @@ typedef enum filemode_e { FILEMODE_WRITE = 1 << 1, } filemode_e; -bool os_file_exists(strview_t path); +bool os_file_exists(strview_t filename); +bool os_dir_exists(strview_t folder); +bool os_file_or_dir_exists(strview_t path); +bool os_dir_create(strview_t folder); tstr_t os_file_fullpath(arena_t *arena, strview_t filename); void os_file_split_path(strview_t path, strview_t *dir, strview_t *name, strview_t *ext); bool os_file_delete(strview_t path); @@ -213,4 +231,4 @@ void os_cond_wait(oshandle_t cond, oshandle_t mutex, int milliseconds); #endif -#endif \ No newline at end of file +#endif diff --git a/parsers.c b/parsers.c index 807fb5f..e98ef49 100644 --- a/parsers.c +++ b/parsers.c @@ -1,6 +1,9 @@ #include "parsers.h" +#include + #include "os.h" +#include "darr.h" // == INI ============================================ @@ -126,6 +129,52 @@ bool ini_as_bool(inivalue_t *value) { return out; } +void ini_pretty_print(ini_t *ini, const ini_pretty_opts_t *options) { + ini_pretty_opts_t opt = {0}; + if (options) { + memmove(&opt, options, 
sizeof(ini_pretty_opts_t)); + } + + if (!os_handle_valid(opt.custom_target)) { + opt.custom_target = os_stdout(); + } + + if (!opt.use_custom_colours) { + os_log_colour_e default_col[INI_PRETTY_COLOUR__COUNT] = { + LOG_COL_YELLOW, // INI_PRETTY_COLOUR_KEY, + LOG_COL_GREEN, // INI_PRETTY_COLOUR_VALUE, + LOG_COL_WHITE, // INI_PRETTY_COLOUR_DIVIDER, + LOG_COL_RED, // INI_PRETTY_COLOUR_TABLE, + }; + memmove(opt.colours, default_col, sizeof(default_col)); + } + + for_each (t, ini->tables) { + if (!strv_equals(t->name, INI_ROOT)) { + os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_TABLE]); + os_file_puts(opt.custom_target, strv("[")); + os_file_puts(opt.custom_target, t->name); + os_file_puts(opt.custom_target, strv("]\n")); + } + + for_each (pair, t->values) { + if (strv_is_empty(pair->key) || strv_is_empty(pair->value)) continue; + os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_KEY]); + os_file_puts(opt.custom_target, pair->key); + + os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_DIVIDER]); + os_file_puts(opt.custom_target, strv(" = ")); + + os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_VALUE]); + os_file_puts(opt.custom_target, pair->value); + + os_file_puts(opt.custom_target, strv("\n")); + } + } + + os_log_set_colour(LOG_COL_RESET); +} + ///// ini-private //////////////////////////////////// iniopt_t ini__get_options(const iniopt_t *options) { @@ -297,7 +346,6 @@ json_t *json_get(json_t *node, strview_t key) { if (!node) return NULL; if (node->type != JSON_OBJECT) { - err("passed type is not an object"); return NULL; } @@ -351,7 +399,6 @@ bool json__check_char(instream_t *in, char c) { return true; } istr_rewind_n(in, 1); - err("wrong character at %zu, should be '%c' but is 0x%02x '%c'", istr_tell(in), c, istr_peek(in), istr_peek(in)); return false; } @@ -375,12 +422,10 @@ bool json__parse_null(instream_t *in) { bool is_valid = true; if (!strv_equals(null_view, strv("null"))) { - err("should be null but is: (%.*s) at %zu", null_view.len, null_view.buf, 
istr_tell(in)); is_valid = false; } if (!json__is_value_finished(in)) { - err("null, should be finished, but isn't at %zu", istr_tell(in)); is_valid = false; } @@ -419,7 +464,6 @@ bool json__parse_array(arena_t *arena, instream_t *in, jsonflags_e flags, json_t // trailing comma if (istr_peek(in) == ']') { if (flags & JSON_NO_TRAILING_COMMAS) { - err("trailing comma in array at at %zu: (%c)(%d)", istr_tell(in), *in->cur, *in->cur); goto fail; } else { @@ -438,7 +482,6 @@ bool json__parse_array(arena_t *arena, instream_t *in, jsonflags_e flags, json_t } default: istr_rewind_n(in, 1); - err("unknown char after array at %zu: (%c)(%d)", istr_tell(in), *in->cur, *in->cur); goto fail; } } @@ -550,7 +593,6 @@ bool json__parse_obj(arena_t *arena, instream_t *in, jsonflags_e flags, json_t * } default: istr_rewind_n(in, 1); - err("unknown char after object at %zu: (%c)(%d)", istr_tell(in), *in->cur, *in->cur); goto fail; } } @@ -767,10 +809,12 @@ xmlattr_t *xml__parse_attr(arena_t *arena, instream_t *in) { } strview_t key = strv_trim(istr_get_view(in, '=')); - istr_skip(in, 2); // skip = and " - strview_t val = strv_trim(istr_get_view(in, '"')); - istr_skip(in, 1); // skip " - + istr_skip(in, 1); // skip = + strview_t val = strv_trim(istr_get_view_either(in, strv("\">"))); + if (istr_peek(in) != '>') { + istr_skip(in, 1); // skip " + } + if (strv_is_empty(key) || strv_is_empty(val)) { warn("key or value empty"); return NULL; @@ -842,3 +886,221 @@ xmltag_t *xml__parse_tag(arena_t *arena, instream_t *in) { istr_skip(in, 1); // skip > return tag; } + +// == HTML =========================================== + +htmltag_t *html__parse_tag(arena_t *arena, instream_t *in); + +html_t html_parse(arena_t *arena, strview_t filename) { + str_t str = os_file_read_all_str(arena, filename); + return html_parse_str(arena, strv(str)); +} + +html_t html_parse_str(arena_t *arena, strview_t str) { + html_t out = { + .text = str, + .root = alloc(arena, xmltag_t), + }; + + instream_t in = 
istr_init(str); + + while (!istr_is_finished(&in)) { + htmltag_t *tag = html__parse_tag(arena, &in); + + if (out.tail) out.tail->next = tag; + else out.root->children = tag; + + out.tail = tag; + } + + return out; +} + +htmltag_t *html__get_tag_internal(htmltag_t *parent, str_t key, bool recursive) { + htmltag_t *t = parent ? parent->children : NULL; + while (t) { + if (str_equals(key, t->key)) { + return t; + } + if (recursive && t->children) { + htmltag_t *out = html__get_tag_internal(t, key, recursive); + if (out) { + return out; + } + } + t = t->next; + } + return NULL; +} + +htmltag_t *html_get_tag(htmltag_t *parent, strview_t key, bool recursive) { + u8 tmpbuf[KB(1)]; + arena_t scratch = arena_make(ARENA_STATIC, sizeof(tmpbuf), tmpbuf); + str_t upper = strv_to_upper(&scratch, key); + return html__get_tag_internal(parent, upper, recursive); +} + +strview_t html_get_attribute(htmltag_t *tag, strview_t key) { + xmlattr_t *a = tag ? tag->attributes : NULL; + while (a) { + if (strv_equals(key, a->key)) { + return a->value; + } + a = a->next; + } + return STRV_EMPTY; +} + +///// html-private /////////////////////////////////// + +/* + +special rules: +

tag does not need to be closed when followed by + address, article, aside, blockquote, details, dialog, div, + dl, fieldset, figcaption, figure, footer, form, h1, h2, h3, + h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre, + search, section, table, or ul +*/ + +strview_t html_closing_p_tags[] = { + cstrv("ADDRESS"), + cstrv("ARTICLE"), + cstrv("ASIDE"), + cstrv("BLOCKQUOTE"), + cstrv("DETAILS"), + cstrv("DIALOG"), + cstrv("DIV"), + cstrv("DL"), + cstrv("FIELDSET"), + cstrv("FIGCAPTION"), + cstrv("FIGURE"), + cstrv("FOOTER"), + cstrv("FORM"), + cstrv("H1"), + cstrv("H2"), + cstrv("H3"), + cstrv("H4"), + cstrv("H5"), + cstrv("H6"), + cstrv("HEADER"), + cstrv("HGROUP"), + cstrv("HR"), + cstrv("MAIN"), + cstrv("MENU"), + cstrv("NAV"), + cstrv("OL"), + cstrv("P"), + cstrv("PRE"), + cstrv("SEARCH"), + cstrv("SECTION"), + cstrv("TABLE"), + cstrv("UL"), +}; + +bool html__closes_p_tag(strview_t tag) { + for (int i = 0; i < arrlen(html_closing_p_tags); ++i) { + if (strv_equals(html_closing_p_tags[i], tag)) { + return true; + } + } + + return false; +} + +htmltag_t *html__parse_tag(arena_t *arena, instream_t *in) { + istr_skip_whitespace(in); + + // we're either parsing the body, or we have finished the object + if (istr_peek(in) != '<' || istr_peek_next(in) == '/') { + return NULL; + } + + istr_skip(in, 1); // skip < + + // meta tag, we don't care about these + if (istr_peek(in) == '?') { + istr_ignore_and_skip(in, '\n'); + return NULL; + } + + htmltag_t *tag = alloc(arena, htmltag_t); + + tag->key = strv_to_upper( + arena, + strv_trim(istr_get_view_either(in, strv(" >"))) + ); + + xmlattr_t *attr = xml__parse_attr(arena, in); + while (attr) { + attr->next = tag->attributes; + tag->attributes = attr; + attr = xml__parse_attr(arena, in); + } + + // this tag does not have children, return + if (istr_peek(in) == '/') { + istr_skip(in, 2); // skip / and > + return tag; + } + + istr_skip(in, 1); // skip > + + bool is_p_tag = strv_equals(strv(tag->key), strv("P")); + 
while (!istr_is_finished(in)) { + istr_skip_whitespace(in); + strview_t content = strv_trim(istr_get_view(in, '<')); + + // skip < + istr_skip(in, 1); + + bool is_closing = istr_peek(in) == '/'; + if (is_closing) { + istr_skip(in, 1); + } + + + arena_t scratch = *arena; + instream_t scratch_in = *in; + str_t next_tag = strv_to_upper(&scratch, strv_trim(istr_get_view_either(&scratch_in, strv(" >")))); + + // rewind < + istr_rewind_n(in, 1); + + // if we don't have children, it means this is the only content + // otherwise, it means this is content in-between other tags, + // if so: create an empty tag with the content and add it as a child + if (!strv_is_empty(content)) { + if (tag->children == NULL) { + tag->content = content; + } + else { + htmltag_t *empty = alloc(arena, htmltag_t); + empty->content = content; + olist_push(tag->children, tag->tail, empty); + } + } + + bool close_tag = + (is_closing && str_equals(tag->key, next_tag)) || + (is_p_tag && html__closes_p_tag(strv(next_tag))); + + if (close_tag) { + if (is_closing) { + istr_skip(in, 2 + next_tag.len); + } + break; + } + + htmltag_t *child = html__parse_tag(arena, in); + if (tag->tail) { + (tag->tail)->next = (child); + (tag->tail) = (child); + } + else { + (tag->children) = (tag->tail) = (child); + } + } + + return tag; +} diff --git a/parsers.h b/parsers.h index 2c17ee1..f793151 100644 --- a/parsers.h +++ b/parsers.h @@ -60,6 +60,24 @@ i64 ini_as_int(inivalue_t *value); double ini_as_num(inivalue_t *value); bool ini_as_bool(inivalue_t *value); +typedef enum { + INI_PRETTY_COLOUR_KEY, + INI_PRETTY_COLOUR_VALUE, + INI_PRETTY_COLOUR_DIVIDER, + INI_PRETTY_COLOUR_TABLE, + INI_PRETTY_COLOUR__COUNT, +} ini_pretty_colours_e; + +typedef struct ini_pretty_opts_t ini_pretty_opts_t; +struct ini_pretty_opts_t { + oshandle_t custom_target; + bool use_custom_colours; + os_log_colour_e colours[INI_PRETTY_COLOUR__COUNT]; +}; + +void ini_pretty_print(ini_t *ini, const ini_pretty_opts_t *options); + + // == JSON 
=========================================== typedef enum jsontype_e { @@ -153,4 +171,29 @@ xml_t xml_parse_str(arena_t *arena, strview_t xmlstr); xmltag_t *xml_get_tag(xmltag_t *parent, strview_t key, bool recursive); strview_t xml_get_attribute(xmltag_t *tag, strview_t key); -#endif \ No newline at end of file +// == HTML =========================================== + +typedef struct htmltag_t htmltag_t; +struct htmltag_t { + str_t key; + xmlattr_t *attributes; + strview_t content; + htmltag_t *children; + htmltag_t *tail; + htmltag_t *next; +}; + +typedef struct html_t html_t; +struct html_t { + strview_t text; + htmltag_t *root; + htmltag_t *tail; +}; + +html_t html_parse(arena_t *arena, strview_t filename); +html_t html_parse_str(arena_t *arena, strview_t str); + +htmltag_t *html_get_tag(htmltag_t *parent, strview_t key, bool recursive); +strview_t html_get_attribute(htmltag_t *tag, strview_t key); + +#endif diff --git a/pretty_print.c b/pretty_print.c new file mode 100644 index 0000000..38275a1 --- /dev/null +++ b/pretty_print.c @@ -0,0 +1,78 @@ +#include "pretty_print.h" + +#include <stdarg.h> +#include "core.h" +#include "os.h" +#include "str.h" + + +strview_t pretty__colour[LOG_COL__COUNT] = { + [LOG_COL_BLACK] = cstrv("black"), + [LOG_COL_BLUE] = cstrv("blue"), + [LOG_COL_GREEN] = cstrv("green"), + [LOG_COL_CYAN] = cstrv("cyan"), + [LOG_COL_RED] = cstrv("red"), + [LOG_COL_MAGENTA] = cstrv("magenta"), + [LOG_COL_YELLOW] = cstrv("yellow"), + [LOG_COL_GREY] = cstrv("grey"), + + [LOG_COL_DARK_GREY] = cstrv("dark_grey"), + [LOG_COL_WHITE] = cstrv("white"), + [LOG_COL_LIGHT_BLUE] = cstrv("light_blue"), + [LOG_COL_LIGHT_GREEN] = cstrv("light_green"), + [LOG_COL_LIGHT_CYAN] = cstrv("light_cyan"), + [LOG_COL_LIGHT_RED] = cstrv("light_red"), + [LOG_COL_LIGHT_MAGENTA] = cstrv("light_magenta"), + [LOG_COL_LIGHT_YELLOW] = cstrv("light_yellow"), + + [LOG_COL_RESET] = cstrv("/"), +}; + +void pretty_print(arena_t scratch, const char *fmt, ...) 
{ + va_list args; + va_start(args, fmt); + pretty_printv(scratch, fmt, args); + va_end(args); +} + +void pretty_printv(arena_t scratch, const char *fmt, va_list args) { + va_list tmp_args; + va_copy(tmp_args, args); + int len = fmt_bufferv(NULL, 0, fmt, tmp_args); + va_end(tmp_args); + + char *buf = alloc(&scratch, char, len + 1); + + fmt_bufferv(buf, len + 1, fmt, args); + + oshandle_t out = os_stdout(); + instream_t in = istr_init(strv(buf, len)); + while (!istr_is_finished(&in)) { + strview_t part = istr_get_view(&in, '<'); + bool has_escape = !istr_is_finished(&in) && strv_ends_with(part, '\\'); // a trailing backslash at the end of the text is literal, not an escape + + if (has_escape) { + part.len -= 1; + } + + os_file_write(out, part.buf, part.len); + istr_skip(&in, 1); + + if (has_escape) { + os_file_putc(out, '<'); + continue; + } + + strview_t tag = istr_get_view(&in, '>'); + + for (usize i = 0; i < arrlen(pretty__colour); ++i) { + if (strv_equals(tag, pretty__colour[i])) { + os_log_set_colour(i); + break; + } + } + + istr_skip(&in, 1); + } +} + diff --git a/pretty_print.h b/pretty_print.h new file mode 100644 index 0000000..35e88c1 --- /dev/null +++ b/pretty_print.h @@ -0,0 +1,14 @@ +#pragma once + +#if 0 + +pretty_print(arena, + "error!%s!", "wow"); + +#endif + +#include "arena.h" + +void pretty_print(arena_t scratch, const char *fmt, ...); +void pretty_printv(arena_t scratch, const char *fmt, va_list args); + diff --git a/str.c b/str.c index acca8f9..7b29b19 100644 --- a/str.c +++ b/str.c @@ -12,6 +12,11 @@ // == STR_T ======================================================== +strview_t strv__ignore(str_t s, size_t l) { + COLLA_UNUSED(s); COLLA_UNUSED(l); + return STRV_EMPTY; +} + str_t str_init(arena_t *arena, const char *buf) { return str_init_len(arena, buf, buf ? 
strlen(buf) : 0); } @@ -206,6 +211,18 @@ tstr_t strv_to_tstr(arena_t *arena, strview_t src) { #endif } +str_t strv_to_upper(arena_t *arena, strview_t src) { + str_t out = str(arena, src); + str_upper(&out); + return out; +} + +str_t strv_to_lower(arena_t *arena, strview_t src) { + str_t out = str(arena, src); + str_lower(&out); + return out; +} + strview_t strv_remove_prefix(strview_t ctx, usize n) { if (n > ctx.len) n = ctx.len; return (strview_t){ @@ -285,7 +302,8 @@ bool strv_contains(strview_t ctx, char c) { bool strv_contains_view(strview_t ctx, strview_t view) { if (ctx.len < view.len) return false; - usize end = ctx.len - view.len; + usize end = (ctx.len - view.len) + 1; + for (usize i = 0; i < end; ++i) { if (memcmp(ctx.buf + i, view.buf, view.len) == 0) { return true; @@ -314,7 +332,10 @@ usize strv_find(strview_t ctx, char c, usize from) { } usize strv_find_view(strview_t ctx, strview_t view, usize from) { - usize end = ctx.len - view.len; + if (view.len > ctx.len) return STR_NONE; + + usize end = (ctx.len - view.len) + 1; + for (usize i = from; i < end; ++i) { if (memcmp(ctx.buf + i, view.buf, view.len) == 0) { return i; @@ -374,6 +395,10 @@ bool char_is_num(char c) { return c >= '0' && c <= '9'; } +char char_lower(char c) { + return c >= 'A' && c <= 'Z' ? c + 32 : c; // ASCII lowercase letters sit 32 above their uppercase forms +} + // == INPUT STREAM ================================================= instream_t istr_init(strview_t str) { diff --git a/str.h b/str.h index bd72244..13ef068 100644 --- a/str.h +++ b/str.h @@ -92,10 +92,7 @@ strview_t str_sub(str_t ctx, usize from, usize to); #define STRV_EMPTY (strview_t){0} // needed for strv__init_literal _Generic implementation, it's never actually called -inline strview_t strv__ignore(str_t s, size_t l) { - COLLA_UNUSED(s); COLLA_UNUSED(l); - return STRV_EMPTY; -} +strview_t strv__ignore(str_t s, size_t l); #define strv__check(x, ...) ((#x)[0] == '"') #define strv__init_literal(x, ...) 
\ @@ -118,6 +115,8 @@ inline strview_t strv__ignore(str_t s, size_t l) { #define strv(...) strv__check(__VA_ARGS__) ? strv__init_literal(__VA_ARGS__) : strv__impl(__VA_ARGS__, 2, 1, 0)(__VA_ARGS__) +#define cstrv(cstr) { cstr, sizeof(cstr) - 1, } + strview_t strv_init(const char *cstr); strview_t strv_init_len(const char *buf, usize size); strview_t strv_init_str(str_t str); @@ -132,6 +131,9 @@ char strv_back(strview_t ctx); str16_t strv_to_str16(arena_t *arena, strview_t src); tstr_t strv_to_tstr(arena_t *arena, strview_t src); +str_t strv_to_upper(arena_t *arena, strview_t src); +str_t strv_to_lower(arena_t *arena, strview_t src); + strview_t strv_remove_prefix(strview_t ctx, usize n); strview_t strv_remove_suffix(strview_t ctx, usize n); strview_t strv_trim(strview_t ctx); @@ -162,6 +164,7 @@ usize strv_rfind_view(strview_t ctx, strview_t view, usize from_right); bool char_is_space(char c); bool char_is_alpha(char c); bool char_is_num(char c); +char char_lower(char c); // == INPUT STREAM ================================================= @@ -273,4 +276,4 @@ bool ibstr_get_i16(ibstream_t *ib, i16 *out); bool ibstr_get_i32(ibstream_t *ib, i32 *out); bool ibstr_get_i64(ibstream_t *ib, i64 *out); -#endif \ No newline at end of file +#endif