update stuff

This commit is contained in:
alessandro bason 2025-06-15 11:32:55 +02:00
parent 6d36aa4442
commit 95d74c2ef4
13 changed files with 1196 additions and 48 deletions

284
parsers.c
View file

@ -1,6 +1,9 @@
#include "parsers.h"
#include <assert.h>
#include "os.h"
#include "darr.h"
// == INI ============================================
@ -126,6 +129,52 @@ bool ini_as_bool(inivalue_t *value) {
return out;
}
// Writes every table of `ini` to a file handle as coloured "key = value" lines.
//
// ini:     parsed ini whose tables and values are printed
// options: optional settings, may be NULL. When NULL, or when the handle in
//          custom_target is not valid, the output goes to os_stdout(). When
//          use_custom_colours is false the default palette below is used.
//
// Pairs with an empty key or an empty value are not printed. The log colour
// is set back to LOG_COL_RESET before returning.
void ini_pretty_print(ini_t *ini, const ini_pretty_opts_t *options) {
    // start from zeroed options and overlay the caller's, if any
    ini_pretty_opts_t opt = {0};
    if (options) {
        opt = *options;
    }

    if (!os_handle_valid(opt.custom_target)) {
        opt.custom_target = os_stdout();
    }

    if (!opt.use_custom_colours) {
        os_log_colour_e palette[INI_PRETTY_COLOUR__COUNT] = {
            LOG_COL_YELLOW, // INI_PRETTY_COLOUR_KEY,
            LOG_COL_GREEN,  // INI_PRETTY_COLOUR_VALUE,
            LOG_COL_WHITE,  // INI_PRETTY_COLOUR_DIVIDER,
            LOG_COL_RED,    // INI_PRETTY_COLOUR_TABLE,
        };
        memmove(opt.colours, palette, sizeof(palette));
    }

    for_each (t, ini->tables) {
        // every table except the root one gets a "[name]" header
        if (!strv_equals(t->name, INI_ROOT)) {
            os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_TABLE]);
            os_file_puts(opt.custom_target, strv("["));
            os_file_puts(opt.custom_target, t->name);
            os_file_puts(opt.custom_target, strv("]\n"));
        }

        for_each (pair, t->values) {
            // only complete pairs are printed
            if (!strv_is_empty(pair->key) && !strv_is_empty(pair->value)) {
                os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_KEY]);
                os_file_puts(opt.custom_target, pair->key);

                os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_DIVIDER]);
                os_file_puts(opt.custom_target, strv(" = "));

                os_log_set_colour(opt.colours[INI_PRETTY_COLOUR_VALUE]);
                os_file_puts(opt.custom_target, pair->value);

                os_file_puts(opt.custom_target, strv("\n"));
            }
        }
    }

    os_log_set_colour(LOG_COL_RESET);
}
///// ini-private ////////////////////////////////////
iniopt_t ini__get_options(const iniopt_t *options) {
@ -297,7 +346,6 @@ json_t *json_get(json_t *node, strview_t key) {
if (!node) return NULL;
if (node->type != JSON_OBJECT) {
err("passed type is not an object");
return NULL;
}
@ -351,7 +399,6 @@ bool json__check_char(instream_t *in, char c) {
return true;
}
istr_rewind_n(in, 1);
err("wrong character at %zu, should be '%c' but is 0x%02x '%c'", istr_tell(in), c, istr_peek(in), istr_peek(in));
return false;
}
@ -375,12 +422,10 @@ bool json__parse_null(instream_t *in) {
bool is_valid = true;
if (!strv_equals(null_view, strv("null"))) {
err("should be null but is: (%.*s) at %zu", null_view.len, null_view.buf, istr_tell(in));
is_valid = false;
}
if (!json__is_value_finished(in)) {
err("null, should be finished, but isn't at %zu", istr_tell(in));
is_valid = false;
}
@ -419,7 +464,6 @@ bool json__parse_array(arena_t *arena, instream_t *in, jsonflags_e flags, json_t
// trailing comma
if (istr_peek(in) == ']') {
if (flags & JSON_NO_TRAILING_COMMAS) {
err("trailing comma in array at at %zu: (%c)(%d)", istr_tell(in), *in->cur, *in->cur);
goto fail;
}
else {
@ -438,7 +482,6 @@ bool json__parse_array(arena_t *arena, instream_t *in, jsonflags_e flags, json_t
}
default:
istr_rewind_n(in, 1);
err("unknown char after array at %zu: (%c)(%d)", istr_tell(in), *in->cur, *in->cur);
goto fail;
}
}
@ -550,7 +593,6 @@ bool json__parse_obj(arena_t *arena, instream_t *in, jsonflags_e flags, json_t *
}
default:
istr_rewind_n(in, 1);
err("unknown char after object at %zu: (%c)(%d)", istr_tell(in), *in->cur, *in->cur);
goto fail;
}
}
@ -767,10 +809,12 @@ xmlattr_t *xml__parse_attr(arena_t *arena, instream_t *in) {
}
strview_t key = strv_trim(istr_get_view(in, '='));
istr_skip(in, 2); // skip = and "
strview_t val = strv_trim(istr_get_view(in, '"'));
istr_skip(in, 1); // skip "
istr_skip(in, 1); // skip =
strview_t val = strv_trim(istr_get_view_either(in, strv("\">")));
if (istr_peek(in) != '>') {
istr_skip(in, 1); // skip "
}
if (strv_is_empty(key) || strv_is_empty(val)) {
warn("key or value empty");
return NULL;
@ -842,3 +886,221 @@ xmltag_t *xml__parse_tag(arena_t *arena, instream_t *in) {
istr_skip(in, 1); // skip >
return tag;
}
// == HTML ===========================================
htmltag_t *html__parse_tag(arena_t *arena, instream_t *in);
// Reads the whole file `filename` into `arena` and parses its text as HTML.
// The returned html_t is whatever html_parse_str() produces for that text.
html_t html_parse(arena_t *arena, strview_t filename) {
    str_t contents = os_file_read_all_str(arena, filename);
    strview_t text = strv(contents);
    return html_parse_str(arena, text);
}
// Parses `str` as HTML and returns the document.
//
// arena: every node is allocated from here
// str:   the text to parse, it is stored as-is in the returned `text` field
//
// The result has a freshly allocated root node whose `children` list holds
// the top-level tags in document order; `tail` is the last top-level tag
// (NULL when nothing was parsed).
html_t html_parse_str(arena_t *arena, strview_t str) {
    html_t out = {
        .text = str,
        // the root is used as an html tag, so it must be allocated as one
        .root = alloc(arena, htmltag_t),
    };

    instream_t in = istr_init(str);

    while (!istr_is_finished(&in)) {
        size_t before = istr_tell(&in);
        htmltag_t *tag = html__parse_tag(arena, &in);

        if (!tag) {
            // html__parse_tag returns NULL for things that are not an opening
            // tag. Never link a NULL into the list (it would reset `tail` and
            // let the next tag overwrite root->children), and if no input was
            // consumed step over one character so the loop always advances.
            if (istr_tell(&in) == before) {
                istr_skip(&in, 1);
            }
            continue;
        }

        if (out.tail) out.tail->next = tag;
        else          out.root->children = tag;
        out.tail = tag;
    }

    return out;
}
// Returns the first child of `parent` whose key equals `key`, or NULL.
//
// Children are visited in list order. When `recursive` is true, the subtree
// of each non-matching child is searched before moving on to its next
// sibling. A NULL `parent` yields NULL.
htmltag_t *html__get_tag_internal(htmltag_t *parent, str_t key, bool recursive) {
    if (!parent) {
        return NULL;
    }

    for (htmltag_t *cur = parent->children; cur; cur = cur->next) {
        if (str_equals(key, cur->key)) {
            return cur;
        }

        if (!recursive || !cur->children) {
            continue;
        }

        htmltag_t *found = html__get_tag_internal(cur, key, recursive);
        if (found) {
            return found;
        }
    }

    return NULL;
}
// Looks up a tag by name below `parent`.
//
// `key` is converted to upper case before the lookup (html__parse_tag stores
// tag keys upper-cased). The upper-cased copy lives in a 1KB arena backed by
// a local buffer, so nothing is allocated from the caller's arenas.
// Returns the matching tag or NULL, see html__get_tag_internal.
htmltag_t *html_get_tag(htmltag_t *parent, strview_t key, bool recursive) {
    u8 storage[KB(1)];
    arena_t tmp = arena_make(ARENA_STATIC, sizeof(storage), storage);
    return html__get_tag_internal(parent, strv_to_upper(&tmp, key), recursive);
}
// Returns the value of the first attribute of `tag` whose key equals `key`.
// Returns STRV_EMPTY when `tag` is NULL or no attribute matches.
strview_t html_get_attribute(htmltag_t *tag, strview_t key) {
    if (!tag) {
        return STRV_EMPTY;
    }

    for (xmlattr_t *attr = tag->attributes; attr; attr = attr->next) {
        if (strv_equals(key, attr->key)) {
            return attr->value;
        }
    }

    return STRV_EMPTY;
}
///// html-private ///////////////////////////////////
/*
special rule for <p>:
a <p> tag does not need an explicit closing tag when it is followed by one of
address, article, aside, blockquote, details, dialog, div,
dl, fieldset, figcaption, figure, footer, form, h1, h2, h3,
h4, h5, h6, header, hgroup, hr, main, menu, nav, ol, p, pre,
search, section, table, or ul

the table below lists those tag names. html__closes_p_tag() searches it.
the names are spelled in upper case because html__parse_tag upper-cases
tag names before comparing them.
*/
strview_t html_closing_p_tags[] = {
cstrv("ADDRESS"),
cstrv("ARTICLE"),
cstrv("ASIDE"),
cstrv("BLOCKQUOTE"),
cstrv("DETAILS"),
cstrv("DIALOG"),
cstrv("DIV"),
cstrv("DL"),
cstrv("FIELDSET"),
cstrv("FIGCAPTION"),
cstrv("FIGURE"),
cstrv("FOOTER"),
cstrv("FORM"),
cstrv("H1"),
cstrv("H2"),
cstrv("H3"),
cstrv("H4"),
cstrv("H5"),
cstrv("H6"),
cstrv("HEADER"),
cstrv("HGROUP"),
cstrv("HR"),
cstrv("MAIN"),
cstrv("MENU"),
cstrv("NAV"),
cstrv("OL"),
cstrv("P"),
cstrv("PRE"),
cstrv("SEARCH"),
cstrv("SECTION"),
cstrv("TABLE"),
cstrv("UL"),
};
// Returns true when `tag` is equal to one of the entries of
// html_closing_p_tags, false otherwise.
bool html__closes_p_tag(strview_t tag) {
    bool found = false;
    for (int idx = 0; !found && idx < arrlen(html_closing_p_tags); ++idx) {
        found = strv_equals(html_closing_p_tags[idx], tag);
    }
    return found;
}
htmltag_t *html__parse_tag(arena_t *arena, instream_t *in) {
istr_skip_whitespace(in);
// we're either parsing the body, or we have finished the object
if (istr_peek(in) != '<' || istr_peek_next(in) == '/') {
return NULL;
}
istr_skip(in, 1); // skip <
// meta tag, we don't care about these
if (istr_peek(in) == '?') {
istr_ignore_and_skip(in, '\n');
return NULL;
}
htmltag_t *tag = alloc(arena, htmltag_t);
tag->key = strv_to_upper(
arena,
strv_trim(istr_get_view_either(in, strv(" >")))
);
xmlattr_t *attr = xml__parse_attr(arena, in);
while (attr) {
attr->next = tag->attributes;
tag->attributes = attr;
attr = xml__parse_attr(arena, in);
}
// this tag does not have children, return
if (istr_peek(in) == '/') {
istr_skip(in, 2); // skip / and >
return tag;
}
istr_skip(in, 1); // skip >
bool is_p_tag = strv_equals(strv(tag->key), strv("P"));
while (!istr_is_finished(in)) {
istr_skip_whitespace(in);
strview_t content = strv_trim(istr_get_view(in, '<'));
// skip <
istr_skip(in, 1);
bool is_closing = istr_peek(in) == '/';
if (is_closing) {
istr_skip(in, 1);
}
arena_t scratch = *arena;
instream_t scratch_in = *in;
str_t next_tag = strv_to_upper(&scratch, strv_trim(istr_get_view_either(&scratch_in, strv(" >"))));
// rewind <
istr_rewind_n(in, 1);
// if we don't have children, it means this is the only content
// otherwise, it means this is content in-between other tags,
// if so: create an empty tag with the content and add it as a child
if (!strv_is_empty(content)) {
if (tag->children == NULL) {
tag->content = content;
}
else {
htmltag_t *empty = alloc(arena, htmltag_t);
empty->content = content;
olist_push(tag->children, tag->tail, empty);
}
}
bool close_tag =
(is_closing && str_equals(tag->key, next_tag)) ||
(is_p_tag && html__closes_p_tag(strv(next_tag)));
if (close_tag) {
if (is_closing) {
istr_skip(in, 2 + next_tag.len);
}
break;
}
htmltag_t *child = html__parse_tag(arena, in);
if (tag->tail) {
(tag->tail)->next = (child);
(tag->tail) = (child);
}
else {
(tag->children) = (tag->tail) = (child);
}
}
return tag;
}