colla/vfs.c
2024-12-05 18:15:05 +01:00

635 lines
17 KiB
C

#include "vfs.h"
#include "lz4/lz4.c"
#include "file.h"
#include "strstream.h"
#include "arena.h"
#include "dir.h"
#include "bits.h"
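/*
vfs format:
=====================
header:
-----------
magic: "VFS" (3 bytes, no terminator)
u32 header_size   // bytes from the start of the file up to the binary data (magic and this field included)
u32 file_count
u8 flags
for each file:
    u64 hash              // todo remove
    u8 namelen
    char name[namelen]    // read back using namelen
    u64 offset            // relative to the start of the binary data
    u64 size
    u64 compressed_size
-----------
* binary data *
*/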
// One file found while walking the source directory. Nodes are chained into
// a singly linked list by vfs_add_dir and consumed by vfsVirtualiseDir.
typedef struct vfsfile_t vfsfile_t;
struct vfsfile_t {
// next file in the list
vfsfile_t *next;
// directory path followed by the entry name, as built in vfs_add_dir
str_t path;
// size in bytes reported by the directory entry
uint64 size;
// bytes stored in the archive: starts equal to `size`, replaced by the LZ4
// output length when the archive is compressed
uint64 comp_size;
};
// Forward declarations of the hash map types defined below.
typedef struct vfshmap_t vfshmap_t;
typedef struct vfshnode_t vfshnode_t;
// Location of one archived file, as read from the archive header.
typedef struct {
// start of the file's bytes, relative to the beginning of the binary data
uint64 offset;
// size in bytes of the file contents (uncompressed)
uint64 size;
// number of bytes the file occupies in a compressed archive
uint64 compsize;
} vfsdata_t;
// Entry of a hash map bucket; entries sharing a bucket form a linked list.
struct vfshnode_t {
// next entry in the same bucket
vfshnode_t *next;
// djb2 hash of `key`, compared before the full string comparison
uint64 hash;
// copy of the key made when the entry is inserted
str_t key;
// position of the associated value inside vfshmap_t.values
uint32 index;
};
// Chained hash map from file path to vfsdata_t. Values live in a flat array
// and bucket entries refer to them by index.
struct vfshmap_t {
// `size` bucket heads
vfshnode_t **buckets;
// storage for up to `max_values` values
vfsdata_t *values;
// number of buckets; a power of two, so `hash & (size - 1)` selects a bucket
uint32 size;
// number of values currently stored
uint32 count;
// incremented every time an add lands on a non-empty bucket
uint32 collisions;
// capacity of `values`
uint32 max_values;
};
// An opened archive.
struct virtualfs_t {
// archive file handle, kept when file contents are read on demand
file_t fp;
// whole binary data section, filled when VFS_FLAGS_DONT_STREAM is set
buffer_t buffer;
// path -> location lookup built from the archive header
vfshmap_t hmap;
// header size in bytes; added to an entry's offset when seeking in `fp`
uint64 base_offset;
// flags stored in the archive combined with the flags passed at load time
vfsflags_e flags;
};
// Forward declarations of the private helpers defined at the bottom of the file.

// Clears NULL_TERMINATE_FILES when it is combined with COMPRESSED.
vfsflags_e vfs_validate_flags(vfsflags_e flags);
// Loads the contents of one archive entry into `out`.
bool vfs_read(arena_t *arena, virtualfs_t *vfs, vfsdata_t *data, buffer_t *out, bool null_terminate);
// Recursively appends the files under `path` after `tail`; returns the new tail.
vfsfile_t *vfs_add_dir(arena_t *arena, strview_t path, vfsfile_t *tail, uint32 *count, uint64 *bytesize);
// Creates a hash map with (1 << pow2) buckets and room for `max_values` values.
vfshmap_t vfs_hmap_init(arena_t *arena, int pow2, uint32 max_values);
// Inserts `value` under `key`, or overwrites the value already stored for it.
void vfs_hmap_add(arena_t *arena, vfshmap_t *hmap, strview_t key, vfsdata_t value);
// Returns the value stored under `key`, or NULL.
vfsdata_t *vfs_hmap_get(vfshmap_t *hmap, strview_t key);
// Byte-string hash functions.
uint64 sdbm_hash(const void *bytes, usize count);
uint64 djb2_hash(const void *bytes, usize count);
// Packs every file found (recursively) under `dirpath` into a single archive
// written to `outfile`.
//
// scratch: arena used for all temporary allocations; it is passed by value
//          and cleaned up before returning.
// dirpath: directory to pack; a trailing '/' is appended when missing.
// outfile: path of the archive to create.
// flags:   archive flags, validated with vfs_validate_flags before use.
//
// Returns true when the archive was written, false when `outfile` could not
// be opened.
bool vfsVirtualiseDir(arena_t scratch, strview_t dirpath, strview_t outfile, vfsflags_e flags) {
    bool success = false;
    // Declared once for the whole function: both the write step and the
    // cleanup at the end must see the arena that the compression step fills.
    arena_t comp_arena = {0};

    flags = vfs_validate_flags(flags);
    const bool compress = (flags & VFS_FLAGS_COMPRESSED) != 0;
    const bool terminate = (flags & VFS_FLAGS_NULL_TERMINATE_FILES) != 0;

    if (strvBack(dirpath) != '/') {
        str_t newpath = strFmt(&scratch, "%v/", dirpath);
        dirpath = strv(newpath);
    }

    // collect the list of files to pack
    uint32 count = 0;
    uint64 bytesize = 0;
    vfsfile_t file_head = {0};
    vfs_add_dir(&scratch, dirpath, &file_head, &count, &bytesize);
    vfsfile_t *files = file_head.next;

    if (compress) {
        comp_arena = arenaMake(ARENA_VIRTUAL, GB(1));

        for_each (file, files) {
            // the copy of `scratch` is discarded on every iteration, so the
            // file contents do not pile up in the scratch arena
            arena_t tmp = scratch;
            buffer_t buf = fileReadWhole(&tmp, strv(file->path));

            usize maxlen = LZ4_compressBound((int)buf.len);
            uint8 *compressed = alloc(&comp_arena, uint8, maxlen);

            int actual_len = LZ4_compress_default(
                (const char *)buf.data, (char *)compressed, (int)buf.len, (int)maxlen
            );
            assert(actual_len > 0 && (usize)actual_len <= maxlen);

            // pop extra bytes that were allocated but not used, so that the
            // compressed files end up back to back inside the arena
            arenaPop(&comp_arena, maxlen - (usize)actual_len);

            file->comp_size = actual_len;
        }
    }

    // build the header: file count, flags, then one record per file
    obytestream_t header = obstrInit(&scratch);
    obstrAppendU32(&header, count);
    obstrAppendU8(&header, flags);

    uint64 offset = 0;
    for_each (file, files) {
        assert(file->path.len < 256);
        uint64 hash = djb2_hash(file->path.buf, file->path.len);

        obstrAppendU64(&header, hash);
        obstrAppendU8(&header, file->path.len);
        obstrPuts(&header, strv(file->path));
        obstrAppendU64(&header, offset);
        obstrAppendU64(&header, file->size);
        obstrAppendU64(&header, file->comp_size);

        // a null-terminated file occupies one extra byte in the binary data,
        // the next file starts after it
        offset += file->comp_size + (terminate ? 1 : 0);
    }

    buffer_t headerbuf = obstrAsBuf(&header);
    // + strlen("VFS") + sizeof(header_size)
    uint32 header_size = (uint32)(headerbuf.len + 3 + sizeof(uint32));

    file_t fp = fileOpen(outfile, FILE_WRITE);
    if (!fileIsValid(fp)) {
        err("could not open file %v", outfile);
        goto failed;
    }

    filePuts(fp, strv("VFS"));
    fileWrite(fp, &header_size, sizeof(header_size));
    fileWrite(fp, headerbuf.data, headerbuf.len);

    if (compress) {
        // everything allocated in comp_arena is the binary data section
        buffer_t compressed = {
            .data = comp_arena.start,
            .len = arenaTell(&comp_arena)
        };
        fileWrite(fp, compressed.data, compressed.len);
    }
    else {
        for_each (file, files) {
            arena_t tmp = scratch;
            buffer_t bin = fileReadWhole(&tmp, strv(file->path));
            if (terminate) {
                // one more byte allocated right after the file contents is
                // written out as the terminator
                alloc(&tmp, uint8);
                bin.len += 1;
            }
            fileWrite(fp, bin.data, bin.len);
        }
    }

    fileClose(fp);
    success = true;

failed:
    arenaCleanup(&comp_arena);
    arenaCleanup(&scratch);
    return success;
}
virtualfs_t *vfsReadFromFile(arena_t *arena, strview_t vfs_file, vfsflags_e flags) {
usize pos_before = arenaTell(arena);
virtualfs_t *vfs = alloc(arena, virtualfs_t);
file_t fp = fileOpen(vfs_file, FILE_READ);
if (!fileIsValid(fp)) {
goto failed;
}
// read header
struct {
char magic[3];
uint32 size;
uint32 file_count;
uint8 flags;
} header = {0};
fileRead(fp, &header.magic, sizeof(header.magic));
fileRead(fp, &header.size, sizeof(header.size));
fileRead(fp, &header.file_count, sizeof(header.file_count));
fileRead(fp, &header.flags, sizeof(header.flags));
if (memcmp(header.magic, "VFS", 3) != 0) {
err("VirtualFS: magic characters are wrong: %.3s (0x%x%x%x)", header.magic, header.magic[0], header.magic[1], header.magic[2]);
goto failed;
}
uint32 default_pow2 = 1 << 10;
uint32 pow2 = bitsNextPow2(header.file_count);
pow2 = bitsCtz(max(pow2, default_pow2));
vfs->hmap = vfs_hmap_init(arena, pow2, header.file_count);
for (uint32 i = 0; i < header.file_count; ++i) {
struct {
uint64 hash;
char name[256];
uint64 offset;
uint64 size;
uint64 comp;
} file = {0};
uint8 namelen = 0;
fileRead(fp, &file.hash, sizeof(file.hash));
fileRead(fp, &namelen, sizeof(namelen));
fileRead(fp, &file.name, namelen);
fileRead(fp, &file.offset, sizeof(file.offset));
fileRead(fp, &file.size, sizeof(file.size));
fileRead(fp, &file.comp, sizeof(file.comp));
vfsdata_t data = {
.offset = file.offset,
.size = file.size,
.compsize = file.comp,
};
strview_t path = strvInitLen(file.name, namelen);
vfs_hmap_add(arena, &vfs->hmap, path, data);
}
vfs->flags = vfs_validate_flags(header.flags | flags);
vfs->base_offset = header.size;
if (vfs->flags & VFS_FLAGS_DONT_STREAM) {
// get remaining size of the file
usize pos = fileTell(fp);
fileSeekEnd(fp);
usize endpos = fileTell(fp);
fileSeek(fp, pos);
usize binsize = endpos - pos;
// read binary data and save it to buffer for later
buffer_t buf = {
.data = alloc(arena, uint8, binsize),
.len = binsize,
};
usize read = fileRead(fp, buf.data, buf.len);
if (read != buf.len) {
err("couldn't read all of the binary data, expected %zu bytes but got %zu", buf.len, read);
goto failed;
}
fileClose(fp);
vfs->buffer = buf;
}
else {
vfs->fp = fp;
}
return vfs;
failed:
fileClose(fp);
arenaRewind(arena, pos_before);
return NULL;
}
// Returns the contents of the archived file `path`.
//
// The result is an empty buffer when `vfs` is NULL, when the path is not in
// the archive or when reading fails; in those cases the arena is rewound to
// its position at the time of the call.
buffer_t vfsRead(arena_t *arena, virtualfs_t *vfs, strview_t path) {
    usize mark = arenaTell(arena);
    buffer_t contents = {0};

    vfsdata_t *entry = vfs ? vfs_hmap_get(&vfs->hmap, path) : NULL;
    if (entry && vfs_read(arena, vfs, entry, &contents, false)) {
        return contents;
    }

    arenaRewind(arena, mark);
    return (buffer_t){0};
}
// Returns the contents of the archived file `path` as a string, asking
// vfs_read for null-terminated data.
//
// The result is STR_EMPTY when `vfs` is NULL, when the path is not in the
// archive or when reading fails; in those cases the arena is rewound to its
// position at the time of the call.
str_t vfsReadStr(arena_t *arena, virtualfs_t *vfs, strview_t path) {
    usize mark = arenaTell(arena);
    buffer_t contents = {0};

    vfsdata_t *entry = vfs ? vfs_hmap_get(&vfs->hmap, path) : NULL;
    if (entry && vfs_read(arena, vfs, entry, &contents, true)) {
        return (str_t){
            .buf = contents.data,
            .len = contents.len,
        };
    }

    arenaRewind(arena, mark);
    return STR_EMPTY;
}
// == VFS FILE API ===================================
// VirtualFS used by the vfsFile* functions; NULL until one is installed.
virtualfs_t *g_vfs = NULL;
// Installs `vfs` as the VirtualFS used by the vfsFile* functions.
void vfsSetGlobalVirtualFS(virtualfs_t *vfs) {
g_vfs = vfs;
}
// True when a global VirtualFS is installed and it contains `path`.
bool vfsFileExists(strview_t path) {
    return g_vfs && vfs_hmap_get(&g_vfs->hmap, path) != NULL;
}
// Looks `name` up in the global VirtualFS and returns a handle to it.
//
// The handle is zero when no global VirtualFS is installed, when `mode` is
// anything other than FILE_READ, or when the file is not in the archive.
vfs_file_t vfsFileOpen(strview_t name, int mode) {
    vfs_file_t file = {0};

    if (!g_vfs) {
        return file;
    }

    if (mode != FILE_READ) {
        err("VirtualFS: trying to open file (%v) for write, VirtualFS is read only!", name);
        return file;
    }

    // the handle is the address of the entry stored in the hash map
    file.handle = (uintptr_t)vfs_hmap_get(&g_vfs->hmap, name);
    return file;
}
// Does nothing: the function body only marks `ctx` as unused.
void vfsFileClose(vfs_file_t ctx) {
(void)ctx;
}
// A handle is valid when a global VirtualFS is installed and the handle is
// not zero.
bool vfsFileIsValid(vfs_file_t ctx) {
    if (!g_vfs) {
        return false;
    }
    return ctx.handle != 0;
}
// Returns the size recorded for the file behind `ctx`, or 0 when the handle
// is not valid.
usize vfsFileSize(vfs_file_t ctx) {
    if (vfsFileIsValid(ctx)) {
        const vfsdata_t *entry = (vfsdata_t *)ctx.handle;
        return entry->size;
    }
    return 0;
}
// Reads the whole file `name` from the global VirtualFS; forwards to vfsRead.
buffer_t vfsFileReadWhole(arena_t *arena, strview_t name) {
return vfsRead(arena, g_vfs, name);
}
// Reads the whole file behind the handle `ctx` from the global VirtualFS.
//
// Returns an empty buffer when the handle is not valid or when reading fails;
// a failed read also rewinds the arena to its position before the read.
buffer_t vfsFileReadWholeFP(arena_t *arena, vfs_file_t ctx) {
    buffer_t contents = {0};

    if (vfsFileIsValid(ctx)) {
        usize mark = arenaTell(arena);
        vfsdata_t *entry = (vfsdata_t *)ctx.handle;

        if (vfs_read(arena, g_vfs, entry, &contents, false)) {
            return contents;
        }
        arenaRewind(arena, mark);
    }

    return (buffer_t){0};
}
// Reads the whole file `name` from the global VirtualFS as a string; forwards
// to vfsReadStr.
str_t vfsFileReadWholeStr(arena_t *arena, strview_t name) {
return vfsReadStr(arena, g_vfs, name);
}
// Reads the whole file behind the handle `ctx` from the global VirtualFS as a
// string, asking vfs_read for null-terminated data.
//
// Returns STR_EMPTY when the handle is not valid or when reading fails; a
// failed read also rewinds the arena to its position before the read.
str_t vfsFileReadWholeStrFP(arena_t *arena, vfs_file_t ctx) {
    if (vfsFileIsValid(ctx)) {
        usize mark = arenaTell(arena);
        vfsdata_t *entry = (vfsdata_t *)ctx.handle;
        buffer_t contents = {0};

        if (vfs_read(arena, g_vfs, entry, &contents, true)) {
            return (str_t){
                .buf = contents.data,
                .len = contents.len,
            };
        }
        arenaRewind(arena, mark);
    }

    return STR_EMPTY;
}
// == PRIVATE FUNCTIONS ==============================
// Resolves conflicting flags: COMPRESSED and NULL_TERMINATE_FILES cannot be
// combined, so when both are set a warning is logged and
// NULL_TERMINATE_FILES is cleared. Any other combination is returned as is.
vfsflags_e vfs_validate_flags(vfsflags_e flags) {
    bool compressed = (flags & VFS_FLAGS_COMPRESSED) != 0;
    bool terminated = (flags & VFS_FLAGS_NULL_TERMINATE_FILES) != 0;

    if (!compressed || !terminated) {
        return flags;
    }

    warn("VirtualFS: both COMPRESSEd and NULL_TERMINATE_FILES flags are set to ON, but they are mutually exclusive. turning NULL_TERMINATE_FILES off");
    return flags & ~VFS_FLAGS_NULL_TERMINATE_FILES;
}
// Loads the contents of the archive entry `data` into `out`.
//
// arena:          receives the returned bytes, except when an uncompressed
//                 archive is held in memory (VFS_FLAGS_DONT_STREAM): then
//                 `out` points straight into the archive buffer.
// vfs:            archive to read from.
// data:           entry to read, as returned by vfs_hmap_get.
// out:            receives pointer and length of the contents.
// null_terminate: when true, a terminator byte follows the contents.
//
// Returns false when an argument is NULL, when fewer bytes than expected are
// read from the file, or when decompression fails. The arena is not rewound
// on failure, callers do that.
bool vfs_read(arena_t *arena, virtualfs_t *vfs, vfsdata_t *data, buffer_t *out, bool null_terminate) {
    if (!vfs || !data || !out) {
        return false;
    }

    const bool in_memory = (vfs->flags & VFS_FLAGS_DONT_STREAM) != 0;
    const bool stored_terminator = (vfs->flags & VFS_FLAGS_NULL_TERMINATE_FILES) != 0;

    // false when `out` points into vfs->buffer instead of the arena
    bool is_allocated = true;
    out->len = data->size;

    if (vfs->flags & VFS_FLAGS_COMPRESSED) {
        out->data = alloc(arena, uint8, out->len);

        uint8 *compressed = NULL;
        if (in_memory) {
            // the last entry ends exactly at the end of the buffer
            assert((data->offset + data->compsize) <= vfs->buffer.len);
            compressed = vfs->buffer.data + data->offset;
        }
        else {
            uint64 offset = vfs->base_offset + data->offset;
            fileSeek(vfs->fp, offset);

            // the compressed bytes go in a copy of the arena: the space is
            // given back as soon as `scratch` goes out of scope
            arena_t scratch = *arena;
            compressed = alloc(&scratch, uint8, data->compsize);
            usize read = fileRead(vfs->fp, compressed, data->compsize);
            if (read != data->compsize) {
                err("VirtualFS: read %zu bytes, but should have read %zu", read, data->compsize);
                return false;
            }
        }

        int decompsize = LZ4_decompress_safe(
            (const char *)compressed, (char *)out->data, (int)data->compsize, (int)out->len
        );
        if (decompsize < 0) {
            err("VirtualFS: couldn't decompress buffer: %d", decompsize);
            return false;
        }
    }
    else {
        if (in_memory) {
            // the last entry ends exactly at the end of the buffer
            assert((data->offset + data->size) <= vfs->buffer.len);
            out->data = vfs->buffer.data + data->offset;
            is_allocated = false;
        }
        else {
            out->data = alloc(arena, uint8, data->size);
            uint64 offset = vfs->base_offset + data->offset;
            fileSeek(vfs->fp, offset);
            usize read = fileRead(vfs->fp, out->data, out->len);
            if (read != out->len) {
                err("VirtualFS: read %zu bytes, but should have read %zu", read, out->len);
                return false;
            }
        }
    }

    if (null_terminate) {
        // NOTE(review): when the terminator is added here, out->len counts it;
        // when it comes from the archive (NULL_TERMINATE_FILES) it does not.
        // Confirm which of the two the callers expect.
        if (is_allocated) {
            // the contents were just placed in the arena, so the next byte
            // allocated follows them; assumes alloc returns zeroed memory
            // -- TODO confirm
            alloc(arena, char);
            if (!stored_terminator) {
                out->len += 1;
            }
        }
        else if (!stored_terminator) {
            // the archive buffer has no terminator after the contents: make a
            // terminated copy in the arena
            uint8 *buf = alloc(arena, uint8, out->len + 1);
            memcpy(buf, out->data, out->len);
            buf[out->len] = 0;
            out->data = buf;
            out->len += 1;
        }
        // otherwise the contents sit in the archive buffer, already followed
        // by the terminator stored in the archive
    }

    return true;
}
// Walks the directory `path` recursively and appends one vfsfile_t per file
// to the list ending at `tail`.
//
// arena:    receives the list nodes and their paths.
// path:     directory to walk; "./" is dropped from the stored paths.
// tail:     current last node of the list.
// count:    incremented once per file added.
// bytesize: increased by the size of every file added.
//
// Returns the last node of the list after the walk (`tail` itself when
// nothing was added).
vfsfile_t *vfs_add_dir(arena_t *arena, strview_t path, vfsfile_t *tail, uint32 *count, uint64 *bytesize) {
    uint8 pathbuf[KB(1)];

    dir_t *dir = dirOpen(arena, path);

    if (strvEquals(path, strv("./"))) {
        path = STRV_EMPTY;
    }

    vfsfile_t *last = tail;

    for (dir_entry_t *entry = dirNext(arena, dir); entry; entry = dirNext(arena, dir)) {
        // sub-directory paths are only needed for the recursive call, so they
        // are formatted into a buffer on the stack
        arena_t scratch = arenaMake(ARENA_STATIC, sizeof(pathbuf), pathbuf);

        if (entry->type != DIRTYPE_DIR) {
            vfsfile_t *node = alloc(arena, vfsfile_t);
            node->path = strFmt(arena, "%v%v", path, entry->name);
            node->size = entry->filesize;
            node->comp_size = node->size;

            if (last) {
                last->next = node;
            }
            *count += 1;
            *bytesize += node->size;

            last = node;
            continue;
        }

        // skip the entries for the directory itself and its parent
        if (strvEquals(strv(entry->name), strv(".")) ||
            strvEquals(strv(entry->name), strv(".."))) {
            continue;
        }

        str_t subdir = strFmt(&scratch, "%v%v/", path, entry->name);
        last = vfs_add_dir(arena, strv(subdir), last, count, bytesize);
    }

    return last;
}
// == HASH MAP =======================================
// Creates an empty hash map with (1 << pow2) buckets and room for
// `max_values` values, both allocated from `arena`.
vfshmap_t vfs_hmap_init(arena_t *arena, int pow2, uint32 max_values) {
    uint bucket_count = 1 << pow2;

    vfshmap_t map = {0};
    map.size = bucket_count;
    map.max_values = max_values;
    map.buckets = alloc(arena, vfshnode_t*, bucket_count);
    map.values = alloc(arena, vfsdata_t, max_values);
    return map;
}
// Stores `value` under `key`. An existing key has its value overwritten;
// a new key gets a copy of the key and the next free slot of the values
// array. Does nothing when `hmap` is NULL.
void vfs_hmap_add(arena_t *arena, vfshmap_t *hmap, strview_t key, vfsdata_t value) {
    if (!hmap) {
        return;
    }

    if ((float)hmap->count >= (float)hmap->size * 0.6f) {
        warn("more than 60%% of the hashmap is being used: %d/%d", hmap->count, hmap->size);
    }

    uint64 hash = djb2_hash(key.buf, key.len);
    usize slot = hash & (hmap->size - 1);
    vfshnode_t *first = hmap->buckets[slot];

    if (first) {
        hmap->collisions++;
    }

    for (vfshnode_t *node = first; node; node = node->next) {
        if (node->hash != hash) {
            continue;
        }
        if (strvEquals(strv(node->key), key)) {
            // already exists
            hmap->values[node->index] = value;
            return;
        }
    }

    assert(hmap->count < hmap->max_values);

    // new entries go to the front of the bucket
    vfshnode_t *node = alloc(arena, vfshnode_t);
    node->hash = hash;
    node->key = str(arena, key);
    node->index = hmap->count;
    node->next = first;

    hmap->values[hmap->count] = value;
    hmap->count += 1;
    hmap->buckets[slot] = node;
}
// Returns a pointer to the value stored under `key`, or NULL when `hmap` is
// NULL, empty, or does not contain the key.
vfsdata_t *vfs_hmap_get(vfshmap_t *hmap, strview_t key) {
    if (!hmap) {
        return NULL;
    }
    if (hmap->count == 0) {
        return NULL;
    }

    uint64 hash = djb2_hash(key.buf, key.len);
    usize slot = hash & (hmap->size - 1);

    for (vfshnode_t *node = hmap->buckets[slot]; node; node = node->next) {
        // compare the hashes first, the strings only when they match
        if (node->hash == hash && strvEquals(strv(node->key), key)) {
            return &hmap->values[node->index];
        }
    }

    return NULL;
}
// Hashes `count` bytes starting at `bytes`. Starting from 0, every byte
// updates the hash as: byte + (hash << 6) + (hash << 16) - hash.
uint64 sdbm_hash(const void *bytes, usize count) {
    const uint8 *cursor = bytes;
    const uint8 *end = cursor + count;

    uint64 hash = 0;
    while (cursor != end) {
        uint64 mixed = (hash << 6) + (hash << 16) - hash;
        hash = mixed + *cursor;
        cursor++;
    }
    return hash;
}
// Hashes `count` bytes starting at `bytes`. Starting from 5381, every byte
// updates the hash as: hash * 33 + byte (written as a shift and an add).
// With `count` equal to 0 the bytes are never read and 5381 is returned.
uint64 djb2_hash(const void *bytes, usize count) {
    const uint8 *data = bytes;
    uint64 hash = 5381;
    for (usize i = 0; i < count; ++i) {
        hash = ((hash << 5) + hash) + data[i];
    }
    return hash;
}
// Hashes `count` bytes starting at `bytes`. Starting from 1125899906842597,
// every byte updates the hash as: (hash << 5) - hash + byte.
uint64 java_hash(const void *bytes, usize count) {
    const uint8 *cursor = bytes;
    uint64 hash = 1125899906842597L;

    for (usize left = count; left > 0; --left) {
        uint64 scaled = (hash << 5) - hash;
        hash = scaled + *cursor++;
    }
    return hash;
}