From 8b938fdd4279dba5bdc6d4f922f01f850db24435 Mon Sep 17 00:00:00 2001 From: Peter Harpending Date: Wed, 22 Oct 2025 13:17:39 -0700 Subject: [PATCH] [wip] static file cache data structure works --- src/fd_sfc.erl | 54 +----------------------- src/fd_sfc_cache.erl | 68 ++++++++++++++++++++++++++++++ src/fd_sfc_entry.erl | 99 +++++++++++++++++++++++++++++++++++++++++++- src/fd_sup.erl | 8 +++- 4 files changed, 174 insertions(+), 55 deletions(-) create mode 100644 src/fd_sfc_cache.erl diff --git a/src/fd_sfc.erl b/src/fd_sfc.erl index dc38007..32347e9 100644 --- a/src/fd_sfc.erl +++ b/src/fd_sfc.erl @@ -14,20 +14,10 @@ -include("$zx_include/zx_logger.hrl"). --record(e, {fs_path :: file:filename(), - last_modified :: file:date_time(), - mime_type :: string(), - contents :: binary()}). - --type entry() :: #e{}. - --record(s, {entries = #{ :: [entry()]}). +-record(s, {cache = fd_sfc_cache:new() :: fd_sfc_cache:cache()}). -type state() :: #s{}. - - - start_link() -> gen_server:start_link({local, ?MODULE}, ?MODULE, none, []). @@ -64,45 +54,3 @@ code_change(_, State, _) -> terminate(_, _) -> ok. - - -%%--------------------- -%% doers -%%--------------------- - - --spec refresh_entry(Entry) -> Result - when Entry :: entry(), - Result :: {found, NewEntry :: entry()} - | not_found. - -refresh_entry(Entry) -> - refresh_entry(Entry, false). - - --spec refresh_entry(Entry, Force) -> Result - when Entry :: entry(), - Force :: boolean(), - Result :: {found, NewEntry :: entry()} - | not_found. -% @private -% Force = even if file has not been modified, still refresh contents - -refresh_entry(E = #e{fs_path = Path}, Force) -> - case file:find_file(Path) of - {ok, _} -> re2(E, Force); - {error, not_found} -> not_found. - end. - -re2(E = #e{fs_path = Path, last_modified = LastModified}, _Force = false) -> - case file:last_modified(Path) > LastModified of - false -> {found, E}; - true -> re2(E, true) - end; -re2(E = #e{http_path = HttpPath}, _Force = true) -> - new_entry(HttpPath). 
- - --spec new_entry(HttpPath) -> Result - when HttpPath :: binary(), - FilePath :: diff --git a/src/fd_sfc_cache.erl b/src/fd_sfc_cache.erl new file mode 100644 index 0000000..dd629d3 --- /dev/null +++ b/src/fd_sfc_cache.erl @@ -0,0 +1,68 @@ +% @doc +% cache data management +-module(fd_sfc_cache). + +-export_type([ + cache/0 +]). + +-export([ + new/0, new/1 +]). + +-include("$zx_include/zx_logger.hrl"). + +-type cache() :: #{HttpPath :: binary() := Entry :: fd_sfc_entry:entry()}. + + +-spec new() -> cache(). +new() -> #{}. + + +-spec new(BasePath) -> cache() + when BasePath :: file:filename(). +% @doc +% if you give a file path it just takes the parent dir +% +% recursively crawls through file tree and picks up every file, keyed by its path relative to the base dir +% +% IO errors will be logged but will result in cache misses + +new(BasePath) -> + case filelib:is_file(BasePath) of + true -> new2(BasePath); + false -> + tell("~p:new(~p): no such file or directory, returning empty cache", [?MODULE, BasePath]), + #{} + end. + +new2(BasePath) -> + BaseDir = + case filelib:is_dir(BasePath) of + true -> filename:absname(BasePath); + false -> filename:absname(filename:dirname(BasePath)) + end, + %% hacky: strips the base dir prefix by byte size; crashes with badmatch if From does not start with Prefix + RemovePrefix = + fun (Prefix, Size, From) -> + <<Prefix:Size/binary, Rest/binary>> = From, + Rest + end, + BBaseDir = unicode:characters_to_binary(BaseDir), + BBS = byte_size(BBaseDir), + HandlePath = + fun(AbsPath, AccCache) -> + BAbsPath = unicode:characters_to_binary(AbsPath), + HttpPath = RemovePrefix(BBaseDir, BBS, BAbsPath), + NewCache = + case fd_sfc_entry:new(AbsPath) of + {found, Entry} -> maps:put(HttpPath, Entry, AccCache); + not_found -> AccCache + end, + NewCache + end, + filelib:fold_files(_dir = BaseDir, + _match = ".+", + _recursive = true, + _fun = HandlePath, + _init_acc = #{}).
diff --git a/src/fd_sfc_entry.erl b/src/fd_sfc_entry.erl index b726b60..a363484 100644 --- a/src/fd_sfc_entry.erl +++ b/src/fd_sfc_entry.erl @@ -1,2 +1,99 @@ % @doc non-servery functions for static file caching --module(fd_sfc_entry) +% +% this spams the filesystem, so it's not "pure" code +-module(fd_sfc_entry). + +-export_type([ + encoding/0, + entry/0 +]). + +-export([ + %% constructor + new/1, + %% accessors + fs_path/1, last_modified/1, mime_type/1, encoding/1, contents/1 +]). + +-include("$zx_include/zx_logger.hrl"). + +%% types + +% none = not compressed +-type encoding() :: none | gzip. + +-record(e, {fs_path :: file:filename(), + last_modified :: file:date_time(), + mime_type :: string(), + encoding :: encoding(), + contents :: binary()}). + +-opaque entry() :: #e{}. + +%% accessors + +fs_path(#e{fs_path = X}) -> X. +last_modified(#e{last_modified = X}) -> X. +mime_type(#e{mime_type = X}) -> X. +encoding(#e{encoding = X}) -> X. +contents(#e{contents = X}) -> X. + +%% API + +-spec new(Path) -> Result + when Path :: file:filename(), + Result :: {found, entry()} + | not_found. +% @doc +% Path is stored as given in the resulting record (callers should pass an absolute path) +% +% returns not_found if the file cannot be read; the read error is logged + +new(Path) -> + tell("~tp:new(~tp)", [?MODULE, Path]), + case file:read_file(Path) of + {ok, Binary} -> + {found, new2(Path, Binary)}; + Error -> + tell("~tp:new(~tp): file read error: ~tp", [?MODULE, Path, Error]), + not_found + end. + +%% can assume file exists +new2(FsPath, FileBytes) -> + LastModified = filelib:last_modified(FsPath), + {Encoding, MimeType} = mimetype_compress(FsPath), + Contents = + case Encoding of + none -> FileBytes; + gzip -> zlib:gzip(FileBytes) + end, + #e{fs_path = FsPath, + last_modified = LastModified, + mime_type = MimeType, + encoding = Encoding, + contents = Contents}.
+ +mimetype_compress(FsPath) -> + case string:casefold(filename:extension(FsPath)) of + %% only including the ones i anticipate encountering + %% plaintext formats + ".css" -> {gzip, "text/css"}; + ".htm" -> {gzip, "text/html"}; + ".html" -> {gzip, "text/html"}; + ".js" -> {gzip, "text/javascript"}; + ".json" -> {gzip, "application/json"}; + ".map" -> {gzip, "application/json"}; + ".md" -> {gzip, "text/markdown"}; + ".ts" -> {gzip, "text/x-typescript"}; + ".txt" -> {gzip, "text/plain"}; + %% binary formats + ".gif" -> {none, "image/gif"}; + ".jpg" -> {none, "image/jpeg"}; + ".jpeg" -> {none, "image/jpeg"}; + ".mp4" -> {none, "video/mp4"}; + ".png" -> {none, "image/png"}; + ".webm" -> {none, "video/webm"}; + ".webp" -> {none, "image/webp"}; + _ -> {none, "application/octet-stream"} + end. diff --git a/src/fd_sup.erl b/src/fd_sup.erl index 9c267f0..76859f3 100644 --- a/src/fd_sup.erl +++ b/src/fd_sup.erl @@ -48,11 +48,17 @@ init([]) -> 5000, worker, [fd_chat]}, + FileCache = {fd_sfc, + {fd_sfc, start_link, []}, + permanent, + 5000, + worker, + [fd_sfc]}, Cache = {fd_cache, {fd_cache, start_link, []}, permanent, 5000, worker, [fd_cache]}, - Children = [Clients, Chat, Cache], + Children = [Clients, Chat, FileCache, Cache], {ok, {RestartStrategy, Children}}.