From 7c729bd93268995331152f6791a5d0be8bbb42c2 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Mon, 24 Oct 2022 14:38:43 +0200 Subject: [PATCH 1/6] Use a serializing mutex --- src/mrdb_mutex.erl | 116 +++++++++++++++++++++++++++++++++------------ 1 file changed, 86 insertions(+), 30 deletions(-) diff --git a/src/mrdb_mutex.erl b/src/mrdb_mutex.erl index d745ae1..97f67eb 100644 --- a/src/mrdb_mutex.erl +++ b/src/mrdb_mutex.erl @@ -7,40 +7,48 @@ -define(LOCK_TAB, ?MODULE). -%% We use a wrapping ets counter (default: 0) as a form of semaphor. -%% The claim operation is done using an atomic list of two updates: -%% first, incrementing with 0 - this returns the previous value -%% then, incrementing with 1, but wrapping at 1, ensuring that we get 1 back, -%% regardless of previous value. This means that if [0,1] is returned, the resource -%% was not locked previously; if [1,1] is returned, it was. +-include_lib("eunit/include/eunit.hrl"). + +%% We use a duplicate_bag ets table as a lock queue, +%% relying on the characteristic that a lookup on a key (the resource name) +%% returns the objects in the order in which they were inserted. +%% We try to claim the lock by inserting our own pid under the Rrsc key, then +%% checking which pid is at the head of the list. If it's our pid, we have the +%% lock, and proceed with calling our fun, then delecting our table entry. +%% If another pid is at the head of the list, we busy-wait on the table. %% %% Releasing the resource is done by deleting the resource. If we just decrement, %% we will end up with lingering unlocked resources, so we might as well delete. %% Either operation is atomic, and the claim op creates the object if it's missing. +%% Another, perhaps cheaper, way of implementing a mutex would be to use a counter +%% object, but we also care about avoiding starvation, and this way, we get a form +%% of serialization of requests. + do(Rsrc, F) when is_function(F, 0) -> - true = claim(Rsrc), - try F() - after - release(Rsrc) + ets:insert(?LOCK_TAB, {Rsrc, self()}), + case have_lock(Rsrc) of + true -> + try F() + after + release(Rsrc) + end; + false -> + busy_wait(Rsrc, 5000) end. -claim(Rsrc) -> - case claim_(Rsrc) of - true -> true; - false -> busy_wait(Rsrc, 1000) - end. - -claim_(Rsrc) -> - case ets:update_counter(?LOCK_TAB, Rsrc, [{2, 0}, - {2, 1, 1, 1}], {Rsrc, 0}) of - [0, 1] -> - %% have lock - true; - [1, 1] -> +have_lock(Rsrc) -> + case ets:lookup(?LOCK_TAB, Rsrc) of + [{_, P}|_] -> + P =:= self(); + [] -> false end. +release(Rsrc) -> + ets:delete_object(?LOCK_TAB, {Rsrc, self()}). + + %% The busy-wait function makes use of the fact that we can read a timer to find out %% if it still has time remaining. This reduces the need for selective receive, looking %% for a timeout message. We yield, then retry the claim op. Yielding at least used to @@ -57,7 +65,7 @@ do_wait(Rsrc, Ref) -> erlang:cancel_timer(Ref), error(lock_wait_timeout); _ -> - case claim_(Rsrc) of + case have_lock(Rsrc) of true -> erlang:cancel_timer(Ref), ok; @@ -66,16 +74,64 @@ do_wait(Rsrc, Ref) -> end end. -release(Rsrc) -> - ets:delete(?LOCK_TAB, Rsrc), - ok. - - %% Called by the process holding the ets table. ensure_tab() -> case ets:info(?LOCK_TAB, name) of undefined -> - ets:new(?LOCK_TAB, [set, public, named_table, {write_concurrency, true}]); + ets:new(?LOCK_TAB, [duplicate_bag, public, named_table, + {read_concurrency, true}, + {write_concurrency, true}]); _ -> true end. + + +-ifdef(TEST). + +mutex_test_() -> + {foreach, + fun setup/0, + fun cleanup/1, + [ + {"Check that all operations complete", fun swarm_do/0} + ]}. + +setup() -> + ensure_tab(). + +cleanup(_) -> + ets:delete(?LOCK_TAB). + +swarm_do() -> + K = ?LINE, + Pids = [spawn_monitor(fun() -> + write_evens(my_rsrc, K, N) + end) || N <- lists:seq(1,25)], + await_pids(Pids), + Written = ets:lookup(?LOCK_TAB, K), + true = lists:all(fun is_even/1, [X || {_, X} <- Written]). + +is_even(N) -> + (N rem 2) =:= 0. + +await_pids([{_, MRef}|Pids]) -> + receive + {'DOWN', MRef, _, _, _} -> + await_pids(Pids) + after 10000 -> + error(timeout) + end; +await_pids([]) -> + ok. + +write_evens(Rsrc, K, N) -> + do(Rsrc, fun() -> + case is_even(N) of + true -> + ets:insert(?LOCK_TAB, {K, N}); + false -> + exit(not_even) + end + end). + +-endif. From 95abe4e36e70d73cd39d622820e5edb010fbe685 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Tue, 25 Oct 2022 10:17:35 +0200 Subject: [PATCH 2/6] Mutex server with fifo queues --- src/mnesia_rocksdb_admin.erl | 1 - src/mnesia_rocksdb_sup.erl | 3 +- src/mrdb_mutex.erl | 114 +++++++++++++--------------------- src/mrdb_mutex_serializer.erl | 98 +++++++++++++++++++++++++++++ 4 files changed, 142 insertions(+), 74 deletions(-) create mode 100644 src/mrdb_mutex_serializer.erl diff --git a/src/mnesia_rocksdb_admin.erl b/src/mnesia_rocksdb_admin.erl index 5c1cd43..aa4662a 100644 --- a/src/mnesia_rocksdb_admin.erl +++ b/src/mnesia_rocksdb_admin.erl @@ -326,7 +326,6 @@ call(Alias, Req, Timeout) -> end. start_link() -> - mrdb_mutex:ensure_tab(), gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). init([]) -> diff --git a/src/mnesia_rocksdb_sup.erl b/src/mnesia_rocksdb_sup.erl index 1b1706a..4d33130 100644 --- a/src/mnesia_rocksdb_sup.erl +++ b/src/mnesia_rocksdb_sup.erl @@ -42,4 +42,5 @@ start_link() -> %% =================================================================== init([]) -> - {ok, { {one_for_one, 5, 10}, [?CHILD(mnesia_rocksdb_params, worker)]} }. + {ok, { {one_for_one, 5, 10}, [ ?CHILD(mrdb_mutex_serializer, worker) + , ?CHILD(mnesia_rocksdb_params, worker)]} }. diff --git a/src/mrdb_mutex.erl b/src/mrdb_mutex.erl index 97f67eb..8f5777e 100644 --- a/src/mrdb_mutex.erl +++ b/src/mrdb_mutex.erl @@ -3,10 +3,6 @@ -export([ do/2 ]). --export([ ensure_tab/0 ]). - --define(LOCK_TAB, ?MODULE). - -include_lib("eunit/include/eunit.hrl"). %% We use a duplicate_bag ets table as a lock queue, @@ -26,66 +22,12 @@ %% of serialization of requests. do(Rsrc, F) when is_function(F, 0) -> - ets:insert(?LOCK_TAB, {Rsrc, self()}), - case have_lock(Rsrc) of - true -> - try F() - after - release(Rsrc) - end; - false -> - busy_wait(Rsrc, 5000) + {ok, Ref} = mrdb_mutex_serializer:wait(Rsrc), + try F() + after + mrdb_mutex_serializer:done(Rsrc, Ref) end. -have_lock(Rsrc) -> - case ets:lookup(?LOCK_TAB, Rsrc) of - [{_, P}|_] -> - P =:= self(); - [] -> - false - end. - -release(Rsrc) -> - ets:delete_object(?LOCK_TAB, {Rsrc, self()}). - - -%% The busy-wait function makes use of the fact that we can read a timer to find out -%% if it still has time remaining. This reduces the need for selective receive, looking -%% for a timeout message. We yield, then retry the claim op. Yielding at least used to -%% also be necessary for the `read_timer/1` value to refresh. -%% -busy_wait(Rsrc, Timeout) -> - Ref = erlang:send_after(Timeout, self(), {claim, Rsrc}), - do_wait(Rsrc, Ref). - -do_wait(Rsrc, Ref) -> - erlang:yield(), - case erlang:read_timer(Ref) of - false -> - erlang:cancel_timer(Ref), - error(lock_wait_timeout); - _ -> - case have_lock(Rsrc) of - true -> - erlang:cancel_timer(Ref), - ok; - false -> - do_wait(Rsrc, Ref) - end - end. - -%% Called by the process holding the ets table. -ensure_tab() -> - case ets:info(?LOCK_TAB, name) of - undefined -> - ets:new(?LOCK_TAB, [duplicate_bag, public, named_table, - {read_concurrency, true}, - {write_concurrency, true}]); - _ -> - true - end. - - -ifdef(TEST). mutex_test_() -> @@ -97,19 +39,47 @@ mutex_test_() -> ]}. setup() -> - ensure_tab(). + case whereis(mrdb_mutex_serializer) of + undefined -> + {ok, Pid} = mrdb_mutex_serializer:start_link(), + Pid; + Pid -> + Pid + end. -cleanup(_) -> - ets:delete(?LOCK_TAB). +cleanup(Pid) -> + unlink(Pid), + exit(Pid, kill). swarm_do() -> - K = ?LINE, + Rsrc = ?LINE, + Pid = spawn(fun() -> collect([]) end), + L = lists:seq(1, 1000), + Evens = [X || X <- L, is_even(X)], Pids = [spawn_monitor(fun() -> - write_evens(my_rsrc, K, N) - end) || N <- lists:seq(1,25)], + send_even(Rsrc, N, Pid) + end) || N <- lists:seq(1,1000)], await_pids(Pids), - Written = ets:lookup(?LOCK_TAB, K), - true = lists:all(fun is_even/1, [X || {_, X} <- Written]). + Results = fetch(Pid), + {incorrect_results, []} = {incorrect_results, Results -- Evens}, + {missing_correct_results, []} = {missing_correct_results, Evens -- Results}, + ok. + +collect(Acc) -> + receive + {_, result, N} -> + collect([N|Acc]); + {From, fetch} -> + From ! {fetch_reply, Acc}, + done + end. + +fetch(Pid) -> + Pid ! {self(), fetch}, + receive + {fetch_reply, Result} -> + Result + end. is_even(N) -> (N rem 2) =:= 0. @@ -124,11 +94,11 @@ await_pids([{_, MRef}|Pids]) -> await_pids([]) -> ok. -write_evens(Rsrc, K, N) -> +send_even(Rsrc, N, Pid) -> do(Rsrc, fun() -> case is_even(N) of true -> - ets:insert(?LOCK_TAB, {K, N}); + Pid ! {self(), result, N}; false -> exit(not_even) end diff --git a/src/mrdb_mutex_serializer.erl b/src/mrdb_mutex_serializer.erl new file mode 100644 index 0000000..eb2e5ac --- /dev/null +++ b/src/mrdb_mutex_serializer.erl @@ -0,0 +1,98 @@ +-module(mrdb_mutex_serializer). + +-behavior(gen_server). + +-export([wait/1, + done/2]). + +-export([start_link/0]). + +-export([init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +-record(st, { queues = #{} + , empty_q = queue:new() }). %% perhaps silly optimization + +wait(Rsrc) -> + gen_server:call(?MODULE, {wait, Rsrc}, infinity). + +done(Rsrc, Ref) -> + gen_server:cast(?MODULE, {done, Rsrc, Ref}). + +start_link() -> + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init([]) -> + {ok, #st{}}. + +handle_call({wait, Rsrc}, {Pid, _} = From, #st{ queues = Queues + , empty_q = NewQ } = St) -> + MRef = erlang:monitor(process, Pid), + Q0 = maps:get(Rsrc, Queues, NewQ), + WasEmpty = queue:is_empty(Q0), + Q1 = queue:in({From, MRef}, Q0), + St1 = St#st{ queues = Queues#{Rsrc => Q1} }, + case WasEmpty of + true -> + {reply, {ok, MRef}, St1}; + false -> + {noreply, St1} + end. + +handle_cast({done, Rsrc, MRef}, #st{ queues = Queues } = St) -> + case maps:find(Rsrc, Queues) of + {ok, Q} -> + case queue:out(Q) of + {{value, {_From, MRef}}, Q1} -> + erlang:demonitor(MRef), + ok = maybe_dispatch_one(Q1), + {noreply, St#st{ queues = Queues#{Rsrc := Q1} }}; + {_, _} -> + %% Not the lock holder + {noreply, St} + end; + error -> + {noreply, St} + end. + +handle_info({'DOWN', MRef, process, _, _}, #st{queues = Queues} = St) -> + Queues1 = + maps:map( + fun(_Rsrc, Q) -> + drop_or_filter(Q, MRef) + end, Queues), + {noreply, St#st{ queues = Queues1 }}; +handle_info(_, St) -> + {noreply, St}. + +terminate(_, _) -> + ok. + +code_change(_FromVsn, St, _Extra) -> + {ok, St}. + +%% In this function, we don't actually pop +maybe_dispatch_one(Q) -> + case queue:peek(Q) of + empty -> + ok; + {value, {From, MRef}} -> + gen_server:reply(From, {ok, MRef}), + ok + end. + +drop_or_filter(Q, MRef) -> + case queue:peek(Q) of + {value, {_, MRef}} -> + Q1 = queue:drop(Q), + ok = maybe_dispatch_one(Q1), + Q1; + {value, _Other} -> + queue:filter(fun({_,M}) -> M =/= MRef end, Q); + empty -> + Q + end. From 3635eac71701b7f368c6338834d9f5c4e8ad74dd Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Thu, 3 Nov 2022 12:41:14 +0100 Subject: [PATCH 3/6] Test case for inner retries; add mrdb_stats.erl; update docs --- doc/edoc-info | 3 +- doc/index.html | 17 + doc/mnesia_rocksdb.html | 529 ++++++++++++++++++++++++ doc/mnesia_rocksdb_admin.html | 294 +++++++++++++ doc/mnesia_rocksdb_app.html | 40 ++ doc/mnesia_rocksdb_lib.html | 181 ++++++++ doc/mnesia_rocksdb_params.html | 96 +++++ doc/mnesia_rocksdb_sup.html | 40 ++ doc/mnesia_rocksdb_tuning.html | 151 +++++++ doc/modules-frame.html | 24 ++ doc/mrdb.html | 728 +++++++++++++++++++++++++++++++++ doc/mrdb_index.html | 71 ++++ doc/mrdb_mutex.html | 32 ++ doc/mrdb_mutex_serializer.html | 88 ++++ doc/mrdb_select.html | 67 +++ doc/mrdb_stats.html | 87 ++++ doc/overview-summary.html | 254 ++++++++++++ src/mnesia_rocksdb_admin.erl | 2 + src/mrdb.erl | 101 +++-- src/mrdb_stats.erl | 74 ++++ test/mnesia_rocksdb_SUITE.erl | 144 ++++++- 21 files changed, 2982 insertions(+), 41 deletions(-) create mode 100644 doc/index.html create mode 100644 doc/mnesia_rocksdb.html create mode 100644 doc/mnesia_rocksdb_admin.html create mode 100644 doc/mnesia_rocksdb_app.html create mode 100644 doc/mnesia_rocksdb_lib.html create mode 100644 doc/mnesia_rocksdb_params.html create mode 100644 doc/mnesia_rocksdb_sup.html create mode 100644 doc/mnesia_rocksdb_tuning.html create mode 100644 doc/modules-frame.html create mode 100644 doc/mrdb.html create mode 100644 doc/mrdb_index.html create mode 100644 doc/mrdb_mutex.html create mode 100644 doc/mrdb_mutex_serializer.html create mode 100644 doc/mrdb_select.html create mode 100644 doc/mrdb_stats.html create mode 100644 doc/overview-summary.html create mode 100644 src/mrdb_stats.erl diff --git a/doc/edoc-info b/doc/edoc-info index b425e49..9993c3f 100644 --- a/doc/edoc-info +++ b/doc/edoc-info @@ -2,4 +2,5 @@ {application,mnesia_rocksdb}. {modules,[mnesia_rocksdb,mnesia_rocksdb_admin,mnesia_rocksdb_app, mnesia_rocksdb_lib,mnesia_rocksdb_params,mnesia_rocksdb_sup, - mnesia_rocksdb_tuning,mrdb,mrdb_index,mrdb_mutex,mrdb_select]}. + mnesia_rocksdb_tuning,mrdb,mrdb_index,mrdb_mutex, + mrdb_mutex_serializer,mrdb_select,mrdb_stats]}. diff --git a/doc/index.html b/doc/index.html new file mode 100644 index 0000000..aee6928 --- /dev/null +++ b/doc/index.html @@ -0,0 +1,17 @@ + + + +The mnesia_rocksdb application + + + + + + +<h2>This page uses frames</h2> +<p>Your browser does not accept frames. +<br>You should go to the <a href="overview-summary.html">non-frame version</a> instead. +</p> + + + \ No newline at end of file diff --git a/doc/mnesia_rocksdb.html b/doc/mnesia_rocksdb.html new file mode 100644 index 0000000..fb5d9a0 --- /dev/null +++ b/doc/mnesia_rocksdb.html @@ -0,0 +1,529 @@ + + + + +Module mnesia_rocksdb + + + + +
+ +

Module mnesia_rocksdb

+rocksdb storage backend for Mnesia. + +

Behaviours: gen_server, mnesia_backend_type.

+ +

Description

rocksdb storage backend for Mnesia.

+ + This module implements a mnesia backend callback plugin. + It's specifically documented to try to explain the workings of + backend plugins. + +

Data Types

+ +

alias()

+

alias() = atom()

+ + +

data_tab()

+

data_tab() = atom()

+ + +

error()

+

error() = {error, any()}

+ + +

index_info()

+

index_info() = {index_pos(), index_type()}

+ + +

index_pos()

+

index_pos() = integer() | {atom()}

+ + +

index_tab()

+

index_tab() = {data_tab(), index, index_info()}

+ + +

index_type()

+

index_type() = ordered

+ + +

retainer_name()

+

retainer_name() = any()

+ + +

retainer_tab()

+

retainer_tab() = {data_tab(), retainer, retainer_name()}

+ + +

table()

+

table() = data_tab() | index_tab() | retainer_tab()

+ + +

table_type()

+

table_type() = set | ordered_set | bag

+ + +

Function Index

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
add_aliases/1
check_definition/4
close_table/2
code_change/3
create_schema/1
create_schema/2
create_table/3
decode_key/1
decode_key/2
decode_val/1
decode_val/3
default_alias/0
delete/3
delete_table/2
encode_key/1
encode_key/2
encode_val/1
encode_val/2
first/2
fixtable/3
handle_call/3
handle_cast/2
handle_info/2
index_is_consistent/3
info/3
init/1
init_backend/0Called by mnesia_schema in order to intialize the backend.
insert/3
is_index_consistent/2
ix_listvals/3
ix_prefixes/3
last/2
load_table/4
lookup/3
match_delete/3
next/3
prev/3
real_suffixes/0
receive_data/5
receive_done/4
receiver_first_message/4
register/0Equivalent to register(rocksdb_copies). +
register/1Convenience function for registering a mnesia_rocksdb backend plugin.
remove_aliases/1
repair_continuation/2
select/1
select/3
select/4
semantics/2
sender_handle_info/5
sender_init/4
show_table/1A debug function that shows the rocksdb table content.
show_table/2
slot/3
start_proc/6
sync_close_table/2
terminate/2
tmp_suffixes/0
update_counter/4
validate_key/6
validate_record/6
+ +

Function Details

+ +

add_aliases/1

+
+

add_aliases(Aliases) -> any()

+

+
+ +

check_definition/4

+
+

check_definition(Alias, Tab, Nodes, Props) -> any()

+

+
+ +

close_table/2

+
+

close_table(Alias, Tab) -> any()

+

+
+ +

code_change/3

+
+

code_change(FromVsn, St, Extra) -> any()

+

+
+ +

create_schema/1

+
+

create_schema(Nodes) -> any()

+

+
+ +

create_schema/2

+
+

create_schema(Nodes, Aliases) -> any()

+

+
+ +

create_table/3

+
+

create_table(Alias, Tab, Props) -> any()

+

+
+ +

decode_key/1

+
+

decode_key(Key) -> any()

+

+
+ +

decode_key/2

+
+

decode_key(Key, Metadata) -> any()

+

+
+ +

decode_val/1

+
+

decode_val(Val) -> any()

+

+
+ +

decode_val/3

+
+

decode_val(Val, Key, Metadata) -> any()

+

+
+ +

default_alias/0

+
+

default_alias() -> any()

+

+
+ +

delete/3

+
+

delete(Alias, Tab, Key) -> any()

+

+
+ +

delete_table/2

+
+

delete_table(Alias, Tab) -> any()

+

+
+ +

encode_key/1

+
+

encode_key(Key) -> any()

+

+
+ +

encode_key/2

+
+

encode_key(Key, Metadata) -> any()

+

+
+ +

encode_val/1

+
+

encode_val(Val) -> any()

+

+
+ +

encode_val/2

+
+

encode_val(Val, Metadata) -> any()

+

+
+ +

first/2

+
+

first(Alias, Tab) -> any()

+

+
+ +

fixtable/3

+
+

fixtable(Alias, Tab, Bool) -> any()

+

+
+ +

handle_call/3

+
+

handle_call(M, From, St) -> any()

+

+
+ +

handle_cast/2

+
+

handle_cast(X1, St) -> any()

+

+
+ +

handle_info/2

+
+

handle_info(EXIT, St) -> any()

+

+
+ +

index_is_consistent/3

+
+

index_is_consistent(Alias, X2, Bool) -> any()

+

+
+ +

info/3

+
+

info(Alias, Tab, Item) -> any()

+

+
+ +

init/1

+
+

init(X1) -> any()

+

+
+ +

init_backend/0

+
+

init_backend() -> any()

+

+

Called by mnesia_schema in order to intialize the backend

+ +

This is called when the backend is registered with the first alias, or ...

+ +

See OTP issue #425 (16 Feb 2021). This callback is supposed to be called +before first use of the backend, but unfortunately, it is only called at +mnesia startup and when a backend module is registered MORE THAN ONCE. +This means we need to handle this function being called multiple times.

+ +

The bug has been fixed as of OTP 24.0-rc3

+ +

If processes need to be started, this can be done using + mnesia_ext_sup:start_proc(Name, Mod, F, Args [, Opts]) +where Opts are parameters for the supervised child:

+ + * restart (default: transient) + * shutdown (default: 120000) + * type (default: worker) + * modules (default: [Mod])

+ +

insert/3

+
+

insert(Alias, Tab, Obj) -> any()

+

+
+ +

is_index_consistent/2

+
+

is_index_consistent(Alias, X2) -> any()

+

+
+ +

ix_listvals/3

+
+

ix_listvals(Tab, Pos, Obj) -> any()

+

+
+ +

ix_prefixes/3

+
+

ix_prefixes(Tab, Pos, Obj) -> any()

+

+
+ +

last/2

+
+

last(Alias, Tab) -> any()

+

+
+ +

load_table/4

+
+

load_table(Alias, Tab, LoadReason, Props) -> any()

+

+
+ +

lookup/3

+
+

lookup(Alias, Tab, Key) -> any()

+

+
+ +

match_delete/3

+
+

match_delete(Alias, Tab, Pat) -> any()

+

+
+ +

next/3

+
+

next(Alias, Tab, Key) -> any()

+

+
+ +

prev/3

+
+

prev(Alias, Tab, Key) -> any()

+

+
+ +

real_suffixes/0

+
+

real_suffixes() -> any()

+

+
+ +

receive_data/5

+
+

receive_data(Data, Alias, Tab, Sender, State) -> any()

+

+
+ +

receive_done/4

+
+

receive_done(Alias, Tab, Sender, State) -> any()

+

+
+ +

receiver_first_message/4

+
+

receiver_first_message(Pid, Msg, Alias, Tab) -> any()

+

+
+ +

register/0

+
+

register() -> {ok, alias()} | {error, term()}

+

+

Equivalent to register(rocksdb_copies).

+ + +

register/1

+
+

register(Alias::alias()) -> {ok, alias()} | error()

+

+

Convenience function for registering a mnesia_rocksdb backend plugin

+ + The function used to register a plugin is mnesia_schema:add_backend_type(Alias, Module) + where Module implements a backend_type behavior. Alias is an atom, and is used + in the same way as ram_copies etc. The default alias is rocksdb_copies.

+ +

remove_aliases/1

+
+

remove_aliases(Aliases) -> any()

+

+
+ +

repair_continuation/2

+
+

repair_continuation(Cont, Ms) -> any()

+

+
+ +

select/1

+
+

select(Cont) -> any()

+

+
+ +

select/3

+
+

select(Alias, Tab, Ms) -> any()

+

+
+ +

select/4

+
+

select(Alias, IxTab, Ms, Limit) -> any()

+

+
+ +

semantics/2

+
+

semantics(Alias, X2) -> any()

+

+
+ +

sender_handle_info/5

+
+

sender_handle_info(Msg, Alias, Tab, ReceiverPid, Cont) -> any()

+

+
+ +

sender_init/4

+
+

sender_init(Alias, Tab, RemoteStorage, Pid) -> any()

+

+
+ +

show_table/1

+
+

show_table(Tab) -> any()

+

+

A debug function that shows the rocksdb table content

+ +

show_table/2

+
+

show_table(Tab, Limit) -> any()

+

+
+ +

slot/3

+
+

slot(Alias, Tab, Pos) -> any()

+

+
+ +

start_proc/6

+
+

start_proc(Alias, Tab, Type, ProcName, Props, RdbOpts) -> any()

+

+
+ +

sync_close_table/2

+
+

sync_close_table(Alias, Tab) -> any()

+

+
+ +

terminate/2

+
+

terminate(Reason, St) -> any()

+

+
+ +

tmp_suffixes/0

+
+

tmp_suffixes() -> any()

+

+
+ +

update_counter/4

+
+

update_counter(Alias, Tab, C, Val) -> any()

+

+
+ +

validate_key/6

+
+

validate_key(Alias, Tab, RecName, Arity, Type, Key) -> any()

+

+
+ +

validate_record/6

+
+

validate_record(Alias, Tab, RecName, Arity, Type, Obj) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mnesia_rocksdb_admin.html b/doc/mnesia_rocksdb_admin.html new file mode 100644 index 0000000..82dd924 --- /dev/null +++ b/doc/mnesia_rocksdb_admin.html @@ -0,0 +1,294 @@ + + + + +Module mnesia_rocksdb_admin + + + + +
+ +

Module mnesia_rocksdb_admin

+ + +

Behaviours: gen_server.

+ +

Data Types

+ +

alias()

+

alias() = atom()

+ + +

backend()

+

backend() = #{db_ref := db_ref(), cf_info := #{table() := cf()}}

+ + +

cf()

+

cf() = mrdb:db_ref()

+ + +

db_ref()

+

db_ref() = rocksdb:db_handle()

+ + +

gen_server_noreply()

+

gen_server_noreply() = {noreply, st()} | {stop, reason(), st()}

+ + +

gen_server_reply()

+

gen_server_reply() = {reply, reply(), st()} | {stop, reason(), reply(), st()}

+ + +

properties()

+

properties() = [{atom(), any()}]

+ + +

reason()

+

reason() = any()

+ + +

reply()

+

reply() = any()

+ + +

req()

+

req() = {create_table, table(), properties()} | {delete_table, table()} | {load_table, table(), properties()} | {related_resources, table()} | {get_ref, table()} | {add_aliases, [alias()]} | {write_table_property, tabname(), tuple()} | {remove_aliases, [alias()]} | {migrate, [{tabname(), map()}], rpt()} | {prep_close, table()} | {close_table, table()} | {clear_table, table() | cf()}

+ + +

rpt()

+

rpt() = undefined | map()

+ + +

st()

+

st() = #st{backends = #{alias() => backend()}, standalone = #{{alias(), table()} := cf()}, default_opts = [{atom(), term()}]}

+ + +

table()

+

table() = tabname() | {admin, alias()} | {tabname(), index, any()} | {tabname(), retainer, any()}

+ + +

tabname()

+

tabname() = atom()

+ + +

Function Index

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
add_aliases/1
clear_table/1
close_table/2
code_change/3
create_table/3
delete_table/2
ensure_started/0
get_cached_env/2
get_ref/1
get_ref/2
handle_call/3
handle_cast/2
handle_info/2
init/1
load_table/3
meta/0
migrate_standalone/2
migrate_standalone/3
prep_close/2
read_info/1
read_info/2
read_info/4
related_resources/2
remove_aliases/1
request_ref/2
set_and_cache_env/2
start_link/0
terminate/2
write_info/4
write_table_property/3
+ +

Function Details

+ +

add_aliases/1

+
+

add_aliases(Aliases) -> any()

+

+
+ +

clear_table/1

+
+

clear_table(Name) -> any()

+

+
+ +

close_table/2

+
+

close_table(Alias, Name) -> any()

+

+
+ +

code_change/3

+
+

code_change(FromVsn, St, Extra) -> any()

+

+
+ +

create_table/3

+
+

create_table(Alias, Name, Props) -> any()

+

+
+ +

delete_table/2

+
+

delete_table(Alias::alias(), Name::tabname()) -> ok

+

+
+ +

ensure_started/0

+
+

ensure_started() -> ok

+

+
+ +

get_cached_env/2

+
+

get_cached_env(Key, Default) -> any()

+

+
+ +

get_ref/1

+
+

get_ref(Name) -> any()

+

+
+ +

get_ref/2

+
+

get_ref(Name, Default) -> any()

+

+
+ +

handle_call/3

+
+

handle_call(Req::{alias(), req()}, From::any(), St::st()) -> gen_server_reply()

+

+
+ +

handle_cast/2

+
+

handle_cast(Msg::any(), St::st()) -> gen_server_noreply()

+

+
+ +

handle_info/2

+
+

handle_info(Msg::any(), St::st()) -> gen_server_noreply()

+

+
+ +

init/1

+
+

init(X1) -> any()

+

+
+ +

load_table/3

+
+

load_table(Alias, Name, Props) -> any()

+

+
+ +

meta/0

+
+

meta() -> any()

+

+
+ +

migrate_standalone/2

+
+

migrate_standalone(Alias, Tabs) -> any()

+

+
+ +

migrate_standalone/3

+
+

migrate_standalone(Alias, Tabs, Rpt0) -> any()

+

+
+ +

prep_close/2

+
+

prep_close(Alias, Tab) -> any()

+

+
+ +

read_info/1

+
+

read_info(TRec) -> any()

+

+
+ +

read_info/2

+
+

read_info(Alias, Tab) -> any()

+

+
+ +

read_info/4

+
+

read_info(Alias, Tab, K, Default) -> any()

+

+
+ +

related_resources/2

+
+

related_resources(Alias, Name) -> any()

+

+
+ +

remove_aliases/1

+
+

remove_aliases(Aliases) -> any()

+

+
+ +

request_ref/2

+
+

request_ref(Alias, Name) -> any()

+

+
+ +

set_and_cache_env/2

+
+

set_and_cache_env(Key, Value) -> any()

+

+
+ +

start_link/0

+
+

start_link() -> any()

+

+
+ +

terminate/2

+
+

terminate(X1, St) -> any()

+

+
+ +

write_info/4

+
+

write_info(Alias, Tab, K, V) -> any()

+

+
+ +

write_table_property/3

+
+

write_table_property(Alias, Tab, Prop) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mnesia_rocksdb_app.html b/doc/mnesia_rocksdb_app.html new file mode 100644 index 0000000..bd763b9 --- /dev/null +++ b/doc/mnesia_rocksdb_app.html @@ -0,0 +1,40 @@ + + + + +Module mnesia_rocksdb_app + + + + +
+ +

Module mnesia_rocksdb_app

+ + +

Behaviours: application.

+ +

Function Index

+ + +
start/2
stop/1
+ +

Function Details

+ +

start/2

+
+

start(StartType, StartArgs) -> any()

+

+
+ +

stop/1

+
+

stop(State) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mnesia_rocksdb_lib.html b/doc/mnesia_rocksdb_lib.html new file mode 100644 index 0000000..5f6a873 --- /dev/null +++ b/doc/mnesia_rocksdb_lib.html @@ -0,0 +1,181 @@ + + + + +Module mnesia_rocksdb_lib + + + + +
+ +

Module mnesia_rocksdb_lib

+RocksDB update wrappers, in separate module for easy tracing and mocking. + + +

Description

RocksDB update wrappers, in separate module for easy tracing and mocking. + +

Function Index

+ + + + + + + + + + + + + + + + + + + + + + +
check_encoding/2
create_mountpoint/1
data_mountpoint/1
decode/2
decode_key/1
decode_key/2
decode_val/1
decode_val/3
default_encoding/3
delete/3
encode/2
encode_key/1
encode_key/2
encode_val/1
encode_val/2
keypos/1
open_rocksdb/3
put/4
tabname/1
valid_key_type/2
valid_obj_type/2
write/3
+ +

Function Details

+ +

check_encoding/2

+
+

check_encoding(Encoding, Attributes) -> any()

+

+
+ +

create_mountpoint/1

+
+

create_mountpoint(Tab) -> any()

+

+
+ +

data_mountpoint/1

+
+

data_mountpoint(Tab) -> any()

+

+
+ +

decode/2

+
+

decode(Val, X2) -> any()

+

+
+ +

decode_key/1

+
+

decode_key(CodedKey::binary()) -> any()

+

+
+ +

decode_key/2

+
+

decode_key(CodedKey, Enc) -> any()

+

+
+ +

decode_val/1

+
+

decode_val(CodedVal::binary()) -> any()

+

+
+ +

decode_val/3

+
+

decode_val(CodedVal, K, Ref) -> any()

+

+
+ +

default_encoding/3

+
+

default_encoding(X1, Type, As) -> any()

+

+
+ +

delete/3

+
+

delete(Ref, K, Opts) -> any()

+

+
+ +

encode/2

+
+

encode(Value, X2) -> any()

+

+
+ +

encode_key/1

+
+

encode_key(Key::any()) -> binary()

+

+
+ +

encode_key/2

+
+

encode_key(Key, X2) -> any()

+

+
+ +

encode_val/1

+
+

encode_val(Val::any()) -> binary()

+

+
+ +

encode_val/2

+
+

encode_val(Val, Enc) -> any()

+

+
+ +

keypos/1

+
+

keypos(Tab) -> any()

+

+
+ +

open_rocksdb/3

+
+

open_rocksdb(MPd, RdbOpts, CFs) -> any()

+

+
+ +

put/4

+
+

put(Ref, K, V, Opts) -> any()

+

+
+ +

tabname/1

+
+

tabname(Tab) -> any()

+

+
+ +

valid_key_type/2

+
+

valid_key_type(X1, Key) -> any()

+

+
+ +

valid_obj_type/2

+
+

valid_obj_type(X1, Obj) -> any()

+

+
+ +

write/3

+
+

write(X1, L, Opts) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mnesia_rocksdb_params.html b/doc/mnesia_rocksdb_params.html new file mode 100644 index 0000000..087b8ae --- /dev/null +++ b/doc/mnesia_rocksdb_params.html @@ -0,0 +1,96 @@ + + + + +Module mnesia_rocksdb_params + + + + +
+ +

Module mnesia_rocksdb_params

+ + +

Behaviours: gen_server.

+ +

Function Index

+ + + + + + + + + + +
code_change/3
delete/1
handle_call/3
handle_cast/2
handle_info/2
init/1
lookup/2
start_link/0
store/2
terminate/2
+ +

Function Details

+ +

code_change/3

+
+

code_change(X1, S, X3) -> any()

+

+
+ +

delete/1

+
+

delete(Tab) -> any()

+

+
+ +

handle_call/3

+
+

handle_call(X1, X2, S) -> any()

+

+
+ +

handle_cast/2

+
+

handle_cast(X1, S) -> any()

+

+
+ +

handle_info/2

+
+

handle_info(X1, S) -> any()

+

+
+ +

init/1

+
+

init(X1) -> any()

+

+
+ +

lookup/2

+
+

lookup(Tab, Default) -> any()

+

+
+ +

start_link/0

+
+

start_link() -> any()

+

+
+ +

store/2

+
+

store(Tab, Params) -> any()

+

+
+ +

terminate/2

+
+

terminate(X1, X2) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mnesia_rocksdb_sup.html b/doc/mnesia_rocksdb_sup.html new file mode 100644 index 0000000..4a4d957 --- /dev/null +++ b/doc/mnesia_rocksdb_sup.html @@ -0,0 +1,40 @@ + + + + +Module mnesia_rocksdb_sup + + + + +
+ +

Module mnesia_rocksdb_sup

+ + +

Behaviours: supervisor.

+ +

Function Index

+ + +
init/1
start_link/0
+ +

Function Details

+ +

init/1

+
+

init(X1) -> any()

+

+
+ +

start_link/0

+
+

start_link() -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mnesia_rocksdb_tuning.html b/doc/mnesia_rocksdb_tuning.html new file mode 100644 index 0000000..1f31903 --- /dev/null +++ b/doc/mnesia_rocksdb_tuning.html @@ -0,0 +1,151 @@ + + + + +Module mnesia_rocksdb_tuning + + + + +
+ +

Module mnesia_rocksdb_tuning

+ + + +

Function Index

+ + + + + + + + + + + + + + + + + + +
cache/1
calc_sizes/0
calc_sizes/1
count_rdb_tabs/0
count_rdb_tabs/1
default/1
describe_env/0
get_avail_ram/0
get_maxfiles/0
get_maxfiles/1
ideal_max_files/0
ideal_max_files/1
max_files/1
rdb_indexes/0
rdb_indexes/1
rdb_tabs/0
rdb_tabs/1
write_buffer/1
+ +

Function Details

+ +

cache/1

+
+

cache(X1) -> any()

+

+
+ +

calc_sizes/0

+
+

calc_sizes() -> any()

+

+
+ +

calc_sizes/1

+
+

calc_sizes(D) -> any()

+

+
+ +

count_rdb_tabs/0

+
+

count_rdb_tabs() -> any()

+

+
+ +

count_rdb_tabs/1

+
+

count_rdb_tabs(Db) -> any()

+

+
+ +

default/1

+
+

default(X1) -> any()

+

+
+ +

describe_env/0

+
+

describe_env() -> any()

+

+
+ +

get_avail_ram/0

+
+

get_avail_ram() -> any()

+

+
+ +

get_maxfiles/0

+
+

get_maxfiles() -> any()

+

+
+ +

get_maxfiles/1

+
+

get_maxfiles(X1) -> any()

+

+
+ +

ideal_max_files/0

+
+

ideal_max_files() -> any()

+

+
+ +

ideal_max_files/1

+
+

ideal_max_files(D) -> any()

+

+
+ +

max_files/1

+
+

max_files(X1) -> any()

+

+
+ +

rdb_indexes/0

+
+

rdb_indexes() -> any()

+

+
+ +

rdb_indexes/1

+
+

rdb_indexes(Db) -> any()

+

+
+ +

rdb_tabs/0

+
+

rdb_tabs() -> any()

+

+
+ +

rdb_tabs/1

+
+

rdb_tabs(Db) -> any()

+

+
+ +

write_buffer/1

+
+

write_buffer(X1) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/modules-frame.html b/doc/modules-frame.html new file mode 100644 index 0000000..bebffb0 --- /dev/null +++ b/doc/modules-frame.html @@ -0,0 +1,24 @@ + + + +The mnesia_rocksdb application + + + +

Modules

+ + + + + + + + + + + + + +
mnesia_rocksdb
mnesia_rocksdb_admin
mnesia_rocksdb_app
mnesia_rocksdb_lib
mnesia_rocksdb_params
mnesia_rocksdb_sup
mnesia_rocksdb_tuning
mrdb
mrdb_index
mrdb_mutex
mrdb_mutex_serializer
mrdb_select
mrdb_stats
+ + \ No newline at end of file diff --git a/doc/mrdb.html b/doc/mrdb.html new file mode 100644 index 0000000..ec11659 --- /dev/null +++ b/doc/mrdb.html @@ -0,0 +1,728 @@ + + + + +Module mrdb + + + + +
+ +

Module mrdb

+Mid-level access API for Mnesia-managed rocksdb tables. + + +

Description

Mid-level access API for Mnesia-managed rocksdb tables

+ +

This module implements access functions for the mnesia_rocksdb +backend plugin. The functions are designed to also support +direct access to rocksdb with little overhead. Such direct +access will maintain existing indexes, but not support +replication.

+ +

Each table has a metadata structure stored as a persistent +term for fast access. The structure of the metadata is as +follows:

+ +
  #{ name       := <Logical table name>
+   , db_ref     := <Rocksdb database Ref>
+   , cf_handle  := <Rocksdb column family handle>
+   , activity   := Ongoing batch or transaction, if any (map())
+   , attr_pos   := #{AttrName := Pos}
+   , mode       := <Set to 'mnesia' for mnesia access flows>
+   , properties := <Mnesia table props in map format>
+   , type       := column_family | standalone
+   }
+ +

Helper functions like as_batch(Ref, fun(R) -> ... end) and + with_iterator(Ref, fun(I) -> ... end) add some extra + convenience on top of the rocksdb API.

+ + Note that no automatic provision exists to manage concurrent + updates via mnesia AND direct access to this API. It's advisable + to use ONE primary mode of access. If replication is used, + the mnesia API will support this, but direct mrdb updates will + not be replicated. +

Data Types

+ +

activity()

+

activity() = tx_activity() | batch_activity()

+ + +

activity_type()

+

activity_type() = mrdb_activity_type() | mnesia_activity_type()

+ + +

admin_tab()

+

admin_tab() = {admin, alias()}

+ + +

alias()

+

alias() = atom()

+ + +

attr_pos()

+

attr_pos() = #{atom() := pos()}

+ + +

batch_activity()

+

batch_activity() = #{type := batch, handle := batch_handle()}

+ + +

batch_handle()

+

batch_handle() = rocksdb:batch_handle()

+ + +

cf_handle()

+

cf_handle() = rocksdb:cf_handle()

+ + +

db_handle()

+

db_handle() = rocksdb:db_handle()

+ + +

db_ref()

+

db_ref() = #{name => table(), alias => atom(), vsn => non_neg_integer(), db_ref := db_handle(), cf_handle := cf_handle(), semantics := semantics(), encoding := encoding(), attr_pos := attr_pos(), type := column_family | standalone, status := open | closed | pre_existing, properties := properties(), mode => mnesia, ix_vals_f => fun((tuple()) -> [any()]), activity => activity(), term() => term()}

+ + +

encoding()

+

encoding() = raw | sext | term | {key_encoding(), val_encoding()}

+ + +

error()

+

error() = {error, any()}

+ + +

index()

+

index() = {tab_name(), index, any()}

+ + +

index_position()

+

index_position() = atom() | pos()

+ + +

inner()

+

inner() = non_neg_integer()

+ + +

iterator_action()

+

iterator_action() = first | last | next | prev | binary() | {seek, binary()} | {seek_for_prev, binary()}

+ + +

itr_handle()

+

itr_handle() = rocksdb:itr_handle()

+ + +

key()

+

key() = any()

+ + +

key_encoding()

+

key_encoding() = raw | sext | term

+ + +

mnesia_activity_type()

+

mnesia_activity_type() = transaction | sync_transaction | async_dirty | sync_dirty

+ + +

mrdb_activity_type()

+

mrdb_activity_type() = tx | {tx, tx_options()} | batch

+ + +

mrdb_iterator()

+

mrdb_iterator() = #mrdb_iter{i = itr_handle(), ref = db_ref()}

+ + +

obj()

+

obj() = tuple()

+ + +

outer()

+

outer() = non_neg_integer()

+ + +

pos()

+

pos() = non_neg_integer()

+ + +

properties()

+

properties() = #{record_name := atom(), attributes := [atom()], index := [{pos(), bag | ordered}]}

+ + +

read_options()

+

read_options() = [{verify_checksums, boolean()} | {fill_cache, boolean()} | {iterate_upper_bound, binary()} | {iterate_lower_bound, binary()} | {tailing, boolean()} | {total_order_seek, boolean()} | {prefix_same_as_start, boolean()} | {snapshot, snapshot_handle()}]

+ + +

ref_or_tab()

+

ref_or_tab() = table() | db_ref()

+ + +

retainer()

+

retainer() = {tab_name(), retainer, any()}

+ + +

retries()

+

retries() = outer() | {inner(), outer()}

+ + +

semantics()

+

semantics() = bag | set

+ + +

snapshot_handle()

+

snapshot_handle() = rocksdb:snapshot_handle()

+ + +

tab_name()

+

tab_name() = atom()

+ + +

table()

+

table() = atom() | admin_tab() | index() | retainer()

+ + +

tx_activity()

+

tx_activity() = #{type := tx, handle := tx_handle(), attempt := undefined | retries()}

+ + +

tx_handle()

+

tx_handle() = rocksdb:transaction_handle()

+ + +

tx_options()

+

tx_options() = #{retries => retries(), no_snapshot => boolean()}

+ + +

val_encoding()

+

val_encoding() = {value | object, term | raw} | raw

+ + +

write_options()

+

write_options() = [{sync, boolean()} | {disable_wal, boolean()} | {ignore_missing_column_families, boolean()} | {no_slowdown, boolean()} | {low_pri, boolean()}]

+ + +

Function Index

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
abort/1Aborts an ongoing activity/2
activity/3Run an activity (similar to //mnesia/mnesia:activity/2).
alias_of/1Returns the alias of a given table or table reference.
as_batch/2Creates a rocksdb batch context and executes the fun F in it.
as_batch/3as as_batch/2, but with the ability to pass Opts to rocksdb:write_batch/2
batch_write/2
batch_write/3
clear_table/1
current_context/0
delete/2
delete/3
delete_object/2
delete_object/3
ensure_ref/1
ensure_ref/2
first/1
first/2
fold/3
fold/4
fold/5
get_batch/1
get_ref/1
index_read/3
insert/2
insert/3
iterator/1
iterator/2
iterator_close/1
iterator_move/2
last/1
last/2
match_delete/2
new_tx/1
new_tx/2
next/2
next/3
prev/2
prev/3
rdb_delete/2
rdb_delete/3
rdb_fold/4
rdb_fold/5
rdb_get/2
rdb_get/3
rdb_iterator/1
rdb_iterator/2
rdb_iterator_move/2
rdb_put/3
rdb_put/4
read/2
read/3
read_info/1
read_info/2
release_snapshot/1release a snapshot created by snapshot/1.
select/1
select/2
select/3
snapshot/1Create a snapshot of the database instance associated with the +table reference, table name or alias.
tx_commit/1
tx_ref/2
update_counter/3
update_counter/4
with_iterator/2
with_iterator/3
with_rdb_iterator/2
with_rdb_iterator/3
write_info/3
+ +

Function Details

+ +

abort/1

+
+

abort(Reason) -> any()

+

+

Aborts an ongoing activity/2

+ +

activity/3

+
+

activity(Type::activity_type(), Alias::alias(), F::fun(() -> Res)) -> Res

+

+

Run an activity (similar to //mnesia/mnesia:activity/2).

+ + Supported activity types are: +
    +
  • transaction - An optimistic rocksdb transaction
  • +
  • {tx, TxOpts} - A rocksdb transaction with sligth modifications
  • +
  • batch - A rocksdb batch operation
  • +
+ +

By default, transactions are combined with a snapshot with 1 retry. + The snapshot ensures that writes from concurrent transactions don't leak into the transaction context. + A transaction will be retried if it detects that the commit set conflicts with recent changes. + A mutex is used to ensure that only one of potentially conflicting mrdb transactions is run at a time. +The re-run transaction may still fail, if new transactions, or non-transaction writes interfere with +the commit set. It will then be re-run again, until the retry count is exhausted.

+ +

For finer-grained retries, it's possible to set retries => {Inner, Outer}. Setting the retries to a + single number, Retries, is analogous to {0, Retries}`. Each outer retry requests amutex lock' by + waiting in a FIFO queue. Once it receives the lock, it will try the activity once + as many retries + as specified by Inner. If these fail, the activity again goes to the FIFO queue (ending up last + in line) if there are outer retries remaining. When all retries are exhaused, the activity aborts + with retry_limit. Note that transactions, being optimistic, do not request a lock on the first +attempt, but only on outer retries (the first retry is always an outer retry).

+ +

Valid TxOpts are #{no_snapshot => boolean(), retries => retries()}.

+ + To simplify code adaptation, tx | transaction | sync_transaction are synonyms, and + batch | async_dirty | sync_dirty are synonyms.

+ +

alias_of/1

+
+

alias_of(Tab::ref_or_tab()) -> alias()

+

+

Returns the alias of a given table or table reference.

+ +

as_batch/2

+
+

as_batch(Tab::ref_or_tab(), F::fun((db_ref()) -> Res)) -> Res

+

+

Creates a rocksdb batch context and executes the fun F in it.

+ + %% Rocksdb batches aren't tied to a specific DbRef until written. + This can cause surprising problems if we're juggling multiple + rocksdb instances (as we do if we have standalone tables). + At the time of writing, all objects end up in the DbRef the batch + is written to, albeit not necessarily in the intended column family. + This will probably change, but no failure mode is really acceptable. + The code below ensures that separate batches are created for each + DbRef, under a unique reference stored in the pdict. When writing, + all batches are written separately to the corresponding DbRef, + and when releasing, all batches are released. This will not ensure + atomicity, but there is no way in rocksdb to achieve atomicity + across db instances. At least, data should end up where you expect. +

+ +

as_batch/3

+
+

as_batch(Tab, F, Opts) -> any()

+

+

as as_batch/2, but with the ability to pass Opts to rocksdb:write_batch/2

+ +

batch_write/2

+
+

batch_write(Tab, L) -> any()

+

+
+ +

batch_write/3

+
+

batch_write(Tab, L, Opts) -> any()

+

+
+ +

clear_table/1

+
+

clear_table(Tab) -> any()

+

+
+ +

current_context/0

+
+

current_context() -> any()

+

+
+ +

delete/2

+
+

delete(Tab::ref_or_tab(), Key::key()) -> ok

+

+
+ +

delete/3

+
+

delete(Tab::ref_or_tab(), Key::key(), Opts::write_options()) -> ok

+

+
+ +

delete_object/2

+
+

delete_object(Tab, Obj) -> any()

+

+
+ +

delete_object/3

+
+

delete_object(Tab, Obj, Opts) -> any()

+

+
+ +

ensure_ref/1

+
+

ensure_ref(R::ref_or_tab()) -> db_ref()

+

+
+ +

ensure_ref/2

+
+

ensure_ref(Ref, R) -> any()

+

+
+ +

first/1

+
+

first(Tab::ref_or_tab()) -> key() | '$end_of_table'

+

+
+ +

first/2

+
+

first(Tab::ref_or_tab(), Opts::read_options()) -> key() | '$end_of_table'

+

+
+ +

fold/3

+
+

fold(Tab, Fun, Acc) -> any()

+

+
+ +

fold/4

+
+

fold(Tab, Fun, Acc, MatchSpec) -> any()

+

+
+ +

fold/5

+
+

fold(Tab, Fun, Acc, MatchSpec, Limit) -> any()

+

+
+ +

get_batch/1

+
+

get_batch(X1) -> any()

+

+
+ +

get_ref/1

+
+

get_ref(Tab::table()) -> db_ref()

+

+
+ +

index_read/3

+
+

index_read(Tab, Val, Ix) -> any()

+

+
+ +

insert/2

+
+

insert(Tab::ref_or_tab(), Obj::obj()) -> ok

+

+
+ +

insert/3

+
+

insert(Tab::ref_or_tab(), Obj0::obj(), Opts::write_options()) -> ok

+

+
+ +

iterator/1

+
+

iterator(Tab::ref_or_tab()) -> {ok, mrdb_iterator()} | {error, term()}

+

+
+ +

iterator/2

+
+

iterator(Tab::ref_or_tab(), Opts::read_options()) -> {ok, mrdb_iterator()} | {error, term()}

+

+
+ +

iterator_close/1

+
+

iterator_close(Mrdb_iter::mrdb_iterator()) -> ok

+

+
+ +

iterator_move/2

+
+

iterator_move(Mrdb_iter::mrdb_iterator(), Dir::iterator_action()) -> {ok, tuple()} | {error, any()}

+

+
+ +

last/1

+
+

last(Tab::ref_or_tab()) -> key() | '$end_of_table'

+

+
+ +

last/2

+
+

last(Tab::ref_or_tab(), Opts::read_options()) -> key() | '$end_of_table'

+

+
+ +

match_delete/2

+
+

match_delete(Tab, Pat) -> any()

+

+
+ +

new_tx/1

+
+

new_tx(Tab::table() | db_ref()) -> db_ref()

+

+
+ +

new_tx/2

+
+

new_tx(Tab::ref_or_tab(), Opts::write_options()) -> db_ref()

+

+
+ +

next/2

+
+

next(Tab::ref_or_tab(), K::key()) -> key() | '$end_of_table'

+

+
+ +

next/3

+
+

next(Tab::ref_or_tab(), K::key(), Opts::read_options()) -> key() | '$end_of_table'

+

+
+ +

prev/2

+
+

prev(Tab::ref_or_tab(), K::key()) -> key() | '$end_of_table'

+

+
+ +

prev/3

+
+

prev(Tab::ref_or_tab(), K::key(), Opts::read_options()) -> key() | '$end_of_table'

+

+
+ +

rdb_delete/2

+
+

rdb_delete(R, K) -> any()

+

+
+ +

rdb_delete/3

+
+

rdb_delete(R, K, Opts) -> any()

+

+
+ +

rdb_fold/4

+
+

rdb_fold(Tab, Fun, Acc, Prefix) -> any()

+

+
+ +

rdb_fold/5

+
+

rdb_fold(Tab, Fun, Acc, Prefix, Limit) -> any()

+

+
+ +

rdb_get/2

+
+

rdb_get(R, K) -> any()

+

+
+ +

rdb_get/3

+
+

rdb_get(R, K, Opts) -> any()

+

+
+ +

rdb_iterator/1

+
+

rdb_iterator(R) -> any()

+

+
+ +

rdb_iterator/2

+
+

rdb_iterator(R, Opts) -> any()

+

+
+ +

rdb_iterator_move/2

+
+

rdb_iterator_move(I, Dir) -> any()

+

+
+ +

rdb_put/3

+
+

rdb_put(R, K, V) -> any()

+

+
+ +

rdb_put/4

+
+

rdb_put(R, K, V, Opts) -> any()

+

+
+ +

read/2

+
+

read(Tab, Key) -> any()

+

+
+ +

read/3

+
+

read(Tab, Key, Opts) -> any()

+

+
+ +

read_info/1

+
+

read_info(Tab) -> any()

+

+
+ +

read_info/2

+
+

read_info(Tab, K) -> any()

+

+
+ +

release_snapshot/1

+
+

release_snapshot(SHandle::snapshot_handle()) -> ok | error()

+

+

release a snapshot created by snapshot/1.

+ +

select/1

+
+

select(Cont) -> any()

+

+
+ +

select/2

+
+

select(Tab, Pat) -> any()

+

+
+ +

select/3

+
+

select(Tab, Pat, Limit) -> any()

+

+
+ +

snapshot/1

+
+

snapshot(Name::alias() | ref_or_tab()) -> {ok, snapshot_handle()} | error()

+

+

Create a snapshot of the database instance associated with the +table reference, table name or alias.

+ + Snapshots provide consistent read-only views over the entire state of the key-value store.

+ +

tx_commit/1

+
+

tx_commit(TxH::tx_handle() | db_ref()) -> ok

+

+
+ +

tx_ref/2

+
+

tx_ref(Tab::ref_or_tab() | db_ref() | db_ref(), TxH::tx_handle()) -> db_ref()

+

+
+ +

update_counter/3

+
+

update_counter(Tab, C, Val) -> any()

+

+
+ +

update_counter/4

+
+

update_counter(Tab, C, Val, Opts) -> any()

+

+
+ +

with_iterator/2

+
+

with_iterator(Tab::ref_or_tab(), Fun::fun((mrdb_iterator()) -> Res)) -> Res

+

+
+ +

with_iterator/3

+
+

with_iterator(Tab::ref_or_tab(), Fun::fun((mrdb_iterator()) -> Res), Opts::read_options()) -> Res

+

+
+ +

with_rdb_iterator/2

+
+

with_rdb_iterator(Tab::ref_or_tab(), Fun::fun((itr_handle()) -> Res)) -> Res

+

+
+ +

with_rdb_iterator/3

+
+

with_rdb_iterator(Tab::ref_or_tab(), Fun::fun((itr_handle()) -> Res), Opts::read_options()) -> Res

+

+
+ +

write_info/3

+
+

write_info(Tab, K, V) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mrdb_index.html b/doc/mrdb_index.html new file mode 100644 index 0000000..7603753 --- /dev/null +++ b/doc/mrdb_index.html @@ -0,0 +1,71 @@ + + + + +Module mrdb_index + + + + +
+ +

Module mrdb_index

+ + + +

Data Types

+ +

index_value()

+

index_value() = any()

+ + +

iterator_action()

+

iterator_action() = mrdb:iterator_action()

+ + +

ix_iterator()

+

ix_iterator() = #mrdb_ix_iter{i = mrdb:iterator(), type = set | bag, sub = mrdb:ref() | pid()}

+ + +

object()

+

object() = tuple()

+ + +

Function Index

+ + + + +
iterator/2
iterator_close/1
iterator_move/2
with_iterator/3
+ +

Function Details

+ +

iterator/2

+
+

iterator(Tab::mrdb:ref_or_tab(), IxPos::mrdb:index_position()) -> {ok, ix_iterator()} | {error, term()}

+

+
+ +

iterator_close/1

+
+

iterator_close(Mrdb_ix_iter::ix_iterator()) -> ok

+

+
+ +

iterator_move/2

+
+

iterator_move(Mrdb_ix_iter::ix_iterator(), Dir::iterator_action()) -> {ok, index_value(), object()} | {error, term()}

+

+
+ +

with_iterator/3

+
+

with_iterator(Tab::mrdb:ref_or_tab(), IxPos::mrdb:index_position(), Fun::fun((ix_iterator()) -> Res)) -> Res

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mrdb_mutex.html b/doc/mrdb_mutex.html new file mode 100644 index 0000000..cb6e597 --- /dev/null +++ b/doc/mrdb_mutex.html @@ -0,0 +1,32 @@ + + + + +Module mrdb_mutex + + + + +
+ +

Module mrdb_mutex

+ + + +

Function Index

+ +
do/2
+ +

Function Details

+ +

do/2

+
+

do(Rsrc, F) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mrdb_mutex_serializer.html b/doc/mrdb_mutex_serializer.html new file mode 100644 index 0000000..8e07aa4 --- /dev/null +++ b/doc/mrdb_mutex_serializer.html @@ -0,0 +1,88 @@ + + + + +Module mrdb_mutex_serializer + + + + +
+ +

Module mrdb_mutex_serializer

+ + + +

Function Index

+ + + + + + + + + +
code_change/3
done/2
handle_call/3
handle_cast/2
handle_info/2
init/1
start_link/0
terminate/2
wait/1
+ +

Function Details

+ +

code_change/3

+
+

code_change(FromVsn, St, Extra) -> any()

+

+
+ +

done/2

+
+

done(Rsrc, Ref) -> any()

+

+
+ +

handle_call/3

+
+

handle_call(X1, From, St) -> any()

+

+
+ +

handle_cast/2

+
+

handle_cast(X1, St) -> any()

+

+
+ +

handle_info/2

+
+

handle_info(X1, St) -> any()

+

+
+ +

init/1

+
+

init(X1) -> any()

+

+
+ +

start_link/0

+
+

start_link() -> any()

+

+
+ +

terminate/2

+
+

terminate(X1, X2) -> any()

+

+
+ +

wait/1

+
+

wait(Rsrc) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mrdb_select.html b/doc/mrdb_select.html new file mode 100644 index 0000000..9acf856 --- /dev/null +++ b/doc/mrdb_select.html @@ -0,0 +1,67 @@ + + + + +Module mrdb_select + + + + +
+ +

Module mrdb_select

+ + + +

Function Index

+ + + + + + +
continuation_info/2
fold/5
rdb_fold/5
select/1
select/3
select/4
+ +

Function Details

+ +

continuation_info/2

+
+

continuation_info(Item, C) -> any()

+

+
+ +

fold/5

+
+

fold(Ref, Fun, Acc, MS, Limit) -> any()

+

+
+ +

rdb_fold/5

+
+

rdb_fold(Ref, Fun, Acc, Prefix, Limit) -> any()

+

+
+ +

select/1

+
+

select(Cont) -> any()

+

+
+ +

select/3

+
+

select(Ref, MS, Limit) -> any()

+

+
+ +

select/4

+
+

select(Ref, MS, AccKeys, Limit) -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/mrdb_stats.html b/doc/mrdb_stats.html new file mode 100644 index 0000000..b4220fd --- /dev/null +++ b/doc/mrdb_stats.html @@ -0,0 +1,87 @@ + + + + +Module mrdb_stats + + + + +
+ +

Module mrdb_stats

+Statistics API for the mnesia_rocksdb plugin. + + +

Description

Statistics API for the mnesia_rocksdb plugin

+ + Some counters are maintained for each active alias. Currently, the following + counters are supported: + * inner_retries + * outer_retries + +

Data Types

+ +

alias()

+

alias() = mnesia_rocksdb:alias()

+ + +

counter()

+

counter() = atom()

+ + +

counters()

+

counters() = #{counter() := integer()}

+ + +

db_ref()

+

db_ref() = mrdb:db_ref()

+ + +

increment()

+

increment() = integer()

+ + +

Function Index

+ + + + +
get/1Fetches all known counters for Alias, in the form of a map, + #{Counter => Value}.
get/2Fetches the integer value of the known counter Ctr for Alias.
incr/3Increment Ctr counter for Alias` with increment `N.
new/0
+ +

Function Details

+ +

get/1

+
+

get(Alias::alias() | db_ref()) -> counters()

+

+

Fetches all known counters for Alias, in the form of a map, + #{Counter => Value}.

+ +

get/2

+
+

get(Alias::alias() | db_ref(), Ctr::counter()) -> integer()

+

+

Fetches the integer value of the known counter Ctr for Alias.

+ +

incr/3

+
+

incr(Alias::alias() | db_ref(), Ctr::counter(), N::increment()) -> ok

+

+

Increment Ctr counter for Alias` with increment `N.

+ + Note that the first argument may also be a db_ref() map, + corresponding to mrdb:get_ref({admin, Alias}).

+ +

new/0

+
+

new() -> any()

+

+
+
+ + +

Generated by EDoc

+ + diff --git a/doc/overview-summary.html b/doc/overview-summary.html new file mode 100644 index 0000000..328774c --- /dev/null +++ b/doc/overview-summary.html @@ -0,0 +1,254 @@ + + + + +Mnesia Rocksdb - Rocksdb backend plugin for Mnesia + + + + + +

Mnesia Rocksdb - Rocksdb backend plugin for Mnesia +

+

Copyright © 2013-21 Klarna AB

+

Authors: Ulf Wiger (ulf@wiger.net).

+ + +

The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making +it possible to utilize more capable key-value stores than the dets +module (limited to 2 GB per table). Unfortunately, this support is +undocumented. Below, some informal documentation for the plugin system +is provided.

+ +

Table of Contents

+
    +
  1. Usage
  2. +
      +
    1. Prerequisites
    2. +
    3. Getting started
    4. +
    5. Special features
    6. +
    7. Customization
    8. +
    9. Handling of errors in write operations
    10. +
    11. Caveats
    12. +
    +
  3. Mnesia backend plugins
  4. +
      +
    1. Background
    2. +
    3. Design
    4. +
    +
  5. Mnesia index plugins
  6. +
  7. Rocksdb
  8. +
+ +

Usage

+ +

Prerequisites

+ +
    +
  • rocksdb (included as dependency)
  • +
  • sext (included as dependency)
  • +
  • Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)
  • +
+ +

Getting started

+ +

Call mnesia_rocksdb:register() immediately after +starting mnesia.

+ +

Put {rocksdb_copies, [node()]} into the table definitions of +tables you want to be in RocksDB.

+ +

Special features

+ +

RocksDB tables support efficient selects on prefix keys.

+ +

The backend uses the sext module (see +https://github.com/uwiger/sext) for mapping between Erlang terms and the +binary data stored in the tables. This provides two useful properties:

+ +
    +
  • The records are stored in the Erlang term order of their keys.
  • +
  • A prefix of a composite key is ordered just before any key for which + it is a prefix. For example, {x, '_'} is a prefix for keys {x, a}, + {x, b} and so on.
  • +
+ +

This means that a prefix key identifies the start of the sequence of +entries whose keys match the prefix. The backend uses this to optimize +selects on prefix keys.

+ +

### Customization

+ +

RocksDB supports a number of customization options. These can be specified +by providing a {Key, Value} list named rocksdb_opts under user_properties, +for example:

+ +
mnesia:create_table(foo, [{rocksdb_copies, [node()]},
+                          ...
+                          {user_properties,
+                              [{rocksdb_opts, [{max_open_files, 1024}]}]
+                          }])
+ +

Consult the RocksDB documentation +for information on configuration parameters. Also see the section below on handling write errors.

+ +The default configuration for tables in mnesia_rocksdb is: +
default_open_opts() ->
+    [ {create_if_missing, true}
+      , {cache_size,
+         list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
+      , {block_size, 1024}
+      , {max_open_files, 100}
+      , {write_buffer_size,
+         list_to_integer(get_env_default(
+                           "ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
+      , {compression,
+         list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
+      , {use_bloomfilter, true}
+    ].
+ +

It is also possible, for larger databases, to produce a tuning parameter file. +This is experimental, and mostly copied from mnesia_leveldb. Consult the +source code in mnesia_rocksdb_tuning.erl and mnesia_rocksdb_params.erl. +Contributions are welcome.

+ +

Caveats

+ +

Avoid placing bag tables in RocksDB. Although they work, each write +requires additional reads, causing substantial runtime overheads. There +are better ways to represent and process bag data (see above about +prefix keys).

+ +

The mnesia:table_info(T, size) call always returns zero for RocksDB +tables. RocksDB itself does not track the number of elements in a table, and +although it is possible to make the mnesia_rocksdb backend maintain a size +counter, it incurs a high runtime overhead for writes and deletes since it +forces them to first do a read to check the existence of the key. If you +depend on having an up to date size count at all times, you need to maintain +it yourself. If you only need the size occasionally, you may traverse the +table to count the elements.

+ +

Mnesia backend plugins

+ +

Background

+ +

Mnesia was initially designed to be a RAM-only DBMS, and Erlang's +ets tables were developed for this purpose. In order to support +persistence, e.g. for configuration data, a disk-based version of ets +(called dets) was created. The dets API mimicks the ets API, +and dets is quite convenient and fast for (nowadays) small datasets. +However, using a 32-bit bucket system, it is limited to 2GB of data. +It also doesn't support ordered sets. When used in Mnesia, dets-based +tables are called disc_only_copies.

+ +

To circumvent these limitations, another table type, called disc_copies +was added. This is a combination of ets and disk_log, where Mnesia +periodically snapshots the ets data to a log file on disk, and meanwhile +maintains a log of updates, which can be applied at startup. These tables +are quite performant (especially on read access), but all data is kept in +RAM, which can become a serious limitation.

+ +

A backend plugin system was proposed by Ulf Wiger in 2016, and further +developed with Klarna's support, to finally become included in OTP 19. +Klarna uses a LevelDb backend, but Aeternity, in 2017, instead chose +to implement a Rocksdb backend plugin.

+ +

Design

+ +

As backend plugins were added on a long-since legacy-stable Mnesia, +they had to conform to the existing code structure. For this reason, +the plugin callbacks hook into the already present low-level access +API in the mnesia_lib module. As a consequence, backend plugins have +the same access semantics and granularity as ets and dets. This +isn't much of a disadvantage for key-value stores like LevelDb and RocksDB, +but a more serious issue is that the update part of this API is called +on after the point of no return. That is, Mnesia does not expect +these updates to fail, and has no recourse if they do. As an aside, +this could also happen if a disc_only_copies table exceeds the 2 GB +limit (mnesia will not check it, and dets will not complain, but simply +drop the update.)

+ +

Mnesia index plugins

+ +

When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.

+ +

An index plugin can be added in one of two ways:

+ +
    +
  1. When creating a schema, provide {index_plugins, [{Name, Module, Function}]} options.
  2. +
  3. Call the function mnesia_schema:add_index_plugin(Name, Module, Function)
  4. +
+ +

Name must be an atom wrapped as a 1-tuple, e.g. {words}.

+ +

The plugin callback is called as Module:Function(Table, Pos, Obj), where Pos=={words} in +our example. It returns a list of index terms.

+ +

Example

+ +

Given the following index plugin implementation:

+ +
-module(words).
+-export([words_f/3]).
+
+words_f(_,_,Obj) when is_tuple(Obj) ->
+    words_(tuple_to_list(Obj)).
+
+words_(Str) when is_binary(Str) ->
+    string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
+words_(L) when is_list(L) ->
+    lists:flatmap(fun words_/1, L);
+words_(_) ->
+    [].
+ +

We can register the plugin and use it in table definitions:

+ +
Eshell V12.1.3  (abort with ^G)
+1> mnesia:start().
+ok
+2> mnesia_schema:add_index_plugin({words}, words, words_f).
+{atomic,ok}
+3> mnesia:create_table(i, [{index, [{words}]}]).
+{atomic,ok}
+ +

Note that in this case, we had neither a backend plugin, nor even a persistent schema. +Index plugins can be used with all table types. The registered indexing function (arity 3) must exist +as an exported function along the node's code path.

+ +

To see what happens when we insert an object, we can turn on call trace.

+ +
4> dbg:tracer().
+{ok,<0.108.0>}
+5> dbg:tp(words, x).
+{ok,[{matched,nonode@nohost,3},{saved,x}]}
+6> dbg:p(all,[c]).
+{ok,[{matched,nonode@nohost,60}]}
+7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
+(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
+(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
+                                             <<"four">>]
+(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
+(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
+                                             <<"four">>]
+ok
+8> dbg:ctp('_'), dbg:stop().
+ok
+9> mnesia:dirty_index_read(i, <<"one">>, {words}).
+[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
+ +

(The fact that the indexing function is called twice, seems like a performance bug.)

+ +

We can observe that the indexing callback is able to operate on the whole object. +It needs to be side-effect free and efficient, since it will be called at least once for each update +(if an old object exists in the table, the indexing function will be called on it too, before it is +replaced by the new object.)

+ +

Rocksdb

+ +

Usage

+ +
+ +

Generated by EDoc

+ + diff --git a/src/mnesia_rocksdb_admin.erl b/src/mnesia_rocksdb_admin.erl index aa4662a..b2be125 100644 --- a/src/mnesia_rocksdb_admin.erl +++ b/src/mnesia_rocksdb_admin.erl @@ -407,12 +407,14 @@ try_load_admin_db(Alias, AliasOpts, #st{ backends = Bs %% We need to store the persistent ref explicitly here, %% since mnesia knows nothing of our admin table. AdminTab = {admin, Alias}, + Stats = mrdb_stats:new(), CfI = update_cf_info(AdminTab, #{ status => open , name => AdminTab , vsn => ?VSN , encoding => {sext,{value,term}} , attr_pos => #{key => 1, value => 2} + , stats => Stats , mountpoint => MP , properties => #{ attributes => [key, val] diff --git a/src/mrdb.erl b/src/mrdb.erl index 4b982a6..99a2a81 100644 --- a/src/mrdb.erl +++ b/src/mrdb.erl @@ -117,7 +117,9 @@ | index() | retainer(). --type retries() :: non_neg_integer(). +-type inner() :: non_neg_integer(). +-type outer() :: non_neg_integer(). +-type retries() :: outer() | {inner(), outer()}. %% activity type 'ets' makes no sense in this context -type mnesia_activity_type() :: transaction @@ -143,7 +145,7 @@ -type tx_activity() :: #{ type := 'tx' , handle := tx_handle() - , attempt := non_neg_integer() }. + , attempt := 'undefined' | retries() }. -type batch_activity() :: #{ type := 'batch' , handle := batch_handle() }. -type activity() :: tx_activity() | batch_activity(). @@ -256,6 +258,14 @@ release_snapshot(SHandle) -> %% The re-run transaction may still fail, if new transactions, or non-transaction writes interfere with %% the commit set. It will then be re-run again, until the retry count is exhausted. %% +%% For finer-grained retries, it's possible to set `retries => {Inner, Outer}'. Setting the retries to a +%% single number, `Retries', is analogous to `{0, Retries}`. Each outer retry requests a 'mutex lock' by +%% waiting in a FIFO queue. Once it receives the lock, it will try the activity once + as many retries +%% as specified by `Inner'. If these fail, the activity again goes to the FIFO queue (ending up last +%% in line) if there are outer retries remaining. When all retries are exhaused, the activity aborts +%% with `retry_limit'. Note that transactions, being optimistic, do not request a lock on the first +%% attempt, but only on outer retries (the first retry is always an outer retry). +%% %% Valid `TxOpts' are `#{no_snapshot => boolean(), retries => retries()}'. %% %% To simplify code adaptation, `tx | transaction | sync_transaction' are synonyms, and @@ -277,32 +287,66 @@ activity(Type, Alias, F) -> , alias => Alias , db_ref => DbRef } end, - do_activity(F, Alias, Ctxt, false). + do_activity(F, Alias, Ctxt). -do_activity(F, Alias, Ctxt, WithLock) -> - try run_f(F, Ctxt, WithLock, Alias) of +do_activity(F, Alias, Ctxt) -> + try try_f(F, Ctxt) + catch + throw:{?MODULE, busy} -> + retry_activity(F, Alias, Ctxt) + end. + +try_f(F, Ctxt) -> + try run_f(F, Ctxt) of Res -> - try commit_and_pop(Res) - catch - throw:{?MODULE, busy} -> - do_activity(F, Alias, Ctxt, true) - end + commit_and_pop(Res) catch Cat:Err -> abort_and_pop(Cat, Err) end. --spec run_f(_, #{'activity':=#{'handle':=_, 'type':='batch' | 'tx', 'attempt'=>1, 'no_snapshot'=>boolean(), 'retries'=>non_neg_integer(), _=>_}, 'alias':=_, 'db_ref':=_, 'no_snapshot'=>boolean(), 'retries'=>non_neg_integer(), _=>_}, boolean(), _) -> any(). -run_f(F, Ctxt, false, _) -> +run_f(F, Ctxt) -> push_ctxt(Ctxt), - F(); -run_f(F, Ctxt, true, Alias) -> - push_ctxt(incr_attempt(Ctxt)), - mrdb_mutex:do(Alias, F). + F(). -incr_attempt(#{ activity := #{type := tx, attempt := A} = Act, db_ref := DbRef } = C) -> +incr_attempt(0, {_,O}) when O > 0 -> + {outer, {0,1}}; +incr_attempt({I,O}, {Ri,Ro}) when is_integer(I), is_integer(O), + is_integer(Ri), is_integer(Ro) -> + if I < Ri -> {inner, {I+1, O}}; + O < Ro -> {outer, {0, O+1}}; + true -> + error + end; +incr_attempt(_, _) -> + error. + +retry_activity(F, Alias, #{activity := #{ type := Type + , attempt := A + , retries := R} = Act} = Ctxt) -> + case incr_attempt(A, R) of + {RetryCtxt, A1} -> + Act1 = Act#{attempt := A1}, + Ctxt1 = Ctxt#{activity := Act1}, + try retry_activity_(RetryCtxt, F, Alias, restart_ctxt(Ctxt1)) + catch + throw:{?MODULE, busy} -> + retry_activity(F, Alias, Ctxt1) + end; + error -> + return_abort(Type, error, retry_limit) + end. + +retry_activity_(inner, F, Alias, Ctxt) -> + mrdb_stats:incr(Alias, inner_retries, 1), + try_f(F, Ctxt); +retry_activity_(outer, F, Alias, Ctxt) -> + mrdb_stats:incr(Alias, outer_retries, 1), + mrdb_mutex:do(Alias, fun() -> try_f(F, Ctxt) end). + +restart_ctxt(#{ activity := #{type := tx} = Act, db_ref := DbRef } = C) -> {ok, TxH} = rdb_transaction(DbRef, []), - Act1 = Act#{attempt := A+1, handle := TxH}, + Act1 = Act#{handle := TxH}, C1 = C#{ activity := Act1 }, case maps:is_key(snapshot, C) of true -> @@ -375,10 +419,14 @@ check_tx_opts(Opts) -> end. check_retries(#{retries := Retries} = Opts) -> - if is_integer(Retries), Retries >= 0 -> - Opts; - true -> - error({invalid_tx_option, {retries, Retries}}) + case Retries of + _ when is_integer(Retries), Retries >= 0 -> + Opts#{retries := {0, Retries}}; + {Inner, Outer} when is_integer(Inner), is_integer(Outer), + Inner >= 0, Outer >= 0 -> + Opts; + _ -> + error({invalid_tx_option, {retries, Retries}}) end. check_nosnap(#{no_snapshot := NoSnap} = Opts) -> @@ -393,7 +441,7 @@ create_tx(Opts, DbRef) -> {ok, TxH} = rdb_transaction(DbRef, []), Opts#{activity => maps:merge(Opts, #{ type => tx , handle => TxH - , attempt => 1})}. + , attempt => 0 })}. maybe_snapshot(#{no_snapshot := NoSnap} = Opts, DbRef) -> case NoSnap of @@ -413,8 +461,7 @@ commit_and_pop(Res) -> Res; {error, {error, "Resource busy" ++ _ = Busy}} -> case A of - #{retries := Retries, attempt := Att} - when Att =< Retries -> + #{retries := {I,O}} when I > 0; O > 0 -> throw({?MODULE, busy}); _ -> error({error, Busy}) @@ -530,7 +577,7 @@ new_tx(#{activity := _}, _) -> new_tx(Tab, Opts) -> #{db_ref := DbRef} = R = ensure_ref(Tab), {ok, TxH} = rdb_transaction(DbRef, write_opts(R, Opts)), - R#{activity => #{type => tx, handle => TxH, attempt => 1}}. + R#{activity => #{type => tx, handle => TxH, attempt => 0}}. -spec tx_ref(ref_or_tab() | db_ref() | db_ref(), tx_handle()) -> db_ref(). tx_ref(Tab, TxH) -> @@ -540,7 +587,7 @@ tx_ref(Tab, TxH) -> #{activity := #{type := tx, handle := OtherTxH}} -> error({tx_handle_conflict, OtherTxH}); R -> - R#{activity => #{type => tx, handle => TxH, attempt => 1}} + R#{activity => #{type => tx, handle => TxH, attempt => 0}} end. -spec tx_commit(tx_handle() | db_ref()) -> ok. diff --git a/src/mrdb_stats.erl b/src/mrdb_stats.erl new file mode 100644 index 0000000..78c5d3d --- /dev/null +++ b/src/mrdb_stats.erl @@ -0,0 +1,74 @@ +%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*- +%% @doc Statistics API for the mnesia_rocksdb plugin +%% +%% Some counters are maintained for each active alias. Currently, the following +%% counters are supported: +%% * inner_retries +%% * outer_retries +%% +-module(mrdb_stats). + +-export([new/0]). + +-export([incr/3, + get/1, + get/2]). + +-type alias() :: mnesia_rocksdb:alias(). +-type db_ref() :: mrdb:db_ref(). +-type counter() :: atom(). +-type increment() :: integer(). + +-type counters() :: #{ counter() := integer() }. + +new() -> + #{ ref => counters:new(map_size(ctr_meta()), [write_concurrency]) + , meta => ctr_meta()}. + +ctr_meta() -> + #{ inner_retries => 1 + , outer_retries => 2 }. + +-spec incr(alias() | db_ref(), counter(), increment()) -> ok. +%% @doc Increment `Ctr' counter for `Alias` with increment `N'. +%% +%% Note that the first argument may also be a `db_ref()' map, +%% corresponding to `mrdb:get_ref({admin, Alias})'. +%% @end +incr(Alias, Ctr, N) when is_atom(Alias) -> + #{stats := #{ref := Ref, meta := Meta}} = mrdb:get_ref({admin, Alias}), + incr_(Ref, Meta, Ctr, N); +incr(#{stats := #{ref := Ref, meta := Meta}}, Ctr, N) -> + incr_(Ref, Meta, Ctr, N). + +-spec get(alias() | db_ref(), counter()) -> integer(). +%% @doc Fetches the integer value of the known counter `Ctr' for `Alias'. +%% @end +get(Alias, Ctr) when is_atom(Alias) -> + #{stats := #{ref := Ref, meta := Meta}} = mrdb:get_ref({admin, Alias}), + get_(Ref, Meta, Ctr); +get(#{stats := #{ref := Ref, meta := Meta}}, Ctr) -> + get_(Ref, Meta, Ctr). + +-spec get(alias() | db_ref()) -> counters(). +%% @doc Fetches all known counters for `Alias', in the form of a map, +%% `#{Counter => Value}'. +%% @end +get(Alias) when is_atom(Alias) -> + get_(mrdb:get_ref({admin, Alias})); +get(Ref) when is_map(Ref) -> + get_(Ref). + +get_(#{stats := #{ref := Ref, meta := Meta}}) -> + lists:foldl( + fun({K, P}, M) -> + M#{K := counters:get(Ref, P)} + end, Meta, maps:to_list(Meta)). + +get_(Ref, Meta, Attr) -> + Ix = maps:get(Attr, Meta), + counters:get(Ref, Ix). + +incr_(Ref, Meta, Attr, N) -> + Ix = maps:get(Attr, Meta), + counters:add(Ref, Ix, N). diff --git a/test/mnesia_rocksdb_SUITE.erl b/test/mnesia_rocksdb_SUITE.erl index 5c4d1eb..3927646 100644 --- a/test/mnesia_rocksdb_SUITE.erl +++ b/test/mnesia_rocksdb_SUITE.erl @@ -24,6 +24,7 @@ , mrdb_abort/1 , mrdb_two_procs/1 , mrdb_two_procs_tx_restart/1 + , mrdb_two_procs_tx_inner_restart/1 , mrdb_two_procs_snap/1 , mrdb_three_procs/1 ]). @@ -53,6 +54,7 @@ groups() -> , mrdb_abort , mrdb_two_procs , mrdb_two_procs_tx_restart + , mrdb_two_procs_tx_inner_restart , mrdb_two_procs_snap , mrdb_three_procs ]} ]. @@ -287,10 +289,17 @@ mrdb_abort(Config) -> mrdb_two_procs(Config) -> tr_ct:with_trace(fun mrdb_two_procs_/1, Config, tr_flags( - {self(), [call, sos, p]}, + {self(), [call, sos, p, 'receive']}, tr_patterns( mrdb, [ {mrdb, insert, 2, x} , {mrdb, read, 2, x} + , {mrdb, retry_activity, 3, x} + , {mrdb, try_f, 2, x} + , {mrdb, incr_attempt, 2, x} + , {mrdb_mutex, do, 2, x} + , {mrdb_mutex_serializer, do, 2, x} + , {?MODULE, wait_for_other, 2, x} + , {?MODULE, go_ahead_other, 1, x} , {mrdb, activity, x}], tr_opts()))). mrdb_two_procs_(Config) -> @@ -314,11 +323,16 @@ mrdb_two_procs_(Config) -> Pre = mrdb:read(R, a), go_ahead_other(POther), await_other_down(POther, MRef, ?LINE), + %% The test proc is still inside the transaction, and POther + %% has terminated/committed. If we now read 'a', we should see + %% the value that POther wrote (no isolation). [{R, a, 17}] = mrdb:read(R, a), ok = mrdb:insert(R, {R, a, 18}) end, - go_ahead_other(1, POther), + go_ahead_other(0, POther), Do0 = get_dict(), + %% Our transaction should fail due to the resource conflict, and because + %% we're not using retries. try mrdb:activity({tx, #{no_snapshot => true, retries => 0}}, rdb, F1) of ok -> error(unexpected) @@ -339,6 +353,7 @@ mrdb_two_procs_tx_restart_(Config) -> R = ?FUNCTION_NAME, Parent = self(), Created = create_tabs([{R, []}], Config), + check_stats(rdb), mrdb:insert(R, {R, a, 1}), Pre = mrdb:read(R, a), F0 = fun() -> @@ -354,7 +369,7 @@ mrdb_two_procs_tx_restart_(Config) -> OtherWrite = [{R, a, 17}], Att = get_attempt(), Expected = case Att of - 1 -> Pre; + 0 -> Pre; _ -> OtherWrite end, Expected = mrdb:read(R, a), @@ -363,14 +378,88 @@ mrdb_two_procs_tx_restart_(Config) -> OtherWrite = mrdb:read(R, a), ok = mrdb:insert(R, {R, a, 18}) end, - go_ahead_other(1, POther), + go_ahead_other(0, POther), Do0 = get_dict(), mrdb:activity({tx, #{no_snapshot => true}}, rdb, F1), dictionary_unchanged(Do0), + check_stats(rdb), [{R, a, 18}] = mrdb:read(R, a), delete_tabs(Created), ok. +mrdb_two_procs_tx_inner_restart(Config) -> + tr_ct:with_trace(fun mrdb_two_procs_tx_inner_restart_/1, Config, + dbg_tr_opts()). + +mrdb_two_procs_tx_inner_restart_(Config) -> + R = ?FUNCTION_NAME, + Parent = self(), + Created = create_tabs([{R, []}], Config), + mrdb:insert(R, {R, a, 1}), + mrdb:insert(R, {R, b, 1}), + PreA = mrdb:read(R, a), + PreB = mrdb:read(R, b), + #{inner_retries := Ri0, outer_retries := Ro0} = check_stats(rdb), + F0 = fun() -> + wait_for_other(Parent, ?LINE), + ok = mrdb:insert(R, {R, a, 17}), + wait_for_other(Parent, ?LINE) + end, + F1 = fun() -> + wait_for_other(Parent, ?LINE), + ok = mrdb:insert(R, {R, b, 147}), + wait_for_other(Parent, ?LINE) + end, + + Spawn = fun(F) -> + Res = spawn_opt( + fun() -> + ok = mrdb:activity(tx, rdb, F) + end, [monitor]), + Res + end, + {P1, MRef1} = Spawn(F0), + {P2, MRef2} = Spawn(F1), + F2 = fun() -> + PostA = [{R, a, 17}], % once F0 (P1) has committed + PostB = [{R, b, 147}], % once F1 (P2) has committed + ARes = mrdb:read(R, a), % We need to read first to establish a pre-image + BRes = mrdb:read(R, b), + Att = get_attempt(), + ct:log("Att = ~p", [Att]), + case Att of + 0 -> % This is the first run (no retry yet) + ARes = PreA, + BRes = PreB, + go_ahead_other(0, P1), % Having our pre-image, now let P1 write + go_ahead_other(0, P1), % Let P1 commit, then await DOWN (normal) + await_other_down(P1, MRef1, ?LINE), + PostA = mrdb:read(R, a); % now, P1's write should leak through here + {0, 1} -> % This is the first (outer) retry + go_ahead_other(0, P2), % Let P2 write + go_ahead_other(0, P2), % Let P2 commit, then await DOWN (normal) + await_other_down(P2, MRef2, ?LINE), + PostA = mrdb:read(R, a), % now we should see writes from both P1 + PostB = mrdb:read(R, b); % ... and P2 + {1, 1} -> + PostA = mrdb:read(R, a), + PostB = mrdb:read(R, b), + ok + end, + mrdb:insert(R, {R, a, 18}), + mrdb:insert(R, {R, b, 18}) + end, + Do0 = get_dict(), + mrdb:activity({tx, #{no_snapshot => true, retries => {1,1}}}, rdb, F2), + check_stats(rdb), + dictionary_unchanged(Do0), + [{R, a, 18}] = mrdb:read(R, a), + [{R, b, 18}] = mrdb:read(R, b), + #{inner_retries := Ri1, outer_retries := Ro1} = check_stats(rdb), + {restarts, {1, 1}} = {restarts, {Ri1 - Ri0, Ro1 - Ro0}}, + delete_tabs(Created), + ok. + % %% For testing purposes, we use side-effects inside the transactions @@ -383,7 +472,7 @@ mrdb_two_procs_tx_restart_(Config) -> %% attempt, and ignore the sync ops on retries. %% -define(IF_FIRST(N, Expr), - if N == 1 -> + if N == 0 -> Expr; true -> ok @@ -413,8 +502,8 @@ mrdb_two_procs_snap(Config) -> go_ahead_other(Att, POther), ARes = mrdb:read(R, a), ARes = case Att of - 1 -> Pre; - 2 -> [{R, a, 17}] + 0 -> Pre; + _ -> [{R, a, 17}] end, await_other_down(POther, MRef, ?LINE), PreB = mrdb:read(R, b), @@ -434,7 +523,7 @@ mrdb_two_procs_snap(Config) -> %% We make sure that P2 commits before finishing the other two, and P3 and the %% main thread sync, so as to maximize the contention for the retry lock. mrdb_three_procs(Config) -> - tr_ct:with_trace(fun mrdb_three_procs_/1, Config, light_tr_opts()). + tr_ct:with_trace(fun mrdb_three_procs_/1, Config, dbg_tr_opts()). mrdb_three_procs_(Config) -> R = ?FUNCTION_NAME, @@ -452,7 +541,7 @@ mrdb_three_procs_(Config) -> spawn_opt(fun() -> D0 = get_dict(), do_when_p_allows( - 1, Parent, ?LINE, + 0, Parent, ?LINE, fun() -> ok = mrdb:activity({tx,#{retries => 0}}, rdb, F1) end), @@ -488,8 +577,8 @@ mrdb_three_procs_(Config) -> fun() -> Att = get_attempt(), ARes = case Att of - 1 -> [A0]; - 2 -> [A1] + 0 -> [A0]; + _ -> [A1] end, %% First, ensure that P2 tx is running go_ahead_other(Att, P2), @@ -519,6 +608,7 @@ tr_opts() -> , {?MODULE, await_other_down, 3, x} , {?MODULE, do_when_p_allows, 4, x} , {?MODULE, allow_p, 3, x} + , {?MODULE, check_stats, 1, x} ]}. light_tr_opts() -> @@ -529,6 +619,22 @@ light_tr_opts() -> , {mrdb, read, 2, x} , {mrdb, activity, x} ], tr_opts())). +dbg_tr_opts() -> + tr_flags( + {self(), [call, sos, p, 'receive']}, + tr_patterns( + mrdb, [ {mrdb, insert, 2, x} + , {mrdb, read, 2, x} + , {mrdb, retry_activity, 3, x} + , {mrdb, try_f, 2, x} + , {mrdb, incr_attempt, 2, x} + , {mrdb, current_context, 0, x} + , {mrdb_mutex, do, 2, x} + , {mrdb_mutex_serializer, do, 2, x} + , {?MODULE, wait_for_other, 2, x} + , {?MODULE, go_ahead_other, 1, x} + , {mrdb, activity, x} ], tr_opts())). + tr_patterns(Mod, Ps, #{patterns := Pats} = Opts) -> Pats1 = [P || P <- Pats, element(1,P) =/= Mod], Opts#{patterns => Ps ++ Pats1}. @@ -542,12 +648,13 @@ wait_for_other(Parent, L) -> wait_for_other(Att, Parent, L) -> wait_for_other(Att, Parent, 1000, L). -wait_for_other(1, Parent, Timeout, L) -> +wait_for_other(0, Parent, Timeout, L) -> MRef = monitor(process, Parent), Parent ! {self(), ready}, receive {Parent, cont} -> demonitor(MRef), + Parent ! {self(), cont_ack}, ok; {'DOWN', MRef, _, _, Reason} -> ct:log("Parent died, Reason = ~p", [Reason]), @@ -586,7 +693,13 @@ go_ahead_other(Att, POther, Timeout) -> go_ahead_other_(POther, Timeout) -> receive {POther, ready} -> - POther ! {self(), cont} + POther ! {self(), cont}, + receive + {POther, cont_ack} -> + ok + after Timeout -> + error(cont_ack_timeout) + end after Timeout -> error(go_ahead_timeout) end. @@ -645,3 +758,8 @@ dictionary_unchanged(Old) -> , added := [] } = #{ deleted => Old -- New , added => New -- Old }, ok. + +check_stats(Alias) -> + Stats = mrdb_stats:get(Alias), + ct:log("Stats: ~p", [Stats]), + Stats. From bed66b2998d0c8497d0e9769c766649a66879bb9 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Thu, 3 Nov 2022 12:54:50 +0100 Subject: [PATCH 4/6] Remove html docs --- doc/index.html | 17 - doc/mnesia_rocksdb.html | 529 ------------------------ doc/mnesia_rocksdb_admin.html | 294 ------------- doc/mnesia_rocksdb_app.html | 40 -- doc/mnesia_rocksdb_lib.html | 181 -------- doc/mnesia_rocksdb_params.html | 96 ----- doc/mnesia_rocksdb_sup.html | 40 -- doc/mnesia_rocksdb_tuning.html | 151 ------- doc/modules-frame.html | 24 -- doc/mrdb.html | 728 --------------------------------- doc/mrdb_index.html | 71 ---- doc/mrdb_mutex.html | 32 -- doc/mrdb_mutex_serializer.html | 88 ---- doc/mrdb_select.html | 67 --- doc/mrdb_stats.html | 87 ---- doc/overview-summary.html | 254 ------------ 16 files changed, 2699 deletions(-) delete mode 100644 doc/index.html delete mode 100644 doc/mnesia_rocksdb.html delete mode 100644 doc/mnesia_rocksdb_admin.html delete mode 100644 doc/mnesia_rocksdb_app.html delete mode 100644 doc/mnesia_rocksdb_lib.html delete mode 100644 doc/mnesia_rocksdb_params.html delete mode 100644 doc/mnesia_rocksdb_sup.html delete mode 100644 doc/mnesia_rocksdb_tuning.html delete mode 100644 doc/modules-frame.html delete mode 100644 doc/mrdb.html delete mode 100644 doc/mrdb_index.html delete mode 100644 doc/mrdb_mutex.html delete mode 100644 doc/mrdb_mutex_serializer.html delete mode 100644 doc/mrdb_select.html delete mode 100644 doc/mrdb_stats.html delete mode 100644 doc/overview-summary.html diff --git a/doc/index.html b/doc/index.html deleted file mode 100644 index aee6928..0000000 --- a/doc/index.html +++ /dev/null @@ -1,17 +0,0 @@ - - - -The mnesia_rocksdb application - - - - - - -<h2>This page uses frames</h2> -<p>Your browser does not accept frames. -<br>You should go to the <a href="overview-summary.html">non-frame version</a> instead. -</p> - - - \ No newline at end of file diff --git a/doc/mnesia_rocksdb.html b/doc/mnesia_rocksdb.html deleted file mode 100644 index fb5d9a0..0000000 --- a/doc/mnesia_rocksdb.html +++ /dev/null @@ -1,529 +0,0 @@ - - - - -Module mnesia_rocksdb - - - - -
- -

Module mnesia_rocksdb

-rocksdb storage backend for Mnesia. - -

Behaviours: gen_server, mnesia_backend_type.

- -

Description

rocksdb storage backend for Mnesia.

- - This module implements a mnesia backend callback plugin. - It's specifically documented to try to explain the workings of - backend plugins. - -

Data Types

- -

alias()

-

alias() = atom()

- - -

data_tab()

-

data_tab() = atom()

- - -

error()

-

error() = {error, any()}

- - -

index_info()

-

index_info() = {index_pos(), index_type()}

- - -

index_pos()

-

index_pos() = integer() | {atom()}

- - -

index_tab()

-

index_tab() = {data_tab(), index, index_info()}

- - -

index_type()

-

index_type() = ordered

- - -

retainer_name()

-

retainer_name() = any()

- - -

retainer_tab()

-

retainer_tab() = {data_tab(), retainer, retainer_name()}

- - -

table()

-

table() = data_tab() | index_tab() | retainer_tab()

- - -

table_type()

-

table_type() = set | ordered_set | bag

- - -

Function Index

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
add_aliases/1
check_definition/4
close_table/2
code_change/3
create_schema/1
create_schema/2
create_table/3
decode_key/1
decode_key/2
decode_val/1
decode_val/3
default_alias/0
delete/3
delete_table/2
encode_key/1
encode_key/2
encode_val/1
encode_val/2
first/2
fixtable/3
handle_call/3
handle_cast/2
handle_info/2
index_is_consistent/3
info/3
init/1
init_backend/0Called by mnesia_schema in order to intialize the backend.
insert/3
is_index_consistent/2
ix_listvals/3
ix_prefixes/3
last/2
load_table/4
lookup/3
match_delete/3
next/3
prev/3
real_suffixes/0
receive_data/5
receive_done/4
receiver_first_message/4
register/0Equivalent to register(rocksdb_copies). -
register/1Convenience function for registering a mnesia_rocksdb backend plugin.
remove_aliases/1
repair_continuation/2
select/1
select/3
select/4
semantics/2
sender_handle_info/5
sender_init/4
show_table/1A debug function that shows the rocksdb table content.
show_table/2
slot/3
start_proc/6
sync_close_table/2
terminate/2
tmp_suffixes/0
update_counter/4
validate_key/6
validate_record/6
- -

Function Details

- -

add_aliases/1

-
-

add_aliases(Aliases) -> any()

-

-
- -

check_definition/4

-
-

check_definition(Alias, Tab, Nodes, Props) -> any()

-

-
- -

close_table/2

-
-

close_table(Alias, Tab) -> any()

-

-
- -

code_change/3

-
-

code_change(FromVsn, St, Extra) -> any()

-

-
- -

create_schema/1

-
-

create_schema(Nodes) -> any()

-

-
- -

create_schema/2

-
-

create_schema(Nodes, Aliases) -> any()

-

-
- -

create_table/3

-
-

create_table(Alias, Tab, Props) -> any()

-

-
- -

decode_key/1

-
-

decode_key(Key) -> any()

-

-
- -

decode_key/2

-
-

decode_key(Key, Metadata) -> any()

-

-
- -

decode_val/1

-
-

decode_val(Val) -> any()

-

-
- -

decode_val/3

-
-

decode_val(Val, Key, Metadata) -> any()

-

-
- -

default_alias/0

-
-

default_alias() -> any()

-

-
- -

delete/3

-
-

delete(Alias, Tab, Key) -> any()

-

-
- -

delete_table/2

-
-

delete_table(Alias, Tab) -> any()

-

-
- -

encode_key/1

-
-

encode_key(Key) -> any()

-

-
- -

encode_key/2

-
-

encode_key(Key, Metadata) -> any()

-

-
- -

encode_val/1

-
-

encode_val(Val) -> any()

-

-
- -

encode_val/2

-
-

encode_val(Val, Metadata) -> any()

-

-
- -

first/2

-
-

first(Alias, Tab) -> any()

-

-
- -

fixtable/3

-
-

fixtable(Alias, Tab, Bool) -> any()

-

-
- -

handle_call/3

-
-

handle_call(M, From, St) -> any()

-

-
- -

handle_cast/2

-
-

handle_cast(X1, St) -> any()

-

-
- -

handle_info/2

-
-

handle_info(EXIT, St) -> any()

-

-
- -

index_is_consistent/3

-
-

index_is_consistent(Alias, X2, Bool) -> any()

-

-
- -

info/3

-
-

info(Alias, Tab, Item) -> any()

-

-
- -

init/1

-
-

init(X1) -> any()

-

-
- -

init_backend/0

-
-

init_backend() -> any()

-

-

Called by mnesia_schema in order to intialize the backend

- -

This is called when the backend is registered with the first alias, or ...

- -

See OTP issue #425 (16 Feb 2021). This callback is supposed to be called -before first use of the backend, but unfortunately, it is only called at -mnesia startup and when a backend module is registered MORE THAN ONCE. -This means we need to handle this function being called multiple times.

- -

The bug has been fixed as of OTP 24.0-rc3

- -

If processes need to be started, this can be done using - mnesia_ext_sup:start_proc(Name, Mod, F, Args [, Opts]) -where Opts are parameters for the supervised child:

- - * restart (default: transient) - * shutdown (default: 120000) - * type (default: worker) - * modules (default: [Mod])

- -

insert/3

-
-

insert(Alias, Tab, Obj) -> any()

-

-
- -

is_index_consistent/2

-
-

is_index_consistent(Alias, X2) -> any()

-

-
- -

ix_listvals/3

-
-

ix_listvals(Tab, Pos, Obj) -> any()

-

-
- -

ix_prefixes/3

-
-

ix_prefixes(Tab, Pos, Obj) -> any()

-

-
- -

last/2

-
-

last(Alias, Tab) -> any()

-

-
- -

load_table/4

-
-

load_table(Alias, Tab, LoadReason, Props) -> any()

-

-
- -

lookup/3

-
-

lookup(Alias, Tab, Key) -> any()

-

-
- -

match_delete/3

-
-

match_delete(Alias, Tab, Pat) -> any()

-

-
- -

next/3

-
-

next(Alias, Tab, Key) -> any()

-

-
- -

prev/3

-
-

prev(Alias, Tab, Key) -> any()

-

-
- -

real_suffixes/0

-
-

real_suffixes() -> any()

-

-
- -

receive_data/5

-
-

receive_data(Data, Alias, Tab, Sender, State) -> any()

-

-
- -

receive_done/4

-
-

receive_done(Alias, Tab, Sender, State) -> any()

-

-
- -

receiver_first_message/4

-
-

receiver_first_message(Pid, Msg, Alias, Tab) -> any()

-

-
- -

register/0

-
-

register() -> {ok, alias()} | {error, term()}

-

-

Equivalent to register(rocksdb_copies).

- - -

register/1

-
-

register(Alias::alias()) -> {ok, alias()} | error()

-

-

Convenience function for registering a mnesia_rocksdb backend plugin

- - The function used to register a plugin is mnesia_schema:add_backend_type(Alias, Module) - where Module implements a backend_type behavior. Alias is an atom, and is used - in the same way as ram_copies etc. The default alias is rocksdb_copies.

- -

remove_aliases/1

-
-

remove_aliases(Aliases) -> any()

-

-
- -

repair_continuation/2

-
-

repair_continuation(Cont, Ms) -> any()

-

-
- -

select/1

-
-

select(Cont) -> any()

-

-
- -

select/3

-
-

select(Alias, Tab, Ms) -> any()

-

-
- -

select/4

-
-

select(Alias, IxTab, Ms, Limit) -> any()

-

-
- -

semantics/2

-
-

semantics(Alias, X2) -> any()

-

-
- -

sender_handle_info/5

-
-

sender_handle_info(Msg, Alias, Tab, ReceiverPid, Cont) -> any()

-

-
- -

sender_init/4

-
-

sender_init(Alias, Tab, RemoteStorage, Pid) -> any()

-

-
- -

show_table/1

-
-

show_table(Tab) -> any()

-

-

A debug function that shows the rocksdb table content

- -

show_table/2

-
-

show_table(Tab, Limit) -> any()

-

-
- -

slot/3

-
-

slot(Alias, Tab, Pos) -> any()

-

-
- -

start_proc/6

-
-

start_proc(Alias, Tab, Type, ProcName, Props, RdbOpts) -> any()

-

-
- -

sync_close_table/2

-
-

sync_close_table(Alias, Tab) -> any()

-

-
- -

terminate/2

-
-

terminate(Reason, St) -> any()

-

-
- -

tmp_suffixes/0

-
-

tmp_suffixes() -> any()

-

-
- -

update_counter/4

-
-

update_counter(Alias, Tab, C, Val) -> any()

-

-
- -

validate_key/6

-
-

validate_key(Alias, Tab, RecName, Arity, Type, Key) -> any()

-

-
- -

validate_record/6

-
-

validate_record(Alias, Tab, RecName, Arity, Type, Obj) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mnesia_rocksdb_admin.html b/doc/mnesia_rocksdb_admin.html deleted file mode 100644 index 82dd924..0000000 --- a/doc/mnesia_rocksdb_admin.html +++ /dev/null @@ -1,294 +0,0 @@ - - - - -Module mnesia_rocksdb_admin - - - - -
- -

Module mnesia_rocksdb_admin

- - -

Behaviours: gen_server.

- -

Data Types

- -

alias()

-

alias() = atom()

- - -

backend()

-

backend() = #{db_ref := db_ref(), cf_info := #{table() := cf()}}

- - -

cf()

-

cf() = mrdb:db_ref()

- - -

db_ref()

-

db_ref() = rocksdb:db_handle()

- - -

gen_server_noreply()

-

gen_server_noreply() = {noreply, st()} | {stop, reason(), st()}

- - -

gen_server_reply()

-

gen_server_reply() = {reply, reply(), st()} | {stop, reason(), reply(), st()}

- - -

properties()

-

properties() = [{atom(), any()}]

- - -

reason()

-

reason() = any()

- - -

reply()

-

reply() = any()

- - -

req()

-

req() = {create_table, table(), properties()} | {delete_table, table()} | {load_table, table(), properties()} | {related_resources, table()} | {get_ref, table()} | {add_aliases, [alias()]} | {write_table_property, tabname(), tuple()} | {remove_aliases, [alias()]} | {migrate, [{tabname(), map()}], rpt()} | {prep_close, table()} | {close_table, table()} | {clear_table, table() | cf()}

- - -

rpt()

-

rpt() = undefined | map()

- - -

st()

-

st() = #st{backends = #{alias() => backend()}, standalone = #{{alias(), table()} := cf()}, default_opts = [{atom(), term()}]}

- - -

table()

-

table() = tabname() | {admin, alias()} | {tabname(), index, any()} | {tabname(), retainer, any()}

- - -

tabname()

-

tabname() = atom()

- - -

Function Index

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
add_aliases/1
clear_table/1
close_table/2
code_change/3
create_table/3
delete_table/2
ensure_started/0
get_cached_env/2
get_ref/1
get_ref/2
handle_call/3
handle_cast/2
handle_info/2
init/1
load_table/3
meta/0
migrate_standalone/2
migrate_standalone/3
prep_close/2
read_info/1
read_info/2
read_info/4
related_resources/2
remove_aliases/1
request_ref/2
set_and_cache_env/2
start_link/0
terminate/2
write_info/4
write_table_property/3
- -

Function Details

- -

add_aliases/1

-
-

add_aliases(Aliases) -> any()

-

-
- -

clear_table/1

-
-

clear_table(Name) -> any()

-

-
- -

close_table/2

-
-

close_table(Alias, Name) -> any()

-

-
- -

code_change/3

-
-

code_change(FromVsn, St, Extra) -> any()

-

-
- -

create_table/3

-
-

create_table(Alias, Name, Props) -> any()

-

-
- -

delete_table/2

-
-

delete_table(Alias::alias(), Name::tabname()) -> ok

-

-
- -

ensure_started/0

-
-

ensure_started() -> ok

-

-
- -

get_cached_env/2

-
-

get_cached_env(Key, Default) -> any()

-

-
- -

get_ref/1

-
-

get_ref(Name) -> any()

-

-
- -

get_ref/2

-
-

get_ref(Name, Default) -> any()

-

-
- -

handle_call/3

-
-

handle_call(Req::{alias(), req()}, From::any(), St::st()) -> gen_server_reply()

-

-
- -

handle_cast/2

-
-

handle_cast(Msg::any(), St::st()) -> gen_server_noreply()

-

-
- -

handle_info/2

-
-

handle_info(Msg::any(), St::st()) -> gen_server_noreply()

-

-
- -

init/1

-
-

init(X1) -> any()

-

-
- -

load_table/3

-
-

load_table(Alias, Name, Props) -> any()

-

-
- -

meta/0

-
-

meta() -> any()

-

-
- -

migrate_standalone/2

-
-

migrate_standalone(Alias, Tabs) -> any()

-

-
- -

migrate_standalone/3

-
-

migrate_standalone(Alias, Tabs, Rpt0) -> any()

-

-
- -

prep_close/2

-
-

prep_close(Alias, Tab) -> any()

-

-
- -

read_info/1

-
-

read_info(TRec) -> any()

-

-
- -

read_info/2

-
-

read_info(Alias, Tab) -> any()

-

-
- -

read_info/4

-
-

read_info(Alias, Tab, K, Default) -> any()

-

-
- -

related_resources/2

-
-

related_resources(Alias, Name) -> any()

-

-
- -

remove_aliases/1

-
-

remove_aliases(Aliases) -> any()

-

-
- -

request_ref/2

-
-

request_ref(Alias, Name) -> any()

-

-
- -

set_and_cache_env/2

-
-

set_and_cache_env(Key, Value) -> any()

-

-
- -

start_link/0

-
-

start_link() -> any()

-

-
- -

terminate/2

-
-

terminate(X1, St) -> any()

-

-
- -

write_info/4

-
-

write_info(Alias, Tab, K, V) -> any()

-

-
- -

write_table_property/3

-
-

write_table_property(Alias, Tab, Prop) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mnesia_rocksdb_app.html b/doc/mnesia_rocksdb_app.html deleted file mode 100644 index bd763b9..0000000 --- a/doc/mnesia_rocksdb_app.html +++ /dev/null @@ -1,40 +0,0 @@ - - - - -Module mnesia_rocksdb_app - - - - -
- -

Module mnesia_rocksdb_app

- - -

Behaviours: application.

- -

Function Index

- - -
start/2
stop/1
- -

Function Details

- -

start/2

-
-

start(StartType, StartArgs) -> any()

-

-
- -

stop/1

-
-

stop(State) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mnesia_rocksdb_lib.html b/doc/mnesia_rocksdb_lib.html deleted file mode 100644 index 5f6a873..0000000 --- a/doc/mnesia_rocksdb_lib.html +++ /dev/null @@ -1,181 +0,0 @@ - - - - -Module mnesia_rocksdb_lib - - - - -
- -

Module mnesia_rocksdb_lib

-RocksDB update wrappers, in separate module for easy tracing and mocking. - - -

Description

RocksDB update wrappers, in separate module for easy tracing and mocking. - -

Function Index

- - - - - - - - - - - - - - - - - - - - - - -
check_encoding/2
create_mountpoint/1
data_mountpoint/1
decode/2
decode_key/1
decode_key/2
decode_val/1
decode_val/3
default_encoding/3
delete/3
encode/2
encode_key/1
encode_key/2
encode_val/1
encode_val/2
keypos/1
open_rocksdb/3
put/4
tabname/1
valid_key_type/2
valid_obj_type/2
write/3
- -

Function Details

- -

check_encoding/2

-
-

check_encoding(Encoding, Attributes) -> any()

-

-
- -

create_mountpoint/1

-
-

create_mountpoint(Tab) -> any()

-

-
- -

data_mountpoint/1

-
-

data_mountpoint(Tab) -> any()

-

-
- -

decode/2

-
-

decode(Val, X2) -> any()

-

-
- -

decode_key/1

-
-

decode_key(CodedKey::binary()) -> any()

-

-
- -

decode_key/2

-
-

decode_key(CodedKey, Enc) -> any()

-

-
- -

decode_val/1

-
-

decode_val(CodedVal::binary()) -> any()

-

-
- -

decode_val/3

-
-

decode_val(CodedVal, K, Ref) -> any()

-

-
- -

default_encoding/3

-
-

default_encoding(X1, Type, As) -> any()

-

-
- -

delete/3

-
-

delete(Ref, K, Opts) -> any()

-

-
- -

encode/2

-
-

encode(Value, X2) -> any()

-

-
- -

encode_key/1

-
-

encode_key(Key::any()) -> binary()

-

-
- -

encode_key/2

-
-

encode_key(Key, X2) -> any()

-

-
- -

encode_val/1

-
-

encode_val(Val::any()) -> binary()

-

-
- -

encode_val/2

-
-

encode_val(Val, Enc) -> any()

-

-
- -

keypos/1

-
-

keypos(Tab) -> any()

-

-
- -

open_rocksdb/3

-
-

open_rocksdb(MPd, RdbOpts, CFs) -> any()

-

-
- -

put/4

-
-

put(Ref, K, V, Opts) -> any()

-

-
- -

tabname/1

-
-

tabname(Tab) -> any()

-

-
- -

valid_key_type/2

-
-

valid_key_type(X1, Key) -> any()

-

-
- -

valid_obj_type/2

-
-

valid_obj_type(X1, Obj) -> any()

-

-
- -

write/3

-
-

write(X1, L, Opts) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mnesia_rocksdb_params.html b/doc/mnesia_rocksdb_params.html deleted file mode 100644 index 087b8ae..0000000 --- a/doc/mnesia_rocksdb_params.html +++ /dev/null @@ -1,96 +0,0 @@ - - - - -Module mnesia_rocksdb_params - - - - -
- -

Module mnesia_rocksdb_params

- - -

Behaviours: gen_server.

- -

Function Index

- - - - - - - - - - -
code_change/3
delete/1
handle_call/3
handle_cast/2
handle_info/2
init/1
lookup/2
start_link/0
store/2
terminate/2
- -

Function Details

- -

code_change/3

-
-

code_change(X1, S, X3) -> any()

-

-
- -

delete/1

-
-

delete(Tab) -> any()

-

-
- -

handle_call/3

-
-

handle_call(X1, X2, S) -> any()

-

-
- -

handle_cast/2

-
-

handle_cast(X1, S) -> any()

-

-
- -

handle_info/2

-
-

handle_info(X1, S) -> any()

-

-
- -

init/1

-
-

init(X1) -> any()

-

-
- -

lookup/2

-
-

lookup(Tab, Default) -> any()

-

-
- -

start_link/0

-
-

start_link() -> any()

-

-
- -

store/2

-
-

store(Tab, Params) -> any()

-

-
- -

terminate/2

-
-

terminate(X1, X2) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mnesia_rocksdb_sup.html b/doc/mnesia_rocksdb_sup.html deleted file mode 100644 index 4a4d957..0000000 --- a/doc/mnesia_rocksdb_sup.html +++ /dev/null @@ -1,40 +0,0 @@ - - - - -Module mnesia_rocksdb_sup - - - - -
- -

Module mnesia_rocksdb_sup

- - -

Behaviours: supervisor.

- -

Function Index

- - -
init/1
start_link/0
- -

Function Details

- -

init/1

-
-

init(X1) -> any()

-

-
- -

start_link/0

-
-

start_link() -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mnesia_rocksdb_tuning.html b/doc/mnesia_rocksdb_tuning.html deleted file mode 100644 index 1f31903..0000000 --- a/doc/mnesia_rocksdb_tuning.html +++ /dev/null @@ -1,151 +0,0 @@ - - - - -Module mnesia_rocksdb_tuning - - - - -
- -

Module mnesia_rocksdb_tuning

- - - -

Function Index

- - - - - - - - - - - - - - - - - - -
cache/1
calc_sizes/0
calc_sizes/1
count_rdb_tabs/0
count_rdb_tabs/1
default/1
describe_env/0
get_avail_ram/0
get_maxfiles/0
get_maxfiles/1
ideal_max_files/0
ideal_max_files/1
max_files/1
rdb_indexes/0
rdb_indexes/1
rdb_tabs/0
rdb_tabs/1
write_buffer/1
- -

Function Details

- -

cache/1

-
-

cache(X1) -> any()

-

-
- -

calc_sizes/0

-
-

calc_sizes() -> any()

-

-
- -

calc_sizes/1

-
-

calc_sizes(D) -> any()

-

-
- -

count_rdb_tabs/0

-
-

count_rdb_tabs() -> any()

-

-
- -

count_rdb_tabs/1

-
-

count_rdb_tabs(Db) -> any()

-

-
- -

default/1

-
-

default(X1) -> any()

-

-
- -

describe_env/0

-
-

describe_env() -> any()

-

-
- -

get_avail_ram/0

-
-

get_avail_ram() -> any()

-

-
- -

get_maxfiles/0

-
-

get_maxfiles() -> any()

-

-
- -

get_maxfiles/1

-
-

get_maxfiles(X1) -> any()

-

-
- -

ideal_max_files/0

-
-

ideal_max_files() -> any()

-

-
- -

ideal_max_files/1

-
-

ideal_max_files(D) -> any()

-

-
- -

max_files/1

-
-

max_files(X1) -> any()

-

-
- -

rdb_indexes/0

-
-

rdb_indexes() -> any()

-

-
- -

rdb_indexes/1

-
-

rdb_indexes(Db) -> any()

-

-
- -

rdb_tabs/0

-
-

rdb_tabs() -> any()

-

-
- -

rdb_tabs/1

-
-

rdb_tabs(Db) -> any()

-

-
- -

write_buffer/1

-
-

write_buffer(X1) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/modules-frame.html b/doc/modules-frame.html deleted file mode 100644 index bebffb0..0000000 --- a/doc/modules-frame.html +++ /dev/null @@ -1,24 +0,0 @@ - - - -The mnesia_rocksdb application - - - -

Modules

- - - - - - - - - - - - - -
mnesia_rocksdb
mnesia_rocksdb_admin
mnesia_rocksdb_app
mnesia_rocksdb_lib
mnesia_rocksdb_params
mnesia_rocksdb_sup
mnesia_rocksdb_tuning
mrdb
mrdb_index
mrdb_mutex
mrdb_mutex_serializer
mrdb_select
mrdb_stats
- - \ No newline at end of file diff --git a/doc/mrdb.html b/doc/mrdb.html deleted file mode 100644 index ec11659..0000000 --- a/doc/mrdb.html +++ /dev/null @@ -1,728 +0,0 @@ - - - - -Module mrdb - - - - -
- -

Module mrdb

-Mid-level access API for Mnesia-managed rocksdb tables. - - -

Description

Mid-level access API for Mnesia-managed rocksdb tables

- -

This module implements access functions for the mnesia_rocksdb -backend plugin. The functions are designed to also support -direct access to rocksdb with little overhead. Such direct -access will maintain existing indexes, but not support -replication.

- -

Each table has a metadata structure stored as a persistent -term for fast access. The structure of the metadata is as -follows:

- -
  #{ name       := <Logical table name>
-   , db_ref     := <Rocksdb database Ref>
-   , cf_handle  := <Rocksdb column family handle>
-   , activity   := Ongoing batch or transaction, if any (map())
-   , attr_pos   := #{AttrName := Pos}
-   , mode       := <Set to 'mnesia' for mnesia access flows>
-   , properties := <Mnesia table props in map format>
-   , type       := column_family | standalone
-   }
- -

Helper functions like as_batch(Ref, fun(R) -> ... end) and - with_iterator(Ref, fun(I) -> ... end) add some extra - convenience on top of the rocksdb API.

- - Note that no automatic provision exists to manage concurrent - updates via mnesia AND direct access to this API. It's advisable - to use ONE primary mode of access. If replication is used, - the mnesia API will support this, but direct mrdb updates will - not be replicated. -

Data Types

- -

activity()

-

activity() = tx_activity() | batch_activity()

- - -

activity_type()

-

activity_type() = mrdb_activity_type() | mnesia_activity_type()

- - -

admin_tab()

-

admin_tab() = {admin, alias()}

- - -

alias()

-

alias() = atom()

- - -

attr_pos()

-

attr_pos() = #{atom() := pos()}

- - -

batch_activity()

-

batch_activity() = #{type := batch, handle := batch_handle()}

- - -

batch_handle()

-

batch_handle() = rocksdb:batch_handle()

- - -

cf_handle()

-

cf_handle() = rocksdb:cf_handle()

- - -

db_handle()

-

db_handle() = rocksdb:db_handle()

- - -

db_ref()

-

db_ref() = #{name => table(), alias => atom(), vsn => non_neg_integer(), db_ref := db_handle(), cf_handle := cf_handle(), semantics := semantics(), encoding := encoding(), attr_pos := attr_pos(), type := column_family | standalone, status := open | closed | pre_existing, properties := properties(), mode => mnesia, ix_vals_f => fun((tuple()) -> [any()]), activity => activity(), term() => term()}

- - -

encoding()

-

encoding() = raw | sext | term | {key_encoding(), val_encoding()}

- - -

error()

-

error() = {error, any()}

- - -

index()

-

index() = {tab_name(), index, any()}

- - -

index_position()

-

index_position() = atom() | pos()

- - -

inner()

-

inner() = non_neg_integer()

- - -

iterator_action()

-

iterator_action() = first | last | next | prev | binary() | {seek, binary()} | {seek_for_prev, binary()}

- - -

itr_handle()

-

itr_handle() = rocksdb:itr_handle()

- - -

key()

-

key() = any()

- - -

key_encoding()

-

key_encoding() = raw | sext | term

- - -

mnesia_activity_type()

-

mnesia_activity_type() = transaction | sync_transaction | async_dirty | sync_dirty

- - -

mrdb_activity_type()

-

mrdb_activity_type() = tx | {tx, tx_options()} | batch

- - -

mrdb_iterator()

-

mrdb_iterator() = #mrdb_iter{i = itr_handle(), ref = db_ref()}

- - -

obj()

-

obj() = tuple()

- - -

outer()

-

outer() = non_neg_integer()

- - -

pos()

-

pos() = non_neg_integer()

- - -

properties()

-

properties() = #{record_name := atom(), attributes := [atom()], index := [{pos(), bag | ordered}]}

- - -

read_options()

-

read_options() = [{verify_checksums, boolean()} | {fill_cache, boolean()} | {iterate_upper_bound, binary()} | {iterate_lower_bound, binary()} | {tailing, boolean()} | {total_order_seek, boolean()} | {prefix_same_as_start, boolean()} | {snapshot, snapshot_handle()}]

- - -

ref_or_tab()

-

ref_or_tab() = table() | db_ref()

- - -

retainer()

-

retainer() = {tab_name(), retainer, any()}

- - -

retries()

-

retries() = outer() | {inner(), outer()}

- - -

semantics()

-

semantics() = bag | set

- - -

snapshot_handle()

-

snapshot_handle() = rocksdb:snapshot_handle()

- - -

tab_name()

-

tab_name() = atom()

- - -

table()

-

table() = atom() | admin_tab() | index() | retainer()

- - -

tx_activity()

-

tx_activity() = #{type := tx, handle := tx_handle(), attempt := undefined | retries()}

- - -

tx_handle()

-

tx_handle() = rocksdb:transaction_handle()

- - -

tx_options()

-

tx_options() = #{retries => retries(), no_snapshot => boolean()}

- - -

val_encoding()

-

val_encoding() = {value | object, term | raw} | raw

- - -

write_options()

-

write_options() = [{sync, boolean()} | {disable_wal, boolean()} | {ignore_missing_column_families, boolean()} | {no_slowdown, boolean()} | {low_pri, boolean()}]

- - -

Function Index

- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
abort/1Aborts an ongoing activity/2
activity/3Run an activity (similar to //mnesia/mnesia:activity/2).
alias_of/1Returns the alias of a given table or table reference.
as_batch/2Creates a rocksdb batch context and executes the fun F in it.
as_batch/3as as_batch/2, but with the ability to pass Opts to rocksdb:write_batch/2
batch_write/2
batch_write/3
clear_table/1
current_context/0
delete/2
delete/3
delete_object/2
delete_object/3
ensure_ref/1
ensure_ref/2
first/1
first/2
fold/3
fold/4
fold/5
get_batch/1
get_ref/1
index_read/3
insert/2
insert/3
iterator/1
iterator/2
iterator_close/1
iterator_move/2
last/1
last/2
match_delete/2
new_tx/1
new_tx/2
next/2
next/3
prev/2
prev/3
rdb_delete/2
rdb_delete/3
rdb_fold/4
rdb_fold/5
rdb_get/2
rdb_get/3
rdb_iterator/1
rdb_iterator/2
rdb_iterator_move/2
rdb_put/3
rdb_put/4
read/2
read/3
read_info/1
read_info/2
release_snapshot/1release a snapshot created by snapshot/1.
select/1
select/2
select/3
snapshot/1Create a snapshot of the database instance associated with the -table reference, table name or alias.
tx_commit/1
tx_ref/2
update_counter/3
update_counter/4
with_iterator/2
with_iterator/3
with_rdb_iterator/2
with_rdb_iterator/3
write_info/3
- -

Function Details

- -

abort/1

-
-

abort(Reason) -> any()

-

-

Aborts an ongoing activity/2

- -

activity/3

-
-

activity(Type::activity_type(), Alias::alias(), F::fun(() -> Res)) -> Res

-

-

Run an activity (similar to //mnesia/mnesia:activity/2).

- - Supported activity types are: -
    -
  • transaction - An optimistic rocksdb transaction
  • -
  • {tx, TxOpts} - A rocksdb transaction with sligth modifications
  • -
  • batch - A rocksdb batch operation
  • -
- -

By default, transactions are combined with a snapshot with 1 retry. - The snapshot ensures that writes from concurrent transactions don't leak into the transaction context. - A transaction will be retried if it detects that the commit set conflicts with recent changes. - A mutex is used to ensure that only one of potentially conflicting mrdb transactions is run at a time. -The re-run transaction may still fail, if new transactions, or non-transaction writes interfere with -the commit set. It will then be re-run again, until the retry count is exhausted.

- -

For finer-grained retries, it's possible to set retries => {Inner, Outer}. Setting the retries to a - single number, Retries, is analogous to {0, Retries}`. Each outer retry requests amutex lock' by - waiting in a FIFO queue. Once it receives the lock, it will try the activity once + as many retries - as specified by Inner. If these fail, the activity again goes to the FIFO queue (ending up last - in line) if there are outer retries remaining. When all retries are exhaused, the activity aborts - with retry_limit. Note that transactions, being optimistic, do not request a lock on the first -attempt, but only on outer retries (the first retry is always an outer retry).

- -

Valid TxOpts are #{no_snapshot => boolean(), retries => retries()}.

- - To simplify code adaptation, tx | transaction | sync_transaction are synonyms, and - batch | async_dirty | sync_dirty are synonyms.

- -

alias_of/1

-
-

alias_of(Tab::ref_or_tab()) -> alias()

-

-

Returns the alias of a given table or table reference.

- -

as_batch/2

-
-

as_batch(Tab::ref_or_tab(), F::fun((db_ref()) -> Res)) -> Res

-

-

Creates a rocksdb batch context and executes the fun F in it.

- - %% Rocksdb batches aren't tied to a specific DbRef until written. - This can cause surprising problems if we're juggling multiple - rocksdb instances (as we do if we have standalone tables). - At the time of writing, all objects end up in the DbRef the batch - is written to, albeit not necessarily in the intended column family. - This will probably change, but no failure mode is really acceptable. - The code below ensures that separate batches are created for each - DbRef, under a unique reference stored in the pdict. When writing, - all batches are written separately to the corresponding DbRef, - and when releasing, all batches are released. This will not ensure - atomicity, but there is no way in rocksdb to achieve atomicity - across db instances. At least, data should end up where you expect. -

- -

as_batch/3

-
-

as_batch(Tab, F, Opts) -> any()

-

-

as as_batch/2, but with the ability to pass Opts to rocksdb:write_batch/2

- -

batch_write/2

-
-

batch_write(Tab, L) -> any()

-

-
- -

batch_write/3

-
-

batch_write(Tab, L, Opts) -> any()

-

-
- -

clear_table/1

-
-

clear_table(Tab) -> any()

-

-
- -

current_context/0

-
-

current_context() -> any()

-

-
- -

delete/2

-
-

delete(Tab::ref_or_tab(), Key::key()) -> ok

-

-
- -

delete/3

-
-

delete(Tab::ref_or_tab(), Key::key(), Opts::write_options()) -> ok

-

-
- -

delete_object/2

-
-

delete_object(Tab, Obj) -> any()

-

-
- -

delete_object/3

-
-

delete_object(Tab, Obj, Opts) -> any()

-

-
- -

ensure_ref/1

-
-

ensure_ref(R::ref_or_tab()) -> db_ref()

-

-
- -

ensure_ref/2

-
-

ensure_ref(Ref, R) -> any()

-

-
- -

first/1

-
-

first(Tab::ref_or_tab()) -> key() | '$end_of_table'

-

-
- -

first/2

-
-

first(Tab::ref_or_tab(), Opts::read_options()) -> key() | '$end_of_table'

-

-
- -

fold/3

-
-

fold(Tab, Fun, Acc) -> any()

-

-
- -

fold/4

-
-

fold(Tab, Fun, Acc, MatchSpec) -> any()

-

-
- -

fold/5

-
-

fold(Tab, Fun, Acc, MatchSpec, Limit) -> any()

-

-
- -

get_batch/1

-
-

get_batch(X1) -> any()

-

-
- -

get_ref/1

-
-

get_ref(Tab::table()) -> db_ref()

-

-
- -

index_read/3

-
-

index_read(Tab, Val, Ix) -> any()

-

-
- -

insert/2

-
-

insert(Tab::ref_or_tab(), Obj::obj()) -> ok

-

-
- -

insert/3

-
-

insert(Tab::ref_or_tab(), Obj0::obj(), Opts::write_options()) -> ok

-

-
- -

iterator/1

-
-

iterator(Tab::ref_or_tab()) -> {ok, mrdb_iterator()} | {error, term()}

-

-
- -

iterator/2

-
-

iterator(Tab::ref_or_tab(), Opts::read_options()) -> {ok, mrdb_iterator()} | {error, term()}

-

-
- -

iterator_close/1

-
-

iterator_close(Mrdb_iter::mrdb_iterator()) -> ok

-

-
- -

iterator_move/2

-
-

iterator_move(Mrdb_iter::mrdb_iterator(), Dir::iterator_action()) -> {ok, tuple()} | {error, any()}

-

-
- -

last/1

-
-

last(Tab::ref_or_tab()) -> key() | '$end_of_table'

-

-
- -

last/2

-
-

last(Tab::ref_or_tab(), Opts::read_options()) -> key() | '$end_of_table'

-

-
- -

match_delete/2

-
-

match_delete(Tab, Pat) -> any()

-

-
- -

new_tx/1

-
-

new_tx(Tab::table() | db_ref()) -> db_ref()

-

-
- -

new_tx/2

-
-

new_tx(Tab::ref_or_tab(), Opts::write_options()) -> db_ref()

-

-
- -

next/2

-
-

next(Tab::ref_or_tab(), K::key()) -> key() | '$end_of_table'

-

-
- -

next/3

-
-

next(Tab::ref_or_tab(), K::key(), Opts::read_options()) -> key() | '$end_of_table'

-

-
- -

prev/2

-
-

prev(Tab::ref_or_tab(), K::key()) -> key() | '$end_of_table'

-

-
- -

prev/3

-
-

prev(Tab::ref_or_tab(), K::key(), Opts::read_options()) -> key() | '$end_of_table'

-

-
- -

rdb_delete/2

-
-

rdb_delete(R, K) -> any()

-

-
- -

rdb_delete/3

-
-

rdb_delete(R, K, Opts) -> any()

-

-
- -

rdb_fold/4

-
-

rdb_fold(Tab, Fun, Acc, Prefix) -> any()

-

-
- -

rdb_fold/5

-
-

rdb_fold(Tab, Fun, Acc, Prefix, Limit) -> any()

-

-
- -

rdb_get/2

-
-

rdb_get(R, K) -> any()

-

-
- -

rdb_get/3

-
-

rdb_get(R, K, Opts) -> any()

-

-
- -

rdb_iterator/1

-
-

rdb_iterator(R) -> any()

-

-
- -

rdb_iterator/2

-
-

rdb_iterator(R, Opts) -> any()

-

-
- -

rdb_iterator_move/2

-
-

rdb_iterator_move(I, Dir) -> any()

-

-
- -

rdb_put/3

-
-

rdb_put(R, K, V) -> any()

-

-
- -

rdb_put/4

-
-

rdb_put(R, K, V, Opts) -> any()

-

-
- -

read/2

-
-

read(Tab, Key) -> any()

-

-
- -

read/3

-
-

read(Tab, Key, Opts) -> any()

-

-
- -

read_info/1

-
-

read_info(Tab) -> any()

-

-
- -

read_info/2

-
-

read_info(Tab, K) -> any()

-

-
- -

release_snapshot/1

-
-

release_snapshot(SHandle::snapshot_handle()) -> ok | error()

-

-

release a snapshot created by snapshot/1.

- -

select/1

-
-

select(Cont) -> any()

-

-
- -

select/2

-
-

select(Tab, Pat) -> any()

-

-
- -

select/3

-
-

select(Tab, Pat, Limit) -> any()

-

-
- -

snapshot/1

-
-

snapshot(Name::alias() | ref_or_tab()) -> {ok, snapshot_handle()} | error()

-

-

Create a snapshot of the database instance associated with the -table reference, table name or alias.

- - Snapshots provide consistent read-only views over the entire state of the key-value store.

- -

tx_commit/1

-
-

tx_commit(TxH::tx_handle() | db_ref()) -> ok

-

-
- -

tx_ref/2

-
-

tx_ref(Tab::ref_or_tab() | db_ref() | db_ref(), TxH::tx_handle()) -> db_ref()

-

-
- -

update_counter/3

-
-

update_counter(Tab, C, Val) -> any()

-

-
- -

update_counter/4

-
-

update_counter(Tab, C, Val, Opts) -> any()

-

-
- -

with_iterator/2

-
-

with_iterator(Tab::ref_or_tab(), Fun::fun((mrdb_iterator()) -> Res)) -> Res

-

-
- -

with_iterator/3

-
-

with_iterator(Tab::ref_or_tab(), Fun::fun((mrdb_iterator()) -> Res), Opts::read_options()) -> Res

-

-
- -

with_rdb_iterator/2

-
-

with_rdb_iterator(Tab::ref_or_tab(), Fun::fun((itr_handle()) -> Res)) -> Res

-

-
- -

with_rdb_iterator/3

-
-

with_rdb_iterator(Tab::ref_or_tab(), Fun::fun((itr_handle()) -> Res), Opts::read_options()) -> Res

-

-
- -

write_info/3

-
-

write_info(Tab, K, V) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mrdb_index.html b/doc/mrdb_index.html deleted file mode 100644 index 7603753..0000000 --- a/doc/mrdb_index.html +++ /dev/null @@ -1,71 +0,0 @@ - - - - -Module mrdb_index - - - - -
- -

Module mrdb_index

- - - -

Data Types

- -

index_value()

-

index_value() = any()

- - -

iterator_action()

-

iterator_action() = mrdb:iterator_action()

- - -

ix_iterator()

-

ix_iterator() = #mrdb_ix_iter{i = mrdb:iterator(), type = set | bag, sub = mrdb:ref() | pid()}

- - -

object()

-

object() = tuple()

- - -

Function Index

- - - - -
iterator/2
iterator_close/1
iterator_move/2
with_iterator/3
- -

Function Details

- -

iterator/2

-
-

iterator(Tab::mrdb:ref_or_tab(), IxPos::mrdb:index_position()) -> {ok, ix_iterator()} | {error, term()}

-

-
- -

iterator_close/1

-
-

iterator_close(Mrdb_ix_iter::ix_iterator()) -> ok

-

-
- -

iterator_move/2

-
-

iterator_move(Mrdb_ix_iter::ix_iterator(), Dir::iterator_action()) -> {ok, index_value(), object()} | {error, term()}

-

-
- -

with_iterator/3

-
-

with_iterator(Tab::mrdb:ref_or_tab(), IxPos::mrdb:index_position(), Fun::fun((ix_iterator()) -> Res)) -> Res

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mrdb_mutex.html b/doc/mrdb_mutex.html deleted file mode 100644 index cb6e597..0000000 --- a/doc/mrdb_mutex.html +++ /dev/null @@ -1,32 +0,0 @@ - - - - -Module mrdb_mutex - - - - -
- -

Module mrdb_mutex

- - - -

Function Index

- -
do/2
- -

Function Details

- -

do/2

-
-

do(Rsrc, F) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mrdb_mutex_serializer.html b/doc/mrdb_mutex_serializer.html deleted file mode 100644 index 8e07aa4..0000000 --- a/doc/mrdb_mutex_serializer.html +++ /dev/null @@ -1,88 +0,0 @@ - - - - -Module mrdb_mutex_serializer - - - - -
- -

Module mrdb_mutex_serializer

- - - -

Function Index

- - - - - - - - - -
code_change/3
done/2
handle_call/3
handle_cast/2
handle_info/2
init/1
start_link/0
terminate/2
wait/1
- -

Function Details

- -

code_change/3

-
-

code_change(FromVsn, St, Extra) -> any()

-

-
- -

done/2

-
-

done(Rsrc, Ref) -> any()

-

-
- -

handle_call/3

-
-

handle_call(X1, From, St) -> any()

-

-
- -

handle_cast/2

-
-

handle_cast(X1, St) -> any()

-

-
- -

handle_info/2

-
-

handle_info(X1, St) -> any()

-

-
- -

init/1

-
-

init(X1) -> any()

-

-
- -

start_link/0

-
-

start_link() -> any()

-

-
- -

terminate/2

-
-

terminate(X1, X2) -> any()

-

-
- -

wait/1

-
-

wait(Rsrc) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mrdb_select.html b/doc/mrdb_select.html deleted file mode 100644 index 9acf856..0000000 --- a/doc/mrdb_select.html +++ /dev/null @@ -1,67 +0,0 @@ - - - - -Module mrdb_select - - - - -
- -

Module mrdb_select

- - - -

Function Index

- - - - - - -
continuation_info/2
fold/5
rdb_fold/5
select/1
select/3
select/4
- -

Function Details

- -

continuation_info/2

-
-

continuation_info(Item, C) -> any()

-

-
- -

fold/5

-
-

fold(Ref, Fun, Acc, MS, Limit) -> any()

-

-
- -

rdb_fold/5

-
-

rdb_fold(Ref, Fun, Acc, Prefix, Limit) -> any()

-

-
- -

select/1

-
-

select(Cont) -> any()

-

-
- -

select/3

-
-

select(Ref, MS, Limit) -> any()

-

-
- -

select/4

-
-

select(Ref, MS, AccKeys, Limit) -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/mrdb_stats.html b/doc/mrdb_stats.html deleted file mode 100644 index b4220fd..0000000 --- a/doc/mrdb_stats.html +++ /dev/null @@ -1,87 +0,0 @@ - - - - -Module mrdb_stats - - - - -
- -

Module mrdb_stats

-Statistics API for the mnesia_rocksdb plugin. - - -

Description

Statistics API for the mnesia_rocksdb plugin

- - Some counters are maintained for each active alias. Currently, the following - counters are supported: - * inner_retries - * outer_retries - -

Data Types

- -

alias()

-

alias() = mnesia_rocksdb:alias()

- - -

counter()

-

counter() = atom()

- - -

counters()

-

counters() = #{counter() := integer()}

- - -

db_ref()

-

db_ref() = mrdb:db_ref()

- - -

increment()

-

increment() = integer()

- - -

Function Index

- - - - -
get/1Fetches all known counters for Alias, in the form of a map, - #{Counter => Value}.
get/2Fetches the integer value of the known counter Ctr for Alias.
incr/3Increment Ctr counter for Alias` with increment `N.
new/0
- -

Function Details

- -

get/1

-
-

get(Alias::alias() | db_ref()) -> counters()

-

-

Fetches all known counters for Alias, in the form of a map, - #{Counter => Value}.

- -

get/2

-
-

get(Alias::alias() | db_ref(), Ctr::counter()) -> integer()

-

-

Fetches the integer value of the known counter Ctr for Alias.

- -

incr/3

-
-

incr(Alias::alias() | db_ref(), Ctr::counter(), N::increment()) -> ok

-

-

Increment Ctr counter for Alias` with increment `N.

- - Note that the first argument may also be a db_ref() map, - corresponding to mrdb:get_ref({admin, Alias}).

- -

new/0

-
-

new() -> any()

-

-
-
- - -

Generated by EDoc

- - diff --git a/doc/overview-summary.html b/doc/overview-summary.html deleted file mode 100644 index 328774c..0000000 --- a/doc/overview-summary.html +++ /dev/null @@ -1,254 +0,0 @@ - - - - -Mnesia Rocksdb - Rocksdb backend plugin for Mnesia - - - - - -

Mnesia Rocksdb - Rocksdb backend plugin for Mnesia -

-

Copyright © 2013-21 Klarna AB

-

Authors: Ulf Wiger (ulf@wiger.net).

- - -

The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making -it possible to utilize more capable key-value stores than the dets -module (limited to 2 GB per table). Unfortunately, this support is -undocumented. Below, some informal documentation for the plugin system -is provided.

- -

Table of Contents

-
    -
  1. Usage
  2. -
      -
    1. Prerequisites
    2. -
    3. Getting started
    4. -
    5. Special features
    6. -
    7. Customization
    8. -
    9. Handling of errors in write operations
    10. -
    11. Caveats
    12. -
    -
  3. Mnesia backend plugins
  4. -
      -
    1. Background
    2. -
    3. Design
    4. -
    -
  5. Mnesia index plugins
  6. -
  7. Rocksdb
  8. -
- -

Usage

- -

Prerequisites

- -
    -
  • rocksdb (included as dependency)
  • -
  • sext (included as dependency)
  • -
  • Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)
  • -
- -

Getting started

- -

Call mnesia_rocksdb:register() immediately after -starting mnesia.

- -

Put {rocksdb_copies, [node()]} into the table definitions of -tables you want to be in RocksDB.

- -

Special features

- -

RocksDB tables support efficient selects on prefix keys.

- -

The backend uses the sext module (see -https://github.com/uwiger/sext) for mapping between Erlang terms and the -binary data stored in the tables. This provides two useful properties:

- -
    -
  • The records are stored in the Erlang term order of their keys.
  • -
  • A prefix of a composite key is ordered just before any key for which - it is a prefix. For example, {x, '_'} is a prefix for keys {x, a}, - {x, b} and so on.
  • -
- -

This means that a prefix key identifies the start of the sequence of -entries whose keys match the prefix. The backend uses this to optimize -selects on prefix keys.

- -

### Customization

- -

RocksDB supports a number of customization options. These can be specified -by providing a {Key, Value} list named rocksdb_opts under user_properties, -for example:

- -
mnesia:create_table(foo, [{rocksdb_copies, [node()]},
-                          ...
-                          {user_properties,
-                              [{rocksdb_opts, [{max_open_files, 1024}]}]
-                          }])
- -

Consult the RocksDB documentation -for information on configuration parameters. Also see the section below on handling write errors.

- -The default configuration for tables in mnesia_rocksdb is: -
default_open_opts() ->
-    [ {create_if_missing, true}
-      , {cache_size,
-         list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
-      , {block_size, 1024}
-      , {max_open_files, 100}
-      , {write_buffer_size,
-         list_to_integer(get_env_default(
-                           "ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
-      , {compression,
-         list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
-      , {use_bloomfilter, true}
-    ].
- -

It is also possible, for larger databases, to produce a tuning parameter file. -This is experimental, and mostly copied from mnesia_leveldb. Consult the -source code in mnesia_rocksdb_tuning.erl and mnesia_rocksdb_params.erl. -Contributions are welcome.

- -

Caveats

- -

Avoid placing bag tables in RocksDB. Although they work, each write -requires additional reads, causing substantial runtime overheads. There -are better ways to represent and process bag data (see above about -prefix keys).

- -

The mnesia:table_info(T, size) call always returns zero for RocksDB -tables. RocksDB itself does not track the number of elements in a table, and -although it is possible to make the mnesia_rocksdb backend maintain a size -counter, it incurs a high runtime overhead for writes and deletes since it -forces them to first do a read to check the existence of the key. If you -depend on having an up to date size count at all times, you need to maintain -it yourself. If you only need the size occasionally, you may traverse the -table to count the elements.

- -

Mnesia backend plugins

- -

Background

- -

Mnesia was initially designed to be a RAM-only DBMS, and Erlang's -ets tables were developed for this purpose. In order to support -persistence, e.g. for configuration data, a disk-based version of ets -(called dets) was created. The dets API mimicks the ets API, -and dets is quite convenient and fast for (nowadays) small datasets. -However, using a 32-bit bucket system, it is limited to 2GB of data. -It also doesn't support ordered sets. When used in Mnesia, dets-based -tables are called disc_only_copies.

- -

To circumvent these limitations, another table type, called disc_copies -was added. This is a combination of ets and disk_log, where Mnesia -periodically snapshots the ets data to a log file on disk, and meanwhile -maintains a log of updates, which can be applied at startup. These tables -are quite performant (especially on read access), but all data is kept in -RAM, which can become a serious limitation.

- -

A backend plugin system was proposed by Ulf Wiger in 2016, and further -developed with Klarna's support, to finally become included in OTP 19. -Klarna uses a LevelDb backend, but Aeternity, in 2017, instead chose -to implement a Rocksdb backend plugin.

- -

Design

- -

As backend plugins were added on a long-since legacy-stable Mnesia, -they had to conform to the existing code structure. For this reason, -the plugin callbacks hook into the already present low-level access -API in the mnesia_lib module. As a consequence, backend plugins have -the same access semantics and granularity as ets and dets. This -isn't much of a disadvantage for key-value stores like LevelDb and RocksDB, -but a more serious issue is that the update part of this API is called -on after the point of no return. That is, Mnesia does not expect -these updates to fail, and has no recourse if they do. As an aside, -this could also happen if a disc_only_copies table exceeds the 2 GB -limit (mnesia will not check it, and dets will not complain, but simply -drop the update.)

- -

Mnesia index plugins

- -

When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.

- -

An index plugin can be added in one of two ways:

- -
    -
  1. When creating a schema, provide {index_plugins, [{Name, Module, Function}]} options.
  2. -
  3. Call the function mnesia_schema:add_index_plugin(Name, Module, Function)
  4. -
- -

Name must be an atom wrapped as a 1-tuple, e.g. {words}.

- -

The plugin callback is called as Module:Function(Table, Pos, Obj), where Pos=={words} in -our example. It returns a list of index terms.

- -

Example

- -

Given the following index plugin implementation:

- -
-module(words).
--export([words_f/3]).
-
-words_f(_,_,Obj) when is_tuple(Obj) ->
-    words_(tuple_to_list(Obj)).
-
-words_(Str) when is_binary(Str) ->
-    string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
-words_(L) when is_list(L) ->
-    lists:flatmap(fun words_/1, L);
-words_(_) ->
-    [].
- -

We can register the plugin and use it in table definitions:

- -
Eshell V12.1.3  (abort with ^G)
-1> mnesia:start().
-ok
-2> mnesia_schema:add_index_plugin({words}, words, words_f).
-{atomic,ok}
-3> mnesia:create_table(i, [{index, [{words}]}]).
-{atomic,ok}
- -

Note that in this case, we had neither a backend plugin, nor even a persistent schema. -Index plugins can be used with all table types. The registered indexing function (arity 3) must exist -as an exported function along the node's code path.

- -

To see what happens when we insert an object, we can turn on call trace.

- -
4> dbg:tracer().
-{ok,<0.108.0>}
-5> dbg:tp(words, x).
-{ok,[{matched,nonode@nohost,3},{saved,x}]}
-6> dbg:p(all,[c]).
-{ok,[{matched,nonode@nohost,60}]}
-7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
-(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
-(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
-                                             <<"four">>]
-(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
-(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
-                                             <<"four">>]
-ok
-8> dbg:ctp('_'), dbg:stop().
-ok
-9> mnesia:dirty_index_read(i, <<"one">>, {words}).
-[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
- -

(The fact that the indexing function is called twice, seems like a performance bug.)

- -

We can observe that the indexing callback is able to operate on the whole object. -It needs to be side-effect free and efficient, since it will be called at least once for each update -(if an old object exists in the table, the indexing function will be called on it too, before it is -replaced by the new object.)

- -

Rocksdb

- -

Usage

- -
- -

Generated by EDoc

- - From 19140c738ba5fc7c858eb782f34a25c1e5908a41 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Thu, 3 Nov 2022 12:57:54 +0100 Subject: [PATCH 5/6] Don't warn for export_all in mrdb_bench --- test/mrdb_bench.erl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/mrdb_bench.erl b/test/mrdb_bench.erl index 2ddc23e..ed76644 100644 --- a/test/mrdb_bench.erl +++ b/test/mrdb_bench.erl @@ -1,6 +1,6 @@ -module(mrdb_bench). --compile(export_all). +-compile([export_all, nowarn_export_all]). init() -> mnesia:delete_schema([node()]), From 465a220bfe68d0548a540c85d8310b4885d88d73 Mon Sep 17 00:00:00 2001 From: Ulf Wiger Date: Thu, 3 Nov 2022 13:37:52 +0100 Subject: [PATCH 6/6] Update comment about mutex implementation --- src/mrdb_mutex.erl | 30 ++++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) diff --git a/src/mrdb_mutex.erl b/src/mrdb_mutex.erl index 8f5777e..b32aca4 100644 --- a/src/mrdb_mutex.erl +++ b/src/mrdb_mutex.erl @@ -5,21 +5,23 @@ -include_lib("eunit/include/eunit.hrl"). -%% We use a duplicate_bag ets table as a lock queue, -%% relying on the characteristic that a lookup on a key (the resource name) -%% returns the objects in the order in which they were inserted. -%% We try to claim the lock by inserting our own pid under the Rrsc key, then -%% checking which pid is at the head of the list. If it's our pid, we have the -%% lock, and proceed with calling our fun, then delecting our table entry. -%% If another pid is at the head of the list, we busy-wait on the table. +%% We use a gen_server-based FIFO queue (one queue per alias) to manage the +%% critical section. %% -%% Releasing the resource is done by deleting the resource. If we just decrement, -%% we will end up with lingering unlocked resources, so we might as well delete. -%% Either operation is atomic, and the claim op creates the object if it's missing. - -%% Another, perhaps cheaper, way of implementing a mutex would be to use a counter -%% object, but we also care about avoiding starvation, and this way, we get a form -%% of serialization of requests. +%% Releasing the resource is done by notifying the server. +%% +%% Previous implementations tested: +%% * A counter (in ets), incremented atomically first with 0, then 1. This lets +%% the caller know if the 'semaphore' was 1 or zero before. If it was already +%% 1, busy-loop over the counter until it reads as a transition from 0 to 1. +%% This is a pretty simple implementation, but it lacks ordering, and appeared +%% to sometimes lead to starvation. +%% * A duplicate_bag ets table: These are defined such that insertion order is +%% preserved. Thus, they can serve as a mutex with FIFO characteristics. +%% Unfortunately, performance plummeted when tested with > 25 concurrent +%% processes. While this is likely a very high number, given that we're talking +%% about contention in a system using optimistic concurrency, it's never good +%% if performance falls off a cliff. do(Rsrc, F) when is_function(F, 0) -> {ok, Ref} = mrdb_mutex_serializer:wait(Rsrc),