commit 6c9f5b565f0cbbb4f060e7c55766ba60c3f2df84 Author: Ulf Wiger Date: Tue Feb 6 09:43:57 2018 +0100 first version, based on mnesia_eleveldb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..eefd59f --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +/doc +/_build \ No newline at end of file diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..8f71f43 --- /dev/null +++ b/LICENSE @@ -0,0 +1,202 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "{}" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright {yyyy} {name of copyright owner} + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..3dee3bf --- /dev/null +++ b/Makefile @@ -0,0 +1,27 @@ +suite=$(if $(SUITE), suite=$(SUITE), ) +REBAR3=$(shell which rebar3 || echo ./rebar3) + +.PHONY: all check test clean run + +all: + $(REBAR3) compile + +docs: + $(REBAR3) doc + +check: + $(REBAR3) dialyzer + +test: + $(REBAR3) eunit $(suite) + + +conf_clean: + @: + +clean: + $(REBAR3) clean + $(RM) doc/* + +run: + $(REBAR3) shell diff --git a/README.md b/README.md new file mode 100644 index 0000000..890e858 --- /dev/null +++ b/README.md @@ -0,0 +1,50 @@ +# mnesia_rocksdb +A RocksDb backend for Mnesia + +This permits Erlang/OTP applications to use RocksDB as a backend for +mnesia tables. It is based on Klarna's `mnesia_eleveldb`. + +## Prerequisites +- rocksdb +- Erlang/OTP 20.0 or newer (https://github.com/erlang/otp) + +## Getting started + +Call `mnesia_rocksdb:register()` immediately after +starting mnesia. + +Put `{rocksdb_copies, [node()]}` into the table definitions of +tables you want to be in RocksDB. + +## Special features + +RocksDB tables support efficient selects on *prefix keys*. + +The backend uses the `mnesia_rocksdb_sext` module (see +https://github.com/uwiger/sext) for mapping between Erlang terms and the +binary data stored in the tables. This provides two useful properties: + +- The records are stored in the Erlang term order of their keys. 
+- A prefix of a composite key is ordered just before any key for which + it is a prefix. For example, `{x, '_'}` is a prefix for keys `{x, a}`, + `{x, b}` and so on. + +This means that a prefix key identifies the start of the sequence of +entries whose keys match the prefix. The backend uses this to optimize +selects on prefix keys. + +## Caveats + +Avoid placing `bag` tables in RocksDB. Although they work, each write +requires additional reads, causing substantial runtime overheads. There +are better ways to represent and process bag data (see above about +*prefix keys*). + +The `mnesia:table_info(T, size)` call always returns zero for RocksDB +tables. RocksDB itself does not track the number of elements in a table, and +although it is possible to make the mnesia_rocksdb backend maintain a size +counter, it incurs a high runtime overhead for writes and deletes since it +forces them to first do a read to check the existence of the key. If you +depend on having an up to date size count at all times, you need to maintain +it yourself. If you only need the size occasionally, you may traverse the +table to count the elements. diff --git a/rebar.config b/rebar.config new file mode 100644 index 0000000..e526e58 --- /dev/null +++ b/rebar.config @@ -0,0 +1,13 @@ +%% -*- erlang -*- +{deps, + [ + {rocksdb,"0.14.0"} + ]}. + +{profiles, + [ + {test, + [ + {deps, [{proper, "1.2.0"}]} + ]} + ]}. diff --git a/rebar.config.script b/rebar.config.script new file mode 100644 index 0000000..01c1b67 --- /dev/null +++ b/rebar.config.script @@ -0,0 +1,9 @@ +%% -*- erlang-mode -*- +case os:getenv("DEBUG") of + "true" -> + Opts = proplists:get_value(erl_opts, CONFIG, []), + lists:keystore(erl_opts, 1, CONFIG, + [{d,'DEBUG'} | Opts -- [{d,'DEBUG'}]]); + _ -> + CONFIG +end. diff --git a/rebar.lock b/rebar.lock new file mode 100644 index 0000000..8389a81 --- /dev/null +++ b/rebar.lock @@ -0,0 +1,6 @@ +{"1.1.0", +[{<<"rocksdb">>,{pkg,<<"rocksdb">>,<<"0.14.0">>},0}]}. 
+[ +{pkg_hash,[ + {<<"rocksdb">>, <<"C92B48703D4812C8BC571E0FBB7681F0899F35C4E4330F1CF646D79357A6AFE4">>}]} +]. diff --git a/rebar3 b/rebar3 new file mode 100755 index 0000000..a0deec9 Binary files /dev/null and b/rebar3 differ diff --git a/src/mnesia_rocksdb.app.src b/src/mnesia_rocksdb.app.src new file mode 100644 index 0000000..2b1b86d --- /dev/null +++ b/src/mnesia_rocksdb.app.src @@ -0,0 +1,12 @@ +{application, mnesia_rocksdb, + [ + {description, "RocksDB backend plugin for Mnesia"}, + {vsn, "1.0"}, + {modules, [mnesia_rocksdb, mnesia_rocksdb_app, + mnesia_rocksdb_params, mnesia_rocksdb_sext, + mnesia_rocksdb_sup, mnesia_rocksdb_tuning]}, + {registered, []}, + {mod, {mnesia_rocksdb_app, []}}, + {env, []}, + {applications, [kernel, stdlib]} + ]}. diff --git a/src/mnesia_rocksdb.erl b/src/mnesia_rocksdb.erl new file mode 100644 index 0000000..14f122f --- /dev/null +++ b/src/mnesia_rocksdb.erl @@ -0,0 +1,1725 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +%% @doc rocksdb storage backend for Mnesia. + +%% Initialization: register() or register(Alias) +%% Usage: mnesia:create_table(Tab, [{rocksdb_copies, Nodes}, ...]). + +-module(mnesia_rocksdb). 
+ + +%% ---------------------------------------------------------------------------- +%% BEHAVIOURS +%% ---------------------------------------------------------------------------- + +-behaviour(mnesia_backend_type). +-behaviour(gen_server). + + +%% ---------------------------------------------------------------------------- +%% EXPORTS +%% ---------------------------------------------------------------------------- + +%% +%% CONVENIENCE API +%% + +-export([register/0, + register/1, + default_alias/0]). + +%% +%% DEBUG API +%% + +-export([show_table/1, + show_table/2, + show_table/3, + fold/6]). + +%% +%% BACKEND CALLBACKS +%% + +%% backend management +-export([init_backend/0, + add_aliases/1, + remove_aliases/1]). + +%% schema level callbacks +-export([semantics/2, + check_definition/4, + create_table/3, + load_table/4, + close_table/2, + sync_close_table/2, + delete_table/2, + info/3]). + +%% table synch calls +-export([sender_init/4, + sender_handle_info/5, + receiver_first_message/4, + receive_data/5, + receive_done/4]). + +%% low-level accessor callbacks. +-export([delete/3, + first/2, + fixtable/3, + insert/3, + last/2, + lookup/3, + match_delete/3, + next/3, + prev/3, + repair_continuation/2, + select/1, + select/3, + select/4, + slot/3, + update_counter/4]). + +%% Index consistency +-export([index_is_consistent/3, + is_index_consistent/2]). + +%% record and key validation +-export([validate_key/6, + validate_record/6]). + +%% file extension callbacks +-export([real_suffixes/0, + tmp_suffixes/0]). + +%% +%% GEN SERVER CALLBACKS AND CALLS +%% + +-export([start_proc/4, + init/1, + handle_call/3, + handle_info/2, + handle_cast/2, + terminate/2, + code_change/3]). + +-export([ix_prefixes/3]). 
+ + +%% ---------------------------------------------------------------------------- +%% DEFINES +%% ---------------------------------------------------------------------------- + +%% Name of the Rocksdb interface module; defaults to rocksdb but can be +%% configured by passing -DROCKSDB_MODULE= to erlc. +-ifdef(ROCKSDB_MODULE). +-define(rocksdb, ?ROCKSDB_MODULE). +-else. +-define(rocksdb, rocksdb). %% Name of the Rocksdb interface module +-endif. + +%% Data and meta data (a.k.a. info) are stored in the same table. +%% This is a table of the first byte in data +%% 0 = before meta data +%% 1 = meta data +%% 2 = before data +%% >= 8 = data + +-define(INFO_START, 0). +-define(INFO_TAG, 1). +-define(DATA_START, 2). +-define(BAG_CNT, 32). % Number of bits used for bag object counter +-define(MAX_BAG, 16#FFFFFFFF). + +%% enable debugging messages through mnesia:set_debug_level(debug) +-ifndef(MNESIA_ROCKSDB_NO_DBG). +-define(dbg(Fmt, Args), + %% avoid evaluating Args if the message will be dropped anyway + case mnesia_monitor:get_env(debug) of + none -> ok; + verbose -> ok; + _ -> mnesia_lib:dbg_out("~p:~p: "++(Fmt),[?MODULE,?LINE|Args]) + end). +-else. +-define(dbg(Fmt, Args), ok). +-endif. + +%% ---------------------------------------------------------------------------- +%% RECORDS +%% ---------------------------------------------------------------------------- + +-record(sel, {alias, % TODO: not used + tab, + ref, + keypat, + ms, % TODO: not used + compiled_ms, + limit, + key_only = false, % TODO: not used + direction = forward}). % TODO: not used + +-record(st, { ets + , ref + , alias + , tab + , type + , size_warnings % integer() + , maintain_size % boolean() + }). + +%% ---------------------------------------------------------------------------- +%% CONVENIENCE API +%% ---------------------------------------------------------------------------- + +register() -> + register(default_alias()). 
+ +register(Alias) -> + Module = ?MODULE, + case mnesia:add_backend_type(Alias, Module) of + {atomic, ok} -> + {ok, Alias}; + {aborted, {backend_type_already_exists, _}} -> + {ok, Alias}; + {aborted, Reason} -> + {error, Reason} + end. + +default_alias() -> + rocksdb_copies. + + +%% ---------------------------------------------------------------------------- +%% DEBUG API +%% ---------------------------------------------------------------------------- + +%% A debug function that shows the rocksdb table content +show_table(Tab) -> + show_table(default_alias(), Tab). + +show_table(Alias, Tab) -> + show_table(Alias, Tab, 100). + +show_table(Alias, Tab, Limit) -> + {Ref, _Type} = get_ref(Alias, Tab), + with_iterator(Ref, fun(I) -> i_show_table(I, first, Limit) end). + +%% PRIVATE + +i_show_table(_, _, 0) -> + {error, skipped_some}; +i_show_table(I, Move, Limit) -> + case ?rocksdb:iterator_move(I, Move) of + {ok, EncKey, EncVal} -> + {Type,Val} = + case EncKey of + << ?INFO_TAG, K/binary >> -> + {info,{decode_key(K),decode_val(EncVal)}}; + _ -> + K = decode_key(EncKey), + V = decode_val(EncVal), + V2 = setelement(2,V,K), + {data,V2} + end, + io:fwrite("~p: ~p~n", [Type, Val]), + i_show_table(I, next, Limit-1); + _ -> + ok + end. + + +%% ---------------------------------------------------------------------------- +%% BACKEND CALLBACKS +%% ---------------------------------------------------------------------------- + +%% backend management + +init_backend() -> + stick_rocksdb_dir(), + application:start(mnesia_rocksdb), + ok. + +%% Prevent reloading of modules in rocksdb itself during runtime, since it +%% can lead to inconsistent state in rocksdb and silent data corruption. +stick_rocksdb_dir() -> + case code:which(rocksdb) of + BeamPath when is_list(BeamPath), BeamPath =/= "" -> + Dir = filename:dirname(BeamPath), + case code:stick_dir(Dir) of + ok -> ok; + error -> warn_stick_dir({error, Dir}) + end; + Other -> + warn_stick_dir({not_found, Other}) + end. 
+ +warn_stick_dir(Reason) -> + mnesia_lib:warning("cannot make rocksdb directory sticky:~n~p~n", + [Reason]). + +add_aliases(_Aliases) -> + ok. + +remove_aliases(_Aliases) -> + ok. + +%% schema level callbacks + +%% This function is used to determine what the plugin supports +%% semantics(Alias, storage) -> +%% ram_copies | disc_copies | disc_only_copies (mandatory) +%% semantics(Alias, types) -> +%% [bag | set | ordered_set] (mandatory) +%% semantics(Alias, index_fun) -> +%% fun(Alias, Tab, Pos, Obj) -> [IxValue] (optional) +%% semantics(Alias, _) -> +%% undefined. +%% +semantics(_Alias, storage) -> disc_only_copies; +semantics(_Alias, types ) -> [set, ordered_set, bag]; +semantics(_Alias, index_types) -> [ordered]; +semantics(_Alias, index_fun) -> fun index_f/4; +semantics(_Alias, _) -> undefined. + +is_index_consistent(Alias, {Tab, index, PosInfo}) -> + case info(Alias, Tab, {index_consistent, PosInfo}) of + true -> true; + _ -> false + end. + +index_is_consistent(Alias, {Tab, index, PosInfo}, Bool) + when is_boolean(Bool) -> + write_info(Alias, Tab, {index_consistent, PosInfo}, Bool). + + +%% PRIVATE FUN +index_f(_Alias, _Tab, Pos, Obj) -> + [element(Pos, Obj)]. + +ix_prefixes(_Tab, _Pos, Obj) -> + lists:foldl( + fun(V, Acc) when is_list(V) -> + try Pfxs = prefixes(list_to_binary(V)), + Pfxs ++ Acc + catch + error:_ -> + Acc + end; + (V, Acc) when is_binary(V) -> + Pfxs = prefixes(V), + Pfxs ++ Acc; + (_, Acc) -> + Acc + end, [], tl(tuple_to_list(Obj))). + +prefixes(<<P:3/binary, _/binary>>) -> + [P]; +prefixes(_) -> + []. + +%% For now, only verify that the type is set or ordered_set. +%% set is OK as ordered_set is a kind of set. +check_definition(Alias, Tab, Nodes, Props) -> + Id = {Alias, Nodes}, + Props1 = lists:map( + fun({type, T} = P) -> + if T==set; T==ordered_set; T==bag -> + P; + true -> + mnesia:abort({combine_error, + Tab, + [Id, {type,T}]}) + end; + ({user_properties, _} = P) -> + %% should perhaps verify rocksdb options... 
+ P; + (P) -> P + end, Props), + {ok, Props1}. + +%% -> ok | {error, exists} +create_table(_Alias, Tab, _Props) -> + create_mountpoint(Tab). + +load_table(Alias, Tab, _LoadReason, Opts) -> + Type = proplists:get_value(type, Opts), + LdbUserProps = proplists:get_value( + rocksdb_opts, proplists:get_value( + user_properties, Opts, []), []), + StorageProps = proplists:get_value( + rocksdb, proplists:get_value( + storage_properties, Opts, []), LdbUserProps), + LdbOpts = mnesia_rocksdb_params:lookup(Tab, StorageProps), + ProcName = proc_name(Alias, Tab), + case whereis(ProcName) of + undefined -> + load_table_(Alias, Tab, Type, LdbOpts); + Pid -> + gen_server:call(Pid, {load, Alias, Tab, Type, LdbOpts}, infinity) + end. + +load_table_(Alias, Tab, Type, LdbOpts) -> + ShutdownTime = proplists:get_value( + owner_shutdown_time, LdbOpts, 120000), + case mnesia_ext_sup:start_proc( + Tab, ?MODULE, start_proc, [Alias,Tab,Type, LdbOpts], + [{shutdown, ShutdownTime}]) of + {ok, _Pid} -> + ok; + + %% TODO: This reply is according to the manual, but we dont get it. + {error, {already_started, _Pid}} -> + %% TODO: Is it an error if the table already is + %% loaded. This printout is triggered when running + %% transform_table on a rocksdb_table that has indexing. + ?dbg("ERR: table:~p already loaded pid:~p~n", + [Tab, _Pid]), + ok; + + %% TODO: This reply is not according to the manual, but we get it. + {error, {{already_started, _Pid}, _Stack}} -> + %% TODO: Is it an error if the table already is + %% loaded. This printout is triggered when running + %% transform_table on a rocksdb_table that has indexing. + ?dbg("ERR: table:~p already loaded pid:~p stack:~p~n", + [Tab, _Pid, _Stack]), + ok + end. + +close_table(Alias, Tab) -> + ?dbg("~p: close_table(~p, ~p);~n Trace: ~s~n", + [self(), Alias, Tab, pp_stack()]), + if is_atom(Tab) -> + [close_table(Alias, R) + || {R, _} <- related_resources(Tab)]; + true -> + ok + end, + close_table_(Alias, Tab). 
+ +close_table_(Alias, Tab) -> + case opt_call(Alias, Tab, close_table) of + {error, noproc} -> + ?dbg("~p: close_table_(~p) -> noproc~n", + [self(), Tab]), + ok; + {ok, _} -> + ok; + _Other -> + ?dbg("~p: close_table_(~p) -> _Other = ~p~n", + [self(), Tab, _Other]), + mnesia_ext_sup:stop_proc(Tab), + ok + end. + +-ifndef(MNESIA_ROCKSDB_NO_DBG). +pp_stack() -> + Trace = try throw(true) + catch + _:_ -> + case erlang:get_stacktrace() of + [_|T] -> T; + [] -> [] + end + end, + pp_calls(10, Trace). + +pp_calls(I, [{M,F,A,Pos} | T]) -> + Spc = lists:duplicate(I, $\s), + Pp = fun(Mx,Fx,Ax,Px) -> + [atom_to_list(Mx),":",atom_to_list(Fx),"/",integer_to_list(Ax), + pp_pos(Px)] + end, + [Pp(M,F,A,Pos)|[["\n",Spc,Pp(M1,F1,A1,P1)] || {M1,F1,A1,P1} <- T]]. + +pp_pos([]) -> ""; +pp_pos(L) when is_integer(L) -> + [" (", integer_to_list(L), ")"]; +pp_pos([{file,_},{line,L}]) -> + [" (", integer_to_list(L), ")"]. +-endif. + +sync_close_table(Alias, Tab) -> + ?dbg("~p: sync_close_table(~p, ~p);~n Trace: ~s~n", + [self(), Alias, Tab, pp_stack()]), + close_table(Alias, Tab). + +delete_table(Alias, Tab) -> + ?dbg("~p: delete_table(~p, ~p);~n Trace: ~s~n", + [self(), Alias, Tab, pp_stack()]), + delete_table(Alias, Tab, data_mountpoint(Tab)). + +delete_table(Alias, Tab, MP) -> + if is_atom(Tab) -> + [delete_table(Alias, T, M) || {T,M} <- related_resources(Tab)]; + true -> + ok + end, + case opt_call(Alias, Tab, delete_table) of + {error, noproc} -> + do_delete_table(Tab, MP); + {ok, _} -> + ok + end. + +do_delete_table(Tab, MP) -> + assert_proper_mountpoint(Tab, MP), + destroy_db(MP, []). 
+ + +info(_Alias, Tab, memory) -> + try ets:info(tab_name(icache, Tab), memory) + catch + error:_ -> + 0 + end; +info(Alias, Tab, size) -> + case retrieve_size(Alias, Tab) of + {ok, Size} -> + if Size < 10000 -> ok; + true -> size_warning(Alias, Tab) + end, + Size; + Error -> + Error + end; +info(_Alias, Tab, Item) -> + case try_read_info(Tab, Item, undefined) of + {ok, Value} -> + Value; + Error -> + Error + end. + +retrieve_size(_Alias, Tab) -> + case try_read_info(Tab, size, 0) of + {ok, Size} -> + {ok, Size}; + Error -> + Error + end. + +try_read_info(Tab, Item, Default) -> + try + {ok, read_info(Item, Default, tab_name(icache, Tab))} + catch + error:Reason -> + {error, Reason} + end. + +write_info(Alias, Tab, Key, Value) -> + call(Alias, Tab, {write_info, Key, Value}). + +%% table synch calls + +%% =========================================================== +%% Table synch protocol +%% Callbacks are +%% Sender side: +%% 1. sender_init(Alias, Tab, RemoteStorage, ReceiverPid) -> +%% {standard, InitFun, ChunkFun} | {InitFun, ChunkFun} when +%% InitFun :: fun() -> {Recs, Cont} | '$end_of_table' +%% ChunkFun :: fun(Cont) -> {Recs, Cont1} | '$end_of_table' +%% +%% If {standard, I, C} is returned, the standard init message will be +%% sent to the receiver. Matching on RemoteStorage can reveal if a +%% different protocol can be used. +%% +%% 2. InitFun() is called +%% 3a. ChunkFun(Cont) is called repeatedly until done +%% 3b. sender_handle_info(Msg, Alias, Tab, ReceiverPid, Cont) -> +%% {ChunkFun, NewCont} +%% +%% Receiver side: +%% 1. receiver_first_message(SenderPid, Msg, Alias, Tab) -> +%% {Size::integer(), State} +%% 2. receive_data(Data, Alias, Tab, _Sender, State) -> +%% {more, NewState} | {{more, Msg}, NewState} +%% 3. receive_done(_Alias, _Tab, _Sender, _State) -> +%% ok +%% +%% The receiver can communicate with the Sender by returning +%% {{more, Msg}, St} from receive_data/4. 
The sender will be called through +%% sender_handle_info(Msg, ...), where it can adjust its ChunkFun and +%% Continuation. Note that the message from the receiver is sent once the +%% receive_data/4 function returns. This is slightly different from the +%% normal mnesia table synch, where the receiver acks immediately upon +%% reception of a new chunk, then processes the data. +%% + +sender_init(Alias, Tab, _RemoteStorage, _Pid) -> + %% Need to send a message to the receiver. It will be handled in + %% receiver_first_message/4 below. There could be a volley of messages... + {standard, + fun() -> + select(Alias, Tab, [{'_',[],['$_']}], 100) + end, + chunk_fun()}. + +sender_handle_info(_Msg, _Alias, _Tab, _ReceiverPid, Cont) -> + %% ignore - we don't expect any message from the receiver + {chunk_fun(), Cont}. + +receiver_first_message(_Pid, {first, Size} = _Msg, _Alias, _Tab) -> + {Size, _State = []}. + +receive_data(Data, Alias, Tab, _Sender, State) -> + [insert(Alias, Tab, Obj) || Obj <- Data], + {more, State}. + +receive_done(_Alias, _Tab, _Sender, _State) -> + ok. + +%% End of table synch protocol +%% =========================================================== + +%% PRIVATE + +chunk_fun() -> + fun(Cont) -> + select(Cont) + end. + +%% low-level accessor callbacks. + +delete(Alias, Tab, Key) -> + opt_call(Alias, Tab, {delete, encode_key(Key)}), + ok. + +first(Alias, Tab) -> + {Ref, _Type} = get_ref(Alias, Tab), + with_keys_only_iterator(Ref, fun i_first/1). + +%% PRIVATE ITERATOR +i_first(I) -> + case ?rocksdb:iterator_move(I, <<?DATA_START>>) of + {ok, First} -> + decode_key(First); + _ -> + '$end_of_table' + end. + +%% Not relevant for an ordered_set +fixtable(_Alias, _Tab, _Bool) -> + true. + +%% To save storage space, we avoid storing the key twice. We replace the key +%% in the record with []. It has to be put back in lookup/3. 
+insert(Alias, Tab, Obj) -> + Pos = keypos(Tab), + EncKey = encode_key(element(Pos, Obj)), + EncVal = encode_val(setelement(Pos, Obj, [])), + call(Alias, Tab, {insert, EncKey, EncVal}). + +last(Alias, Tab) -> + {Ref, _Type} = get_ref(Alias, Tab), + with_keys_only_iterator(Ref, fun i_last/1). + +%% PRIVATE ITERATOR +i_last(I) -> + case ?rocksdb:iterator_move(I, last) of + {ok, << ?INFO_TAG, _/binary >>} -> + '$end_of_table'; + {ok, Last} -> + decode_key(Last); + _ -> + '$end_of_table' + end. + +%% Since we replace the key with [] in the record, we have to put it back +%% into the found record. +lookup(Alias, Tab, Key) -> + Enc = encode_key(Key), + {Ref, Type} = call(Alias, Tab, get_ref), + case Type of + bag -> lookup_bag(Ref, Key, Enc, keypos(Tab)); + _ -> + case ?rocksdb:get(Ref, Enc, []) of + {ok, EncVal} -> + [setelement(keypos(Tab), decode_val(EncVal), Key)]; + _ -> + [] + end + end. + +lookup_bag(Ref, K, Enc, KP) -> + Sz = byte_size(Enc), + with_iterator( + Ref, fun(I) -> + lookup_bag_(Sz, Enc, ?rocksdb:iterator_move(I, Enc), + K, I, KP) + end). + +lookup_bag_(Sz, Enc, {ok, Enc, _}, K, I, KP) -> + lookup_bag_(Sz, Enc, ?rocksdb:iterator_move(I, next), K, I, KP); +lookup_bag_(Sz, Enc, Res, K, I, KP) -> + case Res of + {ok, <<Enc:Sz/binary, _:?BAG_CNT>>, V} -> + [setelement(KP, decode_val(V), K)| + lookup_bag_(Sz, Enc, ?rocksdb:iterator_move(I, next), K, I, KP)]; + _ -> + [] + end. + +match_delete(Alias, Tab, Pat) when is_atom(Pat) -> + %do_match_delete(Alias, Tab, '_'), + case is_wild(Pat) of + true -> + call(Alias, Tab, clear_table), + ok; + false -> + %% can this happen?? + error(badarg) + end; +match_delete(Alias, Tab, Pat) when is_tuple(Pat) -> + KP = keypos(Tab), + Key = element(KP, Pat), + case is_wild(Key) of + true -> + call(Alias, Tab, clear_table); + false -> + call(Alias, Tab, {match_delete, Pat}) + end, + ok. + + +next(Alias, Tab, Key) -> + {Ref, _Type} = get_ref(Alias, Tab), + EncKey = encode_key(Key), + with_keys_only_iterator(Ref, fun(I) -> i_next(I, EncKey, Key) end). 

%% PRIVATE ITERATOR
%% Position at (or after) EncKey, then skip past the key itself so the
%% successor is returned.
i_next(I, EncKey, Key) ->
    case ?rocksdb:iterator_move(I, EncKey) of
        {ok, EncKey} ->
            i_next_loop(?rocksdb:iterator_move(I, next), I, Key);
        Other ->
            i_next_loop(Other, I, Key)
    end.

%% Skip any entries that still decode to Key (bag duplicates); return the
%% first strictly-greater key, or '$end_of_table'.
i_next_loop({ok, EncKey}, I, Key) ->
    case decode_key(EncKey) of
        Key ->
            i_next_loop(?rocksdb:iterator_move(I, next), I, Key);
        NextKey ->
            NextKey
    end;
i_next_loop(_, _I, _Key) ->
    '$end_of_table'.

prev(Alias, Tab, Key0) ->
    {Ref, _Type} = call(Alias, Tab, get_ref),
    Key = encode_key(Key0),
    with_keys_only_iterator(Ref, fun(I) -> i_prev(I, Key) end).

%% PRIVATE ITERATOR
%% Seek to Key; if the seek lands somewhere, step backwards; if the iterator
%% is invalid (Key beyond the last entry) the predecessor is the last key.
i_prev(I, Key) ->
    case ?rocksdb:iterator_move(I, Key) of
        {ok, _} ->
            i_move_to_prev(I, Key);
        {error, invalid_iterator} ->
            i_last(I)
    end.

%% PRIVATE ITERATOR
%% Step back until a key strictly smaller than Key is found. An ?INFO_TAG
%% prefix means we walked into the info area -> no predecessor.
i_move_to_prev(I, Key) ->
    case ?rocksdb:iterator_move(I, prev) of
        {ok, << ?INFO_TAG, _/binary >>} ->
            '$end_of_table';
        {ok, Prev} when Prev < Key ->
            decode_key(Prev);
        {ok, _} ->
            i_move_to_prev(I, Key);
        _ ->
            '$end_of_table'
    end.

%% Continuations are closures here; the match spec cannot be re-bound.
repair_continuation(Cont, _Ms) ->
    Cont.

%% Resume a select continuation produced by do_select/traverse_continue.
select(Cont) ->
    %% Handle {ModOrAlias, Cont} wrappers for backwards compatibility with
    %% older versions of mnesia_ext (before OTP 20).
    case Cont of
        {_, '$end_of_table'} -> '$end_of_table';
        {_, Cont1} -> Cont1();
        '$end_of_table' -> '$end_of_table';
        _ -> Cont()
    end.

%% Unlimited select: returns the full result list (no continuation).
select(Alias, Tab, Ms) ->
    case select(Alias, Tab, Ms, infinity) of
        {Res, '$end_of_table'} ->
            Res;
        '$end_of_table' ->
            '$end_of_table'
    end.

select(Alias, Tab, Ms, Limit) when Limit==infinity; is_integer(Limit) ->
    {Ref, Type} = get_ref(Alias, Tab),
    do_select(Ref, Tab, Type, Ms, Limit).

%% Return the object(s) at ordinal position Pos by linear scan.
%% NOTE(review): restored <<?DATA_START>> below; the extracted source had
%% the invalid token `<>`.
slot(Alias, Tab, Pos) when is_integer(Pos), Pos >= 0 ->
    {Ref, Type} = get_ref(Alias, Tab),
    First = fun(I) -> ?rocksdb:iterator_move(I, <<?DATA_START>>) end,
    F = case Type of
            bag -> fun(I) -> slot_iter_set(First(I), I, 0, Pos) end;
            _ -> fun(I) -> slot_iter_set(First(I), I, 0, Pos) end
        end,
    with_iterator(Ref, F);
slot(_, _, _) ->
    error(badarg).

%% Exactly which objects Mod:slot/2 is supposed to return is not defined,
%% so let's just use the same version for both set and bag. No one should
%% use this function anyway, as it is ridiculously inefficient.
slot_iter_set({ok, K, V}, _I, P, P) ->
    [setelement(2, decode_val(V), decode_key(K))];
slot_iter_set({ok, _, _}, I, P1, P) when P1 < P ->
    slot_iter_set(?rocksdb:iterator_move(I, next), I, P1+1, P);
slot_iter_set(Res, _, _, _) when element(1, Res) =/= ok ->
    '$end_of_table'.

%% Client-side counter update; aborts the transaction on badarg.
update_counter(Alias, Tab, C, Val) when is_integer(Val) ->
    case call(Alias, Tab, {update_counter, C, Val}) of
        badarg ->
            mnesia:abort(badarg);
        Res ->
            Res
    end.

%% server-side part
%% Counters are stored as {RecName, Key, Integer} records (3rd element).
do_update_counter(C, Val, Ref) ->
    Enc = encode_key(C),
    case ?rocksdb:get(Ref, Enc, [{fill_cache, true}]) of
        {ok, EncVal} ->
            case decode_val(EncVal) of
                {_, _, Old} = Rec when is_integer(Old) ->
                    Res = Old+Val,
                    ?rocksdb:put(Ref, Enc,
                                 encode_val(
                                   setelement(3, Rec, Res)),
                                 []),
                    Res;
                _ ->
                    badarg
            end;
        _ ->
            badarg
    end.

%% PRIVATE

%% key+data iterator: iterator_move/2 returns {ok, EncKey, EncVal}
with_iterator(Ref, F) ->
    {ok, I} = ?rocksdb:iterator(Ref, []),
    try F(I)
    after
        ?rocksdb:iterator_close(I)
    end.

%% keys_only iterator: iterator_move/2 returns {ok, EncKey}
with_keys_only_iterator(Ref, F) ->
    {ok, I} = ?rocksdb:iterator(Ref, [], keys_only),
    try F(I)
    after
        ?rocksdb:iterator_close(I)
    end.

%% TODO - use with_keys_only_iterator for match_delete

%% record and key validation

validate_key(_Alias, _Tab, RecName, Arity, Type, _Key) ->
    {RecName, Arity, Type}.

validate_record(_Alias, _Tab, RecName, Arity, Type, _Obj) ->
    {RecName, Arity, Type}.

%% file extension callbacks

%% Extensions for files that are permanent. Needs to be cleaned up
%% e.g. at deleting the schema.
real_suffixes() ->
    [".extldb"].

%% Extensions for temporary files. Can be cleaned up when mnesia
%% cleans up other temporary files.
tmp_suffixes() ->
    [].


%% ----------------------------------------------------------------------------
%% GEN SERVER CALLBACKS AND CALLS
%% ----------------------------------------------------------------------------

%% One locally-registered gen_server per (Alias, Tab), owning the rocksdb
%% handle and the icache ETS table.
start_proc(Alias, Tab, Type, LdbOpts) ->
    ProcName = proc_name(Alias, Tab),
    gen_server:start_link({local, ProcName}, ?MODULE,
                          {Alias, Tab, Type, LdbOpts}, []).

init({Alias, Tab, Type, LdbOpts}) ->
    %% trap exits so terminate/2 runs and the db handle is closed cleanly
    process_flag(trap_exit, true),
    {ok, Ref, Ets} = do_load_table(Tab, LdbOpts),
    St = #st{ ets = Ets
            , ref = Ref
            , alias = Alias
            , tab = Tab
            , type = Type
            , size_warnings = 0
            , maintain_size = should_maintain_size(Tab)
            },
    {ok, recover_size_info(St)}.

%% Open the rocksdb instance under the table's mountpoint and mirror its
%% info records into a protected named ETS cache.
do_load_table(Tab, LdbOpts) ->
    MPd = data_mountpoint(Tab),
    ?dbg("** Mountpoint: ~p~n ~s~n", [MPd, os:cmd("ls " ++ MPd)]),
    Ets = ets:new(tab_name(icache,Tab), [set, protected, named_table]),
    {ok, Ref} = open_rocksdb(MPd, LdbOpts),
    rocksdb_to_ets(Ref, Ets),
    {ok, Ref, Ets}.

%% gen_server call dispatch. All data-plane requests (insert/delete/etc.)
%% are serialized through this server process.
handle_call({load, Alias, Tab, Type, LdbOpts}, _From,
            #st{type = Type, alias = Alias, tab = Tab} = St) ->
    {ok, Ref, Ets} = do_load_table(Tab, LdbOpts),
    {reply, ok, St#st{ref = Ref, ets = Ets}};
handle_call(get_ref, _From, #st{ref = Ref, type = Type} = St) ->
    {reply, {Ref, Type}, St};
handle_call({write_info, Key, Value}, _From, #st{} = St) ->
    _ = write_info_(Key, Value, St),
    {reply, ok, St};
handle_call({update_counter, C, Incr}, _From, #st{ref = Ref} = St) ->
    {reply, do_update_counter(C, Incr, Ref), St};
handle_call({insert, Key, Val}, _From, St) ->
    do_insert(Key, Val, St),
    {reply, ok, St};
handle_call({delete, Key}, _From, St) ->
    do_delete(Key, St),
    {reply, ok, St};
handle_call(clear_table, _From, #st{ets = Ets, tab = Tab, ref = Ref} = St) ->
    %% clearing = close, destroy on disk, re-open, refresh the info cache
    MPd = data_mountpoint(Tab),
    ?dbg("Attempting clear_table(~p)~n", [Tab]),
    _ = rocksdb_close(Ref),
    {ok, NewRef} = destroy_recreate(MPd, rocksdb_open_opts(Tab)),
    ets:delete_all_objects(Ets),
    rocksdb_to_ets(NewRef, Ets),
    {reply, ok, St#st{ref = NewRef}};
handle_call({match_delete, Pat}, _From, #st{} = St) ->
    Res = do_match_delete(Pat, St),
    {reply, Res, St};
handle_call(close_table, _From, #st{ref = Ref, ets = Ets} = St) ->
    _ = rocksdb_close(Ref),
    ets:delete(Ets),
    {reply, ok, St#st{ref = undefined}};
handle_call(delete_table, _From, #st{tab = T, ref = Ref, ets = Ets} = St) ->
    %% best-effort close/cleanup, then stop the server
    _ = (catch rocksdb_close(Ref)),
    _ = (catch ets:delete(Ets)),
    do_delete_table(T, data_mountpoint(T)),
    {stop, normal, ok, St#st{ref = undefined}}.

%% Rate-limit "large size" warnings: log the first 10, then mute for an
%% hour and report the suppressed count.
handle_cast(size_warning, #st{tab = T, size_warnings = W} = St) when W < 10 ->
    mnesia_lib:warning("large size retrieved from table: ~p~n", [T]),
    if W =:= 9 ->
            OneHrMs = 60 * 60 * 1000,
            erlang:send_after(OneHrMs, self(), unmute_size_warnings);
       true ->
            ok
    end,
    {noreply, St#st{size_warnings = W + 1}};
handle_cast(size_warning, #st{size_warnings = W} = St) when W >= 10 ->
    {noreply, St#st{size_warnings = W + 1}};
handle_cast(_, St) ->
    {noreply, St}.

handle_info(unmute_size_warnings, #st{tab = T, size_warnings = W} = St) ->
    C = W - 10,
    if C > 0 ->
            mnesia_lib:warning("warnings suppressed~ntable: ~p, count: ~p~n",
                               [T, C]);
       true ->
            ok
    end,
    {noreply, St#st{size_warnings = 0}};
handle_info({'EXIT', _, _} = _EXIT, St) ->
    %% linked process died; logged only, the server itself stays up
    ?dbg("rocksdb owner received ~p~n", [_EXIT]),
    {noreply, St};
handle_info(_, St) ->
    {noreply, St}.

code_change(_FromVsn, St, _Extra) ->
    {ok, St}.

%% Close the db handle (if still open) on shutdown.
terminate(_Reason, #st{ref = Ref}) ->
    if Ref =/= undefined ->
            ?rocksdb:close(Ref);
       true -> ok
    end,
    ok.


%% ----------------------------------------------------------------------------
%% GEN SERVER PRIVATE
%% ----------------------------------------------------------------------------

%% OS environment lookup with default (used for tuning knobs).
get_env_default(Key, Default) ->
    case os:getenv(Key) of
        false ->
            Default;
        Value ->
            Value
    end.

%% Resolve the rocksdb open options for a table (or an index of a table)
%% from its mnesia user_properties, layered over default_open_opts().
rocksdb_open_opts({Tab, index, {Pos,_}}) ->
    UserProps = mnesia_lib:val({Tab, user_properties}),
    IxOpts = proplists:get_value(rocksdb_index_opts, UserProps, []),
    PosOpts = proplists:get_value(Pos, IxOpts, []),
    rocksdb_open_opts_(PosOpts);
rocksdb_open_opts(Tab) ->
    UserProps = mnesia_lib:val({Tab, user_properties}),
    LdbOpts = proplists:get_value(rocksdb_opts, UserProps, []),
    rocksdb_open_opts_(LdbOpts).

%% User options override defaults, keyed on option name.
rocksdb_open_opts_(LdbOpts) ->
    lists:foldl(
      fun({K,_} = Item, Acc) ->
              lists:keystore(K, 1, Acc, Item)
      end, default_open_opts(), LdbOpts).

%% Baseline rocksdb options; several are tunable via OS environment vars.
default_open_opts() ->
    [ {create_if_missing, true}
    , {cache_size,
       list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
    , {block_size, 1024}
    , {max_open_files, 100}
    , {write_buffer_size,
       list_to_integer(get_env_default(
                         "ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
    , {compression,
       list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
    , {use_bloomfilter, true}
    ].

%% Wipe the on-disk db and open a fresh one (used by clear_table).
destroy_recreate(MPd, LdbOpts) ->
    ok = destroy_db(MPd, []),
    open_rocksdb(MPd, LdbOpts).

open_rocksdb(MPd, LdbOpts) ->
    open_rocksdb(MPd, rocksdb_open_opts_(LdbOpts), get_retries()).

%% Code adapted from basho/riak_kv_eleveldb_backend.erl
open_rocksdb(MPd, Opts, Retries) ->
    open_db(MPd, Opts, max(1, Retries), undefined).

open_db(_, _, 0, LastError) ->
    {error, LastError};
open_db(MPd, Opts, RetriesLeft, _) ->
    case ?rocksdb:open(MPd, Opts) of
        {ok, Ref} ->
            ?dbg("~p: Open - Rocksdb: ~s~n -> {ok, ~p}~n",
                 [self(), MPd, Ref]),
            {ok, Ref};
        %% Check specifically for lock error, this can be caused if
        %% a crashed mnesia takes some time to flush rocksdb information
        %% out to disk. The process is gone, but the NIF resource cleanup
        %% may not have completed.
        {error, {db_open, OpenErr}=Reason} ->
            case lists:prefix("IO error: lock ", OpenErr) of
                true ->
                    SleepFor = get_retry_delay(),
                    ?dbg("~p: Open - Rocksdb backend retrying ~p in ~p ms"
                         " after error ~s\n",
                         [self(), MPd, SleepFor, OpenErr]),
                    timer:sleep(SleepFor),
                    open_db(MPd, Opts, RetriesLeft - 1, Reason);
                false ->
                    {error, Reason}
            end;
        {error, Reason} ->
            {error, Reason}
    end.

%% await_db_closed(Tab) ->
%%     MPd = data_mountpoint(Tab),
%%     await_db_closed_(MPd).

%% await_db_closed_(MPd) ->
%%     case filelib:is_file(filename:join(MPd, "LOCK")) of
%%         true ->
%%             SleepFor = get_retry_delay(),
%%             timer:sleep(SleepFor),
%%             await_db_closed_(MPd);
%%         false ->
%%             ok
%%     end.

rocksdb_close(undefined) ->
    ok;
rocksdb_close(Ref) ->
    Res = ?rocksdb:close(Ref),
    %% encourage prompt release of the NIF resource
    erlang:garbage_collect(),
    Res.

destroy_db(MPd, Opts) ->
    destroy_db(MPd, Opts, get_retries()).

%% Essentially same code as above.
destroy_db(MPd, Opts, Retries) ->
    _DRes = destroy_db(MPd, Opts, max(1, Retries), undefined),
    ?dbg("~p: Destroy ~s -> ~p~n", [self(), MPd, _DRes]),
    [_|_] = MPd, % ensure MPd is non-empty
    %% NOTE: shells out to rm -rf on the mountpoint; MPd comes from
    %% data_mountpoint/1 (mnesia dir), not from untrusted input.
    _RmRes = os:cmd("rm -rf " ++ MPd ++ "/*"),
    ?dbg("~p: RmRes = '~s'~n", [self(), _RmRes]),
    ok.

destroy_db(_, _, 0, LastError) ->
    {error, LastError};
destroy_db(MPd, Opts, RetriesLeft, _) ->
    case ?rocksdb:destroy(MPd, Opts) of
        ok ->
            ok;
        %% Check specifically for lock error, this can be caused if
        %% destroy follows quickly after close.
        {error, {error_db_destroy, Err}=Reason} ->
            case lists:prefix("IO error: lock ", Err) of
                true ->
                    SleepFor = get_retry_delay(),
                    ?dbg("~p: Destroy - Rocksdb backend retrying ~p in ~p ms"
                         " after error ~s\n"
                         " children = ~p~n",
                         [self(), MPd, SleepFor, Err,
                          supervisor:which_children(mnesia_ext_sup)]),
                    timer:sleep(SleepFor),
                    destroy_db(MPd, Opts, RetriesLeft - 1, Reason);
                false ->
                    {error, Reason}
            end;
        {error, Reason} ->
            {error, Reason}
    end.

get_retries() -> 30.
get_retry_delay() -> 10000.

%% Mirror all persisted info records ({info, Item} -> Val) into the ETS
%% cache.
%% NOTE(review): restored <<?INFO_START>> below; the extracted source had
%% the invalid token `<>`.
rocksdb_to_ets(Ref, Ets) ->
    with_iterator(Ref, fun(I) ->
                               i_rocksdb_to_ets(I, Ets, <<?INFO_START>>)
                       end).

i_rocksdb_to_ets(I, Ets, Move) ->
    case ?rocksdb:iterator_move(I, Move) of
        {ok, << ?INFO_TAG, EncKey/binary >>, EncVal} ->
            Item = decode_key(EncKey),
            Val = decode_val(EncVal),
            ets:insert(Ets, {{info,Item}, Val}),
            i_rocksdb_to_ets(I, Ets, next);
        _ ->
            '$end_of_table'
    end.

%% Like call/3, but returns {error, noproc} instead of crashing when the
%% table's server process is not running.
opt_call(Alias, Tab, Req) ->
    ProcName = proc_name(Alias, Tab),
    case whereis(ProcName) of
        undefined ->
            ?dbg("proc_name(~p, ~p): ~p; NO PROCESS~n",
                 [Alias, Tab, ProcName]),
            {error, noproc};
        Pid when is_pid(Pid) ->
            ?dbg("proc_name(~p, ~p): ~p; Pid = ~p~n",
                 [Alias, Tab, ProcName, Pid]),
            {ok, gen_server:call(Pid, Req, infinity)}
    end.

%% Synchronous request to the table's server; translates error replies
%% into mnesia transaction aborts.
call(Alias, Tab, Req) ->
    ProcName = proc_name(Alias, Tab),
    case gen_server:call(ProcName, Req, infinity) of
        badarg ->
            mnesia:abort(badarg);
        {abort, _} = Err ->
            mnesia:abort(Err);
        Reply ->
            Reply
    end.

size_warning(Alias, Tab) ->
    ProcName = proc_name(Alias, Tab),
    gen_server:cast(ProcName, size_warning).

%% server-side end of insert/3.
%% Four cases: {set|bag} x {with|without} maintained size counter.
do_insert(K, V, #st{ref = Ref, type = bag, maintain_size = false}) ->
    do_insert_bag(Ref, K, V, false);
do_insert(K, V, #st{ets = Ets, ref = Ref, type = bag, maintain_size = true}) ->
    CurSz = read_info(size, 0, Ets),
    NewSz = do_insert_bag(Ref, K, V, CurSz),
    ets_insert_info(Ets, size, NewSz),
    ok;
do_insert(K, V, #st{ref = Ref, maintain_size = false}) ->
    ?rocksdb:put(Ref, K, V, []);
do_insert(K, V, #st{ets = Ets, ref = Ref, maintain_size = true}) ->
    %% size counter only changes when the key did not already exist
    IsNew =
        case ?rocksdb:get(Ref, K, []) of
            {ok, _} ->
                false;
            _ ->
                true
        end,
    case IsNew of
        true ->
            NewSz = read_info(size, 0, Ets) + 1,
            {Ki, Vi} = info_obj(size, NewSz),
            %% atomic batch: size record + data record
            ?rocksdb:write(Ref, [{put, Ki, Vi}, {put, K, V}], []),
            ets_insert_info(Ets, size, NewSz);
        false ->
            ?rocksdb:put(Ref, K, V, [])
    end,
    ok.

%% Scan the bag's existing entries for K to decide whether the object is
%% already present, reusing an existing counter slot when possible.
do_insert_bag(Ref, K, V, CurSz) ->
    KSz = byte_size(K),
    with_iterator(
      Ref, fun(I) ->
                   do_insert_bag_(
                     KSz, K, ?rocksdb:iterator_move(I, K), I, V, 0, Ref, CurSz)
           end).


%% There's a potential access pattern that would force counters to
%% creep upwards and eventually hit the limit. This could be addressed,
%% with compaction. TODO.
%% Bag entries are stored as <<Key/binary, Counter:?BAG_CNT>>.
%% NOTE(review): the binary patterns below were restored; the extracted
%% source had the invalid token `<>` where binary syntax once stood.
do_insert_bag_(Sz, K, Res, I, V, Prev, Ref, TSz) when Prev < ?MAX_BAG ->
    case Res of
        {ok, <<K:Sz/binary, _:?BAG_CNT>>, V} ->
            %% object exists
            TSz;
        {ok, <<K:Sz/binary, N:?BAG_CNT>>, _} ->
            do_insert_bag_(
              Sz, K, ?rocksdb:iterator_move(I, next), I, V, N, Ref, TSz);
        _ when TSz =:= false ->
            %% no size maintenance: plain put with next counter value
            Key = <<K/binary, (Prev+1):?BAG_CNT>>,
            ?rocksdb:put(Ref, Key, V, []);
        _ ->
            %% size maintenance: batch the size record with the data record
            NewSz = TSz + 1,
            {Ki, Vi} = info_obj(size, NewSz),
            Key = <<K/binary, (Prev+1):?BAG_CNT>>,
            ?rocksdb:write(Ref, [{put, Ki, Vi}, {put, Key, V}], []),
            NewSz
    end.

%% server-side part
%% Delete by key; four cases: {set|bag} x {with|without} size counter.
do_delete(Key, #st{ref = Ref, type = bag, maintain_size = false}) ->
    do_delete_bag(byte_size(Key), Key, Ref, false);
do_delete(Key, #st{ets = Ets, ref = Ref, type = bag, maintain_size = true}) ->
    Sz = byte_size(Key),
    CurSz = read_info(size, 0, Ets),
    NewSz = do_delete_bag(Sz, Key, Ref, CurSz),
    ets_insert_info(Ets, size, NewSz),
    ok;
do_delete(Key, #st{ref = Ref, maintain_size = false}) ->
    ?rocksdb:delete(Ref, Key, []);
do_delete(Key, #st{ets = Ets, ref = Ref, maintain_size = true}) ->
    CurSz = read_info(size, 0, Ets),
    case ?rocksdb:get(Ref, Key, [{fill_cache,true}]) of
        {ok, _} ->
            NewSz = CurSz -1,
            {Ki, Vi} = info_obj(size, NewSz),
            ok = ?rocksdb:write(Ref, [{delete, Key}, {put, Ki, Vi}], []),
            ets_insert_info(Ets, size, NewSz);
        not_found ->
            false
    end.

%% Collect all stored keys for bag key Key, delete them in one batch,
%% and return the updated size counter (or TSz when not maintained).
do_delete_bag(Sz, Key, Ref, TSz) ->
    Found =
        with_keys_only_iterator(
          Ref, fun(I) ->
                       do_delete_bag_(Sz, Key, ?rocksdb:iterator_move(I, Key),
                                      Ref, I)
               end),
    case {Found, TSz} of
        {[], _} ->
            TSz;
        {_, false} ->
            ?rocksdb:write(Ref, [{delete, K} || K <- Found], []);
        {_, _} ->
            N = length(Found),
            NewSz = TSz - N,
            {Ki, Vi} = info_obj(size, NewSz),
            ?rocksdb:write(Ref, [{put, Ki, Vi} |
                                 [{delete, K} || K <- Found]], []),
            NewSz
    end.

%% NOTE(review): <<K:Sz/binary, _:?BAG_CNT>> pattern restored (was `<>`).
do_delete_bag_(Sz, K, Res, Ref, I) ->
    case Res of
        {ok, K} ->
            do_delete_bag_(Sz, K, ?rocksdb:iterator_move(I, next),
                           Ref, I);
        {ok, <<K:Sz/binary, _:?BAG_CNT>> = Key} ->
            [Key |
             do_delete_bag_(Sz, K, ?rocksdb:iterator_move(I, next),
                            Ref, I)];
        _ ->
            []
    end.

%% Collect all keys matching Pat via do_fold, then batch-delete them,
%% adjusting the maintained size counter when enabled.
do_match_delete(Pat, #st{ets = Ets, ref = Ref, tab = Tab, type = Type,
                         maintain_size = MaintainSize}) ->
    Fun = fun(_, Key, Acc) -> [Key|Acc] end,
    Keys = do_fold(Ref, Tab, Type, Fun, [], [{Pat,[],['$_']}], 30),
    case {Keys, MaintainSize} of
        {[], _} ->
            ok;
        {_, false} ->
            ?rocksdb:write(Ref, [{delete, K} || K <- Keys], []),
            ok;
        {_, true} ->
            CurSz = read_info(size, 0, Ets),
            NewSz = max(CurSz - length(Keys), 0),
            {Ki, Vi} = info_obj(size, NewSz),
            ?rocksdb:write(Ref, [{put, Ki, Vi} |
                                 [{delete, K} || K <- Keys]], []),
            ets_insert_info(Ets, size, NewSz),
            ok
    end.

%% At server init: either recount the table size (obsolete path), or keep
%% the cached value, or drop any stale size record when not maintaining.
recover_size_info(#st{ ref = Ref
                     , tab = Tab
                     , type = Type
                     , maintain_size = MaintainSize
                     } = St) ->
    %% TODO: shall_update_size_info is obsolete, remove
    case shall_update_size_info(Tab) of
        true ->
            Sz = do_fold(Ref, Tab, Type, fun(_, Acc) -> Acc+1 end,
                         0, [{'_',[],['$_']}], 3),
            write_info_(size, Sz, St);
        false ->
            case MaintainSize of
                true ->
                    %% info initialized by rocksdb_to_ets/2
                    %% TODO: if there is no stored size, recompute it
                    ignore;
                false ->
                    %% size is not maintained, ensure it's marked accordingly
                    delete_info_(size, St)
            end
    end,
    St.

shall_update_size_info({_, index, _}) ->
    false;
shall_update_size_info(Tab) ->
    property(Tab, update_size_info, false).

should_maintain_size(Tab) ->
    property(Tab, maintain_size, false).

%% Read a mnesia table property, defaulting on any failure (e.g. schema
%% not yet available).
property(Tab, Prop, Default) ->
    try mnesia:read_table_property(Tab, Prop) of
        {Prop, P} ->
            P
    catch
        error:_ -> Default;
        exit:_ -> Default
    end.

%% Persist an info item both in rocksdb and in the ETS cache.
write_info_(Item, Val, #st{ets = Ets, ref = Ref}) ->
    rocksdb_insert_info(Ref, Item, Val),
    ets_insert_info(Ets, Item, Val).

ets_insert_info(Ets, Item, Val) ->
    ets:insert(Ets, {{info, Item}, Val}).

ets_delete_info(Ets, Item) ->
    ets:delete(Ets, {info, Item}).

rocksdb_insert_info(Ref, Item, Val) ->
    EncKey = info_key(Item),
    EncVal = encode_val(Val),
    ?rocksdb:put(Ref, EncKey, EncVal, []).

rocksdb_delete_info(Ref, Item) ->
    EncKey = info_key(Item),
    ?rocksdb:delete(Ref, EncKey, []).

info_obj(Item, Val) ->
    {info_key(Item), encode_val(Val)}.

%% Info records live in their own key space, prefixed with ?INFO_TAG so
%% they sort before all data records.
%% NOTE(review): restored the binary construction; the extracted source
%% had the invalid token `<>`.
info_key(Item) ->
    <<?INFO_TAG, (encode_key(Item))/binary>>.

delete_info_(Item, #st{ets = Ets, ref = Ref}) ->
    rocksdb_delete_info(Ref, Item),
    ets_delete_info(Ets, Item).

read_info(Item, Default, Ets) ->
    case ets:lookup(Ets, {info,Item}) of
        [] ->
            Default;
        [{_,Val}] ->
            Val
    end.

tab_name(icache, Tab) ->
    list_to_atom("mnesia_ext_icache_" ++ tabname(Tab));
tab_name(info, Tab) ->
    list_to_atom("mnesia_ext_info_" ++ tabname(Tab)).

proc_name(_Alias, Tab) ->
    list_to_atom("mnesia_ext_proc_" ++ tabname(Tab)).


%% ----------------------------------------------------------------------------
%% PRIVATE SELECT MACHINERY
%% ----------------------------------------------------------------------------

do_select(Ref, Tab, Type, MS, Limit) ->
    do_select(Ref, Tab, Type, MS, false, Limit).

%% AccKeys: when true, results are {EncKey, Match} pairs (used by fold/6).
do_select(Ref, Tab, _Type, MS, AccKeys, Limit) when is_boolean(AccKeys) ->
    Keypat = keypat(MS, keypos(Tab)),
    Sel = #sel{tab = Tab,
               ref = Ref,
               keypat = Keypat,
               ms = MS,
               compiled_ms = ets:match_spec_compile(MS),
               key_only = needs_key_only(MS),
               limit = Limit},
    with_iterator(Ref, fun(I) -> i_do_select(I, Sel, AccKeys, []) end).

%% Start at the key prefix derived from the match spec, or at the start of
%% the data area when the spec has no key prefix.
%% NOTE(review): restored <<?DATA_START>> (was the invalid token `<>`).
i_do_select(I, #sel{keypat = {Pfx, _KP},
                    compiled_ms = MS,
                    limit = Limit} = Sel, AccKeys, Acc) ->
    StartKey =
        case Pfx of
            <<>> ->
                <<?DATA_START>>;
            _ ->
                Pfx
        end,
    select_traverse(?rocksdb:iterator_move(I, StartKey), Limit,
                    Pfx, MS, I, Sel, AccKeys, Acc).

%% True if the match spec can be evaluated on the key alone (no need to
%% decode the value).
needs_key_only([{HP,_,Body}]) ->
    BodyVars = lists:flatmap(fun extract_vars/1, Body),
    %% Note that we express the conditions for "needs more than key" and negate.
    not(wild_in_body(BodyVars) orelse
        case bound_in_headpat(HP) of
            {all,V} -> lists:member(V, BodyVars);
            none -> false;
            Vars -> any_in_body(lists:keydelete(2,1,Vars), BodyVars)
        end);
needs_key_only(_) ->
    %% don't know
    false.

%% Collect all match-spec variables ('$N', '$$', '$_', '_') in a pattern.
extract_vars([H|T]) ->
    extract_vars(H) ++ extract_vars(T);
extract_vars(T) when is_tuple(T) ->
    extract_vars(tuple_to_list(T));
extract_vars(T) when T=='$$'; T=='$_' ->
    [T];
extract_vars(T) when is_atom(T) ->
    case is_wild(T) of
        true ->
            [T];
        false ->
            []
    end;
extract_vars(_) ->
    [].

any_in_body(Vars, BodyVars) ->
    lists:any(fun({_,Vs}) ->
                      intersection(Vs, BodyVars) =/= []
              end, Vars).

intersection(A,B) when is_list(A), is_list(B) ->
    A -- (A -- B).

wild_in_body(BodyVars) ->
    intersection(BodyVars, ['$$','$_']) =/= [].

bound_in_headpat(HP) when is_atom(HP) ->
    {all, HP};
bound_in_headpat(HP) when is_tuple(HP) ->
    [_|T] = tuple_to_list(HP),
    map_vars(T, 2);
bound_in_headpat(_) ->
    %% this is not the place to throw an exception
    none.

%% Map each head-pattern position (from 2) to the variables bound there.
map_vars([H|T], P) ->
    case extract_vars(H) of
        [] ->
            map_vars(T, P+1);
        Vs ->
            [{P, Vs}|map_vars(T, P+1)]
    end;
map_vars([], _) ->
    [].

%% Walk the iterator while the key still carries the prefix, running the
%% compiled match spec on each reconstructed record.
select_traverse({ok, K, V}, Limit, Pfx, MS, I, #sel{tab = Tab} = Sel,
                AccKeys, Acc) ->
    case is_prefix(Pfx, K) of
        true ->
            Rec = setelement(keypos(Tab), decode_val(V), decode_key(K)),
            case ets:match_spec_run([Rec], MS) of
                [] ->
                    select_traverse(
                      ?rocksdb:iterator_move(I, next), Limit, Pfx, MS,
                      I, Sel, AccKeys, Acc);
                [Match] ->
                    Acc1 = if AccKeys ->
                                   [{K, Match}|Acc];
                              true ->
                                   [Match|Acc]
                           end,
                    traverse_continue(K, decr(Limit), Pfx, MS, I, Sel, AccKeys, Acc1)
            end;
        false ->
            {lists:reverse(Acc), '$end_of_table'}
    end;
select_traverse({error, _}, _, _, _, _, _, _, Acc) ->
    {lists:reverse(Acc), '$end_of_table'}.

%% NOTE(review): restored the <<A:Sa/binary, _/binary>> pattern; the
%% extracted source had the invalid token `<>`.
is_prefix(A, B) when is_binary(A), is_binary(B) ->
    Sa = byte_size(A),
    case B of
        <<A:Sa/binary, _/binary>> ->
            true;
        _ ->
            false
    end.

decr(I) when is_integer(I) ->
    I-1;
decr(infinity) ->
    infinity.

%% Limit exhausted: return results so far plus a continuation closure that
%% re-opens an iterator and resumes just after key K.
traverse_continue(K, 0, Pfx, MS, _I, #sel{limit = Limit, ref = Ref} = Sel, AccKeys, Acc) ->
    {lists:reverse(Acc),
     fun() ->
             with_iterator(Ref,
                           fun(NewI) ->
                                   select_traverse(iterator_next(NewI, K),
                                                   Limit, Pfx, MS, NewI, Sel,
                                                   AccKeys, [])
                           end)
     end};
traverse_continue(_K, Limit, Pfx, MS, I, Sel, AccKeys, Acc) ->
    select_traverse(?rocksdb:iterator_move(I, next), Limit, Pfx, MS, I, Sel, AccKeys, Acc).

%% Seek to K; if K itself is still present, step past it.
iterator_next(I, K) ->
    case ?rocksdb:iterator_move(I, K) of
        {ok, K, _} ->
            ?rocksdb:iterator_move(I, next);
        Other ->
            Other
    end.

%% Derive a sext key prefix and key-level match spec from the first
%% match-spec clause (used to bound the iterator scan).
keypat([{HeadPat,Gs,_}|_], KeyPos) when is_tuple(HeadPat) ->
    KP = element(KeyPos, HeadPat),
    KeyVars = extract_vars(KP),
    Guards = relevant_guards(Gs, KeyVars),
    Pfx = mnesia_rocksdb_sext:prefix(KP),
    {Pfx, [{KP, Guards, [true]}]};
keypat(_, _) ->
    {<<>>, [{'_',[],[true]}]}.

%% Keep only guards that reference key variables exclusively.
relevant_guards(Gs, Vars) ->
    case Vars -- ['_'] of
        [] ->
            [];
        Vars1 ->
            Fun =
                fun(G) ->
                        Vg = extract_vars(G),
                        intersection(Vg, Vars1) =/= [] andalso (Vg--Vars1) == []
                end,
            lists:filter(Fun, Gs)
    end.

%% ----------------------------------------------------------------------------
%% COMMON PRIVATE
%% ----------------------------------------------------------------------------

%% Note that since a callback can be used as an indexing backend, we
%% cannot assume that keypos will always be 2. For indexes, the tab
%% name will be {Tab, index, Pos}, and The object structure will be
%% {{IxKey,Key}} for an ordered_set index, and {IxKey,Key} for a bag
%% index.
%%
keypos({_, index, _}) ->
    1;
keypos({_, retainer, _}) ->
    2;
keypos(Tab) when is_atom(Tab) ->
    2.

%% Keys are sext-encoded so that binary order matches Erlang term order.
encode_key(Key) ->
    mnesia_rocksdb_sext:encode(Key).

decode_key(CodedKey) ->
    case mnesia_rocksdb_sext:partial_decode(CodedKey) of
        {full, Result, _} ->
            Result;
        _ ->
            error(badarg, CodedKey)
    end.

%% Values need no ordering property; plain external term format suffices.
encode_val(Val) ->
    term_to_binary(Val).

decode_val(CodedVal) ->
    binary_to_term(CodedVal).

%% Ensure the table's data directory exists. An existing dir is accepted
%% only if it lives under the mnesia dir.
create_mountpoint(Tab) ->
    MPd = data_mountpoint(Tab),
    case filelib:is_dir(MPd) of
        false ->
            file:make_dir(MPd),
            ok;
        true ->
            Dir = mnesia_lib:dir(),
            case lists:prefix(Dir, MPd) of
                true ->
                    ok;
                false ->
                    {error, exists}
            end
    end.

%% delete_mountpoint(Tab) ->
%%     MPd = data_mountpoint(Tab),
%%     assert_proper_mountpoint(Tab, MPd),
%%     ok = destroy_db(MPd, []).

assert_proper_mountpoint(_Tab, _MPd) ->
    %% TODO: not yet implemented. How to verify that the MPd var points
    %% to the directory we actually want deleted?
    ok.

data_mountpoint(Tab) ->
    Dir = mnesia_monitor:get_env(dir),
    filename:join(Dir, tabname(Tab) ++ ".extldb").

%% Flatten a (possibly compound) table identifier into a directory name.
tabname({Tab, index, {{Pos},_}}) ->
    atom_to_list(Tab) ++ "-=" ++ atom_to_list(Pos) ++ "=-_ix";
tabname({Tab, index, {Pos,_}}) ->
    atom_to_list(Tab) ++ "-" ++ integer_to_list(Pos) ++ "-_ix";
tabname({Tab, retainer, Name}) ->
    atom_to_list(Tab) ++ "-" ++ retainername(Name) ++ "-_RET";
tabname(Tab) when is_atom(Tab) ->
    atom_to_list(Tab) ++ "-_tab".

%% Render a retainer name as a flat string, falling back to io_lib:write
%% for names that are not printable latin1.
retainername(Name) when is_atom(Name) ->
    atom_to_list(Name);
retainername(Name) when is_list(Name) ->
    try binary_to_list(list_to_binary(Name))
    catch
        error:_ ->
            lists:flatten(io_lib:write(Name))
    end;
retainername(Name) ->
    lists:flatten(io_lib:write(Name)).

%% Scan the mnesia dir for index/retainer directories belonging to Tab.
related_resources(Tab) ->
    TabS = atom_to_list(Tab),
    Dir = mnesia_monitor:get_env(dir),
    case file:list_dir(Dir) of
        {ok, Files} ->
            lists:flatmap(
              fun(F) ->
                      Full = filename:join(Dir, F),
                      case is_index_dir(F, TabS) of
                          false ->
                              case is_retainer_dir(F, TabS) of
                                  false ->
                                      [];
                                  {true, Name} ->
                                      [{{Tab, retainer, Name}, Full}]
                              end;
                          {true, Pos} ->
                              [{{Tab, index, {Pos,ordered}}, Full}]
                      end
              end, Files);
        _ ->
            []
    end.

is_index_dir(F, TabS) ->
    case re:run(F, TabS ++ "-([0-9]+)-_ix.extldb", [{capture, [1], list}]) of
        nomatch ->
            false;
        {match, [P]} ->
            {true, list_to_integer(P)}
    end.
+ +is_retainer_dir(F, TabS) -> + case re:run(F, TabS ++ "-(.+)-_RET", [{capture, [1], list}]) of + nomatch -> + false; + {match, [Name]} -> + {true, Name} + end. + +get_ref(Alias, Tab) -> + call(Alias, Tab, get_ref). + +fold(Alias, Tab, Fun, Acc, MS, N) -> + {Ref, Type} = get_ref(Alias, Tab), + do_fold(Ref, Tab, Type, Fun, Acc, MS, N). + +%% can be run on the server side. +do_fold(Ref, Tab, Type, Fun, Acc, MS, N) -> + {AccKeys, F} = + if is_function(Fun, 3) -> + {true, fun({K,Obj}, Acc1) -> + Fun(Obj, K, Acc1) + end}; + is_function(Fun, 2) -> + {false, Fun} + end, + do_fold1(do_select(Ref, Tab, Type, MS, AccKeys, N), F, Acc). + +do_fold1('$end_of_table', _, Acc) -> + Acc; +do_fold1({L, Cont}, Fun, Acc) -> + Acc1 = lists:foldl(Fun, Acc, L), + do_fold1(select(Cont), Fun, Acc1). + +is_wild('_') -> + true; +is_wild(A) when is_atom(A) -> + case atom_to_list(A) of + "\$" ++ S -> + try begin + _ = list_to_integer(S), + true + end + catch + error:_ -> + false + end; + _ -> + false + end; +is_wild(_) -> + false. diff --git a/src/mnesia_rocksdb_app.erl b/src/mnesia_rocksdb_app.erl new file mode 100644 index 0000000..9f84291 --- /dev/null +++ b/src/mnesia_rocksdb_app.erl @@ -0,0 +1,34 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_app). 
+ +-behaviour(application). + +%% Application callbacks +-export([start/2, stop/1]). + +%% =================================================================== +%% Application callbacks +%% =================================================================== + +start(_StartType, _StartArgs) -> + mnesia_rocksdb_sup:start_link(). + +stop(_State) -> + ok. diff --git a/src/mnesia_rocksdb_params.erl b/src/mnesia_rocksdb_params.erl new file mode 100644 index 0000000..de2eeab --- /dev/null +++ b/src/mnesia_rocksdb_params.erl @@ -0,0 +1,149 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_params). + +-behaviour(gen_server). + +-export([lookup/2, + store/2, + delete/1]). + +-export([start_link/0, + init/1, + handle_call/3, + handle_cast/2, + handle_info/2, + terminate/2, + code_change/3]). + +-include("mnesia_rocksdb_tuning.hrl"). + +-define(KB, 1024). +-define(MB, 1024 * 1024). +-define(GB, 1024 * 1024 * 1024). + +-ifdef(DEBUG). +-define(dbg(Fmt, Args), io:fwrite(user,"~p:~p: "++(Fmt),[?MODULE,?LINE|Args])). +-else. +-define(dbg(Fmt, Args), ok). +-endif. + +lookup(Tab, Default) -> + try ets:lookup(?MODULE, Tab) of + [{_, Params}] -> + Params; + [] -> + Default + catch error:badarg -> + Default + end. 
+ +store(Tab, Params) -> + ets:insert(?MODULE, {Tab, Params}). + +delete(Tab) -> + ets:delete(?MODULE, Tab). + +start_link() -> + case ets:info(?MODULE, name) of + undefined -> + ets:new(?MODULE, [ordered_set, public, named_table]), + load_tuning_parameters(); + _ -> + ok + end, + gen_server:start_link({local, ?MODULE}, ?MODULE, [], []). + +init(_) -> + {ok, []}. + +handle_call(_, _, S) -> {reply, error, S}. +handle_cast(_, S) -> {noreply, S}. +handle_info(_, S) -> {noreply, S}. +terminate(_, _) -> ok. +code_change(_, S, _) -> {ok, S}. + +load_tuning_parameters() -> + case application:get_env(mnesia_rocksdb, tuning_params) of + {ok, Ps} -> + case Ps of + {consult, F} -> consult(F); + {script, F} -> script(F); + _ when is_list(Ps) -> + store_params(Ps) + end; + _ -> + ok + end. + +consult(F) -> + case file:consult(F) of + {ok, Terms} -> + store_params(Terms); + {error, Reason} -> + {error, {Reason, F}} + end. + +script(F) -> + case file:script(F) of + {ok, Terms} -> + store_params(Terms); + {error, Reason} -> + {error, {Reason, F}} + end. + +store_params(Params) -> + _ = lists:foreach(fun({_,S}) -> valid_size(S) end, Params), + NTabs = length(Params), + Env0= mnesia_rocksdb_tuning:describe_env(), + Env = Env0#tuning{n_tabs = NTabs}, + ?dbg("Env = ~p~n", [Env]), + TotalFiles = lists:sum([mnesia_rocksdb_tuning:max_files(Sz) || + {_, Sz} <- Params]), + ?dbg("TotalFiles = ~p~n", [TotalFiles]), + MaxFs = Env#tuning.max_files, + ?dbg("MaxFs = ~p~n", [MaxFs]), + FsHeadroom = MaxFs * 0.6, + ?dbg("FsHeadroom = ~p~n", [FsHeadroom]), + FilesFactor = if TotalFiles =< FsHeadroom -> + 1; % don't have to scale down + true -> + FsHeadroom / TotalFiles + end, + Env1 = Env#tuning{files_factor = FilesFactor}, + ?dbg("Env1 = ~p~n", [Env1]), + lists:foreach( + fun({Tab, Sz}) when is_atom(Tab); + is_atom(element(1,Tab)), + is_integer(element(2,Tab)) -> + ets:insert(?MODULE, {Tab, ldb_params(Sz, Env1, Tab)}) + end, Params). 
+ +ldb_params(Sz, Env, _Tab) -> + MaxFiles = mnesia_rocksdb_tuning:max_files(Sz) * Env#tuning.files_factor, + Opts = if Env#tuning.avail_ram > 100 -> % Gigabytes + [{write_buffer_size, mnesia_rocksdb_tuning:write_buffer(Sz)}, + {cache_size, mnesia_rocksdb_tuning:cache(Sz)}]; + true -> + [] + end, + [{max_open_files, MaxFiles} | Opts]. + +valid_size({I,U}) when is_number(I) -> + true = lists:member(U, [k,m,g]). diff --git a/src/mnesia_rocksdb_sext.erl b/src/mnesia_rocksdb_sext.erl new file mode 100644 index 0000000..74bea96 --- /dev/null +++ b/src/mnesia_rocksdb_sext.erl @@ -0,0 +1,1100 @@ +%% -*- erlang-indent-level: 4; indent-tabs-mode: nil +%%============================================================================== +%% Copyright 2014-2016 Ulf Wiger +%% +%% Licensed under the Apache License, Version 2.0 (the "License"); +%% you may not use this file except in compliance with the License. +%% You may obtain a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, software +%% distributed under the License is distributed on an "AS IS" BASIS, +%% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +%% See the License for the specific language governing permissions and +%% limitations under the License. +%%============================================================================== + +%% @author Ulf Wiger +%% @doc Sortable serialization library +%% @end + +-module(mnesia_rocksdb_sext). + +-export([encode/1, encode/2, decode/1, decode_next/1]). +-export([encode_hex/1, decode_hex/1]). +-export([encode_sb32/1, decode_sb32/1]). +-export([prefix/1, + partial_decode/1]). +-export([prefix_hex/1]). +-export([prefix_sb32/1]). +-export([to_sb32/1, from_sb32/1]). +-export([to_hex/1, from_hex/1]). + + +-define(negbig , 8). +-define(neg4 , 9). +-define(pos4 , 10). +-define(posbig , 11). +-define(atom , 12). +-define(reference, 13). +-define(port , 14). 
+-define(pid , 15). +-define(tuple , 16). +-define(list , 17). +-define(binary , 18). +-define(bin_tail , 19). + +-define(is_sext(X), + X==?negbig; + X==?neg4; + X==?pos4; + X==?posbig; + X==?atom; + X==?reference; + X==?port; + X==?pid; + X==?tuple; + X==?list; + X==?binary; + X==?bin_tail). + +-define(IMAX1, 16#ffffFFFFffffFFFF). + +-ifdef(DEBUG). +-define(dbg(Fmt, Args), + case os:getenv("MNESIA_SEXT_DEBUG") of + false -> ok; + _ -> io:fwrite(user,"~p:~p: "++(Fmt),[?MODULE,?LINE|Args]) + end). +-else. +-define(dbg(F,A), ok). +-endif. + +%% @spec encode(T::term()) -> binary() +%% @doc Encodes any Erlang term into a binary. +%% The lexical sorting properties of the encoded binary match those of the +%% original Erlang term. That is, encoded terms sort the same way as the +%% original terms would. +%% @end +%% +encode(X) -> encode(X, false). + +%% @spec encode(T::term(), Legacy::boolean()) -> binary() +%% @doc Encodes an Erlang term using legacy bignum encoding. +%% On March 4 2013, Basho noticed that encoded bignums didn't always sort +%% properly. This bug has been fixed, but the encoding of bignums necessarily +%% changed in an incompatible way. +%% +%% The new decode/1 version can read the old bignum format, but the old +%% version obviously cannot read the new. Using `encode(Term, true)', the term +%% will be encoded using the old format. +%% +%% Use only as transition support. This function will be deprecated in time. +%% @end +encode(X, Legacy) when is_tuple(X) -> encode_tuple(X, Legacy); +encode(X, Legacy) when is_list(X) -> encode_list(X, Legacy); +encode(X, _) when is_pid(X) -> encode_pid(X); +encode(X, _) when is_port(X) -> encode_port(X); +encode(X, _) when is_reference(X) -> encode_ref(X); +encode(X, Legacy) when is_number(X) -> encode_number(X, Legacy); +encode(X, _) when is_binary(X) -> encode_binary(X); +encode(X, _) when is_bitstring(X) -> encode_bitstring(X); +encode(X, _) when is_atom(X) -> encode_atom(X). 
+ +%% @spec encode_sb32(Term::any()) -> binary() +%% @doc Encodes any Erlang term into an sb32-encoded binary. +%% This is similar to {@link encode/1}, but produces an octet string that +%% can be used without escaping in file names (containing only the characters +%% 0..9, A..V and '-'). The sorting properties are preserved. +%% +%% Note: The encoding used is inspired by the base32 encoding described in +%% RFC3548, but uses a different alphabet in order to preserve the sort order. +%% @end +%% +encode_sb32(Term) -> + to_sb32(encode(Term)). + +%% @spec encode_hex(Term::any()) -> binary() +%% @doc Encodes any Erlang term into a hex-encoded binary. +%% This is similar to {@link encode/1}, but produces an octet string that +%% can be used without escaping in file names (containing only the characters +%% 0..9 and A..F). The sorting properties are preserved. +%% +%% Note: The encoding used is regular hex-encoding, with the proviso that only +%% capital letters are used (mixing upper- and lowercase characters would break +%% the sorting property). +%% @end +%% +encode_hex(Term) -> + to_hex(encode(Term)). + +%% @spec prefix(X::term()) -> binary() +%% @doc Encodes a binary for prefix matching of similar encoded terms. +%% Lists and tuples can be prefixed by using the '_' marker, +%% similarly to Erlang match specifications. For example: +%%
    +%%
+%% <ul>
+%% <li>`prefix({1,2,'_','_'})' will result in a binary that is
+%% the same as the first part of any encoded 4-tuple with the first two
+%% elements being 1 and 2. The prefix algorithm will search for the
+%% first `'_'', and treat all following elements as if they
+%% were `'_''.</li>
+%%
+%% <li>`prefix([1,2|'_'])' will result in a binary that is the
+%% same as the first part of any encoded list where the first two elements
+%% are 1 and 2. `prefix([1,2,'_'])' will give the same result,
+%% as the prefix pattern is the same for all lists starting with
+%% `[1,2|...]'.</li>
+%%
+%% <li>`prefix(Binary)' will result in a binary that is the same as the
+%% encoded version of Binary, except that, instead of padding and
+%% terminating, the encoded binary is truncated to the longest byte-aligned
+%% binary. The same is done for bitstrings.</li>
+%%
+%% <li>`prefix({1,[1,2|'_'],'_'})' will prefix-encode the second
+%% element, and let it end the resulting binary. This prefix will match
+%% any 3-tuple where the first element is 1 and the second element is a
+%% list where the first two elements are 1 and 2.</li>
+%%
+%% <li>`prefix([1,[1|'_']|'_'])' will result in a prefix that
+%% matches all lists where the first element is 1 and the second element is
+%% a list where the first element is 1.</li>
+%%
+%% <li>For all other data types, the prefix is the same as the encoded term.
+%% </li>
+%% </ul>
%% @end
%%
prefix(Term) ->
    {_Partial, Prefix} = enc_prefix(Term),
    Prefix.

%% Returns {Partial, Bytes}: Partial is true when a wildcard was
%% encountered, in which case Bytes is a (possibly shortened) prefix
%% of the full encoding.
enc_prefix(T) when is_tuple(T)     -> prefix_tuple(T);
enc_prefix(L) when is_list(L)      -> prefix_list(L);
enc_prefix(P) when is_pid(P)       -> {false, encode_pid(P)};
enc_prefix(P) when is_port(P)      -> {false, encode_port(P)};
enc_prefix(R) when is_reference(R) -> {false, encode_ref(R)};
enc_prefix(N) when is_number(N)    -> {false, encode_number(N)};
enc_prefix(B) when is_binary(B)    -> prefix_binary(B);
enc_prefix(B) when is_bitstring(B) -> prefix_bitstring(B);
enc_prefix(A) when is_atom(A) ->
    case is_wild(A) of
        true  -> {true, <<>>};
        false -> {false, encode_atom(A)}
    end.

%% @spec prefix_sb32(X::term()) -> binary()
%% @doc Like {@link prefix/1}, but producing a prefix for binaries
%% encoded with {@link encode_sb32/1} rather than {@link encode/1}.
%% @end
%%
prefix_sb32(X) ->
    chop_prefix_tail(to_sb32(prefix(X))).

%% @spec prefix_hex(X::term()) -> binary()
%% @doc Like {@link prefix/1}, but producing a prefix for binaries
%% encoded with {@link encode_hex/1} rather than {@link encode/1}.
%% @end
%%
prefix_hex(X) ->
    to_hex(prefix(X)).

%% The sb32 pad characters - and the final encoded unit preceding
%% them, which is not a whole byte when pads are present - must be
%% removed for the result to be a true lexicographic prefix.
chop_prefix_tail(Bin) ->
    Sz = byte_size(Bin),
    Sz6 = Sz - 7,
    Sz4 = Sz - 5,
    Sz3 = Sz - 4,
    Sz1 = Sz - 2,
    case Bin of
        << Pre:Sz6/binary, _, "------" >> -> Pre;
        << Pre:Sz4/binary, _, "----" >>   -> Pre;
        << Pre:Sz3/binary, _, "---" >>    -> Pre;
        << Pre:Sz1/binary, _, "-" >>      -> Pre;
        _                                 -> Bin
    end.

%% @spec decode(B::binary()) -> term()
%% @doc Decodes a binary generated by {@link encode/1}. Fails with
%% badarg unless the entire input is consumed by exactly one term.
%% @end
%%
decode(Bin) ->
    case decode_next(Bin) of
        {Term, <<>>} -> Term;
        Other        -> erlang:error(badarg, Other)
    end.
+ +%% spec decode_sb32(B::binary()) -> term() +%% @doc Decodes a binary generated using the function {@link encode_sb32/1}. +%% @end +%% +decode_sb32(Data) -> + decode(from_sb32(Data)). + +decode_hex(Data) -> + decode(from_hex(Data)). + +encode_tuple(T, Legacy) -> + Sz = size(T), + encode_tuple_elems(1, Sz, T, <>, Legacy). + +prefix_tuple(T) -> + Sz = size(T), + Elems = tuple_to_list(T), + prefix_tuple_elems(Elems, <>). + +%% It's easier to iterate over a tuple by converting it to a list, but +%% since the tuple /can/ be huge, let's do it this way. +encode_tuple_elems(P, Sz, T, Acc, Legacy) when P =< Sz -> + E = encode(element(P,T), Legacy), + encode_tuple_elems(P+1, Sz, T, <>, Legacy); +encode_tuple_elems(_, _, _, Acc, _) -> + Acc. + +prefix_tuple_elems([A|T], Acc) when is_atom(A) -> + case is_wild(A) of + true -> + {true, Acc}; + false -> + E = encode(A), + prefix_tuple_elems(T, <>) + end; +prefix_tuple_elems([H|T], Acc) -> + case enc_prefix(H) of + {true, P} -> + {true, <>}; + {false, E} -> + prefix_tuple_elems(T, <>) + end; +prefix_tuple_elems([], Acc) -> + {false, Acc}. + +encode_list(L, Legacy) -> + encode_list_elems(L, <>, Legacy). + +prefix_list(L) -> + prefix_list_elems(L, <>). + +encode_binary(B) -> + Enc = encode_bin_elems(B), + <>. + +prefix_binary(B) -> + Enc = encode_bin_elems(B), + {false, <>}. + +encode_bitstring(B) -> + Enc = encode_bits_elems(B), + <>. + +prefix_bitstring(B) -> + Enc = encode_bits_elems(B), + {false, <>}. + +encode_pid(P) -> + PBin = term_to_binary(P), + <<131,103,100,ALen:16,Name:ALen/binary,Rest:9/binary>> = PBin, + NameEnc = encode_bin_elems(Name), + <>. + +encode_port(P) -> + PBin = term_to_binary(P), + <<131,102,100,ALen:16,Name:ALen/binary,Rest:5/binary>> = PBin, + NameEnc = encode_bin_elems(Name), + <>. + +encode_ref(R) -> + RBin = term_to_binary(R), + <<131,114,_Len:16,100,NLen:16,Name:NLen/binary,Rest/binary>> = RBin, + NameEnc = encode_bin_elems(Name), + RestEnc = encode_bin_elems(Rest), + <>. 
+ +encode_atom(A) -> + Bin = list_to_binary(atom_to_list(A)), + Enc = encode_bin_elems(Bin), + <>. + +encode_number(N) -> + encode_number(N, false). + +encode_number(N, Legacy) when is_integer(N) -> + encode_int(N, none, Legacy); +encode_number(F, _Legacy) when is_float(F) -> + encode_float(F). + +%% +%% IEEE 764 Binary 64 standard representation +%% http://en.wikipedia.org/wiki/Double_precision_floating-point_format +%% +%% |12345678 12345678 12345678 12345678 12345678 12345678 12345678 12345678 +%% |iEEEEEEE EEEEffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff| +%% +%% i: sign bit +%% E: Exponent, 11 bits +%% f: fraction, 52 bits +%% +%% We perform the following operations: +%% - if E < 1023 (see Exponent bias), the integer part is 0 +%% +encode_float(F) -> + <> = <>, + ?dbg("F = ~p | Exp0 = ~p | Frac = ~p~n", [cF, Exp0, Frac]), + {Int0, Fraction} = + case Exp0 - 1023 of + NegExp when NegExp < 0 -> + Offs = -NegExp, + ?dbg("NegExp = ~p, Offs = ~p~n" + "Frac = ~p~n", [NegExp, Offs, Frac]), + {0, << 0:Offs, 1:1,Frac:52 >>}; + Exp1 -> + ?dbg("Exp1 = ~p~n", [Exp1]), + if Exp1 >= 52 -> + %% Decimal part will be zero + {trunc(F), <<0:52>>}; + true -> + R = 52-Exp1, + ?dbg("R = ~p~n", [R]), + Exp2 = Exp1 + 1, % add the leading 1-bit + ?dbg("Exp2 = ~p~n", [Exp2]), + <> = <<1:1, Frac:52>>, + ?dbg("I = ~p, Frac1 = ~p~n", [I,Frac1]), + {I, <>} + end + end, + if Sign == 1 -> + %% explicitly encode a negative int, since Int0 can be zero. + Int = if Int0 >= 0 -> -Int0; + true -> Int0 + end, + encode_neg_int(Int, Fraction); + Sign == 0 -> + encode_int(Int0, Fraction) + end. + +encode_neg_int(Int, Fraction)-> + encode_neg_int(Int, Fraction,false). +encode_int(I, R) -> + encode_int(I, R, false). 
+ +encode_int(I,R, _Legacy) when I >= 0, I =< 16#7fffffff -> + ?dbg("encode_int(~p, ~p)~n", [I,R]), + if R == none -> + << ?pos4, I:31, 0:1 >>; + true -> + RSz = bit_size(R), + <> = R, + ?dbg("Fraction = ~p~n", [Fraction]), + if Fraction == 0 -> + << ?pos4, I:31, 1:1, 8:8 >>; + true -> + Rbits = encode_bits_elems(R), + << ?pos4, I:31, 1:1, Rbits/binary >> + end + end; +encode_int(I,R, Legacy) when I > 16#7fffffff -> + ?dbg("encode_int(~p, ~p)~n", [I,R]), + Bytes = encode_big(I, Legacy), + if R == none -> + <>; + true -> + RSz = bit_size(R), + <> = R, + ?dbg("Fraction = ~p~n", [Fraction]), + if Fraction == 0 -> + << ?posbig, Bytes/binary, 1:8, 8:8 >>; + true -> + Rbits = encode_bits_elems(R), + <> + end + end; +encode_int(I, R, Legacy) when I < 0 -> + encode_neg_int(I, R,Legacy). + +encode_neg_int(I,R,_Legacy) when I =< 0, I >= -16#7fffffff -> + ?dbg("encode_neg_int(~p, ~p [sz: ~p])~n", [I,pp(R), try bit_size(R) catch error:_ -> "***" end]), + Adj = max_value(31) + I, % keep in mind that I < 0 + ?dbg("Adj = ~p~n", [erlang:integer_to_list(Adj,2)]), + if R == none -> + << ?neg4, Adj:31, 1:1 >>; + true -> + Rbits = encode_neg_bits(R), + ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]), + << ?neg4, Adj:31, 0:1, Rbits/binary >> + end; +encode_neg_int(I,R,Legacy) when I < -16#7fFFffFF -> + ?dbg("encode_neg_int(BIG ~p)~n", [I]), + Bytes = encode_big_neg(I,Legacy), + ?dbg("Bytes = ~p~n", [Bytes]), + if R == none -> + <>; + true -> + Rbits = encode_neg_bits(R), + ?dbg("R = ~p -> RBits = ~p~n", [pp(R), pp(Rbits)]), + <> + end. + +encode_big(I, Legacy) -> + Bl = encode_big1(I), + ?dbg("Bl = ~p~n", [Bl]), + Bb = case Legacy of + false -> + prepend_size(list_to_binary(Bl)); + true -> + list_to_binary(Bl) + end, + ?dbg("Bb = ~p~n", [Bb]), + encode_bin_elems(Bb). + +prepend_size(B) -> + Sz = byte_size(B), + <<255, (encode_size(Sz))/binary, B/binary>>. 
+ +remove_size_bits(<<255, T/binary>>) -> + {_, Rest} = untag_7bits(T, <<>>), + Rest; +remove_size_bits(B) -> + %% legacy bignum + B. + +encode_size(I) when I > 127 -> + B = int_to_binary(I), + tag_7bits(B); +encode_size(I) -> + <>. + +tag_7bits(B) when bit_size(B) > 7 -> + <> = B, + <<1:1, H:7, (tag_7bits(T))/binary>>; +tag_7bits(B) -> + Sz = bit_size(B), + <> = B, + <<0:1, I:7>>. + +untag_7bits(<<1:1, H:7, T/binary>>, Acc) -> + untag_7bits(T, <>); +untag_7bits(<<0:1, H:7, T/binary>>, Acc) -> + AccBits = bit_size(Acc), + HBits = 8 - (AccBits rem 8), + {<>, T}. + +int_to_binary(I) when I =< 16#ff -> <>; +int_to_binary(I) when I =< 16#ffff -> <>; +int_to_binary(I) when I =< 16#ffffff -> <>; +int_to_binary(I) when I =< 16#ffffffff -> <>; +int_to_binary(I) when I =< 16#ffffffffff -> <>; +int_to_binary(I) when I =< 16#ffffffffffff -> <>; +int_to_binary(I) when I =< 16#ffffffffffffff -> <>; +int_to_binary(I) when I =< 16#ffffffffffffffff -> <>; +int_to_binary(I) -> + %% Realm of the ridiculous + list_to_binary( + lists:dropwhile(fun(X) -> X==0 end, binary_to_list(<>))). + +%% This function exists for documentation, but not used right now. +%% It's the reverse of encode_size/1, used for encoding bignums. +%% +%% decode_size(<<1:1, _/bitstring>> = T) -> +%% {SzBin, Rest} = untag_7bits(T, <<>>), +%% Bits = bit_size(SzBin), +%% <> = SzBin, +%% {Sz, Rest}; +%% decode_size(<<0:1, H:7, T/binary>>) -> +%% {H, T}. + +encode_big_neg(I,Legacy) -> + {Words, Max} = get_max(-I), + ?dbg("Words = ~p | Max = ~p~n", [Words,Max]), + Iadj = Max + I, % keep in mind that I < 0 + ?dbg("IAdj = ~p~n", [Iadj]), + Bin = encode_big(Iadj,Legacy), + ?dbg("Bin = ~p~n", [Bin]), + WordsAdj = 16#ffffFFFF - Words, + ?dbg("WordsAdj = ~p~n", [WordsAdj]), + <>. + +encode_big1(I) -> + encode_big1(I, []). + +encode_big1(I, Acc) when I < 16#ff -> + [I|Acc]; +encode_big1(I, Acc) -> + encode_big1(I bsr 8, [I band 16#ff | Acc]). 
+ +encode_list_elems([], Acc, _) -> + <>; +encode_list_elems(B, Acc, Legacy) when is_bitstring(B) -> + %% improper list + <>; +encode_list_elems(E, Acc, Legacy) when not(is_list(E)) -> + %% improper list + <>; +encode_list_elems([H|T], Acc, Legacy) -> + Enc = encode(H,Legacy), + encode_list_elems(T, <>, Legacy). + +prefix_list_elems([], Acc) -> + {false, <>}; +prefix_list_elems(E, Acc) when not(is_list(E)) -> + case is_wild(E) of + true -> + {true, Acc}; + false -> + Marker = if is_bitstring(E) -> ?bin_tail; + true -> 1 + end, + {Bool, P} = enc_prefix(E), + {Bool, <>} + end; +prefix_list_elems([H|T], Acc) -> + case enc_prefix(H) of + {true, P} -> + {true, <>}; + {false, E} -> + prefix_list_elems(T, <>) + end. + +is_wild('_') -> + true; +is_wild(A) when is_atom(A) -> + case atom_to_list(A) of + "\$" ++ S -> + try begin + _ = list_to_integer(S), + true + end + catch + error:_ -> + false + end; + _ -> + false + end; +is_wild(_) -> + false. + +encode_bin_elems(<<>>) -> + <<8>>; +encode_bin_elems(B) -> + Pad = 8 - (size(B) rem 8), + << (<< <<1:1, B1:8>> || <> <= B >>)/bitstring, 0:Pad, 8 >>. + +encode_neg_bits(<<>>) -> + <<247>>; +encode_neg_bits(B) -> + {Padded, TailBits} = pad_neg_bytes(B), + ?dbg("TailBits = ~p~n", [TailBits]), + TailSz0 = bit_size(TailBits), + TailSz = 16#ff - TailSz0, + if TailSz0 == 0 -> + Pad = 8 - (bit_size(Padded) rem 8), + Ip = max_value(Pad), % e.g. max_value(3) -> 2#111 + <>; + true -> + ?dbg("TailSz0 = ~p~n", [TailSz0]), + TailPad = 8 - TailSz0, + ?dbg("TailPad = ~p~n", [TailPad]), + Itp = (1 bsl TailPad)-1, + ?dbg("Itp = ~p~n", [Itp]), + Pad = 8 - ((bit_size(Padded) + 1) rem 8), + ?dbg("Pad = ~p~n", [Pad]), + Ip = max_value(Pad), + ?dbg("Ip = ~p~n", [Ip]), + ?dbg("Pad = ~p~n", [Pad]), + ?dbg("TailSz = ~p~n", [TailSz]), + <> + end. + +pad_neg_bytes(Bin) -> + pad_neg_bytes(Bin, <<>>). 
+ +pad_neg_bytes(<>, Acc) -> + H1 = 16#ff - H, + pad_neg_bytes(T, <>); +pad_neg_bytes(Bits, Acc) when is_bitstring(Bits) -> + Sz = bit_size(Bits), + Max = (1 bsl Sz) - 1, + <> = Bits, + I1 = Max - I0, + {Acc, <>}. + +encode_bits_elems(B) -> + {Padded, TailBits} = pad_bytes(B), + TailSz = bit_size(TailBits), + TailPad = 8-TailSz, + Pad = 8 - ((TailSz + TailPad + bit_size(Padded) + 1) rem 8), + <>. + +pad_bytes(Bin) -> + pad_bytes(Bin, <<>>). + +pad_bytes(<>, Acc) -> + pad_bytes(T, <>); +pad_bytes(Bits, Acc) when is_bitstring(Bits) -> + {Acc, Bits}. + + +%% ------------------------------------------------------ +%% Decoding routines + +-spec decode_next(binary()) -> {any(), binary()}. +%% @spec decode_next(Bin) -> {N, Rest} +%% @doc Decode a binary stream, returning the next decoded term and the +%% stream remainder +%% +%% This function will raise an exception if the beginning of `Bin' is not +%% a valid sext-encoded term. +%% @end +decode_next(<>) -> decode_atom(Rest); +decode_next(<>) -> decode_pid(Rest); +decode_next(<>) -> decode_port(Rest); +decode_next(<>) -> decode_ref(Rest); +decode_next(<>) -> decode_tuple(Sz,Rest); +decode_next(<>) -> decode_list(Rest); +decode_next(<>) -> decode_neg_big(Rest); +decode_next(<>) -> decode_pos_big(Rest); +decode_next(<>) -> decode_neg(I,F,Rest); +decode_next(<>) -> decode_pos(I,F,Rest); +decode_next(<>) -> decode_binary(Rest). + +-spec partial_decode(binary()) -> {full | partial, any(), binary()}. +%% @spec partial_decode(Bytes) -> {full | partial, DecodedTerm, Rest} +%% @doc Decode a sext-encoded term or prefix embedded in a byte stream. +%% +%% Example: +%% ``` +%% 1> T = sext:encode({a,b,c}). +%% <<16,0,0,0,3,12,176,128,8,12,177,0,8,12,177,128,8>> +%% 2> sext:partial_decode(<<T/binary, "tail">>). +%% {full,{a,b,c},<<"tail">>} +%% 3> P = sext:prefix({a,b,'_'}). +%% <<16,0,0,0,3,12,176,128,8,12,177,0,8>> +%% 4> sext:partial_decode(<<P/binary, "tail">>). 
+%% {partial,{a,b,'_'},<<"tail">>} +%% ''' +%% +%% Note that a decoded prefix may not be exactly like the encoded prefix. +%% For example, ['_'] will be encoded as +%% <<17>>, i.e. only the 'list' opcode. The +%% decoded prefix will be '_', since the encoded prefix would +%% also match the empty list. The decoded prefix will always be a prefix to +%% anything to which the original prefix is a prefix. +%% +%% For tuples, {1,'_',3} encoded and decoded, will result in +%% {1,'_','_'}, i.e. the tuple size is kept, but the elements +%% after the first wildcard are replaced with wildcards. +%% @end +partial_decode(<>) -> + partial_decode_tuple(Sz, Rest); +partial_decode(<>) -> + partial_decode_list(Rest); +partial_decode(Other) -> + try decode_next(Other) of + {Dec, Rest} -> + {full, Dec, Rest} + catch + error:function_clause -> + {partial, '_', Other} + end. + +decode_atom(B) -> + {Bin, Rest} = decode_binary(B), + {list_to_atom(binary_to_list(Bin)), Rest}. + +decode_tuple(Sz, Elems) -> + decode_tuple(Sz,Elems,[]). + +decode_tuple(0, Rest, Acc) -> + {list_to_tuple(lists:reverse(Acc)), Rest}; +decode_tuple(N, Elems, Acc) -> + {Term, Rest} = decode_next(Elems), + decode_tuple(N-1, Rest, [Term|Acc]). + +partial_decode_tuple(Sz, Elems) -> + partial_decode_tuple(Sz, Elems, []). + +partial_decode_tuple(0, Rest, Acc) -> + {full, list_to_tuple(lists:reverse(Acc)), Rest}; +partial_decode_tuple(N, Elems, Acc) -> + case partial_decode(Elems) of + {partial, Term, Rest} -> + {partial, list_to_tuple( + lists:reverse([Term|Acc]) ++ pad_(N-1)), Rest}; + {full, Dec, Rest} -> + partial_decode_tuple(N-1, Rest, [Dec|Acc]) + end. + +pad_(0) -> + []; +pad_(N) when N > 0 -> + ['_'|pad_(N-1)]. + +partial_decode_list(Elems) -> + partial_decode_list(Elems, []). 
+ +partial_decode_list(<<>>, Acc) -> + {partial, lists:reverse(Acc) ++ '_', <<>>}; +partial_decode_list(<<2, Rest/binary>>, Acc) -> + {full, lists:reverse(Acc), Rest}; +partial_decode_list(<>, Acc) -> + %% improper list, binary tail + {Term, Rest} = decode_next(Next), + {full, lists:reverse(Acc) ++ Term, Rest}; +partial_decode_list(<<1, Next/binary>>, Acc) -> + {Result, Term, Rest} = partial_decode(Next), + {Result, lists:reverse(Acc) ++ Term, Rest}; +partial_decode_list(<> = Next, Acc) when ?is_sext(X) -> + case partial_decode(Next) of + {full, Term, Rest} -> + partial_decode_list(Rest, [Term|Acc]); + {partial, Term, Rest} -> + {partial, lists:reverse([Term|Acc]) ++ '_', Rest} + end; +partial_decode_list(Rest, Acc) -> + {partial, lists:reverse(Acc) ++ '_', Rest}. + +decode_list(Elems) -> + decode_list(Elems, []). + +decode_list(<<2, Rest/binary>>, Acc) -> + {lists:reverse(Acc), Rest}; +decode_list(<>, Acc) -> + %% improper list, binary tail + {Term, Rest} = decode_next(Next), + {lists:reverse(Acc) ++ Term, Rest}; +decode_list(<<1, Next/binary>>, Acc) -> + %% improper list, non-binary tail + {Term, Rest} = decode_next(Next), + {lists:reverse(Acc) ++ Term, Rest}; +decode_list(Elems, Acc) -> + {Term, Rest} = decode_next(Elems), + decode_list(Rest, [Term|Acc]). + +decode_pid(Bin) -> + {Name, Rest} = decode_binary(Bin), + <> = Rest, + NameSz = size(Name), + {binary_to_term(<<131,103,100,NameSz:16,Name/binary,Tail/binary>>), Rest1}. + +decode_port(Bin) -> + {Name, Rest} = decode_binary(Bin), + <> = Rest, + NameSz = size(Name), + {binary_to_term(<<131,102,100,NameSz:16,Name/binary,Tail/binary>>), Rest1}. + +decode_ref(Bin) -> + {Name, Rest} = decode_binary(Bin), + {Tail, Rest1} = decode_binary(Rest), + NLen = size(Name), + Len = (size(Tail)-1) div 4, + RefBin = <<131,114,Len:16,100,NLen:16,Name/binary,Tail/binary>>, + {binary_to_term(RefBin), Rest1}. 
+ +decode_neg(I, 1, Rest) -> + {(I - 16#7fffFFFF), Rest}; +decode_neg(I0, 0, Bin) -> % for negative numbers, 0 means that it's a float + I = 16#7fffFFFF - I0, + ?dbg("decode_neg()... I = ~p | Bin = ~p~n", [I, Bin]), + decode_neg_float(I, Bin). + +decode_neg_float(0, Bin) -> + {R, Rest} = decode_neg_binary(Bin), + ?dbg("Bin = ~p~n", [pp(Bin)]), + ?dbg("R = ~p | Rest = ~p~n", [pp(R), Rest]), + Sz = bit_size(R), + Offs = Sz - 53, + ?dbg("Offs = ~p | Sz - ~p~n", [Offs, Sz]), + <<_:Offs, 1:1, I:52>> = R, + Exp = 1023 - Offs, + <> = <<1:1, Exp:11, I:52>>, + {F, Rest}; +decode_neg_float(I, Bin) -> + {R, Rest} = decode_neg_binary(Bin), + ?dbg("decode_neg_float: I = ~p | R = ~p~n", [I, R]), + Sz = bit_size(R), + ?dbg("Sz = ~p~n", [Sz]), + <> = R, + ?dbg("Ri = ~p~n", [Ri]), + if Ri == 0 -> + %% special case + {0.0-I, Rest}; + true -> + IBits = strip_first_one(I), + ?dbg("IBits = ~p~n", [pp(IBits)]), + Bits = <>, + ?dbg("Bits = ~p (Sz: ~p)~n", [pp(Bits), bit_size(Bits)]), + Exp = bit_size(IBits) + 1023, + ?dbg("Exp = ~p~n", [Exp]), + <> = <>, + ?dbg("Frac = ~p~n", [Frac]), + <> = <<1:1, Exp:11, Frac:52>>, + {F, Rest} + end. + +decode_pos(I, 0, Rest) -> + {I, Rest}; +decode_pos(0, 1, Bin) -> + {Real, Rest} = decode_binary(Bin), + Offs = bit_size(Real) - 53, + <<0:Offs, 1:1, Frac:52>> = Real, + Exp = 1023 - Offs, + <> = <<0:1, Exp:11, Frac:52>>, + {F, Rest}; +decode_pos(I, 1, Bin) -> % float > 1 + ?dbg("decode_pos(~p, 1, ~p)~n", [I, Bin]), + {Real, Rest} = decode_binary(Bin), + case decode_binary(Bin) of + {<<>>, Rest} -> + <> = <>, + {F, Rest}; + {Real, Rest} -> + ?dbg("Real = ~p~n", [Real]), + Exp = 52 - bit_size(Real) + 1023, + ?dbg("Exp = ~p~n", [Exp]), + Bits0 = <>, + ?dbg("Bits0 = ~p~n", [Bits0]), + Bits = strip_one(Bits0), + <> = Bits, + <> = <<0:1, Exp:11, Frac:52>>, + {F, Rest} + end. 
+ +decode_pos_big(Bin) -> + ?dbg("decode_pos_big(~p)~n", [Bin]), + {Ib0, Rest} = decode_binary(Bin), + Ib = remove_size_bits(Ib0), + ?dbg("Ib = ~p~n", [Ib]), + ISz = size(Ib) * 8, + ?dbg("ISz = ~p~n", [ISz]), + <> = Ib, + ?dbg("I = ~p~n", [I]), + <> = Rest, + ?dbg("Rest1 = ~p~n", [Rest1]), + decode_pos(I, F, Rest1). + +decode_neg_big(Bin) -> + ?dbg("decode_neg_big(~p)~n", [Bin]), + <> = Bin, + Words = 16#ffffFFFF - WordsAdj, + ?dbg("Words = ~p~n", [Words]), + {Ib0, Rest1} = decode_binary(Rest), + Ib = remove_size_bits(Ib0), + ?dbg("Ib = ~p | Rest1 = ~p~n", [Ib, Rest1]), + ISz = size(Ib) * 8, + <> = Ib, + ?dbg("I0 = ~p~n", [I0]), + Max = imax(Words), + ?dbg("Max = ~p~n", [Max]), + I = Max - I0, + ?dbg("I = ~p~n", [I]), + <> = Rest1, + ?dbg("F = ~p | Rest2 = ~p~n", [F, Rest2]), + if F == 0 -> + decode_neg_float(I, Rest2); + F == 16#ff -> + {-I, Rest2} + end. + +%% optimization - no need to loop through a very large number of zeros. +strip_first_one(I) -> + Sz = if I < 16#ff -> 8; + I < 16#ffff -> 16; + I < 16#ffffff -> 24; + I < 16#ffffffff -> 32; + true -> 52 + end, + strip_one(<>). + +strip_one(<<0:1, Rest/bitstring>>) -> strip_one(Rest); +strip_one(<<1:1, Rest/bitstring>>) -> Rest. + + +decode_binary(<<8, Rest/binary>>) -> {<<>>, Rest}; +decode_binary(B) -> decode_binary(B, 0, <<>>). + +decode_binary(<<1:1,H:8,Rest/bitstring>>, N, Acc) -> + case Rest of + <<1:1,_/bitstring>> -> + decode_binary(Rest, N+9, << Acc/binary, H >>); + _ -> + Pad = 8 - ((N+9) rem 8), + <<0:Pad,EndBits,Rest1/binary>> = Rest, + TailPad = 8-EndBits, + <> = <>, + {<< Acc/binary, Tail:EndBits >>, Rest1} + end. + +decode_neg_binary(<<247, Rest/binary>>) -> {<<>>, Rest}; % 16#ff - 8 +decode_neg_binary(B) -> decode_neg_binary(B, 0, <<>>). 
+ +decode_neg_binary(<<0:1,H:8,Rest/bitstring>>, N, Acc) -> + case Rest of + <<0:1,_/bitstring>> -> + decode_neg_binary(Rest, N+9, << Acc/binary, (16#ff - H) >>); + _ -> + Pad = 8 - ((N+9) rem 8), + ?dbg("Pad = ~p~n", [Pad]), + IPad = (1 bsl Pad) - 1, + <> = Rest, + ?dbg("EndBits0 = ~p~n", [EndBits0]), + EndBits = 16#ff - EndBits0, + ?dbg("EndBits = ~p~n", [EndBits]), + if EndBits == 0 -> + {<< Acc/binary, (16#ff - H)>>, Rest1}; + true -> + <> = <<(16#ff - H)>>, + ?dbg("Tail = ~p~n", [Tail]), + {<< Acc/binary, Tail:EndBits >>, Rest1} + end + end. + +%% The largest value that fits in Sz bits +max_value(Sz) -> + (1 bsl Sz) - 1. + +%% The largest value that fits in Words*64 bits. +imax(1) -> max_value(64); +imax(2) -> max_value(128); +imax(Words) -> max_value(Words*64). + +%% Get the smallest imax/1 value that's larger than I. +get_max(I) -> get_max(I, 1, imax(1)). +get_max(I, W, Max) when I > Max -> + get_max(I, W+1, (Max bsl 64) bor ?IMAX1); +get_max(_, W, Max) -> + {W, Max}. + +%% @spec to_sb32(Bits::bitstring()) -> binary() +%% @doc Converts a bitstring into an sb-encoded bitstring +%% +%% sb32 (Sortable base32) is a variant of RFC3548, slightly rearranged to +%% preserve the lexical sorting properties. Base32 was chosen to avoid +%% filename-unfriendly characters. Also important is that the padding +%% character be less than any character in the alphabet +%% +%% sb32 alphabet: +%%
+%% 0 0     6 6     12 C     18 I     24 O     30 U
+%% 1 1     7 7     13 D     19 J     25 P     31 V
+%% 2 2     8 8     14 E     20 K     26 Q  (pad) -
+%% 3 3     9 9     15 F     21 L     27 R
+%% 4 4    10 A     16 G     22 M     28 S
+%% 5 5    11 B     17 H     23 N     29 T
+%% 
+%% @end +%% +to_sb32(Bits) when is_bitstring(Bits) -> + Sz = bit_size(Bits), + {Chunk, Rest, Pad} = + case Sz rem 5 of + 0 -> {Bits, <<>>, <<>>}; + R -> sb32_encode_chunks(Sz, R, Bits) + end, + Enc = << << (c2sb32(C1)) >> || + <> <= Chunk >>, + if Rest == << >> -> + Enc; + true -> + << Enc/bitstring, (c2sb32(Rest)):8, Pad/binary >> + end. + +sb32_encode_chunks(Sz, Rem, Bits) -> + ChunkSz = Sz - Rem, + << C:ChunkSz/bitstring, Rest:Rem >> = Bits, + Pad = encode_pad(Rem), + {C, Rest, Pad}. + +encode_pad(3) -> <<"------">>; +encode_pad(1) -> <<"----">>; +encode_pad(4) -> <<"---">>; +encode_pad(2) -> <<"-">>. + +%% @spec from_sb32(Bits::bitstring()) -> bitstring() +%% @doc Converts from an sb32-encoded bitstring into a 'normal' bitstring +%% +%% This function is the reverse of {@link to_sb32/1}. +%% @end +%% +from_sb32(<< C:8, "------" >>) -> << (sb322c(C)):3 >>; +from_sb32(<< C:8, "----" >> ) -> << (sb322c(C)):1 >>; +from_sb32(<< C:8, "---" >> ) -> << (sb322c(C)):4 >>; +from_sb32(<< C:8, "-" >> ) -> << (sb322c(C)):2 >>; +from_sb32(<< C:8, Rest/bitstring >>) -> + << (sb322c(C)):5, (from_sb32(Rest))/bitstring >>; +from_sb32(<< >>) -> + << >>. + +c2sb32(I) when 0 =< I, I =< 9 -> $0 + I; +c2sb32(I) when 10 =< I, I =< 31 -> $A + I - 10. + +sb322c(I) when $0 =< I, I =< $9 -> I - $0; +sb322c(I) when $A =< I, I =< $V -> I - $A + 10. + +%% @spec to_hex(Bin::binary()) -> binary() +%% @doc Converts a binary into a hex-encoded binary +%% This is conventional hex encoding, with the proviso that +%% only capital letters are used, e.g. `0..9A..F'. +%% @end +to_hex(Bin) -> + << << (nib2hex(N)):8 >> || <> <= Bin >>. + +%% @spec from_hex(Bin::binary()) -> binary() +%% @doc Converts from a hex-encoded binary into a 'normal' binary +%% +%% This function is the reverse of {@link to_hex/1}. +%% +from_hex(Bin) -> + << << (hex2nib(H)):4 >> || <> <= Bin >>. + +nib2hex(N) when 0 =< N, N =< 9 -> $0 + N; +nib2hex(N) when 10 =< N, N =< 15-> $A + N - 10. 
+ +hex2nib(C) when $0 =< C, C =< $9 -> C - $0; +hex2nib(C) when $A =< C, C =< $F -> C - $A + 10. + +-ifdef(DEBUG). +pp(none) -> ""; +pp(B) when is_bitstring(B) -> + [ $0 + I || <> <= B ]. +-endif. + +-ifdef(TEST). +-include_lib("eunit/include/eunit.hrl"). + +encode_test() -> + L = test_list(), + [{I,I} = {I,catch decode(encode(I))} || I <- L]. + +test_list() -> + [-456453453477456464.45456, + -5.23423564, + -1.234234, + -1.23423, + -0.345, + -0.34567, + -0.0034567, + 0, + 0.00012345, + 0.12345, + 1.2345, + 123.45, + 456453453477456464.45456, + a, + aaa, + {}, + {1}, + {1,2}, + {"","123"}, + {"1","234"}, + <<>>, + <<1>>, + <<1,5:3>>, + <<1,5:4>>, + [1,2,3], + [], + self(), + spawn(fun() -> ok end), + make_ref(), + make_ref()| + lists:sublist(erlang:ports(),1,2)]. + +-endif. diff --git a/src/mnesia_rocksdb_sup.erl b/src/mnesia_rocksdb_sup.erl new file mode 100644 index 0000000..3b6b23c --- /dev/null +++ b/src/mnesia_rocksdb_sup.erl @@ -0,0 +1,44 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_sup). + +-behaviour(supervisor). + +%% API +-export([start_link/0]). + +%% Supervisor callbacks +-export([init/1]). 
+ +%% Helper macro for declaring children of supervisor +-define(CHILD(I, Type), {I, {I, start_link, []}, permanent, 5000, Type, [I]}). + +%% =================================================================== +%% API functions +%% =================================================================== + +start_link() -> + supervisor:start_link({local, ?MODULE}, ?MODULE, []). + +%% =================================================================== +%% Supervisor callbacks +%% =================================================================== + +init([]) -> + {ok, { {one_for_one, 5, 10}, [?CHILD(mnesia_rocksdb_params, worker)]} }. diff --git a/src/mnesia_rocksdb_tuning.erl b/src/mnesia_rocksdb_tuning.erl new file mode 100644 index 0000000..417ba03 --- /dev/null +++ b/src/mnesia_rocksdb_tuning.erl @@ -0,0 +1,198 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_tuning). + +-export([describe_env/0, + get_maxfiles/0, get_maxfiles/1, + get_avail_ram/0, + ldb_tabs/0, ldb_tabs/1, + ldb_indexes/0, ldb_indexes/1, + count_ldb_tabs/0, count_ldb_tabs/1, + calc_sizes/0, calc_sizes/1, + ideal_max_files/0, ideal_max_files/1, + max_files/1, + write_buffer/1, + cache/1, + default/1]). + +-include("mnesia_rocksdb_tuning.hrl"). + +-define(KB, 1024). 
+-define(MB, 1024 * 1024). +-define(GB, 1024 * 1024 * 1024). + +describe_env() -> + #tuning{max_files = get_maxfiles(), + avail_ram = get_avail_ram()}. + +get_maxfiles() -> + get_maxfiles(os:type()). + +get_maxfiles({unix,darwin}) -> + Limit = os:cmd("launchctl limit maxfiles"), + [_, Soft, _] = string:tokens(Limit, " \t\n"), + list_to_integer(Soft); +get_maxfiles({unix,linux}) -> + [L|_] = string:tokens(os:cmd("ulimit -n"), "\n"), + list_to_integer(L). + +%% Returns installed RAM in Gigabytes +get_avail_ram() -> + get_avail_ram(os:type()). + +get_avail_ram({unix,darwin}) -> + {match, [S]} = + re:run(os:cmd("/usr/sbin/system_profiler SPHardwareDataType"), + "Memory: (.+) GB", [{capture, [1], list}]), + list_to_integer(S); +get_avail_ram({unix,linux}) -> + {match, [S]} = + re:run(os:cmd("free -g"), "Mem:[ ]+([0-9]+) ",[{capture,[1],list}]), + list_to_integer(S). + +ldb_tabs() -> + ldb_tabs(mnesia_lib:dir()). + +ldb_tabs(Db) -> + ldb_tabs(list_dir(Db), Db). + +ldb_tabs(Fs, _Db) -> + lists:flatmap( + fun(F) -> + case re:run(F, "(.+)-_tab\\.extldb", + [global,{capture,[1],list}]) of + {match, [Match]} -> + Match; + _ -> + [] + end + end, Fs). + +ldb_indexes() -> + ldb_indexes(mnesia_lib:dir()). + +ldb_indexes(Db) -> + ldb_indexes(list_dir(Db), Db). + +ldb_indexes(Fs, _Db) -> + lists:flatmap( + fun(F) -> + case re:run(F, "(.+)-([0-9]+)-_ix\\.extldb", + [global,{capture,[1,2],list}]) of + {match, [[T,P]]} -> + [{T,P}]; + _ -> + [] + end + end, Fs). + +list_dir(D) -> + case file:list_dir(D) of + {ok, Fs} -> Fs; + {error, Reason} -> erlang:error({Reason,D}) + end. + +fname({Tab,IxPos}, Dir) -> + filename:join(Dir, Tab ++ "-" ++ IxPos ++ "-_ix.extldb"); +fname(Tab, Dir) when is_list(Tab) -> + filename:join(Dir, Tab ++ "-_tab.extldb"). + +%% Number of leveldb tables + indexes +count_ldb_tabs() -> + count_ldb_tabs(mnesia_lib:dir()). + +count_ldb_tabs(Db) -> + Fs = list_dir(Db), + length(ldb_tabs(Fs, Db)) + length(ldb_indexes(Fs, Db)). 
+ +calc_sizes() -> + calc_sizes(mnesia_lib:dir()). + +calc_sizes(D) -> + lists:sort( + fun(A,B) -> sort_size(B,A) end, % rev sort + [{T, dir_size(fname(T, D))} || T <- ldb_tabs(D)] + ++ [{I, dir_size(fname(I, D))} || I <- ldb_indexes(D)]). + +ideal_max_files() -> + ideal_max_files(mnesia_lib:dir()). + +ideal_max_files(D) -> + [{T,Sz,max_files(Sz)} || {T, Sz} <- calc_sizes(D)]. + +max_files({I,g}) -> + round(I * 1000) div 20; +max_files({I,m}) when I > 400 -> + round(I) div 20; +max_files(_) -> + default(max_open_files). + +write_buffer({_,g}) -> + 120 * ?MB; +write_buffer({I,m}) when I > 400 -> + 120 * ?MB; +write_buffer(_) -> + default(write_buffer). + +cache({_,g}) -> + 8 * ?MB; +cache({I,m}) when I > 400 -> + 6 * ?MB; +cache(_) -> + default(cache). + +default(write_buffer) -> 2 * ?MB; +default(max_open_files) -> 20; +default(cache) -> 2 * ?MB. + +%% open_file_memory() -> +%% (max_open_files-10) * +%% (184 + (average_sst_filesize/2048) * +%% (8 + ((average_key_size+average_value_size)/2048 +1) * 0.6). + +dir_size(D) -> + R = os:cmd("du -hc " ++ D ++ " | grep total"), + parse_sz(hd(string:tokens(R," \t\n"))). + +parse_sz(S) -> + {match,[I,U]} = re:run(S, "([\\.0-9]+)([BKMG])", [{capture,[1,2],list}]), + {scan_num(I), unit(U)}. + +scan_num(S) -> + case erl_scan:string(S) of + {ok, [{integer,_,I}],_} -> + I; + {ok, [{float,_,F}],_} -> + F + end. + +unit("B") -> b; +unit("K") -> k; +unit("M") -> m; +unit("G") -> g. + +%% Custom sort: b < k < m < g +sort_size({_,{A,U}},{_,{B,U}}) -> A < B; +sort_size({_,{_,U1}},{_,{_,U2}}) -> + case {U1,U2} of + {b,_} -> true; + {k,_} when U2 =/= b -> true; + {m,g} -> true; + _ -> false + end. 
diff --git a/src/mnesia_rocksdb_tuning.hrl b/src/mnesia_rocksdb_tuning.hrl new file mode 100644 index 0000000..bc3b563 --- /dev/null +++ b/src/mnesia_rocksdb_tuning.hrl @@ -0,0 +1,22 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-record(tuning, {max_files, + n_tabs, + avail_ram, + files_factor = 1}). diff --git a/test/Emakefile b/test/Emakefile new file mode 100644 index 0000000..b63a34d --- /dev/null +++ b/test/Emakefile @@ -0,0 +1,2 @@ +mnesia_rocksdb_xform. +{'*', [{parse_transform, mnesia_rocksdb_xform},debug_info]}. diff --git a/test/README b/test/README new file mode 100644 index 0000000..c0f656d --- /dev/null +++ b/test/README @@ -0,0 +1,17 @@ +To run the mnesia test suite, replacing disc_only_copies references with +rocksdb_copies: + +``` +cd $ERL_TOP +make release_tests +cd release/tests/mnesia_test +cp $MNESIA_ROCKSDB/test/mnesia_rocksdb_backend_xform.erl . +cp $MNESIA_ROCKSDB/test/Emakefile . 
+``` + +You may use github.com/uwiger/parse_trans, and pretty-print the +debug_info in the transformed test suite modules using the following alias: + +``` +alias pp='escript $PARSE_TRANS_ROOT/ebin/parse_trans_pp.beam' +``` diff --git a/test/basho_bench_driver_mnesia_rocksdb.erl b/test/basho_bench_driver_mnesia_rocksdb.erl new file mode 100644 index 0000000..a3cf3db --- /dev/null +++ b/test/basho_bench_driver_mnesia_rocksdb.erl @@ -0,0 +1,64 @@ +%% ------------------------------------------------------------------- +%% +%% basho_bench: Benchmarking Suite +%% +%% Copyright (c) 2009-2010 Basho Techonologies +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%% +%% ------------------------------------------------------------------- + +-module(basho_bench_driver_mnesia_rocksdb). + +-export([new/1, + run/4]). + +-include("mnesia_rocksdb_basho_bench.hrl"). + +%% ==================================================================== +%% API +%% ==================================================================== + +new(_Id) -> + Type = basho_bench_config:get(backend, ram_copies), + Tab = basho_bench_config:get(mnesia_table, t), + ok = bootstrap_mnesia(Tab, Type), + {ok, Tab}. + +bootstrap_mnesia(Tab, Type) -> + ok = mnesia:create_schema([node()], + [{backend_types, + [{rocksdb_copies, mnesia_rocksdb}]}]), + ok = mnesia:start(), + {atomic,ok} = mnesia:create_table(Tab, [{Type, [node()]}]), + mnesia:wait_for_tables([Tab], 10000). 
+ +run(get, KeyGen, _ValueGen, State) -> + Tab = State, + Key = KeyGen(), + case mnesia:dirty_read({Tab, Key}) of + [] -> + {ok, State}; + [{_, Key, _}] -> + {ok, State} + end; +run(put, KeyGen, ValueGen, State) -> + Tab = State, + ok = mnesia:dirty_write({Tab, KeyGen(), ValueGen()}), + {ok, State}; +run(delete, KeyGen, _ValueGen, State) -> + Tab = State, + ok = mnesia:dirty_delete({Tab, KeyGen()}), + {ok, State}. diff --git a/test/mnesia_rocksdb_basho_bench.hrl b/test/mnesia_rocksdb_basho_bench.hrl new file mode 100644 index 0000000..bb06af3 --- /dev/null +++ b/test/mnesia_rocksdb_basho_bench.hrl @@ -0,0 +1,15 @@ + +-define(FAIL_MSG(Str, Args), ?ERROR(Str, Args), basho_bench_app:halt_or_kill()). +-define(STD_ERR(Str, Args), io:format(standard_error, Str, Args)). + +-define(CONSOLE(Str, Args), lager:info(Str, Args)). + +-define(DEBUG(Str, Args), lager:debug(Str, Args)). +-define(INFO(Str, Args), lager:info(Str, Args)). +-define(WARN(Str, Args), lager:warning(Str, Args)). +-define(ERROR(Str, Args), lager:error(Str, Args)). + +-define(FMT(Str, Args), lists:flatten(io_lib:format(Str, Args))). + +-define(VAL_GEN_BLOB_CFG, value_generator_blob_file). +-define(VAL_GEN_SRC_SIZE, value_generator_source_size). diff --git a/test/mnesia_rocksdb_bench_disc_only.config b/test/mnesia_rocksdb_bench_disc_only.config new file mode 100644 index 0000000..187bdd0 --- /dev/null +++ b/test/mnesia_rocksdb_bench_disc_only.config @@ -0,0 +1,18 @@ +{mode, max}. + +{duration, 10}. + +{concurrent, 1}. + +{driver, basho_bench_driver_mnesia_rocksdb}. + +{key_generator, {int_to_bin,{uniform_int, 5000000}}}. + +{value_generator, {fixed_bin, 10000}}. + +{operations, [{get, 2}, {put, 2}, {delete, 1}]}. + +{code_paths, []}. + +{mnesia_table, doc}. +{backend, disc_only_copies}. 
diff --git a/test/mnesia_rocksdb_bench_rocksdb_copies.config b/test/mnesia_rocksdb_bench_rocksdb_copies.config new file mode 100644 index 0000000..c0bd928 --- /dev/null +++ b/test/mnesia_rocksdb_bench_rocksdb_copies.config @@ -0,0 +1,19 @@ +{mode, max}. + +{duration, 10}. + +{concurrent, 1}. + +{driver, basho_bench_driver_mnesia_rocksdb}. + +{key_generator, {int_to_bin,{uniform_int, 5000000}}}. + +{value_generator, {fixed_bin, 10000}}. + +{operations, [{get, 2}, {put, 2}, {delete, 1}]}. + +{code_paths, ["/Users/uwiger/git/rocksdb", + "/Users/uwiger/git/mnesia_rocksdb"]}. + +{mnesia_table, rdb}. +{backend, rocksdb_copies}. diff --git a/test/mnesia_rocksdb_chg_tbl_copy.erl b/test/mnesia_rocksdb_chg_tbl_copy.erl new file mode 100644 index 0000000..016bd6c --- /dev/null +++ b/test/mnesia_rocksdb_chg_tbl_copy.erl @@ -0,0 +1,131 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +%% @doc Run through all combinations of change_table_copy_type +%% @author Ulf Wiger + +-module(mnesia_rocksdb_chg_tbl_copy). + +%% This module implements a test (to be run manually) for iterating through +%% all table copy types on a mnesia table. + +-export([full/0, + run/0, + run/1]). +-export([trace/0]). 
+ +full() -> + Perms = perms(copies()), + Res = [run(P) || P <- Perms], + Res = [ok || _ <- Perms]. + +run() -> + run([rdb,disc_copies,rdb,ram_copies,disc_only_copies,rdb]). + %% run([rdb,disc_only_copies]). + %% run([rdb,ram_copies]). + +perms([]) -> + [[]]; +perms(L) -> [[H|T] || H <- L, T <- perms(L--[H])]. + +copies() -> + [rdb,ram_copies,disc_copies,disc_only_copies]. + +run([T|Types]) -> + mnesia:stop(), + start_mnesia(), + ok = create_tab(T), + ok = change_type(Types, T). + +create_tab(Type) -> + {atomic,ok} = mnesia:create_table( + t, [{Type, [node()]}, + {attributes, [k,v]}, + {index, [v]}]), + fill_tab(), + check_tab(), + ok. + +change_type([To|Types], From) -> + io:fwrite("changing from ~p to ~p~n", [From, To]), + {atomic, ok} = mnesia:change_table_copy_type(t, node(), To), + ok = check_tab(), + io:fwrite("...ok~n", []), + change_type(Types, To); +change_type([], _) -> + ok. + +fill_tab() -> + Res = [mnesia:dirty_write({t,K,V}) || {t,K,V} <- l()], + Res = [ok || _ <- Res], + ok. + +l() -> [{t,a,1}, + {t,b,2}, + {t,c,3}, + {t,d,4}]. + +check_tab() -> + L = l(), + L = lists:append([mnesia:dirty_read({t,K}) || K <- [a,b,c,d]]), + L = lists:append([mnesia:dirty_index_read(t,V,v) || + V <- [1,2,3,4]]), + ok. + +start_mnesia() -> mnesia_rocksdb_tlib:start_mnesia(reset). + +trace() -> + dbg:tracer(), + [tp(M) || M <- mods()], + dbg:p(all,[c]), + try run() + after + [ctp(M) || M <- mods()], + dbg:stop() + end. + +tp({l,M} ) -> dbg:tpl(M,x); +tp({g,M} ) -> dbg:tp(M,x); +tp({l,M,F}) -> dbg:tpl(M,F,x); +tp({g,M,F}) -> dbg:tp(M,F,x). + +ctp({l,M} ) -> dbg:ctpl(M); +ctp({g,M} ) -> dbg:ctp(M); +ctp({l,M,F}) -> dbg:ctpl(M,F); +ctp({g,M,F}) -> dbg:ctp(M,F). + +mods() -> + [ + %% {l, mnesia_index}, + %% {l, mnesia_lib, semantics}]. 
+ %% {g,mnesia_monitor}, + %% {l,mnesia_dumper}, + %% {g,mnesia_loader}, + %% {g,mnesia_checkpoint}, + %% {g,mnesia_lib}, + {l,mnesia_schema,expand_index_attrs}, + {l,mnesia_schema,list2cs}, + {g,mnesia_schema,new_cs}, + {g,mnesia_schema,make_change_table_copy_type}, + {g,mnesia_schema,make_create_table}, + {g,mnesia_lib,semantics}, + {l,mnesia_dumper}, + {g,mnesia_lib,exists}, + {g,mnesia}, + {l,mnesia_schema,intersect_types}, + {g,ets,new}]. diff --git a/test/mnesia_rocksdb_conv_bigtab.erl b/test/mnesia_rocksdb_conv_bigtab.erl new file mode 100644 index 0000000..c70ea9a --- /dev/null +++ b/test/mnesia_rocksdb_conv_bigtab.erl @@ -0,0 +1,59 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_conv_bigtab). + +-export([init/0, mktab/2, run/1]). + +-record(t, {k, i, v}). + +run(Sz) -> + mnesia:stop(), + init(), + mktab(disc_copies, Sz), + mnesia:change_table_copy_type(t, node(), rdb). + +init() -> + mnesia_rocksdb_tlib:start_mnesia(reset). + +mktab(Backend, Sz) -> + mnesia_rocksdb_tlib:create_table(Backend, [k, i, v], [i]), + fill_table(Sz). + + +fill_table(Sz) when is_integer(Sz), Sz > 0 -> + fill_table(1, Sz). 
+ +fill_table(N, Max) when N =< Max -> + mnesia:dirty_write(#t{k = N, i = N, v = val()}), + fill_table(N+1, Max); +fill_table(N, _) when is_integer(N) -> + ok. + +val() -> + {1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0, + 1,2,3,4,5,6,7,8,9,0}. diff --git a/test/mnesia_rocksdb_fallback.erl b/test/mnesia_rocksdb_fallback.erl new file mode 100644 index 0000000..43f1874 --- /dev/null +++ b/test/mnesia_rocksdb_fallback.erl @@ -0,0 +1,98 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_fallback). + +-export([run/0]). + +-define(m(A,B), fun() -> L = ?LINE, + case {A,B} of + {__X, __X} -> + B; + Other -> + error({badmatch, [Other, + {line, L}]}) + end + end()). 
+ +run() -> + cleanup(), + mnesia_rocksdb_tlib:start_mnesia(reset), + mnesia_rocksdb_tlib:create_table(rdb), + ok = mnesia:backup("bup0.BUP"), + [mnesia:dirty_write({t,K,V}) || {K,V} <- [{a,1}, + {b,2}, + {c,3}]], + ok = mnesia:backup("bup1.BUP"), + [mnesia:dirty_write({t,K,V}) || {K,V} <- [{d,4}, + {e,5}, + {f,6}]], + ok = mnesia:backup("bup2.BUP"), + io:fwrite("*****************************************~n", []), + load_backup("bup0.BUP"), + ?m([], mnesia:dirty_match_object(t, {t,'_','_'})), + ?m([], mnesia:dirty_index_read(t,2,v)), + io:fwrite("*****************************************~n", []), + load_backup("bup1.BUP"), + ?m([{t,a,1},{t,b,2},{t,c,3}], mnesia:dirty_match_object(t, {t,'_','_'})), + ?m([{t,b,2}], mnesia:dirty_index_read(t,2,v)), + io:fwrite("*****************************************~n", []), + load_backup("bup2.BUP"), + ?m([{t,a,1},{t,b,2},{t,c,3}, + {t,d,4},{t,e,5},{t,f,6}], mnesia:dirty_match_object(t, {t,'_','_'})), + ?m([{t,b,2}], mnesia:dirty_index_read(t,2,v)), + ?m([{t,e,5}], mnesia:dirty_index_read(t,5,v)), + ok. + +load_backup(BUP) -> + mnesia_rocksdb_tlib:trace( + fun() -> + io:fwrite("loading backup ~s~n", [BUP]), + ok = mnesia:install_fallback(BUP), + io:fwrite("stopping~n", []), + mnesia:stop(), + timer:sleep(3000), + io:fwrite("starting~n", []), + mnesia:start(), + WaitRes = mnesia:wait_for_tables([t], 5000), + io:fwrite("WaitRes = ~p~n", [WaitRes]) + end, + mods(0) + ). + +cleanup() -> + os:cmd("rm *.BUP"). + +mods(0) -> + []; +mods(1) -> + [ + {l, mnesia_rocksdb}, + {g, rocksdb} + ]; +mods(2) -> + [ + %% {l, mnesia_monitor}, + {g, mnesia_rocksdb}, + {l, mnesia_bup}, + {g, mnesia_lib}, + {g, mnesia_schema}, + %% {g, mnesia_loader}, + {g, mnesia_index}, + {l, mnesia_tm} + ]. 
diff --git a/test/mnesia_rocksdb_indexes.erl b/test/mnesia_rocksdb_indexes.erl new file mode 100644 index 0000000..997d62c --- /dev/null +++ b/test/mnesia_rocksdb_indexes.erl @@ -0,0 +1,174 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_indexes). + +-export([run/0, + r1/0]). + +run() -> + mnesia:stop(), + ok = mnesia_rocksdb_tlib:start_mnesia(reset), + test(1, ram_copies, r1), + test(1, disc_copies, d1), + fail(test, [1, disc_only_copies, do1]), % doesn't support ordered + test(2, disc_only_copies, do1), + fail(test, [1, rdb, l1]), % doesn't support bag + test(3, rdb, l1), + add_del_indexes(), + {atomic,ok} = mnesia_schema:add_index_plugin( + {pfx},mnesia_rocksdb, ix_prefixes), + test_index_plugin(pr1, ram_copies, ordered), + test_index_plugin(pr2, ram_copies, bag), + test_index_plugin(pd1, disc_copies, ordered), + fail(test_index_plugin, [pd2, disc_only_copies, ordered]), + test_index_plugin(pd2, disc_copies, bag), + test_index_plugin(pl2, rdb, ordered), + test_index_plugin_mgmt(), + ok. 
+ +r1() -> + mnesia:stop(), + ok = mnesia_rocksdb_tlib:start_mnesia(reset), + {atomic,ok} = mnesia_schema:add_index_plugin( + {pfx},mnesia_rocksdb, ix_prefixes), + dbg:tracer(), + dbg:tpl(mnesia_schema,x), + dbg:tpl(mnesia_index,x), + dbg:p(all,[c]), + test_index_plugin(pd2, disc_only_copies, ordered). + +fail(F, Args) -> + try apply(?MODULE, F, Args), + error(should_fail) + catch + error:_ -> + io:fwrite("apply(~p, ~p, ~p) -> fails as expected~n", + [?MODULE, F, Args]) + end. + +test(N, Type, T) -> + {atomic, ok} = mnesia:create_table(T, [{Type,[node()]}, + {attributes,[k,a,b,c]}, + {index, indexes(N)}]), + ok = test_index(N, T). + +add_del_indexes() -> + {atomic, ok} = mnesia:del_table_index(r1, a), + {aborted, _} = mnesia:del_table_index(r1, a), + {atomic, ok} = mnesia:add_table_index(r1, a), + {aborted, _} = mnesia:add_table_index(r1, a), + {atomic, ok} = mnesia:del_table_index(d1, a), + {atomic, ok} = mnesia:add_table_index(d1, a), + {atomic, ok} = mnesia:del_table_index(do1, a), + {atomic, ok} = mnesia:add_table_index(do1, a), + {atomic, ok} = mnesia:del_table_index(l1, a), + {atomic, ok} = mnesia:add_table_index(l1, a), + io:fwrite("add_del_indexes() -> ok~n", []). 
+ +test_index_plugin(Tab, Type, IxType) -> + {atomic, ok} = mnesia:create_table(Tab, [{Type, [node()]}, + {index, [{{pfx}, IxType}]}]), + mnesia:dirty_write({Tab, "foobar", "sentence"}), + mnesia:dirty_write({Tab, "yellow", "sensor"}), + mnesia:dirty_write({Tab, "truth", "white"}), + mnesia:dirty_write({Tab, "fulcrum", "white"}), + Res1 = [{Tab, "foobar", "sentence"}, {Tab, "yellow", "sensor"}], + Res2 = [{Tab, "fulcrum", "white"}, {Tab, "truth", "white"}], + if IxType == bag -> + Res1 = lists:sort(mnesia:dirty_index_read(Tab,<<"sen">>, {pfx})), + Res2 = lists:sort(mnesia:dirty_index_read(Tab,<<"whi">>, {pfx})), + [{Tab,"foobar","sentence"}] = mnesia:dirty_index_read( + Tab, <<"foo">>, {pfx}); + IxType == ordered -> + Res1 = lists:sort(mnesia:dirty_index_read(Tab,<<"sen">>, {pfx})), + Res2 = lists:sort(mnesia:dirty_index_read(Tab,<<"whi">>, {pfx})), + [{Tab,"foobar","sentence"}] = mnesia:dirty_index_read( + Tab, <<"foo">>, {pfx}) + end, + io:fwrite("test_index_plugin(~p, ~p, ~p) -> ok~n", [Tab,Type,IxType]). + +test_index_plugin_mgmt() -> + {aborted,_} = mnesia:create_table(x, [{index,[{unknown}]}]), + {aborted,_} = mnesia:create_table(x, [{index,[{{unknown},bag}]}]), + {aborted,_} = mnesia:create_table(x, [{index,[{{unknown},ordered}]}]), + {atomic,ok} = mnesia_schema:add_index_plugin( + {t}, mnesia_rocksdb,ix_prefixes), + {atomic,ok} = mnesia_schema:delete_index_plugin({t}), + {aborted,{bad_type,x,_}} = + mnesia:create_table(x, [{index,[{{t},ordered}]}]), + %% re-add plugin + {atomic,ok} = mnesia_schema:add_index_plugin( + {t}, mnesia_rocksdb,ix_prefixes), + {atomic,ok} = + mnesia:create_table(x, [{index,[{{t},ordered}]}]), + {aborted,{plugin_in_use,{t}}} = + mnesia_schema:delete_index_plugin({t}). 
+ +test_index(1, T) -> + L2 = [{T,K,x,y,z} || K <- lists:seq(4,6)], + L1 = [{T,K,a,b,c} || K <- lists:seq(1,3)], + true = lists:all(fun(X) -> X == ok end, + [mnesia:dirty_write(Obj) || Obj <- L1 ++ L2]), + L1 = lists:sort(mnesia:dirty_index_read(T,a,a)), + L1 = lists:sort(mnesia:dirty_index_read(T,a,3)), + L1 = mnesia:dirty_index_read(T,b,b), + L1 = lists:sort(mnesia:dirty_index_read(T,c,c)), + L2 = lists:sort(mnesia:dirty_index_read(T,x,a)), + L2 = lists:sort(mnesia:dirty_index_read(T,x,3)), + L2 = mnesia:dirty_index_read(T,y,b), + L2 = lists:sort(mnesia:dirty_index_read(T,z,c)), + io:fwrite("test_index(1, ~p) -> ok~n", [T]), + ok; +test_index(2, T) -> + L1 = [{T,K,a,b,c} || K <- lists:seq(1,3)], + L2 = [{T,K,x,y,z} || K <- lists:seq(4,6)], + true = lists:all(fun(X) -> X == ok end, + [mnesia:dirty_write(Obj) || Obj <- L1 ++ L2]), + L1 = lists:sort(mnesia:dirty_index_read(T,a,a)), + L1 = lists:sort(mnesia:dirty_index_read(T,a,3)), + L1 = lists:sort(mnesia:dirty_index_read(T,b,b)), + L1 = lists:sort(mnesia:dirty_index_read(T,c,c)), + L2 = lists:sort(mnesia:dirty_index_read(T,x,a)), + L2 = lists:sort(mnesia:dirty_index_read(T,x,3)), + L2 = lists:sort(mnesia:dirty_index_read(T,y,b)), + L2 = lists:sort(mnesia:dirty_index_read(T,z,c)), + io:fwrite("test_index(1, ~p) -> ok~n", [T]), + ok; +test_index(3, T) -> + L2 = [{T,K,x,y,z} || K <- lists:seq(4,6)], + L1 = [{T,K,a,b,c} || K <- lists:seq(1,3)], + true = lists:all(fun(X) -> X == ok end, + [mnesia:dirty_write(Obj) || Obj <- L1 ++ L2]), + L1 = mnesia:dirty_index_read(T,a,a), + L1 = mnesia:dirty_index_read(T,a,3), + L1 = mnesia:dirty_index_read(T,b,b), + L1 = mnesia:dirty_index_read(T,c,c), + L2 = mnesia:dirty_index_read(T,x,a), + L2 = mnesia:dirty_index_read(T,x,3), + L2 = mnesia:dirty_index_read(T,y,b), + L2 = mnesia:dirty_index_read(T,z,c), + io:fwrite("test_index(1, ~p) -> ok~n", [T]), + ok. + +indexes(1) -> + [a,{b,ordered},{c,bag}]; +indexes(2) -> + [a,b,{c,bag}]; +indexes(3) -> + [a,{b,ordered},{c,ordered}]. 
diff --git a/test/mnesia_rocksdb_proper_semantics_test.erl b/test/mnesia_rocksdb_proper_semantics_test.erl new file mode 100644 index 0000000..82290c6 --- /dev/null +++ b/test/mnesia_rocksdb_proper_semantics_test.erl @@ -0,0 +1,161 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +%% @doc Verify dirty vs transaction semantics against rocksdb mnesia backend +%% @author Ulf Wiger + +-module(mnesia_rocksdb_proper_semantics_test). + +%% This module uses the proper_statem pattern to generate random +%% sequences of commands, mixing dirty and transaction operations +%% (including dirty ops from within transactions). Each sequence is run +%% against a disc_copies table and a rocksdb_copies table, after +%% which the result of each operation in the sequence is compared between +%% the two runs. The postcondition is that every command in every sequence +%% should yield the same value against both backends. + +-export([test/1, + prop_seq/0]). + +%% statem callbacks +-export([initial_state/0, + command/1, + precondition/2, + postcondition/3, + next_state/3]). + +%% command callbacks +-export([activity/2]). + +-include_lib("proper/include/proper.hrl"). +-include_lib("eunit/include/eunit.hrl"). + +-record(st, {}). +-define(KEYS, [a,b,c]). 
+ +basic_test_() -> + {timeout, 60000, [fun() -> test(100) end]}. + +test(N) -> + setup_mnesia(), + true = proper:quickcheck(?MODULE:prop_seq(), N), + ok. + +prop_seq() -> + ?FORALL(Cmds, proper_statem:commands(?MODULE), + begin + setup(), + {H, S, Res} = + proper_statem:run_commands(?MODULE, Cmds), + cleanup(), + ?WHENFAIL( + io:fwrite("History: ~w~n" + "State : ~w~n" + "Result : ~w~n", [H, S, Res]), + proper:aggregate( + proper_statem:command_names(Cmds), Res =:= ok)) + end). + +%% Note that this requires the rocksdb application to be in the path, +%% and obviously an OTP patched with the backend plugin behavior. +setup_mnesia() -> + stopped = mnesia:stop(), + ok = mnesia:delete_schema([node()]), + ok = mnesia:create_schema([node()]), + ok = mnesia:start(), + {ok, rocksdb_copies} = mnesia_rocksdb:register(). + +setup() -> + {atomic,ok} = mnesia:create_table(d, [{disc_copies, [node()]}, + {record_name, x}]), + {atomic,ok} = mnesia:create_table(l, [{rocksdb_copies, [node()]}, + {record_name, x}]), + ok = mnesia:wait_for_tables([d, l], 30000), + ok. + +cleanup() -> + {atomic, ok} = mnesia:delete_table(d), + {atomic, ok} = mnesia:delete_table(l), + ok. + +initial_state() -> + #st{}. + +command(#st{}) -> + ?LET(Type, type(), + {call, ?MODULE, activity, [Type, sequence()]}). + +type() -> + proper_types:oneof([async_dirty, transaction]). + +precondition(_, _) -> + true. + +postcondition(_, {call,?MODULE,activity,_}, {A, B}) -> + A == B; +postcondition(_, _, _) -> + false. + +next_state(St, _, _) -> + St. + +sequence() -> + proper_types:list(db_cmd()). + +db_cmd() -> + ?LET(Type, type(), + proper_types:oneof([{Type, read, key()}, + {Type, write, key(), value()}, + {Type, delete, key()}])). + +key() -> + proper_types:oneof([a,b,c]). + +value() -> + proper_types:oneof([1,2,3]). + +activity(Type, Seq) -> + {mnesia:activity(Type, fun() -> + apply_seq(Type, d, Seq) + end), + mnesia:activity(Type, fun() -> + apply_seq(Type, l, Seq) + end)}. 
+ +apply_seq(Type, Tab, Seq) -> + apply_seq(Type, Tab, Seq, []). + +apply_seq(transaction=X, Tab, [H|T], Acc) -> + Res = case H of + {X,read, K} -> mnesia:read(Tab, K, read); + {_,read, K} -> mnesia:dirty_read(Tab,K); + {X,write,K,V} -> mnesia:write(Tab, {x, K, V}, write); + {_,write,K,V} -> mnesia:dirty_write(Tab, {x,K,V}); + {X,delete,K} -> mnesia:delete(Tab, K, write); + {_,delete,K} -> mnesia:dirty_delete(Tab,K) + end, + apply_seq(X, Tab, T, [Res|Acc]); +apply_seq(X, Tab, [H|T], Acc) -> + Res = case H of + {_,read, K} -> mnesia:read(Tab, K, read); + {_,write,K,V} -> mnesia:write(Tab, {x, K, V}, write); + {_,delete,K} -> mnesia:delete(Tab, K, write) + end, + apply_seq(X, Tab, T, [Res|Acc]); +apply_seq(_, _, [], Acc) -> + lists:reverse(Acc). diff --git a/test/mnesia_rocksdb_size_info.erl b/test/mnesia_rocksdb_size_info.erl new file mode 100644 index 0000000..172ffbd --- /dev/null +++ b/test/mnesia_rocksdb_size_info.erl @@ -0,0 +1,84 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. You may obtain +%% a copy of the License at +%% +%% http://www.apache.org/licenses/LICENSE-2.0 +%% +%% Unless required by applicable law or agreed to in writing, +%% software distributed under the License is distributed on an +%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +%% KIND, either express or implied. See the License for the +%% specific language governing permissions and limitations +%% under the License. +%%---------------------------------------------------------------- + +-module(mnesia_rocksdb_size_info). + +-export([run/0]). + +-define(m(A, B), (fun(L) -> {L,A} = {L,B} end)(?LINE)). + + +run() -> + initialize_mnesia(), + test_set(), + test_bag(). 
+ +initialize_mnesia() -> + mnesia:stop(), + mnesia:delete_schema([node()]), + mnesia:create_schema([node()], [{backend_types, + [{rocksdb_copies, mnesia_rocksdb}]}]), + mnesia:start(), + {atomic,ok} = mnesia:create_table(s, [{type, set}, + {record_name, x}, + {rocksdb_copies, [node()]}]), + {atomic,ok} = mnesia:create_table(b, [{type, bag}, + {record_name, x}, + {rocksdb_copies, [node()]}]), + ok. + +test_set() -> + ?m(0, mnesia:table_info(s, size)), + ?m(1, w(s, 1, a)), + ?m(1, w(s, 1, b)), + ?m(2, w(s, 2, c)), + ?m(3, w(s, 3, d)), + ?m(2, d(s, 3)), + mnesia:stop(), + mnesia:start(), + await(s), + ?m(2, mnesia:table_info(s, size)). + +test_bag() -> + ?m(0, mnesia:table_info(b, size)), + ?m(1, w(b, 1, a)), + ?m(2, w(b, 1, b)), + ?m(3, w(b, 2, a)), + ?m(4, w(b, 2, d)), + ?m(5, w(b, 2, c)), + ?m(4, do(b, 2, c)), + ?m(2, d(b, 2)), + mnesia:stop(), + mnesia:start(), + await(b), + ?m(2, mnesia:table_info(b, size)). + +w(T, K, V) -> + ok = mnesia:dirty_write(T, {x, K, V}), + mnesia:table_info(T, size). + +d(T, K) -> + mnesia:dirty_delete({T, K}), + mnesia:table_info(T, size). + +do(T, K, V) -> + mnesia:dirty_delete_object(T, {x, K, V}), + mnesia:table_info(T, size). + +await(T) -> + ?m(ok, mnesia:wait_for_tables([T], 10000)). diff --git a/test/mnesia_rocksdb_tlib.erl b/test/mnesia_rocksdb_tlib.erl new file mode 100644 index 0000000..4c2a3da --- /dev/null +++ b/test/mnesia_rocksdb_tlib.erl @@ -0,0 +1,66 @@ +%%---------------------------------------------------------------- +%% Copyright (c) 2013-2016 Klarna AB +%% +%% This file is provided to you under the Apache License, +%% Version 2.0 (the "License"); you may not use this file +%% except in compliance with the License. 
%% You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%----------------------------------------------------------------

%% Small helper library shared by the mnesia_rocksdb test suites:
%% mnesia startup, table creation, and dbg-based call tracing.
-module(mnesia_rocksdb_tlib).

-export([start_mnesia/0,
         start_mnesia/1,
         create_table/1,
         create_table/3,
         trace/2]).


%% Start mnesia without touching the existing schema.
start_mnesia() ->
    start_mnesia(false).

%% With Mode == reset: wipe the schema first and recreate it with the
%% rocksdb backend registered under alias 'rdb'. Any other Mode just
%% starts mnesia.
start_mnesia(reset) ->
    mnesia:delete_schema([node()]),
    mnesia:create_schema([node()],
                         [{backend_types,
                           [{rdb, mnesia_rocksdb}]}]),
    mnesia:start();
start_mnesia(_) ->
    mnesia:start().

%% Create test table t with default attributes [k,v], indexed on v.
create_table(Backend) ->
    create_table(Backend, [k, v], [v]).

%% Create test table t on this node with the given backend alias,
%% attribute list and index list.
create_table(Backend, Attrs, Indexes) ->
    mnesia:create_table(t, [{index, Indexes},
                            {attributes, Attrs},
                            {Backend, [node()]}]).

%% Run F() with dbg call tracing enabled for the given trace specs;
%% trace patterns are always cleared and dbg stopped afterwards.
trace(F, Ms) ->
    dbg:tracer(),
    lists:foreach(fun tp/1, Ms),
    dbg:p(all, [c]),
    try F()
    after
        lists:foreach(fun ctp/1, Ms),
        dbg:stop()
    end.

%% Install a trace pattern: l = local+exported calls, g = exported only.
tp({l, M})    -> dbg:tpl(M, x);
tp({g, M})    -> dbg:tp(M, x);
tp({l, M, F}) -> dbg:tpl(M, F, x);
tp({g, M, F}) -> dbg:tp(M, F, x).

%% Clear a trace pattern previously installed by tp/1.
ctp({l, M})    -> dbg:ctpl(M);
ctp({g, M})    -> dbg:ctp(M);
ctp({l, M, F}) -> dbg:ctpl(M, F);
ctp({g, M, F}) -> dbg:ctp(M, F).
diff --git a/test/mnesia_rocksdb_xform.erl b/test/mnesia_rocksdb_xform.erl
new file mode 100644
index 0000000..85c6508
--- /dev/null
+++ b/test/mnesia_rocksdb_xform.erl
@@ -0,0 +1,334 @@
%%----------------------------------------------------------------
%% Copyright (c) 2013-2016 Klarna AB
%%
%% This file is provided to you under the Apache License,
%% Version 2.0 (the "License"); you may not use this file
%% except in compliance with the License.
%% You may obtain
%% a copy of the License at
%%
%% http://www.apache.org/licenses/LICENSE-2.0
%%
%% Unless required by applicable law or agreed to in writing,
%% software distributed under the License is distributed on an
%% "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
%% KIND, either express or implied. See the License for the
%% specific language governing permissions and limitations
%% under the License.
%%----------------------------------------------------------------

%% This module is used to test backend plugin extensions to the mnesia
%% backend. It also indirectly tests the mnesia backend plugin
%% extension machinery.
%%
%% Usage: mnesia_ext_rocksdb_test:recompile(Extension).
%% Usage: mnesia_ext_rocksdb_test:recompile().
%% This command is executed in the release/tests/test_server directory
%% before running the normal tests. The command patches the test code,
%% via a parse_transform, to replace disc_only_copies with the Alias.

-module(mnesia_rocksdb_xform).

-author("roland.karlsson@erlang-solutions.com").
-author("ulf.wiger@klarna.com").

%% EXPORTS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Exporting API
-export([recompile/0, recompile/1]).

%% Exporting parse_transform callback
-export([parse_transform/2]).

%% Exporting replacement for mnesia:create_table/2
-export([create_table/1, create_table/2, rpc/4]).

%% API %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Recompile the test code with the default (first listed) extension,
%% replacing disc_only_copies with its alias.
recompile() ->
    [{Module, Alias} | _] = extensions(),
    recompile(Module, Alias).

%% Recompile using MorA, which may name either the backend module or
%% its alias; the other half of the pair is looked up in extensions().
recompile(MorA) ->
    case lists:keyfind(MorA, 1, extensions()) of
        {Module, Alias} ->
            recompile(Module, Alias);
        false ->
            case lists:keyfind(MorA, 2, extensions()) of
                {Module, Alias} -> recompile(Module, Alias);
                false           -> {error, cannot_find_module_or_alias}
            end
    end.
%% Store the chosen module/alias in process-global storage, then
%% recompile every other .erl file in the current directory with this
%% module as a parse transform.
recompile(Module, Alias) ->
    io:format("recompile(~p,~p)~n", [Module, Alias]),
    put_ext(module, Module),
    put_ext(alias, Alias),
    {ok, Files} = file:list_dir("."),
    Self = atom_to_list(?MODULE) ++ ".erl",
    %% Strip the ".erl" suffix (4 chars) to get the module name.
    %% list_to_atom/1 is safe here: input is local test file names.
    Modules = [list_to_atom(lists:sublist(F, length(F) - 4))
               || F <- Files,
                  lists:suffix(".erl", F),
                  F =/= Self],
    io:format("Modules = ~p~n", [Modules]),
    lists:foreach(fun(M) ->
                          c:c(M, [{parse_transform, ?MODULE}])
                  end, Modules).

%% TEST REPLACEMENT CALLBACKS %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% Replacement for mnesia:create_table/2, injected by the parse
%% transform; normalizes to the single-argument form.
create_table(Name, Parameters) ->
    create_table([{name, Name} | Parameters]).

%% Replacement for mnesia:create_table/1: log when the table uses the
%% rocksdb backend, then delegate to mnesia.
create_table(Parameters) when is_list(Parameters) ->
    case lists:keymember(rocksdb_copies, 1, Parameters) of
        true ->
            ct:comment("INFO: Contains rocksdb table"),
            io:format("INFO: create_table(~p)~n", [Parameters]),
            mnesia:create_table(Parameters);
        false ->
            mnesia:create_table(Parameters)
    end;
create_table(Param) ->
    %% Probably bad input, e.g. from mnesia_evil_coverage_SUITE.erl
    mnesia:create_table(Param).


%% Replacement for rpc:call(N, mnesia, start, [Opts]): unless the
%% caller already supplied a schema option, inject one that registers
%% the backend plugins on the remote node.
rpc(N, mnesia, start, [Opts]) ->
    case lists:keymember(schema, 1, Opts) of
        true ->
            %% BUG FIX: this previously did
            %%   rpc:call(N, mnesia, call, [Opts])
            %% but mnesia exports no call/1 — the intent is clearly to
            %% start mnesia with the caller's own options untouched.
            rpc:call(N, mnesia, start, [Opts]);
        false ->
            Opts1 = [{schema, [{backend_types, backends()}]} | Opts],
            rpc:call(N, mnesia, start, [Opts1])
    end;
rpc(N, M, F, A) ->
    rpc:call(N, M, F, A).


%% PARSE_TRANSFORM CALLBACK %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% The callback for c:c(Module, [{parse_transform,?MODULE}]).
parse_transform(Forms, _Options) ->
    plain_transform(fun do_transform/1, Forms).
%% Per-form rewriting rules applied via plain_transform/2. Returning
%% 'continue' recurses into the form's sub-terms; returning a new form
%% replaces the original and recursion continues with the next form.
do_transform({'attribute', _, module, Module}) ->
    io:format("~n~nMODULE: ~p~n", [Module]),
    continue;
%% Replace every disc_only_copies atom with the configured alias.
do_transform({'atom', Line, disc_only_copies}) ->
    io:format("replacing disc_only_copies with ~p~n", [get_ext(alias)]),
    {'atom', Line, get_ext(alias)};
%% Redirect mnesia:create_table/N to this module's create_table,
%% transforming the argument forms as well.
do_transform(Form = {call, L1,
                     {remote, L2,
                      {atom, L3, mnesia},
                      {atom, L4, create_table}},
                     Arguments}) ->
    NewForm = {call, L1,
               {remote, L2,
                {atom, L3, ?MODULE},
                {atom, L4, create_table}},
               plain_transform(fun do_transform/1, Arguments)},
    io:format("~nConvert Form:~n~s~n~s~n", [pp_form(Form), pp_form(NewForm)]),
    NewForm;
%% Redirect rpc:call(Node, mnesia, start, Args) to ?MODULE:rpc/4.
do_transform(Form = {call, L1,
                     {remote, L2,
                      {atom, L3, rpc},
                      {atom, L4, call}},
                     [{var, _, _} = N, {atom, _, mnesia} = Mnesia,
                      {atom, _, start} = Start, Args]}) ->
    NewForm = {call, L1,
               {remote, L2,
                {atom, L3, ?MODULE},
                {atom, L4, rpc}},
               [N, Mnesia, Start, Args]},
    io:format("~nConvert Form:~n~s~n~s~n", [pp_form(Form), pp_form(NewForm)]),
    NewForm;

%% Add a backend_types properties argument to mnesia:create_schema/1.
do_transform(Form = {call, L1,
                     {remote, L2,
                      {atom, L3, mnesia},
                      {atom, L4, create_schema}},
                     [Nodes]}) ->
    P = element(2, Nodes),
    NewForm = {call, L1,
               {remote, L2,
                {atom, L3, mnesia},
                {atom, L4, create_schema}},
               [Nodes, erl_parse:abstract([{backend_types, backends()}], P)]},
    io:format("~nConvert Form:~n~s~n~s~n", [pp_form(Form), pp_form(NewForm)]),
    NewForm;
%% mnesia:start() -> mnesia:start([{schema, [{backend_types, ...}]}]).
do_transform(Form = {call, L1,
                     {remote, L2,
                      {atom, L3, mnesia},
                      {atom, L4, start}},
                     []}) ->
    NewForm = {call, L1,
               {remote, L2,
                {atom, L3, mnesia},
                {atom, L4, start}},
               [erl_parse:abstract(
                  [{schema, [{backend_types, backends()}]}], L4)]},
    io:format("~nConvert Form:~n~s~n~s~n", [pp_form(Form), pp_form(NewForm)]),
    NewForm;
%% mnesia:start(Opts) -> prepend the schema option onto Opts.
do_transform(Form = {call, L1,
                     {remote, L2,
                      {atom, L3, mnesia},
                      {atom, L4, start}},
                     [Opts]}) ->
    P = element(2, Opts),
    NewForm = {call, L1,
               {remote, L2,
                {atom, L3, mnesia},
                {atom, L4, start}},
               [{cons, P,
                 erl_parse:abstract(
                   {schema, [{backend_types, backends()}]}, L4),
                 Opts}]},
    io:format("~nConvert Form:~n~s~n~s~n", [pp_form(Form), pp_form(NewForm)]),
    NewForm;

%% Replace the third clause of unsupp_user_props/1 (the one taking the
%% test Config) with a clause returning {skip, ...}; the doc and suite
%% clauses are kept. Original shape being patched:
%%   unsupp_user_props(doc) -> [...];
%%   unsupp_user_props(suite) -> [];
%%   unsupp_user_props(Config) when is_list(Config) -> ...
do_transform(Form = {function, L1, F, 1, [C1, C2, C3]})
  when F == unsupp_user_props ->
    L3 = element(2, C3),
    NewForm = {function, L1, F, 1,
               [C1, C2,
                {clause, L3, [{var, L3, '_'}], [],
                 [{tuple, L3, [{atom, L3, skip},
                               erl_parse:abstract(
                                 "Skipped for rocksdb test", L3)]}
                 ]}]},
    io:format("~nConvert Form:"
              "~n=============~n~s"
              "==== To: ====~n~s"
              "=============~n",
              [cut(20, pp_form(Form)), cut(20, pp_form(NewForm))]),
    NewForm;
%% Same treatment for the second clause of storage_options/1.
do_transform(Form = {function, L1, F, 1, [C1, C2]})
  when F == storage_options ->
    L2 = element(2, C2),
    NewForm = {function, L1, F, 1,
               [C1,
                {clause, L2, [{var, L2, '_'}], [],
                 [{tuple, L2, [{atom, L2, skip},
                               erl_parse:abstract(
                                 "Skipped for rocksdb test", L2)]}
                 ]}]},
    io:format("~nConvert Form:"
              "~n=============~n~s"
              "==== To: ====~n~s"
              "=============~n",
              [cut(20, pp_form(Form)), cut(20, pp_form(NewForm))]),
    NewForm;
do_transform(_Form) ->
    continue.

%% Pretty-print either a full form or a bare expression.
pp_form(F) when element(1, F) == attribute; element(1, F) == function ->
    erl_pp:form(F);
pp_form(F) ->
    erl_pp:expr(F).

%% Truncate the string S to at most MaxLines lines, appending "...\n"
%% when output was cut.
cut(MaxLines, S) ->
    case re:split(S, "\\v", [{return, list}]) of
        Lns when length(Lns) =< MaxLines ->
            S;
        Lns ->
            lists:flatten(
              add_lf(lists:sublist(Lns, 1, MaxLines) ++ ["...\n"]))
    end.

%% Re-join split lines, restoring the newline before each tail line.
add_lf([H | T]) ->
    [H | ["\n" ++ L || L <- T]].

%% INTERNAL %%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%

%% A trick for doing parse transforms easier.
plain_transform(Fun, Forms) when is_function(Fun, 1), is_list(Forms) ->
    plain_transform1(Fun, Forms).
%% Depth-first traversal driver for plain_transform/2. Tuples whose
%% first element is an atom are offered to Fun; lists are walked
%% recursively; everything else passes through unchanged.
plain_transform1(_, []) ->
    [];
plain_transform1(Fun, [F | Fs]) when is_atom(element(1, F)) ->
    case Fun(F) of
        continue ->
            %% Recurse into the form's own sub-terms, then the rest.
            [list_to_tuple(plain_transform1(Fun, tuple_to_list(F)))
             | plain_transform1(Fun, Fs)];
        {done, NewF} ->
            %% Stop transforming: keep the remaining forms untouched.
            [NewF | Fs];
        {error, Reason} ->
            %% NOTE(review): this branch returns io:format/2's result
            %% ('ok') instead of a forms list, truncating the output —
            %% presumably only ever hit on malformed input; confirm
            %% before relying on the error path.
            io:format("Error: ~p (~p)~n", [F, Reason]);
        NewF when is_tuple(NewF) ->
            [NewF | plain_transform1(Fun, Fs)]
    end;
plain_transform1(Fun, [L | Fs]) when is_list(L) ->
    [plain_transform1(Fun, L) | plain_transform1(Fun, Fs)];
plain_transform1(Fun, [F | Fs]) ->
    [F | plain_transform1(Fun, Fs)];
plain_transform1(_, F) ->
    F.

%% Existing extensions.
%% NOTE: The first is default.
extensions() ->
    [ {mnesia_rocksdb, rocksdb_copies}
    ].
    %% {mnesia_ext_filesystem, fs_copies},
    %% {mnesia_ext_filesystem, fstab_copies},
    %% {mnesia_ext_filesystem, raw_fs_copies}
    %% ].

%% extensions() as {Alias, Module} pairs, the shape mnesia's
%% backend_types schema option expects.
backends() ->
    [{T, M} || {M, T} <- extensions()].

%% Process-global storage: a named ets table kept alive by a dedicated
%% holder process, so values survive recompilation of the test modules.

put_ext(Key, Value) ->
    ets:insert(global_storage(), {Key, Value}).

%% Ensure the holder process and its named table exist; returns the
%% table name. NOTE(review): the holder registers itself only after
%% creating the table, so concurrent first calls could race — appears
%% to be single-caller test tooling; confirm if that ever changes.
global_storage() ->
    case whereis(?MODULE) of
        undefined ->
            Caller = self(),
            Holder =
                spawn(fun() ->
                              T = ets:new(?MODULE, [public, named_table]),
                              init_ext(T),
                              register(?MODULE, self()),
                              Caller ! {self(), done},
                              wait()
                      end),
            receive
                {Holder, done} -> ok
            end;
        _ ->
            ok
    end,
    ?MODULE.

%% Seed the table with the default extension's module and alias.
init_ext(T) ->
    [{Mod, Alias} | _] = extensions(),
    ets:insert(T, {alias, Alias}),
    ets:insert(T, {module, Mod}).

%% Holder process loop: keep the ets table alive until told to stop.
wait() ->
    receive
        stop -> ok
    end.

%% Fetch a value from process-global storage; logs and returns
%% 'undefined' on a miss or lookup failure.
get_ext(Key) ->
    case catch ets:lookup(global_storage(), Key) of
        [{Key, Value}] ->
            Value;
        [] ->
            io:format("Data for ~p not stored~n", [Key]),
            undefined;
        {'EXIT', Reason} ->
            io:format("Get value for ~p failed (~p)~n", [Key, Reason]),
            undefined
    end.