Merge pull request #22 from aeternity/gh3553-refactor-plugin-final

Refactor to support column families, direct rocksdb access
This commit is contained in:
Ulf Wiger 2022-07-11 13:53:49 +02:00 committed by GitHub
commit b0bf4b6b9c
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
47 changed files with 9106 additions and 1566 deletions

1
.gitignore vendored
View File

@ -1,2 +1 @@
/doc
/_build /_build

View File

@ -1,7 +1,7 @@
suite=$(if $(SUITE), suite=$(SUITE), ) suite=$(if $(SUITE), suite=$(SUITE), )
REBAR3=$(shell which rebar3 || echo ./rebar3) REBAR3=$(shell which rebar3 || echo ./rebar3)
.PHONY: all check test clean run .PHONY: all check test clean run dialyzer xref
all: all:
$(REBAR3) compile $(REBAR3) compile
@ -18,7 +18,13 @@ eunit:
ct: ct:
$(REBAR3) ct $(suite) $(REBAR3) ct $(suite)
test: eunit ct test: dialyzer xref eunit ct
dialyzer:
$(REBAR3) dialyzer
xref:
$(REBAR3) xref
conf_clean: conf_clean:
@: @:

271
README.md
View File

@ -1,16 +1,53 @@
# mnesia_rocksdb
A RocksDB backend for Mnesia.
This permits Erlang/OTP applications to use RocksDB as a backend for # Mnesia Rocksdb - Rocksdb backend plugin for Mnesia #
mnesia tables. It is based on Klarna's `mnesia_eleveldb`.
## Prerequisites Copyright (c) 2013-21 Klarna AB
- rocksdb (included as dependency) __Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).
- Erlang/OTP 20.0 or newer (https://github.com/erlang/otp)
## Getting started The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
it possible to utilize more capable key-value stores than the `dets`
module (limited to 2 GB per table). Unfortunately, this support is
undocumented. Below, some informal documentation for the plugin system
is provided.
### <a name="Table_of_Contents">Table of Contents</a> ###
1. [Usage](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Usage)
1. [Prerequisites](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Prerequisites)
1. [Getting started](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Getting_started)
1. [Special features](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Special_features)
1. [Customization](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Customization)
1. [Handling of errors in write operations](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Handling_of_errors_in_write_operations)
1. [Caveats](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Caveats)
1. [Mnesia backend plugins](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Mnesia_backend_plugins)
1. [Background](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Background)
1. [Design](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Design)
1. [Mnesia index plugins](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Mnesia_index_plugins)
1. [Rocksdb](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Rocksdb)
### <a name="Usage">Usage</a> ###
#### <a name="Prerequisites">Prerequisites</a> ####
* rocksdb (included as dependency)
* sext (included as dependency)
* Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)
#### <a name="Getting_started">Getting started</a> ####
Call `mnesia_rocksdb:register()` immediately after Call `mnesia_rocksdb:register()` immediately after
starting mnesia. starting mnesia.
@ -18,30 +55,32 @@ starting mnesia.
Put `{rocksdb_copies, [node()]}` into the table definitions of Put `{rocksdb_copies, [node()]}` into the table definitions of
tables you want to be in RocksDB. tables you want to be in RocksDB.
## Special features
RocksDB tables support efficient selects on *prefix keys*. #### <a name="Special_features">Special features</a> ####
RocksDB tables support efficient selects on _prefix keys_.
The backend uses the `sext` module (see The backend uses the `sext` module (see
https://github.com/uwiger/sext) for mapping between Erlang terms and the [`https://github.com/uwiger/sext`](https://github.com/uwiger/sext)) for mapping between Erlang terms and the
binary data stored in the tables. This provides two useful properties: binary data stored in the tables. This provides two useful properties:
- The records are stored in the Erlang term order of their keys. * The records are stored in the Erlang term order of their keys.
- A prefix of a composite key is ordered just before any key for which
it is a prefix. For example, `{x, '_'}` is a prefix for keys `{x, a}`, * A prefix of a composite key is ordered just before any key for which
`{x, b}` and so on. it is a prefix. For example, `{x, '_'}` is a prefix for keys `{x, a}`,`{x, b}` and so on.
This means that a prefix key identifies the start of the sequence of This means that a prefix key identifies the start of the sequence of
entries whose keys match the prefix. The backend uses this to optimize entries whose keys match the prefix. The backend uses this to optimize
selects on prefix keys. selects on prefix keys.
## Customization ### Customization
RocksDB supports a number of customization options. These can be specified RocksDB supports a number of customization options. These can be specified
by providing a `{Key, Value}` list named `rocksdb_opts` under `user_properties`, by providing a `{Key, Value}` list named `rocksdb_opts` under `user_properties`,
for example: for example:
```erlang ```
mnesia:create_table(foo, [{rocksdb_copies, [node()]}, mnesia:create_table(foo, [{rocksdb_copies, [node()]},
... ...
{user_properties, {user_properties,
@ -53,6 +92,7 @@ Consult the [RocksDB documentation](https://github.com/facebook/rocksdb/wiki/Set
for information on configuration parameters. Also see the section below on handling write errors. for information on configuration parameters. Also see the section below on handling write errors.
The default configuration for tables in `mnesia_rocksdb` is: The default configuration for tables in `mnesia_rocksdb` is:
``` ```
default_open_opts() -> default_open_opts() ->
[ {create_if_missing, true} [ {create_if_missing, true}
@ -74,60 +114,169 @@ This is experimental, and mostly copied from `mnesia_leveldb`. Consult the
source code in `mnesia_rocksdb_tuning.erl` and `mnesia_rocksdb_params.erl`. source code in `mnesia_rocksdb_tuning.erl` and `mnesia_rocksdb_params.erl`.
Contributions are welcome. Contributions are welcome.
## Handling of errors in write operations
The RocksDB update operations return either `ok` or `{error, any()}`. #### <a name="Caveats">Caveats</a> ####
Since the actual updates are performed after the 'point-of-no-return',
returning an `error` result will cause mnesia to behave unpredictably,
since the operations are expected to simply work.
### Option 1: `on_write_error`
An `on_write_error` option can be provided, per-table, in the `rocksdb_opts`
user property (see [Customization](#customization) above).
Supported values indicate at which level an error indication should be reported.
Mnesia may save reported events in RAM, and may also print them,
depending on the debug level (controlled with `mnesia:set_debug_level/1`).
Mnesia debug levels are, in increasing detail, `none | verbose | debug | trace`
The supported values for `on_write_error` are:
| Value | Saved at debug level | Printed at debug level | Action |
| ------- | -------------------- | ---------------------- | --------- |
| debug | unless none | verbose, debug, trace | ignore |
| verbose | unless none | verbose, debug, trace | ignore |
| warning | always | always | ignore |
| error | always | always | exception |
| fatal | always | always | core dump |
### Option 2: `on_write_error_store`
An `on_write_error_store` option can be provided, per-table, in the `rocksdb_opts`
user property (see [Customization](#customization) above).
When set, the backend will use the value of the option as the name for an ETS table
which is used as storage for runtime write errors. The table must be set up outside
of the backend by the clients themselves.
Entries to the table are in the form of a tuple `{{Table, Key}, Error, InsertedAt}`
where `Table` refers to the Mnesia table name, `Key` is the primary key being used by Mnesia,
`Error` is the error encountered by the backend, and `InsertedAt` refers to the time
the error was encountered as system time in milliseconds.
The backend will only insert entries and otherwise not manage the table. Thus, clients
are expected to clean up the table during runtime to prevent memory leakage.
## Caveats
Avoid placing `bag` tables in RocksDB. Although they work, each write Avoid placing `bag` tables in RocksDB. Although they work, each write
requires additional reads, causing substantial runtime overheads. There requires additional reads, causing substantial runtime overheads. There
are better ways to represent and process bag data (see above about are better ways to represent and process bag data (see above about
*prefix keys*). _prefix keys_).
The `mnesia:table_info(T, size)` call always returns zero for RocksDB The `mnesia:table_info(T, size)` call always returns zero for RocksDB
tables. RocksDB itself does not track the number of elements in a table, and tables. RocksDB itself does not track the number of elements in a table, and
although it is possible to make the mnesia_rocksdb backend maintain a size although it is possible to make the `mnesia_rocksdb` backend maintain a size
counter, it incurs a high runtime overhead for writes and deletes since it counter, it incurs a high runtime overhead for writes and deletes since it
forces them to first do a read to check the existence of the key. If you forces them to first do a read to check the existence of the key. If you
depend on having an up to date size count at all times, you need to maintain depend on having an up to date size count at all times, you need to maintain
it yourself. If you only need the size occasionally, you may traverse the it yourself. If you only need the size occasionally, you may traverse the
table to count the elements. table to count the elements.
### <a name="Mnesia_backend_plugins">Mnesia backend plugins</a> ###
#### <a name="Background">Background</a> ####
Mnesia was initially designed to be a RAM-only DBMS, and Erlang's
`ets` tables were developed for this purpose. In order to support
persistence, e.g. for configuration data, a disk-based version of `ets`
(called `dets`) was created. The `dets` API mimics the `ets` API,
and `dets` is quite convenient and fast for (nowadays) small datasets.
However, using a 32-bit bucket system, it is limited to 2GB of data.
It also doesn't support ordered sets. When used in Mnesia, dets-based
tables are called `disc_only_copies`.
To circumvent these limitations, another table type, called `disc_copies`
was added. This is a combination of `ets` and `disk_log`, where Mnesia
periodically snapshots the `ets` data to a log file on disk, and meanwhile
maintains a log of updates, which can be applied at startup. These tables
are quite performant (especially on read access), but all data is kept in
RAM, which can become a serious limitation.
A backend plugin system was proposed by Ulf Wiger in 2016, and further
developed with Klarna's support, to finally become included in OTP 19.
Klarna uses a LevelDb backend, but Aeternity, in 2017, instead chose
to implement a Rocksdb backend plugin.
### <a name="Design">Design</a> ###
As backend plugins were added on a long-since legacy-stable Mnesia,
they had to conform to the existing code structure. For this reason,
the plugin callbacks hook into the already present low-level access
API in the `mnesia_lib` module. As a consequence, backend plugins have
the same access semantics and granularity as `ets` and `dets`. This
isn't much of a disadvantage for key-value stores like LevelDB and RocksDB,
but a more serious issue is that the update part of this API is called
_after_ the point of no return. That is, Mnesia does not expect
these updates to fail, and has no recourse if they do. As an aside,
this could also happen if a `disc_only_copies` table exceeds the 2 GB
limit (mnesia will not check it, and `dets` will not complain, but simply
drop the update.)
### <a name="Mnesia_index_plugins">Mnesia index plugins</a> ###
When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.
An index plugin can be added in one of two ways:
1. When creating a schema, provide `{index_plugins, [{Name, Module, Function}]}` options.
1. Call the function `mnesia_schema:add_index_plugin(Name, Module, Function)`
`Name` must be an atom wrapped as a 1-tuple, e.g. `{words}`.
The plugin callback is called as `Module:Function(Table, Pos, Obj)`, where `Pos=={words}` in
our example. It returns a list of index terms.
<strong>Example</strong>
Given the following index plugin implementation:
```
-module(words).
-export([words_f/3]).
words_f(_,_,Obj) when is_tuple(Obj) ->
words_(tuple_to_list(Obj)).
words_(Str) when is_binary(Str) ->
string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
words_(L) when is_list(L) ->
lists:flatmap(fun words_/1, L);
words_(_) ->
[].
```
We can register the plugin and use it in table definitions:
```
Eshell V12.1.3 (abort with ^G)
1> mnesia:start().
ok
2> mnesia_schema:add_index_plugin({words}, words, words_f).
{atomic,ok}
3> mnesia:create_table(i, [{index, [{words}]}]).
{atomic,ok}
```
Note that in this case, we had neither a backend plugin, nor even a persistent schema.
Index plugins can be used with all table types. The registered indexing function (arity 3) must exist
as an exported function along the node's code path.
To see what happens when we insert an object, we can turn on call trace.
```
4> dbg:tracer().
{ok,<0.108.0>}
5> dbg:tp(words, x).
{ok,[{matched,nonode@nohost,3},{saved,x}]}
6> dbg:p(all,[c]).
{ok,[{matched,nonode@nohost,60}]}
7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
<<"four">>]
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
<<"four">>]
ok
8> dbg:ctp('_'), dbg:stop().
ok
9> mnesia:dirty_index_read(i, <<"one">>, {words}).
[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
```
(The fact that the indexing function is called twice seems like a performance bug.)
We can observe that the indexing callback is able to operate on the whole object.
It needs to be side-effect free and efficient, since it will be called at least once for each update
(if an old object exists in the table, the indexing function will be called on it too, before it is
replaced by the new object.)
### <a name="Rocksdb">Rocksdb</a> ###
### <a name="Usage">Usage</a> ###
## Modules ##
<table width="100%" border="0" summary="list of modules">
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb.md" class="module">mnesia_rocksdb</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_admin.md" class="module">mnesia_rocksdb_admin</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_app.md" class="module">mnesia_rocksdb_app</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_lib.md" class="module">mnesia_rocksdb_lib</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_params.md" class="module">mnesia_rocksdb_params</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_sup.md" class="module">mnesia_rocksdb_sup</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_tuning.md" class="module">mnesia_rocksdb_tuning</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb.md" class="module">mrdb</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb_index.md" class="module">mrdb_index</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb_mutex.md" class="module">mrdb_mutex</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb_select.md" class="module">mrdb_select</a></td></tr></table>

282
doc/README.md Normal file
View File

@ -0,0 +1,282 @@
# Mnesia Rocksdb - Rocksdb backend plugin for Mnesia #
Copyright (c) 2013-21 Klarna AB
__Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).
The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
it possible to utilize more capable key-value stores than the `dets`
module (limited to 2 GB per table). Unfortunately, this support is
undocumented. Below, some informal documentation for the plugin system
is provided.
### <a name="Table_of_Contents">Table of Contents</a> ###
1. [Usage](#Usage)
1. [Prerequisites](#Prerequisites)
1. [Getting started](#Getting_started)
1. [Special features](#Special_features)
1. [Customization](#Customization)
1. [Handling of errors in write operations](#Handling_of_errors_in_write_operations)
1. [Caveats](#Caveats)
1. [Mnesia backend plugins](#Mnesia_backend_plugins)
1. [Background](#Background)
1. [Design](#Design)
1. [Mnesia index plugins](#Mnesia_index_plugins)
1. [Rocksdb](#Rocksdb)
### <a name="Usage">Usage</a> ###
#### <a name="Prerequisites">Prerequisites</a> ####
* rocksdb (included as dependency)
* sext (included as dependency)
* Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)
#### <a name="Getting_started">Getting started</a> ####
Call `mnesia_rocksdb:register()` immediately after
starting mnesia.
Put `{rocksdb_copies, [node()]}` into the table definitions of
tables you want to be in RocksDB.
#### <a name="Special_features">Special features</a> ####
RocksDB tables support efficient selects on _prefix keys_.
The backend uses the `sext` module (see
[`https://github.com/uwiger/sext`](https://github.com/uwiger/sext)) for mapping between Erlang terms and the
binary data stored in the tables. This provides two useful properties:
* The records are stored in the Erlang term order of their keys.
* A prefix of a composite key is ordered just before any key for which
it is a prefix. For example, `{x, '_'}` is a prefix for keys `{x, a}`, `{x, b}` and so on.
This means that a prefix key identifies the start of the sequence of
entries whose keys match the prefix. The backend uses this to optimize
selects on prefix keys.
#### <a name="Customization">Customization</a> ####
RocksDB supports a number of customization options. These can be specified
by providing a `{Key, Value}` list named `rocksdb_opts` under `user_properties`,
for example:
```
mnesia:create_table(foo, [{rocksdb_copies, [node()]},
...
{user_properties,
[{rocksdb_opts, [{max_open_files, 1024}]}]
}])
```
Consult the [RocksDB documentation](https://github.com/facebook/rocksdb/wiki/Setup-Options-and-Basic-Tuning)
for information on configuration parameters. Also see the section below on handling write errors.
The default configuration for tables in `mnesia_rocksdb` is:
```
default_open_opts() ->
[ {create_if_missing, true}
, {cache_size,
list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
, {block_size, 1024}
, {max_open_files, 100}
, {write_buffer_size,
list_to_integer(get_env_default(
"ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
, {compression,
list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
, {use_bloomfilter, true}
].
```
It is also possible, for larger databases, to produce a tuning parameter file.
This is experimental, and mostly copied from `mnesia_eleveldb`. Consult the
source code in `mnesia_rocksdb_tuning.erl` and `mnesia_rocksdb_params.erl`.
Contributions are welcome.
#### <a name="Caveats">Caveats</a> ####
Avoid placing `bag` tables in RocksDB. Although they work, each write
requires additional reads, causing substantial runtime overheads. There
are better ways to represent and process bag data (see above about
_prefix keys_).
The `mnesia:table_info(T, size)` call always returns zero for RocksDB
tables. RocksDB itself does not track the number of elements in a table, and
although it is possible to make the `mnesia_rocksdb` backend maintain a size
counter, it incurs a high runtime overhead for writes and deletes since it
forces them to first do a read to check the existence of the key. If you
depend on having an up to date size count at all times, you need to maintain
it yourself. If you only need the size occasionally, you may traverse the
table to count the elements.
### <a name="Mnesia_backend_plugins">Mnesia backend plugins</a> ###
#### <a name="Background">Background</a> ####
Mnesia was initially designed to be a RAM-only DBMS, and Erlang's
`ets` tables were developed for this purpose. In order to support
persistence, e.g. for configuration data, a disk-based version of `ets`
(called `dets`) was created. The `dets` API mimics the `ets` API,
and `dets` is quite convenient and fast for (nowadays) small datasets.
However, using a 32-bit bucket system, it is limited to 2GB of data.
It also doesn't support ordered sets. When used in Mnesia, dets-based
tables are called `disc_only_copies`.
To circumvent these limitations, another table type, called `disc_copies`
was added. This is a combination of `ets` and `disk_log`, where Mnesia
periodically snapshots the `ets` data to a log file on disk, and meanwhile
maintains a log of updates, which can be applied at startup. These tables
are quite performant (especially on read access), but all data is kept in
RAM, which can become a serious limitation.
A backend plugin system was proposed by Ulf Wiger in 2016, and further
developed with Klarna's support, to finally become included in OTP 19.
Klarna uses a LevelDb backend, but Aeternity, in 2017, instead chose
to implement a Rocksdb backend plugin.
### <a name="Design">Design</a> ###
As backend plugins were added on a long-since legacy-stable Mnesia,
they had to conform to the existing code structure. For this reason,
the plugin callbacks hook into the already present low-level access
API in the `mnesia_lib` module. As a consequence, backend plugins have
the same access semantics and granularity as `ets` and `dets`. This
isn't much of a disadvantage for key-value stores like LevelDB and RocksDB,
but a more serious issue is that the update part of this API is called
_after_ the point of no return. That is, Mnesia does not expect
these updates to fail, and has no recourse if they do. As an aside,
this could also happen if a `disc_only_copies` table exceeds the 2 GB
limit (mnesia will not check it, and `dets` will not complain, but simply
drop the update.)
### <a name="Mnesia_index_plugins">Mnesia index plugins</a> ###
When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.
An index plugin can be added in one of two ways:
1. When creating a schema, provide `{index_plugins, [{Name, Module, Function}]}` options.
1. Call the function `mnesia_schema:add_index_plugin(Name, Module, Function)`
`Name` must be an atom wrapped as a 1-tuple, e.g. `{words}`.
The plugin callback is called as `Module:Function(Table, Pos, Obj)`, where `Pos=={words}` in
our example. It returns a list of index terms.
<strong>Example</strong>
Given the following index plugin implementation:
```
-module(words).
-export([words_f/3]).
words_f(_,_,Obj) when is_tuple(Obj) ->
words_(tuple_to_list(Obj)).
words_(Str) when is_binary(Str) ->
string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
words_(L) when is_list(L) ->
lists:flatmap(fun words_/1, L);
words_(_) ->
[].
```
We can register the plugin and use it in table definitions:
```
Eshell V12.1.3 (abort with ^G)
1> mnesia:start().
ok
2> mnesia_schema:add_index_plugin({words}, words, words_f).
{atomic,ok}
3> mnesia:create_table(i, [{index, [{words}]}]).
{atomic,ok}
```
Note that in this case, we had neither a backend plugin, nor even a persistent schema.
Index plugins can be used with all table types. The registered indexing function (arity 3) must exist
as an exported function along the node's code path.
To see what happens when we insert an object, we can turn on call trace.
```
4> dbg:tracer().
{ok,<0.108.0>}
5> dbg:tp(words, x).
{ok,[{matched,nonode@nohost,3},{saved,x}]}
6> dbg:p(all,[c]).
{ok,[{matched,nonode@nohost,60}]}
7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
<<"four">>]
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
<<"four">>]
ok
8> dbg:ctp('_'), dbg:stop().
ok
9> mnesia:dirty_index_read(i, <<"one">>, {words}).
[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
```
(The fact that the indexing function is called twice seems like a performance bug.)
We can observe that the indexing callback is able to operate on the whole object.
It needs to be side-effect free and efficient, since it will be called at least once for each update
(if an old object exists in the table, the indexing function will be called on it too, before it is
replaced by the new object.)
### <a name="Rocksdb">Rocksdb</a> ###
### <a name="Usage">Usage</a> ###
## Modules ##
<table width="100%" border="0" summary="list of modules">
<tr><td><a href="mnesia_rocksdb.md" class="module">mnesia_rocksdb</a></td></tr>
<tr><td><a href="mnesia_rocksdb_admin.md" class="module">mnesia_rocksdb_admin</a></td></tr>
<tr><td><a href="mnesia_rocksdb_app.md" class="module">mnesia_rocksdb_app</a></td></tr>
<tr><td><a href="mnesia_rocksdb_lib.md" class="module">mnesia_rocksdb_lib</a></td></tr>
<tr><td><a href="mnesia_rocksdb_params.md" class="module">mnesia_rocksdb_params</a></td></tr>
<tr><td><a href="mnesia_rocksdb_sup.md" class="module">mnesia_rocksdb_sup</a></td></tr>
<tr><td><a href="mnesia_rocksdb_tuning.md" class="module">mnesia_rocksdb_tuning</a></td></tr>
<tr><td><a href="mrdb.md" class="module">mrdb</a></td></tr>
<tr><td><a href="mrdb_index.md" class="module">mrdb_index</a></td></tr>
<tr><td><a href="mrdb_mutex.md" class="module">mrdb_mutex</a></td></tr>
<tr><td><a href="mrdb_select.md" class="module">mrdb_select</a></td></tr></table>

5
doc/edoc-info Normal file
View File

@ -0,0 +1,5 @@
%% encoding: UTF-8
{application,mnesia_rocksdb}.
{modules,[mnesia_rocksdb,mnesia_rocksdb_admin,mnesia_rocksdb_app,
mnesia_rocksdb_lib,mnesia_rocksdb_params,mnesia_rocksdb_sup,
mnesia_rocksdb_tuning,mrdb,mrdb_index,mrdb_mutex,mrdb_select]}.

BIN
doc/erlang.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 2.1 KiB

547
doc/mnesia_rocksdb.md Normal file

File diff suppressed because one or more lines are too long

326
doc/mnesia_rocksdb_admin.md Normal file
View File

@ -0,0 +1,326 @@
# Module mnesia_rocksdb_admin #
* [Data Types](#types)
* [Function Index](#index)
* [Function Details](#functions)
__Behaviours:__ [`gen_server`](gen_server.md).
<a name="types"></a>
## Data Types ##
### <a name="type-alias">alias()</a> ###
<pre><code>
alias() = atom()
</code></pre>
### <a name="type-backend">backend()</a> ###
<pre><code>
backend() = #{db_ref =&gt; <a href="#type-db_ref">db_ref()</a>, cf_info =&gt; #{<a href="#type-table">table()</a> =&gt; <a href="#type-cf">cf()</a>}}
</code></pre>
### <a name="type-cf">cf()</a> ###
<pre><code>
cf() = <a href="mrdb.md#type-db_ref">mrdb:db_ref()</a>
</code></pre>
### <a name="type-db_ref">db_ref()</a> ###
<pre><code>
db_ref() = <a href="https://hexdocs.pm/rocksdb/rocksdb.html#type-db_handle">rocksdb:db_handle()</a>
</code></pre>
### <a name="type-gen_server_noreply">gen_server_noreply()</a> ###
<pre><code>
gen_server_noreply() = {noreply, <a href="#type-st">st()</a>} | {stop, <a href="#type-reason">reason()</a>, <a href="#type-st">st()</a>}
</code></pre>
### <a name="type-gen_server_reply">gen_server_reply()</a> ###
<pre><code>
gen_server_reply() = {reply, <a href="#type-reply">reply()</a>, <a href="#type-st">st()</a>} | {stop, <a href="#type-reason">reason()</a>, <a href="#type-reply">reply()</a>, <a href="#type-st">st()</a>}
</code></pre>
### <a name="type-properties">properties()</a> ###
<pre><code>
properties() = [{atom(), any()}]
</code></pre>
### <a name="type-reason">reason()</a> ###
<pre><code>
reason() = any()
</code></pre>
### <a name="type-reply">reply()</a> ###
<pre><code>
reply() = any()
</code></pre>
### <a name="type-req">req()</a> ###
<pre><code>
req() = {create_table, <a href="#type-table">table()</a>, <a href="#type-properties">properties()</a>} | {delete_table, <a href="#type-table">table()</a>} | {load_table, <a href="#type-table">table()</a>} | {related_resources, <a href="#type-table">table()</a>} | {get_ref, <a href="#type-table">table()</a>} | {add_aliases, [<a href="#type-alias">alias()</a>]} | {write_table_property, <a href="#type-tabname">tabname()</a>, tuple()} | {remove_aliases, [<a href="#type-alias">alias()</a>]} | {migrate, [{<a href="#type-tabname">tabname()</a>, map()}]} | {prep_close, <a href="#type-table">table()</a>} | {close_table, <a href="#type-table">table()</a>}
</code></pre>
### <a name="type-st">st()</a> ###
<pre><code>
st() = #st{backends = #{<a href="#type-alias">alias()</a> =&gt; <a href="#type-backend">backend()</a>}, standalone = #{{<a href="#type-alias">alias()</a>, <a href="#type-table">table()</a>} =&gt; <a href="#type-cf">cf()</a>}, default_opts = [{atom(), term()}]}
</code></pre>
### <a name="type-table">table()</a> ###
<pre><code>
table() = <a href="#type-tabname">tabname()</a> | {admin, <a href="#type-alias">alias()</a>} | {<a href="#type-tabname">tabname()</a>, index, any()} | {<a href="#type-tabname">tabname()</a>, retainer, any()}
</code></pre>
### <a name="type-tabname">tabname()</a> ###
<pre><code>
tabname() = atom()
</code></pre>
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#add_aliases-1">add_aliases/1</a></td><td></td></tr><tr><td valign="top"><a href="#close_table-2">close_table/2</a></td><td></td></tr><tr><td valign="top"><a href="#code_change-3">code_change/3</a></td><td></td></tr><tr><td valign="top"><a href="#create_table-3">create_table/3</a></td><td></td></tr><tr><td valign="top"><a href="#delete_table-2">delete_table/2</a></td><td></td></tr><tr><td valign="top"><a href="#ensure_started-0">ensure_started/0</a></td><td></td></tr><tr><td valign="top"><a href="#get_ref-1">get_ref/1</a></td><td></td></tr><tr><td valign="top"><a href="#get_ref-2">get_ref/2</a></td><td></td></tr><tr><td valign="top"><a href="#handle_call-3">handle_call/3</a></td><td></td></tr><tr><td valign="top"><a href="#handle_cast-2">handle_cast/2</a></td><td></td></tr><tr><td valign="top"><a href="#handle_info-2">handle_info/2</a></td><td></td></tr><tr><td valign="top"><a href="#init-1">init/1</a></td><td></td></tr><tr><td valign="top"><a href="#load_table-2">load_table/2</a></td><td></td></tr><tr><td valign="top"><a href="#meta-0">meta/0</a></td><td></td></tr><tr><td valign="top"><a href="#migrate_standalone-2">migrate_standalone/2</a></td><td></td></tr><tr><td valign="top"><a href="#prep_close-2">prep_close/2</a></td><td></td></tr><tr><td valign="top"><a href="#read_info-1">read_info/1</a></td><td></td></tr><tr><td valign="top"><a href="#read_info-2">read_info/2</a></td><td></td></tr><tr><td valign="top"><a href="#read_info-4">read_info/4</a></td><td></td></tr><tr><td valign="top"><a href="#related_resources-2">related_resources/2</a></td><td></td></tr><tr><td valign="top"><a href="#remove_aliases-1">remove_aliases/1</a></td><td></td></tr><tr><td valign="top"><a href="#request_ref-2">request_ref/2</a></td><td></td></tr><tr><td valign="top"><a href="#start_link-0">start_link/0</a></td><td></td></tr><tr><td valign="top"><a 
href="#terminate-2">terminate/2</a></td><td></td></tr><tr><td valign="top"><a href="#write_info-4">write_info/4</a></td><td></td></tr><tr><td valign="top"><a href="#write_table_property-3">write_table_property/3</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="add_aliases-1"></a>
### add_aliases/1 ###
`add_aliases(Aliases) -> any()`
<a name="close_table-2"></a>
### close_table/2 ###
`close_table(Alias, Name) -> any()`
<a name="code_change-3"></a>
### code_change/3 ###
`code_change(FromVsn, St, Extra) -> any()`
<a name="create_table-3"></a>
### create_table/3 ###
`create_table(Alias, Name, Props) -> any()`
<a name="delete_table-2"></a>
### delete_table/2 ###
<pre><code>
delete_table(Alias::<a href="#type-alias">alias()</a>, Name::<a href="#type-tabname">tabname()</a>) -&gt; ok
</code></pre>
<br />
<a name="ensure_started-0"></a>
### ensure_started/0 ###
<pre><code>
ensure_started() -&gt; ok
</code></pre>
<br />
<a name="get_ref-1"></a>
### get_ref/1 ###
`get_ref(Name) -> any()`
<a name="get_ref-2"></a>
### get_ref/2 ###
`get_ref(Name, Default) -> any()`
<a name="handle_call-3"></a>
### handle_call/3 ###
<pre><code>
handle_call(Req::{<a href="#type-alias">alias()</a>, <a href="#type-req">req()</a>}, From::any(), St::<a href="#type-st">st()</a>) -&gt; <a href="#type-gen_server_reply">gen_server_reply()</a>
</code></pre>
<br />
<a name="handle_cast-2"></a>
### handle_cast/2 ###
<pre><code>
handle_cast(Msg::any(), St::<a href="#type-st">st()</a>) -&gt; <a href="#type-gen_server_noreply">gen_server_noreply()</a>
</code></pre>
<br />
<a name="handle_info-2"></a>
### handle_info/2 ###
<pre><code>
handle_info(Msg::any(), St::<a href="#type-st">st()</a>) -&gt; <a href="#type-gen_server_noreply">gen_server_noreply()</a>
</code></pre>
<br />
<a name="init-1"></a>
### init/1 ###
`init(X1) -> any()`
<a name="load_table-2"></a>
### load_table/2 ###
`load_table(Alias, Name) -> any()`
<a name="meta-0"></a>
### meta/0 ###
`meta() -> any()`
<a name="migrate_standalone-2"></a>
### migrate_standalone/2 ###
`migrate_standalone(Alias, Tabs) -> any()`
<a name="prep_close-2"></a>
### prep_close/2 ###
`prep_close(Alias, Tab) -> any()`
<a name="read_info-1"></a>
### read_info/1 ###
`read_info(TRec) -> any()`
<a name="read_info-2"></a>
### read_info/2 ###
`read_info(Alias, Tab) -> any()`
<a name="read_info-4"></a>
### read_info/4 ###
`read_info(Alias, Tab, K, Default) -> any()`
<a name="related_resources-2"></a>
### related_resources/2 ###
`related_resources(Alias, Name) -> any()`
<a name="remove_aliases-1"></a>
### remove_aliases/1 ###
`remove_aliases(Aliases) -> any()`
<a name="request_ref-2"></a>
### request_ref/2 ###
`request_ref(Alias, Name) -> any()`
<a name="start_link-0"></a>
### start_link/0 ###
`start_link() -> any()`
<a name="terminate-2"></a>
### terminate/2 ###
`terminate(X1, St) -> any()`
<a name="write_info-4"></a>
### write_info/4 ###
`write_info(Alias, Tab, K, V) -> any()`
<a name="write_table_property-3"></a>
### write_table_property/3 ###
`write_table_property(Alias, Tab, Prop) -> any()`

32
doc/mnesia_rocksdb_app.md Normal file
View File

@ -0,0 +1,32 @@
# Module mnesia_rocksdb_app #
* [Function Index](#index)
* [Function Details](#functions)
__Behaviours:__ [`application`](application.md).
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#start-2">start/2</a></td><td></td></tr><tr><td valign="top"><a href="#stop-1">stop/1</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="start-2"></a>
### start/2 ###
`start(StartType, StartArgs) -> any()`
<a name="stop-1"></a>
### stop/1 ###
`stop(State) -> any()`

168
doc/mnesia_rocksdb_lib.md Normal file
View File

@ -0,0 +1,168 @@
# Module mnesia_rocksdb_lib #
* [Description](#description)
* [Function Index](#index)
* [Function Details](#functions)
RocksDB update wrappers, in separate module for easy tracing and mocking.
<a name="description"></a>
## Description ##
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#check_encoding-2">check_encoding/2</a></td><td></td></tr><tr><td valign="top"><a href="#create_mountpoint-1">create_mountpoint/1</a></td><td></td></tr><tr><td valign="top"><a href="#data_mountpoint-1">data_mountpoint/1</a></td><td></td></tr><tr><td valign="top"><a href="#decode-2">decode/2</a></td><td></td></tr><tr><td valign="top"><a href="#decode_key-1">decode_key/1</a></td><td></td></tr><tr><td valign="top"><a href="#decode_key-2">decode_key/2</a></td><td></td></tr><tr><td valign="top"><a href="#decode_val-1">decode_val/1</a></td><td></td></tr><tr><td valign="top"><a href="#decode_val-3">decode_val/3</a></td><td></td></tr><tr><td valign="top"><a href="#default_encoding-3">default_encoding/3</a></td><td></td></tr><tr><td valign="top"><a href="#delete-3">delete/3</a></td><td></td></tr><tr><td valign="top"><a href="#encode-2">encode/2</a></td><td></td></tr><tr><td valign="top"><a href="#encode_key-1">encode_key/1</a></td><td></td></tr><tr><td valign="top"><a href="#encode_key-2">encode_key/2</a></td><td></td></tr><tr><td valign="top"><a href="#encode_val-1">encode_val/1</a></td><td></td></tr><tr><td valign="top"><a href="#encode_val-2">encode_val/2</a></td><td></td></tr><tr><td valign="top"><a href="#keypos-1">keypos/1</a></td><td></td></tr><tr><td valign="top"><a href="#open_rocksdb-3">open_rocksdb/3</a></td><td></td></tr><tr><td valign="top"><a href="#put-4">put/4</a></td><td></td></tr><tr><td valign="top"><a href="#tabname-1">tabname/1</a></td><td></td></tr><tr><td valign="top"><a href="#valid_key_type-2">valid_key_type/2</a></td><td></td></tr><tr><td valign="top"><a href="#valid_obj_type-2">valid_obj_type/2</a></td><td></td></tr><tr><td valign="top"><a href="#write-3">write/3</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="check_encoding-2"></a>
### check_encoding/2 ###
`check_encoding(Encoding, Attributes) -> any()`
<a name="create_mountpoint-1"></a>
### create_mountpoint/1 ###
`create_mountpoint(Tab) -> any()`
<a name="data_mountpoint-1"></a>
### data_mountpoint/1 ###
`data_mountpoint(Tab) -> any()`
<a name="decode-2"></a>
### decode/2 ###
`decode(Val, X2) -> any()`
<a name="decode_key-1"></a>
### decode_key/1 ###
<pre><code>
decode_key(CodedKey::binary()) -&gt; any()
</code></pre>
<br />
<a name="decode_key-2"></a>
### decode_key/2 ###
`decode_key(CodedKey, Enc) -> any()`
<a name="decode_val-1"></a>
### decode_val/1 ###
<pre><code>
decode_val(CodedVal::binary()) -&gt; any()
</code></pre>
<br />
<a name="decode_val-3"></a>
### decode_val/3 ###
`decode_val(CodedVal, K, Ref) -> any()`
<a name="default_encoding-3"></a>
### default_encoding/3 ###
`default_encoding(X1, Type, As) -> any()`
<a name="delete-3"></a>
### delete/3 ###
`delete(Ref, K, Opts) -> any()`
<a name="encode-2"></a>
### encode/2 ###
`encode(Value, X2) -> any()`
<a name="encode_key-1"></a>
### encode_key/1 ###
<pre><code>
encode_key(Key::any()) -&gt; binary()
</code></pre>
<br />
<a name="encode_key-2"></a>
### encode_key/2 ###
`encode_key(Key, X2) -> any()`
<a name="encode_val-1"></a>
### encode_val/1 ###
<pre><code>
encode_val(Val::any()) -&gt; binary()
</code></pre>
<br />
<a name="encode_val-2"></a>
### encode_val/2 ###
`encode_val(Val, Enc) -> any()`
<a name="keypos-1"></a>
### keypos/1 ###
`keypos(Tab) -> any()`
<a name="open_rocksdb-3"></a>
### open_rocksdb/3 ###
`open_rocksdb(MPd, RdbOpts, CFs) -> any()`
<a name="put-4"></a>
### put/4 ###
`put(Ref, K, V, Opts) -> any()`
<a name="tabname-1"></a>
### tabname/1 ###
`tabname(Tab) -> any()`
<a name="valid_key_type-2"></a>
### valid_key_type/2 ###
`valid_key_type(X1, Key) -> any()`
<a name="valid_obj_type-2"></a>
### valid_obj_type/2 ###
`valid_obj_type(X1, Obj) -> any()`
<a name="write-3"></a>
### write/3 ###
`write(X1, L, Opts) -> any()`

View File

@ -0,0 +1,80 @@
# Module mnesia_rocksdb_params #
* [Function Index](#index)
* [Function Details](#functions)
__Behaviours:__ [`gen_server`](gen_server.md).
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#code_change-3">code_change/3</a></td><td></td></tr><tr><td valign="top"><a href="#delete-1">delete/1</a></td><td></td></tr><tr><td valign="top"><a href="#handle_call-3">handle_call/3</a></td><td></td></tr><tr><td valign="top"><a href="#handle_cast-2">handle_cast/2</a></td><td></td></tr><tr><td valign="top"><a href="#handle_info-2">handle_info/2</a></td><td></td></tr><tr><td valign="top"><a href="#init-1">init/1</a></td><td></td></tr><tr><td valign="top"><a href="#lookup-2">lookup/2</a></td><td></td></tr><tr><td valign="top"><a href="#start_link-0">start_link/0</a></td><td></td></tr><tr><td valign="top"><a href="#store-2">store/2</a></td><td></td></tr><tr><td valign="top"><a href="#terminate-2">terminate/2</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="code_change-3"></a>
### code_change/3 ###
`code_change(X1, S, X3) -> any()`
<a name="delete-1"></a>
### delete/1 ###
`delete(Tab) -> any()`
<a name="handle_call-3"></a>
### handle_call/3 ###
`handle_call(X1, X2, S) -> any()`
<a name="handle_cast-2"></a>
### handle_cast/2 ###
`handle_cast(X1, S) -> any()`
<a name="handle_info-2"></a>
### handle_info/2 ###
`handle_info(X1, S) -> any()`
<a name="init-1"></a>
### init/1 ###
`init(X1) -> any()`
<a name="lookup-2"></a>
### lookup/2 ###
`lookup(Tab, Default) -> any()`
<a name="start_link-0"></a>
### start_link/0 ###
`start_link() -> any()`
<a name="store-2"></a>
### store/2 ###
`store(Tab, Params) -> any()`
<a name="terminate-2"></a>
### terminate/2 ###
`terminate(X1, X2) -> any()`

32
doc/mnesia_rocksdb_sup.md Normal file
View File

@ -0,0 +1,32 @@
# Module mnesia_rocksdb_sup #
* [Function Index](#index)
* [Function Details](#functions)
__Behaviours:__ [`supervisor`](supervisor.md).
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#init-1">init/1</a></td><td></td></tr><tr><td valign="top"><a href="#start_link-0">start_link/0</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="init-1"></a>
### init/1 ###
`init(X1) -> any()`
<a name="start_link-0"></a>
### start_link/0 ###
`start_link() -> any()`

View File

@ -0,0 +1,126 @@
# Module mnesia_rocksdb_tuning #
* [Function Index](#index)
* [Function Details](#functions)
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#cache-1">cache/1</a></td><td></td></tr><tr><td valign="top"><a href="#calc_sizes-0">calc_sizes/0</a></td><td></td></tr><tr><td valign="top"><a href="#calc_sizes-1">calc_sizes/1</a></td><td></td></tr><tr><td valign="top"><a href="#count_rdb_tabs-0">count_rdb_tabs/0</a></td><td></td></tr><tr><td valign="top"><a href="#count_rdb_tabs-1">count_rdb_tabs/1</a></td><td></td></tr><tr><td valign="top"><a href="#default-1">default/1</a></td><td></td></tr><tr><td valign="top"><a href="#describe_env-0">describe_env/0</a></td><td></td></tr><tr><td valign="top"><a href="#get_avail_ram-0">get_avail_ram/0</a></td><td></td></tr><tr><td valign="top"><a href="#get_maxfiles-0">get_maxfiles/0</a></td><td></td></tr><tr><td valign="top"><a href="#get_maxfiles-1">get_maxfiles/1</a></td><td></td></tr><tr><td valign="top"><a href="#ideal_max_files-0">ideal_max_files/0</a></td><td></td></tr><tr><td valign="top"><a href="#ideal_max_files-1">ideal_max_files/1</a></td><td></td></tr><tr><td valign="top"><a href="#max_files-1">max_files/1</a></td><td></td></tr><tr><td valign="top"><a href="#rdb_indexes-0">rdb_indexes/0</a></td><td></td></tr><tr><td valign="top"><a href="#rdb_indexes-1">rdb_indexes/1</a></td><td></td></tr><tr><td valign="top"><a href="#rdb_tabs-0">rdb_tabs/0</a></td><td></td></tr><tr><td valign="top"><a href="#rdb_tabs-1">rdb_tabs/1</a></td><td></td></tr><tr><td valign="top"><a href="#write_buffer-1">write_buffer/1</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="cache-1"></a>
### cache/1 ###
`cache(X1) -> any()`
<a name="calc_sizes-0"></a>
### calc_sizes/0 ###
`calc_sizes() -> any()`
<a name="calc_sizes-1"></a>
### calc_sizes/1 ###
`calc_sizes(D) -> any()`
<a name="count_rdb_tabs-0"></a>
### count_rdb_tabs/0 ###
`count_rdb_tabs() -> any()`
<a name="count_rdb_tabs-1"></a>
### count_rdb_tabs/1 ###
`count_rdb_tabs(Db) -> any()`
<a name="default-1"></a>
### default/1 ###
`default(X1) -> any()`
<a name="describe_env-0"></a>
### describe_env/0 ###
`describe_env() -> any()`
<a name="get_avail_ram-0"></a>
### get_avail_ram/0 ###
`get_avail_ram() -> any()`
<a name="get_maxfiles-0"></a>
### get_maxfiles/0 ###
`get_maxfiles() -> any()`
<a name="get_maxfiles-1"></a>
### get_maxfiles/1 ###
`get_maxfiles(X1) -> any()`
<a name="ideal_max_files-0"></a>
### ideal_max_files/0 ###
`ideal_max_files() -> any()`
<a name="ideal_max_files-1"></a>
### ideal_max_files/1 ###
`ideal_max_files(D) -> any()`
<a name="max_files-1"></a>
### max_files/1 ###
`max_files(X1) -> any()`
<a name="rdb_indexes-0"></a>
### rdb_indexes/0 ###
`rdb_indexes() -> any()`
<a name="rdb_indexes-1"></a>
### rdb_indexes/1 ###
`rdb_indexes(Db) -> any()`
<a name="rdb_tabs-0"></a>
### rdb_tabs/0 ###
`rdb_tabs() -> any()`
<a name="rdb_tabs-1"></a>
### rdb_tabs/1 ###
`rdb_tabs(Db) -> any()`
<a name="write_buffer-1"></a>
### write_buffer/1 ###
`write_buffer(X1) -> any()`

943
doc/mrdb.md Normal file

File diff suppressed because one or more lines are too long

99
doc/mrdb_index.md Normal file
View File

@ -0,0 +1,99 @@
# Module mrdb_index #
* [Data Types](#types)
* [Function Index](#index)
* [Function Details](#functions)
<a name="types"></a>
## Data Types ##
### <a name="type-index_value">index_value()</a> ###
<pre><code>
index_value() = any()
</code></pre>
### <a name="type-iterator_action">iterator_action()</a> ###
<pre><code>
iterator_action() = <a href="mrdb.md#type-iterator_action">mrdb:iterator_action()</a>
</code></pre>
### <a name="type-ix_iterator">ix_iterator()</a> ###
<pre><code>
ix_iterator() = #mrdb_ix_iter{i = <a href="mrdb.md#type-iterator">mrdb:iterator()</a>, type = set | bag, sub = <a href="mrdb.md#type-ref">mrdb:ref()</a> | pid()}
</code></pre>
### <a name="type-object">object()</a> ###
<pre><code>
object() = tuple()
</code></pre>
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#iterator-2">iterator/2</a></td><td></td></tr><tr><td valign="top"><a href="#iterator_close-1">iterator_close/1</a></td><td></td></tr><tr><td valign="top"><a href="#iterator_move-2">iterator_move/2</a></td><td></td></tr><tr><td valign="top"><a href="#with_iterator-3">with_iterator/3</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="iterator-2"></a>
### iterator/2 ###
<pre><code>
iterator(Tab::<a href="mrdb.md#type-ref_or_tab">mrdb:ref_or_tab()</a>, IxPos::<a href="mrdb.md#type-index_position">mrdb:index_position()</a>) -&gt; {ok, <a href="#type-ix_iterator">ix_iterator()</a>} | {error, term()}
</code></pre>
<br />
<a name="iterator_close-1"></a>
### iterator_close/1 ###
<pre><code>
iterator_close(Mrdb_ix_iter::<a href="#type-ix_iterator">ix_iterator()</a>) -&gt; ok
</code></pre>
<br />
<a name="iterator_move-2"></a>
### iterator_move/2 ###
<pre><code>
iterator_move(Mrdb_ix_iter::<a href="#type-ix_iterator">ix_iterator()</a>, Dir::<a href="#type-iterator_action">iterator_action()</a>) -&gt; {ok, <a href="#type-index_value">index_value()</a>, <a href="#type-object">object()</a>} | {error, term()}
</code></pre>
<br />
<a name="with_iterator-3"></a>
### with_iterator/3 ###
<pre><code>
with_iterator(Tab::<a href="mrdb.md#type-ref_or_tab">mrdb:ref_or_tab()</a>, IxPos::<a href="mrdb.md#type-index_position">mrdb:index_position()</a>, Fun::fun((<a href="#type-ix_iterator">ix_iterator()</a>) -&gt; Res)) -&gt; Res
</code></pre>
<br />

30
doc/mrdb_mutex.md Normal file
View File

@ -0,0 +1,30 @@
# Module mrdb_mutex #
* [Function Index](#index)
* [Function Details](#functions)
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#do-2">do/2</a></td><td></td></tr><tr><td valign="top"><a href="#ensure_tab-0">ensure_tab/0</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="do-2"></a>
### do/2 ###
`do(Rsrc, F) -> any()`
<a name="ensure_tab-0"></a>
### ensure_tab/0 ###
`ensure_tab() -> any()`

48
doc/mrdb_select.md Normal file
View File

@ -0,0 +1,48 @@
# Module mrdb_select #
* [Function Index](#index)
* [Function Details](#functions)
<a name="index"></a>
## Function Index ##
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#fold-5">fold/5</a></td><td></td></tr><tr><td valign="top"><a href="#rdb_fold-5">rdb_fold/5</a></td><td></td></tr><tr><td valign="top"><a href="#select-1">select/1</a></td><td></td></tr><tr><td valign="top"><a href="#select-3">select/3</a></td><td></td></tr><tr><td valign="top"><a href="#select-4">select/4</a></td><td></td></tr></table>
<a name="functions"></a>
## Function Details ##
<a name="fold-5"></a>
### fold/5 ###
`fold(Ref, Fun, Acc, MS, Limit) -> any()`
<a name="rdb_fold-5"></a>
### rdb_fold/5 ###
`rdb_fold(Ref, Fun, Acc, Prefix, Limit) -> any()`
<a name="select-1"></a>
### select/1 ###
`select(Cont) -> any()`
<a name="select-3"></a>
### select/3 ###
`select(Ref, MS, Limit) -> any()`
<a name="select-4"></a>
### select/4 ###
`select(Ref, MS, AccKeys, Limit) -> any()`

250
doc/overview.edoc Normal file
View File

@ -0,0 +1,250 @@
@author Ulf Wiger <ulf@wiger.net>
@copyright 2013-21 Klarna AB
@title Mnesia Rocksdb - Rocksdb backend plugin for Mnesia
@doc
The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
it possible to utilize more capable key-value stores than the `dets'
module (limited to 2 GB per table). Unfortunately, this support is
undocumented. Below, some informal documentation for the plugin system
is provided.
== Table of Contents ==
<ol>
<li>{@section Usage}</li>
<ol>
<li>{@section Prerequisites}</li>
<li>{@section Getting started}</li>
<li>{@section Special features}</li>
<li>{@section Customization}</li>
<li>{@section Handling of errors in write operations}</li>
<li>{@section Caveats}</li>
</ol>
<li>{@section Mnesia backend plugins}</li>
<ol>
<li>{@section Background}</li>
<li>{@section Design}</li>
</ol>
<li>{@section Mnesia index plugins}</li>
<li>{@section Rocksdb}</li>
</ol>
== Usage ==
=== Prerequisites ===
<ul>
<li>rocksdb (included as dependency)</li>
<li>sext (included as dependency)</li>
<li>Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)</li>
</ul>
=== Getting started ===
Call `mnesia_rocksdb:register()' immediately after
starting mnesia.
Put `{rocksdb_copies, [node()]}' into the table definitions of
tables you want to be in RocksDB.
=== Special features ===
RocksDB tables support efficient selects on <em>prefix keys</em>.
The backend uses the `sext' module (see
[https://github.com/uwiger/sext]) for mapping between Erlang terms and the
binary data stored in the tables. This provides two useful properties:
<ul>
<li>The records are stored in the Erlang term order of their keys.</li>
<li>A prefix of a composite key is ordered just before any key for which
it is a prefix. For example, ``{x, '_'}'' is a prefix for keys `{x, a}',
`{x, b}' and so on.</li>
</ul>
This means that a prefix key identifies the start of the sequence of
entries whose keys match the prefix. The backend uses this to optimize
selects on prefix keys.
=== Customization ===
RocksDB supports a number of customization options. These can be specified
by providing a `{Key, Value}' list named `rocksdb_opts' under `user_properties',
for example:
```
mnesia:create_table(foo, [{rocksdb_copies, [node()]},
...
{user_properties,
[{rocksdb_opts, [{max_open_files, 1024}]}]
}])
'''
Consult the <a href="https://github.com/facebook/rocksdb/wiki/Setup-Options-and-Basic-Tuning">RocksDB documentation</a>
for information on configuration parameters. Also see the section below on handling write errors.
The default configuration for tables in `mnesia_rocksdb' is:
```
default_open_opts() ->
[ {create_if_missing, true}
, {cache_size,
list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
, {block_size, 1024}
, {max_open_files, 100}
, {write_buffer_size,
list_to_integer(get_env_default(
"ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
, {compression,
list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
, {use_bloomfilter, true}
].
'''
It is also possible, for larger databases, to produce a tuning parameter file.
This is experimental, and mostly copied from `mnesia_leveldb'. Consult the
source code in `mnesia_rocksdb_tuning.erl' and `mnesia_rocksdb_params.erl'.
Contributions are welcome.
=== Caveats ===
Avoid placing `bag' tables in RocksDB. Although they work, each write
requires additional reads, causing substantial runtime overheads. There
are better ways to represent and process bag data (see above about
<em>prefix keys</em>).
The `mnesia:table_info(T, size)' call always returns zero for RocksDB
tables. RocksDB itself does not track the number of elements in a table, and
although it is possible to make the `mnesia_rocksdb' backend maintain a size
counter, it incurs a high runtime overhead for writes and deletes since it
forces them to first do a read to check the existence of the key. If you
depend on having an up to date size count at all times, you need to maintain
it yourself. If you only need the size occasionally, you may traverse the
table to count the elements.
== Mnesia backend plugins ==
=== Background ===
Mnesia was initially designed to be a RAM-only DBMS, and Erlang's
`ets' tables were developed for this purpose. In order to support
persistence, e.g. for configuration data, a disk-based version of `ets'
(called `dets') was created. The `dets' API mimics the `ets' API,
and `dets' is quite convenient and fast for (nowadays) small datasets.
However, using a 32-bit bucket system, it is limited to 2GB of data.
It also doesn't support ordered sets. When used in Mnesia, dets-based
tables are called `disc_only_copies'.
To circumvent these limitations, another table type, called `disc_copies'
was added. This is a combination of `ets' and `disk_log', where Mnesia
periodically snapshots the `ets' data to a log file on disk, and meanwhile
maintains a log of updates, which can be applied at startup. These tables
are quite performant (especially on read access), but all data is kept in
RAM, which can become a serious limitation.
A backend plugin system was proposed by Ulf Wiger in 2016, and further
developed with Klarna's support, to finally become included in OTP 19.
Klarna uses a LevelDB backend, but Aeternity, in 2017, instead chose
to implement a Rocksdb backend plugin.
=== Design ===
As backend plugins were added on a long-since legacy-stable Mnesia,
they had to conform to the existing code structure. For this reason,
the plugin callbacks hook into the already present low-level access
API in the `mnesia_lib' module. As a consequence, backend plugins have
the same access semantics and granularity as `ets' and `dets'. This
isn't much of a disadvantage for key-value stores like LevelDB and RocksDB,
but a more serious issue is that the update part of this API is called
<em>after</em> the point of no return. That is, Mnesia does not expect
these updates to fail, and has no recourse if they do. As an aside,
this could also happen if a `disc_only_copies' table exceeds the 2 GB
limit (mnesia will not check it, and `dets' will not complain, but simply
drop the update.)
== Mnesia index plugins ==
When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.
An index plugin can be added in one of two ways:
<ol>
<li>When creating a schema, provide `{index_plugins, [{Name, Module, Function}]}' options.</li>
<li>Call the function `mnesia_schema:add_index_plugin(Name, Module, Function)'</li>
</ol>
`Name' must be an atom wrapped as a 1-tuple, e.g. `{words}'.
The plugin callback is called as `Module:Function(Table, Pos, Obj)', where `Pos=={words}' in
our example. It returns a list of index terms.
<strong>Example</strong>
Given the following index plugin implementation:
```
-module(words).
-export([words_f/3]).
words_f(_,_,Obj) when is_tuple(Obj) ->
words_(tuple_to_list(Obj)).
words_(Str) when is_binary(Str) ->
string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
words_(L) when is_list(L) ->
lists:flatmap(fun words_/1, L);
words_(_) ->
[].
'''
We can register the plugin and use it in table definitions:
```
Eshell V12.1.3 (abort with ^G)
1> mnesia:start().
ok
2> mnesia_schema:add_index_plugin({words}, words, words_f).
{atomic,ok}
3> mnesia:create_table(i, [{index, [{words}]}]).
{atomic,ok}
'''
Note that in this case, we had neither a backend plugin, nor even a persistent schema.
Index plugins can be used with all table types. The registered indexing function (arity 3) must exist
as an exported function along the node's code path.
To see what happens when we insert an object, we can turn on call trace.
```
4> dbg:tracer().
{ok,<0.108.0>}
5> dbg:tp(words, x).
{ok,[{matched,nonode@nohost,3},{saved,x}]}
6> dbg:p(all,[c]).
{ok,[{matched,nonode@nohost,60}]}
7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
<<"four">>]
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
<<"four">>]
ok
8> dbg:ctp('_'), dbg:stop().
ok
9> mnesia:dirty_index_read(i, <<"one">>, {words}).
[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
'''
(The fact that the indexing function is called twice seems like a performance bug.)
We can observe that the indexing callback is able to operate on the whole object.
It needs to be side-effect free and efficient, since it will be called at least once for each update
(if an old object exists in the table, the indexing function will be called on it too, before it is
replaced by the new object.)
== Rocksdb ==
== Usage ==
@end

300
doc/plugin-userguide.md Normal file
View File

@ -0,0 +1,300 @@
# Using Mnesia Plugins #
Copyright (c) 2017-21 Aeternity Anstalt. All Rights Reserved.
__Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).
The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
it possible to utilize more capable key-value stores than the `dets`
module (limited to 2 GB per table). Unfortunately, this support is
undocumented. Below, some informal documentation for the plugin system
is provided.
This user guide illustrates these concepts using `mnesia_rocksdb`
as an example.
We will deal with two types of plugin:
1. backend plugins
2. index plugins
A backend plugin is a module that implements a `mnesia_backend_type`
behavior. Each plugin can support any number of `aliases`, which
combined with the plugin module make up a `backend_type`.
When using `mnesia_rocksdb`, the default alias is `rocksdb_copies`,
and it is registered as a `{rocksdb_copies, mnesia_rocksdb}` pair.
Once registered, the alias can be used just like the built-in
backend types `ram_copies`, `disc_copies`, `disc_only_copies`.
Mnesia asks the plugin module which one of the built-in types'
semantics the new type is supposed to mimic: ram-only, ram+disk
or disk-only. This is mainly relevant for how Mnesia checkpoints and
backs up data.
### <a name="Table_of_Contents">Table of Contents</a> ###
1. [Usage](#Usage)
1. [Prerequisites](#Prerequisites)
2. [Getting started](#Getting_started)
3. [New indexing functionality](#New_indexing_functionality)
## Usage
### Prerequisites
- rocksdb (included as dependency)
- sext (included as dependency)
- Erlang/OTP 22.0 or newer (https://github.com/erlang/otp)
### Getting started
For the purposes of this user guide, we assume an unnamed, single node
mnesia installation. The only place where plugins are affected by
distributed Mnesia, is in the table sync callbacks. The simplest way
to get all paths in order for experimentation is to check out
`mnesia_rocksdb`, build it, and then call `rebar3 shell`. Unless
we note otherwise, this is how a node has been started for each example.
> Erlang shell interactions have been slightly beautified by eliding
> some text and breaking and indenting some lines
#### Adding a backend type to mnesia
There are three different ways, all undocumented, to register a
backend plugin in mnesia:
1. Add a `backend_types` option when creating the schema, using
`mnesia:create_schema/2`
```erlang
Erlang/OTP 22 [erts-10.7] ...
Eshell V10.7 (abort with ^G)
1> mnesia:create_schema([node()],
[{backend_types,[{rocksdb_copies,mnesia_rocksdb}]}]).
ok
2> mnesia:start().
ok
3> mnesia_schema:backend_types().
[ram_copies,disc_copies,disc_only_copies,rocksdb_copies]
```
(In `mnesia_rocksdb`, a shortcut for this exists in `mnesia_rocksdb:create_schema(Nodes)`.)
2. Add it when starting mnesia, using `mnesia:start/1` (undocumented)
```erlang
Eshell V10.7 (abort with ^G)
1> mnesia:create_schema([node()]).
ok
2> mnesia:start([{schema,[{backend_types,
[{rocksdb_copies,mnesia_rocksdb}]}]}]).
ok
3> mnesia_schema:backend_types().
[ram_copies,disc_copies,disc_only_copies]
```
3. Call `mnesia_schema:add_backend_type/2` when mnesia is running.
```erlang
Eshell V10.7 (abort with ^G)
1> mnesia:create_schema([node()]).
ok
2> mnesia:start().
ok
3> mnesia_schema:add_backend_type(rocksdb_copies,mnesia_rocksdb).
{atomic,ok}
4> mnesia_schema:backend_types().
[ram_copies,disc_copies,disc_only_copies,rocksdb_copies]
```
In all cases the schema is updated, and other nodes, and subsequently
added nodes, will automatically receive the information.
The function `mnesia_schema:backend_types()` shows which backend plugin
aliases are registered.
The information is also displayed when calling `mnesia:info()`:
```erlang
5> mnesia:info().
---> Processes holding locks <---
---> Processes waiting for locks <---
---> Participant transactions <---
---> Coordinator transactions <---
---> Uncertain transactions <---
---> Active tables <---
schema : with 1 records occupying 443 words of mem
===> System info in version "4.16.3", debug level = none <===
opt_disc. Directory "/.../Mnesia.nonode@nohost" is used.
use fallback at restart = false
running db nodes = [nonode@nohost]
stopped db nodes = []
master node tables = []
backend types = rocksdb_copies - mnesia_rocksdb
remote = []
ram_copies = []
disc_copies = [schema]
disc_only_copies = []
[{nonode@nohost,disc_copies}] = [schema]
2 transactions committed, 0 aborted, 0 restarted, 0 logged to disc
0 held locks, 0 in queue; 0 local transactions, 0 remote
0 transactions waits for other nodes: []
ok
```
To illustrate how mnesia persists the information in the schema:
```erlang
6> mnesia:table_info(schema,user_properties).
[{mnesia_backend_types,[{rocksdb_copies,mnesia_rocksdb}]}]
```
#### Rocksdb registration shortcut
Call `mnesia_rocksdb:register()` after starting mnesia.
#### Creating a table
Put `{rocksdb_copies, [node()]}` into the table definitions of
tables you want to be in RocksDB.
```erlang
4> mnesia:create_table(t, [{rocksdb_copies,[node()]}]).
{atomic,ok}
5> rr(mnesia).
[commit,cstruct,cyclic,decision,log_header,mnesia_select,
tid,tidstore]
6> mnesia:table_info(t,cstruct).
#cstruct{name = t,type = set,ram_copies = [],
disc_copies = [],disc_only_copies = [],
external_copies = [{{rocksdb_copies,mnesia_rocksdb},
[nonode@nohost]}],
load_order = 0,access_mode = read_write,majority = false,
index = [],snmp = [],local_content = false,record_name = t,
attributes = [key,val],
user_properties = [],frag_properties = [],
storage_properties = [],
cookie = {{1621758137965715000,-576460752303423420,1},
nonode@nohost},
version = {{2,0},[]}}
```
In the example above, we take a peek at the `cstruct`, which is the
internal metadata structure for mnesia tables. The attribute showing
that the table has been created with a `rocksdb_copies` instance, is
the `external_copies` attribute. It lists the alias, the callback module
and the nodes, where the instances reside.
The table works essentially like one of the built-in table types.
If we want to find out which type, we can query the callback module:
```erlang
8> mnesia_rocksdb:semantics(rocksdb_copies, storage).
disc_only_copies
```
Consult the `mnesia_rocksdb` man page for more info on the
`Mod:semantics/2` function.
### New indexing functionality
With the introduction of backend plugins, a few improvements were made
to mnesia's indexing support.
#### Persistent indexes
In the past, and still with the built-in types, indexes were always
rebuilt on startup. Since backend plugins were introduced mainly in
order to support very large tables, a couple of callback functions
were added in order to detect whether a full rebuild is needed.
> The callback functions are `Mod:is_index_consistent/2` and
> `Mod:index_is_consistent/3`.
> The first function (figuratively) always returns `false` for indexes
> on built-in table types. Backend plugin modules should always return
> `false` if they have no information. After building the index, mnesia
> calls `Mod:index_is_consistent(Alias, IxTab, true)`, and the callback
> is expected to persist this information. `IxTab`, in this case, is
> a logical name for the index 'table': `{Tab, index, PosInfo}`
#### Ordered indexes
A problem in the past with mnesia indexing has been that indexes with
very large fan-out were inefficient. Indexes were represented as `bag`
tables, and the cost of inserting a secondary key was proportional to
the number of identical secondary keys already in the index.
When adding the backend plugin support - also not least because the
first candidate LevelDb didn't do bags well - support for ordered
indexes was added. They turn out to have much more stable performance
for indexes with large fan-out. They also work on all built-in table
types.
When creating an index, you can specify the type of index as `bag` or
`ordered`. If you omit the type, it will default to `bag` for built-in
table types, and for external types, whatever is the first type in the
list of supported index types returned by `Mod:semantics(Alias, index_types)`.
> For `mnesia_rocksdb`, only `ordered` is supported, but a bug in mnesia
> makes it ignore this, and try to create a bag index anyway. The
> `mnesia_rocksdb` plugin rejects this.
> Note that while e.g. mnesia_rocksdb supports regular bag tables, they are not
> efficiently implemented.
Mnesia currently doesn't allow specifying an index type in
`mnesia:add_table_index/2`, so simply indicate the index position,
and let the backend choose the default.
Having ordered indexes opens up for some new possibilities, but
there are currently no functions in mnesia such as index_first, index_next
etc., or performing a select in index order.
#### Index plugins
Index plugins are a great new feature, also almost entirely undocumented.
An index plugin is a registered indexing function, which can operate
on the entire object, and shall return a list of secondary keys.
When registering an index plugin, it is given an alias, a callback module,
and a function name, not unlike backend plugins. The index plugin alias
must be an atom wrapped inside a 1-tuple, i.e. `{atom()}`.
To illustrate, we use a sample indexing function implemented in
mnesia_rocksdb, which checks all non-key attributes of an object,
and for each value that is a list, makes each list element a secondary
key value.
```erlang
9> mnesia_schema:add_index_plugin({lv}, mnesia_rocksdb, ix_listvals).
{atomic,ok}
10> mnesia:add_table_index(t,{lv}).
{atomic,ok}
11> mnesia:dirty_write({t,1,[a,b]}).
ok
12> mnesia:dirty_write({t,2,[b,c]}).
ok
13> mnesia:dirty_index_read(t,a,{lv}).
[{t,1,[a,b]}]
14> mnesia:dirty_index_read(t,b,{lv}).
[{t,1,[a,b]},{t,2,[b,c]}]
15> mnesia:dirty_index_read(t,c,{lv}).
[{t,2,[b,c]}]
```
For clarity, this is the implementation of the index callback:
```erlang
ix_listvals(_Tab, _Pos, Obj) ->
lists:foldl(
fun(V, Acc) when is_list(V) ->
V ++ Acc;
(_, Acc) ->
Acc
end, [], tl(tuple_to_list(Obj))).
```
Note that the index callback must be a pure function, as it
is also relied upon when deleting objects. That is, it must
always return the same values when called with a specific
set of input arguments.

55
doc/stylesheet.css Normal file
View File

@ -0,0 +1,55 @@
/* standard EDoc style sheet */
body {
    font-family: Verdana, Arial, Helvetica, sans-serif;
    margin-left: .25in;
    margin-right: .2in;
    margin-top: 0.2in;
    margin-bottom: 0.2in;
    color: #000000;
    background-color: #ffffff;
}
h1,h2 {
    margin-left: -0.2in;
}
div.navbar {
    background-color: #add8e6;
    padding: 0.2em;
}
h2.indextitle {
    padding: 0.4em;
    background-color: #add8e6;
}
h3.function,h3.typedecl {
    background-color: #add8e6;
    padding-left: 1em;
}
div.spec {
    margin-left: 2em;
    background-color: #eeeeee;
}
a.module {
    text-decoration: none;
}
a.module:hover {
    background-color: #eeeeee;
}
ul.definitions {
    list-style-type: none;
}
ul.index {
    list-style-type: none;
    background-color: #eeeeee;
}
/*
 * Minor style tweaks
 */
ul {
    list-style-type: square;
}
table {
    border-collapse: collapse;
}
td {
    /* A non-zero length needs a unit; a bare "3" is dropped in standards mode. */
    padding: 3px;
}

7
erlang_ls.config Normal file
View File

@ -0,0 +1,7 @@
deps_dirs:
- "_build/default/lib/*"
include_paths:
- "src/*"
include_dirs:
- "include"
- "_build/default/lib"

View File

@ -0,0 +1,15 @@
%% Data and meta data (a.k.a. info) are stored in the same table.
%% This is a table of the first byte in data
%% 0 = before meta data
%% 1 = meta data
%% 2 = before data
%% >= 8 = data
%% First-byte value listed above as "before meta data".
-define(INFO_START, 0).
%% First-byte value listed above as "meta data".
-define(INFO_TAG, 1).
%% First-byte value listed above as "before data".
-define(DATA_START, 2).
-define(BAG_CNT, 32). % Number of bits used for bag object counter
%% Largest unsigned value that fits in ?BAG_CNT (32) bits.
-define(MAX_BAG, 16#FFFFFFFF).
%% NOTE(review): presumably the storage format version - confirm against its users.
-define(VSN, 2).

View File

@ -4,14 +4,40 @@
{deps, {deps,
[ [
{sext, "1.8.0"}, {sext, "1.8.0"},
{rocksdb,"1.7.0"} {rocksdb, {git, "https://gitlab.com/seanhinde/erlang-rocksdb.git", {ref,"9ae37839"}}},
{hut, "1.3.0"}
]}. ]}.
{xref_checks, [
undefined_function_calls,
locals_not_used,
deprecated_function_calls
]}.
{profiles, {profiles,
[ [
{test, {test,
[ [
{deps, [ {proper, "1.3.0"} {deps, [ {proper, "1.4.0"}
, {meck, "0.9.0"}]} , {meck, "0.9.2"}
]} , {trace_runner, {git, "https://github.com/uwiger/trace_runner.git",
{ref, "2e56677"}}}
]}
]},
{edown,
%% Use as `rebar3 as edown do edoc`
[
{deps, [{edown, "0.8.4"}]},
{edoc_opts,
[{doclet, edown_doclet},
{app_default, "http://www.erlang.org/doc/man"},
{doc_path, []},
{top_level_readme,
{"./README.md", "https://github.com/aeternity/mnesia_rocksdb"}}]}]}
]}. ]}.
{ex_doc, [
{extras, [<<"README.md">>, <<"LICENSE">>]},
{main, <<"readme">>},
{source_url, <<"https://github.com/aeternity/mnesia_rocksdb">>}
]}.

View File

@ -1,11 +1,15 @@
{"1.2.0", {"1.2.0",
[{<<"rocksdb">>,{pkg,<<"rocksdb">>,<<"1.7.0">>},0}, [{<<"hut">>,{pkg,<<"hut">>,<<"1.3.0">>},0},
{<<"rocksdb">>,
{git,"https://gitlab.com/seanhinde/erlang-rocksdb.git",
{ref,"9ae378391ffc94200bde24efcd7a4921eba688d0"}},
0},
{<<"sext">>,{pkg,<<"sext">>,<<"1.8.0">>},0}]}. {<<"sext">>,{pkg,<<"sext">>,<<"1.8.0">>},0}]}.
[ [
{pkg_hash,[ {pkg_hash,[
{<<"rocksdb">>, <<"5D23319998A7FCE5FFD5D7824116C905CABA7F91BAF8EDDABD0180F1BB272CEF">>}, {<<"hut">>, <<"71F2F054E657C03F959CF1ACC43F436EA87580696528CA2A55C8AFB1B06C85E7">>},
{<<"sext">>, <<"90A95B889F5C781B70BBCF44278B763148E313C376B60D87CE664CB1C1DD29B5">>}]}, {<<"sext">>, <<"90A95B889F5C781B70BBCF44278B763148E313C376B60D87CE664CB1C1DD29B5">>}]},
{pkg_hash_ext,[ {pkg_hash_ext,[
{<<"rocksdb">>, <<"A4BDC5DD80ED137161549713062131E8240523787EBE7B51DF61CFB48B1786CE">>}, {<<"hut">>, <<"7E15D28555D8A1F2B5A3A931EC120AF0753E4853A4C66053DB354F35BF9AB563">>},
{<<"sext">>, <<"BC6016CB8690BAF677EACACFE6E7CADFEC8DC7E286CBBED762F6CD55B0678E73">>}]} {<<"sext">>, <<"BC6016CB8690BAF677EACACFE6E7CADFEC8DC7E286CBBED762F6CD55B0678E73">>}]}
]. ].

View File

@ -1,3 +1,4 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
{application, mnesia_rocksdb, {application, mnesia_rocksdb,
[ [
{description, "RocksDB backend plugin for Mnesia"}, {description, "RocksDB backend plugin for Mnesia"},

File diff suppressed because it is too large Load Diff

1495
src/mnesia_rocksdb_admin.erl Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1,3 +1,4 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
%%---------------------------------------------------------------- %%----------------------------------------------------------------
%% Copyright (c) 2013-2016 Klarna AB %% Copyright (c) 2013-2016 Klarna AB
%% %%

View File

@ -0,0 +1,18 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
-include_lib("hut/include/hut.hrl").
%% ?dbg(Fmt, Args): debug-level logging.
%% Unless MNESIA_ROCKSDB_NO_DBG is defined at compile time, it expands to
%% ?log(debug, Fmt, Args); otherwise it expands to the atom `ok'.
%% enable debugging messages through mnesia:set_debug_level(debug)
%% NOTE(review): the line above appears to describe the commented-out
%% mnesia_lib:dbg_out/2 variant kept below; confirm whether the mnesia debug
%% level still affects the ?log-based macro.
-ifndef(MNESIA_ROCKSDB_NO_DBG).
-define(dbg(Fmt, Args), ?log(debug, Fmt, Args)).
%% -define(dbg(Fmt, Args),
%% %% avoid evaluating Args if the message will be dropped anyway
%% case mnesia_monitor:get_env(debug) of
%% none -> ok;
%% verbose -> ok;
%% _ -> mnesia_lib:dbg_out("~p:~p: "++(Fmt),[?MODULE,?LINE|Args])
%% end).
-else.
-define(dbg(Fmt, Args), ok).
-endif.
%% NOTE(review): presumably the default number of retries for some operation;
%% no use of this macro is visible here - confirm.
-define(DEFAULT_RETRIES, 1).

View File

@ -1,17 +1,341 @@
%%% @doc RocksDB update wrappers, in separate module for easy tracing and mocking. %%% @doc RocksDB update wrappers, in separate module for easy tracing and mocking.
%%% %%%
-module(mnesia_rocksdb_lib). -module(mnesia_rocksdb_lib).
-export([put/4, -export([ put/4
write/3, , write/3
delete/3]). , delete/3
]).
-export([ open_rocksdb/3
, data_mountpoint/1
, create_mountpoint/1
, tabname/1
]).
-export([ default_encoding/3
, check_encoding/2
, valid_obj_type/2
, valid_key_type/2 ]).
-export([ keypos/1
, encode_key/1, encode_key/2
, decode_key/1, decode_key/2
, encode_val/1, encode_val/2
, decode_val/1, decode_val/3
, encode/2
, decode/2
]).
-include("mnesia_rocksdb.hrl").
-include_lib("hut/include/hut.hrl").
%% @doc Store `Val' under `Key'.
%% A map reference carrying `db' and `cf' writes into that column family via
%% `rocksdb:put/5'; any other reference is handed straight to `rocksdb:put/4'.
put(#{db := Db, cf := ColFam}, Key, Val, WriteOpts) ->
    rocksdb:put(Db, ColFam, Key, Val, WriteOpts);
put(Db, Key, Val, WriteOpts) ->
    rocksdb:put(Db, Key, Val, WriteOpts).
%% @doc Apply the list of `{put, K, V}' / `{delete, K}' operations in `Ops'
%% to the column family named by the map reference, as a single write batch.
write(#{db := Db, cf := ColFam}, Ops, WriteOpts) ->
    write_as_batch(Ops, Db, ColFam, WriteOpts).
%% @doc Delete `K'.
%% A map reference carrying `db' and `cf' deletes from that column family
%% (mirroring the clauses of put/4 and write/3); any other reference is handed
%% straight to `rocksdb:delete/3'.
delete(#{db := Ref, cf := CF}, K, Opts) ->
    rocksdb:delete(Ref, CF, K, Opts);
delete(Ref, K, Opts) ->
    rocksdb:delete(Ref, K, Opts).
%% Apply a list of `{put, K, V}' / `{delete, K}' operations to column family
%% `CF' of database `Ref' as one RocksDB write batch.
%% Returns whatever `rocksdb:write_batch/3' returns. Any other element in `L',
%% or a batch operation that does not return `ok', crashes the caller.
%% The batch handle is released explicitly in all cases, including failures.
write_as_batch(L, Ref, CF, Opts) ->
    {ok, Batch} = rocksdb:batch(),
    try
        lists:foreach(
          fun({put, K, V}) ->
                  ok = rocksdb:batch_put(Batch, CF, K, V);
             ({delete, K}) ->
                  ok = rocksdb:batch_delete(Batch, CF, K)
          end, L),
        rocksdb:write_batch(Ref, Batch, Opts)
    after
        rocksdb:release_batch(Batch)
    end.
%% @doc Make sure the data directory for `Tab' exists.
%% If the directory is missing, an attempt is made to create it; the result of
%% that attempt is ignored and `ok' is returned. If it already exists, `ok' is
%% returned only when its path starts with the mnesia directory, otherwise
%% `{error, exists}'.
create_mountpoint(Tab) ->
    MountPoint = data_mountpoint(Tab),
    case filelib:is_dir(MountPoint) of
        true ->
            MnesiaDir = mnesia_lib:dir(),
            case lists:prefix(MnesiaDir, MountPoint) of
                false -> {error, exists};
                true  -> ok
            end;
        false ->
            _ = file:make_dir(MountPoint),
            ok
    end.
%% @doc Path of the data directory for `Tab': the table's file name stem with
%% the extension ".extrdb", placed in the directory given by the mnesia `dir'
%% environment setting.
data_mountpoint(Tab) ->
    FileName = tabname(Tab) ++ ".extrdb",
    filename:join(mnesia_monitor:get_env(dir), FileName).
%% @doc File name stem (a string) for a table identifier.
%%   {admin, Alias}               -> "mnesia_rocksdb-Alias-_db"
%%   {Tab, index, {{Plugin}, _}}  -> "Tab-=Plugin=-_ix"   (index plugin)
%%   {Tab, index, {Pos, _}}       -> "Tab-Pos-_ix"        (integer position)
%%   {Tab, retainer, Name}        -> "Tab-Name-_RET"
%%   Tab (atom)                   -> "Tab-_tab"
tabname({admin, Alias}) ->
    lists:append(["mnesia_rocksdb-", atom_to_list(Alias), "-_db"]);
tabname({Tab, index, {{Plugin}, _}}) ->
    lists:append([atom_to_list(Tab), "-=", atom_to_list(Plugin), "=-_ix"]);
tabname({Tab, index, {Pos, _}}) ->
    lists:append([atom_to_list(Tab), "-", integer_to_list(Pos), "-_ix"]);
tabname({Tab, retainer, Name}) ->
    lists:append([atom_to_list(Tab), "-", retainername(Name), "-_RET"]);
tabname(Tab) when is_atom(Tab) ->
    lists:append([atom_to_list(Tab), "-_tab"]).
%% @doc Default `{KeyEncoding, ValueEncoding}' for a table.
%% Index tables use `{sext, {value, raw}}' and retainers `{term, {value, term}}'.
%% For other tables the key encoding follows the table type (`set' -> term,
%% `ordered_set' and `bag' -> sext), and the value encoding is `{value, term}'
%% for exactly two attributes and `{object, term}' for more.
default_encoding({_, index, _}, _, _) ->
    {sext, {value, raw}};
default_encoding({_, retainer, _}, _, _) ->
    {term, {value, term}};
default_encoding(_, Type, Attrs) ->
    KeyEncoding =
        case Type of
            ordered_set -> sext;
            set         -> term;
            bag         -> sext
        end,
    ValEncoding =
        case Attrs of
            [_, _]     -> {value, term};
            [_, _ | _] -> {object, term}
        end,
    {KeyEncoding, ValEncoding}.
%% @doc Validate and normalise an encoding specification.
%% Returns `{ok, {KeyEnc, ValEnc}}'. A validation failure is signalled inside
%% check_encoding_/2 by a throw, and the thrown term (an `{error, _}' tuple)
%% is returned as the result.
check_encoding(Encoding, Attributes) ->
    try
        check_encoding_(Encoding, Attributes)
    catch
        throw:Thrown -> Thrown
    end.
%% Validate a `{Key, Val}' encoding pair against the attribute list.
%% Throws `{error, {invalid_encoding, E}}' when the spec is not a pair; the
%% per-part checks throw their own errors.
check_encoding_({KeyEnc, ValEnc}, Attrs) ->
    CheckedKey = check_key_encoding(KeyEnc),
    CheckedVal = check_value_encoding(ValEnc, Attrs),
    {ok, {CheckedKey, CheckedVal}};
check_encoding_(Other, _Attrs) ->
    throw({error, {invalid_encoding, Other}}).
%% Accept the key encodings `sext', `term' and `raw'; anything else is
%% rejected by throwing `{error, {invalid_key_encoding, E}}'.
check_key_encoding(sext) -> sext;
check_key_encoding(term) -> term;
check_key_encoding(raw)  -> raw;
check_key_encoding(Other) ->
    throw({error, {invalid_key_encoding, Other}}).
%% Normalise a value encoding to `{value | object, term | raw | sext}'.
%% `raw' and explicit `{value, _}' are accepted only with exactly two
%% attributes. `{object, _}' is accepted for any attribute list. The bare
%% atoms `term' and `sext' get `value' or `object' from the attribute count.
%% Anything else throws `{error, {invalid_value_encoding, E}}'.
check_value_encoding(raw, [_, _]) ->
    {value, raw};
check_value_encoding({value, How} = Spec, [_, _])
  when How =:= term; How =:= raw; How =:= sext ->
    Spec;
check_value_encoding({object, How} = Spec, _)
  when How =:= term; How =:= raw; How =:= sext ->
    Spec;
check_value_encoding(term, Attrs) ->
    {val_encoding_type(Attrs), term};
check_value_encoding(sext, Attrs) ->
    {val_encoding_type(Attrs), sext};
check_value_encoding(Other, _) ->
    throw({error, {invalid_value_encoding, Other}}).
%% `value' for exactly two attributes, `object' for three or more.
val_encoding_type(Attrs) ->
    case Attrs of
        [_Key, _Val] ->
            value;
        [_Key, _Val | _More] ->
            object
    end.
%% @doc Check that `Obj' fits the encoding of the table reference.
%% Only encodings typed to `binary' restrict anything: the key (element 2)
%% and/or the value of a 3-tuple must then be binaries. Every other
%% combination is accepted.
%% NOTE(review): check_key_encoding/1 and check_value_encoding/2 only ever
%% produce `sext', `term' or `raw', never `binary' - confirm whether `raw' was
%% intended here.
valid_obj_type(#{encoding := Enc}, Obj) ->
    obj_fits_encoding(Enc, Obj).

obj_fits_encoding({binary, {value, binary}}, {_, K, V}) ->
    is_binary(K) andalso is_binary(V);
obj_fits_encoding({binary, _}, Obj) ->
    is_binary(element(2, Obj));
obj_fits_encoding({_, {value, binary}}, {_, _, V}) ->
    is_binary(V);
obj_fits_encoding(_, _) ->
    %% No restrictions on object type
    %% unless key and/or value typed to binary
    true.
%% @doc Check that `Key' fits the key encoding of the table reference.
%% A key encoding of `binary' requires a binary key; any other encoding
%% accepts every key.
valid_key_type(#{encoding := Enc}, Key) ->
    case Enc of
        {binary, _} ->
            is_binary(Key);
        _ ->
            true
    end.
%% @doc Encode a key using the `sext' encoding.
-spec encode_key(any()) -> binary().
encode_key(Key) ->
    encode(Key, sext).
%% @doc Encode `Term' with the given encoding:
%%   sext - `sext:encode/1'
%%   raw  - the term itself, which must already be a binary
%%   term - `term_to_binary/1'
encode(Term, sext) ->
    sext:encode(Term);
encode(Bin, raw) when is_binary(Bin) ->
    Bin;
encode(Term, term) ->
    term_to_binary(Term).
%% @doc Encode a key with the key encoding of a table reference; when the
%% second argument carries no `{KeyEnc, _}' encoding, `sext' is used.
encode_key(Key, #{encoding := {KeyEnc, _}}) ->
    encode(Key, KeyEnc);
encode_key(Key, _NoEncoding) ->
    encode(Key, sext).
%% @doc Decode a key that was encoded with the `sext' encoding.
-spec decode_key(binary()) -> any().
decode_key(Encoded) ->
    decode(Encoded, sext).
%% @doc Decode a key. The second argument is either a table reference, whose
%% key encoding is used, or the encoding itself.
decode_key(Encoded, #{encoding := {KeyEnc, _}}) ->
    decode(Encoded, KeyEnc);
decode_key(Encoded, KeyEnc) ->
    decode(Encoded, KeyEnc).
%% @doc Decode a binary with the given encoding:
%%   sext - `sext:partial_decode/1'; only a `{full, Result, _}' outcome is
%%          accepted, anything else raises `error:badarg'
%%   raw  - the binary itself
%%   term - `binary_to_term/1'
decode(Val, sext) ->
    case sext:partial_decode(Val) of
        {full, Result, _} ->
            Result;
        _ ->
            %% The second argument of erlang:error/2 is the argument list of
            %% the current function, used for the stack trace.
            error(badarg, [Val, sext])
    end;
decode(Val, raw) ->
    Val;
decode(Val, term) ->
    binary_to_term(Val).
%% @doc Encode a value using the `term' encoding.
-spec encode_val(any()) -> binary().
encode_val(Value) ->
    encode(Value, term).
%% @doc Encode the value part of an object.
%% With an atom encoding the term is encoded as is. Index tables always store
%% an empty binary. For other table references, a `value' encoding on a
%% two-attribute table stores only element 3 of the object, and an `object'
%% encoding stores the whole tuple with the key (element 2) replaced by `[]'.
encode_val(Term, Enc) when is_atom(Enc) ->
    encode(Term, Enc);
encode_val(_Obj, #{name := {_, index, _}}) ->
    <<>>;
encode_val(Obj, #{encoding := {_, ValEnc}, attr_pos := AttrPos}) ->
    {Kind, How} = enc_type(ValEnc),
    case {map_size(AttrPos), Kind} of
        {2, value} ->
            encode(element(3, Obj), How);
        {_, object} ->
            encode(setelement(2, Obj, []), How)
    end.
%% Normalise a value encoding to `{value | object, Encoding}': an explicit
%% pair is returned unchanged, a bare atom is treated as `{object, Atom}'.
enc_type({value, _} = Pair) ->
    Pair;
enc_type({object, _} = Pair) ->
    Pair;
enc_type(Enc) when is_atom(Enc) ->
    {object, Enc}.
%% @doc Decode a value stored in the external term format.
-spec decode_val(binary()) -> any().
decode_val(Encoded) ->
    binary_to_term(Encoded).
%% @doc Rebuild an object from its stored value and its (decoded) key.
%% An empty value in an index table yields the 1-tuple `{Key}'. Otherwise an
%% `object' encoding decodes the whole tuple and puts the key back in
%% element 2, while a `value' encoding decodes only the value and assembles
%% the record with make_rec/3.
decode_val(<<>>, Key, #{name := {_, index, _}}) ->
    {Key};
decode_val(Encoded, Key, Ref) ->
    {Kind, How} = value_encoding(Ref),
    case Kind of
        object ->
            Decoded = decode(Encoded, How),
            setelement(2, Decoded, Key);
        value ->
            Decoded = decode(Encoded, How),
            make_rec(Key, Decoded, Ref)
    end.
%% Assemble an object from key and value.
%% Ordered index tables yield `{Key}'. A reference with a `record_name'
%% property yields `{RecordName, Key, Val}'. Without a record name the result
%% is a pair whose order follows the key position in `attr_pos'.
make_rec(Key, _Val, #{name := {_, index, {_, ordered}}}) ->
    {Key};
make_rec(Key, Val, #{properties := #{record_name := RecName}}) ->
    {RecName, Key, Val};
make_rec(Key, Val, #{attr_pos := AttrPos}) ->
    %% no record name
    case AttrPos of
        #{key := 1} ->
            {Key, Val};
        #{key := 2} ->
            %% Yeah, right, but people are weird
            {Val, Key}
    end.
%% Value encoding as a `{value | object, Encoding}' pair.
%% A map with an `encoding' entry has its value part normalised; any other map
%% defaults to `{object, term}'; a pair of atoms is returned as given.
value_encoding(#{encoding := {_, ValEnc}}) ->
    enc_type(ValEnc);
value_encoding(#{}) ->
    {object, term};
value_encoding({Kind, How} = Pair) when is_atom(Kind), is_atom(How) ->
    Pair.
%% @doc Key position by table identifier: 1 for admin and index tables,
%% 2 for retainers and for tables named by an atom.
keypos({admin, _Alias})        -> 1;
keypos({_Tab, index, _Ix})     -> 1;
keypos({_Tab, retainer, _Ret}) -> 2;
keypos(Tab) when is_atom(Tab)  -> 2.
%% ======================================================================
%% Private functions
%% ======================================================================
%% String form of a retainer name, for use in a file name.
%% Atoms are converted with atom_to_list/1. A list that list_to_binary/1
%% accepts is flattened to a plain string; any other list, and any other
%% term, is rendered with io_lib:write/1.
retainername(Name) when is_atom(Name) ->
    atom_to_list(Name);
retainername(Name) when is_list(Name) ->
    try list_to_binary(Name) of
        Bin ->
            binary_to_list(Bin)
    catch
        error:_ ->
            lists:flatten(io_lib:write(Name))
    end;
retainername(Name) ->
    lists:flatten(io_lib:write(Name)).
%% @doc Open the database at `MPd' with column families `CFs'.
%% `RdbOpts' are laid over the default open options, and the retry count
%% comes from get_retries/0.
open_rocksdb(MPd, RdbOpts, CFs) ->
    OpenOpts = rocksdb_open_opts_(RdbOpts),
    Retries = get_retries(),
    open_rocksdb(MPd, OpenOpts, CFs, Retries).
%% Code adapted from basho/riak_kv_eleveldb_backend.erl
%% Opens the database, making at least one attempt whatever `Retries' is.
open_rocksdb(MPd, Opts, CFs, Retries) ->
    Attempts = max(1, Retries),
    open_db(MPd, Opts, CFs, Attempts, undefined).
%% Open an optimistic transaction database, retrying on lock errors.
%% Returns `{ok, Ref, CFRefs}' on success. A `db_open' error whose text starts
%% with "IO error: lock " triggers a sleep of get_retry_delay/0 milliseconds
%% and another attempt; once the attempts are used up, the last such error is
%% returned. Every other error is returned immediately.
open_db(_, _, _, 0, LastError) ->
    {error, LastError};
open_db(MPd, Opts, CFs, RetriesLeft, _) ->
    case rocksdb:open_optimistic_transaction_db(MPd, Opts, CFs) of
        {ok, _Ref, _CFRefs} = Ok ->
            ?log(debug, "Open - Rocksdb: ~s (~p) -> ~p", [MPd, Opts, Ok]),
            Ok;
        %% Check specifically for lock error, this can be caused if
        %% a crashed mnesia takes some time to flush rocksdb information
        %% out to disk. The process is gone, but the NIF resource cleanup
        %% may not have completed.
        {error, {db_open, OpenErr} = Reason} ->
            IsLockError = lists:prefix("IO error: lock ", OpenErr),
            if
                IsLockError ->
                    SleepFor = get_retry_delay(),
                    ?log(debug, ("Open - Rocksdb backend retrying ~p in ~p ms"
                                 " after error ~s"), [MPd, SleepFor, OpenErr]),
                    timer:sleep(SleepFor),
                    open_db(MPd, Opts, CFs, RetriesLeft - 1, Reason);
                true ->
                    {error, Reason}
            end;
        {error, Reason} ->
            {error, Reason}
    end.
%% Number of open attempts passed to open_rocksdb/4.
get_retries() ->
    30.
%% Sleep time in milliseconds between two open attempts.
get_retry_delay() ->
    100.
%% Lay the user-supplied `{Key, Value}' options over the defaults: each one
%% replaces the default entry with the same key, or is appended if there is
%% none.
rocksdb_open_opts_(RdbOpts) ->
    Override = fun({Key, _} = Opt, Opts) ->
                       lists:keystore(Key, 1, Opts, Opt)
               end,
    lists:foldl(Override, default_open_opts(), RdbOpts).
%% Default open options. Cache size, write buffer size and compression can be
%% overridden through the OS environment variables ROCKSDB_CACHE_SIZE,
%% ROCKSDB_WRITE_BUFFER_SIZE and ROCKSDB_COMPRESSION.
%% NOTE(review): the cache size default "32212254" is not a round size -
%% confirm it is intended.
%% NOTE(review): the compression default is the atom `true' - confirm the
%% rocksdb binding accepts it.
default_open_opts() ->
    CacheSize =
        list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254")),
    WriteBufferSize =
        list_to_integer(get_env_default(
                          "ROCKSDB_WRITE_BUFFER_SIZE", "4194304")),
    Compression =
        list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true")),
    [ {create_if_missing, true}
    , {cache_size, CacheSize}
    , {block_size, 1024}
    , {max_open_files, 30}
    , {write_buffer_size, WriteBufferSize}
    , {compression, Compression}
    , {use_bloomfilter, true}
    ].
%% Value of the OS environment variable `Key', or `Default' when it is unset.
get_env_default(Key, Default) ->
    os:getenv(Key, Default).

View File

@ -1,3 +1,4 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
%%---------------------------------------------------------------- %%----------------------------------------------------------------
%% Copyright (c) 2013-2016 Klarna AB %% Copyright (c) 2013-2016 Klarna AB
%% %%
@ -33,17 +34,12 @@
code_change/3]). code_change/3]).
-include("mnesia_rocksdb_tuning.hrl"). -include("mnesia_rocksdb_tuning.hrl").
-include("mnesia_rocksdb_int.hrl").
-define(KB, 1024). -define(KB, 1024).
-define(MB, 1024 * 1024). -define(MB, 1024 * 1024).
-define(GB, 1024 * 1024 * 1024). -define(GB, 1024 * 1024 * 1024).
-ifdef(DEBUG).
-define(dbg(Fmt, Args), io:fwrite(user,"~p:~p: "++(Fmt),[?MODULE,?LINE|Args])).
-else.
-define(dbg(Fmt, Args), ok).
-endif.
lookup(Tab, Default) -> lookup(Tab, Default) ->
try ets:lookup(?MODULE, Tab) of try ets:lookup(?MODULE, Tab) of
[{_, Params}] -> [{_, Params}] ->
@ -113,21 +109,21 @@ store_params(Params) ->
NTabs = length(Params), NTabs = length(Params),
Env0= mnesia_rocksdb_tuning:describe_env(), Env0= mnesia_rocksdb_tuning:describe_env(),
Env = Env0#tuning{n_tabs = NTabs}, Env = Env0#tuning{n_tabs = NTabs},
?dbg("Env = ~p~n", [Env]), ?log(debug, "Env = ~p~n", [Env]),
TotalFiles = lists:sum([mnesia_rocksdb_tuning:max_files(Sz) || TotalFiles = lists:sum([mnesia_rocksdb_tuning:max_files(Sz) ||
{_, Sz} <- Params]), {_, Sz} <- Params]),
?dbg("TotalFiles = ~p~n", [TotalFiles]), ?log(debug, "TotalFiles = ~p~n", [TotalFiles]),
MaxFs = Env#tuning.max_files, MaxFs = Env#tuning.max_files,
?dbg("MaxFs = ~p~n", [MaxFs]), ?log(debug, "MaxFs = ~p~n", [MaxFs]),
FsHeadroom = MaxFs * 0.6, FsHeadroom = MaxFs * 0.6,
?dbg("FsHeadroom = ~p~n", [FsHeadroom]), ?log(debug, "FsHeadroom = ~p~n", [FsHeadroom]),
FilesFactor = if TotalFiles =< FsHeadroom -> FilesFactor = if TotalFiles =< FsHeadroom ->
1; % don't have to scale down 1; % don't have to scale down
true -> true ->
FsHeadroom / TotalFiles FsHeadroom / TotalFiles
end, end,
Env1 = Env#tuning{files_factor = FilesFactor}, Env1 = Env#tuning{files_factor = FilesFactor},
?dbg("Env1 = ~p~n", [Env1]), ?log(debug, "Env1 = ~p~n", [Env1]),
lists:foreach( lists:foreach(
fun({Tab, Sz}) when is_atom(Tab); fun({Tab, Sz}) when is_atom(Tab);
is_atom(element(1,Tab)), is_atom(element(1,Tab)),

View File

@ -1,3 +1,4 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
%%---------------------------------------------------------------- %%----------------------------------------------------------------
%% Copyright (c) 2013-2016 Klarna AB %% Copyright (c) 2013-2016 Klarna AB
%% %%

View File

@ -1,3 +1,4 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
%%---------------------------------------------------------------- %%----------------------------------------------------------------
%% Copyright (c) 2013-2016 Klarna AB %% Copyright (c) 2013-2016 Klarna AB
%% %%

View File

@ -1,3 +1,4 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
%%---------------------------------------------------------------- %%----------------------------------------------------------------
%% Copyright (c) 2013-2016 Klarna AB %% Copyright (c) 2013-2016 Klarna AB
%% %%

1560
src/mrdb.erl Normal file

File diff suppressed because it is too large Load Diff

192
src/mrdb_index.erl Normal file
View File

@ -0,0 +1,192 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
-module(mrdb_index).
-export([
with_iterator/3
, iterator_move/2
, iterator/2
, iterator_close/1
]).
-record(mrdb_ix_iter, { i :: mrdb:iterator()
, type = set :: set | bag
, sub :: mrdb:ref() | pid()
}).
-type ix_iterator() :: #mrdb_ix_iter{}.
-type index_value() :: any().
-type iterator_action() :: mrdb:iterator_action().
-type object() :: tuple().
-record(subst, { i :: mrdb:iterator()
, vals_f
, cur
, mref }).
-define(TIMEOUT, 5000).
-import(mnesia_rocksdb_lib, [ encode_key/2 ]).
-export_type([ ix_iterator/0 ]).
%% @doc Run `Fun' with an index iterator on index `IxPos' of `Tab'.
%% The iterator is closed when `Fun' returns or raises. Crashes with a
%% badmatch if the iterator cannot be created.
-spec with_iterator(mrdb:ref_or_tab(), mrdb:index_position(), fun( (ix_iterator()) -> Res)) -> Res.
with_iterator(Tab, IxPos, Fun) when is_function(Fun, 1) ->
    {ok, IxIter} = iterator(Tab, IxPos),
    try
        Fun(IxIter)
    after
        iterator_close(IxIter)
    end.
%% @doc Create an iterator over index `IxPos' of `Tab'.
%% For tables with `bag' semantics a helper process (see sub_new/2) is started
%% and stored in `sub'; for all other tables `sub' holds the table reference.
%% NOTE(review): the `type' field is left at its default (`set') in both
%% cases, so iterator_move/2 never dispatches to the bag implementation -
%% confirm whether `type = bag' should be set for bag tables.
-spec iterator(mrdb:ref_or_tab(), mrdb:index_position()) -> {ok, ix_iterator()}
                                                          | {error, _}.
iterator(Tab, IxPos) ->
    #{semantics := Sem} = TabRef = mrdb:ensure_ref(Tab),
    #{ix_vals_f := IxValsF} = IxRef = ensure_index_ref(IxPos, TabRef),
    case mrdb:iterator(IxRef, []) of
        {ok, I} when Sem =:= bag ->
            SubPid = sub_new(TabRef, IxValsF),
            {ok, #mrdb_ix_iter{i = I, sub = SubPid}};
        {ok, I} ->
            {ok, #mrdb_ix_iter{i = I, sub = TabRef}};
        Err ->
            Err
    end.
%% @doc Move the index iterator, dispatching on the iterator's `type' field
%% to the set or the bag implementation.
-spec iterator_move(ix_iterator(), iterator_action()) -> {ok, index_value(), object()}
                                                       | {error, _}.
iterator_move(#mrdb_ix_iter{type = set} = IxIter, Action) ->
    iterator_move_set(IxIter, Action);
iterator_move(#mrdb_ix_iter{type = bag} = IxIter, Action) ->
    iterator_move_bag(IxIter, Action).
%% Move the index iterator. An index entry `{{IxVal, PrimKey}}' is returned as
%% `{ok, IxVal, Obj}', where `Obj' is the result of reading `PrimKey' from the
%% table (see opt_read/2). Any other result of the move is passed through.
iterator_move_set(#mrdb_ix_iter{i = I, sub = TabRef}, Action) ->
    case mrdb:iterator_move(I, Action) of
        {ok, {{IxVal, PrimKey}}} ->
            Obj = opt_read(TabRef, PrimKey),
            {ok, IxVal, Obj};
        Other ->
            Other
    end.
%% Move an index iterator on a bag table.
%% First the helper process is asked for a relative move. Only when it answers
%% `not_found' is the index iterator itself moved, and the helper is then asked
%% to reposition on the resulting entry.
%% NOTE(review): the request `{move_abs, FKey, PKey}' has no matching clause in
%% sub_loop/1 (which handles `{move, Cur, Dir}'), so that call would end in a
%% timeout error from call_sub/2 - confirm the intended message.
%% NOTE(review): the pattern `{ok, {FKey, PKey}}' differs from the
%% `{ok, {{FKey, PKey}}}' shape matched in iterator_move_set/2 - confirm which
%% shape mrdb:iterator_move/2 returns for index tables.
iterator_move_bag(#mrdb_ix_iter{i = I, sub = Sub}, Dir) ->
case call_sub(Sub, {move_rel, Dir}) of
not_found ->
case mrdb:iterator_move(I, Dir) of
{ok, {FKey, PKey}} ->
call_sub(Sub, {move_abs, FKey, PKey});
Other ->
Other
end;
Other ->
Other
end.
%% Read `Key' from the table: the object when exactly one is found, `[]' when
%% none is found.
opt_read(TabRef, Key) ->
    case mrdb:read(TabRef, Key, []) of
        [] ->
            [];
        [Obj] ->
            Obj
    end.
%% Start the helper process that owns an iterator on table `R'.
%% The helper monitors the calling process, opens the iterator, reports the
%% outcome and, on success, enters sub_loop/1. Returns the helper's pid; if
%% the iterator cannot be opened or the helper dies first, the caller aborts
%% via mrdb:abort/1.
sub_new(R, ValsF) when is_function(ValsF, 1) ->
    Me = self(),
    {Pid, MRef} = spawn_monitor(
                    fun() ->
                            %% The helper's own monitor on the parent
                            ParentMRef = monitor(process, Me),
                            case mrdb:iterator(R) of
                                {ok, I} ->
                                    Me ! {self(), ok},
                                    sub_loop(#subst{ mref = ParentMRef
                                                   , i = I
                                                   , vals_f = ValsF
                                                   , cur = undefined});
                                Error ->
                                    Me ! {self(), Error}
                            end
                    end),
    receive
        {'DOWN', MRef, _, _, Crash} ->
            mrdb:abort({error, Crash});
        {Pid, ok} ->
            demonitor(MRef, [flush]),
            Pid;
        {Pid, Error} ->
            %% The helper exits right after reporting the error; flush so its
            %% 'DOWN' message does not stay in our mailbox.
            demonitor(MRef, [flush]),
            mrdb:abort(Error)
    end.
%% Receive loop of the helper process started by sub_new/2.
%% Requests arrive as `{Pid, Ref, Request}' and are answered with
%% `{Ref, Reply}'. The loop ends, closing the iterator, when the monitored
%% parent goes down or a `close' request is received.
%% NOTE(review): there is no clause for `{move_abs, _, _}', which
%% iterator_move_bag/2 sends, and no clause for unexpected messages - confirm
%% the intended protocol.
sub_loop(#subst{i = I, mref = MRef} = St) ->
receive
%% The parent process died: release the iterator and stop.
{'DOWN', MRef, _, _, _} ->
mrdb:iterator_close(I);
{Pid, Ref, close} ->
mrdb:iterator_close(I),
Pid ! {Ref, ok};
%% Report the currently remembered `cur' value.
{Pid, Ref, cur} ->
Pid ! {Ref, St#subst.cur},
sub_loop(St);
%% Absolute move: `Dir' must be a binary and is passed to the iterator as is.
{Pid, Ref, {move, Cur, Dir}} when is_binary(Dir) ->
{Res, St1} = sub_abs_move(Cur, Dir, St),
Pid ! {Ref, Res},
sub_loop(St1);
%% Relative move, checked against the remembered `cur' value.
{Pid, Ref, {move_rel, Dir}} ->
{Res, St1} = sub_rel_move(Dir, St),
Pid ! {Ref, Res},
sub_loop(St1)
end.
%% Move the helper's iterator to `Dir'. On success `Cur' is remembered as the
%% current value; on any other result it is reset to `undefined'. Returns
%% `{MoveResult, NewState}'.
sub_abs_move(Cur, Dir, #subst{i = I} = St) ->
    Moved = mrdb:iterator_move(I, Dir),
    NewCur = case Moved of
                 {ok, _} -> Cur;
                 _       -> undefined
             end,
    {Moved, St#subst{cur = NewCur}}.
%% Move the helper's iterator in direction `Dir'. The object found is accepted
%% only if the remembered `cur' value is among the values the index function
%% produces for it; otherwise the result is `not_found'. `cur' is reset to
%% `undefined' unless the object was accepted. Returns `{Result, NewState}'.
sub_rel_move(Dir, #subst{i = I, vals_f = ValsF, cur = Prev} = St) ->
    Moved = mrdb:iterator_move(I, Dir),
    case Moved of
        {ok, Obj} ->
            case lists:member(Prev, ValsF(Obj)) of
                false ->
                    {not_found, St#subst{cur = undefined}};
                true ->
                    {Moved, St}
            end;
        _ ->
            {Moved, St#subst{cur = undefined}}
    end.
%% Synchronous request to the helper process.
%% Sends `{self(), MRef, Req}' and waits for `{MRef, Reply}'. Raises
%% `error:Reason' if the helper goes down first, and `error:timeout' if no
%% answer arrives within ?TIMEOUT milliseconds.
%% The monitor is removed on every path where it can still be active, and
%% flushed so that no 'DOWN' message is left behind (the helper exits right
%% after answering a `close' request).
call_sub(Pid, Req) ->
    MRef = monitor(process, Pid),
    Pid ! {self(), MRef, Req},
    receive
        {MRef, Reply} ->
            demonitor(MRef, [flush]),
            Reply;
        {'DOWN', MRef, _, _, Reason} ->
            error(Reason)
    after ?TIMEOUT ->
            demonitor(MRef, [flush]),
            error(timeout)
    end.
%% @doc Close the index iterator and then, if there is one, the helper
%% process.
-spec iterator_close(ix_iterator()) -> ok.
iterator_close(#mrdb_ix_iter{i = IxIter, sub = Sub}) ->
    mrdb:iterator_close(IxIter),
    iterator_close_sub(Sub).
%% Ask a helper process to close; anything that is not a pid needs no
%% cleanup.
iterator_close_sub(Sub) when is_pid(Sub) ->
    call_sub(Sub, close);
iterator_close_sub(_NotAPid) ->
    ok.
%% Look up the reference of the index table for `IxPos'.
%% The index must be listed in the table's `index' property and be of type
%% `ordered'; otherwise the match fails and the caller crashes.
ensure_index_ref(IxPos, #{name := Name, attr_pos := AttrPos, properties := #{index := Ixs}}) ->
    Pos = index_pos(IxPos, AttrPos),
    {_, ordered} = Ix = lists:keyfind(Pos, 1, Ixs),
    mrdb:get_ref({Name, index, Ix}).
%% Resolve an index position: an attribute name (atom) is looked up in the
%% attribute-position map, anything else is returned unchanged.
index_pos(Attr, AttrPos) when is_atom(Attr) ->
    maps:get(Attr, AttrPos);
index_pos(Pos, _AttrPos) ->
    Pos.

81
src/mrdb_mutex.erl Normal file
View File

@ -0,0 +1,81 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
-module(mrdb_mutex).
-export([ do/2 ]).
-export([ ensure_tab/0 ]).
-define(LOCK_TAB, ?MODULE).
%% We use a wrapping ets counter (default: 0) as a form of semaphore.
%% The claim operation is done using an atomic list of two updates:
%% first, incrementing with 0 - this returns the previous value
%% then, incrementing with 1, but wrapping at 1, ensuring that we get 1 back,
%% regardless of previous value. This means that if [0,1] is returned, the resource
%% was not locked previously; if [1,1] is returned, it was.
%%
%% Releasing the resource is done by deleting the resource. If we just decrement,
%% we will end up with lingering unlocked resources, so we might as well delete.
%% Either operation is atomic, and the claim op creates the object if it's missing.
%% Runs the zero-arity fun `Fun' while holding the lock on `Resource'.
%% The lock is released whether `Fun' returns or raises.
do(Resource, Fun) when is_function(Fun, 0) ->
    true = claim(Resource),
    try
        Fun()
    after
        release(Resource)
    end.
%% Tries to take the lock immediately; if it is already held, falls back
%% to busy-waiting with a 1000 ms limit.
claim(Rsrc) ->
    claim_(Rsrc) orelse busy_wait(Rsrc, 1000).
%% Single, atomic attempt to take the lock.
%% The first counter op (+0) reports the previous value, the second sets the
%% counter to 1 (increment that wraps at 1). Returns `true' if the previous
%% value was 0 (we now hold the lock), `false' if it was already 1.
claim_(Rsrc) ->
    Ops = [{2, 0}, {2, 1, 1, 1}],
    Result = ets:update_counter(?LOCK_TAB, Rsrc, Ops, {Rsrc, 0}),
    case Result of
        [1, 1] ->
            false;
        [0, 1] ->
            %% have lock
            true
    end.
%% The busy-wait function makes use of the fact that we can read a timer to find out
%% if it still has time remaining. This reduces the need for selective receive, looking
%% for a timeout message. We yield, then retry the claim op. Yielding at least used to
%% also be necessary for the `read_timer/1` value to refresh.
%%
%% Starts a timer of `Timeout' ms (message `{claim, Rsrc}' to self) and
%% retries the claim until it succeeds or the timer has run out.
busy_wait(Rsrc, Timeout) ->
    TimerRef = erlang:send_after(Timeout, self(), {claim, Rsrc}),
    do_wait(Rsrc, TimerRef).
%% Retry loop behind busy_wait/2.
%%
%% Yields, then checks whether the timer `Ref' still has time left. If not,
%% raises `error(lock_wait_timeout)'; otherwise retries the claim.
%%
%% Returns `true' once the lock has been taken. The caller chain
%% (claim/1 -> do/2) asserts `true = claim(Rsrc)', so returning `ok' here
%% made every contended-but-successful claim crash with badmatch while
%% leaving the lock held.
%%
%% If the timer has already fired, its `{claim, Rsrc}' message is removed from
%% the mailbox (best effort) so that it does not accumulate in the caller.
do_wait(Rsrc, Ref) ->
    erlang:yield(),
    case erlang:read_timer(Ref) of
        false ->
            discard_claim_timeout(Rsrc),
            error(lock_wait_timeout);
        _ ->
            case claim_(Rsrc) of
                true ->
                    case erlang:cancel_timer(Ref) of
                        false ->
                            %% Timer expired between read_timer and cancel.
                            discard_claim_timeout(Rsrc);
                        _ ->
                            ok
                    end,
                    true;
                false ->
                    do_wait(Rsrc, Ref)
            end
    end.

%% Drops a pending `{claim, Rsrc}' timer message, if one has arrived.
discard_claim_timeout(Rsrc) ->
    receive
        {claim, Rsrc} -> ok
    after 0 ->
            ok
    end.
%% Releases the lock by deleting the counter object for `Rsrc'.
release(Rsrc) ->
    _ = ets:delete(?LOCK_TAB, Rsrc),
    ok.
%% Called by the process holding the ets table.
%% Creates the public, named lock table unless it already exists.
%% Returns the result of ets:new/2 when created, `true' otherwise.
ensure_tab() ->
    Exists = ets:info(?LOCK_TAB, name) =/= undefined,
    Exists orelse
        ets:new(?LOCK_TAB, [set, public, named_table, {write_concurrency, true}]).

285
src/mrdb_select.erl Normal file
View File

@ -0,0 +1,285 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
-module(mrdb_select).
-export([ select/3 %% (Ref, MatchSpec, Limit)
, select/4 %% (Ref, MatchSpec, AccKeys, Limit)
, select/1 %% (Cont)
, fold/5 %% (Ref, Fun, Acc, MatchSpec, Limit)
, rdb_fold/5 %% (Ref, Fun, Acc, Prefix, Limit)
]).
-export([continuation_info/2]).
-import(mnesia_rocksdb_lib, [ keypos/1
, decode_key/2
, decode_val/3
]).
-include("mnesia_rocksdb.hrl").
-record(sel, { alias % TODO: not used
, tab
, ref
, keypat
, ms % TODO: not used
, compiled_ms
, limit
, key_only = false % TODO: not used
, direction = forward % TODO: not used
}).
%% select(Ref, MatchSpec, Limit): like select/4 without key accumulation.
select(Ref, MS, Limit) when is_map(Ref), is_list(MS) ->
    select(Ref, MS, false, Limit).

%% select(Ref, MatchSpec, AccKeys, Limit)
%% Runs the match spec over the table behind `Ref' using a fresh rocksdb
%% iterator. With `AccKeys = true' each match is returned as
%% `{EncodedKey, Match}'.
select(Ref, MS, AccKeys, Limit)
  when is_map(Ref), is_list(MS), is_boolean(AccKeys) ->
    Sel = mk_sel(Ref, MS, Limit),
    Traverse = fun(Iter) -> i_select(Iter, Sel, AccKeys, []) end,
    mrdb:with_rdb_iterator(Ref, Traverse).
%% Builds the #sel{} record describing a select operation: the common key
%% prefix of the match spec, the compiled match spec and the limit.
mk_sel(#{name := Tab} = Ref, MS, Limit) ->
    KeyPrefix = keypat(MS, keypos(Tab), Ref),
    Compiled = ets:match_spec_compile(MS),
    KeyOnly = needs_key_only(MS),
    #sel{ tab = Tab
        , ref = Ref
        , keypat = KeyPrefix
        , ms = MS
        , compiled_ms = Compiled
        , key_only = KeyOnly
        , limit = Limit }.
%% select(Cont): resumes a select from a continuation.
%% A continuation is either '$end_of_table' or a fun of arity 1 that is
%% invoked with the atom `cont'.
select(Cont) ->
    case Cont of
        Resume when is_function(Resume, 1) ->
            Resume(cont);
        '$end_of_table' ->
            '$end_of_table'
    end.
%% Extracts information from a select continuation.
%% The continuation fun answers the atom `sel' with its #sel{} record.
%% Known items: ref | ms | limit | direction. Anything else (or a
%% non-continuation) yields `undefined'.
continuation_info(Item, C) when is_atom(Item), is_function(C, 1) ->
    continuation_info_(Item, C(sel));
continuation_info(_, _) ->
    undefined.

continuation_info_(Item, Sel) ->
    case {Item, Sel} of
        {ref, #sel{ref = Ref}} -> Ref;
        {ms, #sel{ms = MS}} -> MS;
        {limit, #sel{limit = Limit}} -> Limit;
        {direction, #sel{direction = Dir}} -> Dir;
        _ -> undefined
    end.
%% fold(Ref, Fun, Acc, MatchSpec, Limit)
%% Folds over the objects selected by `MS'.
%% `Fun' may be Fun(Obj, Acc) or Fun(Obj, Key, Acc); in the latter case the
%% select is run with key accumulation. Any other fun aborts with
%% `invalid_fold_fun'.
fold(Ref, Fun, Acc, MS, Limit) ->
    {AccKeys, Folder} = fold_adapter(Fun),
    fold_(select(Ref, MS, AccKeys, Limit), Folder, Acc).

%% Picks the key-accumulation mode and a 2-arity folder for the user fun.
fold_adapter(Fun) when is_function(Fun, 3) ->
    {true, fun({Key, Obj}, A) -> Fun(Obj, Key, A) end};
fold_adapter(Fun) when is_function(Fun, 2) ->
    {false, Fun};
fold_adapter(_) ->
    mrdb:abort(invalid_fold_fun).
%% Consumes select results: a plain list (unlimited select), a
%% `{List, Continuation}' chunk, or '$end_of_table'.
fold_('$end_of_table', _Fun, Acc) ->
    Acc;
fold_(Objs, Fun, Acc) when is_list(Objs) ->
    lists:foldl(Fun, Acc, Objs);
fold_({Objs, Cont}, Fun, Acc) ->
    Acc1 = lists:foldl(Fun, Acc, Objs),
    fold_(select(Cont), Fun, Acc1).
%% rdb_fold(Ref, Fun, Acc, Prefix, Limit)
%% Folds Fun(EncKey, EncVal, Acc) over the raw (encoded) key/value pairs,
%% starting at the first data position, for as long as keys start with
%% `Prefix' and `Limit' is not exhausted.
rdb_fold(Ref, Fun, Acc, Prefix, Limit) ->
    Folder = fun(Iter) ->
                     Start = rocksdb:iterator_move(Iter, first(Ref)),
                     i_rdb_fold(Start, Iter, Prefix, Fun, Acc, Limit)
             end,
    mrdb:with_rdb_iterator(Ref, Folder).
%% Iterator start position: refs with `vsn := 1' start at ?DATA_START,
%% all others at the rocksdb `first' position.
first(Ref) ->
    case Ref of
        #{vsn := 1} -> <<?DATA_START>>;
        _ -> first
    end.
%% Fold worker: continues while the iterator yields entries, the key has
%% prefix `Pfx' and the limit is positive (`infinity' compares greater
%% than any integer). Any other iterator result ends the fold.
i_rdb_fold({ok, Key, Val}, Iter, Pfx, Fun, Acc, Limit) when Limit > 0 ->
    case is_prefix(Pfx, Key) of
        false ->
            Acc;
        true ->
            %% Advance first, then apply Fun: same order as argument
            %% evaluation in a direct recursive call.
            Next = rocksdb:iterator_move(Iter, next),
            Acc1 = Fun(Key, Val, Acc),
            i_rdb_fold(Next, Iter, Pfx, Fun, Acc1, decr(Limit))
    end;
i_rdb_fold(_, _, _, _, Acc, _) ->
    Acc.
%% Positions the iterator at the start key for this select and begins the
%% traversal.
i_select(I, #sel{ keypat = Pfx
                , compiled_ms = MS
                , limit = Limit
                , ref = #{vsn := Vsn, encoding := Enc} } = Sel, AccKeys, Acc) ->
    Start = rocksdb:iterator_move(I, select_start_key(Pfx, Vsn, Enc)),
    select_traverse(Start, Limit, Pfx, MS, I, Sel, AccKeys, Acc).

%% Start key selection:
%% - empty prefix, vsn 1, sext-encoded keys: start at ?DATA_START
%% - term-encoded keys: start from the empty binary
%% - otherwise: start at the key prefix itself
select_start_key(<<>>, 1, {sext, _}) -> <<?DATA_START>>;
select_start_key(_, _, {term, _}) -> <<>>;
select_start_key(Pfx, _, _) -> Pfx.
%% True if every clause of the (non-empty) match spec can be evaluated
%% from the key alone.
needs_key_only([_|_] = Pats) ->
    lists:all(fun needs_key_only_/1, Pats).
%% Per-clause check. A clause needs more than the key if its body uses
%% '$$' or '$_', if the head is a single variable that the body uses, or if
%% the body uses a variable bound at a head position other than 2.
needs_key_only_({HP, _, Body}) ->
    BodyVars = lists:flatmap(fun extract_vars/1, Body),
    %% Note that we express the conditions for "needs more than key" and negate.
    NeedsMore =
        wild_in_body(BodyVars) orelse
        case bound_in_headpat(HP) of
            {all, V} ->
                lists:member(V, BodyVars);
            Vars when is_list(Vars) ->
                any_in_body(lists:keydelete(2, 1, Vars), BodyVars)
        end,
    not NeedsMore.
%% Collects match-spec variables ('$N', '_', '$$', '$_') occurring anywhere
%% in a term, descending into lists and tuples. Duplicates are kept.
extract_vars([Head | Tail]) ->
    extract_vars(Head) ++ extract_vars(Tail);
extract_vars(Tuple) when is_tuple(Tuple) ->
    extract_vars(tuple_to_list(Tuple));
extract_vars(Atom) when is_atom(Atom) ->
    IsVar = Atom == '$$' orelse Atom == '$_' orelse is_wild(Atom),
    case IsVar of
        true -> [Atom];
        false -> []
    end;
extract_vars(_) ->
    [].
%% True if any `{Pos, HeadVars}' entry shares a variable with `BodyVars'.
any_in_body(Vars, BodyVars) ->
    SharesVar = fun({_Pos, HeadVars}) ->
                        intersection(HeadVars, BodyVars) =/= []
                end,
    lists:any(SharesVar, Vars).
%% Elements of `A' that also occur in `B', computed with list subtraction
%% (so multiplicities follow `--' semantics).
intersection(A, B) when is_list(A), is_list(B) ->
    OnlyInA = A -- B,
    A -- OnlyInA.
%% True for '_' and for atoms of the form '$<integer>'.
is_wild('_') ->
    true;
is_wild(A) when is_atom(A) ->
    case atom_to_list(A) of
        [$$ | Digits] ->
            try list_to_integer(Digits) of
                _ -> true
            catch
                error:_ -> false
            end;
        _ ->
            false
    end.
%% True if the body variables include '$$' or '$_'.
wild_in_body(BodyVars) ->
    case intersection(BodyVars, ['$$', '$_']) of
        [] -> false;
        [_ | _] -> true
    end.
%% Describes where variables are bound in a match head:
%% `{all, Var}' when the head is a single atom, otherwise a list of
%% `{Position, Vars}' for the tuple elements from position 2 onwards
%% (the first element is skipped).
bound_in_headpat(Head) when is_atom(Head) ->
    {all, Head};
bound_in_headpat(Head) when is_tuple(Head) ->
    [_First | Fields] = tuple_to_list(Head),
    map_vars(Fields, 2).
%% Pairs each element position (starting at `P') with the variables found
%% in that element; positions without variables are left out.
map_vars(Elems, P) ->
    Positions = lists:seq(P, P + length(Elems) - 1),
    [{Pos, Vs} || {Pos, Elem} <- lists:zip(Positions, Elems),
                  Vs <- [extract_vars(Elem)],
                  Vs =/= []].
%% Core traversal loop of select.
%%
%% For each entry whose encoded key has prefix `Pfx', the object is decoded
%% and run through the compiled match spec. Matches are accumulated (with
%% the encoded key when `AccKeys' is true) and the remaining limit is
%% decremented. The traversal ends when a key no longer has the prefix or the
%% iterator returns an error; the result is then a plain list for
%% `infinity' limits and `{List, '$end_of_table'}' otherwise.
select_traverse({ok, K, V}, Limit, Pfx, MS, I, #sel{ref = R} = Sel,
                AccKeys, Acc) ->
    case is_prefix(Pfx, K) of
        false ->
            select_return(Limit, {lists:reverse(Acc), '$end_of_table'});
        true ->
            Rec = decode_val(V, decode_key(K, R), R),
            case ets:match_spec_run([Rec], MS) of
                [Match] ->
                    Acc1 = if AccKeys ->
                                   [{K, Match} | Acc];
                              true ->
                                   [Match | Acc]
                           end,
                    traverse_continue(K, decr(Limit), Pfx, MS, I, Sel,
                                      AccKeys, Acc1);
                [] ->
                    Next = rocksdb:iterator_move(I, next),
                    select_traverse(Next, Limit, Pfx, MS, I, Sel,
                                    AccKeys, Acc)
            end
    end;
select_traverse({error, _}, Limit, _, _, _, _, _, Acc) ->
    select_return(Limit, {lists:reverse(Acc), '$end_of_table'}).
%% Shapes the final select result: with an `infinity' limit a finished
%% traversal returns just the list; everything else is returned as is.
select_return(Limit, Ret) ->
    case {Limit, Ret} of
        {infinity, {L, '$end_of_table'}} -> L;
        _ -> Ret
    end.
%% True if binary `A' is a prefix of binary `B' (the empty binary is a
%% prefix of everything).
is_prefix(A, B) when is_binary(A), is_binary(B) ->
    Len = byte_size(A),
    byte_size(B) >= Len andalso binary:part(B, 0, Len) =:= A.
%% Decrements a limit; `infinity' stays `infinity'.
decr(infinity) ->
    infinity;
decr(N) when is_integer(N) ->
    N - 1.
%% Decides how to go on after a match.
%%
%% When the remaining limit has reached 0, the accumulated chunk is returned
%% together with a continuation fun. The continuation answers `sel' with the
%% #sel{} record and `cont' by opening a new iterator, positioning it after
%% the last returned key `K', and resuming with the original limit and an
%% empty accumulator. Otherwise the traversal simply moves to the next entry.
traverse_continue(K, 0, Pfx, MS, _I, #sel{limit = Limit, ref = Ref} = Sel, AccKeys, Acc) ->
    Resume = fun(NewI) ->
                     select_traverse(iterator_next(NewI, K),
                                     Limit, Pfx, MS, NewI, Sel,
                                     AccKeys, [])
             end,
    Cont = fun(sel) -> Sel;
              (cont) -> mrdb:with_rdb_iterator(Ref, Resume)
           end,
    {lists:reverse(Acc), Cont};
traverse_continue(_K, Limit, Pfx, MS, I, Sel, AccKeys, Acc) ->
    Next = rocksdb:iterator_move(I, next),
    select_traverse(Next, Limit, Pfx, MS, I, Sel, AccKeys, Acc).
%% Positions the iterator just after key `K': seeks to `K' and, if exactly
%% that key is found, steps to the next entry. Any other seek result
%% (a different key, or an error) is returned as is.
iterator_next(I, K) ->
    Res = rocksdb:iterator_move(I, K),
    case Res of
        {ok, Found, _} when Found =:= K ->
            rocksdb:iterator_move(I, next);
        _ ->
            Res
    end.
%% keypat(MatchSpec, KeyPos, Ref)
%% Computes the longest encoded key prefix shared by all clauses of the
%% match spec. Stops early once the prefix has shrunk to the empty binary.
keypat([Clause | Rest], KeyPos, Ref) ->
    keypat(Rest, KeyPos, Ref, keypat_pfx(Clause, KeyPos, Ref)).

keypat(_, _, _, <<>>) ->
    <<>>;
keypat([], _, _, Common) ->
    Common;
keypat([Clause | Rest], KeyPos, Ref, Common) ->
    ClausePfx = keypat_pfx(Clause, KeyPos, Ref),
    keypat(Rest, KeyPos, Ref, common_prefix(ClausePfx, Common)).
%% Longest common prefix of two binaries; anything that is not a pair of
%% binaries yields the empty binary.
common_prefix(A, B) when is_binary(A), is_binary(B) ->
    Len = binary:longest_common_prefix([A, B]),
    binary:part(A, 0, Len);
common_prefix(_, _) ->
    <<>>.
%% Key prefix for one match-spec clause. Only computed for sext-encoded
%% keys with a tuple head pattern (via sext:prefix/1 on the key element);
%% every other case gives the empty prefix.
keypat_pfx({HeadPat, _Guards, _Body}, KeyPos, #{encoding := {sext, _}})
  when is_tuple(HeadPat) ->
    sext:prefix(element(KeyPos, HeadPat));
keypat_pfx(_, _, _) ->
    <<>>.

View File

@ -1,3 +1,4 @@
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
-module(mnesia_rocksdb_SUITE). -module(mnesia_rocksdb_SUITE).
-export([ -export([
@ -12,10 +13,25 @@
, end_per_testcase/2 , end_per_testcase/2
]). ]).
-export([error_handling/1]). -export([ encoding_sext_attrs/1
, encoding_binary_binary/1
, encoding_defaults/1
]).
-export([ mrdb_batch/1
, mrdb_transactions/1
, mrdb_abort_reasons/1
, mrdb_repeated_transactions/1
, mrdb_abort/1
, mrdb_two_procs/1
, mrdb_two_procs_tx_restart/1
, mrdb_two_procs_snap/1
, mrdb_three_procs/1
]).
-include_lib("common_test/include/ct.hrl"). -include_lib("common_test/include/ct.hrl").
-define(TABS_CREATED, tables_created).
suite() -> suite() ->
[]. [].
@ -23,18 +39,40 @@ all() ->
[{group, all_tests}]. [{group, all_tests}].
groups() -> groups() ->
[{all_tests, [sequence], [error_handling]}]. [
{all_tests, [sequence], [ {group, checks}
, {group, mrdb} ]}
%% , error_handling ]}
, {checks, [sequence], [ encoding_sext_attrs
, encoding_binary_binary
, encoding_defaults ]}
, {mrdb, [sequence], [ mrdb_batch
, mrdb_transactions
, mrdb_abort_reasons
, mrdb_repeated_transactions
, mrdb_abort
, mrdb_two_procs
, mrdb_two_procs_tx_restart
, mrdb_two_procs_snap
, mrdb_three_procs ]}
].
error_handling(_Config) -> %% error_handling(Config) ->
mnesia_rocksdb_error_handling:run(). %% mnesia_rocksdb_error_handling:run(Config).
init_per_suite(Config) -> init_per_suite(Config) ->
Config. tr_ct:set_activation_checkpoint(?TABS_CREATED, Config).
end_per_suite(_Config) -> end_per_suite(_Config) ->
ok. ok.
init_per_group(G, Config) when G==mrdb
; G==checks ->
mnesia:stop(),
ok = mnesia_rocksdb_tlib:start_mnesia(reset),
Config;
init_per_group(_, Config) -> init_per_group(_, Config) ->
Config. Config.
@ -46,3 +84,564 @@ init_per_testcase(_, Config) ->
end_per_testcase(_, _Config) -> end_per_testcase(_, _Config) ->
ok. ok.
%% Test case: runs encoding_sext_attrs_/1 under tracing of mnesia_rocksdb.
encoding_sext_attrs(Config) ->
    TraceOpts = tr_patterns(mnesia_rocksdb,
                            [{mnesia_rocksdb,'_',x}], tr_opts()),
    tr_ct:with_trace(fun encoding_sext_attrs_/1, Config, TraceOpts).

%% Well-formed records can be written through both mrdb and mnesia, while
%% records of the wrong arity are rejected by both.
encoding_sext_attrs_(Config) ->
    Tabs = create_tabs([{t, [{attributes, [k, v]}]}], Config),
    ok = mrdb:insert(t, {t, 1, a}),
    ok = mnesia:dirty_write({t, 2, b}),
    BadMrdb = fun() -> mrdb:insert(t, {t, a}) end,
    expect_error(BadMrdb, ?LINE, error, {mrdb_abort, badarg}),
    BadMnesia = fun() -> mnesia:dirty_write({t, a}) end,
    expect_error(BadMnesia, ?LINE, exit, '_'),
    delete_tabs(Tabs),
    ok.
%% Checks the `mrdb_encoding' user property that table creation stores
%% for set, ordered_set and bag tables when none is given.
encoding_defaults(Config) ->
    UserProps = fun(Tab) -> mnesia:table_info(Tab, user_properties) end,
    Specs = [ {a, [ {attributes, [k, v]}
                  , {type, set}]}
            , {b, [ {attributes, [k, v, w]}
                  , {type, ordered_set}]}
            , {c, [ {attributes, [k, v]}
                  , {type, bag} ]}],
    Tabs = create_tabs(Specs, Config),
    [{mrdb_encoding,{term,{value,term}}}] = UserProps(a),
    [{mrdb_encoding,{sext,{object,term}}}] = UserProps(b),
    [{mrdb_encoding,{sext,{value,term}}}] = UserProps(c),
    delete_tabs(Tabs),
    ok.
%% Tables with raw key encoding can be created with `raw' or
%% `{object, term}' value encoding; `{value, raw}' with three attributes is
%% expected to fail.
encoding_binary_binary(Config) ->
    Good = [ {a, [ {attributes, [k,v]}
                 , {user_properties,
                    [{mrdb_encoding, {raw, raw}}]}]}
           , {b, [ {attributes, [k, v, w]}
                 , {user_properties,
                    [{mrdb_encoding, {raw, {object, term}}}]}]}
           ],
    Tabs = create_tabs(Good, Config),
    CreateBad = fun() ->
                        create_tab(
                          c, [ {attributes, [k, v, w]}
                             , {user_properties,
                                [{mrdb_encoding, {raw, {value, raw}}}]}])
                end,
    expect_error(CreateBad, ?LINE, error, '_'),
    delete_tabs(Tabs),
    ok.
%% Runs `F' and requires it to raise an exception of class `Type'.
%% `Expected' is either the exact reason or '_' for "any reason".
%% A normal return raises `error({unexpected, Line, Value})'; exceptions of
%% another class or reason propagate unchanged.
expect_error(F, Line, Type, Expected) ->
    try F() of
        Value ->
            error({unexpected, Line, Value})
    catch
        Type:Expected ->
            ct:log("Caught expected ~p:~p (Line: ~p)", [Type, Expected, Line]),
            ok;
        Type:Reason when Expected == '_' ->
            ct:log("Caught expected ~p:_ (Line:~p): ~p", [Type, Line, Reason]),
            ok
    end.
%% Exercises batch activities:
%% - a successful batch makes all its writes visible,
%% - a batch that raises leaves no write behind,
%% - batches work with a table ref and via mrdb:as_batch/2,
%% and in every case the process dictionary is left as it was.
mrdb_batch(Config) ->
    Tabs = create_tabs([{b, []}], Config),
    Dict0 = get_dict(),
    Keys = lists:seq(1, 10),
    mrdb:activity(
      batch, rdb,
      fun() ->
              [mrdb:insert(b, {b, K, K}) || K <- Keys]
      end),
    dictionary_unchanged(Dict0),
    lists:foreach(fun(K) -> [{b,K,K}] = mrdb:read(b, K) end, Keys),
    FailingBatch =
        fun() ->
                mrdb:activity(
                  batch, rdb,
                  fun() ->
                          mrdb:insert(b, {b, 11, 11}),
                          error(willful_abort)
                  end)
        end,
    expect_error(FailingBatch, ?LINE, error, '_'),
    dictionary_unchanged(Dict0),
    [] = mrdb:read(b, 11),
    TRef = mrdb:get_ref(b),
    mrdb:activity(
      batch, rdb,
      fun() ->
              mrdb:insert(TRef, {b, 12, 12})
      end),
    dictionary_unchanged(Dict0),
    [{b, 12, 12}] = mrdb:read(b, 12),
    mrdb:as_batch(b, fun(BatchRef) ->
                             mrdb:insert(BatchRef, {b, 13, 13})
                     end),
    dictionary_unchanged(Dict0),
    [{b, 13, 13}] = mrdb:read(b, 13),
    delete_tabs(Tabs),
    ok.
%% Test case: runs mrdb_transactions_/1 under tracing of mnesia_rocksdb_admin.
mrdb_transactions(Config) ->
    TraceOpts = tr_patterns(
                  mnesia_rocksdb_admin,
                  [{mnesia_rocksdb_admin,'_',x}], tr_opts()),
    tr_ct:with_trace(fun mrdb_transactions_/1, Config, TraceOpts).

%% A read-increment-write transaction sees its own write and the update is
%% visible after commit; the process dictionary is unchanged afterwards.
mrdb_transactions_(Config) ->
    Tabs = create_tabs([{tx, []}], Config),
    mrdb:insert(tx, {tx, a, 1}),
    [_] = mrdb:read(tx, a),
    Dict0 = get_dict(),
    Increment =
        fun() ->
                [{tx,a,Old}] = mrdb:read(tx, a),
                New = Old+1,
                ok = mrdb:insert(tx, {tx,a,New}),
                [{tx,a,New}] = mrdb:read(tx, a),
                ok
        end,
    mrdb:activity(tx, rdb, Increment),
    dictionary_unchanged(Dict0),
    [{tx,a,2}] = mrdb:read(tx,a),
    delete_tabs(Tabs),
    ok.
%% Compares the abort reasons produced by mnesia and mrdb transactions for
%% throw, exit, error and explicit aborts, with `mnesia_compatible_aborts'
%% switched on.
%%
%% The previous value of the setting (as returned by set_and_cache_env/2) is
%% restored in an `after' clause, so that a failing comparison does not leave
%% the setting enabled for subsequent test cases.
mrdb_abort_reasons(_Config) ->
    Prev = mnesia_rocksdb_admin:set_and_cache_env(mnesia_compatible_aborts, true),
    try
        X = some_value,
        compare_txs('throw', fun() -> throw(X) end),
        compare_txs('exit' , fun() -> exit(X) end),
        compare_txs('error', fun() -> error(X) end),
        compare_txs('abort', fun() -> mnesia:abort(X) end),
        compare_txs('abort' , fun() -> mrdb:abort(X) end),
        ok
    after
        mnesia_rocksdb_admin:set_and_cache_env(mnesia_compatible_aborts, Prev)
    end.
%% Runs `F' in a mnesia transaction and in an mrdb transaction; both must
%% exit with `{aborted, Reason}'. The pair of reasons must then have the
%% shape expected for the given exception `Type'.
compare_txs(Type, F) ->
    {caught, exit, {aborted, MnReason}} = mnesia_tx(F),
    {caught, exit, {aborted, MrReason}} = mrdb_tx(F),
    ct:log("Mnesia = ~p/~p", [Type, MnReason]),
    ct:log("Mrdb = ~p/~p", [Type, MrReason]),
    case {Type, MnReason, MrReason} of
        {throw, {throw, some_value}, {throw, some_value}} -> ok;
        {error, {some_value, [_|_]}, {some_value, []}} -> ok;
        {exit, some_value, some_value} -> ok;
        {abort, some_value, some_value} -> ok
    end.
%% Runs `F' as a mnesia transaction; any exception is returned as
%% `{caught, Class, Reason}'.
mnesia_tx(F) ->
    try
        mnesia:activity(transaction, F)
    catch
        Class:Reason ->
            {caught, Class, Reason}
    end.

%% Same as mnesia_tx/1, but through mrdb:activity/3.
mrdb_tx(F) ->
    try
        mrdb:activity(transaction, rdb, F)
    catch
        Class:Reason ->
            {caught, Class, Reason}
    end.
%% Runs the same increment transaction 100 times and checks the final
%% counter value as well as that the process dictionary is left untouched.
mrdb_repeated_transactions(Config) ->
    Tabs = create_tabs([{rtx, []}], Config),
    mrdb:insert(rtx, {rtx, a, 0}),
    [_] = mrdb:read(rtx, a),
    Increment = fun() ->
                        [{rtx, a, Old}] = mrdb:read(rtx, a),
                        New = Old+1,
                        ok = mrdb:insert(rtx, {rtx, a, New})
                end,
    Dict0 = get_dict(),
    lists:foreach(fun(_) ->
                          ok = mrdb:activity(tx, rdb, Increment)
                  end, lists:seq(1,100)),
    dictionary_unchanged(Dict0),
    [{rtx,a,100}] = mrdb:read(rtx, a),
    delete_tabs(Tabs),
    ok.
%% Verifies that an exception raised inside a transaction aborts it:
%% the error is expected to surface from mrdb:activity/3 as
%% `error:abort_here', the stored object must be unchanged afterwards, and
%% the process dictionary must be the same as before the activity.
mrdb_abort(Config) ->
Created = create_tabs([{tx_abort, []}], Config),
mrdb:insert(tx_abort, {tx_abort, a, 1}),
Pre = mrdb:read(tx_abort, a),
D0 = get_dict(),
TRes = try mrdb:activity(
tx, rdb,
fun() ->
[{tx_abort, a, N}] = mrdb:read(tx_abort, a),
%% Raise here; the two expressions below are never evaluated.
error(abort_here),
ok = mrdb:insert(tx_abort, [{tx_abort, a, N+1}]),
noooo
end)
catch
error:abort_here ->
ok
end,
dictionary_unchanged(D0),
ok = TRes,
%% Same value as before the aborted transaction.
Pre = mrdb:read(tx_abort, a),
delete_tabs(Created),
ok.
%% Test case: runs mrdb_two_procs_/1 with call tracing of this process and
%% of mrdb:insert/2, mrdb:read/2 and mrdb:activity.
mrdb_two_procs(Config) ->
tr_ct:with_trace(fun mrdb_two_procs_/1, Config,
tr_flags(
{self(), [call, sos, p]},
tr_patterns(
mrdb, [ {mrdb, insert, 2, x}
, {mrdb, read, 2, x}
, {mrdb, activity, x}], tr_opts()))).
%% Two concurrent transactions on the same key, without retries.
%% The spawned process writes {R, a, 17} inside its transaction; the parent
%% transaction (no_snapshot, retries => 0) reads the key before and after
%% the other one has finished and then tries to write. The parent's activity
%% is expected to fail with a "Resource busy" error, leaving the other
%% process's value (17) in the table.
mrdb_two_procs_(Config) ->
R = ?FUNCTION_NAME,
Parent = self(),
Created = create_tabs([{R, []}], Config),
mrdb:insert(R, {R, a, 1}),
Pre = mrdb:read(R, a),
%% Transaction body of the other process: sync, write, sync.
F0 = fun() ->
wait_for_other(Parent, ?LINE),
ok = mrdb:insert(R, {R, a, 17}),
wait_for_other(Parent, ?LINE)
end,
{POther, MRef} = spawn_opt(
fun() ->
D0 = get_dict(),
ok = mrdb:activity(tx, rdb, F0),
dictionary_unchanged(D0)
end, [monitor]),
%% Transaction body of the parent.
F1 = fun() ->
Pre = mrdb:read(R, a),
go_ahead_other(POther),
await_other_down(POther, MRef, ?LINE),
[{R, a, 17}] = mrdb:read(R, a),
ok = mrdb:insert(R, {R, a, 18})
end,
%% Answer the other process's first `ready' before starting our own tx.
go_ahead_other(1, POther),
Do0 = get_dict(),
try mrdb:activity({tx, #{no_snapshot => true,
retries => 0}}, rdb, F1) of
ok -> error(unexpected)
catch
error:{error, "Resource busy" ++ _} ->
ok
end,
dictionary_unchanged(Do0),
[{R, a, 17}] = mrdb:read(R, a),
delete_tabs(Created),
ok.
%% Test case: runs mrdb_two_procs_tx_restart_/1 with the light trace options.
mrdb_two_procs_tx_restart(Config) ->
tr_ct:with_trace(fun mrdb_two_procs_tx_restart_/1, Config,
light_tr_opts()).
%% Same setup as mrdb_two_procs_/1, but the parent transaction is run with
%% `no_snapshot => true' and no explicit `retries' option, so it may be
%% re-run. On the first attempt it expects to read the original value, on
%% later attempts the value written by the other process. The parent's write
%% ({R, a, 18}) must be the final value.
mrdb_two_procs_tx_restart_(Config) ->
R = ?FUNCTION_NAME,
Parent = self(),
Created = create_tabs([{R, []}], Config),
mrdb:insert(R, {R, a, 1}),
Pre = mrdb:read(R, a),
%% Transaction body of the other process: sync, write, sync.
F0 = fun() ->
wait_for_other(Parent, ?LINE),
ok = mrdb:insert(R, {R, a, 17}),
wait_for_other(Parent, ?LINE)
end,
{POther, MRef} = spawn_opt(
fun() ->
ok = mrdb:activity(tx, rdb, F0)
end, [monitor]),
%% Transaction body of the parent; what it expects to read depends on
%% the current attempt number.
F1 = fun() ->
OtherWrite = [{R, a, 17}],
Att = get_attempt(),
Expected = case Att of
1 -> Pre;
_ -> OtherWrite
end,
Expected = mrdb:read(R, a),
go_ahead_other(POther),
await_other_down(POther, MRef, ?LINE),
OtherWrite = mrdb:read(R, a),
ok = mrdb:insert(R, {R, a, 18})
end,
%% Answer the other process's first `ready' before starting our own tx.
go_ahead_other(1, POther),
Do0 = get_dict(),
mrdb:activity({tx, #{no_snapshot => true}}, rdb, F1),
dictionary_unchanged(Do0),
[{R, a, 18}] = mrdb:read(R, a),
delete_tabs(Created),
ok.
%%
%% For testing purposes, we use side-effects inside the transactions
%% to synchronize the concurrent transactions. If a transaction fails due
%% to "Resource busy", it can re-run, but then mustn't attempt to sync with
%% the other transaction, which is already committed.
%%
%% To achieve this, we rely on the `mrdb:current_context()` function, which gives
%% us information about which is the current attempt; we only sync on the first
%% attempt, and ignore the sync ops on retries.
%%
-define(IF_FIRST(N, Expr),
if N == 1 ->
Expr;
true ->
ok
end).
%% Two concurrent transactions using the default tx mode.
%% The other process writes key `a'; the parent transaction reads `a'
%% (expecting the original value on attempt 1 and the other process's value
%% on attempt 2), waits for the other process to finish, then reads and
%% updates key `b'. The parent's activity must return its fun's value (1477)
%% and both writes must be visible at the end.
mrdb_two_procs_snap(Config) ->
%% _snap is now the default tx mode
R = ?FUNCTION_NAME,
Parent = self(),
Created = create_tabs([{R, []}], Config),
mrdb:insert(R, {R, a, 1}),
Pre = mrdb:read(R, a),
mrdb:insert(R, {R, b, 11}),
PreB = mrdb:read(R, b),
%% Transaction body of the other process: write, then sync with parent.
F0 = fun() ->
ok = mrdb:insert(R, {R, a, 17}),
wait_for_other(Parent, ?LINE)
end,
{POther, MRef} =
spawn_opt(fun() ->
D0 = get_dict(),
ok = mrdb:activity(tx, rdb, F0),
dictionary_unchanged(D0)
end, [monitor]),
%% Transaction body of the parent.
F1 = fun() ->
Att = get_attempt(),
go_ahead_other(Att, POther),
ARes = mrdb:read(R, a),
%% Only attempts 1 and 2 are accepted here; a third attempt would
%% fail with case_clause.
ARes = case Att of
1 -> Pre;
2 -> [{R, a, 17}]
end,
await_other_down(POther, MRef, ?LINE),
PreB = mrdb:read(R, b),
mrdb:insert(R, {R, b, 18}),
1477
end,
Do0 = get_dict(),
1477 = mrdb:activity(tx, rdb, F1),
dictionary_unchanged(Do0),
[{R, a, 17}] = mrdb:read(R, a),
[{R, b, 18}] = mrdb:read(R, b),
delete_tabs(Created),
ok.
%% We spawn two helper processes, making it 3 transactions, with the one
%% in the parent process. P2 writes to key `a`, which the other two try to read.
%% We make sure that P2 commits before finishing the other two, and P3 and the
%% main thread sync, so as to maximize the contention for the retry lock.
%% Test case: runs mrdb_three_procs_/1 with the light trace options.
mrdb_three_procs(Config) ->
tr_ct:with_trace(fun mrdb_three_procs_/1, Config, light_tr_opts()).
%% Three transactions: P1 and P2 in helper processes, plus one in this
%% (parent) process.
%% - P1 (retries => 0) writes A1 to key `a' and a marker under key `p1'.
%% - P2 (retries => 0, no_snapshot) reads `a', later expects to see A1 and
%%   tries to write A2 plus a marker under `p2'; its activity is expected to
%%   fail with "Resource busy".
%% - The parent transaction drives both helpers through allow_p/3, waits for
%%   both to terminate and writes a marker under `p0'.
%% Final state checked: markers p1 and p0 exist, p2 does not, and `a' holds A1.
mrdb_three_procs_(Config) ->
R = ?FUNCTION_NAME,
Parent = self(),
Created = create_tabs([{R, []}], Config),
A0 = {R, a, 1},
A1 = {R, a, 11},
A2 = {R, a, 12},
ok = mrdb:insert(R, A0),
%% Transaction body for P1.
F1 = fun() ->
ok = mrdb:insert(R, A1),
ok = mrdb:insert(R, {R, p1, 1})
end,
{P1, MRef1} =
spawn_opt(fun() ->
D0 = get_dict(),
do_when_p_allows(
1, Parent, ?LINE,
fun() ->
ok = mrdb:activity({tx,#{retries => 0}}, rdb, F1)
end),
dictionary_unchanged(D0)
end, [monitor]),
%% Transaction body for P2.
F2 = fun() ->
[A0] = mrdb:read(R, a),
Att = get_attempt(),
wait_for_other(Att, Parent, ?LINE),
do_when_p_allows(
Att, Parent, ?LINE,
fun() ->
[A1] = mrdb:read(R, a),
ok = mrdb:insert(R, A2),
ok = mrdb:insert(R, {R, p2, 1})
end)
end,
{P2, MRef2} =
spawn_opt(fun() ->
D0 = get_dict(),
try mrdb:activity(
{tx, #{retries => 0,
no_snapshot => true}}, rdb, F2) of
ok -> error(unexpected)
catch
error:{error, "Resource busy" ++ _} ->
ok
end,
dictionary_unchanged(D0)
end, [monitor]),
Do0 = get_dict(),
ok = mrdb:activity(tx, rdb,
fun() ->
Att = get_attempt(),
%% Expected value of `a' throughout this attempt; only attempts
%% 1 and 2 are accepted.
ARes = case Att of
1 -> [A0];
2 -> [A1]
end,
%% First, ensure that P2 tx is running
go_ahead_other(Att, P2),
ARes = mrdb:read(R, a),
allow_p(Att, P1, ?LINE),
ARes = mrdb:read(R, a),
allow_p(Att, P2, ?LINE),
ARes = mrdb:read(R, a),
await_other_down(P1, MRef1, ?LINE),
await_other_down(P2, MRef2, ?LINE),
ok = mrdb:insert(R, {R, p0, 1})
end),
dictionary_unchanged(Do0),
[{R, p1, 1}] = mrdb:read(R, p1),
[] = mrdb:read(R, p2),
[A1] = mrdb:read(R, a),
[{R, p0, 1}] = mrdb:read(R, p0),
delete_tabs(Created),
ok.
%% Default trace options: trace patterns for mrdb, mrdb_lib, tr_ttb:event/3
%% and the synchronization helpers of this suite.
tr_opts() ->
    SyncHelpers = [ {?MODULE, go_ahead_other, 3, x}
                  , {?MODULE, wait_for_other, 3, x}
                  , {?MODULE, await_other_down, 3, x}
                  , {?MODULE, do_when_p_allows, 4, x}
                  , {?MODULE, allow_p, 3, x}
                  ],
    Base = [ {mrdb, '_', '_', x}
           , {mrdb_lib, '_', '_', x}
           , {tr_ttb, event, 3, []}
           ],
    #{patterns => Base ++ SyncHelpers}.
%% Lighter trace options: call tracing of the calling process, with the mrdb
%% patterns narrowed to insert/2, read/2 and activity.
light_tr_opts() ->
    MrdbPatterns = [ {mrdb, insert, 2, x}
                   , {mrdb, read, 2, x}
                   , {mrdb, activity, x} ],
    Opts = tr_patterns(mrdb, MrdbPatterns, tr_opts()),
    tr_flags({self(), [call, sos, p]}, Opts).
%% Replaces all trace patterns for module `Mod' in `Opts' with `Ps'
%% (the new patterns are placed first).
tr_patterns(Mod, Ps, #{patterns := Pats} = Opts) ->
    OtherMods = lists:filter(fun(P) -> element(1, P) =/= Mod end, Pats),
    Opts#{patterns => Ps ++ OtherMods}.
%% Sets (or overrides) the `flags' entry of the trace options map.
tr_flags(Flags, Opts) when is_map(Opts) ->
    maps:put(flags, Flags, Opts).
%% wait_for_other(Parent, Line): uses the current transaction attempt and a
%% 1000 ms timeout.
wait_for_other(Parent, L) ->
    Att = get_attempt(),
    wait_for_other(Att, Parent, 1000, L).

%% wait_for_other(Attempt, Parent, Line): 1000 ms timeout.
wait_for_other(Att, Parent, L) ->
    wait_for_other(Att, Parent, 1000, L).

%% wait_for_other(Attempt, Parent, Timeout, Line)
%% On the first attempt: announces `{self(), ready}' to `Parent' and waits
%% for `{Parent, cont}'. Exits with the parent's reason if it dies, and
%% raises `{inner_timeout, Line}' after `Timeout' ms.
%% On any other attempt this is a no-op.
wait_for_other(1, Parent, Timeout, L) ->
    Mon = monitor(process, Parent),
    Parent ! {self(), ready},
    receive
        {Parent, cont} ->
            demonitor(Mon),
            ok;
        {'DOWN', Mon, _, _, Why} ->
            ct:log("Parent died, Reason = ~p", [Why]),
            exit(Why)
    after Timeout ->
            demonitor(Mon),
            error({inner_timeout, L})
    end;
wait_for_other(_, _, _, _) ->
    ok.
%% Three-step handshake, seen from the side that does the work:
%% wait until P lets us go, run F, tell P we are done, then wait for P's
%% acknowledgement. The sync steps are only performed on attempt 1
%% (see wait_for_other/3 and go_ahead_other/3); F always runs.
%% Note that `Line' is passed as the third argument of go_ahead_other/3,
%% which that function uses as its timeout.
do_when_p_allows(Att, P, Line, F) ->
wait_for_other(Att, P, Line),
F(),
%% Tell P that we're done
go_ahead_other(Att, P, Line),
%% Wait for P to acknowledge
wait_for_other(Att, P, Line).
%% Counterpart of do_when_p_allows/4, seen from the controlling side:
%% let P start, wait until P reports that it is done, then acknowledge.
%% Note that `Line' is passed as the third argument of go_ahead_other/3,
%% which that function uses as its timeout.
allow_p(Att, P, Line) ->
go_ahead_other(Att, P),
%% This is where P does its thing.
wait_for_other(Att, P, Line),
%% Acknowledge
go_ahead_other(Att, P, Line).
%% go_ahead_other(Pid): uses the current transaction attempt.
go_ahead_other(POther) ->
    Att = get_attempt(),
    go_ahead_other(Att, POther).

%% go_ahead_other(Attempt, Pid): 1000 ms timeout.
go_ahead_other(Att, POther) ->
    go_ahead_other(Att, POther, 1000).

%% go_ahead_other(Attempt, Pid, Timeout): only acts on the first attempt.
go_ahead_other(Att, POther, Timeout) ->
    ?IF_FIRST(Att, go_ahead_other_(POther, Timeout)).

%% Waits for `{POther, ready}' and answers with `{self(), cont}'.
%% Raises `go_ahead_timeout' if no `ready' arrives within `Timeout' ms.
go_ahead_other_(POther, Timeout) ->
    Me = self(),
    receive
        {POther, ready} ->
            POther ! {Me, cont}
    after Timeout ->
            error(go_ahead_timeout)
    end.
%% Due to transaction restarts, we may already have collected
%% a DOWN message. In this case, P will already be dead, and there
%% will not be a 'DOWN' message still in the msg queue.
%% This is fine (we assume it is), and we just make sure that the
%% process didn't die abnormally.
%% Waits for the monitored process to terminate, but only on the first
%% transaction attempt.
await_other_down(P, MRef, Line) ->
    Att = get_attempt(),
    ?IF_FIRST(Att, await_other_down_(P, MRef, Line)).

%% Waits up to 1000 ms for the 'DOWN' message of `MRef'. A normal exit is
%% fine; any other reason raises `abnormal_termination' with details, and
%% a missing message raises `{monitor_timeout, Line}'.
await_other_down_(P, MRef, Line) ->
    receive
        {'DOWN', MRef, _, _, normal} ->
            ok;
        {'DOWN', MRef, _, _, Reason} ->
            error({abnormal_termination,
                   [ {pid, P}
                   , {mref, MRef}
                   , {line, Line}
                   , {reason, Reason}]})
    after 1000 ->
            error({monitor_timeout, Line})
    end.
%% Returns the attempt number of the current mrdb activity, taken from
%% mrdb:current_context/0.
get_attempt() ->
    Ctx = mrdb:current_context(),
    #{activity := #{attempt := N}} = Ctx,
    N.
%% Creates the given `{Name, Opts}' tables, signals the ?TABS_CREATED trace
%% checkpoint, and returns the list of table names.
create_tabs(Tabs, Config) ->
    Names = [create_tab(Spec) || Spec <- Tabs],
    tr_ct:trace_checkpoint(?TABS_CREATED, Config),
    Names.

create_tab({T, Opts}) ->
    create_tab(T, Opts).

%% Creates table `T' as an rdb table on the local node; returns `T'.
create_tab(T, Opts) ->
    TabDef = [{rdb, [node()]} | Opts],
    {atomic, ok} = mnesia:create_table(T, TabDef),
    T.
%% Deletes all listed tables; each deletion must succeed.
delete_tabs(Tabs) ->
    lists:foreach(fun(T) ->
                          {atomic,ok} = mnesia:delete_table(T)
                  end, Tabs),
    ok.
%% Snapshot of the calling process's dictionary, without the
%% `log_timestamp' entry.
get_dict() ->
    {dictionary, Entries} = process_info(self(), dictionary),
    lists:filter(fun({Key, _}) -> Key =/= log_timestamp end, Entries).
%% Asserts that the process dictionary (see get_dict/0) equals `Old'.
%% On mismatch, the badmatch term shows which entries were deleted or added.
dictionary_unchanged(Old) ->
    New = get_dict(),
    Diff = #{ deleted => Old -- New
            , added => New -- Old },
    #{ deleted := []
     , added := [] } = Diff,
    ok.

View File

@ -1,116 +0,0 @@
-module(mnesia_rocksdb_error_handling).
-export([run/0,
run/4]).
%% Runs the error-handling test for every combination of maintain_sz,
%% table type, operation and non-fatal level, followed by a single run at
%% level `fatal'.
run() ->
    setup(),
    %% run only one test for 'fatal', to save time.
    Results = [run(Type, Op, Level, MaintainSz)
               || MaintainSz <- [false, true],
                  Type <- [set, bag],
                  Op <- [insert, update, delete],
                  Level <- levels()],
    Results ++ [run(set, insert, fatal, false)].

%% One test run: fresh mnesia, a table with one pre-existing object, then
%% the write operation is attempted with the backend write functions mocked
%% to fail, and the matching error report is expected.
run(Type, Op, Level, MaintainSz) ->
    setup(),
    {ok, Tab} = create_tab(Type, Level, MaintainSz),
    mnesia:dirty_write({Tab, a, 1}), % pre-existing data
    Scenario = fun() ->
                       try_write(Op, Type, Tab),
                       expect_error(Level, Tab)
               end,
    with_mock(Level, Op, Tab, Scenario).
%% The non-fatal `on_write_error' levels exercised by run/0.
levels() ->
    [debug | [verbose, warning, error]].
%% Stops mnesia and starts it again via start_mnesia/0.
setup() ->
    _ = mnesia:stop(),
    start_mnesia().
%% Creates the rdb test table for the given combination and makes sure the
%% ets table used as error store (named after this module) exists first.
%% Returns `{ok, TabName}'.
create_tab(Type, Level, MaintainSz) ->
    TabName = tab_name(Type, Level, MaintainSz),
    %% create error store before the table
    ok = case ets:info(?MODULE) of
             undefined ->
                 ?MODULE = ets:new(?MODULE, [bag, public, named_table]),
                 ok;
             _ ->
                 ok
         end,
    TabDef = [{rdb, [node()]},
              {user_properties, user_props(Level, MaintainSz)}],
    {atomic, ok} = mnesia:create_table(TabName, TabDef),
    {ok, TabName}.
%% Builds the table name `t_<module>_<type>_<level>_<maintain_sz>'.
tab_name(Type, Level, MaintainSz) ->
    Parts = [["_", atom_to_list(A)] || A <- [?MODULE, Type, Level, MaintainSz]],
    Name = iolist_to_binary(["t" | Parts]),
    binary_to_atom(Name, utf8).
%% User properties for the test table: the maintain_sz flag and rocksdb
%% options selecting the write-error level and the error store (this module's
%% ets table).
user_props(Level, MaintainSz) ->
    RocksOpts = [ {on_write_error, Level}
                , {on_write_error_store, ?MODULE} ],
    [{maintain_sz, MaintainSz},
     {rocksdb_opts, RocksOpts}].
%% Starts mnesia through the test library with the `reset' option; the
%% result of that call is ignored.
start_mnesia() ->
    _ = mnesia_rocksdb_tlib:start_mnesia(reset),
    ok.
%% Runs F with mnesia_rocksdb_lib's put/4, write/3 and delete/3 mocked
%% (via meck, passthrough for everything else) to return errors, while
%% subscribed to mnesia system events at debug level.
%% Subscription, debug level and the mock are always undone afterwards.
with_mock(Level, Op, Tab, F) ->
mnesia:subscribe(system),
mnesia:set_debug_level(debug),
meck:new(mnesia_rocksdb_lib, [passthrough]),
meck:expect(mnesia_rocksdb_lib, put, 4, {error, some_put_error}),
meck:expect(mnesia_rocksdb_lib, write, 3, {error, some_write_error}),
meck:expect(mnesia_rocksdb_lib, delete, 3, {error,some_delete_error}),
try {Level, Op, Tab, F()} of
{_, _, _, ok} ->
ok;
Other ->
%% A non-ok result is only reported, not treated as a failure.
io:fwrite("OTHER: ~p~n", [Other]),
ok
catch
exit:{{aborted,_},_} ->
%% An aborted exit is only acceptable at level `error'
%% (badmatch otherwise).
Level = error,
ok
after
mnesia:set_debug_level(none),
mnesia:unsubscribe(system),
meck:unload(mnesia_rocksdb_lib)
end.
%% Performs the dirty operation under test. The table already holds
%% {Tab, a, 1}: an insert uses a new key for sets and the existing key with
%% a new value for bags, an update rewrites the existing object, and a
%% delete removes key `a'.
try_write(delete, _, Tab) ->
    mnesia:dirty_delete({Tab, a});
try_write(update, _, Tab) ->
    mnesia:dirty_write({Tab, a, 1});
try_write(insert, bag, Tab) ->
    mnesia:dirty_write({Tab, a, 2});
try_write(insert, set, Tab) ->
    mnesia:dirty_write({Tab, b, 2}).
%% Waits up to 1000 ms for the mnesia system event that corresponds to
%% `Level' (see rpt_tag/1), prints it, and then checks that exactly one
%% matching entry is removed from the error store for `Tab'.
expect_error(Level, Tab) ->
Tag = rpt_tag(Level),
receive
{mnesia_system_event, {mnesia_fatal, Fmt, Args, _Core}} ->
%% A fatal event is only acceptable if that is what we expected
%% (Tag is already bound, so this is an assertion).
Tag = mnesia_fatal,
io:fwrite("EVENT(~p, ~p):~n ~s", [Tag, Tab, io_lib:fwrite(Fmt, Args)]),
ok;
%% Tag is bound: only an event with the expected tag matches here.
{mnesia_system_event, {Tag, Fmt, Args}} ->
io:fwrite("EVENT(~p, ~p):~n ~s", [Tag, Tab, io_lib:fwrite(Fmt, Args)]),
ok
after 1000 ->
error({expected_error, [Level, Tab]})
end,
%% Also verify that an error entry has been written into the error store.
1 = ets:select_delete(?MODULE, [{{{Tab, '_'}, '_', '_'}, [], [true]}]),
ok.
%% Maps an `on_write_error' level to the tag of the mnesia system event it
%% is expected to produce.
rpt_tag(debug) -> mnesia_info;
rpt_tag(verbose) -> mnesia_info;
rpt_tag(warning) -> mnesia_warning;
rpt_tag(error) -> mnesia_error;
rpt_tag(fatal) -> mnesia_fatal.

View File

@ -22,7 +22,7 @@
-define(m(A,B), fun() -> L = ?LINE, -define(m(A,B), fun() -> L = ?LINE,
case {A,B} of case {A,B} of
{__X, __X} -> {X__, X__} ->
B; B;
Other -> Other ->
error({badmatch, [Other, error({badmatch, [Other,

View File

@ -16,13 +16,51 @@
%% under the License. %% under the License.
%%---------------------------------------------------------------- %%----------------------------------------------------------------
-module(mnesia_rocksdb_indexes). -module(mnesia_rocksdb_indexes_SUITE).
-export([
all/0
, groups/0
, suite/0
, init_per_suite/1
, end_per_suite/1
, init_per_group/2
, end_per_group/2
, init_per_testcase/2
, end_per_testcase/2
]).
-export([
index_plugin_mgmt/1
, add_indexes/1
, create_bag_index/1
, create_ordered_index/1
, test_1_ram_copies/1
, test_1_disc_copies/1
, fail_1_disc_only/1
, plugin_ram_copies1/1
, plugin_ram_copies2/1
, plugin_disc_copies/1
, fail_plugin_disc_only/1
, plugin_disc_copies_bag/1
, plugin_rdb_ordered/1
, index_iterator/1
]).
-include_lib("common_test/include/ct.hrl").
-export([run/0, -export([run/0,
run/1,
r1/0]). r1/0]).
-define(TAB(T), list_to_atom(lists:flatten(io_lib:fwrite("~w_~w", [T, ?LINE])))).
run() -> run() ->
run([]).
run(Config) ->
mnesia:stop(), mnesia:stop(),
maybe_set_dir(Config),
ok = mnesia_rocksdb_tlib:start_mnesia(reset), ok = mnesia_rocksdb_tlib:start_mnesia(reset),
test(1, ram_copies, r1), test(1, ram_copies, r1),
test(1, disc_copies, d1), test(1, disc_copies, d1),
@ -33,15 +71,86 @@ run() ->
add_del_indexes(), add_del_indexes(),
{atomic,ok} = mnesia_schema:add_index_plugin( {atomic,ok} = mnesia_schema:add_index_plugin(
{pfx},mnesia_rocksdb, ix_prefixes), {pfx},mnesia_rocksdb, ix_prefixes),
test_index_plugin(pr1, ram_copies, ordered), test_index_plugin(cfg([pr1, ram_copies, ordered], Config)),
test_index_plugin(pr2, ram_copies, bag), test_index_plugin(cfg([pr2, ram_copies, bag], Config)),
test_index_plugin(pd1, disc_copies, ordered), test_index_plugin(cfg([pd1, disc_copies, ordered], Config)),
fail(test_index_plugin, [pd2, disc_only_copies, ordered]), fail(test_index_plugin, [cfg([pd2, disc_only_copies, ordered], Config)]),
test_index_plugin(pd2, disc_copies, bag), test_index_plugin(cfg([pd2, disc_copies, bag], Config)),
test_index_plugin(pl2, rdb, ordered), test_index_plugin(cfg([pl2, rdb, ordered], Config)),
test_index_plugin_mgmt(), index_plugin_mgmt(Config),
ok. ok.
%% Common Test callback: no suite-level options.
suite() ->
[].
%% Common Test callback: everything runs through the `all_tests' group.
all() ->
[{group, all_tests}].
%% Common Test callback: `all_tests' runs the `mgmt', `access' and `plugin'
%% groups in sequence; each of those lists its test cases, also sequenced.
groups() ->
[
{all_tests, [sequence], [ {group, mgmt}, {group, access}, {group, plugin} ]}
, {mgmt, [sequence], [
create_bag_index
, create_ordered_index
, index_plugin_mgmt
, add_indexes
]}
, {access, [sequence], [
test_1_ram_copies
, test_1_disc_copies
, fail_1_disc_only
, index_iterator
]}
, {plugin, [sequence], [
plugin_ram_copies1
, plugin_ram_copies2
, plugin_disc_copies
, fail_plugin_disc_only
, plugin_disc_copies_bag
, plugin_rdb_ordered
]}
].
%% ======================================================================
%% Suite setup: stop mnesia and let maybe_set_dir/1 process the config.
init_per_suite(Config) ->
    mnesia:stop(),
    maybe_set_dir(Config),
    Config.

end_per_suite(_Config) ->
    ok.

%% Every group starts from a freshly reset mnesia. The `plugin' group
%% additionally registers the `{pfx}' index plugin.
init_per_group(Grp, Config) ->
    mnesia_rocksdb_tlib:restart_reset_mnesia(),
    if
        Grp =:= plugin ->
            {atomic,ok} = mnesia_schema:add_index_plugin(
                            {pfx},mnesia_rocksdb, ix_prefixes);
        true ->
            ok
    end,
    Config.

end_per_group(_Grp, _Config) ->
    ok.

init_per_testcase(_Case, Config) ->
    Config.

end_per_testcase(_Case, _Config) ->
    ok.
%% ======================================================================
%% cfg(Spec, Config): prepends a `my_config' entry to the CT config.
%% `Spec' is either a `[Tab, Type, IxType]' list, which is turned into a
%% map, or an already prepared map.
cfg([Tab, Type, IxType], Config) ->
    MyCfg = #{tab => Tab, type => Type, ixtype => IxType},
    [{my_config, MyCfg} | Config];
cfg(MyCfg, Config) when is_map(MyCfg) ->
    [{my_config, MyCfg} | Config].

%% cfg(Config): reads the `my_config' entry back.
cfg(Config) ->
    ?config(my_config, Config).
%% ======================================================================
r1() -> r1() ->
mnesia:stop(), mnesia:stop(),
ok = mnesia_rocksdb_tlib:start_mnesia(reset), ok = mnesia_rocksdb_tlib:start_mnesia(reset),
@ -51,17 +160,28 @@ r1() ->
dbg:tpl(mnesia_schema,x), dbg:tpl(mnesia_schema,x),
dbg:tpl(mnesia_index,x), dbg:tpl(mnesia_index,x),
dbg:p(all,[c]), dbg:p(all,[c]),
test_index_plugin(pd2, disc_only_copies, ordered). test_index_plugin(cfg([pd2, disc_only_copies, ordered], [])).
fail(F, Args) -> fail(F, Args) ->
try apply(?MODULE, F, Args), try apply(?MODULE, F, Args),
error(should_fail) error(should_fail)
catch catch
error:_ -> error:R when R =/= should_fail ->
io:fwrite("apply(~p, ~p, ~p) -> fails as expected~n", io:fwrite("apply(~p, ~p, ~p) -> fails as expected~n",
[?MODULE, F, Args]) [?MODULE, F, Args])
end. end.
%% Test-case wrappers for the `access' group: each runs test/3 for one
%% storage type; the disc_only_copies variant is expected to fail.
test_1_ram_copies( _Cfg) -> test(1, ram_copies, r1).
test_1_disc_copies(_Cfg) -> test(1, disc_copies, d1).
fail_1_disc_only( _Cfg) -> fail(test, [1, disc_only_copies, do1]).
%% Test-case wrappers for the `plugin' group: each runs test_index_plugin/1
%% with a [Tab, StorageType, IndexType] setup added to the CT config; the
%% disc_only_copies variant is expected to fail.
plugin_ram_copies1(Cfg) -> test_index_plugin(cfg([pr1, ram_copies, ordered], Cfg)).
plugin_ram_copies2(Cfg) -> test_index_plugin(cfg([pr2, ram_copies, bag], Cfg)).
plugin_disc_copies(Cfg) -> test_index_plugin(cfg([pd1, disc_copies, ordered], Cfg)).
fail_plugin_disc_only(Cfg) -> fail(test_index_plugin, [cfg([pd2, disc_only_copies, ordered], Cfg)]).
plugin_disc_copies_bag(Cfg) -> test_index_plugin(cfg([pd2, disc_copies, bag], Cfg)).
plugin_rdb_ordered(Cfg) -> test_index_plugin(cfg([pl2, rdb, ordered], Cfg)).
test(N, Type, T) -> test(N, Type, T) ->
{atomic, ok} = mnesia:create_table(T, [{Type,[node()]}, {atomic, ok} = mnesia:create_table(T, [{Type,[node()]},
{attributes,[k,a,b,c]}, {attributes,[k,a,b,c]},
@ -81,7 +201,8 @@ add_del_indexes() ->
{atomic, ok} = mnesia:add_table_index(l1, a), {atomic, ok} = mnesia:add_table_index(l1, a),
io:fwrite("add_del_indexes() -> ok~n", []). io:fwrite("add_del_indexes() -> ok~n", []).
test_index_plugin(Tab, Type, IxType) -> test_index_plugin(Config) ->
#{tab := Tab, type := Type, ixtype := IxType} = cfg(Config),
{atomic, ok} = mnesia:create_table(Tab, [{Type, [node()]}, {atomic, ok} = mnesia:create_table(Tab, [{Type, [node()]},
{index, [{{pfx}, IxType}]}]), {index, [{{pfx}, IxType}]}]),
mnesia:dirty_write({Tab, "foobar", "sentence"}), mnesia:dirty_write({Tab, "foobar", "sentence"}),
@ -100,10 +221,25 @@ test_index_plugin(Tab, Type, IxType) ->
Res2 = lists:sort(mnesia:dirty_index_read(Tab,<<"whi">>, {pfx})), Res2 = lists:sort(mnesia:dirty_index_read(Tab,<<"whi">>, {pfx})),
[{Tab,"foobar","sentence"}] = mnesia:dirty_index_read( [{Tab,"foobar","sentence"}] = mnesia:dirty_index_read(
Tab, <<"foo">>, {pfx}) Tab, <<"foo">>, {pfx})
end, end.
io:fwrite("test_index_plugin(~p, ~p, ~p) -> ok~n", [Tab,Type,IxType]).
test_index_plugin_mgmt() -> create_bag_index(_Config) ->
{aborted, {combine_error, _, _}} =
mnesia:create_table(bi, [{rdb, [node()]}, {index, [{val, bag}]}]),
ok.
%% Test case: creating rdb table `oi' with an `ordered' index on `val'
%% must succeed.
create_ordered_index(_Config) ->
    TabOpts = [{rdb, [node()]}, {index, [{val, ordered}]}],
    {atomic, ok} = mnesia:create_table(oi, TabOpts),
    ok.
%% Test case: create an rdb table with attributes k, a, b, c and then add
%% an index on attribute `a' to the existing table.
add_indexes(_Config) ->
    T = ?TAB(t1),
    TabOpts = [{rdb, [node()]}, {attributes, [k, a, b, c]}],
    {atomic, ok} = mnesia:create_table(T, TabOpts),
    {atomic, ok} = mnesia:add_table_index(T, a),
    ok.
index_plugin_mgmt(_Config) ->
{aborted,_} = mnesia:create_table(x, [{index,[{unknown}]}]), {aborted,_} = mnesia:create_table(x, [{index,[{unknown}]}]),
{aborted,_} = mnesia:create_table(x, [{index,[{{unknown},bag}]}]), {aborted,_} = mnesia:create_table(x, [{index,[{{unknown},bag}]}]),
{aborted,_} = mnesia:create_table(x, [{index,[{{unknown},ordered}]}]), {aborted,_} = mnesia:create_table(x, [{index,[{{unknown},ordered}]}]),
@ -166,9 +302,48 @@ test_index(3, T) ->
io:fwrite("test_index(1, ~p) -> ok~n", [T]), io:fwrite("test_index(1, ~p) -> ok~n", [T]),
ok. ok.
%% Test case: walk the indexes on attributes `a' and `b' of an rdb table
%% with mrdb_index:with_iterator/3 and check that every object is returned,
%% paired with its index value, in the expected order.
index_iterator(_Cfg) ->
    T = ?TAB(it),
    Attrs = [ {rdb,[node()]}
            , {record_name, i}
            , {attributes, [k,a,b]}
            , {index, [a,b]} ],
    {atomic, ok} = mnesia:create_table(T, Attrs),
    ct:log("created tab T=~p: ~p", [T, Attrs]),
    %% Keys 4..6 carry (a, y) and keys 1..3 carry (b, x).
    L1 = [{i,K,a,y} || K <- lists:seq(4,6)],
    L2 = [{i,K,b,x} || K <- lists:seq(1,3)],
    WriteResults = [mnesia:dirty_write(T, Obj) || Obj <- L1 ++ L2],
    true = lists:all(fun(R) -> R == ok end, WriteResults),
    ct:log("inserted ~p", [L1 ++ L2]),
    %% Expected iteration results; the matches below act as assertions.
    ResA = [{a,X} || X <- L1] ++ [{b,Y} || Y <- L2],
    ResB = [{x,X} || X <- L2] ++ [{y,Y} || Y <- L1],
    Collect = fun iter_all/1,
    ResA = mrdb_index:with_iterator(T, a, Collect),
    ct:log("mrdb_index:with_iterator(T, a, F) -> ~p", [ResA]),
    ResB = mrdb_index:with_iterator(T, b, Collect),
    ct:log("mrdb_index:with_iterator(T, b, F) -> ~p", [ResB]),
    ok.
%% iter_all/1: collect every {IndexValue, Object} pair produced by index
%% iterator I, starting from the first entry.
iter_all(I) ->
    First = mrdb_index:iterator_move(I, first),
    iter_all(First, I).

%% iter_all/2: accumulate entries for as long as the iterator yields
%% {ok, IxVal, Obj}; any other result ends the walk.
iter_all({ok, IxVal, Obj}, I) ->
    Next = mrdb_index:iterator_move(I, next),
    [{IxVal, Obj} | iter_all(Next, I)];
iter_all(_, _) ->
    [].
indexes(1) -> indexes(1) ->
[a,{b,ordered},{c,bag}]; [a,{b,ordered},{c,bag}];
indexes(2) -> indexes(2) ->
[a,b,{c,bag}]; [a,b,{c,bag}];
indexes(3) -> indexes(3) ->
[a,{b,ordered},{c,ordered}]. [a,{b,ordered},{c,ordered}].
%% If Config has a `priv_dir' entry, point the mnesia `dir' application
%% variable at <priv_dir>/mnesia_indexes; otherwise do nothing.
maybe_set_dir(Config) ->
    set_mnesia_dir(proplists:get_value(priv_dir, Config)).

%% Helper for maybe_set_dir/1: `undefined' means no priv_dir was given.
set_mnesia_dir(undefined) ->
    ok;
set_mnesia_dir(PrivDir) ->
    application:set_env(mnesia, dir, filename:join(PrivDir, "mnesia_indexes")).

View File

@ -0,0 +1,190 @@
-module(mnesia_rocksdb_migration_SUITE).
-export([
all/0
, suite/0
, groups/0
, init_per_suite/1
, end_per_suite/1
, init_per_group/2
, end_per_group/2
, init_per_testcase/2
, end_per_testcase/2
]).
-export([
manual_migration/1
, migrate_with_encoding_change/1
, auto_migration/1
]).
-include_lib("common_test/include/ct.hrl").
-define(TABS_CREATED, tables_created).
%% Common Test callback: no suite-level settings.
suite() ->
    [].

%% Common Test callback: everything runs through the `all_tests' group.
all() ->
    [{group, all_tests}].

%% Common Test callback: the test cases of `all_tests', run with the
%% `sequence' property.
groups() ->
    Cases = [manual_migration, migrate_with_encoding_change],
    [{all_tests, [sequence], Cases}].
%% Suite- and group-level Common Test callbacks. No setup or teardown is
%% done at these levels: the init callbacks return Config unchanged and
%% the end callbacks return ok.
init_per_suite(Config) ->
    Config.

end_per_suite(_Config) ->
    ok.

init_per_group(_Group, Config) ->
    Config.

end_per_group(_Group, _Config) ->
    ok.
%% Common Test callback: stop mnesia and start it again through
%% mnesia_rocksdb_tlib:start_mnesia(reset) before every test case.
%% Config is returned unchanged.
init_per_testcase(_TestCase, Config) ->
    mnesia:stop(),
    ok = mnesia_rocksdb_tlib:start_mnesia(reset),
    Config.
%% create_migrateable_db(Config).
%% Common Test callback: nothing to clean up after a test case.
end_per_testcase(_TestCase, _Config) ->
    ok.
%% Test case: run manual_migration_/1 through tr_ct:with_trace/3, using
%% the options returned by tr_opts/0.
manual_migration(Config) ->
    TestFun = fun manual_migration_/1,
    tr_ct:with_trace(TestFun, Config, tr_opts()).
%% Body of the manual_migration test case. Creates the standalone tables,
%% calls mnesia_rocksdb_admin:migrate_standalone/2 twice on them, and logs
%% the on-disk analysis before and after each call. The results are only
%% logged, not asserted.
manual_migration_(Config) ->
    create_migrateable_db(Config),
    Tabs = tables(),
    Before = analyze_tabs(Tabs),
    ct:log("Analyze (before): ~p", [Before]),
    FirstRes = mnesia_rocksdb_admin:migrate_standalone(rdb, Tabs),
    ct:log("migrate_standalone(rdb, ~p) -> ~p", [Tabs, FirstRes]),
    AfterFirst = analyze_tabs(Tabs),
    ct:log("AnalyzeRes = ~p", [AfterFirst]),
    %% Second call on the same tables.
    SecondRes = mnesia_rocksdb_admin:migrate_standalone(rdb, Tabs),
    ct:log("MigRes = ~p", [SecondRes]),
    AfterSecond = analyze_tabs(Tabs),
    ct:log("AnalyzeRes2 = ~p", [AfterSecond]),
    ct:log("Admin State = ~p", [sys:get_state(mnesia_rocksdb_admin)]),
    ok.
%% Test case: migrate standalone table `t' with migrate_standalone/2 while
%% changing its encoding from {sext,{object,term}} to {raw,raw}.
%%
%% Steps:
%%  1. Create `t' as a standalone table and insert two objects.
%%  2. Check the stored form under the original encoding.
%%  3. Migrate with the new encoding and check that the table ref, the
%%     user properties and the raw data reflect the migration.
%%  4. Restart mnesia and check that the data is still readable.
migrate_with_encoding_change(_Config) ->
    ok = create_tab(t, [{user_properties, [{mrdb_encoding, {sext,{object,term}}},
                                           {rocksdb_standalone, true}]},
                        {index,[val]}
                       ]),
    mrdb:insert(t, {t, <<"1">>, <<"a">>}),
    mrdb:insert(t, {t, <<"2">>, <<"b">>}),
    TRef = mrdb:get_ref(t),
    %% Before migration: keys are sext-encoded and the stored value is an
    %% external-term-format object with [] in the key position.
    {ok, V1} = mrdb:rdb_get(TRef, sext:encode(<<"1">>), []),
    {ok, V2} = mrdb:rdb_get(TRef, sext:encode(<<"2">>), []),
    {t,[],<<"a">>} = binary_to_term(V1),
    {t,[],<<"b">>} = binary_to_term(V2),
    Opts = #{encoding => {raw, raw}},
    MigRes = mnesia_rocksdb_admin:migrate_standalone(rdb, [{t, Opts}]),
    ct:log("MigRes (t) = ~p", [MigRes]),
    %%
    %% Ensure that metadata reflect the migrated table
    %% (now a column family, and the rocksdb_standalone prop gone)
    %%
    TRef1 = mrdb:get_ref(t),
    ct:log("TRef1(t) = ~p", [TRef1]),
    #{type := column_family,
      properties := #{user_properties := UPs}} = TRef1,
    error = maps:find(rocksdb_standalone, UPs),
    UPsR = lists:sort(maps:values(UPs)),
    UPsM = lists:sort(mnesia:table_info(t, user_properties)),
    %% Both variables are bound, so this match asserts that the table ref
    %% and mnesia report the same user properties.
    {UPsR,UPsM} = {UPsM,UPsR},
    ct:log("user properties (t): ~p", [UPsM]),
    %% With {raw,raw} encoding the fold sees the plain key/value binaries.
    [{<<"2">>, <<"b">>},
     {<<"1">>, <<"a">>}] = mrdb:rdb_fold(
                             t, fun(K,V,A) -> [{K,V}|A] end, [], <<>>),
    ct:log("All data present in new column family", []),
    ct:log("Contents of mnesia dir: ~p",
           [ok(file:list_dir(mnesia:system_info(directory)))]),
    %% Restart mnesia. The "stopped" message is logged only after the stop
    %% call has returned, and a failed start is caught right here rather
    %% than as a wait_for_tables timeout further down.
    mnesia:stop(),
    ct:log("mnesia stopped", []),
    ok = mnesia:start(),
    ct:log("mnesia started", []),
    mnesia:info(),
    ok = mnesia:wait_for_tables([t], 3000),
    ct:log("tables loaded", []),
    [{t,<<"1">>,<<"a">>},
     {t,<<"2">>,<<"b">>}] = mrdb:select(
                              t, [{'_',[],['$_']}]),
    [{<<"2">>,<<"b">>},
     {<<"1">>,<<"a">>}] = mrdb:rdb_fold(
                            t, fun(K,V,A) -> [{K,V}|A] end, [], <<>>),
    ok.
%% Placeholder test case: it performs no checks and always returns ok.
auto_migration(_Config) ->
    ok.
%% Unwrap an {ok, Value} tuple; any other argument fails with
%% function_clause.
ok({ok, Unwrapped}) ->
    Unwrapped.
%% Options passed to tr_ct:with_trace/3: trace patterns for
%% mnesia_rocksdb_admin, mnesia_rocksdb_lib and rocksdb, followed by one
%% pattern per exported function of mrdb.
tr_opts() ->
    MrdbPatterns = trace_exports(mrdb, x),
    #{ patterns => [ {mnesia_rocksdb_admin, '_', []}
                   , {mnesia_rocksdb_lib, '_', []}
                   , {rocksdb, '_', x} | MrdbPatterns ] }.
%% Build one {Module, Function, Arity, Pat} tuple for every function
%% exported by module M.
trace_exports(M, Pat) ->
    [{M, F, A, Pat} || {F, A} <- M:module_info(exports)].
%% Names of the tables used by the manual migration test.
tables() ->
    [a].
%% Create every table from tables/0 with the `rocksdb_standalone' user
%% property set, verify that they exist as standalone tables, and fill
%% them with data. Returns Config unchanged.
create_migrateable_db(Config) ->
    TabNames = tables(),
    StandaloneOpts = [{user_properties, [{rocksdb_standalone, true}]}],
    TabSpecs = [{T, StandaloneOpts} || T <- TabNames],
    create_tabs(TabSpecs, Config),
    verify_tabs_are_standalone(TabNames),
    fill_tabs(TabNames),
    Config.
%% Insert the objects {Tab, 1, a}, {Tab, 2, a} and {Tab, 3, a} into each
%% table in Tabs via mrdb:insert/2. Returns ok.
fill_tabs(Tabs) ->
    Keys = lists:seq(1,3),
    InsertAll = fun(Tab) ->
                        lists:foreach(
                          fun(K) -> mrdb:insert(Tab, {Tab, K, a}) end,
                          Keys)
                end,
    lists:foreach(InsertAll, Tabs).
%% Create every {Table, Options} entry in Tabs, then record the
%% `tables_created' trace checkpoint. Returns the list of create_tab/1
%% results.
create_tabs(Tabs, Config) ->
    Results = [create_tab(TabSpec) || TabSpec <- Tabs],
    tr_ct:trace_checkpoint(?TABS_CREATED, Config),
    Results.
%% create_tab/1: unpack a {Table, Options} pair and delegate to
%% create_tab/2.
create_tab({T, Opts}) ->
    create_tab(T, Opts).

%% create_tab/2: create table T as an rdb table on the local node with the
%% extra options Opts. Fails with badmatch unless mnesia returns
%% {atomic, ok}.
create_tab(T, Opts) ->
    TabDef = [{rdb, [node()]} | Opts],
    {atomic, ok} = mnesia:create_table(T, TabDef),
    ok.
%% Check that every table in Tabs has a standalone table file on disk, as
%% reported by analyze_tabs/1.
%%
%% Returns ok when none of Tabs is missing. Otherwise raises
%% error({not_standalone, NotSA}), where NotSA lists the missing tables.
%% The error is raised whether or not other standalone tables were found,
%% so a partial failure gives the same report as a total one.
verify_tabs_are_standalone(Tabs) ->
    case analyze_tabs(Tabs) of
        {_, []} ->
            ok;
        {_, NotSA} ->
            error({not_standalone, NotSA})
    end.
%% Scan the mnesia directory for files matching "*-_tab.extrdb" and derive
%% a table name from each file name.
%%
%% Returns {Found, NotSA}: NotSA holds the members of Tabs for which no
%% such file was found, and Found is the list of names derived from the
%% files with NotSA subtracted.
analyze_tabs(Tabs) ->
    Pattern = filename:join(mnesia:system_info(directory), "*-_tab.extrdb"),
    Files = filelib:wildcard(Pattern),
    ct:log("Files = ~p", [Files]),
    %% The table name is the last path component up to "-_tab.extrdb".
    %% list_to_existing_atom/1 avoids creating atoms from file names.
    ToTabName = fun(File) ->
                        {match,[TStr]} =
                            re:run(File, "^.+/([^/]+)-_tab\\.extrdb$",
                                   [{capture, [1], list}]),
                        list_to_existing_atom(TStr)
                end,
    TabNames = [ToTabName(File) || File <- Files],
    ct:log("TabNames = ~p", [TabNames]),
    NotSA = Tabs -- TabNames,
    {TabNames -- NotSA, NotSA}.

View File

@ -78,6 +78,18 @@ setup_mnesia() ->
ok = mnesia:delete_schema([node()]), ok = mnesia:delete_schema([node()]),
ok = mnesia:create_schema([node()]), ok = mnesia:create_schema([node()]),
ok = mnesia:start(), ok = mnesia:start(),
%%
%% dbg:tracer(),
%% dbg:tpl(mnesia_rocksdb_admin, x),
%% dbg:tpl(mnesia_rocksdb,x),
%% dbg:ctpl(mnesia_rocksdb, check_definition_entry),
%% dbg:ctpl(mnesia_rocksdb, '-check_definition/4-fun-0-'),
%% dbg:tpl(mnesia_rocksdb_lib,x),
%% dbg:tp(mnesia,x),
%% dbg:tpl(mrdb,x),
%% dbg:tp(rocksdb,x),
%% dbg:p(all,[c]),
%%
{ok, rocksdb_copies} = mnesia_rocksdb:register(). {ok, rocksdb_copies} = mnesia_rocksdb:register().
setup() -> setup() ->

View File

@ -20,23 +20,32 @@
-export([start_mnesia/0, -export([start_mnesia/0,
start_mnesia/1, start_mnesia/1,
restart_reset_mnesia/0,
create_table/1, create_table/1,
create_table/3, create_table/3,
trace/2]). trace/2]).
%% Stop mnesia, then start it again via start_mnesia(reset).
%% Returns the result of start_mnesia/1.
restart_reset_mnesia() ->
    mnesia:stop(),
    start_mnesia(reset).
start_mnesia() -> start_mnesia() ->
start_mnesia(false). start_mnesia(false).
start_mnesia(Mode) -> start_mnesia(Mode) ->
if Mode==reset -> if Mode==reset ->
mnesia:delete_schema([node()]), DRes = mnesia:delete_schema([node()]),
mnesia:create_schema([node()], ct:log("Delete schema: ~p", [DRes]),
[{backend_types, CRes = mnesia:create_schema([node()],
[{rdb,mnesia_rocksdb}]}]); [{backend_types,
[{rdb,mnesia_rocksdb}]}]),
ct:log("Create schema: ~p", [CRes]);
true -> ok true -> ok
end, end,
mnesia:start(). SRes = mnesia:start(),
ct:log("Mnesia start: ~p", [SRes]),
true = lists:member(rdb, mnesia_schema:backend_types()),
SRes.
create_table(Backend) -> create_table(Backend) ->
create_table(Backend, [k,v], [v]). create_table(Backend, [k,v], [v]).

74
test/mrdb_bench.erl Normal file
View File

@ -0,0 +1,74 @@
-module(mrdb_bench).
-compile(export_all).
%% Set up the benchmark: delete any existing schema, create a new one via
%% mnesia_rocksdb:create_schema/1, start mnesia and create one table per
%% entry in tabs/0, all with record name `r'. The results of the
%% individual steps are not checked. Returns ok.
init() ->
    Nodes = [node()],
    mnesia:delete_schema(Nodes),
    mnesia_rocksdb:create_schema(Nodes),
    mnesia:start(),
    lists:foreach(
      fun({Name, Type}) ->
              mnesia:create_table(Name, [{Type, [node()]}, {record_name, r}])
      end, tabs()),
    ok.
%% The benchmark tables as {Name, StorageType} pairs. Tables `rocks' and
%% `rdb' both use rocksdb_copies; write/2, fold_/1 and one_tx/2 access
%% `rdb' through the mrdb API and the others through mnesia.
tabs() ->
    [ {rc, ram_copies}
    , {dc, disc_copies}
    , {do, disc_only_copies}
    , {rocks, rocksdb_copies}
    , {rdb, rocksdb_copies} ].
%% fill/1: write N objects into every benchmark table and return a list of
%% {Table, timer:tc/1 result} pairs.
fill(N) ->
    [{Tab, timer:tc(fun() -> fill_(Tab, N) end)} || {Tab, _Type} <- tabs()].

%% fill_/2: write objects with keys N, N-1, ..., 1 into table T, each with
%% the same 10-byte binary payload.
fill_(_T, 0) ->
    ok;
fill_(T, N) when N > 0 ->
    Obj = {r, N, <<"1234567890">>},
    write(T, Obj),
    fill_(T, N-1).
%% Write Obj into a benchmark table: table `rdb' goes through
%% mrdb:insert/2, every other table through mnesia:dirty_write/2.
write(rdb, Obj) ->
    mrdb:insert(rdb, Obj);
write(Tab, Obj) ->
    mnesia:dirty_write(Tab, Obj).
%% fold/0: time a full fold over every benchmark table and return a list
%% of {Table, timer:tc/1 result} pairs.
fold() ->
    [{Tab, timer:tc(fun() -> fold_(Tab) end)} || {Tab, _Type} <- tabs()].

%% fold_/1: fold over all objects with a fun that just passes the
%% accumulator through. Table `rdb' uses mrdb:fold/3; the others use
%% mnesia:foldl/3 inside an async_dirty activity.
fold_(rdb) ->
    mrdb:fold(rdb, fun(_, Acc) -> Acc end, ok);
fold_(T) ->
    Keep = fun(_, Acc) -> Acc end,
    mnesia:activity(async_dirty, fun() -> mnesia:foldl(Keep, ok, T) end).
%% Time N read-update-read transactions on every benchmark table and
%% return a list of {Table, timer:tc/1 result} pairs.
tx(N) ->
    [{Tab, timer:tc(fun() -> tx_(Tab, N) end)} || {Tab, _Type} <- tabs()].
%% tx_(T, N) ->
%% tx_(T, N, N, 10).
%% Run one_tx/2 on table T for keys N, N-1, ..., 1.
tx_(_T, 0) ->
    ok;
tx_(T, N) when N > 0 ->
    one_tx(T, N),
    tx_(T, N-1).
%% Run one transaction that reads the object with key N, appends "x" to
%% its binary payload, writes it back and reads it again to confirm the
%% update. Table `rdb' uses an mrdb activity; every other table uses a
%% plain mnesia transaction.
one_tx(rdb, N) ->
    TxFun = fun() ->
                    [{r, N, Str}] = mrdb:read(rdb, N),
                    Str2 = <<Str/binary, "x">>,
                    mrdb:insert(rdb, {r, N, Str2}),
                    [{r, N, Str2}] = mrdb:read(rdb, N)
            end,
    mrdb:activity(transaction, rocksdb_copies, TxFun);
one_tx(T, N) ->
    TxFun = fun() ->
                    [{r, N, Str}] = mnesia:read(T, N),
                    Str2 = <<Str/binary, "x">>,
                    mnesia:write(T, {r, N, Str2}, write),
                    [{r, N, Str2}] = mnesia:read(T, N)
            end,
    mnesia:activity(transaction, TxFun).

31
test/mrdb_ttb.erl Normal file
View File

@ -0,0 +1,31 @@
-module(mrdb_ttb).
-export([ on_nodes/2
, stop/0
, stop_nofetch/0
, format/2
, format/3 ]).
-export([ patterns/0
, flags/0 ]).
%% Call tr_ttb:on_nodes/3 with Ns and File, passing this module as the
%% third argument.
on_nodes(Ns, File) ->
    tr_ttb:on_nodes(Ns, File, ?MODULE).

%% The trace patterns, as returned by mrdb:patterns/0.
patterns() ->
    mrdb:patterns().

%% The trace flags used by this module.
flags() ->
    {all, call}.
%% Thin wrappers that delegate directly to the tr_ttb functions of the
%% same name and arity.
stop() ->
    tr_ttb:stop().

stop_nofetch() ->
    tr_ttb:stop_nofetch().

format(Dir, Out) ->
    tr_ttb:format(Dir, Out).

format(Dir, Out, Opts) ->
    tr_ttb:format(Dir, Out, Opts).