Merge pull request #22 from aeternity/gh3553-refactor-plugin-final
Refactor to support column families, direct rocksdb access
commit b0bf4b6b9c

1 .gitignore (vendored)
@@ -1,2 +1 @@
/doc
/_build
10 Makefile
@@ -1,7 +1,7 @@
suite=$(if $(SUITE), suite=$(SUITE), )
REBAR3=$(shell which rebar3 || echo ./rebar3)

.PHONY: all check test clean run
.PHONY: all check test clean run dialyzer xref

all:
	$(REBAR3) compile
@@ -18,7 +18,13 @@ eunit:
ct:
	$(REBAR3) ct $(suite)

test: eunit ct
test: dialyzer xref eunit ct

dialyzer:
	$(REBAR3) dialyzer

xref:
	$(REBAR3) xref

conf_clean:
	@:
271 README.md
@@ -1,16 +1,53 @@
# mnesia_rocksdb

A RocksDB backend for Mnesia.

This permits Erlang/OTP applications to use RocksDB as a backend for
mnesia tables. It is based on Klarna's `mnesia_eleveldb`.
# Mnesia Rocksdb - Rocksdb backend plugin for Mnesia #

## Prerequisites
Copyright (c) 2013-21 Klarna AB

- rocksdb (included as dependency)
- Erlang/OTP 20.0 or newer (https://github.com/erlang/otp)
__Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).

## Getting started
The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
it possible to utilize more capable key-value stores than the `dets`
module (limited to 2 GB per table). Unfortunately, this support is
undocumented. Below, some informal documentation for the plugin system
is provided.


### <a name="Table_of_Contents">Table of Contents</a> ###

1. [Usage](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Usage)
1. [Prerequisites](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Prerequisites)
1. [Getting started](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Getting_started)
1. [Special features](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Special_features)
1. [Customization](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Customization)
1. [Handling of errors in write operations](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Handling_of_errors_in_write_operations)
1. [Caveats](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Caveats)

1. [Mnesia backend plugins](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Mnesia_backend_plugins)
1. [Background](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Background)
1. [Design](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Design)

1. [Mnesia index plugins](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Mnesia_index_plugins)

1. [Rocksdb](https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/README.md#Rocksdb)


### <a name="Usage">Usage</a> ###


#### <a name="Prerequisites">Prerequisites</a> ####

* rocksdb (included as dependency)
* sext (included as dependency)
* Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)


#### <a name="Getting_started">Getting started</a> ####

Call `mnesia_rocksdb:register()` immediately after
starting mnesia.
@@ -18,30 +55,32 @@ starting mnesia.
Put `{rocksdb_copies, [node()]}` into the table definitions of
tables you want to be in RocksDB.
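As a quick illustration of the two steps just described, here is a minimal sketch; the table name `foo` and its attributes are made up, and a disc-based mnesia schema is assumed to already exist:

```erlang
%% Minimal sketch: register the backend, then create a RocksDB-backed table.
%% `foo`, `key` and `val` are hypothetical; adjust to your own schema.
ok = mnesia:start(),
_ = mnesia_rocksdb:register(),            %% makes the rocksdb_copies alias available
{atomic, ok} =
    mnesia:create_table(foo, [{rocksdb_copies, [node()]},
                              {attributes, [key, val]}]).
```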
## Special features

RocksDB tables support efficient selects on *prefix keys*.
#### <a name="Special_features">Special features</a> ####

RocksDB tables support efficient selects on _prefix keys_.

The backend uses the `sext` module (see
https://github.com/uwiger/sext) for mapping between Erlang terms and the
[`https://github.com/uwiger/sext`](https://github.com/uwiger/sext)) for mapping between Erlang terms and the
binary data stored in the tables. This provides two useful properties:

- The records are stored in the Erlang term order of their keys.
- A prefix of a composite key is ordered just before any key for which
  it is a prefix. For example, `{x, '_'}` is a prefix for keys `{x, a}`,
  `{x, b}` and so on.
* The records are stored in the Erlang term order of their keys.

* A prefix of a composite key is ordered just before any key for which
  it is a prefix. For example, `{x, '_'}` is a prefix for keys `{x, a}`,`{x, b}` and so on.

This means that a prefix key identifies the start of the sequence of
entries whose keys match the prefix. The backend uses this to optimize
selects on prefix keys.
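To make the optimization concrete, here is a sketch of a prefix-bound select; the table `tab` and its record layout are hypothetical:

```erlang
%% Hypothetical table `tab` whose keys are 2-tuples {Prefix, SubKey}.
%% Binding the first element (here the atom `x`) in the match head gives the
%% backend a prefix key, so it can seek to the first matching entry rather
%% than scanning the whole table.
MatchSpec = [{ {tab, {x, '$1'}, '$2'}, [], ['$_'] }],
Matches   = mnesia:dirty_select(tab, MatchSpec).
```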
## Customization
### Customization

RocksDB supports a number of customization options. These can be specified
by providing a `{Key, Value}` list named `rocksdb_opts` under `user_properties`,
for example:

```erlang
```
mnesia:create_table(foo, [{rocksdb_copies, [node()]},
                          ...
                          {user_properties,
@@ -53,6 +92,7 @@ Consult the [RocksDB documentation](https://github.com/facebook/rocksdb/wiki/Set
for information on configuration parameters. Also see the section below on handling write errors.

The default configuration for tables in `mnesia_rocksdb` is:

```
default_open_opts() ->
    [ {create_if_missing, true}
@@ -74,60 +114,169 @@ This is experimental, and mostly copied from `mnesia_leveldb`. Consult the
source code in `mnesia_rocksdb_tuning.erl` and `mnesia_rocksdb_params.erl`.
Contributions are welcome.

## Handling of errors in write operations

The RocksDB update operations return either `ok` or `{error, any()}`.
Since the actual updates are performed after the 'point-of-no-return',
returning an `error` result will cause mnesia to behave unpredictably,
since the operations are expected to simply work.

### Option 1: `on_write_error`

An `on_write_error` option can be provided, per-table, in the `rocksdb_opts`
user property (see [Customization](#customization) above).
Supported values indicate at which level an error indication should be reported.
Mnesia may save reported events in RAM, and may also print them,
depending on the debug level (controlled with `mnesia:set_debug_level/1`).

Mnesia debug levels are, in increasing detail, `none | verbose | debug | trace`
The supported values for `on_write_error` are:

| Value   | Saved at debug level | Printed at debug level | Action    |
| ------- | -------------------- | ---------------------- | --------- |
| debug   | unless none          | verbose, debug, trace  | ignore    |
| verbose | unless none          | verbose, debug, trace  | ignore    |
| warning | always               | always                 | ignore    |
| error   | always               | always                 | exception |
| fatal   | always               | always                 | core dump |
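For example, a table that should only log write errors (rather than raise) can set the option in its `rocksdb_opts`; the table name below is hypothetical:

```erlang
%% Sketch: report RocksDB write errors for this table at `warning` level,
%% which is always saved and printed but does not raise an exception.
mnesia:create_table(foo,
                    [{rocksdb_copies, [node()]},
                     {user_properties,
                      [{rocksdb_opts, [{on_write_error, warning}]}]}]).
```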
### Option 2: `on_write_error_store`

An `on_write_error_store` option can be provided, per-table, in the `rocksdb_opts`
user property (see [Customization](#customization) above).
When set, the backend will use the value of the option as the name for an ETS table
which is used as storage for runtime write errors. The table must be set up outside
of the backend by the clients themselves.

Entries to the table are in the form of a tuple `{{Table, Key}, Error, InsertedAt}`
where `Table` refers to the Mnesia table name, `Key` is the primary key being used by Mnesia,
`Error` is the error encountered by the backend, and `InsertedAt` refers to the time
the error was encountered as system time in milliseconds.

The backend will only insert entries and otherwise not manage the table. Thus, clients
are expected to clean up the table during runtime to prevent memory leakage.
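A sketch of how a client might wire this up and keep the store bounded; the ETS table name, the mnesia table `foo`, and the one-hour retention window are all arbitrary choices for illustration:

```erlang
%% Create the error store up front and point the table at it.
setup_error_store() ->
    Store = ets:new(rdb_write_errors, [named_table, public, set]),
    {atomic, ok} =
        mnesia:create_table(foo,
                            [{rocksdb_copies, [node()]},
                             {user_properties,
                              [{rocksdb_opts,
                                [{on_write_error_store, Store}]}]}]),
    Store.

%% Run periodically: drop entries older than one hour, matching the
%% {{Table, Key}, Error, InsertedAt} layout described above.
sweep_error_store(Store) ->
    Cutoff = erlang:system_time(millisecond) - timer:hours(1),
    ets:select_delete(Store, [{{'_', '_', '$1'}, [{'<', '$1', Cutoff}], [true]}]).
```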
## Caveats
#### <a name="Caveats">Caveats</a> ####

Avoid placing `bag` tables in RocksDB. Although they work, each write
requires additional reads, causing substantial runtime overheads. There
are better ways to represent and process bag data (see above about
*prefix keys*).
_prefix keys_).

The `mnesia:table_info(T, size)` call always returns zero for RocksDB
tables. RocksDB itself does not track the number of elements in a table, and
although it is possible to make the mnesia_rocksdb backend maintain a size
although it is possible to make the `mnesia_rocksdb` backend maintain a size
counter, it incurs a high runtime overhead for writes and deletes since it
forces them to first do a read to check the existence of the key. If you
depend on having an up to date size count at all times, you need to maintain
it yourself. If you only need the size occasionally, you may traverse the
table to count the elements.
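A traversal-based count can be a simple fold, as in this sketch (any table name works):

```erlang
%% Sketch: count the records in a table by folding over it in a dirty activity.
count(Tab) ->
    mnesia:activity(async_dirty,
                    fun() ->
                            mnesia:foldl(fun(_Obj, N) -> N + 1 end, 0, Tab)
                    end).
```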
### <a name="Mnesia_backend_plugins">Mnesia backend plugins</a> ###


#### <a name="Background">Background</a> ####

Mnesia was initially designed to be a RAM-only DBMS, and Erlang's
`ets` tables were developed for this purpose. In order to support
persistence, e.g. for configuration data, a disk-based version of `ets`
(called `dets`) was created. The `dets` API mimics the `ets` API,
and `dets` is quite convenient and fast for (nowadays) small datasets.
However, using a 32-bit bucket system, it is limited to 2 GB of data.
It also doesn't support ordered sets. When used in Mnesia, dets-based
tables are called `disc_only_copies`.

To circumvent these limitations, another table type, called `disc_copies`,
was added. This is a combination of `ets` and `disk_log`, where Mnesia
periodically snapshots the `ets` data to a log file on disk, and meanwhile
maintains a log of updates, which can be applied at startup. These tables
are quite performant (especially on read access), but all data is kept in
RAM, which can become a serious limitation.

A backend plugin system was proposed by Ulf Wiger in 2016, and further
developed with Klarna's support, to finally become included in OTP 19.
Klarna uses a LevelDb backend, but Aeternity, in 2017, instead chose
to implement a Rocksdb backend plugin.


### <a name="Design">Design</a> ###

As backend plugins were added on a long-since legacy-stable Mnesia,
they had to conform to the existing code structure. For this reason,
the plugin callbacks hook into the already present low-level access
API in the `mnesia_lib` module. As a consequence, backend plugins have
the same access semantics and granularity as `ets` and `dets`. This
isn't much of a disadvantage for key-value stores like LevelDb and RocksDB,
but a more serious issue is that the update part of this API is called
_after_ the point of no return. That is, Mnesia does not expect
these updates to fail, and has no recourse if they do. As an aside,
this could also happen if a `disc_only_copies` table exceeds the 2 GB
limit (mnesia will not check it, and `dets` will not complain, but simply
drop the update).
### <a name="Mnesia_index_plugins">Mnesia index plugins</a> ###

When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.

An index plugin can be added in one of two ways:

1. When creating a schema, provide `{index_plugins, [{Name, Module, Function}]}` options.

1. Call the function `mnesia_schema:add_index_plugin(Name, Module, Function)`

`Name` must be an atom wrapped as a 1-tuple, e.g. `{words}`.

The plugin callback is called as `Module:Function(Table, Pos, Obj)`, where `Pos=={words}` in
our example. It returns a list of index terms.

<strong>Example</strong>

Given the following index plugin implementation:

```
-module(words).
-export([words_f/3]).

words_f(_,_,Obj) when is_tuple(Obj) ->
    words_(tuple_to_list(Obj)).

words_(Str) when is_binary(Str) ->
    string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
words_(L) when is_list(L) ->
    lists:flatmap(fun words_/1, L);
words_(_) ->
    [].
```

We can register the plugin and use it in table definitions:

```
Eshell V12.1.3 (abort with ^G)
1> mnesia:start().
ok
2> mnesia_schema:add_index_plugin({words}, words, words_f).
{atomic,ok}
3> mnesia:create_table(i, [{index, [{words}]}]).
{atomic,ok}
```

Note that in this case, we had neither a backend plugin, nor even a persistent schema.
Index plugins can be used with all table types. The registered indexing function (arity 3) must exist
as an exported function along the node's code path.

To see what happens when we insert an object, we can turn on call trace.

```
4> dbg:tracer().
{ok,<0.108.0>}
5> dbg:tp(words, x).
{ok,[{matched,nonode@nohost,3},{saved,x}]}
6> dbg:p(all,[c]).
{ok,[{matched,nonode@nohost,60}]}
7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
                                             <<"four">>]
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
                                             <<"four">>]
ok
8> dbg:ctp('_'), dbg:stop().
ok
9> mnesia:dirty_index_read(i, <<"one">>, {words}).
[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
```

(The fact that the indexing function is called twice seems like a performance bug.)

We can observe that the indexing callback is able to operate on the whole object.
It needs to be side-effect free and efficient, since it will be called at least once for each update
(if an old object exists in the table, the indexing function will be called on it too, before it is
replaced by the new object).


### <a name="Rocksdb">Rocksdb</a> ###


### <a name="Usage">Usage</a> ###


## Modules ##

<table width="100%" border="0" summary="list of modules">
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb.md" class="module">mnesia_rocksdb</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_admin.md" class="module">mnesia_rocksdb_admin</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_app.md" class="module">mnesia_rocksdb_app</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_lib.md" class="module">mnesia_rocksdb_lib</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_params.md" class="module">mnesia_rocksdb_params</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_sup.md" class="module">mnesia_rocksdb_sup</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mnesia_rocksdb_tuning.md" class="module">mnesia_rocksdb_tuning</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb.md" class="module">mrdb</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb_index.md" class="module">mrdb_index</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb_mutex.md" class="module">mrdb_mutex</a></td></tr>
<tr><td><a href="https://github.com/aeternity/mnesia_rocksdb/blob/g3553-refactor-plugin-migration-tmp-220318/doc/mrdb_select.md" class="module">mrdb_select</a></td></tr></table>
282 doc/README.md (new file)
@@ -0,0 +1,282 @@

# Mnesia Rocksdb - Rocksdb backend plugin for Mnesia #

Copyright (c) 2013-21 Klarna AB

__Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).

The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
it possible to utilize more capable key-value stores than the `dets`
module (limited to 2 GB per table). Unfortunately, this support is
undocumented. Below, some informal documentation for the plugin system
is provided.


### <a name="Table_of_Contents">Table of Contents</a> ###

1. [Usage](#Usage)
1. [Prerequisites](#Prerequisites)
1. [Getting started](#Getting_started)
1. [Special features](#Special_features)
1. [Customization](#Customization)
1. [Handling of errors in write operations](#Handling_of_errors_in_write_operations)
1. [Caveats](#Caveats)

1. [Mnesia backend plugins](#Mnesia_backend_plugins)
1. [Background](#Background)
1. [Design](#Design)

1. [Mnesia index plugins](#Mnesia_index_plugins)

1. [Rocksdb](#Rocksdb)


### <a name="Usage">Usage</a> ###


#### <a name="Prerequisites">Prerequisites</a> ####

* rocksdb (included as dependency)
* sext (included as dependency)
* Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)


#### <a name="Getting_started">Getting started</a> ####

Call `mnesia_rocksdb:register()` immediately after
starting mnesia.

Put `{rocksdb_copies, [node()]}` into the table definitions of
tables you want to be in RocksDB.


#### <a name="Special_features">Special features</a> ####

RocksDB tables support efficient selects on _prefix keys_.

The backend uses the `sext` module (see
[`https://github.com/uwiger/sext`](https://github.com/uwiger/sext)) for mapping between Erlang terms and the
binary data stored in the tables. This provides two useful properties:

* The records are stored in the Erlang term order of their keys.

* A prefix of a composite key is ordered just before any key for which
  it is a prefix. For example, `{x, '_'}` is a prefix for keys `{x, a}`,`{x, b}` and so on.

This means that a prefix key identifies the start of the sequence of
entries whose keys match the prefix. The backend uses this to optimize
selects on prefix keys.

### Customization

RocksDB supports a number of customization options. These can be specified
by providing a `{Key, Value}` list named `rocksdb_opts` under `user_properties`,
for example:

```
mnesia:create_table(foo, [{rocksdb_copies, [node()]},
                          ...
                          {user_properties,
                           [{rocksdb_opts, [{max_open_files, 1024}]}]
                          }])
```

Consult the [RocksDB documentation](https://github.com/facebook/rocksdb/wiki/Setup-Options-and-Basic-Tuning)
for information on configuration parameters. Also see the section below on handling write errors.

The default configuration for tables in `mnesia_rocksdb` is:

```
default_open_opts() ->
    [ {create_if_missing, true}
    , {cache_size,
       list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
    , {block_size, 1024}
    , {max_open_files, 100}
    , {write_buffer_size,
       list_to_integer(get_env_default(
                         "ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
    , {compression,
       list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
    , {use_bloomfilter, true}
    ].
```
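The `get_env_default/2` calls above suggest that `ROCKSDB_CACHE_SIZE`, `ROCKSDB_WRITE_BUFFER_SIZE` and `ROCKSDB_COMPRESSION` can also be supplied via the node's environment, while per-table values go in `rocksdb_opts` as shown earlier. A sketch of a per-table override (the table name and sizes are arbitrary):

```erlang
%% Sketch: a write-heavy table that overrides two of the defaults above.
%% Whether these merge with or replace the remaining defaults is up to the
%% backend; only the listed keys are intended to change here.
mnesia:create_table(big_tab,
                    [{rocksdb_copies, [node()]},
                     {user_properties,
                      [{rocksdb_opts, [{write_buffer_size, 32 bsl 20},   %% 32 MB
                                       {max_open_files, 1024}]}]}]).
```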
It is also possible, for larger databases, to produce a tuning parameter file.
This is experimental, and mostly copied from `mnesia_leveldb`. Consult the
source code in `mnesia_rocksdb_tuning.erl` and `mnesia_rocksdb_params.erl`.
Contributions are welcome.


#### <a name="Caveats">Caveats</a> ####

Avoid placing `bag` tables in RocksDB. Although they work, each write
requires additional reads, causing substantial runtime overheads. There
are better ways to represent and process bag data (see above about
_prefix keys_).

The `mnesia:table_info(T, size)` call always returns zero for RocksDB
tables. RocksDB itself does not track the number of elements in a table, and
although it is possible to make the `mnesia_rocksdb` backend maintain a size
counter, it incurs a high runtime overhead for writes and deletes since it
forces them to first do a read to check the existence of the key. If you
depend on having an up to date size count at all times, you need to maintain
it yourself. If you only need the size occasionally, you may traverse the
table to count the elements.


### <a name="Mnesia_backend_plugins">Mnesia backend plugins</a> ###


#### <a name="Background">Background</a> ####

Mnesia was initially designed to be a RAM-only DBMS, and Erlang's
`ets` tables were developed for this purpose. In order to support
persistence, e.g. for configuration data, a disk-based version of `ets`
(called `dets`) was created. The `dets` API mimics the `ets` API,
and `dets` is quite convenient and fast for (nowadays) small datasets.
However, using a 32-bit bucket system, it is limited to 2 GB of data.
It also doesn't support ordered sets. When used in Mnesia, dets-based
tables are called `disc_only_copies`.

To circumvent these limitations, another table type, called `disc_copies`,
was added. This is a combination of `ets` and `disk_log`, where Mnesia
periodically snapshots the `ets` data to a log file on disk, and meanwhile
maintains a log of updates, which can be applied at startup. These tables
are quite performant (especially on read access), but all data is kept in
RAM, which can become a serious limitation.

A backend plugin system was proposed by Ulf Wiger in 2016, and further
developed with Klarna's support, to finally become included in OTP 19.
Klarna uses a LevelDb backend, but Aeternity, in 2017, instead chose
to implement a Rocksdb backend plugin.


### <a name="Design">Design</a> ###

As backend plugins were added on a long-since legacy-stable Mnesia,
they had to conform to the existing code structure. For this reason,
the plugin callbacks hook into the already present low-level access
API in the `mnesia_lib` module. As a consequence, backend plugins have
the same access semantics and granularity as `ets` and `dets`. This
isn't much of a disadvantage for key-value stores like LevelDb and RocksDB,
but a more serious issue is that the update part of this API is called
_after_ the point of no return. That is, Mnesia does not expect
these updates to fail, and has no recourse if they do. As an aside,
this could also happen if a `disc_only_copies` table exceeds the 2 GB
limit (mnesia will not check it, and `dets` will not complain, but simply
drop the update).


### <a name="Mnesia_index_plugins">Mnesia index plugins</a> ###

When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.

An index plugin can be added in one of two ways:

1. When creating a schema, provide `{index_plugins, [{Name, Module, Function}]}` options.

1. Call the function `mnesia_schema:add_index_plugin(Name, Module, Function)`

`Name` must be an atom wrapped as a 1-tuple, e.g. `{words}`.

The plugin callback is called as `Module:Function(Table, Pos, Obj)`, where `Pos=={words}` in
our example. It returns a list of index terms.

<strong>Example</strong>

Given the following index plugin implementation:

```
-module(words).
-export([words_f/3]).

words_f(_,_,Obj) when is_tuple(Obj) ->
    words_(tuple_to_list(Obj)).

words_(Str) when is_binary(Str) ->
    string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
words_(L) when is_list(L) ->
    lists:flatmap(fun words_/1, L);
words_(_) ->
    [].
```

We can register the plugin and use it in table definitions:

```
Eshell V12.1.3 (abort with ^G)
1> mnesia:start().
ok
2> mnesia_schema:add_index_plugin({words}, words, words_f).
{atomic,ok}
3> mnesia:create_table(i, [{index, [{words}]}]).
{atomic,ok}
```

Note that in this case, we had neither a backend plugin, nor even a persistent schema.
Index plugins can be used with all table types. The registered indexing function (arity 3) must exist
as an exported function along the node's code path.

To see what happens when we insert an object, we can turn on call trace.

```
4> dbg:tracer().
{ok,<0.108.0>}
5> dbg:tp(words, x).
{ok,[{matched,nonode@nohost,3},{saved,x}]}
6> dbg:p(all,[c]).
{ok,[{matched,nonode@nohost,60}]}
7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
                                             <<"four">>]
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
                                             <<"four">>]
ok
8> dbg:ctp('_'), dbg:stop().
ok
9> mnesia:dirty_index_read(i, <<"one">>, {words}).
[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
```

(The fact that the indexing function is called twice seems like a performance bug.)

We can observe that the indexing callback is able to operate on the whole object.
It needs to be side-effect free and efficient, since it will be called at least once for each update
(if an old object exists in the table, the indexing function will be called on it too, before it is
replaced by the new object).


### <a name="Rocksdb">Rocksdb</a> ###


### <a name="Usage">Usage</a> ###


## Modules ##

<table width="100%" border="0" summary="list of modules">
<tr><td><a href="mnesia_rocksdb.md" class="module">mnesia_rocksdb</a></td></tr>
<tr><td><a href="mnesia_rocksdb_admin.md" class="module">mnesia_rocksdb_admin</a></td></tr>
<tr><td><a href="mnesia_rocksdb_app.md" class="module">mnesia_rocksdb_app</a></td></tr>
<tr><td><a href="mnesia_rocksdb_lib.md" class="module">mnesia_rocksdb_lib</a></td></tr>
<tr><td><a href="mnesia_rocksdb_params.md" class="module">mnesia_rocksdb_params</a></td></tr>
<tr><td><a href="mnesia_rocksdb_sup.md" class="module">mnesia_rocksdb_sup</a></td></tr>
<tr><td><a href="mnesia_rocksdb_tuning.md" class="module">mnesia_rocksdb_tuning</a></td></tr>
<tr><td><a href="mrdb.md" class="module">mrdb</a></td></tr>
<tr><td><a href="mrdb_index.md" class="module">mrdb_index</a></td></tr>
<tr><td><a href="mrdb_mutex.md" class="module">mrdb_mutex</a></td></tr>
<tr><td><a href="mrdb_select.md" class="module">mrdb_select</a></td></tr></table>
5 doc/edoc-info (new file)
@@ -0,0 +1,5 @@
%% encoding: UTF-8
{application,mnesia_rocksdb}.
{modules,[mnesia_rocksdb,mnesia_rocksdb_admin,mnesia_rocksdb_app,
          mnesia_rocksdb_lib,mnesia_rocksdb_params,mnesia_rocksdb_sup,
          mnesia_rocksdb_tuning,mrdb,mrdb_index,mrdb_mutex,mrdb_select]}.
BIN doc/erlang.png (new file)
Binary file not shown (size: 2.1 KiB).
547 doc/mnesia_rocksdb.md (new file)
File diff suppressed because one or more lines are too long
326 doc/mnesia_rocksdb_admin.md (new file)
@@ -0,0 +1,326 @@

# Module mnesia_rocksdb_admin #

__Behaviours:__ [`gen_server`](gen_server.md).

## Data Types ##

    alias()   = atom()
    backend() = #{db_ref => db_ref(), cf_info => #{table() => cf()}}
    cf()      = mrdb:db_ref()
    db_ref()  = rocksdb:db_handle()
    gen_server_noreply() = {noreply, st()} | {stop, reason(), st()}
    gen_server_reply()   = {reply, reply(), st()} | {stop, reason(), reply(), st()}
    properties() = [{atom(), any()}]
    reason()  = any()
    reply()   = any()
    req()     = {create_table, table(), properties()} | {delete_table, table()}
              | {load_table, table()} | {related_resources, table()}
              | {get_ref, table()} | {add_aliases, [alias()]}
              | {write_table_property, tabname(), tuple()}
              | {remove_aliases, [alias()]} | {migrate, [{tabname(), map()}]}
              | {prep_close, table()} | {close_table, table()}
    st()      = #st{backends = #{alias() => backend()},
                    standalone = #{{alias(), table()} => cf()},
                    default_opts = [{atom(), term()}]}
    table()   = tabname() | {admin, alias()} | {tabname(), index, any()}
              | {tabname(), retainer, any()}
    tabname() = atom()

## Function Details ##

    add_aliases(Aliases) -> any()
    close_table(Alias, Name) -> any()
    code_change(FromVsn, St, Extra) -> any()
    create_table(Alias, Name, Props) -> any()
    delete_table(Alias::alias(), Name::tabname()) -> ok
    ensure_started() -> ok
    get_ref(Name) -> any()
    get_ref(Name, Default) -> any()
    handle_call(Req::{alias(), req()}, From::any(), St::st()) -> gen_server_reply()
    handle_cast(Msg::any(), St::st()) -> gen_server_noreply()
    handle_info(Msg::any(), St::st()) -> gen_server_noreply()
    init(X1) -> any()
    load_table(Alias, Name) -> any()
    meta() -> any()
    migrate_standalone(Alias, Tabs) -> any()
    prep_close(Alias, Tab) -> any()
    read_info(TRec) -> any()
    read_info(Alias, Tab) -> any()
    read_info(Alias, Tab, K, Default) -> any()
    related_resources(Alias, Name) -> any()
    remove_aliases(Aliases) -> any()
    request_ref(Alias, Name) -> any()
    start_link() -> any()
    terminate(X1, St) -> any()
    write_info(Alias, Tab, K, V) -> any()
    write_table_property(Alias, Tab, Prop) -> any()
32 doc/mnesia_rocksdb_app.md (new file)
@@ -0,0 +1,32 @@

# Module mnesia_rocksdb_app #

__Behaviours:__ [`application`](application.md).

## Function Details ##

    start(StartType, StartArgs) -> any()
    stop(State) -> any()
168 doc/mnesia_rocksdb_lib.md (new file)
@@ -0,0 +1,168 @@

# Module mnesia_rocksdb_lib #

RocksDB update wrappers, in separate module for easy tracing and mocking.

## Function Details ##

    check_encoding(Encoding, Attributes) -> any()
    create_mountpoint(Tab) -> any()
    data_mountpoint(Tab) -> any()
    decode(Val, X2) -> any()
    decode_key(CodedKey::binary()) -> any()
    decode_key(CodedKey, Enc) -> any()
    decode_val(CodedVal::binary()) -> any()
    decode_val(CodedVal, K, Ref) -> any()
    default_encoding(X1, Type, As) -> any()
    delete(Ref, K, Opts) -> any()
    encode(Value, X2) -> any()
    encode_key(Key::any()) -> binary()
    encode_key(Key, X2) -> any()
    encode_val(Val::any()) -> binary()
    encode_val(Val, Enc) -> any()
    keypos(Tab) -> any()
    open_rocksdb(MPd, RdbOpts, CFs) -> any()
    put(Ref, K, V, Opts) -> any()
    tabname(Tab) -> any()
    valid_key_type(X1, Key) -> any()
    valid_obj_type(X1, Obj) -> any()
    write(X1, L, Opts) -> any()
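Since this module exists "for easy tracing and mocking", a quick way to watch the low-level writes during a test run is a standard `dbg` session, similar to the index-plugin trace shown in the README diff above; the table and record below are hypothetical:

```erlang
%% Sketch: trace calls to the low-level put/4 wrapper while writing a record.
dbg:tracer(),
dbg:tpl(mnesia_rocksdb_lib, put, 4, x),     %% x = print calls and return values
dbg:p(all, [c]),
mnesia:dirty_write({foo, 1, <<"hello">>}),  %% hypothetical rocksdb_copies table
dbg:ctp('_'),
dbg:stop().
```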
80 doc/mnesia_rocksdb_params.md (new file)
@@ -0,0 +1,80 @@

# Module mnesia_rocksdb_params #

__Behaviours:__ [`gen_server`](gen_server.md).

## Function Details ##

    code_change(X1, S, X3) -> any()
    delete(Tab) -> any()
    handle_call(X1, X2, S) -> any()
    handle_cast(X1, S) -> any()
    handle_info(X1, S) -> any()
    init(X1) -> any()
    lookup(Tab, Default) -> any()
    start_link() -> any()
    store(Tab, Params) -> any()
    terminate(X1, X2) -> any()
32 doc/mnesia_rocksdb_sup.md (new file)
@@ -0,0 +1,32 @@

# Module mnesia_rocksdb_sup #

__Behaviours:__ [`supervisor`](supervisor.md).

## Function Details ##

    init(X1) -> any()
    start_link() -> any()
126 doc/mnesia_rocksdb_tuning.md (new file)
@@ -0,0 +1,126 @@

# Module mnesia_rocksdb_tuning #

## Function Details ##

    cache(X1) -> any()
    calc_sizes() -> any()
    calc_sizes(D) -> any()
    count_rdb_tabs() -> any()
    count_rdb_tabs(Db) -> any()
    default(X1) -> any()
    describe_env() -> any()
    get_avail_ram() -> any()
    get_maxfiles() -> any()
    get_maxfiles(X1) -> any()
    ideal_max_files() -> any()
    ideal_max_files(D) -> any()
    max_files(X1) -> any()
    rdb_indexes() -> any()
    rdb_indexes(Db) -> any()
    rdb_tabs() -> any()
    rdb_tabs(Db) -> any()
    write_buffer(X1) -> any()
943 doc/mrdb.md (new file)
File diff suppressed because one or more lines are too long
99
doc/mrdb_index.md
Normal file
99
doc/mrdb_index.md
Normal file
@ -0,0 +1,99 @@
|
||||
|
||||
|
||||
# Module mrdb_index #
|
||||
* [Data Types](#types)
|
||||
* [Function Index](#index)
|
||||
* [Function Details](#functions)
|
||||
|
||||
<a name="types"></a>
|
||||
|
||||
## Data Types ##
|
||||
|
||||
|
||||
|
||||
|
||||
### <a name="type-index_value">index_value()</a> ###
|
||||
|
||||
|
||||
<pre><code>
|
||||
index_value() = any()
|
||||
</code></pre>
|
||||
|
||||
|
||||
|
||||
|
||||
### <a name="type-iterator_action">iterator_action()</a> ###
|
||||
|
||||
|
||||
<pre><code>
|
||||
iterator_action() = <a href="http://www.erlang.org/doc/man/mrdb.html#type-iterator_action">mrdb:iterator_action()</a>
|
||||
</code></pre>
|
||||
|
||||
|
||||
|
||||
|
||||
### <a name="type-ix_iterator">ix_iterator()</a> ###
|
||||
|
||||
|
||||
<pre><code>
|
||||
ix_iterator() = #mrdb_ix_iter{i = <a href="http://www.erlang.org/doc/man/mrdb.html#type-iterator">mrdb:iterator()</a>, type = set | bag, sub = <a href="http://www.erlang.org/doc/man/mrdb.html#type-ref">mrdb:ref()</a> | pid()}
|
||||
</code></pre>
|
||||
|
||||
|
||||
|
||||
|
||||
### <a name="type-object">object()</a> ###
|
||||
|
||||
|
||||
<pre><code>
|
||||
object() = tuple()
|
||||
</code></pre>
|
||||
|
||||
<a name="index"></a>
|
||||
|
||||
## Function Index ##
|
||||
|
||||
|
||||
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#iterator-2">iterator/2</a></td><td></td></tr><tr><td valign="top"><a href="#iterator_close-1">iterator_close/1</a></td><td></td></tr><tr><td valign="top"><a href="#iterator_move-2">iterator_move/2</a></td><td></td></tr><tr><td valign="top"><a href="#with_iterator-3">with_iterator/3</a></td><td></td></tr></table>
|
||||
|
||||
|
||||
<a name="functions"></a>
|
||||
|
||||
## Function Details ##
|
||||
|
||||
<a name="iterator-2"></a>
|
||||
|
||||
### iterator/2 ###
|
||||
|
||||
<pre><code>
|
||||
iterator(Tab::<a href="http://www.erlang.org/doc/man/mrdb.html#type-ref_or_tab">mrdb:ref_or_tab()</a>, IxPos::<a href="http://www.erlang.org/doc/man/mrdb.html#type-index_position">mrdb:index_position()</a>) -> {ok, <a href="#type-ix_iterator">ix_iterator()</a>} | {error, term()}
|
||||
</code></pre>
|
||||
<br />
|
||||
|
||||
<a name="iterator_close-1"></a>
|
||||
|
||||
### iterator_close/1 ###
|
||||
|
||||
<pre><code>
|
||||
iterator_close(Mrdb_ix_iter::<a href="#type-ix_iterator">ix_iterator()</a>) -> ok
|
||||
</code></pre>
|
||||
<br />
|
||||
|
||||
<a name="iterator_move-2"></a>
|
||||
|
||||
### iterator_move/2 ###
|
||||
|
||||
<pre><code>
|
||||
iterator_move(Mrdb_ix_iter::<a href="#type-ix_iterator">ix_iterator()</a>, Dir::<a href="#type-iterator_action">iterator_action()</a>) -> {ok, <a href="#type-index_value">index_value()</a>, <a href="#type-object">object()</a>} | {error, term()}
|
||||
</code></pre>
|
||||
<br />
|
||||
|
||||
<a name="with_iterator-3"></a>
|
||||
|
||||
### with_iterator/3 ###
|
||||
|
||||
<pre><code>
|
||||
with_iterator(Tab::<a href="http://www.erlang.org/doc/man/mrdb.html#type-ref_or_tab">mrdb:ref_or_tab()</a>, IxPos::<a href="http://www.erlang.org/doc/man/mrdb.html#type-index_position">mrdb:index_position()</a>, Fun::fun((<a href="#type-ix_iterator">ix_iterator()</a>) -> Res)) -> Res
|
||||
</code></pre>
|
||||
<br />
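A usage sketch (for illustration only; the table name `t` and index position `3` are assumptions, not part of the spec above):

```erlang
%% Open an index iterator on position 3 of table t, move to the first
%% index entry, and let with_iterator/3 close the iterator afterwards.
mrdb_index:with_iterator(t, 3,
    fun(I) ->
        mrdb_index:iterator_move(I, first)
    end).
```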
|
||||
|
30  doc/mrdb_mutex.md  Normal file
@ -0,0 +1,30 @@
|
||||
|
||||
|
||||
# Module mrdb_mutex #
|
||||
* [Function Index](#index)
|
||||
* [Function Details](#functions)
|
||||
|
||||
<a name="index"></a>
|
||||
|
||||
## Function Index ##
|
||||
|
||||
|
||||
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#do-2">do/2</a></td><td></td></tr><tr><td valign="top"><a href="#ensure_tab-0">ensure_tab/0</a></td><td></td></tr></table>
|
||||
|
||||
|
||||
<a name="functions"></a>
|
||||
|
||||
## Function Details ##
|
||||
|
||||
<a name="do-2"></a>
|
||||
|
||||
### do/2 ###
|
||||
|
||||
`do(Rsrc, F) -> any()`
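A usage sketch (for illustration only; `{tab, my_tab}` is an arbitrary resource key and `critical_work/0` a hypothetical function):

```erlang
%% F runs only while the caller holds the mutex for the given resource;
%% the mutex is released when F returns or throws.
mrdb_mutex:do({tab, my_tab}, fun() -> critical_work() end).
```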
|
||||
|
||||
<a name="ensure_tab-0"></a>
|
||||
|
||||
### ensure_tab/0 ###
|
||||
|
||||
`ensure_tab() -> any()`
|
||||
|
48  doc/mrdb_select.md  Normal file
@ -0,0 +1,48 @@
|
||||
|
||||
|
||||
# Module mrdb_select #
|
||||
* [Function Index](#index)
|
||||
* [Function Details](#functions)
|
||||
|
||||
<a name="index"></a>
|
||||
|
||||
## Function Index ##
|
||||
|
||||
|
||||
<table width="100%" border="1" cellspacing="0" cellpadding="2" summary="function index"><tr><td valign="top"><a href="#fold-5">fold/5</a></td><td></td></tr><tr><td valign="top"><a href="#rdb_fold-5">rdb_fold/5</a></td><td></td></tr><tr><td valign="top"><a href="#select-1">select/1</a></td><td></td></tr><tr><td valign="top"><a href="#select-3">select/3</a></td><td></td></tr><tr><td valign="top"><a href="#select-4">select/4</a></td><td></td></tr></table>
|
||||
|
||||
|
||||
<a name="functions"></a>
|
||||
|
||||
## Function Details ##
|
||||
|
||||
<a name="fold-5"></a>
|
||||
|
||||
### fold/5 ###
|
||||
|
||||
`fold(Ref, Fun, Acc, MS, Limit) -> any()`
|
||||
|
||||
<a name="rdb_fold-5"></a>
|
||||
|
||||
### rdb_fold/5 ###
|
||||
|
||||
`rdb_fold(Ref, Fun, Acc, Prefix, Limit) -> any()`
|
||||
|
||||
<a name="select-1"></a>
|
||||
|
||||
### select/1 ###
|
||||
|
||||
`select(Cont) -> any()`
|
||||
|
||||
<a name="select-3"></a>
|
||||
|
||||
### select/3 ###
|
||||
|
||||
`select(Ref, MS, Limit) -> any()`
|
||||
|
||||
<a name="select-4"></a>
|
||||
|
||||
### select/4 ###
|
||||
|
||||
`select(Ref, MS, AccKeys, Limit) -> any()`
|
||||
|
250  doc/overview.edoc  Normal file
@ -0,0 +1,250 @@
|
||||
@author Ulf Wiger <ulf@wiger.net>
|
||||
@copyright 2013-21 Klarna AB
|
||||
@title Mnesia Rocksdb - Rocksdb backend plugin for Mnesia
|
||||
|
||||
@doc
|
||||
|
||||
The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
|
||||
it possible to utilize more capable key-value stores than the `dets'
|
||||
module (limited to 2 GB per table). Unfortunately, this support is
|
||||
undocumented. Below, some informal documentation for the plugin system
|
||||
is provided.
|
||||
|
||||
== Table of Contents ==
|
||||
<ol>
|
||||
<li>{@section Usage}</li>
|
||||
<ol>
|
||||
<li>{@section Prerequisites}</li>
|
||||
<li>{@section Getting started}</li>
|
||||
<li>{@section Special features}</li>
|
||||
<li>{@section Customization}</li>
|
||||
<li>{@section Handling of errors in write operations}</li>
|
||||
<li>{@section Caveats}</li>
|
||||
</ol>
|
||||
<li>{@section Mnesia backend plugins}</li>
|
||||
<ol>
|
||||
<li>{@section Background}</li>
|
||||
<li>{@section Design}</li>
|
||||
</ol>
|
||||
<li>{@section Mnesia index plugins}</li>
|
||||
<li>{@section Rocksdb}</li>
|
||||
</ol>
|
||||
|
||||
== Usage ==
|
||||
|
||||
=== Prerequisites ===
|
||||
|
||||
<ul>
|
||||
<li>rocksdb (included as dependency)</li>
|
||||
<li>sext (included as dependency)</li>
|
||||
<li>Erlang/OTP 21.0 or newer (https://github.com/erlang/otp)</li>
|
||||
</ul>
|
||||
|
||||
=== Getting started ===
|
||||
|
||||
Call `mnesia_rocksdb:register()' immediately after
|
||||
starting mnesia.
|
||||
|
||||
Put `{rocksdb_copies, [node()]}' into the table definitions of
|
||||
tables you want to be in RocksDB.
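For example (a minimal sketch; the table name `tab' is arbitrary):

```
ok = mnesia:start(),
mnesia_rocksdb:register(),
{atomic, ok} = mnesia:create_table(tab, [{rocksdb_copies, [node()]}]).
'''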
|
||||
|
||||
=== Special features ===
|
||||
|
||||
RocksDB tables support efficient selects on <em>prefix keys</em>.
|
||||
|
||||
The backend uses the `sext' module (see
|
||||
[https://github.com/uwiger/sext]) for mapping between Erlang terms and the
|
||||
binary data stored in the tables. This provides two useful properties:
|
||||
|
||||
<ul>
|
||||
<li>The records are stored in the Erlang term order of their keys.</li>
|
||||
<li>A prefix of a composite key is ordered just before any key for which
|
||||
it is a prefix. For example, ``{x, '_'}'' is a prefix for keys `{x, a}',
|
||||
`{x, b}' and so on.</li>
|
||||
</ul>
|
||||
|
||||
This means that a prefix key identifies the start of the sequence of
|
||||
entries whose keys match the prefix. The backend uses this to optimize
|
||||
selects on prefix keys.
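For example, a select on a prefix key could look like this (a minimal sketch; the table `tab' and its key shape are only for illustration):

```
%% Returns all objects whose key is a 2-tuple with x as its first element.
mnesia:dirty_select(tab, [{ {tab, {x, '_'}, '_'}, [], ['$_'] }]).
'''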
|
||||
|
||||
=== Customization ===
|
||||
|
||||
RocksDB supports a number of customization options. These can be specified
|
||||
by providing a `{Key, Value}' list named `rocksdb_opts' under `user_properties',
|
||||
for example:
|
||||
|
||||
```
|
||||
mnesia:create_table(foo, [{rocksdb_copies, [node()]},
|
||||
...
|
||||
{user_properties,
|
||||
[{rocksdb_opts, [{max_open_files, 1024}]}]
|
||||
}])
|
||||
'''
|
||||
|
||||
Consult the <a href="https://github.com/facebook/rocksdb/wiki/Setup-Options-and-Basic-Tuning">RocksDB documentation</a>
|
||||
for information on configuration parameters. Also see the section below on handling write errors.
|
||||
|
||||
The default configuration for tables in `mnesia_rocksdb' is:
|
||||
```
|
||||
default_open_opts() ->
|
||||
[ {create_if_missing, true}
|
||||
, {cache_size,
|
||||
list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
|
||||
, {block_size, 1024}
|
||||
, {max_open_files, 100}
|
||||
, {write_buffer_size,
|
||||
list_to_integer(get_env_default(
|
||||
"ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
|
||||
, {compression,
|
||||
list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
|
||||
, {use_bloomfilter, true}
|
||||
].
|
||||
'''
|
||||
|
||||
It is also possible, for larger databases, to produce a tuning parameter file.
|
||||
This is experimental, and mostly copied from `mnesia_eleveldb'. Consult the
|
||||
source code in `mnesia_rocksdb_tuning.erl' and `mnesia_rocksdb_params.erl'.
|
||||
Contributions are welcome.
|
||||
|
||||
=== Caveats ===
|
||||
|
||||
Avoid placing `bag' tables in RocksDB. Although they work, each write
|
||||
requires additional reads, causing substantial runtime overheads. There
|
||||
are better ways to represent and process bag data (see above about
|
||||
<em>prefix keys</em>).
|
||||
|
||||
The `mnesia:table_info(T, size)' call always returns zero for RocksDB
|
||||
tables. RocksDB itself does not track the number of elements in a table, and
|
||||
although it is possible to make the `mnesia_rocksdb' backend maintain a size
|
||||
counter, it incurs a high runtime overhead for writes and deletes since it
|
||||
forces them to first do a read to check the existence of the key. If you
|
||||
depend on having an up-to-date size count at all times, you need to maintain
|
||||
it yourself. If you only need the size occasionally, you may traverse the
|
||||
table to count the elements.
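If an occasional count is sufficient, such a traversal could look like this (a minimal sketch, assuming a table named `tab'):

```
Count = mnesia:async_dirty(
          fun() ->
                  mnesia:foldl(fun(_Obj, Acc) -> Acc + 1 end, 0, tab)
          end).
'''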
|
||||
|
||||
== Mnesia backend plugins ==
|
||||
|
||||
=== Background ===
|
||||
|
||||
Mnesia was initially designed to be a RAM-only DBMS, and Erlang's
|
||||
`ets' tables were developed for this purpose. In order to support
|
||||
persistence, e.g. for configuration data, a disk-based version of `ets'
|
||||
(called `dets') was created. The `dets' API mimics the `ets' API,
|
||||
and `dets' is quite convenient and fast for (nowadays) small datasets.
|
||||
However, using a 32-bit bucket system, it is limited to 2GB of data.
|
||||
It also doesn't support ordered sets. When used in Mnesia, dets-based
|
||||
tables are called `disc_only_copies'.
|
||||
|
||||
To circumvent these limitations, another table type, called `disc_copies'
|
||||
was added. This is a combination of `ets' and `disk_log', where Mnesia
|
||||
periodically snapshots the `ets' data to a log file on disk, and meanwhile
|
||||
maintains a log of updates, which can be applied at startup. These tables
|
||||
are quite performant (especially on read access), but all data is kept in
|
||||
RAM, which can become a serious limitation.
|
||||
|
||||
A backend plugin system was proposed by Ulf Wiger in 2016, and further
|
||||
developed with Klarna's support, to finally become included in OTP 19.
|
||||
Klarna uses a LevelDb backend, but Aeternity, in 2017, instead chose
|
||||
to implement a Rocksdb backend plugin.
|
||||
|
||||
=== Design ===
|
||||
|
||||
As backend plugins were added on a long-since legacy-stable Mnesia,
|
||||
they had to conform to the existing code structure. For this reason,
|
||||
the plugin callbacks hook into the already present low-level access
|
||||
API in the `mnesia_lib' module. As a consequence, backend plugins have
|
||||
the same access semantics and granularity as `ets' and `dets'. This
|
||||
isn't much of a disadvantage for key-value stores like LevelDb and RocksDB,
|
||||
but a more serious issue is that the update part of this API is called
|
||||
<em>after</em> the point of no return. That is, Mnesia does not expect
|
||||
these updates to fail, and has no recourse if they do. As an aside,
|
||||
this could also happen if a `disc_only_copies' table exceeds the 2 GB
|
||||
limit (mnesia will not check it, and `dets' will not complain, but simply
|
||||
drop the update.)
|
||||
|
||||
== Mnesia index plugins ==
|
||||
|
||||
When adding support for backend plugins, index plugins were also added. Unfortunately, they remain undocumented.
|
||||
|
||||
An index plugin can be added in one of two ways:
|
||||
|
||||
<ol>
|
||||
<li>When creating a schema, provide `{index_plugins, [{Name, Module, Function}]}' options.</li>
|
||||
<li>Call the function `mnesia_schema:add_index_plugin(Name, Module, Function)'</li>
|
||||
</ol>
|
||||
|
||||
`Name' must be an atom wrapped as a 1-tuple, e.g. `{words}'.
|
||||
|
||||
The plugin callback is called as `Module:Function(Table, Pos, Obj)', where `Pos=={words}' in
|
||||
our example. It returns a list of index terms.
|
||||
|
||||
<strong>Example</strong>
|
||||
|
||||
Given the following index plugin implementation:
|
||||
|
||||
```
|
||||
-module(words).
|
||||
-export([words_f/3]).
|
||||
|
||||
words_f(_,_,Obj) when is_tuple(Obj) ->
|
||||
words_(tuple_to_list(Obj)).
|
||||
|
||||
words_(Str) when is_binary(Str) ->
|
||||
string:lexemes(Str, [$\s, $\n, [$\r,$\n]]);
|
||||
words_(L) when is_list(L) ->
|
||||
lists:flatmap(fun words_/1, L);
|
||||
words_(_) ->
|
||||
[].
|
||||
'''
|
||||
|
||||
We can register the plugin and use it in table definitions:
|
||||
|
||||
```
|
||||
Eshell V12.1.3 (abort with ^G)
|
||||
1> mnesia:start().
|
||||
ok
|
||||
2> mnesia_schema:add_index_plugin({words}, words, words_f).
|
||||
{atomic,ok}
|
||||
3> mnesia:create_table(i, [{index, [{words}]}]).
|
||||
{atomic,ok}
|
||||
'''
|
||||
|
||||
Note that in this case, we had neither a backend plugin, nor even a persistent schema.
|
||||
Index plugins can be used with all table types. The registered indexing function (arity 3) must exist
|
||||
as an exported function along the node's code path.
|
||||
|
||||
To see what happens when we insert an object, we can turn on call trace.
|
||||
|
||||
```
|
||||
4> dbg:tracer().
|
||||
{ok,<0.108.0>}
|
||||
5> dbg:tp(words, x).
|
||||
{ok,[{matched,nonode@nohost,3},{saved,x}]}
|
||||
6> dbg:p(all,[c]).
|
||||
{ok,[{matched,nonode@nohost,60}]}
|
||||
7> mnesia:dirty_write({i,<<"one two">>, [<<"three">>, <<"four">>]}).
|
||||
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
|
||||
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
|
||||
<<"four">>]
|
||||
(<0.84.0>) call words:words_f(i,{words},{i,<<"one two">>,[<<"three">>,<<"four">>]})
|
||||
(<0.84.0>) returned from words:words_f/3 -> [<<"one">>,<<"two">>,<<"three">>,
|
||||
<<"four">>]
|
||||
ok
|
||||
8> dbg:ctp('_'), dbg:stop().
|
||||
ok
|
||||
9> mnesia:dirty_index_read(i, <<"one">>, {words}).
|
||||
[{i,<<"one two">>,[<<"three">>,<<"four">>]}]
|
||||
'''
|
||||
|
||||
(The fact that the indexing function is called twice seems like a performance bug.)
|
||||
|
||||
We can observe that the indexing callback is able to operate on the whole object.
|
||||
It needs to be side-effect free and efficient, since it will be called at least once for each update
|
||||
(if an old object exists in the table, the indexing function will be called on it too, before it is
|
||||
replaced by the new object.)
|
||||
|
||||
== Rocksdb ==
|
||||
|
||||
== Usage ==
|
||||
|
||||
@end
|
300  doc/plugin-userguide.md  Normal file
@ -0,0 +1,300 @@
|
||||
|
||||
|
||||
# Using Mnesia Plugins #
|
||||
|
||||
Copyright (c) 2017-21 Aeternity Anstalt. All Rights Reserved.
|
||||
|
||||
__Authors:__ Ulf Wiger ([`ulf@wiger.net`](mailto:ulf@wiger.net)).
|
||||
|
||||
The Mnesia DBMS, part of Erlang/OTP, supports 'backend plugins', making
|
||||
it possible to utilize more capable key-value stores than the `dets`
|
||||
module (limited to 2 GB per table). Unfortunately, this support is
|
||||
undocumented. Below, some informal documentation for the plugin system
|
||||
is provided.
|
||||
|
||||
This user guide illustrates these concepts using `mnesia_rocksdb`
|
||||
as an example.
|
||||
|
||||
We will deal with two types of plugin:
|
||||
1. backend plugins
|
||||
2. index plugins
|
||||
|
||||
A backend plugin is a module that implements a `mnesia_backend_type`
|
||||
behavior. Each plugin can support any number of `aliases`, which
|
||||
combined with the plugin module make up a `backend_type`.
|
||||
|
||||
When using `mnesia_rocksdb`, the default alias is `rocksdb_copies`,
|
||||
and it is registered as a `{rocksdb_copies, mnesia_rocksdb}` pair.
|
||||
Once registered, the alias can be used just like the built-in
|
||||
backend types `ram_copies`, `disc_copies`, `disc_only_copies`.
|
||||
Mnesia asks the plugin module which one of the built-in types'
|
||||
semantics the new type is supposed to mimic: ram-only, ram+disk
|
||||
or disk-only. This is mainly relevant for how Mnesia checkpoints and
|
||||
backs up data.
|
||||
|
||||
### <a name="Table_of_Contents">Table of Contents</a> ###
|
||||
|
||||
1. [Usage](#Usage)
|
||||
1. [Prerequisites](#Prerequisites)
|
||||
2. [Getting started](#Getting_started)
|
||||
3. [New indexing functionality](#New_indexing_functionality)
|
||||
|
||||
## Usage
|
||||
|
||||
### Prerequisites
|
||||
|
||||
- rocksdb (included as dependency)
|
||||
- sext (included as dependency)
|
||||
- Erlang/OTP 22.0 or newer (https://github.com/erlang/otp)
|
||||
|
||||
### Getting started
|
||||
|
||||
For the purposes of this user guide, we assume an unnamed, single node
|
||||
mnesia installation. The only place where plugins are affected by
|
||||
distributed Mnesia, is in the table sync callbacks. The simplest way
|
||||
to get all paths in order for experimentation is to check out
|
||||
`mnesia_rocksdb`, build it, and then call `rebar3 shell`. Unless
|
||||
we note otherwise, this is how a node has been started for each example.
|
||||
|
||||
> Erlang shell interactions have been slightly beautified by eliding
|
||||
> some text and breaking and indenting some lines
|
||||
|
||||
#### Adding a backend type to mnesia
|
||||
|
||||
There are three different ways, all undocumented, to register a
|
||||
backend plugin in mnesia:
|
||||
|
||||
1. Add a `backend_types` option when creating the schema, using
|
||||
`mnesia:create_schema/2`
|
||||
|
||||
```erlang
|
||||
Erlang/OTP 22 [erts-10.7] ...
|
||||
|
||||
Eshell V10.7 (abort with ^G)
|
||||
1> mnesia:create_schema([node()],
|
||||
[{backend_types,[{rocksdb_copies,mnesia_rocksdb}]}]).
|
||||
ok
|
||||
2> mnesia:start().
|
||||
ok
|
||||
3> mnesia_schema:backend_types().
|
||||
[ram_copies,disc_copies,disc_only_copies,rocksdb_copies]
|
||||
```
|
||||
|
||||
(In `mnesia_rocksdb`, a shortcut for this exists in `mnesia_rocksdb:create_schema(Nodes)`.)
|
||||
|
||||
2. Add it when starting mnesia, using `mnesia:start/1` (undocumented)
|
||||
|
||||
```erlang
|
||||
Eshell V10.7 (abort with ^G)
|
||||
1> mnesia:create_schema([node()]).
|
||||
ok
|
||||
2> mnesia:start([{schema,[{backend_types,
|
||||
[{rocksdb_copies,mnesia_rocksdb}]}]}]).
|
||||
ok
|
||||
3> mnesia_schema:backend_types().
|
||||
[ram_copies,disc_copies,disc_only_copies]
|
||||
```
|
||||
|
||||
3. Call `mnesia_schema:add_backend_type/2` when mnesia is running.
|
||||
|
||||
```erlang
|
||||
Eshell V10.7 (abort with ^G)
|
||||
1> mnesia:create_schema([node()]).
|
||||
ok
|
||||
2> mnesia:start().
|
||||
ok
|
||||
3> mnesia_schema:add_backend_type(rocksdb_copies,mnesia_rocksdb).
|
||||
{atomic,ok}
|
||||
4> mnesia_schema:backend_types().
|
||||
[ram_copies,disc_copies,disc_only_copies,rocksdb_copies]
|
||||
```
|
||||
|
||||
In all cases the schema is updated, and other nodes, and subsequently
|
||||
added nodes, will automatically receive the information.
|
||||
The function `mnesia_schema:backend_types()` shows which backend plugin
|
||||
aliases are registered.
|
||||
|
||||
The information is also displayed when calling `mnesia:info()`:
|
||||
|
||||
```erlang
|
||||
5> mnesia:info().
|
||||
---> Processes holding locks <---
|
||||
---> Processes waiting for locks <---
|
||||
---> Participant transactions <---
|
||||
---> Coordinator transactions <---
|
||||
---> Uncertain transactions <---
|
||||
---> Active tables <---
|
||||
schema : with 1 records occupying 443 words of mem
|
||||
===> System info in version "4.16.3", debug level = none <===
|
||||
opt_disc. Directory "/.../Mnesia.nonode@nohost" is used.
|
||||
use fallback at restart = false
|
||||
running db nodes = [nonode@nohost]
|
||||
stopped db nodes = []
|
||||
master node tables = []
|
||||
backend types = rocksdb_copies - mnesia_rocksdb
|
||||
remote = []
|
||||
ram_copies = []
|
||||
disc_copies = [schema]
|
||||
disc_only_copies = []
|
||||
[{nonode@nohost,disc_copies}] = [schema]
|
||||
2 transactions committed, 0 aborted, 0 restarted, 0 logged to disc
|
||||
0 held locks, 0 in queue; 0 local transactions, 0 remote
|
||||
0 transactions waits for other nodes: []
|
||||
ok
|
||||
```
|
||||
|
||||
To illustrate how mnesia persists the information in the schema:
|
||||
|
||||
```erlang
|
||||
6> mnesia:table_info(schema,user_properties).
|
||||
[{mnesia_backend_types,[{rocksdb_copies,mnesia_rocksdb}]}]
|
||||
```
|
||||
|
||||
#### Rocksdb registration shortcut
|
||||
|
||||
Call `mnesia_rocksdb:register()` after starting mnesia.
|
||||
|
||||
#### Creating a table
|
||||
|
||||
Put `{rocksdb_copies, [node()]}` into the table definitions of
|
||||
tables you want to be in RocksDB.
|
||||
|
||||
```erlang
|
||||
4> mnesia:create_table(t, [{rocksdb_copies,[node()]}]).
|
||||
{atomic,ok}
|
||||
5> rr(mnesia).
|
||||
[commit,cstruct,cyclic,decision,log_header,mnesia_select,
|
||||
tid,tidstore]
|
||||
6> mnesia:table_info(t,cstruct).
|
||||
#cstruct{name = t,type = set,ram_copies = [],
|
||||
disc_copies = [],disc_only_copies = [],
|
||||
external_copies = [{{rocksdb_copies,mnesia_rocksdb},
|
||||
[nonode@nohost]}],
|
||||
load_order = 0,access_mode = read_write,majority = false,
|
||||
index = [],snmp = [],local_content = false,record_name = t,
|
||||
attributes = [key,val],
|
||||
user_properties = [],frag_properties = [],
|
||||
storage_properties = [],
|
||||
cookie = {{1621758137965715000,-576460752303423420,1},
|
||||
nonode@nohost},
|
||||
version = {{2,0},[]}}
|
||||
```
|
||||
|
||||
In the example above, we take a peek at the `cstruct`, which is the
|
||||
internal metadata structure for mnesia tables. The attribute showing
|
||||
that the table has been created with a `rocksdb_copies` instance, is
|
||||
the `external_copies` attribute. It lists the alias, the callback module
|
||||
and the nodes, where the instances reside.
|
||||
|
||||
The table works essentially like one of the built-in table types.
|
||||
If we want to find out which type, we can query the callback module:
|
||||
|
||||
```erlang
|
||||
8> mnesia_rocksdb:semantics(rocksdb_copies, storage).
|
||||
disc_only_copies
|
||||
```
|
||||
|
||||
Consult the `mnesia_rocksdb` man page for more info on the
|
||||
`Mod:semantics/2` function.
|
||||
|
||||
### New indexing functionality
|
||||
|
||||
With the introduction of backend plugins, a few improvements were made
|
||||
to mnesia's indexing support.
|
||||
|
||||
#### Persistent indexes
|
||||
|
||||
In the past, and still with the built-in types, indexes were always
|
||||
rebuilt on startup. Since backend plugins were introduced mainly in
|
||||
order to support very large tables, a couple of callback functions
|
||||
were added in order to detect whether a full rebuild is needed.
|
||||
|
||||
> The callback functions are `Mod:is_index_consistent/2` and
|
||||
> `Mod:index_is_consistent/3`.
|
||||
> The first function (figuratively) always returns `false` for indexes
|
||||
> on built-in table types. Backend plugin modules should always return
|
||||
> `false` if they have no information. After building the index, mnesia
|
||||
> calls `Mod:index_is_consistent(Alias, IxTab, true)`, and the callback
|
||||
> is expected to persist this information. `IxTab`, in this case, is
|
||||
> a logical name for the index 'table': `{Tab, index, PosInfo}`
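For illustration only, a backend's implementation of these callbacks might look roughly like this (a sketch; `read_meta/2` and `write_meta/3` are hypothetical helpers for the backend's own metadata storage, not actual `mnesia_rocksdb` functions):

```erlang
is_index_consistent(Alias, {Tab, index, PosInfo}) ->
    %% Report false unless we positively know that the index is intact.
    case read_meta(Alias, {index_consistent, Tab, PosInfo}) of
        {ok, true} -> true;
        _          -> false
    end.

index_is_consistent(Alias, {Tab, index, PosInfo}, Bool) ->
    %% Called by mnesia after (re)building the index; persist the flag.
    write_meta(Alias, {index_consistent, Tab, PosInfo}, Bool).
```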
|
||||
|
||||
#### Ordered indexes
|
||||
|
||||
A problem in the past with mnesia indexing has been that indexes with
|
||||
very large fan-out were inefficient. Indexes were represented as `bag`
|
||||
tables, and the cost of inserting a secondary key was proportional to
|
||||
the number of identical secondary keys already in the index.
|
||||
|
||||
When the backend plugin support was added - not least because the
first candidate, LevelDb, didn't do bags well - support for ordered
indexes was added. They turn out to have much more stable performance
|
||||
for indexes with large fan-out. They also work on all built-in table
|
||||
types.
|
||||
|
||||
When creating an index, you can specify the type of index as `bag` or
|
||||
`ordered`. If you omit the type, it will default to `bag` for built-in
|
||||
table types, and for external types, whatever is the first type in the
|
||||
list of supported index types returned by `Mod:semantics(Alias, index_types)`.
|
||||
|
||||
> For `mnesia_rocksdb`, only `ordered` is supported, but a bug in mnesia
|
||||
> makes it ignore this, and try to create a bag index anyway. The
|
||||
> `mnesia_rocksdb` plugin rejects this.
|
||||
> Note that while e.g. mnesia_rocksdb supports regular bag tables, they are not
|
||||
> efficiently implemented.
|
||||
|
||||
Mnesia currently doesn't allow specifying an index type in
|
||||
`mnesia:add_table_index/2`, so simply indicate the index position,
|
||||
and let the backend choose the default.
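For illustration, the index type can instead be given when the table is created, assuming the installed Mnesia accepts the typed `{Attr, Type}` form described above (table and attribute names are arbitrary):

```erlang
mnesia:create_table(t2, [{rocksdb_copies, [node()]},
                         {attributes, [k, v]},
                         {index, [{v, ordered}]}]).
```

Reads via the index then use the ordinary functions, e.g. `mnesia:dirty_index_read(t2, Value, v)`.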
|
||||
|
||||
Having ordered indexes opens up for some new possibilities, but
|
||||
there are currently no functions in mnesia such as index_first, index_next
|
||||
etc., or for performing a select in index order.
|
||||
|
||||
#### Index plugins
|
||||
|
||||
Index plugins are a great new feature, though almost entirely undocumented.
|
||||
|
||||
An index plugin is a registered indexing function, which can operate
|
||||
on the entire object, and shall return a list of secondary keys.
|
||||
When registering an index plugin, it is given an alias, a callback module,
|
||||
and a function name, not unlike backend plugins. The index plugin alias
|
||||
must be an atom wrapped inside a 1-tuple, i.e. `{atom()}`.
|
||||
|
||||
To illustrate, we use a sample indexing function implemented in
|
||||
mnesia_rocksdb, which checks all non-key attributes of an object,
|
||||
and for each value that is a list, makes each list element a secondary
|
||||
key value.
|
||||
|
||||
```erlang
|
||||
9> mnesia_schema:add_index_plugin({lv}, mnesia_rocksdb, ix_listvals).
|
||||
{atomic,ok}
|
||||
10> mnesia:add_table_index(t,{lv}).
|
||||
{atomic,ok}
|
||||
11> mnesia:dirty_write({t,1,[a,b]}).
|
||||
ok
|
||||
12> mnesia:dirty_write({t,2,[b,c]}).
|
||||
ok
|
||||
13> mnesia:dirty_index_read(t,a,{lv}).
|
||||
[{t,1,[a,b]}]
|
||||
14> mnesia:dirty_index_read(t,b,{lv}).
|
||||
[{t,1,[a,b]},{t,2,[b,c]}]
|
||||
15> mnesia:dirty_index_read(t,c,{lv}).
|
||||
[{t,2,[b,c]}]
|
||||
```
|
||||
|
||||
For clarity, this is the implementation of the index callback:
|
||||
|
||||
```erlang
|
||||
ix_listvals(_Tab, _Pos, Obj) ->
|
||||
lists:foldl(
|
||||
fun(V, Acc) when is_list(V) ->
|
||||
V ++ Acc;
|
||||
(_, Acc) ->
|
||||
Acc
|
||||
end, [], tl(tuple_to_list(Obj))).
|
||||
```
|
||||
|
||||
Note that the index callback must be a pure function, as it
|
||||
is also relied upon when deleting objects. That is, it must
|
||||
always return the same values when called with a specific
|
||||
set of input arguments.
|
55  doc/stylesheet.css  Normal file
@ -0,0 +1,55 @@
|
||||
/* standard EDoc style sheet */
|
||||
body {
|
||||
font-family: Verdana, Arial, Helvetica, sans-serif;
|
||||
margin-left: .25in;
|
||||
margin-right: .2in;
|
||||
margin-top: 0.2in;
|
||||
margin-bottom: 0.2in;
|
||||
color: #000000;
|
||||
background-color: #ffffff;
|
||||
}
|
||||
h1,h2 {
|
||||
margin-left: -0.2in;
|
||||
}
|
||||
div.navbar {
|
||||
background-color: #add8e6;
|
||||
padding: 0.2em;
|
||||
}
|
||||
h2.indextitle {
|
||||
padding: 0.4em;
|
||||
background-color: #add8e6;
|
||||
}
|
||||
h3.function,h3.typedecl {
|
||||
background-color: #add8e6;
|
||||
padding-left: 1em;
|
||||
}
|
||||
div.spec {
|
||||
margin-left: 2em;
|
||||
background-color: #eeeeee;
|
||||
}
|
||||
a.module {
|
||||
text-decoration:none
|
||||
}
|
||||
a.module:hover {
|
||||
background-color: #eeeeee;
|
||||
}
|
||||
ul.definitions {
|
||||
list-style-type: none;
|
||||
}
|
||||
ul.index {
|
||||
list-style-type: none;
|
||||
background-color: #eeeeee;
|
||||
}
|
||||
|
||||
/*
|
||||
* Minor style tweaks
|
||||
*/
|
||||
ul {
|
||||
list-style-type: square;
|
||||
}
|
||||
table {
|
||||
border-collapse: collapse;
|
||||
}
|
||||
td {
|
||||
padding: 3
|
||||
}
|
7  erlang_ls.config  Normal file
@ -0,0 +1,7 @@
|
||||
deps_dirs:
|
||||
- "_build/default/lib/*"
|
||||
include_paths:
|
||||
- "src/*"
|
||||
include_dirs:
|
||||
- "include"
|
||||
- "_build/default/lib"
|
15  include/mnesia_rocksdb.hrl  Normal file
@ -0,0 +1,15 @@
|
||||
|
||||
%% Data and meta data (a.k.a. info) are stored in the same table.
|
||||
%% This is a table of the first byte in data
|
||||
%% 0 = before meta data
|
||||
%% 1 = meta data
|
||||
%% 2 = before data
|
||||
%% >= 8 = data
|
||||
|
||||
-define(INFO_START, 0).
|
||||
-define(INFO_TAG, 1).
|
||||
-define(DATA_START, 2).
|
||||
-define(BAG_CNT, 32). % Number of bits used for bag object counter
|
||||
-define(MAX_BAG, 16#FFFFFFFF).
|
||||
|
||||
-define(VSN, 2).
|
34  rebar.config
@ -4,14 +4,40 @@
|
||||
{deps,
|
||||
[
|
||||
{sext, "1.8.0"},
|
||||
{rocksdb,"1.7.0"}
|
||||
{rocksdb, {git, "https://gitlab.com/seanhinde/erlang-rocksdb.git", {ref,"9ae37839"}}},
|
||||
{hut, "1.3.0"}
|
||||
]}.
|
||||
|
||||
{xref_checks, [
|
||||
undefined_function_calls,
|
||||
locals_not_used,
|
||||
deprecated_function_calls
|
||||
]}.
|
||||
|
||||
{profiles,
|
||||
[
|
||||
{test,
|
||||
[
|
||||
{deps, [ {proper, "1.3.0"}
|
||||
, {meck, "0.9.0"}]}
|
||||
]}
|
||||
{deps, [ {proper, "1.4.0"}
|
||||
, {meck, "0.9.2"}
|
||||
, {trace_runner, {git, "https://github.com/uwiger/trace_runner.git",
|
||||
{ref, "2e56677"}}}
|
||||
]}
|
||||
]},
|
||||
{edown,
|
||||
%% Use as `rebar3 as edown do edoc`
|
||||
[
|
||||
{deps, [{edown, "0.8.4"}]},
|
||||
{edoc_opts,
|
||||
[{doclet, edown_doclet},
|
||||
{app_default, "http://www.erlang.org/doc/man"},
|
||||
{doc_path, []},
|
||||
{top_level_readme,
|
||||
{"./README.md", "https://github.com/aeternity/mnesia_rocksdb"}}]}]}
|
||||
]}.
|
||||
|
||||
{ex_doc, [
|
||||
{extras, [<<"README.md">>, <<"LICENSE">>]},
|
||||
{main, <<"readme">>},
|
||||
{source_url, <<"https://github.com/aeternity/mnesia_rocksdb">>}
|
||||
]}.
|
||||
|
10  rebar.lock
@ -1,11 +1,15 @@
|
||||
{"1.2.0",
|
||||
[{<<"rocksdb">>,{pkg,<<"rocksdb">>,<<"1.7.0">>},0},
|
||||
[{<<"hut">>,{pkg,<<"hut">>,<<"1.3.0">>},0},
|
||||
{<<"rocksdb">>,
|
||||
{git,"https://gitlab.com/seanhinde/erlang-rocksdb.git",
|
||||
{ref,"9ae378391ffc94200bde24efcd7a4921eba688d0"}},
|
||||
0},
|
||||
{<<"sext">>,{pkg,<<"sext">>,<<"1.8.0">>},0}]}.
|
||||
[
|
||||
{pkg_hash,[
|
||||
{<<"rocksdb">>, <<"5D23319998A7FCE5FFD5D7824116C905CABA7F91BAF8EDDABD0180F1BB272CEF">>},
|
||||
{<<"hut">>, <<"71F2F054E657C03F959CF1ACC43F436EA87580696528CA2A55C8AFB1B06C85E7">>},
|
||||
{<<"sext">>, <<"90A95B889F5C781B70BBCF44278B763148E313C376B60D87CE664CB1C1DD29B5">>}]},
|
||||
{pkg_hash_ext,[
|
||||
{<<"rocksdb">>, <<"A4BDC5DD80ED137161549713062131E8240523787EBE7B51DF61CFB48B1786CE">>},
|
||||
{<<"hut">>, <<"7E15D28555D8A1F2B5A3A931EC120AF0753E4853A4C66053DB354F35BF9AB563">>},
|
||||
{<<"sext">>, <<"BC6016CB8690BAF677EACACFE6E7CADFEC8DC7E286CBBED762F6CD55B0678E73">>}]}
|
||||
].
|
||||
|
@ -1,3 +1,4 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
{application, mnesia_rocksdb,
|
||||
[
|
||||
{description, "RocksDB backend plugin for Mnesia"},
|
||||
|
File diff suppressed because it is too large
1495  src/mnesia_rocksdb_admin.erl  Normal file
File diff suppressed because it is too large
@ -1,3 +1,4 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
%%----------------------------------------------------------------
|
||||
%% Copyright (c) 2013-2016 Klarna AB
|
||||
%%
|
||||
|
18  src/mnesia_rocksdb_int.hrl  Normal file
@ -0,0 +1,18 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
-include_lib("hut/include/hut.hrl").
|
||||
|
||||
%% enable debugging messages through mnesia:set_debug_level(debug)
|
||||
-ifndef(MNESIA_ROCKSDB_NO_DBG).
|
||||
-define(dbg(Fmt, Args), ?log(debug, Fmt, Args)).
|
||||
%% -define(dbg(Fmt, Args),
|
||||
%% %% avoid evaluating Args if the message will be dropped anyway
|
||||
%% case mnesia_monitor:get_env(debug) of
|
||||
%% none -> ok;
|
||||
%% verbose -> ok;
|
||||
%% _ -> mnesia_lib:dbg_out("~p:~p: "++(Fmt),[?MODULE,?LINE|Args])
|
||||
%% end).
|
||||
-else.
|
||||
-define(dbg(Fmt, Args), ok).
|
||||
-endif.
|
||||
|
||||
-define(DEFAULT_RETRIES, 1).
|
@ -1,17 +1,341 @@
|
||||
%%% @doc RocksDB update wrappers, in separate module for easy tracing and mocking.
|
||||
%%% @doc RocksDB update wrappers, in separate module for easy tracing and mocking.
|
||||
%%%
|
||||
-module(mnesia_rocksdb_lib).
|
||||
|
||||
-export([put/4,
|
||||
write/3,
|
||||
delete/3]).
|
||||
-export([ put/4
|
||||
, write/3
|
||||
, delete/3
|
||||
]).
|
||||
|
||||
-export([ open_rocksdb/3
|
||||
, data_mountpoint/1
|
||||
, create_mountpoint/1
|
||||
, tabname/1
|
||||
]).
|
||||
|
||||
-export([ default_encoding/3
|
||||
, check_encoding/2
|
||||
, valid_obj_type/2
|
||||
, valid_key_type/2 ]).
|
||||
|
||||
-export([ keypos/1
|
||||
, encode_key/1, encode_key/2
|
||||
, decode_key/1, decode_key/2
|
||||
, encode_val/1, encode_val/2
|
||||
, decode_val/1, decode_val/3
|
||||
, encode/2
|
||||
, decode/2
|
||||
]).
|
||||
|
||||
-include("mnesia_rocksdb.hrl").
|
||||
-include_lib("hut/include/hut.hrl").
|
||||
|
||||
put(#{db := Ref, cf := CF}, K, V, Opts) ->
|
||||
rocksdb:put(Ref, CF, K, V, Opts);
|
||||
put(Ref, K, V, Opts) ->
|
||||
rocksdb:put(Ref, K, V, Opts).
|
||||
|
||||
write(Ref, L, Opts) ->
|
||||
rocksdb:write(Ref, L, Opts).
|
||||
write(#{db := Ref, cf := CF}, L, Opts) ->
|
||||
write_as_batch(L, Ref, CF, Opts).
|
||||
|
||||
delete(Ref, K, Opts) ->
|
||||
rocksdb:delete(Ref, K, Opts).
|
||||
|
||||
|
||||
write_as_batch(L, Ref, CF, Opts) ->
|
||||
{ok, Batch} = rocksdb:batch(),
|
||||
lists:foreach(
|
||||
fun({put, K, V}) ->
|
||||
ok = rocksdb:batch_put(Batch, CF, K, V);
|
||||
({delete, K}) ->
|
||||
ok = rocksdb:batch_delete(Batch, CF, K)
|
||||
end, L),
|
||||
rocksdb:write_batch(Ref, Batch, Opts).
|
||||
|
||||
create_mountpoint(Tab) ->
|
||||
MPd = data_mountpoint(Tab),
|
||||
case filelib:is_dir(MPd) of
|
||||
false ->
|
||||
file:make_dir(MPd),
|
||||
ok;
|
||||
true ->
|
||||
Dir = mnesia_lib:dir(),
|
||||
case lists:prefix(Dir, MPd) of
|
||||
true ->
|
||||
ok;
|
||||
false ->
|
||||
{error, exists}
|
||||
end
|
||||
end.
|
||||
|
||||
data_mountpoint(Tab) ->
|
||||
Dir = mnesia_monitor:get_env(dir),
|
||||
filename:join(Dir, tabname(Tab) ++ ".extrdb").
|
||||
|
||||
tabname({admin, Alias}) ->
|
||||
"mnesia_rocksdb-" ++ atom_to_list(Alias) ++ "-_db";
|
||||
tabname({Tab, index, {{Pos},_}}) ->
|
||||
atom_to_list(Tab) ++ "-=" ++ atom_to_list(Pos) ++ "=-_ix";
|
||||
tabname({Tab, index, {Pos,_}}) ->
|
||||
atom_to_list(Tab) ++ "-" ++ integer_to_list(Pos) ++ "-_ix";
|
||||
tabname({Tab, retainer, Name}) ->
|
||||
atom_to_list(Tab) ++ "-" ++ retainername(Name) ++ "-_RET";
|
||||
tabname(Tab) when is_atom(Tab) ->
|
||||
atom_to_list(Tab) ++ "-_tab".
|
||||
|
||||
default_encoding({_, index, _}, _, _) ->
|
||||
{sext, {value, raw}};
|
||||
default_encoding({_, retainer, _}, _, _) ->
|
||||
{term, {value, term}};
|
||||
default_encoding(_, Type, As) ->
|
||||
KeyEnc = case Type of
|
||||
ordered_set -> sext;
|
||||
set -> term;
|
||||
bag -> sext
|
||||
end,
|
||||
ValEnc = case As of
|
||||
[_, _] ->
|
||||
{value, term};
|
||||
[_, _ | _] ->
|
||||
{object, term}
|
||||
end,
|
||||
{KeyEnc, ValEnc}.
|
||||
|
||||
check_encoding(Encoding, Attributes) ->
|
||||
try check_encoding_(Encoding, Attributes)
|
||||
catch
|
||||
throw:Error ->
|
||||
Error
|
||||
end.
|
||||
|
||||
check_encoding_({Key, Val}, As) ->
|
||||
Key1 = check_key_encoding(Key),
|
||||
Val1 = check_value_encoding(Val, As),
|
||||
{ok, {Key1, Val1}};
|
||||
check_encoding_(E, _) ->
|
||||
throw({error, {invalid_encoding, E}}).
|
||||
|
||||
check_key_encoding(E) when E==sext; E==term; E==raw ->
|
||||
E;
|
||||
check_key_encoding(E) ->
|
||||
throw({error, {invalid_key_encoding, E}}).
|
||||
|
||||
check_value_encoding(raw, [_, _]) -> {value, raw};
|
||||
check_value_encoding({value, E} = V, [_, _]) when E==term; E==raw; E==sext -> V;
|
||||
check_value_encoding({object, E} = V, _) when E==term; E==raw; E==sext -> V;
|
||||
check_value_encoding(term, As) -> {val_encoding_type(As), term};
|
||||
check_value_encoding(sext, As) -> {val_encoding_type(As), sext};
|
||||
check_value_encoding(E, _) ->
|
||||
throw({error, {invalid_value_encoding, E}}).
|
||||
|
||||
val_encoding_type(Attrs) ->
|
||||
case Attrs of
|
||||
[_, _] -> value;
|
||||
[_, _|_] -> object
|
||||
end.
|
||||
|
||||
valid_obj_type(#{encoding := Enc}, Obj) ->
|
||||
case {Enc, Obj} of
|
||||
{{binary, {value, binary}}, {_, K, V}} ->
|
||||
is_binary(K) andalso is_binary(V);
|
||||
{{binary, _}, _} ->
|
||||
is_binary(element(2, Obj));
|
||||
{{_, {value, binary}}, {_, _, V}} ->
|
||||
is_binary(V);
|
||||
_ ->
|
||||
%% No restrictions on object type
|
||||
%% unless key and/or value typed to binary
|
||||
true
|
||||
end.
|
||||
|
||||
valid_key_type(#{encoding := Enc}, Key) ->
|
||||
case Enc of
|
||||
{binary, _} when is_binary(Key) ->
|
||||
true;
|
||||
{binary, _} ->
|
||||
false;
|
||||
_ ->
|
||||
true
|
||||
end.
|
||||
|
||||
|
||||
-spec encode_key(any()) -> binary().
|
||||
encode_key(Key) ->
|
||||
encode(Key, sext).
|
||||
|
||||
encode(Value, sext) ->
|
||||
sext:encode(Value);
|
||||
encode(Value, raw) when is_binary(Value) ->
|
||||
Value;
|
||||
encode(Value, term) ->
|
||||
term_to_binary(Value).
|
||||
|
||||
|
||||
encode_key(Key, #{encoding := {Enc,_}}) ->
|
||||
encode(Key, Enc);
|
||||
encode_key(Key, _) ->
|
||||
encode(Key, sext).
|
||||
|
||||
-spec decode_key(binary()) -> any().
|
||||
decode_key(CodedKey) ->
|
||||
decode(CodedKey, sext).
|
||||
|
||||
decode_key(CodedKey, #{encoding := {Enc, _}}) ->
|
||||
decode(CodedKey, Enc);
|
||||
decode_key(CodedKey, Enc) ->
|
||||
decode(CodedKey, Enc).
|
||||
|
||||
decode(Val, sext) ->
|
||||
case sext:partial_decode(Val) of
|
||||
{full, Result, _} ->
|
||||
Result;
|
||||
_ ->
|
||||
error(badarg, Val)
|
||||
end;
|
||||
decode(Val, raw) ->
|
||||
Val;
|
||||
decode(Val, term) ->
|
||||
binary_to_term(Val).
|
||||
|
||||
-spec encode_val(any()) -> binary().
|
||||
encode_val(Val) ->
|
||||
encode(Val, term).
|
||||
|
||||
encode_val(Val, Enc) when is_atom(Enc) ->
|
||||
encode(Val, Enc);
|
||||
encode_val(_, #{name := {_,index,_}}) ->
|
||||
<<>>;
|
||||
encode_val(Val, #{encoding := {_, Enc0}, attr_pos := AP}) ->
|
||||
{Type, Enc} = enc_type(Enc0),
|
||||
case {map_size(AP), Type} of
|
||||
{2, value} ->
|
||||
encode(element(3, Val), Enc);
|
||||
{_, object} ->
|
||||
encode(setelement(2, Val, []), Enc)
|
||||
end.
|
||||
|
||||
enc_type({T, _} = E) when T==value; T==object ->
|
||||
E;
|
||||
enc_type(E) when is_atom(E) ->
|
||||
{object, E}.
|
||||
|
||||
-spec decode_val(binary()) -> any().
|
||||
decode_val(CodedVal) ->
|
||||
binary_to_term(CodedVal).
|
||||
|
||||
decode_val(<<>>, K, #{name := {_,index,_}}) ->
|
||||
{K};
|
||||
decode_val(CodedVal, Key, Ref) ->
|
||||
{Type, Enc} = value_encoding(Ref),
|
||||
case Type of
|
||||
object ->
|
||||
setelement(2, decode(CodedVal, Enc), Key);
|
||||
value ->
|
||||
make_rec(Key, decode(CodedVal, Enc), Ref)
|
||||
end.
|
||||
|
||||
make_rec(Key, _Val, #{name := {_, index, {_,ordered}}}) ->
|
||||
{Key};
|
||||
make_rec(Key, Val, #{properties := #{record_name := Tag}}) ->
|
||||
{Tag, Key, Val};
|
||||
make_rec(Key, Val, #{attr_pos := AP}) ->
|
||||
%% no record name
|
||||
case AP of
|
||||
#{key := 1} -> {Key, Val};
|
||||
#{key := 2} -> {Val, Key} %% Yeah, right, but people are weird
|
||||
end.
|
||||
|
||||
value_encoding(#{encoding := {_, Enc}}) ->
|
||||
enc_type(Enc);
|
||||
value_encoding(#{}) ->
|
||||
{object, term};
|
||||
value_encoding({Type, Enc} = E) when is_atom(Type), is_atom(Enc) ->
|
||||
E.
|
||||
|
||||
keypos({admin, _}) ->
|
||||
1;
|
||||
keypos({_, index, _}) ->
|
||||
1;
|
||||
keypos({_, retainer, _}) ->
|
||||
2;
|
||||
keypos(Tab) when is_atom(Tab) ->
|
||||
2.
|
||||
|
||||
%% ======================================================================
|
||||
%% Private functions
|
||||
%% ======================================================================
|
||||
|
||||
retainername(Name) when is_atom(Name) ->
|
||||
atom_to_list(Name);
|
||||
retainername(Name) when is_list(Name) ->
|
||||
try binary_to_list(list_to_binary(Name))
|
||||
catch
|
||||
error:_ ->
|
||||
lists:flatten(io_lib:write(Name))
|
||||
end;
|
||||
retainername(Name) ->
|
||||
lists:flatten(io_lib:write(Name)).
|
||||
|
||||
open_rocksdb(MPd, RdbOpts, CFs) ->
|
||||
open_rocksdb(MPd, rocksdb_open_opts_(RdbOpts), CFs, get_retries()).
|
||||
|
||||
%% Code adapted from basho/riak_kv_eleveldb_backend.erl
|
||||
open_rocksdb(MPd, Opts, CFs, Retries) ->
|
||||
open_db(MPd, Opts, CFs, max(1, Retries), undefined).
|
||||
|
||||
open_db(_, _, _, 0, LastError) ->
|
||||
{error, LastError};
|
||||
open_db(MPd, Opts, CFs, RetriesLeft, _) ->
|
||||
case rocksdb:open_optimistic_transaction_db(MPd, Opts, CFs) of
|
||||
{ok, _Ref, _CFRefs} = Ok ->
|
||||
?log(debug, "Open - Rocksdb: ~s (~p) -> ~p", [MPd, Opts, Ok]),
|
||||
Ok;
|
||||
%% Check specifically for lock error, this can be caused if
|
||||
%% a crashed mnesia takes some time to flush rocksdb information
|
||||
%% out to disk. The process is gone, but the NIF resource cleanup
|
||||
%% may not have completed.
|
||||
{error, {db_open, OpenErr}=Reason} ->
|
||||
case lists:prefix("IO error: lock ", OpenErr) of
|
||||
true ->
|
||||
SleepFor = get_retry_delay(),
|
||||
?log(debug, ("Open - Rocksdb backend retrying ~p in ~p ms"
|
||||
" after error ~s"), [MPd, SleepFor, OpenErr]),
|
||||
timer:sleep(SleepFor),
|
||||
open_db(MPd, Opts, CFs, RetriesLeft - 1, Reason);
|
||||
false ->
|
||||
{error, Reason}
|
||||
end;
|
||||
{error, Reason} ->
|
||||
{error, Reason}
|
||||
end.
|
||||
|
||||
get_retries() -> 30.
|
||||
get_retry_delay() -> 100.
|
||||
|
||||
rocksdb_open_opts_(RdbOpts) ->
|
||||
lists:foldl(
|
||||
fun({K,_} = Item, Acc) ->
|
||||
lists:keystore(K, 1, Acc, Item)
|
||||
end, default_open_opts(), RdbOpts).
|
||||
|
||||
default_open_opts() ->
|
||||
[ {create_if_missing, true}
|
||||
, {cache_size,
|
||||
list_to_integer(get_env_default("ROCKSDB_CACHE_SIZE", "32212254"))}
|
||||
, {block_size, 1024}
|
||||
, {max_open_files, 30}
|
||||
, {write_buffer_size,
|
||||
list_to_integer(get_env_default(
|
||||
"ROCKSDB_WRITE_BUFFER_SIZE", "4194304"))}
|
||||
, {compression,
|
||||
list_to_atom(get_env_default("ROCKSDB_COMPRESSION", "true"))}
|
||||
, {use_bloomfilter, true}
|
||||
].
|
||||
|
||||
get_env_default(Key, Default) ->
|
||||
case os:getenv(Key) of
|
||||
false ->
|
||||
Default;
|
||||
Value ->
|
||||
Value
|
||||
end.
|
||||
|
@ -1,3 +1,4 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
%%----------------------------------------------------------------
|
||||
%% Copyright (c) 2013-2016 Klarna AB
|
||||
%%
|
||||
@ -33,17 +34,12 @@
|
||||
code_change/3]).
|
||||
|
||||
-include("mnesia_rocksdb_tuning.hrl").
|
||||
-include("mnesia_rocksdb_int.hrl").
|
||||
|
||||
-define(KB, 1024).
|
||||
-define(MB, 1024 * 1024).
|
||||
-define(GB, 1024 * 1024 * 1024).
|
||||
|
||||
-ifdef(DEBUG).
|
||||
-define(dbg(Fmt, Args), io:fwrite(user,"~p:~p: "++(Fmt),[?MODULE,?LINE|Args])).
|
||||
-else.
|
||||
-define(dbg(Fmt, Args), ok).
|
||||
-endif.
|
||||
|
||||
lookup(Tab, Default) ->
|
||||
try ets:lookup(?MODULE, Tab) of
|
||||
[{_, Params}] ->
|
||||
@ -113,21 +109,21 @@ store_params(Params) ->
|
||||
NTabs = length(Params),
|
||||
Env0= mnesia_rocksdb_tuning:describe_env(),
|
||||
Env = Env0#tuning{n_tabs = NTabs},
|
||||
?dbg("Env = ~p~n", [Env]),
|
||||
?log(debug, "Env = ~p~n", [Env]),
|
||||
TotalFiles = lists:sum([mnesia_rocksdb_tuning:max_files(Sz) ||
|
||||
{_, Sz} <- Params]),
|
||||
?dbg("TotalFiles = ~p~n", [TotalFiles]),
|
||||
?log(debug, "TotalFiles = ~p~n", [TotalFiles]),
|
||||
MaxFs = Env#tuning.max_files,
|
||||
?dbg("MaxFs = ~p~n", [MaxFs]),
|
||||
?log(debug, "MaxFs = ~p~n", [MaxFs]),
|
||||
FsHeadroom = MaxFs * 0.6,
|
||||
?dbg("FsHeadroom = ~p~n", [FsHeadroom]),
|
||||
?log(debug, "FsHeadroom = ~p~n", [FsHeadroom]),
|
||||
FilesFactor = if TotalFiles =< FsHeadroom ->
|
||||
1; % don't have to scale down
|
||||
true ->
|
||||
FsHeadroom / TotalFiles
|
||||
end,
|
||||
Env1 = Env#tuning{files_factor = FilesFactor},
|
||||
?dbg("Env1 = ~p~n", [Env1]),
|
||||
?log(debug, "Env1 = ~p~n", [Env1]),
|
||||
lists:foreach(
|
||||
fun({Tab, Sz}) when is_atom(Tab);
|
||||
is_atom(element(1,Tab)),
|
||||
|
@ -1,3 +1,4 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
%%----------------------------------------------------------------
|
||||
%% Copyright (c) 2013-2016 Klarna AB
|
||||
%%
|
||||
|
@ -1,3 +1,4 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
%%----------------------------------------------------------------
|
||||
%% Copyright (c) 2013-2016 Klarna AB
|
||||
%%
|
||||
|
@ -1,3 +1,4 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
%%----------------------------------------------------------------
|
||||
%% Copyright (c) 2013-2016 Klarna AB
|
||||
%%
|
||||
|
1560  src/mrdb.erl  Normal file
File diff suppressed because it is too large
192  src/mrdb_index.erl  Normal file
@ -0,0 +1,192 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
-module(mrdb_index).
|
||||
|
||||
-export([
|
||||
with_iterator/3
|
||||
, iterator_move/2
|
||||
, iterator/2
|
||||
, iterator_close/1
|
||||
]).
|
||||
|
||||
-record(mrdb_ix_iter, { i :: mrdb:iterator()
|
||||
, type = set :: set | bag
|
||||
, sub :: mrdb:ref() | pid()
|
||||
}).
|
||||
|
||||
-type ix_iterator() :: #mrdb_ix_iter{}.
|
||||
-type index_value() :: any().
|
||||
-type iterator_action() :: mrdb:iterator_action().
|
||||
|
||||
-type object() :: tuple().
|
||||
|
||||
-record(subst, { i :: mrdb:iterator()
|
||||
, vals_f
|
||||
, cur
|
||||
, mref }).
|
||||
|
||||
-define(TIMEOUT, 5000).
|
||||
|
||||
-import(mnesia_rocksdb_lib, [ encode_key/2 ]).
|
||||
|
||||
-export_type([ ix_iterator/0 ]).
|
||||
|
||||
-spec with_iterator(mrdb:ref_or_tab(), mrdb:index_position(), fun( (ix_iterator()) -> Res)) -> Res.
|
||||
with_iterator(Tab, IxPos, Fun) when is_function(Fun, 1) ->
|
||||
{ok, I} = iterator(Tab, IxPos),
|
||||
try Fun(I)
|
||||
after
|
||||
iterator_close(I)
|
||||
end.
|
||||
|
||||
-spec iterator(mrdb:ref_or_tab(), mrdb:index_position()) -> {ok, ix_iterator()}
|
||||
| {error, _}.
|
||||
iterator(Tab, IxPos) ->
|
||||
#{semantics := Sem} = R = mrdb:ensure_ref(Tab),
|
||||
#{ix_vals_f := IxValsF} = IxR = ensure_index_ref(IxPos, R),
|
||||
case mrdb:iterator(IxR, []) of
|
||||
{ok, I} ->
|
||||
case Sem of
|
||||
bag ->
|
||||
P = sub_new(R, IxValsF),
|
||||
{ok, #mrdb_ix_iter{ i = I
|
||||
, sub = P }};
|
||||
_ ->
|
||||
{ok, #mrdb_ix_iter{i = I, sub = R}}
|
||||
end;
|
||||
Err ->
|
||||
Err
|
||||
end.
|
||||
|
||||
-spec iterator_move(ix_iterator(), iterator_action()) -> {ok, index_value(), object()}
|
||||
| {error, _}.
|
||||
iterator_move(#mrdb_ix_iter{type = set} = IxI, Dir) -> iterator_move_set(IxI, Dir);
|
||||
iterator_move(#mrdb_ix_iter{type = bag} = IxI, Dir) -> iterator_move_bag(IxI, Dir).
|
||||
|
||||
iterator_move_set(#mrdb_ix_iter{i = I, sub = Sub}, Dir) ->
|
||||
case mrdb:iterator_move(I, Dir) of
|
||||
{ok, {{FKey, PKey}}} ->
|
||||
{ok, FKey, opt_read(Sub, PKey)};
|
||||
Other ->
|
||||
Other
|
||||
end.
|
||||
|
||||
iterator_move_bag(#mrdb_ix_iter{i = I, sub = Sub}, Dir) ->
|
||||
case call_sub(Sub, {move_rel, Dir}) of
|
||||
not_found ->
|
||||
case mrdb:iterator_move(I, Dir) of
|
||||
{ok, {FKey, PKey}} ->
|
||||
call_sub(Sub, {move_abs, FKey, PKey});
|
||||
Other ->
|
||||
Other
|
||||
end;
|
||||
Other ->
|
||||
Other
|
||||
end.
|
||||
|
||||
opt_read(R, Key) ->
|
||||
case mrdb:read(R, Key, []) of
|
||||
[Obj] ->
|
||||
Obj;
|
||||
[] ->
|
||||
[]
|
||||
end.
|
||||
|
||||
sub_new(R, ValsF) when is_function(ValsF, 1) ->
|
||||
Me = self(),
|
||||
{Pid, MRef} = spawn_monitor(
|
||||
fun() ->
|
||||
MRef = monitor(process, Me),
|
||||
case mrdb:iterator(R) of
|
||||
{ok, I} ->
|
||||
Me ! {self(), ok},
|
||||
sub_loop(#subst{ mref = MRef
|
||||
, i = I
|
||||
, vals_f = ValsF
|
||||
, cur = undefined});
|
||||
Error ->
|
||||
Me ! {self(), Error}
|
||||
end
|
||||
end),
|
||||
receive
|
||||
{'DOWN', MRef, _, _, Crash} ->
|
||||
mrdb:abort({error, Crash});
|
||||
{Pid, ok} ->
|
||||
demonitor(MRef),
|
||||
Pid;
|
||||
{Pid, Error} ->
|
||||
demonitor(MRef),
|
||||
mrdb:abort(Error)
|
||||
end.
|
||||
|
||||
sub_loop(#subst{i = I, mref = MRef} = St) ->
|
||||
receive
|
||||
{'DOWN', MRef, _, _, _} ->
|
||||
mrdb:iterator_close(I);
|
||||
{Pid, Ref, close} ->
|
||||
mrdb:iterator_close(I),
|
||||
Pid ! {Ref, ok};
|
||||
{Pid, Ref, cur} ->
|
||||
Pid ! {Ref, St#subst.cur},
|
||||
sub_loop(St);
|
||||
{Pid, Ref, {move, Cur, Dir}} when is_binary(Dir) ->
|
||||
{Res, St1} = sub_abs_move(Cur, Dir, St),
|
||||
Pid ! {Ref, Res},
|
||||
sub_loop(St1);
|
||||
{Pid, Ref, {move_rel, Dir}} ->
|
||||
{Res, St1} = sub_rel_move(Dir, St),
|
||||
Pid ! {Ref, Res},
|
||||
sub_loop(St1)
|
||||
end.
|
||||
|
||||
sub_abs_move(Cur, Dir, #subst{i = I} = St) ->
|
||||
case mrdb:iterator_move(I, Dir) of
|
||||
{ok, _} = Ok ->
|
||||
{Ok, St#subst{cur = Cur}};
|
||||
Other ->
|
||||
{Other, St#subst{cur = undefined}}
|
||||
end.
|
||||
|
||||
sub_rel_move(Dir, #subst{i = I, vals_f = VF, cur = Prev} = St) ->
|
||||
case mrdb:iterator_move(I, Dir) of
|
||||
{ok, Obj} = Ok ->
|
||||
case lists:member(Prev, VF(Obj)) of
|
||||
true ->
|
||||
{Ok, St};
|
||||
false ->
|
||||
{not_found, St#subst{cur = undefined}}
|
||||
end;
|
||||
Other ->
|
||||
{Other, St#subst{cur = undefined}}
|
||||
end.
|
||||
|
||||
call_sub(Pid, Req) ->
|
||||
MRef = monitor(process, Pid),
|
||||
Pid ! {self(), MRef, Req},
|
||||
receive
|
||||
{MRef, Reply} ->
|
||||
demonitor(MRef),
|
||||
Reply;
|
||||
{'DOWN', MRef, _, _, Reason} ->
|
||||
error(Reason)
|
||||
after ?TIMEOUT ->
|
||||
error(timeout)
|
||||
end.
|
||||
|
||||
-spec iterator_close(ix_iterator()) -> ok.
|
||||
iterator_close(#mrdb_ix_iter{i = I, sub = Sub}) ->
|
||||
mrdb:iterator_close(I),
|
||||
iterator_close_sub(Sub).
|
||||
|
||||
iterator_close_sub(P) when is_pid(P) ->
|
||||
call_sub(P, close);
|
||||
iterator_close_sub(_) ->
|
||||
ok.
|
||||
|
||||
ensure_index_ref(IxPos, #{name := Name, attr_pos := AP, properties := #{index := Ixs}}) ->
|
||||
{_,ordered} = Ix = lists:keyfind(index_pos(IxPos, AP), 1, Ixs),
|
||||
mrdb:get_ref({Name, index, Ix}).
|
||||
|
||||
index_pos(P, AP) when is_atom(P) ->
|
||||
maps:get(P, AP);
|
||||
index_pos(P, _) ->
|
||||
P.
|
81  src/mrdb_mutex.erl  Normal file
@ -0,0 +1,81 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
-module(mrdb_mutex).
|
||||
|
||||
-export([ do/2 ]).
|
||||
|
||||
-export([ ensure_tab/0 ]).
|
||||
|
||||
-define(LOCK_TAB, ?MODULE).
|
||||
|
||||
%% We use a wrapping ets counter (default: 0) as a form of semaphore.
|
||||
%% The claim operation is done using an atomic list of two updates:
|
||||
%% first, incrementing with 0 - this returns the previous value
|
||||
%% then, incrementing with 1, but wrapping at 1, ensuring that we get 1 back,
|
||||
%% regardless of previous value. This means that if [0,1] is returned, the resource
|
||||
%% was not locked previously; if [1,1] is returned, it was.
|
||||
%%
|
||||
%% Releasing the resource is done by deleting the resource. If we just decrement,
|
||||
%% we will end up with lingering unlocked resources, so we might as well delete.
|
||||
%% Either operation is atomic, and the claim op creates the object if it's missing.
|
||||
|
||||
do(Rsrc, F) when is_function(F, 0) ->
|
||||
true = claim(Rsrc),
|
||||
try F()
|
||||
after
|
||||
release(Rsrc)
|
||||
end.
|
||||
|
||||
claim(Rsrc) ->
|
||||
case claim_(Rsrc) of
|
||||
true -> true;
|
||||
false -> busy_wait(Rsrc, 1000)
|
||||
end.
|
||||
|
||||
claim_(Rsrc) ->
|
||||
case ets:update_counter(?LOCK_TAB, Rsrc, [{2, 0},
|
||||
{2, 1, 1, 1}], {Rsrc, 0}) of
|
||||
[0, 1] ->
|
||||
%% have lock
|
||||
true;
|
||||
[1, 1] ->
|
||||
false
|
||||
end.
|
||||
|
||||
%% The busy-wait function makes use of the fact that we can read a timer to find out
|
||||
%% if it still has time remaining. This reduces the need for selective receive, looking
|
||||
%% for a timeout message. We yield, then retry the claim op. Yielding at least used to
|
||||
%% also be necessary for the `read_timer/1` value to refresh.
|
||||
%%
|
||||
busy_wait(Rsrc, Timeout) ->
|
||||
Ref = erlang:send_after(Timeout, self(), {claim, Rsrc}),
|
||||
do_wait(Rsrc, Ref).
|
||||
|
||||
do_wait(Rsrc, Ref) ->
|
||||
erlang:yield(),
|
||||
case erlang:read_timer(Ref) of
|
||||
false ->
|
||||
erlang:cancel_timer(Ref),
|
||||
error(lock_wait_timeout);
|
||||
_ ->
|
||||
case claim_(Rsrc) of
|
||||
true ->
|
||||
erlang:cancel_timer(Ref),
|
||||
ok;
|
||||
false ->
|
||||
do_wait(Rsrc, Ref)
|
||||
end
|
||||
end.
|
||||
|
||||
release(Rsrc) ->
|
||||
ets:delete(?LOCK_TAB, Rsrc),
|
||||
ok.
|
||||
|
||||
|
||||
%% Called by the process holding the ets table.
|
||||
ensure_tab() ->
|
||||
case ets:info(?LOCK_TAB, name) of
|
||||
undefined ->
|
||||
ets:new(?LOCK_TAB, [set, public, named_table, {write_concurrency, true}]);
|
||||
_ ->
|
||||
true
|
||||
end.
|
285  src/mrdb_select.erl  Normal file
@ -0,0 +1,285 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
-module(mrdb_select).
|
||||
|
||||
-export([ select/3 %% (Ref, MatchSpec, Limit)
|
||||
, select/4 %% (Ref, MatchSpec, AccKeys, Limit)
|
||||
, select/1 %% (Cont)
|
||||
, fold/5 %% (Ref, Fun, Acc, MatchSpec, Limit)
|
||||
, rdb_fold/5 %% (Ref, Fun, Acc, Prefix, Limit)
|
||||
]).
|
||||
|
||||
-export([continuation_info/2]).
|
||||
|
||||
-import(mnesia_rocksdb_lib, [ keypos/1
|
||||
, decode_key/2
|
||||
, decode_val/3
|
||||
]).
|
||||
|
||||
-include("mnesia_rocksdb.hrl").
|
||||
|
||||
-record(sel, { alias % TODO: not used
|
||||
, tab
|
||||
, ref
|
||||
, keypat
|
||||
, ms % TODO: not used
|
||||
, compiled_ms
|
||||
, limit
|
||||
, key_only = false % TODO: not used
|
||||
, direction = forward % TODO: not used
|
||||
}).
|
||||
|
||||
select(Ref, MS, Limit) when is_map(Ref), is_list(MS) ->
|
||||
select(Ref, MS, false, Limit).
|
||||
|
||||
select(Ref, MS, AccKeys, Limit)
|
||||
when is_map(Ref), is_list(MS), is_boolean(AccKeys) ->
|
||||
Sel = mk_sel(Ref, MS, Limit),
|
||||
mrdb:with_rdb_iterator(Ref, fun(I) -> i_select(I, Sel, AccKeys, []) end).
|
||||
|
||||
mk_sel(#{name := Tab} = Ref, MS, Limit) ->
|
||||
Keypat = keypat(MS, keypos(Tab), Ref),
|
||||
#sel{tab = Tab,
|
||||
ref = Ref,
|
||||
keypat = Keypat,
|
||||
ms = MS,
|
||||
compiled_ms = ets:match_spec_compile(MS),
|
||||
key_only = needs_key_only(MS),
|
||||
limit = Limit}.
|
||||
|
||||
select(Cont) ->
|
||||
case Cont of
|
||||
'$end_of_table' -> '$end_of_table';
|
||||
_ when is_function(Cont, 1) ->
|
||||
Cont(cont)
|
||||
end.
|
||||
|
||||
continuation_info(Item, C) when is_atom(Item), is_function(C, 1) ->
|
||||
continuation_info_(Item, C(sel));
|
||||
continuation_info(_, _) -> undefined.
|
||||
|
||||
continuation_info_(ref, #sel{ref = Ref}) -> Ref;
|
||||
continuation_info_(ms, #sel{ms = MS }) -> MS;
|
||||
continuation_info_(limit, #sel{limit = L }) -> L;
|
||||
continuation_info_(direction, #sel{direction = Dir}) -> Dir;
|
||||
continuation_info_(_, _) -> undefined.
|
||||
|
||||
fold(Ref, Fun, Acc, MS, Limit) ->
|
||||
{AccKeys, F} =
|
||||
if is_function(Fun, 3) ->
|
||||
{true, fun({K, Obj}, Acc1) ->
|
||||
Fun(Obj, K, Acc1)
|
||||
end};
|
||||
is_function(Fun, 2) ->
|
||||
{false, Fun};
|
||||
true ->
|
||||
mrdb:abort(invalid_fold_fun)
|
||||
end,
|
||||
fold_(select(Ref, MS, AccKeys, Limit), F, Acc).
|
||||
|
||||
fold_('$end_of_table', _, Acc) ->
|
||||
Acc;
|
||||
fold_(L, Fun, Acc) when is_list(L) ->
|
||||
lists:foldl(Fun, Acc, L);
|
||||
fold_({L, Cont}, Fun, Acc) ->
|
||||
fold_(select(Cont), Fun, lists:foldl(Fun, Acc, L)).
|
||||
|
||||
rdb_fold(Ref, Fun, Acc, Prefix, Limit) ->
|
||||
mrdb:with_rdb_iterator(
|
||||
Ref, fun(I) ->
|
||||
MovRes = rocksdb:iterator_move(I, first(Ref)),
|
||||
i_rdb_fold(MovRes, I, Prefix, Fun, Acc, Limit)
|
||||
end).
|
||||
|
||||
first(#{vsn := 1}) -> <<?DATA_START>>;
|
||||
first(_) -> first.
|
||||
|
||||
i_rdb_fold({ok, K, V}, I, Pfx, Fun, Acc, Limit) when Limit > 0 ->
|
||||
case is_prefix(Pfx, K) of
|
||||
true ->
|
||||
i_rdb_fold(rocksdb:iterator_move(I, next), I, Pfx, Fun,
|
||||
Fun(K, V, Acc), decr(Limit));
|
||||
false ->
|
||||
Acc
|
||||
end;
|
||||
i_rdb_fold(_, _, _, _, Acc, _) ->
|
||||
Acc.
|
||||
|
||||
i_select(I, #sel{ keypat = Pfx
|
||||
, compiled_ms = MS
|
||||
, limit = Limit
|
||||
, ref = #{vsn := Vsn, encoding := Enc} } = Sel, AccKeys, Acc) ->
|
||||
StartKey = case {Pfx, Vsn, Enc} of
|
||||
{<<>>, 1, {sext, _}} ->
|
||||
<<?DATA_START>>;
|
||||
{_, _, {term, _}} ->
|
||||
<<>>;
|
||||
_ ->
|
||||
Pfx
|
||||
end,
|
||||
select_traverse(rocksdb:iterator_move(I, StartKey), Limit,
|
||||
Pfx, MS, I, Sel, AccKeys, Acc).
|
||||
|
||||
needs_key_only([Pat]) ->
|
||||
needs_key_only_(Pat);
|
||||
needs_key_only([_|_] = Pats) ->
|
||||
lists:all(fun needs_key_only_/1, Pats).
|
||||
|
||||
needs_key_only_({HP, _, Body}) ->
|
||||
BodyVars = lists:flatmap(fun extract_vars/1, Body),
|
||||
%% Note that we express the conditions for "needs more than key" and negate.
|
||||
not(wild_in_body(BodyVars) orelse
|
||||
case bound_in_headpat(HP) of
|
||||
{all,V} -> lists:member(V, BodyVars);
|
||||
Vars when is_list(Vars) -> any_in_body(lists:keydelete(2,1,Vars), BodyVars)
|
||||
end).
|
||||
|
||||
extract_vars([H|T]) ->
|
||||
extract_vars(H) ++ extract_vars(T);
|
||||
extract_vars(T) when is_tuple(T) ->
|
||||
extract_vars(tuple_to_list(T));
|
||||
extract_vars(T) when T=='$$'; T=='$_' ->
|
||||
[T];
|
||||
extract_vars(T) when is_atom(T) ->
|
||||
case is_wild(T) of
|
||||
true ->
|
||||
[T];
|
||||
false ->
|
||||
[]
|
||||
end;
|
||||
extract_vars(_) ->
|
||||
[].
|
||||
|
||||
any_in_body(Vars, BodyVars) ->
|
||||
lists:any(fun({_,Vs}) ->
|
||||
intersection(Vs, BodyVars) =/= []
|
||||
end, Vars).
|
||||
|
||||
intersection(A,B) when is_list(A), is_list(B) ->
|
||||
A -- (A -- B).
|
||||
|
||||
is_wild('_') ->
|
||||
true;
|
||||
is_wild(A) when is_atom(A) ->
|
||||
case atom_to_list(A) of
|
||||
"\$" ++ S ->
|
||||
try begin
|
||||
_ = list_to_integer(S),
|
||||
true
|
||||
end
|
||||
catch
|
||||
error:_ ->
|
||||
false
|
||||
end;
|
||||
_ ->
|
||||
false
|
||||
end.
|
||||
|
||||
wild_in_body(BodyVars) ->
|
||||
intersection(BodyVars, ['$$','$_']) =/= [].
|
||||
|
||||
bound_in_headpat(HP) when is_atom(HP) ->
|
||||
{all, HP};
|
||||
bound_in_headpat(HP) when is_tuple(HP) ->
|
||||
[_|T] = tuple_to_list(HP),
|
||||
map_vars(T, 2).
|
||||
|
||||
map_vars([H|T], P) ->
|
||||
case extract_vars(H) of
|
||||
[] ->
|
||||
map_vars(T, P+1);
|
||||
Vs ->
|
||||
[{P, Vs}|map_vars(T, P+1)]
|
||||
end;
|
||||
map_vars([], _) ->
|
||||
[].
|
||||
|
||||
select_traverse({ok, K, V}, Limit, Pfx, MS, I, #sel{ref = R} = Sel,
|
||||
AccKeys, Acc) ->
|
||||
case is_prefix(Pfx, K) of
|
||||
true ->
|
||||
DecKey = decode_key(K, R),
|
||||
Rec = decode_val(V, DecKey, R),
|
||||
case ets:match_spec_run([Rec], MS) of
|
||||
[] ->
|
||||
select_traverse(
|
||||
rocksdb:iterator_move(I, next), Limit, Pfx, MS,
|
||||
I, Sel, AccKeys, Acc);
|
||||
[Match] ->
|
||||
Acc1 = if AccKeys ->
|
||||
[{K, Match}|Acc];
|
||||
true ->
|
||||
[Match|Acc]
|
||||
end,
|
||||
traverse_continue(K, decr(Limit), Pfx, MS, I, Sel, AccKeys, Acc1)
|
||||
end;
|
||||
false when Limit == infinity ->
|
||||
lists:reverse(Acc);
|
||||
false ->
|
||||
{lists:reverse(Acc), '$end_of_table'}
|
||||
end;
|
||||
select_traverse({error, _}, Limit, _, _, _, _, _, Acc) ->
|
||||
select_return(Limit, {lists:reverse(Acc), '$end_of_table'}).
|
||||
|
||||
select_return(infinity, {L, '$end_of_table'}) ->
|
||||
L;
|
||||
select_return(_, Ret) ->
|
||||
Ret.
|
||||
|
||||
is_prefix(A, B) when is_binary(A), is_binary(B) ->
|
||||
Sa = byte_size(A),
|
||||
case B of
|
||||
<<A:Sa/binary, _/binary>> ->
|
||||
true;
|
||||
_ ->
|
||||
false
|
||||
end.
|
||||
|
||||
decr(I) when is_integer(I) ->
|
||||
I-1;
|
||||
decr(infinity) ->
|
||||
infinity.
|
||||
|
||||
traverse_continue(K, 0, Pfx, MS, _I, #sel{limit = Limit, ref = Ref} = Sel, AccKeys, Acc) ->
|
||||
{lists:reverse(Acc),
|
||||
fun(sel) -> Sel;
|
||||
(cont) ->
|
||||
mrdb:with_rdb_iterator(
|
||||
Ref,
|
||||
fun(NewI) ->
|
||||
select_traverse(iterator_next(NewI, K),
|
||||
Limit, Pfx, MS, NewI, Sel,
|
||||
AccKeys, [])
|
||||
end)
|
||||
end};
|
||||
traverse_continue(_K, Limit, Pfx, MS, I, Sel, AccKeys, Acc) ->
|
||||
select_traverse(rocksdb:iterator_move(I, next), Limit, Pfx, MS, I, Sel, AccKeys, Acc).
|
||||
|
||||
iterator_next(I, K) ->
|
||||
case rocksdb:iterator_move(I, K) of
|
||||
{ok, K, _} ->
|
||||
rocksdb:iterator_move(I, next);
|
||||
Other ->
|
||||
Other
|
||||
end.
|
||||
|
||||
keypat([H|T], KeyPos, Ref) ->
|
||||
keypat(T, KeyPos, Ref, keypat_pfx(H, KeyPos, Ref)).
|
||||
|
||||
keypat(_, _, _, <<>>) -> <<>>;
|
||||
keypat([H|T], KeyPos, Ref, Pfx0) ->
|
||||
Pfx = keypat_pfx(H, KeyPos, Ref),
|
||||
keypat(T, KeyPos, Ref, common_prefix(Pfx, Pfx0));
|
||||
keypat([], _, _, Pfx) ->
|
||||
Pfx.
|
||||
|
||||
common_prefix(<<H, T/binary>>, <<H, T1/binary>>) ->
|
||||
<<H, (common_prefix(T, T1))/binary>>;
|
||||
common_prefix(_, _) ->
|
||||
<<>>.
|
||||
|
||||
keypat_pfx({HeadPat,_Gs,_}, KeyPos, #{encoding := {sext,_}}) when is_tuple(HeadPat) ->
|
||||
KP = element(KeyPos, HeadPat),
|
||||
sext:prefix(KP);
|
||||
keypat_pfx(_, _, _) ->
|
||||
<<>>.
|
||||
|
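Illustrative sketch, not part of the diff: one way the mrdb_select API shown above might be driven. The table name `t`, its record shape and the limit of 30 are assumptions for the example; only select/3, select/1 and mrdb:get_ref/1 are taken from the code above.

%% Sketch under stated assumptions -- table `t` with records {t, Key, Val}.
select_example() ->
    Ref = mrdb:get_ref(t),
    MS  = [{ {t, '$1', '$2'}, [{'>', '$1', 3}], ['$_'] }],
    collect(mrdb_select:select(Ref, MS, 30)).

collect('$end_of_table')   -> [];
collect(L) when is_list(L) -> L;   %% returned when the limit is infinity
collect({Objs, Cont})      -> Objs ++ collect(mrdb_select:select(Cont)).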
@ -1,3 +1,4 @@
|
||||
%% -*- mode: erlang; erlang-indent-level: 4; indent-tabs-mode: nil -*-
|
||||
-module(mnesia_rocksdb_SUITE).
|
||||
|
||||
-export([
|
||||
@ -12,10 +13,25 @@
|
||||
, end_per_testcase/2
|
||||
]).
|
||||
|
||||
-export([error_handling/1]).
|
||||
-export([ encoding_sext_attrs/1
|
||||
, encoding_binary_binary/1
|
||||
, encoding_defaults/1
|
||||
]).
|
||||
-export([ mrdb_batch/1
|
||||
, mrdb_transactions/1
|
||||
, mrdb_abort_reasons/1
|
||||
, mrdb_repeated_transactions/1
|
||||
, mrdb_abort/1
|
||||
, mrdb_two_procs/1
|
||||
, mrdb_two_procs_tx_restart/1
|
||||
, mrdb_two_procs_snap/1
|
||||
, mrdb_three_procs/1
|
||||
]).
|
||||
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-define(TABS_CREATED, tables_created).
|
||||
|
||||
suite() ->
|
||||
[].
|
||||
|
||||
@ -23,18 +39,40 @@ all() ->
|
||||
[{group, all_tests}].
|
||||
|
||||
groups() ->
|
||||
[{all_tests, [sequence], [error_handling]}].
|
||||
[
|
||||
{all_tests, [sequence], [ {group, checks}
|
||||
, {group, mrdb} ]}
|
||||
%% , error_handling ]}
|
||||
, {checks, [sequence], [ encoding_sext_attrs
|
||||
, encoding_binary_binary
|
||||
, encoding_defaults ]}
|
||||
, {mrdb, [sequence], [ mrdb_batch
|
||||
, mrdb_transactions
|
||||
, mrdb_abort_reasons
|
||||
, mrdb_repeated_transactions
|
||||
, mrdb_abort
|
||||
, mrdb_two_procs
|
||||
, mrdb_two_procs_tx_restart
|
||||
, mrdb_two_procs_snap
|
||||
, mrdb_three_procs ]}
|
||||
].
|
||||
|
||||
|
||||
error_handling(_Config) ->
|
||||
mnesia_rocksdb_error_handling:run().
|
||||
%% error_handling(Config) ->
|
||||
%% mnesia_rocksdb_error_handling:run(Config).
|
||||
|
||||
init_per_suite(Config) ->
|
||||
Config.
|
||||
tr_ct:set_activation_checkpoint(?TABS_CREATED, Config).
|
||||
|
||||
end_per_suite(_Config) ->
|
||||
ok.
|
||||
|
||||
init_per_group(G, Config) when G==mrdb
|
||||
; G==checks ->
|
||||
mnesia:stop(),
|
||||
ok = mnesia_rocksdb_tlib:start_mnesia(reset),
|
||||
Config;
|
||||
|
||||
init_per_group(_, Config) ->
|
||||
Config.
|
||||
|
||||
@ -46,3 +84,564 @@ init_per_testcase(_, Config) ->
|
||||
|
||||
end_per_testcase(_, _Config) ->
|
||||
ok.
|
||||
|
||||
encoding_sext_attrs(Config) ->
|
||||
tr_ct:with_trace(fun encoding_sext_attrs_/1, Config,
|
||||
tr_patterns(mnesia_rocksdb,
|
||||
[{mnesia_rocksdb,'_',x}], tr_opts())).
|
||||
|
||||
encoding_sext_attrs_(Config) ->
|
||||
Created = create_tabs([{t, [{attributes, [k, v]}]}], Config),
|
||||
ok = mrdb:insert(t, {t, 1, a}),
|
||||
ok = mnesia:dirty_write({t, 2, b}),
|
||||
expect_error(fun() -> mrdb:insert(t, {t, a}) end, ?LINE,
|
||||
error, {mrdb_abort, badarg}),
|
||||
expect_error(fun() -> mnesia:dirty_write({t, a}) end, ?LINE,
|
||||
exit, '_'),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
encoding_defaults(Config) ->
|
||||
UP = fun(T) -> mnesia:table_info(T, user_properties) end,
|
||||
Created = create_tabs([ {a, [ {attributes, [k, v]}
|
||||
, {type, set}]}
|
||||
, {b, [ {attributes, [k, v, w]}
|
||||
, {type, ordered_set}]}
|
||||
, {c, [ {attributes, [k, v]}
|
||||
, {type, bag} ]}], Config),
|
||||
[{mrdb_encoding,{term,{value,term}}}] = UP(a),
|
||||
[{mrdb_encoding,{sext,{object,term}}}] = UP(b),
|
||||
[{mrdb_encoding,{sext,{value,term}}}] = UP(c),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
encoding_binary_binary(Config) ->
|
||||
Created = create_tabs([ {a, [ {attributes, [k,v]}
|
||||
, {user_properties,
|
||||
[{mrdb_encoding, {raw, raw}}]}]}
|
||||
, {b, [ {attributes, [k, v, w]}
|
||||
, {user_properties,
|
||||
[{mrdb_encoding, {raw, {object, term}}}]}]}
|
||||
], Config),
|
||||
expect_error(fun() ->
|
||||
create_tab(
|
||||
c, [ {attributes, [k, v, w]}
|
||||
, {user_properties,
|
||||
[{mrdb_encoding, {raw, {value, raw}}}]}])
|
||||
end, ?LINE, error, '_'),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
expect_error(F, Line, Type, Expected) ->
|
||||
try F() of
|
||||
Unexpected -> error({unexpected, Line, Unexpected})
|
||||
catch
|
||||
Type:Expected ->
|
||||
ct:log("Caught expected ~p:~p (Line: ~p)", [Type, Expected, Line]),
|
||||
ok;
|
||||
Type:Error when Expected == '_' ->
|
||||
ct:log("Caught expected ~p:_ (Line:~p): ~p", [Type, Line, Error]),
|
||||
ok
|
||||
end.
|
||||
|
||||
mrdb_batch(Config) ->
|
||||
Created = create_tabs([{b, []}], Config),
|
||||
D0 = get_dict(),
|
||||
mrdb:activity(
|
||||
batch, rdb,
|
||||
fun() ->
|
||||
[mrdb:insert(b, {b, K, K})
|
||||
|| K <- lists:seq(1, 10)]
|
||||
end),
|
||||
dictionary_unchanged(D0),
|
||||
[[{b,K,K}] = mrdb:read(b, K) || K <- lists:seq(1, 10)],
|
||||
expect_error(
|
||||
fun() -> mrdb:activity(
|
||||
batch, rdb,
|
||||
fun() ->
|
||||
mrdb:insert(b, {b, 11, 11}),
|
||||
error(willful_abort)
|
||||
end)
|
||||
end, ?LINE, error, '_'),
|
||||
dictionary_unchanged(D0),
|
||||
[] = mrdb:read(b, 11),
|
||||
TRef = mrdb:get_ref(b),
|
||||
mrdb:activity(
|
||||
batch, rdb,
|
||||
fun() ->
|
||||
mrdb:insert(TRef, {b, 12, 12})
|
||||
end),
|
||||
dictionary_unchanged(D0),
|
||||
[{b, 12, 12}] = mrdb:read(b, 12),
|
||||
mrdb:as_batch(b, fun(R) ->
|
||||
mrdb:insert(R, {b, 13, 13})
|
||||
end),
|
||||
dictionary_unchanged(D0),
|
||||
[{b, 13, 13}] = mrdb:read(b, 13),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
mrdb_transactions(Config) ->
|
||||
tr_ct:with_trace(fun mrdb_transactions_/1, Config,
|
||||
tr_patterns(
|
||||
mnesia_rocksdb_admin,
|
||||
[{mnesia_rocksdb_admin,'_',x}], tr_opts())).
|
||||
|
||||
mrdb_transactions_(Config) ->
|
||||
Created = create_tabs([{tx, []}], Config),
|
||||
mrdb:insert(tx, {tx, a, 1}),
|
||||
[_] = mrdb:read(tx, a),
|
||||
D0 = get_dict(),
|
||||
mrdb:activity(
|
||||
tx, rdb,
|
||||
fun() ->
|
||||
[{tx,a,N}] = mrdb:read(tx, a),
|
||||
N1 = N+1,
|
||||
ok = mrdb:insert(tx, {tx,a,N1}),
|
||||
[{tx,a,N1}] = mrdb:read(tx, a),
|
||||
ok
|
||||
end),
|
||||
dictionary_unchanged(D0),
|
||||
[{tx,a,2}] = mrdb:read(tx,a),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
mrdb_abort_reasons(_Config) ->
|
||||
Prev = mnesia_rocksdb_admin:set_and_cache_env(mnesia_compatible_aborts, true),
|
||||
X = some_value,
|
||||
compare_txs('throw', fun() -> throw(X) end),
|
||||
compare_txs('exit' , fun() -> exit(X) end),
|
||||
compare_txs('error', fun() -> error(X) end),
|
||||
compare_txs('abort', fun() -> mnesia:abort(X) end),
|
||||
compare_txs('abort' , fun() -> mrdb:abort(X) end),
|
||||
mnesia_rocksdb_admin:set_and_cache_env(mnesia_compatible_aborts, Prev),
|
||||
ok.
|
||||
|
||||
compare_txs(Type, F) ->
|
||||
{caught, exit, {aborted, EMn}} = mnesia_tx(F),
|
||||
{caught, exit, {aborted, EMr}} = mrdb_tx(F),
|
||||
ct:log("Mnesia = ~p/~p", [Type, EMn]),
|
||||
ct:log("Mrdb = ~p/~p", [Type, EMr]),
|
||||
case {Type, EMn, EMr} of
|
||||
{error, {some_value, [_|_]}, {some_value, []}} -> ok;
|
||||
{throw, {throw, some_value}, {throw, some_value}} -> ok;
|
||||
{exit, some_value, some_value} -> ok;
|
||||
{abort, some_value, some_value} -> ok
|
||||
end.
|
||||
|
||||
mnesia_tx(F) ->
|
||||
try
|
||||
mnesia:activity(transaction, F)
|
||||
catch
|
||||
C:E ->
|
||||
{caught, C, E}
|
||||
end.
|
||||
|
||||
mrdb_tx(F) ->
|
||||
try
|
||||
mrdb:activity(transaction, rdb, F)
|
||||
catch
|
||||
C:E ->
|
||||
{caught, C, E}
|
||||
end.
|
||||
|
||||
mrdb_repeated_transactions(Config) ->
|
||||
Created = create_tabs([{rtx, []}], Config),
|
||||
mrdb:insert(rtx, {rtx, a, 0}),
|
||||
[_] = mrdb:read(rtx, a),
|
||||
Fun = fun() ->
|
||||
[{rtx, a, N}] = mrdb:read(rtx, a),
|
||||
N1 = N+1,
|
||||
ok = mrdb:insert(rtx, {rtx, a, N1})
|
||||
end,
|
||||
D0 = get_dict(),
|
||||
[ok = mrdb:activity(tx, rdb, Fun) || _ <- lists:seq(1,100)],
|
||||
dictionary_unchanged(D0),
|
||||
[{rtx,a,100}] = mrdb:read(rtx, a),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
mrdb_abort(Config) ->
|
||||
Created = create_tabs([{tx_abort, []}], Config),
|
||||
mrdb:insert(tx_abort, {tx_abort, a, 1}),
|
||||
Pre = mrdb:read(tx_abort, a),
|
||||
D0 = get_dict(),
|
||||
TRes = try mrdb:activity(
|
||||
tx, rdb,
|
||||
fun() ->
|
||||
[{tx_abort, a, N}] = mrdb:read(tx_abort, a),
|
||||
error(abort_here),
|
||||
ok = mrdb:insert(tx_abort, [{tx_abort, a, N+1}]),
|
||||
noooo
|
||||
end)
|
||||
catch
|
||||
error:abort_here ->
|
||||
ok
|
||||
end,
|
||||
dictionary_unchanged(D0),
|
||||
ok = TRes,
|
||||
Pre = mrdb:read(tx_abort, a),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
mrdb_two_procs(Config) ->
|
||||
tr_ct:with_trace(fun mrdb_two_procs_/1, Config,
|
||||
tr_flags(
|
||||
{self(), [call, sos, p]},
|
||||
tr_patterns(
|
||||
mrdb, [ {mrdb, insert, 2, x}
|
||||
, {mrdb, read, 2, x}
|
||||
, {mrdb, activity, x}], tr_opts()))).
|
||||
|
||||
mrdb_two_procs_(Config) ->
|
||||
R = ?FUNCTION_NAME,
|
||||
Parent = self(),
|
||||
Created = create_tabs([{R, []}], Config),
|
||||
mrdb:insert(R, {R, a, 1}),
|
||||
Pre = mrdb:read(R, a),
|
||||
F0 = fun() ->
|
||||
wait_for_other(Parent, ?LINE),
|
||||
ok = mrdb:insert(R, {R, a, 17}),
|
||||
wait_for_other(Parent, ?LINE)
|
||||
end,
|
||||
{POther, MRef} = spawn_opt(
|
||||
fun() ->
|
||||
D0 = get_dict(),
|
||||
ok = mrdb:activity(tx, rdb, F0),
|
||||
dictionary_unchanged(D0)
|
||||
end, [monitor]),
|
||||
F1 = fun() ->
|
||||
Pre = mrdb:read(R, a),
|
||||
go_ahead_other(POther),
|
||||
await_other_down(POther, MRef, ?LINE),
|
||||
[{R, a, 17}] = mrdb:read(R, a),
|
||||
ok = mrdb:insert(R, {R, a, 18})
|
||||
end,
|
||||
go_ahead_other(1, POther),
|
||||
Do0 = get_dict(),
|
||||
try mrdb:activity({tx, #{no_snapshot => true,
|
||||
retries => 0}}, rdb, F1) of
|
||||
ok -> error(unexpected)
|
||||
catch
|
||||
error:{error, "Resource busy" ++ _} ->
|
||||
ok
|
||||
end,
|
||||
dictionary_unchanged(Do0),
|
||||
[{R, a, 17}] = mrdb:read(R, a),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
mrdb_two_procs_tx_restart(Config) ->
|
||||
tr_ct:with_trace(fun mrdb_two_procs_tx_restart_/1, Config,
|
||||
light_tr_opts()).
|
||||
|
||||
mrdb_two_procs_tx_restart_(Config) ->
|
||||
R = ?FUNCTION_NAME,
|
||||
Parent = self(),
|
||||
Created = create_tabs([{R, []}], Config),
|
||||
mrdb:insert(R, {R, a, 1}),
|
||||
Pre = mrdb:read(R, a),
|
||||
F0 = fun() ->
|
||||
wait_for_other(Parent, ?LINE),
|
||||
ok = mrdb:insert(R, {R, a, 17}),
|
||||
wait_for_other(Parent, ?LINE)
|
||||
end,
|
||||
{POther, MRef} = spawn_opt(
|
||||
fun() ->
|
||||
ok = mrdb:activity(tx, rdb, F0)
|
||||
end, [monitor]),
|
||||
F1 = fun() ->
|
||||
OtherWrite = [{R, a, 17}],
|
||||
Att = get_attempt(),
|
||||
Expected = case Att of
|
||||
1 -> Pre;
|
||||
_ -> OtherWrite
|
||||
end,
|
||||
Expected = mrdb:read(R, a),
|
||||
go_ahead_other(POther),
|
||||
await_other_down(POther, MRef, ?LINE),
|
||||
OtherWrite = mrdb:read(R, a),
|
||||
ok = mrdb:insert(R, {R, a, 18})
|
||||
end,
|
||||
go_ahead_other(1, POther),
|
||||
Do0 = get_dict(),
|
||||
mrdb:activity({tx, #{no_snapshot => true}}, rdb, F1),
|
||||
dictionary_unchanged(Do0),
|
||||
[{R, a, 18}] = mrdb:read(R, a),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
|
||||
%%
%% For testing purposes, we use side-effects inside the transactions
%% to synchronize the concurrent transactions. If a transaction fails due
%% to "Resource busy", it can re-run, but must then not attempt to sync with
%% the other transaction, which has already committed.
%%
%% To achieve this, we rely on the `mrdb:current_context()` function, which
%% tells us which attempt is the current one; we only sync on the first
%% attempt, and ignore the sync ops on retries.
%%
|
||||
-define(IF_FIRST(N, Expr),
|
||||
if N == 1 ->
|
||||
Expr;
|
||||
true ->
|
||||
ok
|
||||
end).
|
||||
|
||||
mrdb_two_procs_snap(Config) ->
|
||||
%% _snap is now the default tx mode
|
||||
R = ?FUNCTION_NAME,
|
||||
Parent = self(),
|
||||
Created = create_tabs([{R, []}], Config),
|
||||
mrdb:insert(R, {R, a, 1}),
|
||||
Pre = mrdb:read(R, a),
|
||||
mrdb:insert(R, {R, b, 11}),
|
||||
PreB = mrdb:read(R, b),
|
||||
F0 = fun() ->
|
||||
ok = mrdb:insert(R, {R, a, 17}),
|
||||
wait_for_other(Parent, ?LINE)
|
||||
end,
|
||||
{POther, MRef} =
|
||||
spawn_opt(fun() ->
|
||||
D0 = get_dict(),
|
||||
ok = mrdb:activity(tx, rdb, F0),
|
||||
dictionary_unchanged(D0)
|
||||
end, [monitor]),
|
||||
F1 = fun() ->
|
||||
Att = get_attempt(),
|
||||
go_ahead_other(Att, POther),
|
||||
ARes = mrdb:read(R, a),
|
||||
ARes = case Att of
|
||||
1 -> Pre;
|
||||
2 -> [{R, a, 17}]
|
||||
end,
|
||||
await_other_down(POther, MRef, ?LINE),
|
||||
PreB = mrdb:read(R, b),
|
||||
mrdb:insert(R, {R, b, 18}),
|
||||
1477
|
||||
end,
|
||||
Do0 = get_dict(),
|
||||
1477 = mrdb:activity(tx, rdb, F1),
|
||||
dictionary_unchanged(Do0),
|
||||
[{R, a, 17}] = mrdb:read(R, a),
|
||||
[{R, b, 18}] = mrdb:read(R, b),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
%% We spawn two helper processes, making it 3 transactions, counting the one
%% in the parent process. P2 writes to key `a`, which the other two try to read.
%% We make sure that P2 commits before the other two finish, and that P3 and the
%% main process sync, so as to maximize contention for the retry lock.
|
||||
mrdb_three_procs(Config) ->
|
||||
tr_ct:with_trace(fun mrdb_three_procs_/1, Config, light_tr_opts()).
|
||||
|
||||
mrdb_three_procs_(Config) ->
|
||||
R = ?FUNCTION_NAME,
|
||||
Parent = self(),
|
||||
Created = create_tabs([{R, []}], Config),
|
||||
A0 = {R, a, 1},
|
||||
A1 = {R, a, 11},
|
||||
A2 = {R, a, 12},
|
||||
ok = mrdb:insert(R, A0),
|
||||
F1 = fun() ->
|
||||
ok = mrdb:insert(R, A1),
|
||||
ok = mrdb:insert(R, {R, p1, 1})
|
||||
end,
|
||||
{P1, MRef1} =
|
||||
spawn_opt(fun() ->
|
||||
D0 = get_dict(),
|
||||
do_when_p_allows(
|
||||
1, Parent, ?LINE,
|
||||
fun() ->
|
||||
ok = mrdb:activity({tx,#{retries => 0}}, rdb, F1)
|
||||
end),
|
||||
dictionary_unchanged(D0)
|
||||
end, [monitor]),
|
||||
F2 = fun() ->
|
||||
[A0] = mrdb:read(R, a),
|
||||
Att = get_attempt(),
|
||||
wait_for_other(Att, Parent, ?LINE),
|
||||
do_when_p_allows(
|
||||
Att, Parent, ?LINE,
|
||||
fun() ->
|
||||
[A1] = mrdb:read(R, a),
|
||||
ok = mrdb:insert(R, A2),
|
||||
ok = mrdb:insert(R, {R, p2, 1})
|
||||
end)
|
||||
end,
|
||||
{P2, MRef2} =
|
||||
spawn_opt(fun() ->
|
||||
D0 = get_dict(),
|
||||
try mrdb:activity(
|
||||
{tx, #{retries => 0,
|
||||
no_snapshot => true}}, rdb, F2) of
|
||||
ok -> error(unexpected)
|
||||
catch
|
||||
error:{error, "Resource busy" ++ _} ->
|
||||
ok
|
||||
end,
|
||||
dictionary_unchanged(D0)
|
||||
end, [monitor]),
|
||||
Do0 = get_dict(),
|
||||
ok = mrdb:activity(tx, rdb,
|
||||
fun() ->
|
||||
Att = get_attempt(),
|
||||
ARes = case Att of
|
||||
1 -> [A0];
|
||||
2 -> [A1]
|
||||
end,
|
||||
%% First, ensure that P2 tx is running
|
||||
go_ahead_other(Att, P2),
|
||||
ARes = mrdb:read(R, a),
|
||||
allow_p(Att, P1, ?LINE),
|
||||
ARes = mrdb:read(R, a),
|
||||
allow_p(Att, P2, ?LINE),
|
||||
ARes = mrdb:read(R, a),
|
||||
await_other_down(P1, MRef1, ?LINE),
|
||||
await_other_down(P2, MRef2, ?LINE),
|
||||
ok = mrdb:insert(R, {R, p0, 1})
|
||||
end),
|
||||
dictionary_unchanged(Do0),
|
||||
[{R, p1, 1}] = mrdb:read(R, p1),
|
||||
[] = mrdb:read(R, p2),
|
||||
[A1] = mrdb:read(R, a),
|
||||
[{R, p0, 1}] = mrdb:read(R, p0),
|
||||
delete_tabs(Created),
|
||||
ok.
|
||||
|
||||
tr_opts() ->
|
||||
#{patterns => [ {mrdb, '_', '_', x}
|
||||
, {mrdb_lib, '_', '_', x}
|
||||
, {tr_ttb, event, 3, []}
|
||||
, {?MODULE, go_ahead_other, 3, x}
|
||||
, {?MODULE, wait_for_other, 3, x}
|
||||
, {?MODULE, await_other_down, 3, x}
|
||||
, {?MODULE, do_when_p_allows, 4, x}
|
||||
, {?MODULE, allow_p, 3, x}
|
||||
]}.
|
||||
|
||||
light_tr_opts() ->
|
||||
tr_flags(
|
||||
{self(), [call, sos, p]},
|
||||
tr_patterns(
|
||||
mrdb, [ {mrdb, insert, 2, x}
|
||||
, {mrdb, read, 2, x}
|
||||
, {mrdb, activity, x} ], tr_opts())).
|
||||
|
||||
tr_patterns(Mod, Ps, #{patterns := Pats} = Opts) ->
|
||||
Pats1 = [P || P <- Pats, element(1,P) =/= Mod],
|
||||
Opts#{patterns => Ps ++ Pats1}.
|
||||
|
||||
tr_flags(Flags, Opts) when is_map(Opts) ->
|
||||
Opts#{flags => Flags}.
|
||||
|
||||
wait_for_other(Parent, L) ->
|
||||
wait_for_other(get_attempt(), Parent, 1000, L).
|
||||
|
||||
wait_for_other(Att, Parent, L) ->
|
||||
wait_for_other(Att, Parent, 1000, L).
|
||||
|
||||
wait_for_other(1, Parent, Timeout, L) ->
|
||||
MRef = monitor(process, Parent),
|
||||
Parent ! {self(), ready},
|
||||
receive
|
||||
{Parent, cont} ->
|
||||
demonitor(MRef),
|
||||
ok;
|
||||
{'DOWN', MRef, _, _, Reason} ->
|
||||
ct:log("Parent died, Reason = ~p", [Reason]),
|
||||
exit(Reason)
|
||||
after Timeout ->
|
||||
demonitor(MRef),
|
||||
error({inner_timeout, L})
|
||||
end;
|
||||
wait_for_other(_, _, _, _) ->
|
||||
ok.
|
||||
|
||||
do_when_p_allows(Att, P, Line, F) ->
|
||||
wait_for_other(Att, P, Line),
|
||||
F(),
|
||||
%% Tell P that we're done
|
||||
go_ahead_other(Att, P, Line),
|
||||
%% Wait for P to acknowledge
|
||||
wait_for_other(Att, P, Line).
|
||||
|
||||
allow_p(Att, P, Line) ->
|
||||
go_ahead_other(Att, P),
|
||||
%% This is where P does its thing.
|
||||
wait_for_other(Att, P, Line),
|
||||
%% Acknowledge
|
||||
go_ahead_other(Att, P, Line).
|
||||
|
||||
go_ahead_other(POther) ->
|
||||
go_ahead_other(get_attempt(), POther).
|
||||
|
||||
go_ahead_other(Att, POther) ->
|
||||
go_ahead_other(Att, POther, 1000).
|
||||
|
||||
go_ahead_other(Att, POther, Timeout) ->
|
||||
?IF_FIRST(Att, go_ahead_other_(POther, Timeout)).
|
||||
|
||||
go_ahead_other_(POther, Timeout) ->
|
||||
receive
|
||||
{POther, ready} ->
|
||||
POther ! {self(), cont}
|
||||
after Timeout ->
|
||||
error(go_ahead_timeout)
|
||||
end.
|
||||
|
||||
%% Due to transaction restarts, we may already have collected
%% a DOWN message. In this case, P will already be dead, and there
%% will not be a 'DOWN' message still in the msg queue.
%% This is fine (we assume it is), and we just make sure that the
%% process didn't die abnormally.
|
||||
await_other_down(P, MRef, Line) ->
|
||||
Attempt = get_attempt(),
|
||||
?IF_FIRST(Attempt, await_other_down_(P, MRef, Line)).
|
||||
|
||||
await_other_down_(P, MRef, Line) ->
|
||||
receive {'DOWN', MRef, _, _, Reason} ->
|
||||
case Reason of
|
||||
normal -> ok;
|
||||
_ ->
|
||||
error({abnormal_termination,
|
||||
[ {pid, P}
|
||||
, {mref, MRef}
|
||||
, {line, Line}
|
||||
, {reason, Reason}]})
|
||||
end
|
||||
after 1000 ->
|
||||
error({monitor_timeout, Line})
|
||||
end.
|
||||
|
||||
get_attempt() ->
|
||||
#{activity := #{attempt := Attempt}} = mrdb:current_context(),
|
||||
Attempt.
|
||||
|
||||
create_tabs(Tabs, Config) ->
|
||||
Res = lists:map(fun create_tab/1, Tabs),
|
||||
tr_ct:trace_checkpoint(?TABS_CREATED, Config),
|
||||
Res.
|
||||
|
||||
create_tab({T, Opts}) -> create_tab(T, Opts).
|
||||
|
||||
create_tab(T, Opts) ->
|
||||
{atomic, ok} = mnesia:create_table(T, [{rdb,[node()]} | Opts]),
|
||||
T.
|
||||
|
||||
delete_tabs(Tabs) ->
|
||||
[{atomic,ok} = mnesia:delete_table(T) || T <- Tabs],
|
||||
ok.
|
||||
|
||||
get_dict() ->
|
||||
{dictionary, D} = process_info(self(), dictionary),
|
||||
[X || {K,_} = X <- D,
|
||||
K =/= log_timestamp].
|
||||
|
||||
dictionary_unchanged(Old) ->
|
||||
New = get_dict(),
|
||||
#{ deleted := []
|
||||
, added := [] } = #{ deleted => Old -- New
|
||||
, added => New -- Old },
|
||||
ok.
|
||||
|
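Illustrative sketch, not part of the diff: the basic mrdb transaction pattern that the suite above exercises repeatedly. The table layout and the counter update are assumptions; mrdb:activity/3, mrdb:read/2 and mrdb:insert/2 are the calls used in the tests.

%% Sketch under stated assumptions -- records of the form {Tab, Key, Counter}.
incr(Tab, Key) ->
    mrdb:activity(tx, rdb,
                  fun() ->
                          [{Tab, Key, N}] = mrdb:read(Tab, Key),
                          ok = mrdb:insert(Tab, {Tab, Key, N + 1})
                  end).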
@ -1,116 +0,0 @@
|
||||
-module(mnesia_rocksdb_error_handling).
|
||||
|
||||
-export([run/0,
|
||||
run/4]).
|
||||
|
||||
|
||||
run() ->
|
||||
setup(),
|
||||
%% run only one test for 'fatal', to save time.
|
||||
[run(Type, Op, L, MaintainSz) || MaintainSz <- [false, true],
|
||||
Type <- [set, bag],
|
||||
Op <- [insert, update, delete],
|
||||
L <- levels()]
|
||||
++ [run(set, insert, fatal, false)].
|
||||
|
||||
run(Type, Op, Level, MaintainSz) ->
|
||||
setup(),
|
||||
{ok, Tab} = create_tab(Type, Level, MaintainSz),
|
||||
mnesia:dirty_write({Tab, a, 1}), % pre-existing data
|
||||
with_mock(Level, Op, Tab, fun() ->
|
||||
try_write(Op, Type, Tab),
|
||||
expect_error(Level, Tab)
|
||||
end).
|
||||
|
||||
levels() ->
|
||||
[debug, verbose, warning, error].
|
||||
|
||||
setup() ->
|
||||
mnesia:stop(),
|
||||
start_mnesia().
|
||||
|
||||
create_tab(Type, Level, MaintainSz) ->
|
||||
TabName = tab_name(Type, Level, MaintainSz),
|
||||
%% create error store before the table
|
||||
case ets:info(?MODULE) of
|
||||
undefined ->
|
||||
?MODULE = ets:new(?MODULE, [bag, public, named_table]),
|
||||
ok;
|
||||
_ ->
|
||||
ok
|
||||
end,
|
||||
UserProps = user_props(Level, MaintainSz),
|
||||
{atomic, ok} = mnesia:create_table(TabName, [{rdb, [node()]},
|
||||
{user_properties, UserProps}]),
|
||||
{ok, TabName}.
|
||||
|
||||
tab_name(Type, Level, MaintainSz) ->
|
||||
binary_to_atom(iolist_to_binary(
|
||||
["t" | [["_", atom_to_list(A)]
|
||||
|| A <- [?MODULE, Type, Level, MaintainSz]]]), utf8).
|
||||
|
||||
user_props(Level, MaintainSz) ->
|
||||
[{maintain_sz, MaintainSz},
|
||||
{rocksdb_opts, [ {on_write_error, Level}
|
||||
, {on_write_error_store, ?MODULE} ]}].
|
||||
|
||||
start_mnesia() ->
|
||||
mnesia_rocksdb_tlib:start_mnesia(reset),
|
||||
ok.
|
||||
|
||||
with_mock(Level, Op, Tab, F) ->
|
||||
mnesia:subscribe(system),
|
||||
mnesia:set_debug_level(debug),
|
||||
meck:new(mnesia_rocksdb_lib, [passthrough]),
|
||||
meck:expect(mnesia_rocksdb_lib, put, 4, {error, some_put_error}),
|
||||
meck:expect(mnesia_rocksdb_lib, write, 3, {error, some_write_error}),
|
||||
meck:expect(mnesia_rocksdb_lib, delete, 3, {error,some_delete_error}),
|
||||
try {Level, Op, Tab, F()} of
|
||||
{_, _, _, ok} ->
|
||||
ok;
|
||||
Other ->
|
||||
io:fwrite("OTHER: ~p~n", [Other]),
|
||||
ok
|
||||
catch
|
||||
exit:{{aborted,_},_} ->
|
||||
Level = error,
|
||||
ok
|
||||
after
|
||||
mnesia:set_debug_level(none),
|
||||
mnesia:unsubscribe(system),
|
||||
meck:unload(mnesia_rocksdb_lib)
|
||||
end.
|
||||
|
||||
try_write(insert, set, Tab) ->
|
||||
mnesia:dirty_write({Tab, b, 2});
|
||||
try_write(insert, bag, Tab) ->
|
||||
mnesia:dirty_write({Tab, a, 2});
|
||||
try_write(update, _, Tab) ->
|
||||
mnesia:dirty_write({Tab, a, 1});
|
||||
try_write(delete, _, Tab) ->
|
||||
mnesia:dirty_delete({Tab, a}).
|
||||
|
||||
|
||||
expect_error(Level, Tab) ->
|
||||
Tag = rpt_tag(Level),
|
||||
receive
|
||||
{mnesia_system_event, {mnesia_fatal, Fmt, Args, _Core}} ->
|
||||
Tag = mnesia_fatal,
|
||||
io:fwrite("EVENT(~p, ~p):~n ~s", [Tag, Tab, io_lib:fwrite(Fmt, Args)]),
|
||||
ok;
|
||||
{mnesia_system_event, {Tag, Fmt, Args}} ->
|
||||
io:fwrite("EVENT(~p, ~p):~n ~s", [Tag, Tab, io_lib:fwrite(Fmt, Args)]),
|
||||
ok
|
||||
after 1000 ->
|
||||
error({expected_error, [Level, Tab]})
|
||||
|
||||
end,
|
||||
%% Also verify that an error entry has been written into the error store.
|
||||
1 = ets:select_delete(?MODULE, [{{{Tab, '_'}, '_', '_'}, [], [true]}]),
|
||||
ok.
|
||||
|
||||
rpt_tag(fatal ) -> mnesia_fatal;
|
||||
rpt_tag(error ) -> mnesia_error;
|
||||
rpt_tag(warning) -> mnesia_warning;
|
||||
rpt_tag(verbose) -> mnesia_info;
|
||||
rpt_tag(debug ) -> mnesia_info.
|
@ -22,7 +22,7 @@
|
||||
|
||||
-define(m(A,B), fun() -> L = ?LINE,
|
||||
case {A,B} of
|
||||
{__X, __X} ->
|
||||
{X__, X__} ->
|
||||
B;
|
||||
Other ->
|
||||
error({badmatch, [Other,
|
||||
|
@ -16,13 +16,51 @@
|
||||
%% under the License.
|
||||
%%----------------------------------------------------------------
|
||||
|
||||
-module(mnesia_rocksdb_indexes).
|
||||
-module(mnesia_rocksdb_indexes_SUITE).
|
||||
|
||||
-export([
|
||||
all/0
|
||||
, groups/0
|
||||
, suite/0
|
||||
, init_per_suite/1
|
||||
, end_per_suite/1
|
||||
, init_per_group/2
|
||||
, end_per_group/2
|
||||
, init_per_testcase/2
|
||||
, end_per_testcase/2
|
||||
]).
|
||||
|
||||
-export([
|
||||
index_plugin_mgmt/1
|
||||
, add_indexes/1
|
||||
, create_bag_index/1
|
||||
, create_ordered_index/1
|
||||
, test_1_ram_copies/1
|
||||
, test_1_disc_copies/1
|
||||
, fail_1_disc_only/1
|
||||
, plugin_ram_copies1/1
|
||||
, plugin_ram_copies2/1
|
||||
, plugin_disc_copies/1
|
||||
, fail_plugin_disc_only/1
|
||||
, plugin_disc_copies_bag/1
|
||||
, plugin_rdb_ordered/1
|
||||
, index_iterator/1
|
||||
]).
|
||||
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-export([run/0,
|
||||
run/1,
|
||||
r1/0]).
|
||||
|
||||
-define(TAB(T), list_to_atom(lists:flatten(io_lib:fwrite("~w_~w", [T, ?LINE])))).
|
||||
|
||||
run() ->
|
||||
run([]).
|
||||
|
||||
run(Config) ->
|
||||
mnesia:stop(),
|
||||
maybe_set_dir(Config),
|
||||
ok = mnesia_rocksdb_tlib:start_mnesia(reset),
|
||||
test(1, ram_copies, r1),
|
||||
test(1, disc_copies, d1),
|
||||
@ -33,15 +71,86 @@ run() ->
|
||||
add_del_indexes(),
|
||||
{atomic,ok} = mnesia_schema:add_index_plugin(
|
||||
{pfx},mnesia_rocksdb, ix_prefixes),
|
||||
test_index_plugin(pr1, ram_copies, ordered),
|
||||
test_index_plugin(pr2, ram_copies, bag),
|
||||
test_index_plugin(pd1, disc_copies, ordered),
|
||||
fail(test_index_plugin, [pd2, disc_only_copies, ordered]),
|
||||
test_index_plugin(pd2, disc_copies, bag),
|
||||
test_index_plugin(pl2, rdb, ordered),
|
||||
test_index_plugin_mgmt(),
|
||||
test_index_plugin(cfg([pr1, ram_copies, ordered], Config)),
|
||||
test_index_plugin(cfg([pr2, ram_copies, bag], Config)),
|
||||
test_index_plugin(cfg([pd1, disc_copies, ordered], Config)),
|
||||
fail(test_index_plugin, [cfg([pd2, disc_only_copies, ordered], Config)]),
|
||||
test_index_plugin(cfg([pd2, disc_copies, bag], Config)),
|
||||
test_index_plugin(cfg([pl2, rdb, ordered], Config)),
|
||||
index_plugin_mgmt(Config),
|
||||
ok.
|
||||
|
||||
suite() ->
|
||||
[].
|
||||
|
||||
all() ->
|
||||
[{group, all_tests}].
|
||||
|
||||
groups() ->
|
||||
[
|
||||
{all_tests, [sequence], [ {group, mgmt}, {group, access}, {group, plugin} ]}
|
||||
, {mgmt, [sequence], [
|
||||
create_bag_index
|
||||
, create_ordered_index
|
||||
, index_plugin_mgmt
|
||||
, add_indexes
|
||||
]}
|
||||
, {access, [sequence], [
|
||||
test_1_ram_copies
|
||||
, test_1_disc_copies
|
||||
, fail_1_disc_only
|
||||
, index_iterator
|
||||
]}
|
||||
, {plugin, [sequence], [
|
||||
plugin_ram_copies1
|
||||
, plugin_ram_copies2
|
||||
, plugin_disc_copies
|
||||
, fail_plugin_disc_only
|
||||
, plugin_disc_copies_bag
|
||||
, plugin_rdb_ordered
|
||||
]}
|
||||
].
|
||||
|
||||
%% ======================================================================
|
||||
|
||||
init_per_suite(Config) ->
|
||||
mnesia:stop(),
|
||||
maybe_set_dir(Config),
|
||||
Config.
|
||||
|
||||
end_per_suite(_) ->
|
||||
ok.
|
||||
|
||||
init_per_group(Grp, Config) ->
|
||||
mnesia_rocksdb_tlib:restart_reset_mnesia(),
|
||||
case Grp of
|
||||
plugin ->
|
||||
{atomic,ok} = mnesia_schema:add_index_plugin(
|
||||
{pfx},mnesia_rocksdb, ix_prefixes);
|
||||
_ ->
|
||||
ok
|
||||
end,
|
||||
Config.
|
||||
|
||||
end_per_group(_, _) ->
|
||||
ok.
|
||||
|
||||
init_per_testcase(_, Config) ->
|
||||
Config.
|
||||
|
||||
end_per_testcase(_, _) ->
|
||||
ok.
|
||||
|
||||
%% ======================================================================
|
||||
|
||||
cfg([Tab, Type, IxType], Config) ->
|
||||
[{my_config, #{tab => Tab, type => Type, ixtype => IxType}} | Config];
|
||||
cfg(Cfg, Config) when is_map(Cfg) -> [{my_config, Cfg} | Config].
|
||||
|
||||
cfg(Config) -> ?config(my_config, Config).
|
||||
|
||||
%% ======================================================================
|
||||
|
||||
r1() ->
|
||||
mnesia:stop(),
|
||||
ok = mnesia_rocksdb_tlib:start_mnesia(reset),
|
||||
@ -51,17 +160,28 @@ r1() ->
|
||||
dbg:tpl(mnesia_schema,x),
|
||||
dbg:tpl(mnesia_index,x),
|
||||
dbg:p(all,[c]),
|
||||
test_index_plugin(pd2, disc_only_copies, ordered).
|
||||
test_index_plugin(cfg([pd2, disc_only_copies, ordered], [])).
|
||||
|
||||
fail(F, Args) ->
|
||||
try apply(?MODULE, F, Args),
|
||||
error(should_fail)
|
||||
catch
|
||||
error:_ ->
|
||||
error:R when R =/= should_fail ->
|
||||
io:fwrite("apply(~p, ~p, ~p) -> fails as expected~n",
|
||||
[?MODULE, F, Args])
|
||||
end.
|
||||
|
||||
test_1_ram_copies( _Cfg) -> test(1, ram_copies, r1).
|
||||
test_1_disc_copies(_Cfg) -> test(1, disc_copies, d1).
|
||||
fail_1_disc_only( _Cfg) -> fail(test, [1, disc_only_copies, do1]).
|
||||
|
||||
plugin_ram_copies1(Cfg) -> test_index_plugin(cfg([pr1, ram_copies, ordered], Cfg)).
|
||||
plugin_ram_copies2(Cfg) -> test_index_plugin(cfg([pr2, ram_copies, bag], Cfg)).
|
||||
plugin_disc_copies(Cfg) -> test_index_plugin(cfg([pd1, disc_copies, ordered], Cfg)).
|
||||
fail_plugin_disc_only(Cfg) -> fail(test_index_plugin, [cfg([pd2, disc_only_copies, ordered], Cfg)]).
|
||||
plugin_disc_copies_bag(Cfg) -> test_index_plugin(cfg([pd2, disc_copies, bag], Cfg)).
|
||||
plugin_rdb_ordered(Cfg) -> test_index_plugin(cfg([pl2, rdb, ordered], Cfg)).
|
||||
|
||||
test(N, Type, T) ->
|
||||
{atomic, ok} = mnesia:create_table(T, [{Type,[node()]},
|
||||
{attributes,[k,a,b,c]},
|
||||
@ -81,7 +201,8 @@ add_del_indexes() ->
|
||||
{atomic, ok} = mnesia:add_table_index(l1, a),
|
||||
io:fwrite("add_del_indexes() -> ok~n", []).
|
||||
|
||||
test_index_plugin(Tab, Type, IxType) ->
|
||||
test_index_plugin(Config) ->
|
||||
#{tab := Tab, type := Type, ixtype := IxType} = cfg(Config),
|
||||
{atomic, ok} = mnesia:create_table(Tab, [{Type, [node()]},
|
||||
{index, [{{pfx}, IxType}]}]),
|
||||
mnesia:dirty_write({Tab, "foobar", "sentence"}),
|
||||
@ -100,10 +221,25 @@ test_index_plugin(Tab, Type, IxType) ->
|
||||
Res2 = lists:sort(mnesia:dirty_index_read(Tab,<<"whi">>, {pfx})),
|
||||
[{Tab,"foobar","sentence"}] = mnesia:dirty_index_read(
|
||||
Tab, <<"foo">>, {pfx})
|
||||
end,
|
||||
io:fwrite("test_index_plugin(~p, ~p, ~p) -> ok~n", [Tab,Type,IxType]).
|
||||
end.
|
||||
|
||||
test_index_plugin_mgmt() ->
|
||||
create_bag_index(_Config) ->
|
||||
{aborted, {combine_error, _, _}} =
|
||||
mnesia:create_table(bi, [{rdb, [node()]}, {index, [{val, bag}]}]),
|
||||
ok.
|
||||
|
||||
create_ordered_index(_Config) ->
|
||||
{atomic, ok} =
|
||||
mnesia:create_table(oi, [{rdb, [node()]}, {index, [{val, ordered}]}]),
|
||||
ok.
|
||||
|
||||
add_indexes(_Config) ->
|
||||
T = ?TAB(t1),
|
||||
{atomic, ok} = mnesia:create_table(T, [{rdb, [node()]}, {attributes, [k, a, b, c]}]),
|
||||
{atomic, ok} = mnesia:add_table_index(T, a),
|
||||
ok.
|
||||
|
||||
index_plugin_mgmt(_Config) ->
|
||||
{aborted,_} = mnesia:create_table(x, [{index,[{unknown}]}]),
|
||||
{aborted,_} = mnesia:create_table(x, [{index,[{{unknown},bag}]}]),
|
||||
{aborted,_} = mnesia:create_table(x, [{index,[{{unknown},ordered}]}]),
|
||||
@ -166,9 +302,48 @@ test_index(3, T) ->
|
||||
io:fwrite("test_index(1, ~p) -> ok~n", [T]),
|
||||
ok.
|
||||
|
||||
index_iterator(_Cfg) ->
|
||||
T = ?TAB(it),
|
||||
Attrs = [ {rdb,[node()]}
|
||||
, {record_name, i}
|
||||
, {attributes, [k,a,b]}
|
||||
, {index, [a,b]} ],
|
||||
{atomic, ok} = mnesia:create_table(T, Attrs),
|
||||
ct:log("created tab T=~p: ~p", [T, Attrs]),
|
||||
L1 = [{i,K,a,y} || K <- lists:seq(4,6)],
|
||||
L2 = [{i,K,b,x} || K <- lists:seq(1,3)],
|
||||
true = lists:all(fun(X) -> X == ok end,
|
||||
[mnesia:dirty_write(T, Obj) || Obj <- L1 ++ L2]),
|
||||
ct:log("inserted ~p", [L1 ++ L2]),
|
||||
ResA = [{a,X} || X <- L1] ++ [{b,Y} || Y <- L2],
|
||||
ResB = [{x,X} || X <- L2] ++ [{y,Y} || Y <- L1],
|
||||
F = fun iter_all/1,
|
||||
ResA = mrdb_index:with_iterator(T, a, F),
|
||||
ct:log("mrdb_index:with_iterator(T, a, F) -> ~p", [ResA]),
|
||||
ResB = mrdb_index:with_iterator(T, b, F),
|
||||
ct:log("mrdb_index:with_iterator(T, b, F) -> ~p", [ResB]),
|
||||
ok.
|
||||
|
||||
iter_all(I) ->
|
||||
iter_all(mrdb_index:iterator_move(I, first), I).
|
||||
|
||||
iter_all({ok, IxVal, Obj}, I) ->
|
||||
[{IxVal, Obj} | iter_all(mrdb_index:iterator_move(I, next), I)];
|
||||
iter_all(_, _) ->
|
||||
[].
|
||||
|
||||
indexes(1) ->
|
||||
[a,{b,ordered},{c,bag}];
|
||||
indexes(2) ->
|
||||
[a,b,{c,bag}];
|
||||
indexes(3) ->
|
||||
[a,{b,ordered},{c,ordered}].
|
||||
|
||||
maybe_set_dir(Config) ->
|
||||
case proplists:get_value(priv_dir, Config) of
|
||||
undefined ->
|
||||
ok;
|
||||
PDir ->
|
||||
Dir = filename:join(PDir, "mnesia_indexes"),
|
||||
application:set_env(mnesia, dir, Dir)
|
||||
end.
|
190
test/mnesia_rocksdb_migration_SUITE.erl
Normal file
@ -0,0 +1,190 @@
|
||||
-module(mnesia_rocksdb_migration_SUITE).
|
||||
|
||||
-export([
|
||||
all/0
|
||||
, suite/0
|
||||
, groups/0
|
||||
, init_per_suite/1
|
||||
, end_per_suite/1
|
||||
, init_per_group/2
|
||||
, end_per_group/2
|
||||
, init_per_testcase/2
|
||||
, end_per_testcase/2
|
||||
]).
|
||||
|
||||
-export([
|
||||
manual_migration/1
|
||||
, migrate_with_encoding_change/1
|
||||
, auto_migration/1
|
||||
]).
|
||||
|
||||
-include_lib("common_test/include/ct.hrl").
|
||||
|
||||
-define(TABS_CREATED, tables_created).
|
||||
|
||||
suite() ->
|
||||
[].
|
||||
|
||||
all() ->
|
||||
[{group, all_tests}].
|
||||
|
||||
groups() ->
|
||||
[
|
||||
{all_tests, [sequence], [ manual_migration
|
||||
, migrate_with_encoding_change ]}
|
||||
].
|
||||
|
||||
init_per_suite(Config) ->
|
||||
Config.
|
||||
|
||||
end_per_suite(_Config) ->
|
||||
ok.
|
||||
|
||||
init_per_group(_, Config) ->
|
||||
Config.
|
||||
|
||||
end_per_group(_, _Config) ->
|
||||
ok.
|
||||
|
||||
init_per_testcase(_, Config) ->
|
||||
mnesia:stop(),
|
||||
ok = mnesia_rocksdb_tlib:start_mnesia(reset),
|
||||
Config.
|
||||
%% create_migrateable_db(Config).
|
||||
|
||||
end_per_testcase(_, _Config) ->
|
||||
ok.
|
||||
|
||||
manual_migration(Config) ->
|
||||
tr_ct:with_trace(fun manual_migration_/1, Config, tr_opts()).
|
||||
|
||||
manual_migration_(Config) ->
|
||||
create_migrateable_db(Config),
|
||||
Tabs = tables(),
|
||||
ct:log("Analyze (before): ~p", [analyze_tabs(Tabs)]),
|
||||
Res = mnesia_rocksdb_admin:migrate_standalone(rdb, Tabs),
|
||||
ct:log("migrate_standalone(rdb, ~p) -> ~p", [Tabs, Res]),
|
||||
AnalyzeRes = analyze_tabs(Tabs),
|
||||
ct:log("AnalyzeRes = ~p", [AnalyzeRes]),
|
||||
MigRes = mnesia_rocksdb_admin:migrate_standalone(rdb, Tabs),
|
||||
ct:log("MigRes = ~p", [MigRes]),
|
||||
AnalyzeRes2 = analyze_tabs(Tabs),
|
||||
ct:log("AnalyzeRes2 = ~p", [AnalyzeRes2]),
|
||||
ct:log("Admin State = ~p", [sys:get_state(mnesia_rocksdb_admin)]),
|
||||
ok.
|
||||
|
||||
migrate_with_encoding_change(_Config) ->
|
||||
ok = create_tab(t, [{user_properties, [{mrdb_encoding, {sext,{object,term}}},
|
||||
{rocksdb_standalone, true}]},
|
||||
{index,[val]}
|
||||
]),
|
||||
mrdb:insert(t, {t, <<"1">>, <<"a">>}),
|
||||
mrdb:insert(t, {t, <<"2">>, <<"b">>}),
|
||||
TRef = mrdb:get_ref(t),
|
||||
{ok, V1} = mrdb:rdb_get(TRef, sext:encode(<<"1">>), []),
|
||||
{ok, V2} = mrdb:rdb_get(TRef, sext:encode(<<"2">>), []),
|
||||
{t,[],<<"a">>} = binary_to_term(V1),
|
||||
{t,[],<<"b">>} = binary_to_term(V2),
|
||||
Opts = #{encoding => {raw, raw}},
|
||||
MigRes = mnesia_rocksdb_admin:migrate_standalone(rdb, [{t, Opts}]),
|
||||
ct:log("MigRes (t) = ~p", [MigRes]),
|
||||
%%
|
||||
%% Ensure that metadata reflect the migrated table
|
||||
%% (now a column family, and the rocksdb_standalone prop gone)
|
||||
%%
|
||||
TRef1 = mrdb:get_ref(t),
|
||||
ct:log("TRef1(t) = ~p", [TRef1]),
|
||||
#{type := column_family,
|
||||
properties := #{user_properties := UPs}} = TRef1,
|
||||
error = maps:find(rocksdb_standalone, UPs),
|
||||
UPsR = lists:sort(maps:values(UPs)),
|
||||
UPsM = lists:sort(mnesia:table_info(t, user_properties)),
|
||||
{UPsR,UPsM} = {UPsM,UPsR},
|
||||
ct:log("user properties (t): ~p", [UPsM]),
|
||||
[{<<"2">>, <<"b">>},
|
||||
{<<"1">>, <<"a">>}] = mrdb:rdb_fold(
|
||||
t, fun(K,V,A) -> [{K,V}|A] end, [], <<>>),
|
||||
ct:log("All data present in new column family", []),
|
||||
ct:log("Contents of mnesia dir: ~p",
|
||||
[ok(file:list_dir(mnesia:system_info(directory)))]),
|
||||
ct:log("mnesia stopped", []),
|
||||
mnesia:stop(),
|
||||
mnesia:start(),
|
||||
ct:log("mnesia started", []),
|
||||
mnesia:info(),
|
||||
ok = mnesia:wait_for_tables([t], 3000),
|
||||
ct:log("tables loaded", []),
|
||||
[{t,<<"1">>,<<"a">>},
|
||||
{t,<<"2">>,<<"b">>}] = mrdb:select(
|
||||
t, [{'_',[],['$_']}]),
|
||||
[{<<"2">>,<<"b">>},
|
||||
{<<"1">>,<<"a">>}] = mrdb:rdb_fold(
|
||||
t, fun(K,V,A) -> [{K,V}|A] end, [], <<>>),
|
||||
ok.
|
||||
|
||||
auto_migration(_Config) ->
|
||||
ok.
|
||||
|
||||
ok({ok, Value}) -> Value.
|
||||
|
||||
tr_opts() ->
|
||||
#{ patterns => [ {mnesia_rocksdb_admin, '_', []}
|
||||
, {mnesia_rocksdb_lib, '_', []}
|
||||
, {rocksdb, '_', x} | trace_exports(mrdb, x) ] }.
|
||||
|
||||
trace_exports(M, Pat) ->
|
||||
Fs = M:module_info(exports),
|
||||
[{M, F, A, Pat} || {F, A} <- Fs].
|
||||
|
||||
tables() ->
|
||||
[a].
|
||||
|
||||
create_migrateable_db(Config) ->
|
||||
Os = [{user_properties, [{rocksdb_standalone, true}]}],
|
||||
TabNames = tables(),
|
||||
Tabs = [{T, Os} || T <- TabNames],
|
||||
create_tabs(Tabs, Config),
|
||||
verify_tabs_are_standalone(TabNames),
|
||||
fill_tabs(TabNames),
|
||||
Config.
|
||||
|
||||
fill_tabs(Tabs) ->
|
||||
lists:foreach(fun(Tab) ->
|
||||
[mrdb:insert(Tab, {Tab, X, a}) || X <- lists:seq(1,3)]
|
||||
end, Tabs).
|
||||
|
||||
create_tabs(Tabs, Config) ->
|
||||
Res = lists:map(fun create_tab/1, Tabs),
|
||||
tr_ct:trace_checkpoint(?TABS_CREATED, Config),
|
||||
Res.
|
||||
|
||||
create_tab({T, Opts}) ->
|
||||
create_tab(T, Opts).
|
||||
|
||||
create_tab(T, Opts) ->
|
||||
{atomic, ok} = mnesia:create_table(T, [{rdb, [node()]} | Opts]),
|
||||
ok.
|
||||
|
||||
verify_tabs_are_standalone(Tabs) ->
|
||||
case analyze_tabs(Tabs) of
|
||||
{_, []} ->
|
||||
ok;
|
||||
{[], NotSA} ->
|
||||
error({not_standalone, NotSA})
|
||||
end.
|
||||
|
||||
analyze_tabs(Tabs) ->
|
||||
Dir = mnesia:system_info(directory),
|
||||
Files = filelib:wildcard(filename:join(Dir, "*-_tab.extrdb")),
|
||||
ct:log("Files = ~p", [Files]),
|
||||
TabNames = lists:map(
|
||||
fun(F) ->
|
||||
{match,[TStr]} =
|
||||
re:run(F, "^.+/([^/]+)-_tab\\.extrdb$",
|
||||
[{capture, [1], list}]),
|
||||
list_to_existing_atom(TStr)
|
||||
end, Files),
|
||||
ct:log("TabNames = ~p", [TabNames]),
|
||||
NotSA = Tabs -- TabNames,
|
||||
{TabNames -- NotSA, NotSA}.
|
||||
|
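Illustrative sketch, not part of the diff: migrating a standalone table into a column family with an encoding change, as exercised by the suite above. The table name `t` and the target encoding are assumptions taken from the test case.

%% Sketch under stated assumptions.
migrate_example() ->
    Opts = #{encoding => {raw, raw}},
    mnesia_rocksdb_admin:migrate_standalone(rdb, [{t, Opts}]).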
@ -78,6 +78,18 @@ setup_mnesia() ->
|
||||
ok = mnesia:delete_schema([node()]),
|
||||
ok = mnesia:create_schema([node()]),
|
||||
ok = mnesia:start(),
|
||||
%%
|
||||
%% dbg:tracer(),
|
||||
%% dbg:tpl(mnesia_rocksdb_admin, x),
|
||||
%% dbg:tpl(mnesia_rocksdb,x),
|
||||
%% dbg:ctpl(mnesia_rocksdb, check_definition_entry),
|
||||
%% dbg:ctpl(mnesia_rocksdb, '-check_definition/4-fun-0-'),
|
||||
%% dbg:tpl(mnesia_rocksdb_lib,x),
|
||||
%% dbg:tp(mnesia,x),
|
||||
%% dbg:tpl(mrdb,x),
|
||||
%% dbg:tp(rocksdb,x),
|
||||
%% dbg:p(all,[c]),
|
||||
%%
|
||||
{ok, rocksdb_copies} = mnesia_rocksdb:register().
|
||||
|
||||
setup() ->
|
||||
|
@ -20,23 +20,32 @@
|
||||
|
||||
-export([start_mnesia/0,
|
||||
start_mnesia/1,
|
||||
restart_reset_mnesia/0,
|
||||
create_table/1,
|
||||
create_table/3,
|
||||
trace/2]).
|
||||
|
||||
restart_reset_mnesia() ->
|
||||
mnesia:stop(),
|
||||
start_mnesia(reset).
|
||||
|
||||
start_mnesia() ->
|
||||
start_mnesia(false).
|
||||
|
||||
start_mnesia(Mode) ->
|
||||
if Mode==reset ->
|
||||
mnesia:delete_schema([node()]),
|
||||
mnesia:create_schema([node()],
|
||||
[{backend_types,
|
||||
[{rdb,mnesia_rocksdb}]}]);
|
||||
DRes = mnesia:delete_schema([node()]),
|
||||
ct:log("Delete schema: ~p", [DRes]),
|
||||
CRes = mnesia:create_schema([node()],
|
||||
[{backend_types,
|
||||
[{rdb,mnesia_rocksdb}]}]),
|
||||
ct:log("Create schema: ~p", [CRes]);
|
||||
true -> ok
|
||||
end,
|
||||
mnesia:start().
|
||||
SRes = mnesia:start(),
|
||||
ct:log("Mnesia start: ~p", [SRes]),
|
||||
true = lists:member(rdb, mnesia_schema:backend_types()),
|
||||
SRes.
|
||||
|
||||
create_table(Backend) ->
|
||||
create_table(Backend, [k,v], [v]).
|
||||
|
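Illustrative sketch, not part of the diff: enabling the rdb backend in the same way the helper above does, then creating a table on it. The table name and attributes are assumptions.

%% Sketch under stated assumptions -- single-node setup.
setup_rdb() ->
    mnesia:stop(),
    mnesia:delete_schema([node()]),
    ok = mnesia:create_schema([node()], [{backend_types, [{rdb, mnesia_rocksdb}]}]),
    ok = mnesia:start(),
    {atomic, ok} = mnesia:create_table(t, [{rdb, [node()]}, {attributes, [k, v]}]).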
74
test/mrdb_bench.erl
Normal file
@ -0,0 +1,74 @@
|
||||
-module(mrdb_bench).
|
||||
|
||||
-compile(export_all).
|
||||
|
||||
init() ->
|
||||
mnesia:delete_schema([node()]),
|
||||
mnesia_rocksdb:create_schema([node()]),
|
||||
mnesia:start(),
|
||||
[mnesia:create_table(Name, [{Type, [node()]}, {record_name, r}])
|
||||
|| {Name, Type} <- tabs()],
|
||||
ok.
|
||||
|
||||
tabs() ->
|
||||
[{rc, ram_copies},
|
||||
{dc, disc_copies},
|
||||
{do, disc_only_copies},
|
||||
{rocks, rocksdb_copies},
|
||||
{rdb, rocksdb_copies}].
|
||||
|
||||
fill(N) ->
|
||||
[{T, timer:tc(fun() -> fill_(T, N) end)} || {T,_} <- tabs()].
|
||||
|
||||
fill_(_, 0) ->
|
||||
ok;
|
||||
fill_(T, N) when N > 0 ->
|
||||
write(T, {r, N, <<"1234567890">>}),
|
||||
fill_(T, N-1).
|
||||
|
||||
write(rdb, Obj) ->
|
||||
mrdb:insert(rdb, Obj);
|
||||
write(T, Obj) ->
|
||||
mnesia:dirty_write(T, Obj).
|
||||
|
||||
|
||||
fold() ->
|
||||
[{T, timer:tc(fun() -> fold_(T) end)} || {T,_} <- tabs()].
|
||||
|
||||
fold_(rdb) ->
|
||||
mrdb:fold(rdb, fun(_, Acc) -> Acc end, ok);
|
||||
fold_(T) ->
|
||||
mnesia:activity(
|
||||
async_dirty,
|
||||
fun() ->
|
||||
mnesia:foldl(fun(_, Acc) -> Acc end, ok, T)
|
||||
end).
|
||||
|
||||
tx(N) ->
|
||||
[{T, timer:tc(fun() -> tx_(T, N) end)} || {T,_} <- tabs()].
|
||||
|
||||
%% tx_(T, N) ->
|
||||
%% tx_(T, N, N, 10).
|
||||
|
||||
tx_(_, 0) -> ok;
|
||||
tx_(T, N) when N > 0 ->
|
||||
one_tx(T, N),
|
||||
tx_(T, N-1).
|
||||
|
||||
one_tx(rdb, N) ->
|
||||
mrdb:activity(transaction, rocksdb_copies,
|
||||
fun() ->
|
||||
[{r, N, Str}] = mrdb:read(rdb, N),
|
||||
Str2 = <<Str/binary, "x">>,
|
||||
mrdb:insert(rdb, {r, N, Str2}),
|
||||
[{r, N, Str2}] = mrdb:read(rdb, N)
|
||||
end);
|
||||
one_tx(T, N) ->
|
||||
mnesia:activity(transaction,
|
||||
fun() ->
|
||||
[{r, N, Str}] = mnesia:read(T, N),
|
||||
Str2 = <<Str/binary, "x">>,
|
||||
mnesia:write(T, {r, N, Str2}, write),
|
||||
[{r, N, Str2}] = mnesia:read(T, N)
|
||||
end).
|
||||
|
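Illustrative sketch, not part of the diff: driving the benchmark module above from a shell. The element counts are arbitrary assumptions; each list element comes back as {Tab, {Micros, Result}} from timer:tc/1.

%% Sketch under stated assumptions.
bench_all() ->
    ok = mrdb_bench:init(),
    #{ fill => mrdb_bench:fill(10000)
     , fold => mrdb_bench:fold()
     , tx   => mrdb_bench:tx(1000) }.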
31
test/mrdb_ttb.erl
Normal file
@ -0,0 +1,31 @@
|
||||
-module(mrdb_ttb).
|
||||
|
||||
-export([ on_nodes/2
|
||||
, stop/0
|
||||
, stop_nofetch/0
|
||||
, format/2
|
||||
, format/3 ]).
|
||||
|
||||
-export([ patterns/0
|
||||
, flags/0 ]).
|
||||
|
||||
on_nodes(Ns, File) ->
|
||||
tr_ttb:on_nodes(Ns, File, ?MODULE).
|
||||
|
||||
patterns() ->
|
||||
mrdb:patterns().
|
||||
|
||||
flags() ->
|
||||
{all, call}.
|
||||
|
||||
stop() ->
|
||||
tr_ttb:stop().
|
||||
|
||||
stop_nofetch() ->
|
||||
tr_ttb:stop_nofetch().
|
||||
|
||||
format(Dir, Out) ->
|
||||
tr_ttb:format(Dir, Out).
|
||||
|
||||
format(Dir, Out, Opts) ->
|
||||
tr_ttb:format(Dir, Out, Opts).
|
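Illustrative sketch, not part of the diff: collecting a trace around a test scenario with the wrapper above. The trace file name is an assumption, and the exact return values depend on the external tr_ttb helper.

%% Sketch under stated assumptions.
with_trace(Scenario) when is_function(Scenario, 0) ->
    mrdb_ttb:on_nodes([node()], "mrdb_trace"),
    try Scenario()
    after
        mrdb_ttb:stop()
    end.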