449 lines
16 KiB
Erlang
449 lines
16 KiB
Erlang
-module(gmhc_server).
|
|
-vsn("0.8.3").
|
|
|
|
-behaviour(gen_server).
|
|
|
|
-export([ connected/2
|
|
, disconnected/1
|
|
, from_pool/1
|
|
, new_candidate/1
|
|
]).
|
|
|
|
-export([ total_nonces/0 ]).
|
|
|
|
-export([
|
|
start_link/0
|
|
, init/1
|
|
, handle_call/3
|
|
, handle_cast/2
|
|
, handle_info/2
|
|
, terminate/2
|
|
, code_change/3
|
|
]).
|
|
|
|
-include_lib("kernel/include/logger.hrl").
|
|
-include("gmhc_events.hrl").
|
|
|
|
-record(worker, { config
|
|
, nonces = 0
|
|
, index
|
|
, pid
|
|
, mref
|
|
, cand
|
|
, nonce
|
|
, errors = 0}).
|
|
-type worker() :: #worker{}.
|
|
-type type() :: monitor | worker.
|
|
|
|
-record(st, {
|
|
connected = #{} :: #{non_neg_integer() => {pid(), type()}}
|
|
, working = false :: boolean()
|
|
, candidate :: map() | 'undefined'
|
|
, nonces = 1 :: pos_integer()
|
|
, workers = [] :: [worker()]
|
|
}).
|
|
|
|
-define(CONNECTED(S), map_size(S#st.connected) > 0).
|
|
|
|
-define(MAX_ERRORS, 50).
|
|
|
|
connected(Id, Type) ->
|
|
gen_server:call(?MODULE, {connected, Id, Type}).
|
|
|
|
disconnected(Id) ->
|
|
gen_server:cast(?MODULE, {disconnected, Id}).
|
|
|
|
from_pool(Msg) ->
|
|
ToSend = {from_pool, Msg},
|
|
%% ?LOG_DEBUG("Sending to server: ~p", [ToSend]),
|
|
gen_server:cast(?MODULE, ToSend).
|
|
|
|
new_candidate(Cand) ->
|
|
gen_server:cast(?MODULE, {new_candidate, Cand}).
|
|
|
|
total_nonces() ->
|
|
gen_server:call(?MODULE, total_nonces).
|
|
|
|
start_link() ->
|
|
gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
|
|
|
|
init([]) ->
|
|
WorkerConfigs = gmhc_workers:get_worker_configs(),
|
|
?LOG_DEBUG("WorkerConfigs = ~p", [WorkerConfigs]),
|
|
%% IdleWorkers = [#worker{executable = E} || E <- Instances],
|
|
{IdleWorkers,_} = lists:mapfoldl(
|
|
fun(C, N) ->
|
|
NNonces = calc_nonces(C),
|
|
{#worker{index = N, config = C, nonces = NNonces}, N+1}
|
|
end, 1, WorkerConfigs),
|
|
TotalNonces = lists:foldl(fun(#worker{nonces = N}, Acc) ->
|
|
N + Acc
|
|
end, 0, IdleWorkers),
|
|
process_flag(trap_exit, true),
|
|
{ok, #st{workers = IdleWorkers, nonces = TotalNonces}}.
|
|
|
|
handle_call(total_nonces, _From, #st{nonces = Nonces} = S) ->
|
|
{reply, Nonces, S};
|
|
handle_call({connected, Id, Type}, {Pid,_}, #st{connected = Conn} = S) ->
|
|
?LOG_DEBUG("connected: ~p, ~p", [Id, Type]),
|
|
erlang:monitor(process, Pid),
|
|
S1 = S#st{connected = Conn#{Id => {Pid, Type}}},
|
|
S2 = case Type of
|
|
monitor ->
|
|
stop_workers(S1#st.workers), % shouldn't be any running
|
|
S1#st{workers = [], working = false};
|
|
worker ->
|
|
gmhc_watchdog:watch(5*60_000, 1), %% 5 minutes, one-shot
|
|
S1#st{working = true}
|
|
end,
|
|
{reply, ok, S2};
|
|
handle_call(_Req, _From, S) ->
|
|
{reply, unknown_call, S}.
|
|
|
|
handle_cast({from_pool, #{via := Connector,
|
|
notification :=
|
|
#{candidate := Cand0}}},
|
|
#st{workers = Workers} = S) ->
|
|
Cand = maps:put(via, Connector, decode_candidate_hash(Cand0)),
|
|
?LOG_DEBUG("Got new candidate; will mine it: ~p", [Cand]),
|
|
%%
|
|
%% We could check whether we have already received the candidate ...
|
|
%% For now, stop all workers, restart with new candidate
|
|
try
|
|
%% Most of the time we don't want to stop the worker. If we do, though, then
|
|
%% we need to do it more carefully than this, or memory usage will triple.
|
|
%% Workers1 = stop_workers(Workers),
|
|
%%
|
|
%% Nonces may be [], in which case we need to request new nonces first.
|
|
#st{candidate = Cand1} = S1 = maybe_request_nonces(S#st{candidate = Cand}),
|
|
{Workers2, Cand2} = assign_nonces(Workers, Cand1),
|
|
#st{candidate = Cand3} = S2 = maybe_request_nonces(S1#st{candidate = Cand2}),
|
|
NewWorkers = [spawn_worker(W, Cand3) || W <- Workers2],
|
|
{noreply, S2#st{workers = NewWorkers}}
|
|
catch
|
|
Cat:Err:St ->
|
|
?LOG_ERROR("CAUGHT ~p:~p / ~p", [Cat, Err, St]),
|
|
{noreply, S}
|
|
end;
|
|
handle_cast({disconnected, Id}, #st{connected = Conn} = S) ->
|
|
?LOG_DEBUG("disconnected: ~p", [Id]),
|
|
Conn1 = maps:remove(Id, Conn),
|
|
gmhc_watchdog:unwatch(),
|
|
S1 = if map_size(Conn1) == 0 ->
|
|
Ws = stop_workers(S#st.workers),
|
|
S#st{connected = Conn1, workers = Ws};
|
|
true -> S#st{connected = Conn1}
|
|
end,
|
|
{noreply, S1};
|
|
handle_cast(_Msg, S) ->
|
|
{noreply, S}.
|
|
|
|
handle_info({'DOWN', MRef, process, Pid, Reason}, #st{ workers = Workers
|
|
, connected = Connected} = S) ->
|
|
%% ?LOG_DEBUG("DOWN from ~p: ~p", [Pid, Reason]),
|
|
case lists:keyfind(Pid, #worker.pid, Workers) of
|
|
#worker{mref = MRef} = W ->
|
|
S1 = handle_worker_result(Reason, W, S),
|
|
{noreply, S1};
|
|
false ->
|
|
Conn1 = maps:filter(fun(_, {P,_}) -> P =/= Pid end, Connected),
|
|
{noreply, S#st{connected = Conn1}}
|
|
end;
|
|
handle_info({'EXIT', Pid, Reason}, #st{ workers = Workers
|
|
, working = Working} = S)
|
|
when ?CONNECTED(S), Working ->
|
|
case lists:keyfind(Pid, #worker.pid, Workers) of
|
|
#worker{} = W ->
|
|
%% ?LOG_DEBUG("EXIT from worker ~p: ~p", [W#worker.index, Reason]),
|
|
gmhc_events:publish(error, ?ERR_EVT(#{error => worker_error,
|
|
data => Reason})),
|
|
Ws1 = incr_worker_error(W, Workers),
|
|
erlang:start_timer(100, self(), check_workers),
|
|
{noreply, S#st{workers = Ws1}};
|
|
false ->
|
|
%% ?LOG_DEBUG("EXIT apparently not from worker?? (~p)", [Pid]),
|
|
{noreply, S}
|
|
end;
|
|
handle_info({timeout, _, check_workers}, #st{workers = Workers} = S) ->
|
|
S1 = maybe_request_nonces(S),
|
|
S2 = lists:foldl(fun(W, Sx) ->
|
|
maybe_restart_worker(W, Sx)
|
|
end, S1, Workers),
|
|
{noreply, S2};
|
|
handle_info(Msg, St) ->
|
|
?LOG_DEBUG("Unknown msg: ~p", [Msg]),
|
|
{noreply, St}.
|
|
|
|
terminate(_Reason, _St) ->
|
|
ok.
|
|
|
|
code_change(_FromVsn, S, _Extra) ->
|
|
{ok, S}.
|
|
|
|
report_solutions(Solutions, W, #st{} = S) when ?CONNECTED(S) ->
|
|
#{via := Via, seq := Seq} = W#worker.cand,
|
|
Nonces = all_nonces(W),
|
|
[report_no_solution_(Via, Seq, N)
|
|
|| N <- Nonces, not lists:keymember(N, 1, Solutions)],
|
|
gmhc_handler:call(
|
|
#{via => Via,
|
|
solutions => #{ seq => Seq
|
|
, found => [#{ nonce => Nonce
|
|
, evidence => Evd }
|
|
|| {Nonce, Evd} <- Solutions] }}).
|
|
|
|
report_no_solution(_Nonce, W, #st{} = S) when ?CONNECTED(S) ->
|
|
#{via := Via, seq := Seq} = W#worker.cand,
|
|
Nonces = all_nonces(W),
|
|
%% ?LOG_DEBUG("report no_solution Seq = ~p, Nonce = ~p", [Seq, Nonce]),
|
|
[report_no_solution_(Via, Seq, Nonce1) || Nonce1 <- Nonces],
|
|
ok.
|
|
|
|
report_no_solution_(Via, Seq, Nonce) ->
|
|
gmhc_handler:async_call(#{via => Via,
|
|
no_solution => #{ seq => Seq
|
|
, nonce => Nonce}}).
|
|
|
|
all_nonces(#worker{nonce = Nonce, config = Config}) ->
|
|
case gmhw_pow_cuckoo:repeats(Config) of
|
|
1 -> [Nonce];
|
|
Rs -> lists:seq(Nonce, Nonce + Rs - 1)
|
|
end.
|
|
|
|
maybe_request_nonces(#st{ candidate = #{via := Via, seq := Seq, nonces := Nonces}
|
|
, nonces = N} = S) when ?CONNECTED(S) ->
|
|
case Nonces == [] of
|
|
true ->
|
|
Res = gmhc_handler:call(#{via => Via,
|
|
get_nonces => #{ seq => Seq
|
|
, n => N }}),
|
|
nonces_result(Res, Seq, S);
|
|
false ->
|
|
S
|
|
end;
|
|
maybe_request_nonces(S) ->
|
|
S.
|
|
|
|
nonces_result(#{nonces := #{seq := Seq, nonces := Nonces}}, Seq0, S) ->
|
|
case Seq == Seq0 of
|
|
true ->
|
|
wd_ping(),
|
|
#st{candidate = Cand} = S,
|
|
S#st{candidate = Cand#{nonces => Nonces}};
|
|
false ->
|
|
?LOG_DEBUG("Seq mismatch - wtf?? ~p - ~p", [Seq, Seq0]),
|
|
S
|
|
end;
|
|
nonces_result({error, Reason}, Seq0, S) ->
|
|
?LOG_DEBUG("Got error on nonce request: ~p", [Reason]),
|
|
Workers = stop_workers_for_seq(Seq0, S#st.workers),
|
|
case Reason of
|
|
{timeout, _} ->
|
|
Timeout = retry_timeout(1000, 3000),
|
|
erlang:start_timer(Timeout, self(), check_workers);
|
|
_ ->
|
|
ok
|
|
end,
|
|
S#st{workers = Workers}.
|
|
|
|
retry_timeout(Floor, Range) ->
|
|
Floor + gmhc_lib:rand(Range).
|
|
|
|
handle_worker_result({worker_result, Result}, W, S) ->
|
|
%% ?LOG_DEBUG("worker result: ~p", [Result]),
|
|
wd_ping(),
|
|
case Result of
|
|
{solutions, Solutions} ->
|
|
{Cont, S1} = report_solutions_(Solutions, W, S),
|
|
maybe_continue(Cont, reset_errors(W), S1);
|
|
{solution, Nonce, Solution} ->
|
|
%% report_solution(Nonce, Solution, W, S),
|
|
{Cont, S1} = report_solutions_([{Nonce, Solution}], W, S),
|
|
maybe_continue(Cont, reset_errors(W), S1);
|
|
{no_solution, Nonce} ->
|
|
report_no_solution(Nonce, W, S),
|
|
maybe_restart_worker(reset_errors(W), S);
|
|
{error, S} ->
|
|
?LOG_DEBUG("Worker ~p reported error as normal", [W#worker.index]),
|
|
gmhc_events:publish(error, ?ERR_EVT(#{error => worker_error,
|
|
data => Result})),
|
|
Ws = incr_worker_error(W, S#st.workers),
|
|
S#st{workers = Ws}
|
|
end;
|
|
handle_worker_result(Error, W, S) ->
|
|
?LOG_DEBUG("Got worker error from ~p: ~p", [W#worker.index, Error]),
|
|
gmhc_events:publish(error, ?ERR_EVT(#{error => worker_error,
|
|
data => Error})),
|
|
Ws = incr_worker_error(W, S#st.workers),
|
|
S#st{workers = Ws}.
|
|
|
|
report_solutions_(Solutions, W, S) ->
|
|
case report_solutions(Solutions, W, S) of
|
|
ok ->
|
|
%% Ws = reset_worker(W, S#st.workers),
|
|
%% Ws1 = stop_workers(Ws),
|
|
%% {stopped, S#st{workers = Ws1}};
|
|
{continue, S};
|
|
continue ->
|
|
{continue, S};
|
|
{error, _} ->
|
|
{error, S}
|
|
end.
|
|
|
|
reset_errors(#worker{} = W) ->
|
|
W#worker{errors = 0}.
|
|
|
|
reset_worker(#worker{index = I} = W, Ws) ->
|
|
W1 = reset_worker_(W),
|
|
lists:keyreplace(I, #worker.index, Ws, W1).
|
|
|
|
reset_worker_(W) ->
|
|
%% ?LOG_DEBUG("reset_worker ~p", [W#worker.index]),
|
|
W#worker{pid = undefined, mref = undefined,
|
|
nonce = undefined, cand = undefined}.
|
|
|
|
incr_worker_error(#worker{errors = Es, index = I} = W, Ws) ->
|
|
%% ?LOG_DEBUG("Increment worker (~p) error count: ~p", [I, Es+1]),
|
|
W1 = reset_worker_(W#worker{errors = Es+1}),
|
|
lists:keyreplace(I, #worker.index, Ws, W1).
|
|
|
|
%% maybe_continue(stopped, _, S) ->
|
|
%% S;
|
|
maybe_continue(continue, W, S) ->
|
|
maybe_restart_worker(W, S);
|
|
maybe_continue(error, W, S) ->
|
|
?LOG_INFO("Won't restart worker ~p due to error", [W#worker.index]),
|
|
Ws = reset_worker(W, S#st.workers),
|
|
S#st{workers = Ws}.
|
|
|
|
maybe_restart_worker(#worker{index = I} = W, #st{candidate = C} = S) ->
|
|
case maps:get(nonces, C) of
|
|
[] ->
|
|
%% Waiting for nonces
|
|
Ws = reset_worker(W, S#st.workers),
|
|
S#st{workers = Ws};
|
|
Ns ->
|
|
{Nonce, Ns1} = pick_nonce(W#worker.nonces, Ns),
|
|
%% ?LOG_DEBUG("restart worker ~p with nonce ~p", [I, Nonce]),
|
|
W1 = reset_worker_(W),
|
|
W2 = spawn_worker(W1#worker{nonce = Nonce}, C),
|
|
Ws = lists:keyreplace(I, #worker.index, S#st.workers, W2),
|
|
S1 = S#st{candidate = C#{nonces => Ns1}, workers = Ws},
|
|
maybe_request_nonces(S1)
|
|
end.
|
|
|
|
%% In a Gajumaru node, a typical worker config might look like this:
|
|
%% "cuckoo": {
|
|
%% "edge_bits": 29,
|
|
%% "miners": [{"executable": "mean29-avx2"},
|
|
%% {"executable": "lean29-avx2"},
|
|
%% {"executable": "lean29-avx2"},
|
|
%% {"executable": "lean29-avx2"}]
|
|
%% }
|
|
stop_workers(Workers) ->
|
|
[stop_worker(W) || W <- Workers].
|
|
|
|
stop_workers_for_seq(Seq, Workers) ->
|
|
[stop_worker(W) || #worker{cand = #{seq := Seq1}} = W <- Workers,
|
|
Seq1 =:= Seq].
|
|
|
|
stop_worker(#worker{pid = Pid} = W) when is_pid(Pid) ->
|
|
MRef = erlang:monitor(process, Pid),
|
|
?LOG_DEBUG("Will stop worker ~p (MRef = ~p)", [Pid, MRef]),
|
|
exit(Pid, shutdown),
|
|
receive
|
|
{'EXIT', Pid, _} -> ok;
|
|
{'DOWN', MRef, process, Pid, _} -> ok
|
|
end,
|
|
W#worker{pid = undefined, mref = undefined, nonce = undefined};
|
|
stop_worker(W) ->
|
|
W.
|
|
|
|
assign_nonces(Ws, #{nonces := Ns} = C) ->
|
|
{Ws1, Nonces1} = assign_nonces_(Ws, Ns, []),
|
|
{Ws1, C#{nonces => Nonces1}}.
|
|
|
|
assign_nonces_([W | Ws], [], Acc) ->
|
|
assign_nonces_(Ws, [], [W#worker{nonce = undefined}|Acc]);
|
|
assign_nonces_([#worker{nonces = N} = W | Ws], Ns, Acc) ->
|
|
{Nonce, Ns1} = pick_nonce(N, Ns),
|
|
assign_nonces_(Ws, Ns1, [W#worker{nonce = Nonce}|Acc]);
|
|
assign_nonces_([], Ns, Acc) ->
|
|
{lists:reverse(Acc), Ns}.
|
|
|
|
-spec calc_nonces(gmhw_pow_cuckoo:config()) -> non_neg_integer().
|
|
calc_nonces(Cfg) ->
|
|
NInstances = case gmhw_pow_cuckoo:addressed_instances(Cfg) of
|
|
undefined -> 1;
|
|
L -> length(L)
|
|
end,
|
|
Repeats = gmhw_pow_cuckoo:repeats(Cfg),
|
|
Repeats * NInstances.
|
|
|
|
pick_nonce(_, [A, A]) ->
|
|
%% ?LOG_DEBUG("last nonce (~p)", [A]),
|
|
{A, []};
|
|
pick_nonce(N, [A, B]) when A < B ->
|
|
A1 = A + N,
|
|
New = if A1 > B -> [];
|
|
true -> [A1, B]
|
|
end,
|
|
%% ?LOG_DEBUG("Remanining nonces: ~p", [New]),
|
|
{A, New}.
|
|
|
|
%% Dialyzer doesn't like that the fun passed to spawn_link/1
|
|
%% doesn't have a local return (it communicates its result via the exit reason).
|
|
-dialyzer({no_return, spawn_worker/2}).
|
|
|
|
spawn_worker(#worker{nonce = undefined} = W, _) ->
|
|
W;
|
|
spawn_worker(#worker{errors = Es} = W, _) when Es >= ?MAX_ERRORS ->
|
|
?LOG_DEBUG("Won't start worker - reached max error count: ~p", [W]),
|
|
W;
|
|
spawn_worker(#worker{pid = undefined, nonce = Nonce, config = Cfg} = W, Cand) ->
|
|
Me = self(),
|
|
#{candidate := Data, target := Target, edge_bits := EdgeBits} = Cand,
|
|
Pid = spawn_link(
|
|
fun() ->
|
|
Cfg1 = gmhw_pow_cuckoo:set_edge_bits(EdgeBits, Cfg),
|
|
init_worker(Data, Nonce, Target, Cfg1, Me)
|
|
end),
|
|
MRef = erlang:monitor(process, Pid),
|
|
W#worker{pid = Pid, mref = MRef, cand = Cand, nonce = Nonce};
|
|
spawn_worker(W, _) ->
|
|
% Worker already has work. Don't disturb it.
|
|
W.
|
|
|
|
-spec init_worker(binary(), integer(), integer(), tuple(), pid()) -> no_return().
|
|
init_worker(Data, Nonce, Target, Config, Parent) ->
|
|
gmhc_events:publish(puzzle, {Data, Target, Nonce, Config}),
|
|
Res = gmhc_workers:generate_from_hash(Data, Target, Nonce, Config, undefined),
|
|
%% ?LOG_DEBUG("worker result: ~p", [Res]),
|
|
gmhc_events:publish(result, Res),
|
|
case Res of
|
|
{ok, Solutions} when is_list(Solutions) ->
|
|
worker_result(Parent, {solutions, Solutions});
|
|
%% {ok, {Nonce1, Solution}} ->
|
|
%% worker_result(Parent, {solution, Nonce1, Solution});
|
|
{error, no_solution} ->
|
|
%% TODO: If we are using repeats, then we might report
|
|
%% no_solution for each nonce tried.
|
|
worker_result(Parent, {no_solution, Nonce});
|
|
{error, Other} ->
|
|
?LOG_ERROR("Bad worker! {error, ~p}", [Other]),
|
|
gmhc_events:publish(error, ?ERR_EVT(#{error => cannot_start_worker,
|
|
data => {error, Other}})),
|
|
exit(Other)
|
|
end.
|
|
|
|
worker_result(Pid, Result) ->
|
|
unlink(Pid),
|
|
exit({worker_result, Result}).
|
|
|
|
decode_candidate_hash(#{candidate := C} = Cand) ->
|
|
{ok, Hash} = gmser_api_encoder:safe_decode(bytearray, C),
|
|
Cand#{candidate := Hash}.
|
|
|
|
wd_ping() ->
|
|
gmhc_watchdog:ping().
|