More robust worker stop/restart

This commit is contained in:
Ulf Wiger 2025-05-23 07:01:37 +02:00
parent 9038158c12
commit 4961fdfff4

View File

@ -102,13 +102,21 @@ handle_cast({from_pool, #{via := Connector,
#{candidate := Cand0}}}, #{candidate := Cand0}}},
#st{workers = Workers} = S) -> #st{workers = Workers} = S) ->
Cand = maps:put(via, Connector, decode_candidate_hash(Cand0)), Cand = maps:put(via, Connector, decode_candidate_hash(Cand0)),
%% ?LOG_DEBUG("Got new candidate; will mine it: ~p", [Cand]), ?LOG_DEBUG("Got new candidate; will mine it: ~p", [Cand]),
%%
%% We could check whether we have already received the candidate ...
%% For now, stop all workers, restart with new candidate %% For now, stop all workers, restart with new candidate
Workers1 = stop_workers(Workers), try
{Workers2, Cand1} = assign_nonces(Workers1, Cand), Workers1 = stop_workers(Workers),
#st{candidate = Cand2} = S1 = maybe_request_nonces(S#st{candidate = Cand1}), {Workers2, Cand1} = assign_nonces(Workers1, Cand),
NewWorkers = [spawn_worker(W, Cand2) || W <- Workers2], #st{candidate = Cand2} = S1 = maybe_request_nonces(S#st{candidate = Cand1}),
{noreply, S1#st{workers = NewWorkers}}; NewWorkers = [spawn_worker(W, Cand2) || W <- Workers2],
{noreply, S1#st{workers = NewWorkers}}
catch
Cat:Err:St ->
?LOG_ERROR("CAUGHT ~p:~p / ~p", [Cat, Err, St]),
{noreply, S}
end;
handle_cast({disconnected, Id}, #st{connected = Conn} = S) -> handle_cast({disconnected, Id}, #st{connected = Conn} = S) ->
?LOG_DEBUG("disconnected: ~p", [Id]), ?LOG_DEBUG("disconnected: ~p", [Id]),
Conn1 = maps:remove(Id, Conn), Conn1 = maps:remove(Id, Conn),
@ -122,9 +130,7 @@ handle_cast(_Msg, S) ->
{noreply, S}. {noreply, S}.
handle_info({'DOWN', MRef, process, Pid, Reason}, #st{ workers = Workers handle_info({'DOWN', MRef, process, Pid, Reason}, #st{ workers = Workers
, connected = Connected , connected = Connected} = S) ->
, working = Working} = S)
when ?CONNECTED(S), Working ->
%% ?LOG_DEBUG("DOWN from ~p: ~p", [Pid, Reason]), %% ?LOG_DEBUG("DOWN from ~p: ~p", [Pid, Reason]),
case lists:keyfind(Pid, #worker.pid, Workers) of case lists:keyfind(Pid, #worker.pid, Workers) of
#worker{mref = MRef} = W -> #worker{mref = MRef} = W ->
@ -302,7 +308,9 @@ stop_workers_for_seq(Seq, Workers) ->
[stop_worker(W) || #worker{cand = #{seq := Seq1}} = W <- Workers, [stop_worker(W) || #worker{cand = #{seq := Seq1}} = W <- Workers,
Seq1 =:= Seq]. Seq1 =:= Seq].
stop_worker(#worker{pid = Pid, mref = MRef} = W) when is_pid(Pid) -> stop_worker(#worker{pid = Pid} = W) when is_pid(Pid) ->
MRef = erlang:monitor(process, Pid),
?LOG_DEBUG("Will stop worker ~p (MRef = ~p)", [Pid, MRef]),
exit(Pid, kill), exit(Pid, kill),
receive receive
{'EXIT', Pid, _} -> ok; {'EXIT', Pid, _} -> ok;