#!/usr/bin/env escript

% Takes an input file and prints every line that contains non-ASCII bytes,
% with the offending characters highlighted.
%
% Written by Peter Harpending, 2026-03-05
%
% Copyright (c) 2026 QPQ AG
%
% Permission is hereby granted, free of charge, to any person obtaining a copy
% of this software and associated documentation files (the "Software"), to deal
% in the Software without restriction, including without limitation the rights
% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
% copies of the Software, and to permit persons to whom the Software is
% furnished to do so, subject to the following conditions:
%
% The above copyright notice and this permission notice shall be included in
% all copies or substantial portions of the Software.
%
% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
% SOFTWARE.

-mode(compile).

% Prints the usage text to standard output and returns ok.
help() ->
    UsageLines =
        ["USAGE: ununicode Filename~n",
         "~n",
         "Example:~n",
         "     ununicode foo.md | less -RS~n"],
    % The pieces are joined into a single format string so the text is
    % emitted with one io:format call.
    io:format(lists:append(UsageLines)).


% Script entry point.
%
% With exactly one argument, reads that file, splits it into lines and
% hands the lines to go/3 (numbering starts at 1). If the file cannot be
% read, the error is reported and the usage text is printed. Any other
% argument list just prints the usage text.
main([Filename]) ->
    case file:read_file(Filename) of
        {ok, Bytes} ->
            % Split on the raw LF byte. The input is inspected precisely
            % because it may contain arbitrary (possibly malformed) bytes,
            % so a byte-oriented split is used rather than the
            % Unicode-aware string module, which expects well-formed UTF-8.
            Lines = binary:split(Bytes, <<"\n">>, [global]),
            go(Lines, 1, []);
        Error ->
            io:format("ERROR reading file ~p: ~tp~n", [Filename, Error]),
            help()
    end;
main(_) ->
    help().


% Walks Lines, numbering them from LineNum, and collects a
% {LineNumber, Line} pair for every line that bad/1 flags. Pairs are
% pushed onto the front of Badnesses, so the most recently seen offender
% ends up first. The collected list is then passed to punish/1.
go(Lines, LineNum, Badnesses) ->
    Step =
        fun(Line, {Num, Found}) ->
                case bad(Line) of
                    true  -> {Num + 1, [{Num, Line} | Found]};
                    false -> {Num + 1, Found}
                end
        end,
    {_NextNum, Offenders} = lists:foldl(Step, {LineNum, Badnesses}, Lines),
    punish(Offenders).


% Returns true if the binary contains at least one byte with the high bit
% set (i.e. a byte outside the 7-bit ASCII range), false otherwise.
% Scanning stops at the first such byte.
bad(<<Byte, Tail/binary>>) when Byte < 16#80 -> bad(Tail);
bad(<<_HighByte, _/binary>>)                 -> true;
bad(<<>>)                                    -> false.

% Tabulates the offending lines.
%
% Takes the list of {LineNumber, Line} pairs with the highest line number
% at the head. An empty list prints a short "nothing found" message.
% Otherwise the head's line number determines the width of the number
% column, and the pairs are printed in reverse of the order given.
punish([]) ->
    io:format("no badness found~n");
punish([{LastLinum, _} | _] = NewestFirst) ->
    ColumnWidth = ndigits(LastLinum),
    punish(ColumnWidth, lists:reverse(NewestFirst)).


% Prints one row per {LineNumber, Line} pair: the line number padded to
% NDigits columns, a tab, then the line with its non-ASCII characters
% highlighted. Returns ok.
punish(NDigits, Rows) ->
    PrintRow =
        fun({Linum, Line}) ->
                Row = [format_digits(NDigits, Linum), "\t", highlight_bad(Line)],
                io:format("~ts~n", [Row])
        end,
    lists:foreach(PrintRow, Rows).


% Returns Line (a binary) as iodata in which every non-ASCII character is
% wrapped in terminal escape codes by red/1. ASCII bytes are passed
% through unchanged.
highlight_bad(Line) ->
    hlb(Line, []).

% hlb(Remaining, Acc) consumes Remaining from the left and appends each
% piece to the iodata accumulator Acc.
%
% Plain 7-bit ASCII byte: copied through as-is.
hlb(<<Byte, Rest/bytes>>, Acc) when Byte < 16#80 ->
    hlb(Rest, [Acc, <<Byte>>]);
% Well-formed multi-byte UTF-8 character: highlighted as a single unit.
% The utf8 segment only matches valid encodings, so re-encoding Char
% yields exactly the bytes that were consumed.
hlb(<<Char/utf8, Rest/bytes>>, Acc) ->
    hlb(Rest, [Acc, red(<<Char/utf8>>)]);
% Any other high byte (stray continuation byte, truncated or otherwise
% malformed sequence, non-UTF-8 encodings such as Latin-1): highlight that
% one byte and keep going, so such input is reported instead of crashing.
hlb(<<Byte, Rest/bytes>>, Acc) ->
    hlb(Rest, [Acc, red(<<Byte>>)]);
hlb(<<>>, Acc) ->
    Acc.

% Wraps String in ANSI SGR codes: reverse video + red before, reset after.
red(String) ->
    ["\e[7;31m", String, "\e[0m"].

% Returns the number of decimal digits needed to write the non-negative
% integer N (0 counts as one digit).
%
% Computed from the decimal representation rather than via math:log10/1,
% so it is exact for arbitrarily large integers and defined for 0.
ndigits(N) when is_integer(N), N >= 0 ->
    length(integer_to_list(N)).

% Renders N in decimal, left-padded with spaces up to Width columns.
% Returns iodata: the padding followed by the digits. No padding is added
% when N already needs Width columns or more.
format_digits(Width, N) ->
    Padding = spaces(Width - ndigits(N)),
    [Padding, integer_to_list(N)].

% Returns a list of N single-space strings; an empty list when N is
% below 1.
spaces(N) ->
    spaces(N, []).

% Accumulator form: prepend one " " per step while the counter is >= 1.
spaces(Remaining, Acc) when Remaining >= 1 ->
    spaces(Remaining - 1, [" " | Acc]);
spaces(_, Acc) ->
    Acc.
