initial import

This commit is contained in:
SUZUKI Tetsuya 2012-10-03 15:05:19 +09:00
commit d9d541dce8
49 changed files with 14027 additions and 0 deletions

14
.gitignore vendored Normal file
View File

@ -0,0 +1,14 @@
.DS_Store
.eunit
ebin
deps
priv
*.o
*.beam
*.plt
*.swp
*.html
*.png
edoc-info
stylesheet.css

12
.travis.yml Normal file
View File

@ -0,0 +1,12 @@
language: erlang
notifications:
disabled: true
branches:
only:
- develop
- 0.1.0
otp_release:
- R15B02
- R15B01
- R15B

23
Makefile Normal file
View File

@ -0,0 +1,23 @@
# Convenience wrapper around ./rebar.
#
# None of the targets below produces a file with the target's own name, so
# every one of them is declared phony. Without this, an existing file or
# directory named e.g. "test" or "doc" would make the target look up to date
# and its recipe would be skipped.
.PHONY: all compile doc xref clean test

# Default target: build, generate documentation, cross-reference check, unit tests.
all:
	./rebar compile
	./rebar doc
	./rebar xref
	./rebar eunit

compile:
	./rebar compile

doc:
	./rebar doc

# xref analyses compiled code, so compile first.
xref: compile
	./rebar xref

clean:
	./rebar clean

# Unit tests run only after the xref check has passed.
test: xref
	./rebar eunit

21
README.md Normal file
View File

@ -0,0 +1,21 @@
erlang-sha3
===========
[![Build Status](https://secure.travis-ci.org/szktty/erlang-sha3.png?branch=develop)](http://travis-ci.org/szktty/erlang-sha3)
SHA3 for Erlang
Licenses
--------
This program is distributed under the Apache License 2.0.
The Keccak source files are distributed under the CC0 1.0 Universal (CC0 1.0) Public Domain Dedication.
Author
------
SUZUKI Tetsuya <tetsuya.suzuki@gmail.com>

27
c_src/AVR8-rotate64.h Executable file
View File

@ -0,0 +1,27 @@
/*
File: AVR8-rotate64.h
This code is originally by Daniel Otte (daniel.otte@rub.de) in 2006-2010 as part of the AVR-Crypto-Lib, and was then improved by Ronny Van Keer, STMicroelectronics, in 2010.
Implementation by Daniel Otte and Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef ROTATE64_H_
#define ROTATE64_H_
#include <stdint.h>
/*
 * ROT_CODE(a): pack a left-rotation amount a (in bits) into the code value
 * taken by rotate64left_code().
 *   - the part shifted left by 4 is a/8, plus one when a%8 is greater than 4
 *   - the low three bits are a & 7
 * In AVR8-rotate64.s the 0x70 bits of the code select a whole-byte rotation
 * and the low three bits select a bit rotation: 1..4 rotate left by that
 * many bits, while 5, 6 and 7 rotate right by 3, 2 and 1 bits, which is why
 * the byte count is incremented for remainders above 4.
 */
#define ROT_CODE(a) ((((a)/8+((((a)%8)>4)?1:0))<<4) | ((a) & 7))
/* Rotate the 64-bit value a left by one bit. */
uint64_t rotate64_1bit_left(uint64_t a);
/* Rotate the 64-bit value a right by one bit. */
uint64_t rotate64_1bit_right(uint64_t a);
/* Rotate the 64-bit value a left as described by code; code is expected to come from ROT_CODE(). */
uint64_t rotate64left_code(uint64_t a, int8_t code);
#endif /* ROTATE64_H_ */

285
c_src/AVR8-rotate64.s Executable file
View File

@ -0,0 +1,285 @@
/*
File: AVR8-rotate64.s
This code is originally by Daniel Otte (daniel.otte@rub.de) in 2006-2010 as part of the AVR-Crypto-Lib, and was then improved by Ronny Van Keer, STMicroelectronics, in 2010.
Implementation by Daniel Otte and Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
** 64-bit left rotation by 1 to 4 bits on the value held in r18..r25.
** The shift chain starts with lsl on r18 and carries upwards through rol on
** r19..r25, so r18 is the least significant byte.
** The four labels fall through into one another: entering at
** rotate64_Nbit_left executes N single-bit rotations before the final ret.
** Only rotate64_1bit_left is exported.
** r1 is assumed to contain zero -- TODO confirm against the calling convention.
*/
.global rotate64_1bit_left
rotate64_4bit_left:
lsl r18
rol r19
rol r20
rol r21
rol r22
rol r23
rol r24
rol r25
adc r18, r1 /* add the carry (bit shifted out of r25) into bit 0 of r18 */
rotate64_3bit_left:
lsl r18
rol r19
rol r20
rol r21
rol r22
rol r23
rol r24
rol r25
adc r18, r1
rotate64_2bit_left:
lsl r18
rol r19
rol r20
rol r21
rol r22
rol r23
rol r24
rol r25
adc r18, r1
rotate64_1bit_left:
lsl r18
rol r19
rol r20
rol r21
rol r22
rol r23
rol r24
rol r25
adc r18, r1
ret
/*
** 64-bit right rotation by 1 to 3 bits on the value held in r18..r25.
** The three labels fall through into one another: entering at
** rotate64_Nbit_right executes N single-bit rotations before the final ret.
** Only rotate64_1bit_right is exported.
** Each step saves bit 0 of r18 in the T flag (bst), shifts the chain right
** from r25 down to r18 (ror), then writes the saved bit into bit 7 of r25
** (bld), replacing whatever the first ror shifted in from the carry flag.
*/
.global rotate64_1bit_right
rotate64_3bit_right:
bst r18, 0
ror r25
ror r24
ror r23
ror r22
ror r21
ror r20
ror r19
ror r18
bld r25, 7
rotate64_2bit_right:
bst r18, 0
ror r25
ror r24
ror r23
ror r22
ror r21
ror r20
ror r19
ror r18
bld r25, 7
rotate64_1bit_right:
bst r18, 0
ror r25
ror r24
ror r23
ror r22
ror r21
ror r20
ror r19
ror r18
bld r25, 7
ret
/*
** Each byte rotate routine must be 16 instructions long.
**
** rotate64left_code jumps to rotate64_0byte_left plus (code & 0x70), so the
** eight routines below form a table with a fixed stride of 16 instructions;
** the trailing nops are padding that keeps every routine at that length.
**
** Each routine first moves the bytes of r18..r25 by a whole number of byte
** positions (r18 being the least significant byte), then masks r16 down to
** its low three bits and jumps to entry r16 of bit_rot_jmp_table.
** r16 is left masked, not restored. r0 is used as scratch.
** The routines that use "movw r0, ..." also overwrite r1, so they clear r1
** again before it is used as the zero operand of adc.
*/
rotate64_0byte_left:
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
nop
nop
nop
nop
nop
nop
nop
nop
nop
/* Move every byte one position up; the old r25 wraps around into r18. */
rotate64_1byte_left:
mov r0, r25
mov r25, r24
mov r24, r23
mov r23, r22
mov r22, r21
mov r21, r20
mov r20, r19
mov r19, r18
mov r18, r0
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
/* Move every register pair one pair up; the old r25:r24 wraps into r19:r18. */
rotate64_2byte_left:
movw r0, r24
movw r24, r22
movw r22, r20
movw r20, r18
movw r18, r0
clr r1 /* movw r0 overwrote r1; make it zero again for the adc below */
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
nop
nop
nop
/* Move every byte three positions up (r25 receives the old r22, and so on). */
rotate64_3byte_left:
mov r0, r25
mov r25, r22
mov r22, r19
mov r19, r24
mov r24, r21
mov r21, r18
mov r18, r23
mov r23, r20
mov r20, r0
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
/* Swap the two halves: r25:r24 <-> r21:r20 and r23:r22 <-> r19:r18. */
rotate64_4byte_left:
movw r0, r24
movw r24, r20
movw r20, r0
movw r0, r22
movw r22, r18
movw r18, r0
clr r1 /* movw r0 overwrote r1; make it zero again for the adc below */
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
nop
nop
/* Move every byte five positions up (r25 receives the old r20, and so on). */
rotate64_5byte_left:
mov r0, r25
mov r25, r20
mov r20, r23
mov r23, r18
mov r18, r21
mov r21, r24
mov r24, r19
mov r19, r22
mov r22, r0
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
/* Move every register pair one pair down; the old r19:r18 wraps into r25:r24. */
rotate64_6byte_left:
movw r0, r18
movw r18, r20
movw r20, r22
movw r22, r24
movw r24, r0
clr r1 /* movw r0 overwrote r1; make it zero again for the adc below */
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
nop
nop
nop
/* Move every byte one position down; the old r18 wraps around into r25. */
rotate64_7byte_left:
mov r0, r18
mov r18, r19
mov r19, r20
mov r20, r21
mov r21, r22
mov r22, r23
mov r23, r24
mov r24, r25
mov r25, r0
andi r16, 0x07
ldi r30, pm_lo8(bit_rot_jmp_table)
ldi r31, pm_hi8(bit_rot_jmp_table)
add r30, r16
adc r31, r1
ijmp
nop
/*
** Dispatch table for the bit part of a rotation. The byte rotate routines
** jump to entry (r16 & 0x07), one instruction per entry:
**   0       no bit rotation, return immediately
**   1..4    rotate left by 1..4 bits
**   5,6,7   rotate right by 3, 2, 1 bits
*/
bit_rot_jmp_table:
ret
rjmp rotate64_1bit_left
rjmp rotate64_2bit_left
rjmp rotate64_3bit_left
rjmp rotate64_4bit_left
rjmp rotate64_3bit_right
rjmp rotate64_2bit_right
rjmp rotate64_1bit_right
/*
** rotate64left_code: rotate the 64-bit value in r18..r25 according to the
** code in r16.
** The 0x70 bits of r16 are added to the address of rotate64_0byte_left to
** pick one of the byte rotate routines, which are laid out 16 instructions
** apart. r16 is restored from r0 before the jump so that the selected
** routine can still read its low three bits.
** r1 is assumed to contain zero -- TODO confirm against the calling convention.
*/
.global rotate64left_code
rotate64left_code:
ldi r30, pm_lo8(rotate64_0byte_left)
ldi r31, pm_hi8(rotate64_0byte_left)
mov r0, r16 /* keep the full code while r16 is masked */
andi r16, 0x70
add r30, r16
adc r31, r1
mov r16, r0
ijmp

2
c_src/Keccak-avr8-settings.h Executable file
View File

@ -0,0 +1,2 @@
/* Compile-time Keccak parameters for the AVR8 build. */
/* Presumably the sponge rate in bits -- TODO confirm against the code that consumes cKeccakR. */
#define cKeccakR 1088
/* Going by the name, the fixed output length in bytes (32 bytes = 256 bits). */
#define cKeccakFixedOutputLengthInBytes 32

555
c_src/KeccakF-1600-32-rvk.macros Executable file
View File

@ -0,0 +1,555 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * Round constants for the 32-bit `rounds` macro: 2*24 words, stored as 24
 * pairs. The macro walks this table sequentially through pRoundConstants
 * and, in each round, XORs the first word of a pair into the ...ba0 variable
 * and the second word into the ...ba1 variable.
 */
static const UINT32 KeccakF1600RoundConstants_int2[2*24] =
{
0x00000001UL, 0x00000000UL,
0x00000000UL, 0x00000089UL,
0x00000000UL, 0x8000008bUL,
0x00000000UL, 0x80008080UL,
0x00000001UL, 0x0000008bUL,
0x00000001UL, 0x00008000UL,
0x00000001UL, 0x80008088UL,
0x00000001UL, 0x80000082UL,
0x00000000UL, 0x0000000bUL,
0x00000000UL, 0x0000000aUL,
0x00000001UL, 0x00008082UL,
0x00000000UL, 0x00008003UL,
0x00000001UL, 0x0000808bUL,
0x00000001UL, 0x8000000bUL,
0x00000001UL, 0x8000008aUL,
0x00000001UL, 0x80000081UL,
0x00000000UL, 0x80000081UL,
0x00000000UL, 0x80000008UL,
0x00000000UL, 0x00000083UL,
0x00000000UL, 0x80008003UL,
0x00000001UL, 0x80008088UL,
0x00000000UL, 0x80000088UL,
0x00000001UL, 0x00008000UL,
0x00000000UL, 0x80008082UL
};
/*
 * rounds: statement block that runs the whole permutation on `state`.
 *
 * Expects an array named `state` with at least 50 UINT32 elements to be in
 * scope, together with the UINT32 type, the ROL32(value, offset) macro and
 * the copyFromState / copyToState macros.
 *
 * The state is loaded into the local A* variables (two UINT32 words per
 * lane, suffixes 0 and 1). The loop body contains two rounds: the first
 * reads the A* variables and writes the E* variables, the second reads E*
 * and writes A* back. With 12 iterations this gives 24 rounds, and the four
 * pRoundConstants++ reads per iteration consume exactly the 2*24 entries of
 * KeccakF1600RoundConstants_int2. Finally the A* variables are stored back
 * into `state`.
 */
#undef rounds
#define rounds \
{ \
UINT32 Da0, De0, Di0, Do0, Du0; \
UINT32 Da1, De1, Di1, Do1, Du1; \
UINT32 Ba, Be, Bi, Bo, Bu; \
UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \
UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \
UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \
UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \
UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \
UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \
UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \
UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \
UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \
UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \
UINT32 Cw, Cx, Cy, Cz; \
UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \
UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \
UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \
UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \
UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \
UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \
UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \
UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \
UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \
UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \
const UINT32 * pRoundConstants = KeccakF1600RoundConstants_int2; \
UINT32 i; \
\
copyFromState(A, state) \
\
for( i = 12; i != 0; --i ) { /* two rounds per iteration */ \
Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \
Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \
Da0 = Cx^ROL32(Du1, 1); \
Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \
Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \
Da1 = Cz^Du0; \
\
Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \
Do0 = Cw^ROL32(Cz, 1); \
Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \
Do1 = Cy^Cx; \
\
Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \
De0 = Cx^ROL32(Cy, 1); \
Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \
De1 = Cz^Cw; \
\
Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \
Di0 = Du0^ROL32(Cy, 1); \
Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \
Di1 = Du1^Cw; \
\
Du0 = Cw^ROL32(Cz, 1); \
Du1 = Cy^Cx; \
\
Aba0 ^= Da0; \
Ba = Aba0; \
Age0 ^= De0; \
Be = ROL32(Age0, 22); \
Aki1 ^= Di1; \
Bi = ROL32(Aki1, 22); \
Amo1 ^= Do1; \
Bo = ROL32(Amo1, 11); \
Asu0 ^= Du0; \
Bu = ROL32(Asu0, 7); \
Eba0 = Ba ^((~Be)& Bi ) ^ *(pRoundConstants++); \
Ebe0 = Be ^((~Bi)& Bo ); \
Ebi0 = Bi ^((~Bo)& Bu ); \
Ebo0 = Bo ^((~Bu)& Ba ); \
Ebu0 = Bu ^((~Ba)& Be ); \
\
Abo0 ^= Do0; \
Ba = ROL32(Abo0, 14); \
Agu0 ^= Du0; \
Be = ROL32(Agu0, 10); \
Aka1 ^= Da1; \
Bi = ROL32(Aka1, 2); \
Ame1 ^= De1; \
Bo = ROL32(Ame1, 23); \
Asi1 ^= Di1; \
Bu = ROL32(Asi1, 31); \
Ega0 = Ba ^((~Be)& Bi ); \
Ege0 = Be ^((~Bi)& Bo ); \
Egi0 = Bi ^((~Bo)& Bu ); \
Ego0 = Bo ^((~Bu)& Ba ); \
Egu0 = Bu ^((~Ba)& Be ); \
\
Abe1 ^= De1; \
Ba = ROL32(Abe1, 1); \
Agi0 ^= Di0; \
Be = ROL32(Agi0, 3); \
Ako1 ^= Do1; \
Bi = ROL32(Ako1, 13); \
Amu0 ^= Du0; \
Bo = ROL32(Amu0, 4); \
Asa0 ^= Da0; \
Bu = ROL32(Asa0, 9); \
Eka0 = Ba ^((~Be)& Bi ); \
Eke0 = Be ^((~Bi)& Bo ); \
Eki0 = Bi ^((~Bo)& Bu ); \
Eko0 = Bo ^((~Bu)& Ba ); \
Eku0 = Bu ^((~Ba)& Be ); \
\
Abu1 ^= Du1; \
Ba = ROL32(Abu1, 14); \
Aga0 ^= Da0; \
Be = ROL32(Aga0, 18); \
Ake0 ^= De0; \
Bi = ROL32(Ake0, 5); \
Ami1 ^= Di1; \
Bo = ROL32(Ami1, 8); \
Aso0 ^= Do0; \
Bu = ROL32(Aso0, 28); \
Ema0 = Ba ^((~Be)& Bi ); \
Eme0 = Be ^((~Bi)& Bo ); \
Emi0 = Bi ^((~Bo)& Bu ); \
Emo0 = Bo ^((~Bu)& Ba ); \
Emu0 = Bu ^((~Ba)& Be ); \
\
Abi0 ^= Di0; \
Ba = ROL32(Abi0, 31); \
Ago1 ^= Do1; \
Be = ROL32(Ago1, 28); \
Aku1 ^= Du1; \
Bi = ROL32(Aku1, 20); \
Ama1 ^= Da1; \
Bo = ROL32(Ama1, 21); \
Ase0 ^= De0; \
Bu = ROL32(Ase0, 1); \
Esa0 = Ba ^((~Be)& Bi ); \
Ese0 = Be ^((~Bi)& Bo ); \
Esi0 = Bi ^((~Bo)& Bu ); \
Eso0 = Bo ^((~Bu)& Ba ); \
Esu0 = Bu ^((~Ba)& Be ); \
\
Aba1 ^= Da1; \
Ba = Aba1; \
Age1 ^= De1; \
Be = ROL32(Age1, 22); \
Aki0 ^= Di0; \
Bi = ROL32(Aki0, 21); \
Amo0 ^= Do0; \
Bo = ROL32(Amo0, 10); \
Asu1 ^= Du1; \
Bu = ROL32(Asu1, 7); \
Eba1 = Ba ^((~Be)& Bi ); \
Eba1 ^= *(pRoundConstants++); \
Ebe1 = Be ^((~Bi)& Bo ); \
Ebi1 = Bi ^((~Bo)& Bu ); \
Ebo1 = Bo ^((~Bu)& Ba ); \
Ebu1 = Bu ^((~Ba)& Be ); \
\
Abo1 ^= Do1; \
Ba = ROL32(Abo1, 14); \
Agu1 ^= Du1; \
Be = ROL32(Agu1, 10); \
Aka0 ^= Da0; \
Bi = ROL32(Aka0, 1); \
Ame0 ^= De0; \
Bo = ROL32(Ame0, 22); \
Asi0 ^= Di0; \
Bu = ROL32(Asi0, 30); \
Ega1 = Ba ^((~Be)& Bi ); \
Ege1 = Be ^((~Bi)& Bo ); \
Egi1 = Bi ^((~Bo)& Bu ); \
Ego1 = Bo ^((~Bu)& Ba ); \
Egu1 = Bu ^((~Ba)& Be ); \
\
Abe0 ^= De0; \
Ba = Abe0; \
Agi1 ^= Di1; \
Be = ROL32(Agi1, 3); \
Ako0 ^= Do0; \
Bi = ROL32(Ako0, 12); \
Amu1 ^= Du1; \
Bo = ROL32(Amu1, 4); \
Asa1 ^= Da1; \
Bu = ROL32(Asa1, 9); \
Eka1 = Ba ^((~Be)& Bi ); \
Eke1 = Be ^((~Bi)& Bo ); \
Eki1 = Bi ^((~Bo)& Bu ); \
Eko1 = Bo ^((~Bu)& Ba ); \
Eku1 = Bu ^((~Ba)& Be ); \
\
Abu0 ^= Du0; \
Ba = ROL32(Abu0, 13); \
Aga1 ^= Da1; \
Be = ROL32(Aga1, 18); \
Ake1 ^= De1; \
Bi = ROL32(Ake1, 5); \
Ami0 ^= Di0; \
Bo = ROL32(Ami0, 7); \
Aso1 ^= Do1; \
Bu = ROL32(Aso1, 28); \
Ema1 = Ba ^((~Be)& Bi ); \
Eme1 = Be ^((~Bi)& Bo ); \
Emi1 = Bi ^((~Bo)& Bu ); \
Emo1 = Bo ^((~Bu)& Ba ); \
Emu1 = Bu ^((~Ba)& Be ); \
\
Abi1 ^= Di1; \
Ba = ROL32(Abi1, 31); \
Ago0 ^= Do0; \
Be = ROL32(Ago0, 27); \
Aku0 ^= Du0; \
Bi = ROL32(Aku0, 19); \
Ama0 ^= Da0; \
Bo = ROL32(Ama0, 20); \
Ase1 ^= De1; \
Bu = ROL32(Ase1, 1); \
Esa1 = Ba ^((~Be)& Bi ); \
Ese1 = Be ^((~Bi)& Bo ); \
Esi1 = Bi ^((~Bo)& Bu ); \
Eso1 = Bo ^((~Bu)& Ba ); \
Esu1 = Bu ^((~Ba)& Be ); \
/* second round of the iteration: reads E*, writes A* */ \
Cx = Ebu0^Egu0^Eku0^Emu0^Esu0; \
Du1 = Ebe1^Ege1^Eke1^Eme1^Ese1; \
Da0 = Cx^ROL32(Du1, 1); \
Cz = Ebu1^Egu1^Eku1^Emu1^Esu1; \
Du0 = Ebe0^Ege0^Eke0^Eme0^Ese0; \
Da1 = Cz^Du0; \
\
Cw = Ebi0^Egi0^Eki0^Emi0^Esi0; \
Do0 = Cw^ROL32(Cz, 1); \
Cy = Ebi1^Egi1^Eki1^Emi1^Esi1; \
Do1 = Cy^Cx; \
\
Cx = Eba0^Ega0^Eka0^Ema0^Esa0; \
De0 = Cx^ROL32(Cy, 1); \
Cz = Eba1^Ega1^Eka1^Ema1^Esa1; \
De1 = Cz^Cw; \
\
Cy = Ebo1^Ego1^Eko1^Emo1^Eso1; \
Di0 = Du0^ROL32(Cy, 1); \
Cw = Ebo0^Ego0^Eko0^Emo0^Eso0; \
Di1 = Du1^Cw; \
\
Du0 = Cw^ROL32(Cz, 1); \
Du1 = Cy^Cx; \
\
Eba0 ^= Da0; \
Ba = Eba0; \
Ege0 ^= De0; \
Be = ROL32(Ege0, 22); \
Eki1 ^= Di1; \
Bi = ROL32(Eki1, 22); \
Emo1 ^= Do1; \
Bo = ROL32(Emo1, 11); \
Esu0 ^= Du0; \
Bu = ROL32(Esu0, 7); \
Aba0 = Ba ^((~Be)& Bi ); \
Aba0 ^= *(pRoundConstants++); \
Abe0 = Be ^((~Bi)& Bo ); \
Abi0 = Bi ^((~Bo)& Bu ); \
Abo0 = Bo ^((~Bu)& Ba ); \
Abu0 = Bu ^((~Ba)& Be ); \
\
Ebo0 ^= Do0; \
Ba = ROL32(Ebo0, 14); \
Egu0 ^= Du0; \
Be = ROL32(Egu0, 10); \
Eka1 ^= Da1; \
Bi = ROL32(Eka1, 2); \
Eme1 ^= De1; \
Bo = ROL32(Eme1, 23); \
Esi1 ^= Di1; \
Bu = ROL32(Esi1, 31); \
Aga0 = Ba ^((~Be)& Bi ); \
Age0 = Be ^((~Bi)& Bo ); \
Agi0 = Bi ^((~Bo)& Bu ); \
Ago0 = Bo ^((~Bu)& Ba ); \
Agu0 = Bu ^((~Ba)& Be ); \
\
Ebe1 ^= De1; \
Ba = ROL32(Ebe1, 1); \
Egi0 ^= Di0; \
Be = ROL32(Egi0, 3); \
Eko1 ^= Do1; \
Bi = ROL32(Eko1, 13); \
Emu0 ^= Du0; \
Bo = ROL32(Emu0, 4); \
Esa0 ^= Da0; \
Bu = ROL32(Esa0, 9); \
Aka0 = Ba ^((~Be)& Bi ); \
Ake0 = Be ^((~Bi)& Bo ); \
Aki0 = Bi ^((~Bo)& Bu ); \
Ako0 = Bo ^((~Bu)& Ba ); \
Aku0 = Bu ^((~Ba)& Be ); \
\
Ebu1 ^= Du1; \
Ba = ROL32(Ebu1, 14); \
Ega0 ^= Da0; \
Be = ROL32(Ega0, 18); \
Eke0 ^= De0; \
Bi = ROL32(Eke0, 5); \
Emi1 ^= Di1; \
Bo = ROL32(Emi1, 8); \
Eso0 ^= Do0; \
Bu = ROL32(Eso0, 28); \
Ama0 = Ba ^((~Be)& Bi ); \
Ame0 = Be ^((~Bi)& Bo ); \
Ami0 = Bi ^((~Bo)& Bu ); \
Amo0 = Bo ^((~Bu)& Ba ); \
Amu0 = Bu ^((~Ba)& Be ); \
\
Ebi0 ^= Di0; \
Ba = ROL32(Ebi0, 31); \
Ego1 ^= Do1; \
Be = ROL32(Ego1, 28); \
Eku1 ^= Du1; \
Bi = ROL32(Eku1, 20); \
Ema1 ^= Da1; \
Bo = ROL32(Ema1, 21); \
Ese0 ^= De0; \
Bu = ROL32(Ese0, 1); \
Asa0 = Ba ^((~Be)& Bi ); \
Ase0 = Be ^((~Bi)& Bo ); \
Asi0 = Bi ^((~Bo)& Bu ); \
Aso0 = Bo ^((~Bu)& Ba ); \
Asu0 = Bu ^((~Ba)& Be ); \
\
Eba1 ^= Da1; \
Ba = Eba1; \
Ege1 ^= De1; \
Be = ROL32(Ege1, 22); \
Eki0 ^= Di0; \
Bi = ROL32(Eki0, 21); \
Emo0 ^= Do0; \
Bo = ROL32(Emo0, 10); \
Esu1 ^= Du1; \
Bu = ROL32(Esu1, 7); \
Aba1 = Ba ^((~Be)& Bi ); \
Aba1 ^= *(pRoundConstants++); \
Abe1 = Be ^((~Bi)& Bo ); \
Abi1 = Bi ^((~Bo)& Bu ); \
Abo1 = Bo ^((~Bu)& Ba ); \
Abu1 = Bu ^((~Ba)& Be ); \
\
Ebo1 ^= Do1; \
Ba = ROL32(Ebo1, 14); \
Egu1 ^= Du1; \
Be = ROL32(Egu1, 10); \
Eka0 ^= Da0; \
Bi = ROL32(Eka0, 1); \
Eme0 ^= De0; \
Bo = ROL32(Eme0, 22); \
Esi0 ^= Di0; \
Bu = ROL32(Esi0, 30); \
Aga1 = Ba ^((~Be)& Bi ); \
Age1 = Be ^((~Bi)& Bo ); \
Agi1 = Bi ^((~Bo)& Bu ); \
Ago1 = Bo ^((~Bu)& Ba ); \
Agu1 = Bu ^((~Ba)& Be ); \
\
Ebe0 ^= De0; \
Ba = Ebe0; \
Egi1 ^= Di1; \
Be = ROL32(Egi1, 3); \
Eko0 ^= Do0; \
Bi = ROL32(Eko0, 12); \
Emu1 ^= Du1; \
Bo = ROL32(Emu1, 4); \
Esa1 ^= Da1; \
Bu = ROL32(Esa1, 9); \
Aka1 = Ba ^((~Be)& Bi ); \
Ake1 = Be ^((~Bi)& Bo ); \
Aki1 = Bi ^((~Bo)& Bu ); \
Ako1 = Bo ^((~Bu)& Ba ); \
Aku1 = Bu ^((~Ba)& Be ); \
\
Ebu0 ^= Du0; \
Ba = ROL32(Ebu0, 13); \
Ega1 ^= Da1; \
Be = ROL32(Ega1, 18); \
Eke1 ^= De1; \
Bi = ROL32(Eke1, 5); \
Emi0 ^= Di0; \
Bo = ROL32(Emi0, 7); \
Eso1 ^= Do1; \
Bu = ROL32(Eso1, 28); \
Ama1 = Ba ^((~Be)& Bi ); \
Ame1 = Be ^((~Bi)& Bo ); \
Ami1 = Bi ^((~Bo)& Bu ); \
Amo1 = Bo ^((~Bu)& Ba ); \
Amu1 = Bu ^((~Ba)& Be ); \
\
Ebi1 ^= Di1; \
Ba = ROL32(Ebi1, 31); \
Ego0 ^= Do0; \
Be = ROL32(Ego0, 27); \
Eku0 ^= Du0; \
Bi = ROL32(Eku0, 19); \
Ema0 ^= Da0; \
Bo = ROL32(Ema0, 20); \
Ese1 ^= De1; \
Bu = ROL32(Ese1, 1); \
Asa1 = Ba ^((~Be)& Bi ); \
Ase1 = Be ^((~Bi)& Bo ); \
Asi1 = Bi ^((~Bo)& Bu ); \
Aso1 = Bo ^((~Bu)& Ba ); \
Asu1 = Bu ^((~Ba)& Be ); \
} \
copyToState(state, A) \
}
/*
 * copyFromState(X, state): load the 50 words state[0..49] into the variables
 * X##ba0, X##ba1, X##be0, ... X##su1.
 *
 *   X      name prefix of the destination variables (e.g. A gives Aba0 ...)
 *   state  array expression with at least 50 elements
 *
 * Every statement carries its own semicolon, so the macro is used without a
 * trailing ';'. The last line deliberately has no line continuation, so the
 * definition ends here regardless of what follows it in the file.
 */
#define copyFromState(X, state) \
    X##ba0 = state[ 0]; \
    X##ba1 = state[ 1]; \
    X##be0 = state[ 2]; \
    X##be1 = state[ 3]; \
    X##bi0 = state[ 4]; \
    X##bi1 = state[ 5]; \
    X##bo0 = state[ 6]; \
    X##bo1 = state[ 7]; \
    X##bu0 = state[ 8]; \
    X##bu1 = state[ 9]; \
    X##ga0 = state[10]; \
    X##ga1 = state[11]; \
    X##ge0 = state[12]; \
    X##ge1 = state[13]; \
    X##gi0 = state[14]; \
    X##gi1 = state[15]; \
    X##go0 = state[16]; \
    X##go1 = state[17]; \
    X##gu0 = state[18]; \
    X##gu1 = state[19]; \
    X##ka0 = state[20]; \
    X##ka1 = state[21]; \
    X##ke0 = state[22]; \
    X##ke1 = state[23]; \
    X##ki0 = state[24]; \
    X##ki1 = state[25]; \
    X##ko0 = state[26]; \
    X##ko1 = state[27]; \
    X##ku0 = state[28]; \
    X##ku1 = state[29]; \
    X##ma0 = state[30]; \
    X##ma1 = state[31]; \
    X##me0 = state[32]; \
    X##me1 = state[33]; \
    X##mi0 = state[34]; \
    X##mi1 = state[35]; \
    X##mo0 = state[36]; \
    X##mo1 = state[37]; \
    X##mu0 = state[38]; \
    X##mu1 = state[39]; \
    X##sa0 = state[40]; \
    X##sa1 = state[41]; \
    X##se0 = state[42]; \
    X##se1 = state[43]; \
    X##si0 = state[44]; \
    X##si1 = state[45]; \
    X##so0 = state[46]; \
    X##so1 = state[47]; \
    X##su0 = state[48]; \
    X##su1 = state[49];
/*
 * copyToState(state, X): store the 50 variables X##ba0, X##ba1, X##be0, ...
 * X##su1 into state[0..49], in that order.
 *
 *   state  array expression with at least 50 elements
 *   X      name prefix of the source variables (e.g. A gives Aba0 ...)
 *
 * Every statement carries its own semicolon, so the macro is used without a
 * trailing ';'. The last line deliberately has no line continuation, so the
 * definition does not depend on the following line being blank.
 */
#define copyToState(state, X) \
    state[ 0] = X##ba0; \
    state[ 1] = X##ba1; \
    state[ 2] = X##be0; \
    state[ 3] = X##be1; \
    state[ 4] = X##bi0; \
    state[ 5] = X##bi1; \
    state[ 6] = X##bo0; \
    state[ 7] = X##bo1; \
    state[ 8] = X##bu0; \
    state[ 9] = X##bu1; \
    state[10] = X##ga0; \
    state[11] = X##ga1; \
    state[12] = X##ge0; \
    state[13] = X##ge1; \
    state[14] = X##gi0; \
    state[15] = X##gi1; \
    state[16] = X##go0; \
    state[17] = X##go1; \
    state[18] = X##gu0; \
    state[19] = X##gu1; \
    state[20] = X##ka0; \
    state[21] = X##ka1; \
    state[22] = X##ke0; \
    state[23] = X##ke1; \
    state[24] = X##ki0; \
    state[25] = X##ki1; \
    state[26] = X##ko0; \
    state[27] = X##ko1; \
    state[28] = X##ku0; \
    state[29] = X##ku1; \
    state[30] = X##ma0; \
    state[31] = X##ma1; \
    state[32] = X##me0; \
    state[33] = X##me1; \
    state[34] = X##mi0; \
    state[35] = X##mi1; \
    state[36] = X##mo0; \
    state[37] = X##mo1; \
    state[38] = X##mu0; \
    state[39] = X##mu1; \
    state[40] = X##sa0; \
    state[41] = X##sa1; \
    state[42] = X##se0; \
    state[43] = X##se1; \
    state[44] = X##si0; \
    state[45] = X##si1; \
    state[46] = X##so0; \
    state[47] = X##so1; \
    state[48] = X##su0; \
    state[49] = X##su1;

1187
c_src/KeccakF-1600-32-s1.macros Executable file

File diff suppressed because it is too large Load Diff

1187
c_src/KeccakF-1600-32-s2.macros Executable file

File diff suppressed because it is too large Load Diff

26
c_src/KeccakF-1600-32.macros Executable file
View File

@ -0,0 +1,26 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * Pull in the macro set for the requested round schedule.
 * UseSchedule may be 1, 2 or 3; schedule 1 is also used when UseSchedule is
 * not defined at all. Any other value stops the build.
 */
#if !defined(UseSchedule)
#include "KeccakF-1600-32-s1.macros"
#elif (UseSchedule == 1)
#include "KeccakF-1600-32-s1.macros"
#elif (UseSchedule == 2)
#include "KeccakF-1600-32-s2.macros"
#elif (UseSchedule == 3)
#include "KeccakF-1600-32-rvk.macros"
#else
#error "This schedule is not supported."
#endif

728
c_src/KeccakF-1600-64.macros Executable file
View File

@ -0,0 +1,728 @@
/*
Code automatically generated by KeccakTools!
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * declareABCDE: declare the UINT64 working variables used by the round
 * macros in this file: 25 A*, 25 B* and 25 E* variables plus the five C* and
 * five D* variables.
 *
 * Requires the UINT64 type to be in scope. The expansion ends with a ';', so
 * the macro is used without a trailing ';'. The last line deliberately has
 * no line continuation, so the definition ends here regardless of what
 * follows it in the file.
 */
#define declareABCDE \
    UINT64 Aba, Abe, Abi, Abo, Abu; \
    UINT64 Aga, Age, Agi, Ago, Agu; \
    UINT64 Aka, Ake, Aki, Ako, Aku; \
    UINT64 Ama, Ame, Ami, Amo, Amu; \
    UINT64 Asa, Ase, Asi, Aso, Asu; \
    UINT64 Bba, Bbe, Bbi, Bbo, Bbu; \
    UINT64 Bga, Bge, Bgi, Bgo, Bgu; \
    UINT64 Bka, Bke, Bki, Bko, Bku; \
    UINT64 Bma, Bme, Bmi, Bmo, Bmu; \
    UINT64 Bsa, Bse, Bsi, Bso, Bsu; \
    UINT64 Ca, Ce, Ci, Co, Cu; \
    UINT64 Da, De, Di, Do, Du; \
    UINT64 Eba, Ebe, Ebi, Ebo, Ebu; \
    UINT64 Ega, Ege, Egi, Ego, Egu; \
    UINT64 Eka, Eke, Eki, Eko, Eku; \
    UINT64 Ema, Eme, Emi, Emo, Emu; \
    UINT64 Esa, Ese, Esi, Eso, Esu;
/*
 * prepareTheta: compute the five column values Ca, Ce, Ci, Co, Cu, each the
 * XOR of the five A* variables that share the same last letter.
 *
 * Reads the 25 A* variables and writes the five C* variables, all of which
 * must already be declared. The last line deliberately has no line
 * continuation, so the following preprocessor directive is not absorbed
 * into the macro body.
 */
#define prepareTheta \
    Ca = Aba^Aga^Aka^Ama^Asa; \
    Ce = Abe^Age^Ake^Ame^Ase; \
    Ci = Abi^Agi^Aki^Ami^Asi; \
    Co = Abo^Ago^Ako^Amo^Aso; \
    Cu = Abu^Agu^Aku^Amu^Asu;
#ifdef UseBebigokimisa
// --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa')
// --- 64-bit lanes mapped to 64-bit words
/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E): one round, reading the lanes with
 * prefix A and writing the lanes with prefix E.
 *
 *   i  index into KeccakF1600RoundConstants for this round
 *   A  prefix of the input lane variables; they are modified in place by the
 *      `A##xx ^= D?` statements
 *   E  prefix of the output lane variables
 *
 * On entry Ca..Cu must hold the column XORs of the A lanes. On exit Ca..Cu
 * hold the column XORs of the freshly written E lanes, ready for the next
 * round. Uses the D* and B* variables as scratch and requires ROL64.
 * The last line deliberately has no line continuation, so the definition
 * ends with its final statement.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba = Bba ^( Bbe | Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    Ca = E##ba; \
    E##be = Bbe ^((~Bbi)| Bbo ); \
    Ce = E##be; \
    E##bi = Bbi ^( Bbo & Bbu ); \
    Ci = E##bi; \
    E##bo = Bbo ^( Bbu | Bba ); \
    Co = E##bo; \
    E##bu = Bbu ^( Bba & Bbe ); \
    Cu = E##bu; \
    \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga = Bga ^( Bge | Bgi ); \
    Ca ^= E##ga; \
    E##ge = Bge ^( Bgi & Bgo ); \
    Ce ^= E##ge; \
    E##gi = Bgi ^( Bgo |(~Bgu)); \
    Ci ^= E##gi; \
    E##go = Bgo ^( Bgu | Bga ); \
    Co ^= E##go; \
    E##gu = Bgu ^( Bga & Bge ); \
    Cu ^= E##gu; \
    \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka = Bka ^( Bke | Bki ); \
    Ca ^= E##ka; \
    E##ke = Bke ^( Bki & Bko ); \
    Ce ^= E##ke; \
    E##ki = Bki ^((~Bko)& Bku ); \
    Ci ^= E##ki; \
    E##ko = (~Bko)^( Bku | Bka ); \
    Co ^= E##ko; \
    E##ku = Bku ^( Bka & Bke ); \
    Cu ^= E##ku; \
    \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma = Bma ^( Bme & Bmi ); \
    Ca ^= E##ma; \
    E##me = Bme ^( Bmi | Bmo ); \
    Ce ^= E##me; \
    E##mi = Bmi ^((~Bmo)| Bmu ); \
    Ci ^= E##mi; \
    E##mo = (~Bmo)^( Bmu & Bma ); \
    Co ^= E##mo; \
    E##mu = Bmu ^( Bma | Bme ); \
    Cu ^= E##mu; \
    \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa = Bsa ^((~Bse)& Bsi ); \
    Ca ^= E##sa; \
    E##se = (~Bse)^( Bsi | Bso ); \
    Ce ^= E##se; \
    E##si = Bsi ^( Bso & Bsu ); \
    Ci ^= E##si; \
    E##so = Bso ^( Bsu | Bsa ); \
    Co ^= E##so; \
    E##su = Bsu ^( Bsa & Bse ); \
    Cu ^= E##su;
// --- Code for round (lane complementing pattern 'bebigokimisa')
// --- 64-bit lanes mapped to 64-bit words
/*
 * thetaRhoPiChiIota(i, A, E): one round, reading the lanes with prefix A and
 * writing the lanes with prefix E, without updating Ca..Cu afterwards.
 *
 *   i  index into KeccakF1600RoundConstants for this round
 *   A  prefix of the input lane variables; they are modified in place by the
 *      `A##xx ^= D?` statements
 *   E  prefix of the output lane variables
 *
 * On entry Ca..Cu must hold the column XORs of the A lanes. Uses the D* and
 * B* variables as scratch and requires ROL64.
 * The last line deliberately has no line continuation, so the `#else` that
 * follows is not absorbed into the macro body.
 */
#define thetaRhoPiChiIota(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba = Bba ^( Bbe | Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    E##be = Bbe ^((~Bbi)| Bbo ); \
    E##bi = Bbi ^( Bbo & Bbu ); \
    E##bo = Bbo ^( Bbu | Bba ); \
    E##bu = Bbu ^( Bba & Bbe ); \
    \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga = Bga ^( Bge | Bgi ); \
    E##ge = Bge ^( Bgi & Bgo ); \
    E##gi = Bgi ^( Bgo |(~Bgu)); \
    E##go = Bgo ^( Bgu | Bga ); \
    E##gu = Bgu ^( Bga & Bge ); \
    \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka = Bka ^( Bke | Bki ); \
    E##ke = Bke ^( Bki & Bko ); \
    E##ki = Bki ^((~Bko)& Bku ); \
    E##ko = (~Bko)^( Bku | Bka ); \
    E##ku = Bku ^( Bka & Bke ); \
    \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma = Bma ^( Bme & Bmi ); \
    E##me = Bme ^( Bmi | Bmo ); \
    E##mi = Bmi ^((~Bmo)| Bmu ); \
    E##mo = (~Bmo)^( Bmu & Bma ); \
    E##mu = Bmu ^( Bma | Bme ); \
    \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa = Bsa ^((~Bse)& Bsi ); \
    E##se = (~Bse)^( Bsi | Bso ); \
    E##si = Bsi ^( Bso & Bsu ); \
    E##so = Bso ^( Bsu | Bsa ); \
    E##su = Bsu ^( Bsa & Bse );
#else // UseBebigokimisa
// --- Code for round, with prepare-theta
// --- 64-bit lanes mapped to 64-bit words
/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E): one round, reading the lanes with
 * prefix A and writing the lanes with prefix E. This is the variant used
 * when UseBebigokimisa is not defined; every output lane is computed as
 * B ^ ((~B') & B'').
 *
 *   i  index into KeccakF1600RoundConstants for this round
 *   A  prefix of the input lane variables; they are modified in place by the
 *      `A##xx ^= D?` statements
 *   E  prefix of the output lane variables
 *
 * On entry Ca..Cu must hold the column XORs of the A lanes. On exit Ca..Cu
 * hold the column XORs of the freshly written E lanes, ready for the next
 * round. Uses the D* and B* variables as scratch and requires ROL64.
 * The last line deliberately has no line continuation, so the definition
 * ends with its final statement.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba = Bba ^((~Bbe)& Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    Ca = E##ba; \
    E##be = Bbe ^((~Bbi)& Bbo ); \
    Ce = E##be; \
    E##bi = Bbi ^((~Bbo)& Bbu ); \
    Ci = E##bi; \
    E##bo = Bbo ^((~Bbu)& Bba ); \
    Co = E##bo; \
    E##bu = Bbu ^((~Bba)& Bbe ); \
    Cu = E##bu; \
    \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga = Bga ^((~Bge)& Bgi ); \
    Ca ^= E##ga; \
    E##ge = Bge ^((~Bgi)& Bgo ); \
    Ce ^= E##ge; \
    E##gi = Bgi ^((~Bgo)& Bgu ); \
    Ci ^= E##gi; \
    E##go = Bgo ^((~Bgu)& Bga ); \
    Co ^= E##go; \
    E##gu = Bgu ^((~Bga)& Bge ); \
    Cu ^= E##gu; \
    \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka = Bka ^((~Bke)& Bki ); \
    Ca ^= E##ka; \
    E##ke = Bke ^((~Bki)& Bko ); \
    Ce ^= E##ke; \
    E##ki = Bki ^((~Bko)& Bku ); \
    Ci ^= E##ki; \
    E##ko = Bko ^((~Bku)& Bka ); \
    Co ^= E##ko; \
    E##ku = Bku ^((~Bka)& Bke ); \
    Cu ^= E##ku; \
    \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma = Bma ^((~Bme)& Bmi ); \
    Ca ^= E##ma; \
    E##me = Bme ^((~Bmi)& Bmo ); \
    Ce ^= E##me; \
    E##mi = Bmi ^((~Bmo)& Bmu ); \
    Ci ^= E##mi; \
    E##mo = Bmo ^((~Bmu)& Bma ); \
    Co ^= E##mo; \
    E##mu = Bmu ^((~Bma)& Bme ); \
    Cu ^= E##mu; \
    \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa = Bsa ^((~Bse)& Bsi ); \
    Ca ^= E##sa; \
    E##se = Bse ^((~Bsi)& Bso ); \
    Ce ^= E##se; \
    E##si = Bsi ^((~Bso)& Bsu ); \
    Ci ^= E##si; \
    E##so = Bso ^((~Bsu)& Bsa ); \
    Co ^= E##so; \
    E##su = Bsu ^((~Bsa)& Bse ); \
    Cu ^= E##su;
// --- Code for round
// --- 64-bit lanes mapped to 64-bit words
/*
 * thetaRhoPiChiIota(i, A, E): one round, reading the lanes with prefix A and
 * writing the lanes with prefix E, without updating Ca..Cu afterwards. This
 * is the variant used when UseBebigokimisa is not defined; every output lane
 * is computed as B ^ ((~B') & B'').
 *
 *   i  index into KeccakF1600RoundConstants for this round
 *   A  prefix of the input lane variables; they are modified in place by the
 *      `A##xx ^= D?` statements
 *   E  prefix of the output lane variables
 *
 * On entry Ca..Cu must hold the column XORs of the A lanes. Uses the D* and
 * B* variables as scratch and requires ROL64.
 * The last line deliberately has no line continuation, so the `#endif` that
 * follows is not absorbed into the macro body.
 */
#define thetaRhoPiChiIota(i, A, E) \
    Da = Cu^ROL64(Ce, 1); \
    De = Ca^ROL64(Ci, 1); \
    Di = Ce^ROL64(Co, 1); \
    Do = Ci^ROL64(Cu, 1); \
    Du = Co^ROL64(Ca, 1); \
    \
    A##ba ^= Da; \
    Bba = A##ba; \
    A##ge ^= De; \
    Bbe = ROL64(A##ge, 44); \
    A##ki ^= Di; \
    Bbi = ROL64(A##ki, 43); \
    A##mo ^= Do; \
    Bbo = ROL64(A##mo, 21); \
    A##su ^= Du; \
    Bbu = ROL64(A##su, 14); \
    E##ba = Bba ^((~Bbe)& Bbi ); \
    E##ba ^= KeccakF1600RoundConstants[i]; \
    E##be = Bbe ^((~Bbi)& Bbo ); \
    E##bi = Bbi ^((~Bbo)& Bbu ); \
    E##bo = Bbo ^((~Bbu)& Bba ); \
    E##bu = Bbu ^((~Bba)& Bbe ); \
    \
    A##bo ^= Do; \
    Bga = ROL64(A##bo, 28); \
    A##gu ^= Du; \
    Bge = ROL64(A##gu, 20); \
    A##ka ^= Da; \
    Bgi = ROL64(A##ka, 3); \
    A##me ^= De; \
    Bgo = ROL64(A##me, 45); \
    A##si ^= Di; \
    Bgu = ROL64(A##si, 61); \
    E##ga = Bga ^((~Bge)& Bgi ); \
    E##ge = Bge ^((~Bgi)& Bgo ); \
    E##gi = Bgi ^((~Bgo)& Bgu ); \
    E##go = Bgo ^((~Bgu)& Bga ); \
    E##gu = Bgu ^((~Bga)& Bge ); \
    \
    A##be ^= De; \
    Bka = ROL64(A##be, 1); \
    A##gi ^= Di; \
    Bke = ROL64(A##gi, 6); \
    A##ko ^= Do; \
    Bki = ROL64(A##ko, 25); \
    A##mu ^= Du; \
    Bko = ROL64(A##mu, 8); \
    A##sa ^= Da; \
    Bku = ROL64(A##sa, 18); \
    E##ka = Bka ^((~Bke)& Bki ); \
    E##ke = Bke ^((~Bki)& Bko ); \
    E##ki = Bki ^((~Bko)& Bku ); \
    E##ko = Bko ^((~Bku)& Bka ); \
    E##ku = Bku ^((~Bka)& Bke ); \
    \
    A##bu ^= Du; \
    Bma = ROL64(A##bu, 27); \
    A##ga ^= Da; \
    Bme = ROL64(A##ga, 36); \
    A##ke ^= De; \
    Bmi = ROL64(A##ke, 10); \
    A##mi ^= Di; \
    Bmo = ROL64(A##mi, 15); \
    A##so ^= Do; \
    Bmu = ROL64(A##so, 56); \
    E##ma = Bma ^((~Bme)& Bmi ); \
    E##me = Bme ^((~Bmi)& Bmo ); \
    E##mi = Bmi ^((~Bmo)& Bmu ); \
    E##mo = Bmo ^((~Bmu)& Bma ); \
    E##mu = Bmu ^((~Bma)& Bme ); \
    \
    A##bi ^= Di; \
    Bsa = ROL64(A##bi, 62); \
    A##go ^= Do; \
    Bse = ROL64(A##go, 55); \
    A##ku ^= Du; \
    Bsi = ROL64(A##ku, 39); \
    A##ma ^= Da; \
    Bso = ROL64(A##ma, 41); \
    A##se ^= De; \
    Bsu = ROL64(A##se, 2); \
    E##sa = Bsa ^((~Bse)& Bsi ); \
    E##se = Bse ^((~Bsi)& Bso ); \
    E##si = Bsi ^((~Bso)& Bsu ); \
    E##so = Bso ^((~Bsu)& Bsa ); \
    E##su = Bsu ^((~Bsa)& Bse );
#endif // UseBebigokimisa
/*
 * 64-bit round constants, one per round. The round macros in this file read
 * entry i (their first argument) and XOR it into the E##ba lane.
 */
const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
/*
 * copyFromStateAndXor576bits(X, state, input): load the 25 lanes state[0..24]
 * into the variables X##ba ... X##su, XORing input[0..8] (9 lanes of 64 bits
 * = 576 bits) into the first nine. Lanes 9..24 are copied unchanged.
 *
 *   X      name prefix of the destination variables
 *   state  array expression with at least 25 elements
 *   input  array expression with at least 9 elements
 *
 * The last line deliberately has no line continuation, so the definition
 * ends here regardless of what follows it in the file.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]; \
    X##ka = state[10]; \
    X##ke = state[11]; \
    X##ki = state[12]; \
    X##ko = state[13]; \
    X##ku = state[14]; \
    X##ma = state[15]; \
    X##me = state[16]; \
    X##mi = state[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/*
 * copyFromStateAndXor832bits(X, state, input): load the 25 lanes
 * state[0..24] into the variables X##ba ... X##su, XORing input[0..12]
 * (13 lanes of 64 bits = 832 bits) into the first thirteen. Lanes 13..24 are
 * copied unchanged.
 *
 *   X      name prefix of the destination variables
 *   state  array expression with at least 25 elements
 *   input  array expression with at least 13 elements
 *
 * The last line deliberately has no line continuation, so the definition
 * ends here regardless of what follows it in the file.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##ba = state[ 0]^input[ 0]; \
    X##be = state[ 1]^input[ 1]; \
    X##bi = state[ 2]^input[ 2]; \
    X##bo = state[ 3]^input[ 3]; \
    X##bu = state[ 4]^input[ 4]; \
    X##ga = state[ 5]^input[ 5]; \
    X##ge = state[ 6]^input[ 6]; \
    X##gi = state[ 7]^input[ 7]; \
    X##go = state[ 8]^input[ 8]; \
    X##gu = state[ 9]^input[ 9]; \
    X##ka = state[10]^input[10]; \
    X##ke = state[11]^input[11]; \
    X##ki = state[12]^input[12]; \
    X##ko = state[13]; \
    X##ku = state[14]; \
    X##ma = state[15]; \
    X##me = state[16]; \
    X##mi = state[17]; \
    X##mo = state[18]; \
    X##mu = state[19]; \
    X##sa = state[20]; \
    X##se = state[21]; \
    X##si = state[22]; \
    X##so = state[23]; \
    X##su = state[24];
/* copyFromStateAndXor1024bits(X, state, input)
   Loads the 25 lanes state[0..24] into the variables X##ba .. X##su.
   The first 16 lanes (16 * 64 = 1024 bits) are XORed with input[0..15] on
   the way; lanes 16..24 are copied unchanged.
   Expands to plain assignment statements: the caller must have the 25
   X-prefixed variables in scope. The last line deliberately has no
   line-continuation so the macro ends here regardless of what follows. */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##ba = (state)[ 0]^(input)[ 0]; \
    X##be = (state)[ 1]^(input)[ 1]; \
    X##bi = (state)[ 2]^(input)[ 2]; \
    X##bo = (state)[ 3]^(input)[ 3]; \
    X##bu = (state)[ 4]^(input)[ 4]; \
    X##ga = (state)[ 5]^(input)[ 5]; \
    X##ge = (state)[ 6]^(input)[ 6]; \
    X##gi = (state)[ 7]^(input)[ 7]; \
    X##go = (state)[ 8]^(input)[ 8]; \
    X##gu = (state)[ 9]^(input)[ 9]; \
    X##ka = (state)[10]^(input)[10]; \
    X##ke = (state)[11]^(input)[11]; \
    X##ki = (state)[12]^(input)[12]; \
    X##ko = (state)[13]^(input)[13]; \
    X##ku = (state)[14]^(input)[14]; \
    X##ma = (state)[15]^(input)[15]; \
    X##me = (state)[16]; \
    X##mi = (state)[17]; \
    X##mo = (state)[18]; \
    X##mu = (state)[19]; \
    X##sa = (state)[20]; \
    X##se = (state)[21]; \
    X##si = (state)[22]; \
    X##so = (state)[23]; \
    X##su = (state)[24];
/* copyFromStateAndXor1088bits(X, state, input)
   Loads the 25 lanes state[0..24] into the variables X##ba .. X##su.
   The first 17 lanes (17 * 64 = 1088 bits) are XORed with input[0..16] on
   the way; lanes 17..24 are copied unchanged.
   Expands to plain assignment statements: the caller must have the 25
   X-prefixed variables in scope. The last line deliberately has no
   line-continuation so the macro ends here regardless of what follows. */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##ba = (state)[ 0]^(input)[ 0]; \
    X##be = (state)[ 1]^(input)[ 1]; \
    X##bi = (state)[ 2]^(input)[ 2]; \
    X##bo = (state)[ 3]^(input)[ 3]; \
    X##bu = (state)[ 4]^(input)[ 4]; \
    X##ga = (state)[ 5]^(input)[ 5]; \
    X##ge = (state)[ 6]^(input)[ 6]; \
    X##gi = (state)[ 7]^(input)[ 7]; \
    X##go = (state)[ 8]^(input)[ 8]; \
    X##gu = (state)[ 9]^(input)[ 9]; \
    X##ka = (state)[10]^(input)[10]; \
    X##ke = (state)[11]^(input)[11]; \
    X##ki = (state)[12]^(input)[12]; \
    X##ko = (state)[13]^(input)[13]; \
    X##ku = (state)[14]^(input)[14]; \
    X##ma = (state)[15]^(input)[15]; \
    X##me = (state)[16]^(input)[16]; \
    X##mi = (state)[17]; \
    X##mo = (state)[18]; \
    X##mu = (state)[19]; \
    X##sa = (state)[20]; \
    X##se = (state)[21]; \
    X##si = (state)[22]; \
    X##so = (state)[23]; \
    X##su = (state)[24];
/* copyFromStateAndXor1152bits(X, state, input)
   Loads the 25 lanes state[0..24] into the variables X##ba .. X##su.
   The first 18 lanes (18 * 64 = 1152 bits) are XORed with input[0..17] on
   the way; lanes 18..24 are copied unchanged.
   Expands to plain assignment statements: the caller must have the 25
   X-prefixed variables in scope. The last line deliberately has no
   line-continuation so the macro ends here regardless of what follows. */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##ba = (state)[ 0]^(input)[ 0]; \
    X##be = (state)[ 1]^(input)[ 1]; \
    X##bi = (state)[ 2]^(input)[ 2]; \
    X##bo = (state)[ 3]^(input)[ 3]; \
    X##bu = (state)[ 4]^(input)[ 4]; \
    X##ga = (state)[ 5]^(input)[ 5]; \
    X##ge = (state)[ 6]^(input)[ 6]; \
    X##gi = (state)[ 7]^(input)[ 7]; \
    X##go = (state)[ 8]^(input)[ 8]; \
    X##gu = (state)[ 9]^(input)[ 9]; \
    X##ka = (state)[10]^(input)[10]; \
    X##ke = (state)[11]^(input)[11]; \
    X##ki = (state)[12]^(input)[12]; \
    X##ko = (state)[13]^(input)[13]; \
    X##ku = (state)[14]^(input)[14]; \
    X##ma = (state)[15]^(input)[15]; \
    X##me = (state)[16]^(input)[16]; \
    X##mi = (state)[17]^(input)[17]; \
    X##mo = (state)[18]; \
    X##mu = (state)[19]; \
    X##sa = (state)[20]; \
    X##se = (state)[21]; \
    X##si = (state)[22]; \
    X##so = (state)[23]; \
    X##su = (state)[24];
/* copyFromStateAndXor1344bits(X, state, input)
   Loads the 25 lanes state[0..24] into the variables X##ba .. X##su.
   The first 21 lanes (21 * 64 = 1344 bits) are XORed with input[0..20] on
   the way; lanes 21..24 are copied unchanged.
   Expands to plain assignment statements: the caller must have the 25
   X-prefixed variables in scope. The last line deliberately has no
   line-continuation so the macro ends here regardless of what follows. */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##ba = (state)[ 0]^(input)[ 0]; \
    X##be = (state)[ 1]^(input)[ 1]; \
    X##bi = (state)[ 2]^(input)[ 2]; \
    X##bo = (state)[ 3]^(input)[ 3]; \
    X##bu = (state)[ 4]^(input)[ 4]; \
    X##ga = (state)[ 5]^(input)[ 5]; \
    X##ge = (state)[ 6]^(input)[ 6]; \
    X##gi = (state)[ 7]^(input)[ 7]; \
    X##go = (state)[ 8]^(input)[ 8]; \
    X##gu = (state)[ 9]^(input)[ 9]; \
    X##ka = (state)[10]^(input)[10]; \
    X##ke = (state)[11]^(input)[11]; \
    X##ki = (state)[12]^(input)[12]; \
    X##ko = (state)[13]^(input)[13]; \
    X##ku = (state)[14]^(input)[14]; \
    X##ma = (state)[15]^(input)[15]; \
    X##me = (state)[16]^(input)[16]; \
    X##mi = (state)[17]^(input)[17]; \
    X##mo = (state)[18]^(input)[18]; \
    X##mu = (state)[19]^(input)[19]; \
    X##sa = (state)[20]^(input)[20]; \
    X##se = (state)[21]; \
    X##si = (state)[22]; \
    X##so = (state)[23]; \
    X##su = (state)[24];
/* copyFromState(X, state)
   Loads the 25 lanes state[0..24] into the variables X##ba .. X##su without
   modifying them.
   Expands to plain assignment statements: the caller must have the 25
   X-prefixed variables in scope. The last line deliberately has no
   line-continuation so the macro ends here regardless of what follows. */
#define copyFromState(X, state) \
    X##ba = (state)[ 0]; \
    X##be = (state)[ 1]; \
    X##bi = (state)[ 2]; \
    X##bo = (state)[ 3]; \
    X##bu = (state)[ 4]; \
    X##ga = (state)[ 5]; \
    X##ge = (state)[ 6]; \
    X##gi = (state)[ 7]; \
    X##go = (state)[ 8]; \
    X##gu = (state)[ 9]; \
    X##ka = (state)[10]; \
    X##ke = (state)[11]; \
    X##ki = (state)[12]; \
    X##ko = (state)[13]; \
    X##ku = (state)[14]; \
    X##ma = (state)[15]; \
    X##me = (state)[16]; \
    X##mi = (state)[17]; \
    X##mo = (state)[18]; \
    X##mu = (state)[19]; \
    X##sa = (state)[20]; \
    X##se = (state)[21]; \
    X##si = (state)[22]; \
    X##so = (state)[23]; \
    X##su = (state)[24];
/* copyToState(state, X)
   Stores the 25 variables X##ba .. X##su back into state[0..24], in the same
   lane order the copyFromState-style macros use for loading.
   Expands to plain assignment statements: the caller must have the 25
   X-prefixed variables in scope. The last line deliberately has no
   line-continuation so the macro ends here regardless of what follows. */
#define copyToState(state, X) \
    (state)[ 0] = X##ba; \
    (state)[ 1] = X##be; \
    (state)[ 2] = X##bi; \
    (state)[ 3] = X##bo; \
    (state)[ 4] = X##bu; \
    (state)[ 5] = X##ga; \
    (state)[ 6] = X##ge; \
    (state)[ 7] = X##gi; \
    (state)[ 8] = X##go; \
    (state)[ 9] = X##gu; \
    (state)[10] = X##ka; \
    (state)[11] = X##ke; \
    (state)[12] = X##ki; \
    (state)[13] = X##ko; \
    (state)[14] = X##ku; \
    (state)[15] = X##ma; \
    (state)[16] = X##me; \
    (state)[17] = X##mi; \
    (state)[18] = X##mo; \
    (state)[19] = X##mu; \
    (state)[20] = X##sa; \
    (state)[21] = X##se; \
    (state)[22] = X##si; \
    (state)[23] = X##so; \
    (state)[24] = X##su;
/* copyStateVariables(X, Y)
   Assigns each of the 25 lane variables Y##ba .. Y##su to the matching
   X##ba .. X##su.
   Expands to plain assignment statements: the caller must have both sets of
   variables in scope. The last line deliberately has no line-continuation,
   so the macro no longer depends on a blank line following it. */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; \
    X##be = Y##be; \
    X##bi = Y##bi; \
    X##bo = Y##bo; \
    X##bu = Y##bu; \
    X##ga = Y##ga; \
    X##ge = Y##ge; \
    X##gi = Y##gi; \
    X##go = Y##go; \
    X##gu = Y##gu; \
    X##ka = Y##ka; \
    X##ke = Y##ke; \
    X##ki = Y##ki; \
    X##ko = Y##ko; \
    X##ku = Y##ku; \
    X##ma = Y##ma; \
    X##me = Y##me; \
    X##mi = Y##mi; \
    X##mo = Y##mo; \
    X##mu = Y##mu; \
    X##sa = Y##sa; \
    X##se = Y##se; \
    X##si = Y##si; \
    X##so = Y##so; \
    X##su = Y##su;

123
c_src/KeccakF-1600-arm.c Executable file
View File

@ -0,0 +1,123 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include "KeccakF-1600-interface.h"
#include "KeccakSponge.h"
#include <string.h>
typedef unsigned char UINT8;
typedef unsigned short UINT16;
typedef unsigned int UINT32;
typedef unsigned long long int UINT64;
/*
 * Entry point implemented in the accompanying ARM assembly sources
 * (KeccakF-1600-armcc.s / KeccakF-1600-armgcc.s).
 *
 * state     - permutation state, accessed as 32-bit words and updated in place.
 * input     - bytes to fold into the state before permuting; every wrapper in
 *             this file that passes laneCount == 0 also passes a null pointer.
 * laneCount - number of 8-byte lanes to read from input. When it is zero the
 *             assembly skips the XOR step and only runs the round loop.
 *
 * For a non-zero laneCount the assembly reads that many 8-byte groups from
 * input, splits each into its even-bit and odd-bit halves and XORs them into
 * the leading state words, then runs the rounds.
 * NOTE(review): the assembly uses ldrd/strd on state, so state presumably has
 * to be 4-byte aligned - confirm against the callers' buffers.
 */
void KeccakPermutationOnWordsAfterXoring_ARM_asm(UINT32 *state, const UINT8 *input, int laneCount);
/* Global initialisation hook of the Keccak-f interface. This ARM port keeps
   no global tables or state, so there is nothing to do. */
void KeccakInitialize(void)
{
    /* intentionally empty */
}
/* Reset a permutation state to all-zero bytes.
   state - buffer of at least KeccakPermutationSizeInBytes bytes. */
void KeccakInitializeState(unsigned char *state)
{
    const size_t stateSize = KeccakPermutationSizeInBytes;

    memset(state, 0x00, stateSize);
}
void KeccakPermutation(unsigned char *state)
{
KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, 0, 0);
}
#ifdef ProvideFast576
/* Absorb one 576-bit block: XOR 9 lanes of data into state, then permute.
   Only compiled when ProvideFast576 is defined. */
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
    UINT32 *const stateWords = (UINT32 *)state;

    /* 576 bits / 64 bits per lane = 9 lanes */
    KeccakPermutationOnWordsAfterXoring_ARM_asm(stateWords, data, 576 / 64);
}
#endif
#ifdef ProvideFast832
/* Absorb one 832-bit block: XOR 13 lanes of data into state, then permute.
   Only compiled when ProvideFast832 is defined. */
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
    UINT32 *const stateWords = (UINT32 *)state;

    /* 832 bits / 64 bits per lane = 13 lanes */
    KeccakPermutationOnWordsAfterXoring_ARM_asm(stateWords, data, 832 / 64);
}
#endif
#ifdef ProvideFast1024
/* Absorb one 1024-bit block: XOR 16 lanes of data into state, then permute.
   Only compiled when ProvideFast1024 is defined. */
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
    UINT32 *const stateWords = (UINT32 *)state;

    /* 1024 bits / 64 bits per lane = 16 lanes */
    KeccakPermutationOnWordsAfterXoring_ARM_asm(stateWords, data, 1024 / 64);
}
#endif
#ifdef ProvideFast1088
/* Absorb one 1088-bit block: XOR 17 lanes of data into state, then permute.
   Only compiled when ProvideFast1088 is defined. */
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
    UINT32 *const stateWords = (UINT32 *)state;

    /* 1088 bits / 64 bits per lane = 17 lanes */
    KeccakPermutationOnWordsAfterXoring_ARM_asm(stateWords, data, 1088 / 64);
}
#endif
#ifdef ProvideFast1152
/* Absorb one 1152-bit block: XOR 18 lanes of data into state, then permute.
   Only compiled when ProvideFast1152 is defined. */
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
    UINT32 *const stateWords = (UINT32 *)state;

    /* 1152 bits / 64 bits per lane = 18 lanes */
    KeccakPermutationOnWordsAfterXoring_ARM_asm(stateWords, data, 1152 / 64);
}
#endif
#ifdef ProvideFast1344
/* Absorb one 1344-bit block: XOR 21 lanes of data into state, then permute.
   Only compiled when ProvideFast1344 is defined. */
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
    UINT32 *const stateWords = (UINT32 *)state;

    /* 1344 bits / 64 bits per lane = 21 lanes */
    KeccakPermutationOnWordsAfterXoring_ARM_asm(stateWords, data, 1344 / 64);
}
#endif
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, laneCount);
}
// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/* Undo the bit interleaving of one lane.
   Each of the five steps exchanges two neighbouring bit groups (16, 8, 4, 2
   and finally 1 bit wide) using the XOR-swap form "t = (x ^ (x >> n)) & mask;
   x ^= t ^ (t << n)". Taken together they shuffle the two 32-bit halves of x
   into each other: bit i of the low half ends up at bit 2*i and bit i of the
   high half at bit 2*i + 1 (e.g. 0x00000000FFFFFFFF -> 0x5555555555555555). */
UINT64 fromInterleaving(UINT64 x)
{
    UINT64 swap;

    swap = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL;
    x ^= swap ^ (swap << 16);

    swap = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL;
    x ^= swap ^ (swap << 8);

    swap = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL;
    x ^= swap ^ (swap << 4);

    swap = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL;
    x ^= swap ^ (swap << 2);

    swap = (x ^ (x >> 1)) & 0x2222222222222222ULL;
    x ^= swap ^ (swap << 1);

    return x;
}
/* Convert one interleaved lane to its normal bit order and write it out.
   dest       - receives 8 bytes, in the CPU's native byte order; may have any
                alignment.
   evenAndOdd - two consecutive 32-bit state words holding the lane in
                interleaved form.
   The lane is moved with memcpy rather than through UINT64* casts: the casts
   read UINT32 storage and wrote UINT8 storage as a UINT64 (an aliasing
   violation) and could issue a misaligned 64-bit store when dest is not
   8-byte aligned. The bytes produced are the same. */
void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd)
{
    UINT64 lane;

    memcpy(&lane, evenAndOdd, sizeof lane);
    lane = fromInterleaving(lane);
    memcpy(dest, &lane, sizeof lane);
}
/* extractLanes(laneCount, state, data)
   De-interleaves the first laneCount lanes of state into data: lane n is read
   from the 32-bit words 2*n and 2*n+1 of state and written to bytes
   8*n .. 8*n+7 of data.
   The state and data arguments are parenthesized so that expressions can be
   passed safely, and the loop counter has a macro-private name so it cannot
   capture a caller's variable called "i". Expands to a brace block; no
   trailing semicolon is needed at the call site. */
#define extractLanes(laneCount, state, data) \
    { \
        int extractLanes_lane_; \
        for(extractLanes_lane_=0; extractLanes_lane_<(laneCount); extractLanes_lane_++) \
            setInterleavedWordsInto8bytes((data)+extractLanes_lane_*8, (UINT32*)(state)+extractLanes_lane_*2); \
    }
#ifdef ProvideFast1024
/* Copy the first 1024 bits (16 lanes) of state to data, converting each lane
   from the interleaved word pair back to normal bit order.
   Only compiled when ProvideFast1024 is defined. */
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
    int lane;

    for (lane = 0; lane < 1024 / 64; lane++)
        setInterleavedWordsInto8bytes(data + lane * 8, (UINT32 *)state + lane * 2);
}
#endif
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
extractLanes(laneCount, state, data)
}

653
c_src/KeccakF-1600-armcc.s Executable file
View File

@ -0,0 +1,653 @@
;// The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
;// Michaël Peeters and Gilles Van Assche. For more information, feedback or
;// questions, please refer to our website: http://keccak.noekeon.org/
;//
;// Implementation by Ronny Van Keer,
;// hereby denoted as "the implementer".
;//
;// To the extent possible under law, the implementer has waived all copyright
;// and related or neighboring rights to the source code in this file.
;// http://creativecommons.org/publicdomain/zero/1.0/
PRESERVE8
THUMB
AREA |.text|, CODE, READONLY
;// --- defines
;// Byte offsets (word index * 4) of the 32-bit words addressed by the code
;// below. Every lane name (_ba .. _su, 25 lanes) has two words: suffix 0 and
;// suffix 1. The interleave loop stores the even-numbered input bits in
;// word 0 and the odd-numbered bits in word 1.
;// The same offsets are used relative to r0 (caller's state) and relative to
;// sp (temporary copy on the stack).
;// mDe1/mDi0/mDo0/mDo1 are the four extra stack words (indices 50..53) used
;// to spill intermediate values computed in the prepTheta sections.
_ba0 equ 0*4
_ba1 equ 1*4
_be0 equ 2*4
_be1 equ 3*4
_bi0 equ 4*4
_bi1 equ 5*4
_bo0 equ 6*4
_bo1 equ 7*4
_bu0 equ 8*4
_bu1 equ 9*4
_ga0 equ 10*4
_ga1 equ 11*4
_ge0 equ 12*4
_ge1 equ 13*4
_gi0 equ 14*4
_gi1 equ 15*4
_go0 equ 16*4
_go1 equ 17*4
_gu0 equ 18*4
_gu1 equ 19*4
_ka0 equ 20*4
_ka1 equ 21*4
_ke0 equ 22*4
_ke1 equ 23*4
_ki0 equ 24*4
_ki1 equ 25*4
_ko0 equ 26*4
_ko1 equ 27*4
_ku0 equ 28*4
_ku1 equ 29*4
_ma0 equ 30*4
_ma1 equ 31*4
_me0 equ 32*4
_me1 equ 33*4
_mi0 equ 34*4
_mi1 equ 35*4
_mo0 equ 36*4
_mo1 equ 37*4
_mu0 equ 38*4
_mu1 equ 39*4
_sa0 equ 40*4
_sa1 equ 41*4
_se0 equ 42*4
_se1 equ 43*4
_si0 equ 44*4
_si1 equ 45*4
_so0 equ 46*4
_so1 equ 47*4
_su0 equ 48*4
_su1 equ 49*4
mDe1 equ 50*4
mDi0 equ 51*4
mDo0 equ 52*4
mDo1 equ 53*4
;// --- macros
;// xor5: $result = XOR of the five words at [$ptr+$b], [$ptr+$g], [$ptr+$k],
;// [$ptr+$m] and [$ptr+$s].
;// Uses r1 and r2 as scratch, so $result and $ptr must not be r1 or r2 and
;// any live value in r1/r2 is lost.
MACRO
xor5 $result,$ptr,$b,$g,$k,$m,$s
ldr $result, [$ptr, #$b]
ldr r1, [$ptr, #$g]
ldr r2, [$ptr, #$k]
eor $result, $result, r1
ldr r1, [$ptr, #$m]
eor $result, $result, r2
ldr r2, [$ptr, #$s]
eor $result, $result, r1
eor $result, $result, r2
MEND
;// xorrol: $b = ($b XOR $yy) rotated left by $rr bits.
;// The rotate-left is written as a rotate-right by 32-$rr.
MACRO
xorrol $b, $yy, $rr
eor $b, $b, $yy
ror $b, #32-$rr
MEND
;// xandnot: stores $aa XOR ($cc AND NOT $bb) to [$resptr + $resofs].
;// r1 is used as scratch; $aa, $bb and $cc are left unchanged.
MACRO
xandnot $resptr, $resofs, $aa, $bb, $cc
bic r1, $cc, $bb
eor r1, r1, $aa
str r1, [$resptr, #$resofs]
MEND
;// xandnotRC: like xandnot, but additionally XORs in the next round-constant
;// word. Stores $aa XOR ($cc AND NOT $bb) XOR [r3] to [$resptr + $resofs].
;// Side effects: r3 is advanced by 4 (post-indexed load), r1 is used as
;// scratch and $cc is overwritten with the stored value.
MACRO
xandnotRC $resptr, $resofs, $aa, $bb, $cc
ldr r1, [r3], #4
bic $cc, $cc, $bb
eor $cc, $cc, r1
eor $cc, $cc, $aa
str $cc, [$resptr, #$resofs]
MEND
;// KeccakPermutationOnWordsAfterXoring_ARM_asm
;// Register use on entry, as read by the code below:
;//   r0 = base of the state words (called "A" in the comments), updated in place
;//   r1 = input bytes, read 8 bytes per lane (unused when r2 is zero)
;//   r2 = number of lanes to XOR into the state before permuting
;// The routine first XORs the bit-interleaved input into the state, then
;// runs roundLoop until the round-constant table's terminator is reached.
;// NOTE(review): the state is accessed with ldrd/strd, so it presumably has
;// to be word-aligned - confirm for the target core.
EXPORT KeccakPermutationOnWordsAfterXoring_ARM_asm
KeccakPermutationOnWordsAfterXoring_ARM_asm PROC
push {r4-r12,lr}
;// Stack frame: 50 words for the temporary state "E" plus the 4 mD* spill words
sub sp,sp,#4*(50+4)
;// r9 = lane counter; a zero count skips the XOR step entirely
movs r9, r2
beq interleaveDone
mov r8,r0
interleaveLoop
;// r4/r5 = the two 32-bit input words of this lane, r6/r7 = its two state words
ldr r4, [r1], #4
ldr r5, [r1], #4
ldrd r6, r7, [r8]
;// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
;// even bits of r4, compacted to 16 bits -> XORed into the low half of r6
and r3,r4,#0x55555555
orr r3,r3,r3, LSR #1
and r3,r3,#0x33333333
orr r3,r3,r3, LSR #2
and r3,r3,#0x0F0F0F0F
orr r3,r3,r3, LSR #4
and r3,r3,#0x00FF00FF
bfi r3,r3,#8, #8
eor r6,r6,r3, LSR #8
;// even bits of r5, compacted to 16 bits -> XORed into the high half of r6
and r3,r5,#0x55555555
orr r3,r3,r3, LSR #1
and r3,r3,#0x33333333
orr r3,r3,r3, LSR #2
and r3,r3,#0x0F0F0F0F
orr r3,r3,r3, LSR #4
and r3,r3,#0x00FF00FF
orr r3,r3,r3, LSR #8
eor r6,r6,r3, LSL #16
;// odd bits of r4, compacted to 16 bits -> XORed into the low half of r7
and r3,r4,#0xAAAAAAAA
orr r3,r3,r3, LSL #1
and r3,r3,#0xCCCCCCCC
orr r3,r3,r3, LSL #2
and r3,r3,#0xF0F0F0F0
orr r3,r3,r3, LSL #4
and r3,r3,#0xFF00FF00
orr r3,r3,r3, LSL #8
eor r7,r7,r3, LSR #16
;// odd bits of r5, compacted to 16 bits -> XORed into the high half of r7
and r3,r5,#0xAAAAAAAA
orr r3,r3,r3, LSL #1
and r3,r3,#0xCCCCCCCC
orr r3,r3,r3, LSL #2
and r3,r3,#0xF0F0F0F0
orr r3,r3,r3, LSL #4
and r3,r3,#0xFF00FF00
orr r3,r3,r3, LSL #8
bfc r3, #0, #16
eor r7,r7,r3
;// write the updated lane back and step r8 to the next lane
strd r6,r7,[r8], #8
subs r9,r9,#1
bne interleaveLoop
interleaveDone
;// r3 walks the round-constant table; xandnotRC advances it one word at a time
ldr r3, =KeccakF1600RoundConstantsWithTerminator
b roundLoop ;//jump over the table
LTORG
ALIGN
KeccakF1600RoundConstantsWithTerminator
;// Two words per row. Within a round the first word is consumed by the
;// xandnotRC that writes _ba0 and the second by the one that writes _ba1.
;// 0 1
dcd 0x00000001, 0x00000000
dcd 0x00000000, 0x00000089
dcd 0x00000000, 0x8000008b
dcd 0x00000000, 0x80008080
dcd 0x00000001, 0x0000008b
dcd 0x00000001, 0x00008000
dcd 0x00000001, 0x80008088
dcd 0x00000001, 0x80000082
dcd 0x00000000, 0x0000000b
dcd 0x00000000, 0x0000000a
dcd 0x00000001, 0x00008082
dcd 0x00000000, 0x00008003
dcd 0x00000001, 0x0000808b
dcd 0x00000001, 0x8000000b
dcd 0x00000001, 0x8000008a
dcd 0x00000001, 0x80000081
dcd 0x00000000, 0x80000081
dcd 0x00000000, 0x80000008
dcd 0x00000000, 0x00000083
dcd 0x00000000, 0x80008003
dcd 0x00000001, 0x80008088
dcd 0x00000000, 0x80000088
dcd 0x00000001, 0x00008000
dcd 0x00000000, 0x80008082
dcd 0xFFFFFFFF ;//terminator
;// Each pass through roundLoop performs two rounds and consumes four table
;// words: first from the state at r0 ("A") into the stack copy ("E"), then
;// from E back into A.
roundLoop
;//prepTheta A
;// XORs the five words of each column of A; the combined values end up in
;// r4-r7, r2, lr and the mDo0/mDo1/mDe1/mDi0 stack slots (xor5 clobbers r1, r2)
xor5 r10, r0,_bu0, _gu0, _ku0, _mu0, _su0
xor5 r6, r0,_be1, _ge1, _ke1, _me1, _se1
eor r5, r10, r6, ROR #31
xor5 r11, r0,_bu1, _gu1, _ku1, _mu1, _su1
xor5 r7, r0,_be0, _ge0, _ke0, _me0, _se0
eor r4, r11, r7
xor5 r8, r0,_bi0, _gi0, _ki0, _mi0, _si0
eor r1, r8, r11, ROR #31
str r1, [sp, #mDo0]
xor5 r9, r0,_bi1, _gi1, _ki1, _mi1, _si1
eor r1, r9, r10
str r1, [sp, #mDo1]
xor5 r10, r0,_ba0, _ga0, _ka0, _ma0, _sa0
eor lr, r10, r9, ROR #31
xor5 r11, r0,_ba1, _ga1, _ka1, _ma1, _sa1
eor r1, r11, r8
str r1, [sp, #mDe1]
xor5 r9, r0,_bo1, _go1, _ko1, _mo1, _so1
eor r1, r7, r9, ROR #31
str r1, [sp, #mDi0]
xor5 r8, r0,_bo0, _go0, _ko0, _mo0, _so0
eor r2, r6, r8
eor r7, r8, r11, ROR #31
eor r6, r9, r10
;//thetaRhoPiChiIota 0, in A, out E
;// five groups of five loads from A (r0), each followed by five stores to E (sp)
ldr r8, [r0, #_ba0]
ldr r9, [r0, #_ge0]
ldr r10, [r0, #_ki1]
ldr r11, [r0, #_mo1]
ldr r12, [r0, #_su0]
ldr r1, [sp, #mDo1]
eor r8, r8, r5
xorrol r9, lr, 22
xorrol r10, r2, 22
xorrol r11, r1, 11
xorrol r12, r7, 7
xandnot sp, _be0, r9, r10, r11
xandnot sp, _bi0, r10, r11, r12
xandnot sp, _bo0, r11, r12, r8
xandnot sp, _bu0, r12, r8, r9
xandnotRC sp, _ba0, r8, r9, r10
ldr r8, [r0, #_bo0]
ldr r1, [sp, #mDo0]
ldr r9, [r0, #_gu0]
xorrol r8, r1, 14
ldr r1, [sp, #mDe1]
ldr r10, [r0, #_ka1]
ldr r11, [r0, #_me1]
ldr r12, [r0, #_si1]
xorrol r9, r7, 10
xorrol r10, r4, 2
xorrol r11, r1, 23
xorrol r12, r2, 31
xandnot sp, _ga0, r8, r9, r10
xandnot sp, _ge0, r9, r10, r11
xandnot sp, _gi0, r10, r11, r12
xandnot sp, _go0, r11, r12, r8
xandnot sp, _gu0, r12, r8, r9
ldr r8, [r0, #_be1]
ldr r1, [sp, #mDe1]
ldr r9, [r0, #_gi0]
xorrol r8, r1, 1
ldr r1, [sp, #mDi0]
ldr r10, [r0, #_ko1]
xorrol r9, r1, 3
ldr r1, [sp, #mDo1]
ldr r11, [r0, #_mu0]
ldr r12, [r0, #_sa0]
xorrol r10, r1, 13
xorrol r11, r7, 4
xorrol r12, r5, 9
xandnot sp, _ka0, r8, r9, r10
xandnot sp, _ke0, r9, r10, r11
xandnot sp, _ki0, r10, r11, r12
xandnot sp, _ko0, r11, r12, r8
xandnot sp, _ku0, r12, r8, r9
ldr r8, [r0, #_bu1]
ldr r9, [r0, #_ga0]
ldr r10, [r0, #_ke0]
ldr r11, [r0, #_mi1]
ldr r12, [r0, #_so0]
ldr r1, [sp, #mDo0]
xorrol r8, r6, 14
xorrol r9, r5, 18
xorrol r10, lr, 5
xorrol r11, r2, 8
xorrol r12, r1, 28
xandnot sp, _ma0, r8, r9, r10
xandnot sp, _me0, r9, r10, r11
xandnot sp, _mi0, r10, r11, r12
xandnot sp, _mo0, r11, r12, r8
xandnot sp, _mu0, r12, r8, r9
ldr r1, [sp, #mDi0]
ldr r8, [r0, #_bi0]
ldr r9, [r0, #_go1]
xorrol r8, r1, 31
ldr r1, [sp, #mDo1]
ldr r10, [r0, #_ku1]
xorrol r9, r1, 28
ldr r11, [r0, #_ma1]
ldr r12, [r0, #_se0]
xorrol r10, r6, 20
xorrol r11, r4, 21
xorrol r12, lr, 1
xandnot sp, _sa0, r8, r9, r10
xandnot sp, _se0, r9, r10, r11
xandnot sp, _si0, r10, r11, r12
xandnot sp, _so0, r11, r12, r8
xandnot sp, _su0, r12, r8, r9
;// thetaRhoPiChiIota 1, in A, out E
ldr r1, [sp, #mDe1]
ldr r9, [r0, #_ge1]
ldr r8, [r0, #_ba1]
xorrol r9, r1, 22
ldr r1, [sp, #mDi0]
ldr r10, [r0, #_ki0]
eor r8, r8, r4
xorrol r10, r1, 21
ldr r1, [sp, #mDo0]
ldr r11, [r0, #_mo0]
ldr r12, [r0, #_su1]
xorrol r11, r1, 10
xorrol r12, r6, 7
xandnot sp, _be1, r9, r10, r11
xandnot sp, _bi1, r10, r11, r12
xandnot sp, _bo1, r11, r12, r8
xandnot sp, _bu1, r12, r8, r9
xandnotRC sp, _ba1, r8, r9, r10
ldr r1, [sp, #mDo1]
ldr r8, [r0, #_bo1]
ldr r12, [r0, #_si0]
xorrol r8, r1, 14
ldr r1, [sp, #mDi0]
ldr r9, [r0, #_gu1]
xorrol r12, r1, 30
ldr r10, [r0, #_ka0]
ldr r11, [r0, #_me0]
xorrol r9, r6, 10
xorrol r10, r5, 1
xorrol r11, lr, 22
xandnot sp, _ga1, r8, r9, r10
xandnot sp, _ge1, r9, r10, r11
xandnot sp, _gi1, r10, r11, r12
xandnot sp, _go1, r11, r12, r8
xandnot sp, _gu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r10, [r0, #_ko0]
ldr r8, [r0, #_be0]
xorrol r10, r1, 12
ldr r9, [r0, #_gi1]
ldr r11, [r0, #_mu1]
ldr r12, [r0, #_sa1]
eor r8, r8, lr
xorrol r9, r2, 3
xorrol r11, r6, 4
xorrol r12, r4, 9
xandnot sp, _ka1, r8, r9, r10
xandnot sp, _ke1, r9, r10, r11
xandnot sp, _ki1, r10, r11, r12
xandnot sp, _ko1, r11, r12, r8
xandnot sp, _ku1, r12, r8, r9
ldr r1, [sp, #mDe1]
ldr r10, [r0, #_ke1]
ldr r11, [r0, #_mi0]
xorrol r10, r1, 5
ldr r1, [sp, #mDi0]
ldr r12, [r0, #_so1]
xorrol r11, r1, 7
ldr r1, [sp, #mDo1]
ldr r8, [r0, #_bu0]
ldr r9, [r0, #_ga1]
xorrol r8, r7, 13
xorrol r9, r4, 18
xorrol r12, r1, 28
xandnot sp, _ma1, r8, r9, r10
xandnot sp, _me1, r9, r10, r11
xandnot sp, _mi1, r10, r11, r12
xandnot sp, _mo1, r11, r12, r8
xandnot sp, _mu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r9, [r0, #_go0]
ldr r8, [r0, #_bi1]
xorrol r9, r1, 27
ldr r10, [r0, #_ku0]
ldr r11, [r0, #_ma0]
ldr r12, [r0, #_se1]
ldr r1, [sp, #mDe1]
xorrol r8, r2, 31
xorrol r10, r7, 19
xorrol r11, r5, 20
xorrol r12, r1, 1
xandnot sp, _sa1, r8, r9, r10
xandnot sp, _se1, r9, r10, r11
xandnot sp, _si1, r10, r11, r12
xandnot sp, _so1, r11, r12, r8
xandnot sp, _su1, r12, r8, r9
;//prepTheta E
;// same computation as prepTheta A, but reading the stack copy E (sp)
xor5 r10, sp,_bu0, _gu0, _ku0, _mu0, _su0
xor5 r6, sp,_be1, _ge1, _ke1, _me1, _se1
eor r5, r10, r6, ROR #31
xor5 r11, sp,_bu1, _gu1, _ku1, _mu1, _su1
xor5 r7, sp,_be0, _ge0, _ke0, _me0, _se0
eor r4, r11, r7
xor5 r8, sp,_bi0, _gi0, _ki0, _mi0, _si0
eor r1, r8, r11, ROR #31
str r1, [sp, #mDo0]
xor5 r9, sp,_bi1, _gi1, _ki1, _mi1, _si1
eor r1, r9, r10
str r1, [sp, #mDo1]
xor5 r10, sp,_ba0, _ga0, _ka0, _ma0, _sa0
eor lr, r10, r9, ROR #31
xor5 r11, sp,_ba1, _ga1, _ka1, _ma1, _sa1
eor r1, r11, r8
str r1, [sp, #mDe1]
xor5 r9, sp,_bo1, _go1, _ko1, _mo1, _so1
eor r1, r7, r9, ROR #31
str r1, [sp, #mDi0]
xor5 r8, sp,_bo0, _go0, _ko0, _mo0, _so0
eor r2, r6, r8
eor r7, r8, r11, ROR #31
eor r6, r9, r10
;//thetaRhoPiChiIota 0, in E, out A
;// loads come from E (sp), stores go back to the caller's state A (r0)
ldr r8, [sp, #_ba0]
ldr r9, [sp, #_ge0]
ldr r10, [sp, #_ki1]
ldr r11, [sp, #_mo1]
ldr r12, [sp, #_su0]
ldr r1, [sp, #mDo1]
eor r8, r8, r5
xorrol r9, lr, 22
xorrol r10, r2, 22
xorrol r11, r1, 11
xorrol r12, r7, 7
xandnot r0, _be0, r9, r10, r11
xandnot r0, _bi0, r10, r11, r12
xandnot r0, _bo0, r11, r12, r8
xandnot r0, _bu0, r12, r8, r9
xandnotRC r0, _ba0, r8, r9, r10
ldr r8, [sp, #_bo0]
ldr r1, [sp, #mDo0]
ldr r9, [sp, #_gu0]
xorrol r8, r1, 14
ldr r1, [sp, #mDe1]
ldr r10, [sp, #_ka1]
ldr r11, [sp, #_me1]
ldr r12, [sp, #_si1]
xorrol r9, r7, 10
xorrol r10, r4, 2
xorrol r11, r1, 23
xorrol r12, r2, 31
xandnot r0, _ga0, r8, r9, r10
xandnot r0, _ge0, r9, r10, r11
xandnot r0, _gi0, r10, r11, r12
xandnot r0, _go0, r11, r12, r8
xandnot r0, _gu0, r12, r8, r9
ldr r8, [sp, #_be1]
ldr r1, [sp, #mDe1]
ldr r9, [sp, #_gi0]
xorrol r8, r1, 1
ldr r1, [sp, #mDi0]
ldr r10, [sp, #_ko1]
xorrol r9, r1, 3
ldr r1, [sp, #mDo1]
ldr r11, [sp, #_mu0]
ldr r12, [sp, #_sa0]
xorrol r10, r1, 13
xorrol r11, r7, 4
xorrol r12, r5, 9
xandnot r0, _ka0, r8, r9, r10
xandnot r0, _ke0, r9, r10, r11
xandnot r0, _ki0, r10, r11, r12
xandnot r0, _ko0, r11, r12, r8
xandnot r0, _ku0, r12, r8, r9
ldr r8, [sp, #_bu1]
ldr r9, [sp, #_ga0]
ldr r10, [sp, #_ke0]
ldr r11, [sp, #_mi1]
ldr r12, [sp, #_so0]
ldr r1, [sp, #mDo0]
xorrol r8, r6, 14
xorrol r9, r5, 18
xorrol r10, lr, 5
xorrol r11, r2, 8
xorrol r12, r1, 28
xandnot r0, _ma0, r8, r9, r10
xandnot r0, _me0, r9, r10, r11
xandnot r0, _mi0, r10, r11, r12
xandnot r0, _mo0, r11, r12, r8
xandnot r0, _mu0, r12, r8, r9
ldr r1, [sp, #mDi0]
ldr r8, [sp, #_bi0]
ldr r9, [sp, #_go1]
xorrol r8, r1, 31
ldr r1, [sp, #mDo1]
ldr r10, [sp, #_ku1]
xorrol r9, r1, 28
ldr r11, [sp, #_ma1]
ldr r12, [sp, #_se0]
xorrol r10, r6, 20
xorrol r11, r4, 21
xorrol r12, lr, 1
xandnot r0, _sa0, r8, r9, r10
xandnot r0, _se0, r9, r10, r11
xandnot r0, _si0, r10, r11, r12
xandnot r0, _so0, r11, r12, r8
xandnot r0, _su0, r12, r8, r9
;// thetaRhoPiChiIota 1, in E, out A
ldr r1, [sp, #mDe1]
ldr r9, [sp, #_ge1]
ldr r8, [sp, #_ba1]
xorrol r9, r1, 22
ldr r1, [sp, #mDi0]
ldr r10, [sp, #_ki0]
eor r8, r8, r4
xorrol r10, r1, 21
ldr r1, [sp, #mDo0]
ldr r11, [sp, #_mo0]
ldr r12, [sp, #_su1]
xorrol r11, r1, 10
xorrol r12, r6, 7
xandnot r0, _be1, r9, r10, r11
xandnot r0, _bi1, r10, r11, r12
xandnot r0, _bo1, r11, r12, r8
xandnot r0, _bu1, r12, r8, r9
xandnotRC r0, _ba1, r8, r9, r10
ldr r1, [sp, #mDo1]
ldr r8, [sp, #_bo1]
ldr r12, [sp, #_si0]
xorrol r8, r1, 14
ldr r1, [sp, #mDi0]
ldr r9, [sp, #_gu1]
xorrol r12, r1, 30
ldr r10, [sp, #_ka0]
ldr r11, [sp, #_me0]
xorrol r9, r6, 10
xorrol r10, r5, 1
xorrol r11, lr, 22
xandnot r0, _ga1, r8, r9, r10
xandnot r0, _ge1, r9, r10, r11
xandnot r0, _gi1, r10, r11, r12
xandnot r0, _go1, r11, r12, r8
xandnot r0, _gu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r10, [sp, #_ko0]
ldr r8, [sp, #_be0]
xorrol r10, r1, 12
ldr r9, [sp, #_gi1]
ldr r11, [sp, #_mu1]
ldr r12, [sp, #_sa1]
eor r8, r8, lr
xorrol r9, r2, 3
xorrol r11, r6, 4
xorrol r12, r4, 9
xandnot r0, _ka1, r8, r9, r10
xandnot r0, _ke1, r9, r10, r11
xandnot r0, _ki1, r10, r11, r12
xandnot r0, _ko1, r11, r12, r8
xandnot r0, _ku1, r12, r8, r9
ldr r1, [sp, #mDe1]
ldr r10, [sp, #_ke1]
ldr r11, [sp, #_mi0]
xorrol r10, r1, 5
ldr r1, [sp, #mDi0]
ldr r12, [sp, #_so1]
xorrol r11, r1, 7
ldr r1, [sp, #mDo1]
ldr r8, [sp, #_bu0]
ldr r9, [sp, #_ga1]
xorrol r8, r7, 13
xorrol r9, r4, 18
xorrol r12, r1, 28
xandnot r0, _ma1, r8, r9, r10
xandnot r0, _me1, r9, r10, r11
xandnot r0, _mi1, r10, r11, r12
xandnot r0, _mo1, r11, r12, r8
xandnot r0, _mu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r9, [sp, #_go0]
ldr r8, [sp, #_bi1]
xorrol r9, r1, 27
ldr r10, [sp, #_ku0]
ldr r11, [sp, #_ma0]
ldr r12, [sp, #_se1]
ldr r1, [sp, #mDe1]
xorrol r8, r2, 31
xorrol r10, r7, 19
xorrol r11, r5, 20
xorrol r12, r1, 1
xandnot r0, _sa1, r8, r9, r10
xandnot r0, _se1, r9, r10, r11
xandnot r0, _si1, r10, r11, r12
xandnot r0, _so1, r11, r12, r8
;// peek at the next table word (r3 is not advanced); r10 is free here because
;// the last xandnot below only reads r12, r8 and r9
ldr r10, [r3]
xandnot r0, _su1, r12, r8, r9
;// 0xFFFFFFFF is the table terminator: leave the loop once it is reached
cmp r10, #0xFFFFFFFF
bne roundLoop
;// release the stack frame and return (pc is popped from the saved lr)
add sp,sp,#4*(50+4)
pop {r4-r12,pc}
ENDP
ALIGN
END

686
c_src/KeccakF-1600-armgcc.s Executable file
View File

@ -0,0 +1,686 @@
@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
@ Michaël Peeters and Gilles Van Assche. For more information, feedback or
@ questions, please refer to our website: http://keccak.noekeon.org/
@
@ Implementation by Ronny Van Keer,
@ hereby denoted as "the implementer".
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@ This file was created from a .asm file
@ using the ads2gas.pl script.
.equ DO1STROUNDING, 0
@ PRESERVE8
@ THUMB
.syntax unified
.cpu cortex-m3
.thumb
@// --- defines
@ Byte offsets (word index * 4) of the 32-bit words addressed by the code
@ below. Every lane name (_ba .. _su, 25 lanes) has two words: suffix 0 and
@ suffix 1. The interleave loop stores the even-numbered input bits in
@ word 0 and the odd-numbered bits in word 1.
@ The same offsets are used relative to r0 (caller's state) and relative to
@ sp (temporary copy on the stack).
@ mDe1/mDi0/mDo0/mDo1 are the four extra stack words (indices 50..53) used
@ to spill intermediate values computed in the prepTheta sections.
.equ _ba0 , 0*4
.equ _ba1 , 1*4
.equ _be0 , 2*4
.equ _be1 , 3*4
.equ _bi0 , 4*4
.equ _bi1 , 5*4
.equ _bo0 , 6*4
.equ _bo1 , 7*4
.equ _bu0 , 8*4
.equ _bu1 , 9*4
.equ _ga0 , 10*4
.equ _ga1 , 11*4
.equ _ge0 , 12*4
.equ _ge1 , 13*4
.equ _gi0 , 14*4
.equ _gi1 , 15*4
.equ _go0 , 16*4
.equ _go1 , 17*4
.equ _gu0 , 18*4
.equ _gu1 , 19*4
.equ _ka0 , 20*4
.equ _ka1 , 21*4
.equ _ke0 , 22*4
.equ _ke1 , 23*4
.equ _ki0 , 24*4
.equ _ki1 , 25*4
.equ _ko0 , 26*4
.equ _ko1 , 27*4
.equ _ku0 , 28*4
.equ _ku1 , 29*4
.equ _ma0 , 30*4
.equ _ma1 , 31*4
.equ _me0 , 32*4
.equ _me1 , 33*4
.equ _mi0 , 34*4
.equ _mi1 , 35*4
.equ _mo0 , 36*4
.equ _mo1 , 37*4
.equ _mu0 , 38*4
.equ _mu1 , 39*4
.equ _sa0 , 40*4
.equ _sa1 , 41*4
.equ _se0 , 42*4
.equ _se1 , 43*4
.equ _si0 , 44*4
.equ _si1 , 45*4
.equ _so0 , 46*4
.equ _so1 , 47*4
.equ _su0 , 48*4
.equ _su1 , 49*4
.equ mDe1 , 50*4
.equ mDi0 , 51*4
.equ mDo0 , 52*4
.equ mDo1 , 53*4
@// --- macros
@ xor5: \result = XOR of the five words at [\ptr+\b], [\ptr+\g], [\ptr+\k],
@ [\ptr+\m] and [\ptr+\s].
@ Uses r1 and r2 as scratch, so \result and \ptr must not be r1 or r2 and
@ any live value in r1/r2 is lost.
.macro xor5 result,ptr,b,g,k,m,s
ldr \result, [\ptr, #\b]
ldr r1, [\ptr, #\g]
ldr r2, [\ptr, #\k]
eor \result, \result, r1
ldr r1, [\ptr, #\m]
eor \result, \result, r2
ldr r2, [\ptr, #\s]
eor \result, \result, r1
eor \result, \result, r2
.endm
@ xorrol: \b = (\b XOR \yy) rotated left by \rr bits.
@ The rotate-left is written as a rotate-right by 32-\rr.
.macro xorrol b, yy, rr
eor \b, \b, \yy
ror \b, #32-\rr
.endm
@ xandnot: stores \aa XOR (\cc AND NOT \bb) to [\resptr + \resofs].
@ r1 is used as scratch; \aa, \bb and \cc are left unchanged.
.macro xandnot resptr, resofs, aa, bb, cc
bic r1, \cc, \bb
eor r1, r1, \aa
str r1, [\resptr, #\resofs]
.endm
@ xandnotRC: like xandnot, but additionally XORs in the next round-constant
@ word. Stores \aa XOR (\cc AND NOT \bb) XOR [r3] to [\resptr + \resofs].
@ Side effects: r3 is advanced by 4 (post-indexed load), r1 is used as
@ scratch and \cc is overwritten with the stored value.
.macro xandnotRC resptr, resofs, aa, bb, cc
ldr r1, [r3], #4
bic \cc, \cc, \bb
eor \cc, \cc, r1
eor \cc, \cc, \aa
str \cc, [\resptr, #\resofs]
.endm
.size KeccakPermutationOnWords, .-KeccakPermutationOnWords
.align 2
.global KeccakPermutationOnWordsAfterXoring_ARM_asm
.thumb
.thumb_func
.type KeccakPermutationOnWordsAfterXoring_ARM_asm, %function
KeccakPermutationOnWordsAfterXoring_ARM_asm:
@ args = 0, pretend = 0, frame = 408
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
push {r4-r12,lr}
sub sp,sp,#4*(50+4)
movs r9, r2
beq interleaveDone
mov r8,r0
interleaveLoop:
ldr r4, [r1], #4
ldr r5, [r1], #4
ldrd r6, r7, [r8]
@// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
and r3,r4,#0x55555555
orr r3,r3,r3, LSR #1
and r3,r3,#0x33333333
orr r3,r3,r3, LSR #2
and r3,r3,#0x0F0F0F0F
orr r3,r3,r3, LSR #4
and r3,r3,#0x00FF00FF
bfi r3,r3,#8, #8
eor r6,r6,r3, LSR #8
and r3,r5,#0x55555555
orr r3,r3,r3, LSR #1
and r3,r3,#0x33333333
orr r3,r3,r3, LSR #2
and r3,r3,#0x0F0F0F0F
orr r3,r3,r3, LSR #4
and r3,r3,#0x00FF00FF
orr r3,r3,r3, LSR #8
eor r6,r6,r3, LSL #16
and r3,r4,#0xAAAAAAAA
orr r3,r3,r3, LSL #1
and r3,r3,#0xCCCCCCCC
orr r3,r3,r3, LSL #2
and r3,r3,#0xF0F0F0F0
orr r3,r3,r3, LSL #4
and r3,r3,#0xFF00FF00
orr r3,r3,r3, LSL #8
eor r7,r7,r3, LSR #16
and r3,r5,#0xAAAAAAAA
orr r3,r3,r3, LSL #1
and r3,r3,#0xCCCCCCCC
orr r3,r3,r3, LSL #2
and r3,r3,#0xF0F0F0F0
orr r3,r3,r3, LSL #4
and r3,r3,#0xFF00FF00
orr r3,r3,r3, LSL #8
bfc r3, #0, #16
eor r7,r7,r3
strd r6,r7,[r8], #8
subs r9,r9,#1
bne interleaveLoop
interleaveDone:
ldr r3, =KeccakF1600RoundConstantsWithTerminator
b roundLoop @//jump over the table
.ltorg
@ ALIGN
KeccakF1600RoundConstantsWithTerminator:
@// 0 1
.word 0x00000001
.word 0x00000000
.word 0x00000000
.word 0x00000089
.word 0x00000000
.word 0x8000008b
.word 0x00000000
.word 0x80008080
.word 0x00000001
.word 0x0000008b
.word 0x00000001
.word 0x00008000
.word 0x00000001
.word 0x80008088
.word 0x00000001
.word 0x80000082
.word 0x00000000
.word 0x0000000b
.word 0x00000000
.word 0x0000000a
.word 0x00000001
.word 0x00008082
.word 0x00000000
.word 0x00008003
.word 0x00000001
.word 0x0000808b
.word 0x00000001
.word 0x8000000b
.word 0x00000001
.word 0x8000008a
.word 0x00000001
.word 0x80000081
.word 0x00000000
.word 0x80000081
.word 0x00000000
.word 0x80000008
.word 0x00000000
.word 0x00000083
.word 0x00000000
.word 0x80008003
.word 0x00000001
.word 0x80008088
.word 0x00000000
.word 0x80000088
.word 0x00000001
.word 0x00008000
.word 0x00000000
.word 0x80008082
.word 0xFFFFFFFF @//terminator
roundLoop:
@//prepTheta A
xor5 r10, r0,_bu0, _gu0, _ku0, _mu0, _su0
xor5 r6, r0,_be1, _ge1, _ke1, _me1, _se1
eor r5, r10, r6, ROR #31
xor5 r11, r0,_bu1, _gu1, _ku1, _mu1, _su1
xor5 r7, r0,_be0, _ge0, _ke0, _me0, _se0
eor r4, r11, r7
xor5 r8, r0,_bi0, _gi0, _ki0, _mi0, _si0
eor r1, r8, r11, ROR #31
str r1, [sp, #mDo0]
xor5 r9, r0,_bi1, _gi1, _ki1, _mi1, _si1
eor r1, r9, r10
str r1, [sp, #mDo1]
xor5 r10, r0,_ba0, _ga0, _ka0, _ma0, _sa0
eor lr, r10, r9, ROR #31
xor5 r11, r0,_ba1, _ga1, _ka1, _ma1, _sa1
eor r1, r11, r8
str r1, [sp, #mDe1]
xor5 r9, r0,_bo1, _go1, _ko1, _mo1, _so1
eor r1, r7, r9, ROR #31
str r1, [sp, #mDi0]
xor5 r8, r0,_bo0, _go0, _ko0, _mo0, _so0
eor r2, r6, r8
eor r7, r8, r11, ROR #31
eor r6, r9, r10
@//thetaRhoPiChiIota 0, in A, out E
ldr r8, [r0, #_ba0]
ldr r9, [r0, #_ge0]
ldr r10, [r0, #_ki1]
ldr r11, [r0, #_mo1]
ldr r12, [r0, #_su0]
ldr r1, [sp, #mDo1]
eor r8, r8, r5
xorrol r9, lr, 22
xorrol r10, r2, 22
xorrol r11, r1, 11
xorrol r12, r7, 7
xandnot sp, _be0, r9, r10, r11
xandnot sp, _bi0, r10, r11, r12
xandnot sp, _bo0, r11, r12, r8
xandnot sp, _bu0, r12, r8, r9
xandnotRC sp, _ba0, r8, r9, r10
ldr r8, [r0, #_bo0]
ldr r1, [sp, #mDo0]
ldr r9, [r0, #_gu0]
xorrol r8, r1, 14
ldr r1, [sp, #mDe1]
ldr r10, [r0, #_ka1]
ldr r11, [r0, #_me1]
ldr r12, [r0, #_si1]
xorrol r9, r7, 10
xorrol r10, r4, 2
xorrol r11, r1, 23
xorrol r12, r2, 31
xandnot sp, _ga0, r8, r9, r10
xandnot sp, _ge0, r9, r10, r11
xandnot sp, _gi0, r10, r11, r12
xandnot sp, _go0, r11, r12, r8
xandnot sp, _gu0, r12, r8, r9
ldr r8, [r0, #_be1]
ldr r1, [sp, #mDe1]
ldr r9, [r0, #_gi0]
xorrol r8, r1, 1
ldr r1, [sp, #mDi0]
ldr r10, [r0, #_ko1]
xorrol r9, r1, 3
ldr r1, [sp, #mDo1]
ldr r11, [r0, #_mu0]
ldr r12, [r0, #_sa0]
xorrol r10, r1, 13
xorrol r11, r7, 4
xorrol r12, r5, 9
xandnot sp, _ka0, r8, r9, r10
xandnot sp, _ke0, r9, r10, r11
xandnot sp, _ki0, r10, r11, r12
xandnot sp, _ko0, r11, r12, r8
xandnot sp, _ku0, r12, r8, r9
ldr r8, [r0, #_bu1]
ldr r9, [r0, #_ga0]
ldr r10, [r0, #_ke0]
ldr r11, [r0, #_mi1]
ldr r12, [r0, #_so0]
ldr r1, [sp, #mDo0]
xorrol r8, r6, 14
xorrol r9, r5, 18
xorrol r10, lr, 5
xorrol r11, r2, 8
xorrol r12, r1, 28
xandnot sp, _ma0, r8, r9, r10
xandnot sp, _me0, r9, r10, r11
xandnot sp, _mi0, r10, r11, r12
xandnot sp, _mo0, r11, r12, r8
xandnot sp, _mu0, r12, r8, r9
ldr r1, [sp, #mDi0]
ldr r8, [r0, #_bi0]
ldr r9, [r0, #_go1]
xorrol r8, r1, 31
ldr r1, [sp, #mDo1]
ldr r10, [r0, #_ku1]
xorrol r9, r1, 28
ldr r11, [r0, #_ma1]
ldr r12, [r0, #_se0]
xorrol r10, r6, 20
xorrol r11, r4, 21
xorrol r12, lr, 1
xandnot sp, _sa0, r8, r9, r10
xandnot sp, _se0, r9, r10, r11
xandnot sp, _si0, r10, r11, r12
xandnot sp, _so0, r11, r12, r8
xandnot sp, _su0, r12, r8, r9
@// thetaRhoPiChiIota 1, in A, out E
ldr r1, [sp, #mDe1]
ldr r9, [r0, #_ge1]
ldr r8, [r0, #_ba1]
xorrol r9, r1, 22
ldr r1, [sp, #mDi0]
ldr r10, [r0, #_ki0]
eor r8, r8, r4
xorrol r10, r1, 21
ldr r1, [sp, #mDo0]
ldr r11, [r0, #_mo0]
ldr r12, [r0, #_su1]
xorrol r11, r1, 10
xorrol r12, r6, 7
xandnot sp, _be1, r9, r10, r11
xandnot sp, _bi1, r10, r11, r12
xandnot sp, _bo1, r11, r12, r8
xandnot sp, _bu1, r12, r8, r9
xandnotRC sp, _ba1, r8, r9, r10
ldr r1, [sp, #mDo1]
ldr r8, [r0, #_bo1]
ldr r12, [r0, #_si0]
xorrol r8, r1, 14
ldr r1, [sp, #mDi0]
ldr r9, [r0, #_gu1]
xorrol r12, r1, 30
ldr r10, [r0, #_ka0]
ldr r11, [r0, #_me0]
xorrol r9, r6, 10
xorrol r10, r5, 1
xorrol r11, lr, 22
xandnot sp, _ga1, r8, r9, r10
xandnot sp, _ge1, r9, r10, r11
xandnot sp, _gi1, r10, r11, r12
xandnot sp, _go1, r11, r12, r8
xandnot sp, _gu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r10, [r0, #_ko0]
ldr r8, [r0, #_be0]
xorrol r10, r1, 12
ldr r9, [r0, #_gi1]
ldr r11, [r0, #_mu1]
ldr r12, [r0, #_sa1]
eor r8, r8, lr
xorrol r9, r2, 3
xorrol r11, r6, 4
xorrol r12, r4, 9
xandnot sp, _ka1, r8, r9, r10
xandnot sp, _ke1, r9, r10, r11
xandnot sp, _ki1, r10, r11, r12
xandnot sp, _ko1, r11, r12, r8
xandnot sp, _ku1, r12, r8, r9
ldr r1, [sp, #mDe1]
ldr r10, [r0, #_ke1]
ldr r11, [r0, #_mi0]
xorrol r10, r1, 5
ldr r1, [sp, #mDi0]
ldr r12, [r0, #_so1]
xorrol r11, r1, 7
ldr r1, [sp, #mDo1]
ldr r8, [r0, #_bu0]
ldr r9, [r0, #_ga1]
xorrol r8, r7, 13
xorrol r9, r4, 18
xorrol r12, r1, 28
xandnot sp, _ma1, r8, r9, r10
xandnot sp, _me1, r9, r10, r11
xandnot sp, _mi1, r10, r11, r12
xandnot sp, _mo1, r11, r12, r8
xandnot sp, _mu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r9, [r0, #_go0]
ldr r8, [r0, #_bi1]
xorrol r9, r1, 27
ldr r10, [r0, #_ku0]
ldr r11, [r0, #_ma0]
ldr r12, [r0, #_se1]
ldr r1, [sp, #mDe1]
xorrol r8, r2, 31
xorrol r10, r7, 19
xorrol r11, r5, 20
xorrol r12, r1, 1
xandnot sp, _sa1, r8, r9, r10
xandnot sp, _se1, r9, r10, r11
xandnot sp, _si1, r10, r11, r12
xandnot sp, _so1, r11, r12, r8
xandnot sp, _su1, r12, r8, r9
@//prepTheta E
xor5 r10, sp,_bu0, _gu0, _ku0, _mu0, _su0
xor5 r6, sp,_be1, _ge1, _ke1, _me1, _se1
eor r5, r10, r6, ROR #31
xor5 r11, sp,_bu1, _gu1, _ku1, _mu1, _su1
xor5 r7, sp,_be0, _ge0, _ke0, _me0, _se0
eor r4, r11, r7
xor5 r8, sp,_bi0, _gi0, _ki0, _mi0, _si0
eor r1, r8, r11, ROR #31
str r1, [sp, #mDo0]
xor5 r9, sp,_bi1, _gi1, _ki1, _mi1, _si1
eor r1, r9, r10
str r1, [sp, #mDo1]
xor5 r10, sp,_ba0, _ga0, _ka0, _ma0, _sa0
eor lr, r10, r9, ROR #31
xor5 r11, sp,_ba1, _ga1, _ka1, _ma1, _sa1
eor r1, r11, r8
str r1, [sp, #mDe1]
xor5 r9, sp,_bo1, _go1, _ko1, _mo1, _so1
eor r1, r7, r9, ROR #31
str r1, [sp, #mDi0]
xor5 r8, sp,_bo0, _go0, _ko0, _mo0, _so0
eor r2, r6, r8
eor r7, r8, r11, ROR #31
eor r6, r9, r10
@//thetaRhoPiChiIota 0, in E, out A
ldr r8, [sp, #_ba0]
ldr r9, [sp, #_ge0]
ldr r10, [sp, #_ki1]
ldr r11, [sp, #_mo1]
ldr r12, [sp, #_su0]
ldr r1, [sp, #mDo1]
eor r8, r8, r5
xorrol r9, lr, 22
xorrol r10, r2, 22
xorrol r11, r1, 11
xorrol r12, r7, 7
xandnot r0, _be0, r9, r10, r11
xandnot r0, _bi0, r10, r11, r12
xandnot r0, _bo0, r11, r12, r8
xandnot r0, _bu0, r12, r8, r9
xandnotRC r0, _ba0, r8, r9, r10
ldr r8, [sp, #_bo0]
ldr r1, [sp, #mDo0]
ldr r9, [sp, #_gu0]
xorrol r8, r1, 14
ldr r1, [sp, #mDe1]
ldr r10, [sp, #_ka1]
ldr r11, [sp, #_me1]
ldr r12, [sp, #_si1]
xorrol r9, r7, 10
xorrol r10, r4, 2
xorrol r11, r1, 23
xorrol r12, r2, 31
xandnot r0, _ga0, r8, r9, r10
xandnot r0, _ge0, r9, r10, r11
xandnot r0, _gi0, r10, r11, r12
xandnot r0, _go0, r11, r12, r8
xandnot r0, _gu0, r12, r8, r9
ldr r8, [sp, #_be1]
ldr r1, [sp, #mDe1]
ldr r9, [sp, #_gi0]
xorrol r8, r1, 1
ldr r1, [sp, #mDi0]
ldr r10, [sp, #_ko1]
xorrol r9, r1, 3
ldr r1, [sp, #mDo1]
ldr r11, [sp, #_mu0]
ldr r12, [sp, #_sa0]
xorrol r10, r1, 13
xorrol r11, r7, 4
xorrol r12, r5, 9
xandnot r0, _ka0, r8, r9, r10
xandnot r0, _ke0, r9, r10, r11
xandnot r0, _ki0, r10, r11, r12
xandnot r0, _ko0, r11, r12, r8
xandnot r0, _ku0, r12, r8, r9
ldr r8, [sp, #_bu1]
ldr r9, [sp, #_ga0]
ldr r10, [sp, #_ke0]
ldr r11, [sp, #_mi1]
ldr r12, [sp, #_so0]
ldr r1, [sp, #mDo0]
xorrol r8, r6, 14
xorrol r9, r5, 18
xorrol r10, lr, 5
xorrol r11, r2, 8
xorrol r12, r1, 28
xandnot r0, _ma0, r8, r9, r10
xandnot r0, _me0, r9, r10, r11
xandnot r0, _mi0, r10, r11, r12
xandnot r0, _mo0, r11, r12, r8
xandnot r0, _mu0, r12, r8, r9
ldr r1, [sp, #mDi0]
ldr r8, [sp, #_bi0]
ldr r9, [sp, #_go1]
xorrol r8, r1, 31
ldr r1, [sp, #mDo1]
ldr r10, [sp, #_ku1]
xorrol r9, r1, 28
ldr r11, [sp, #_ma1]
ldr r12, [sp, #_se0]
xorrol r10, r6, 20
xorrol r11, r4, 21
xorrol r12, lr, 1
xandnot r0, _sa0, r8, r9, r10
xandnot r0, _se0, r9, r10, r11
xandnot r0, _si0, r10, r11, r12
xandnot r0, _so0, r11, r12, r8
xandnot r0, _su0, r12, r8, r9
@// thetaRhoPiChiIota 1, in A, out E
ldr r1, [sp, #mDe1]
ldr r9, [sp, #_ge1]
ldr r8, [sp, #_ba1]
xorrol r9, r1, 22
ldr r1, [sp, #mDi0]
ldr r10, [sp, #_ki0]
eor r8, r8, r4
xorrol r10, r1, 21
ldr r1, [sp, #mDo0]
ldr r11, [sp, #_mo0]
ldr r12, [sp, #_su1]
xorrol r11, r1, 10
xorrol r12, r6, 7
xandnot r0, _be1, r9, r10, r11
xandnot r0, _bi1, r10, r11, r12
xandnot r0, _bo1, r11, r12, r8
xandnot r0, _bu1, r12, r8, r9
xandnotRC r0, _ba1, r8, r9, r10
ldr r1, [sp, #mDo1]
ldr r8, [sp, #_bo1]
ldr r12, [sp, #_si0]
xorrol r8, r1, 14
ldr r1, [sp, #mDi0]
ldr r9, [sp, #_gu1]
xorrol r12, r1, 30
ldr r10, [sp, #_ka0]
ldr r11, [sp, #_me0]
xorrol r9, r6, 10
xorrol r10, r5, 1
xorrol r11, lr, 22
xandnot r0, _ga1, r8, r9, r10
xandnot r0, _ge1, r9, r10, r11
xandnot r0, _gi1, r10, r11, r12
xandnot r0, _go1, r11, r12, r8
xandnot r0, _gu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r10, [sp, #_ko0]
ldr r8, [sp, #_be0]
xorrol r10, r1, 12
ldr r9, [sp, #_gi1]
ldr r11, [sp, #_mu1]
ldr r12, [sp, #_sa1]
eor r8, r8, lr
xorrol r9, r2, 3
xorrol r11, r6, 4
xorrol r12, r4, 9
xandnot r0, _ka1, r8, r9, r10
xandnot r0, _ke1, r9, r10, r11
xandnot r0, _ki1, r10, r11, r12
xandnot r0, _ko1, r11, r12, r8
xandnot r0, _ku1, r12, r8, r9
ldr r1, [sp, #mDe1]
ldr r10, [sp, #_ke1]
ldr r11, [sp, #_mi0]
xorrol r10, r1, 5
ldr r1, [sp, #mDi0]
ldr r12, [sp, #_so1]
xorrol r11, r1, 7
ldr r1, [sp, #mDo1]
ldr r8, [sp, #_bu0]
ldr r9, [sp, #_ga1]
xorrol r8, r7, 13
xorrol r9, r4, 18
xorrol r12, r1, 28
xandnot r0, _ma1, r8, r9, r10
xandnot r0, _me1, r9, r10, r11
xandnot r0, _mi1, r10, r11, r12
xandnot r0, _mo1, r11, r12, r8
xandnot r0, _mu1, r12, r8, r9
ldr r1, [sp, #mDo0]
ldr r9, [sp, #_go0]
ldr r8, [sp, #_bi1]
xorrol r9, r1, 27
ldr r10, [sp, #_ku0]
ldr r11, [sp, #_ma0]
ldr r12, [sp, #_se1]
ldr r1, [sp, #mDe1]
xorrol r8, r2, 31
xorrol r10, r7, 19
xorrol r11, r5, 20
xorrol r12, r1, 1
xandnot r0, _sa1, r8, r9, r10
xandnot r0, _se1, r9, r10, r11
xandnot r0, _si1, r10, r11, r12
xandnot r0, _so1, r11, r12, r8
ldr r10, [r3]
xandnot r0, _su1, r12, r8, r9
cmp r10, #0xFFFFFFFF
bne roundLoop
add sp,sp,#4*(50+4)
pop {r4-r12,pc}
@
@ ALIGN

163
c_src/KeccakF-1600-avr8.c Executable file
View File

@ -0,0 +1,163 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include <avr/pgmspace.h>
#include "AVR8-rotate64.h"
/* Basic integer types used by this implementation. */
typedef unsigned char UINT8;
typedef UINT8 tSmallUInt;       /* small loop counter / index type */
typedef unsigned long long UINT64;
typedef UINT64 tKeccakLane;     /* one 64-bit lane of the 5x5 state */
#define cKeccakLaneSizeInBits (sizeof(tKeccakLane) * 8)
#define cKeccakNumberOfRounds 24
/*
 * All lookup tables below are placed in program memory (PROGMEM) and must be
 * read through pgm_read_byte(). They are declared const because avr-gcc 4.6
 * and later refuse to place a non-const object in the progmem section.
 */
/* One 64-bit round constant per round; KeccakF() reads them byte by byte. */
static const tKeccakLane KeccakF_RoundConstants[cKeccakNumberOfRounds] PROGMEM =
{
(tKeccakLane)0x0000000000000001ULL,
(tKeccakLane)0x0000000000008082ULL,
(tKeccakLane)0x800000000000808aULL,
(tKeccakLane)0x8000000080008000ULL,
(tKeccakLane)0x000000000000808bULL,
(tKeccakLane)0x0000000080000001ULL,
(tKeccakLane)0x8000000080008081ULL,
(tKeccakLane)0x8000000000008009ULL,
(tKeccakLane)0x000000000000008aULL,
(tKeccakLane)0x0000000000000088ULL,
(tKeccakLane)0x0000000080008009ULL,
(tKeccakLane)0x000000008000000aULL,
(tKeccakLane)0x000000008000808bULL,
(tKeccakLane)0x800000000000008bULL,
(tKeccakLane)0x8000000000008089ULL,
(tKeccakLane)0x8000000000008003ULL,
(tKeccakLane)0x8000000000008002ULL,
(tKeccakLane)0x8000000000000080ULL,
(tKeccakLane)0x000000000000800aULL,
(tKeccakLane)0x800000008000000aULL,
(tKeccakLane)0x8000000080008081ULL,
(tKeccakLane)0x8000000000008080ULL,
(tKeccakLane)0x0000000080000001ULL,
(tKeccakLane)0x8000000080008008ULL
};
/*
 * Rotation amount for each of the 24 Rho/Pi steps, encoded with ROT_CODE()
 * from AVR8-rotate64.h and passed as the code argument of rotate64left_code().
 */
static const tSmallUInt KeccakF_RotationConstants[24] PROGMEM =
{
ROT_CODE( 1), ROT_CODE( 3), ROT_CODE( 6), ROT_CODE(10), ROT_CODE(15),
ROT_CODE(21), ROT_CODE(28), ROT_CODE(36), ROT_CODE(45), ROT_CODE(55),
ROT_CODE( 2), ROT_CODE(14), ROT_CODE(27), ROT_CODE(41), ROT_CODE(56),
ROT_CODE( 8), ROT_CODE(25), ROT_CODE(43), ROT_CODE(62), ROT_CODE(18),
ROT_CODE(39), ROT_CODE(61), ROT_CODE(20), ROT_CODE(44)
};
/* Destination lane index (0..24) written by each of the 24 Rho/Pi steps. */
static const tSmallUInt KeccakF_PiLane[24] PROGMEM =
{
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1
};
/* i -> i mod 5 for i in 0..9, so (x+1) mod 5 and (x+4) mod 5 are table reads. */
static const tSmallUInt KeccakF_Mod5[10] PROGMEM =
{
0, 1, 2, 3, 4, 0, 1, 2, 3, 4
};
/*
 * Apply the 24-round Keccak-f permutation in place to the 25 lanes pointed to
 * by state. Lookup tables are read from program memory with pgm_read_byte().
 * The five column parities needed by Theta are computed once up front and
 * then refreshed at the end of every round while Chi and Iota are applied.
 */
void KeccakF( tKeccakLane * state )
{
    tSmallUInt roundIndex;
    tSmallUInt column;
    tKeccakLane parity[5];

    /* Seed the column parities from the incoming state. */
    for ( column = 0; column < 5; ++column )
    {
        parity[column] = state[column] ^ state[5 + column] ^ state[10 + column]
                       ^ state[15 + column] ^ state[20 + column];
    }

    for ( roundIndex = 0; roundIndex < cKeccakNumberOfRounds; ++roundIndex )
    {
        tSmallUInt step;
        tSmallUInt byteIndex;
        tKeccakLane carried;
        UINT8 * stateBytes = (unsigned char *)state;
        UINT8 * parityBytes = (unsigned char *)parity;

        /* Theta: fold the two neighbouring column parities into every lane of a column. */
        for ( column = 0; column < 5; ++column )
        {
            tSmallUInt lane;
            tKeccakLane effect = rotate64_1bit_left( parity[pgm_read_byte((KeccakF_Mod5+1)+column)] )
                               ^ parity[pgm_read_byte((KeccakF_Mod5+4)+column)];
            for ( lane = column; lane < 25; lane += 5 )
            {
                state[lane] ^= effect;
            }
        }

        /* Rho Pi: walk the lane cycle starting from lane 1, rotating each lane as it moves. */
        carried = state[1];
        for ( step = 0; step < 24; ++step )
        {
            tSmallUInt target = pgm_read_byte(&KeccakF_PiLane[step]);
            tKeccakLane displaced = state[target];
            state[target] = rotate64left_code( carried, pgm_read_byte(&KeccakF_RotationConstants[step]) );
            carried = displaced;
        }

        /*
         * Chi, Iota and the next round's parities, processed one byte position
         * (0..7) of every lane at a time.
         */
        for ( byteIndex = 0; byteIndex < 8; ++byteIndex )
        {
            UINT8 * cell = stateBytes + byteIndex;
            UINT8 acc0 = 0, acc1 = 0, acc2 = 0, acc3 = 0, acc4 = 0;
            UINT8 rc;
            tSmallUInt plane;

            for ( plane = 0; plane < 5; ++plane, cell += 40 )
            {
                /* Read the five lanes of this plane before any of them is overwritten. */
                UINT8 a0 = cell[0];
                UINT8 a1 = cell[8];
                UINT8 a2 = cell[16];
                UINT8 a3 = cell[24];
                UINT8 a4 = cell[32];
                UINT8 b0 = a0 ^ ((~a1) & a2);
                UINT8 b1 = a1 ^ ((~a2) & a3);
                UINT8 b2 = a2 ^ ((~a3) & a4);
                UINT8 b3 = a3 ^ ((~a4) & a0);
                UINT8 b4 = a4 ^ ((~a0) & a1);

                cell[0]  = b0; acc0 ^= b0;
                cell[8]  = b1; acc1 ^= b1;
                cell[16] = b2; acc2 ^= b2;
                cell[24] = b3; acc3 ^= b3;
                cell[32] = b4; acc4 ^= b4;
            }

            /* Iota: the round constant byte goes into lane 0 and into its parity. */
            rc = pgm_read_byte( (UINT8 *)(KeccakF_RoundConstants+roundIndex) + byteIndex );
            stateBytes[byteIndex] ^= rc;
            parityBytes[byteIndex]      = acc0 ^ rc;
            parityBytes[byteIndex + 8]  = acc1;
            parityBytes[byteIndex + 16] = acc2;
            parityBytes[byteIndex + 24] = acc3;
            parityBytes[byteIndex + 32] = acc4;
        }
    }
}

View File

@ -0,0 +1,647 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include "Keccak-avr8-settings.h"
#include "crypto_hash.h"
// Size of one absorbed block in bytes. cKeccakR comes from the included
// settings header - presumably the rate in bits; TODO confirm.
#define cKeccakR_SizeInBytes (cKeccakR/8)
// Number of bytes copied to the output: the fixed output length when one is
// configured, otherwise one full rate block.
#ifndef crypto_hash_BYTES
#ifdef cKeccakFixedOutputLengthInBytes
#define crypto_hash_BYTES cKeccakFixedOutputLengthInBytes
#else
#define crypto_hash_BYTES cKeccakR_SizeInBytes
#endif
#endif
// Registers used in all routines
// zero    : r1, used everywhere as a register that holds 0 (assumed to be
//           zero on entry, as in the avr-gcc calling convention - TODO confirm)
// rpState : r24:r25, pointer to the 200-byte state
// rX/rY/rZ: low registers of the X (r26:r27), Y (r28:r29) and Z (r30:r31) pointer pairs
#define zero 1
#define rpState 24
#define rX 26
#define rY 28
#define rZ 30
/*
* int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
*
* argument out is passed in r24:r25
* argument in is passed in r22:r23
* argument inlen is passed in r14:r21, only lowest 16-bits (r14-r15) are used
*
* Absorbs the input in blocks of cKeccakR_SizeInBytes bytes, XORs the final
* partial block into the state, applies the padding (XOR 0x01 right after the
* message, flip bit 7 of the last byte of the rate), runs KeccakF one more
* time and copies crypto_hash_BYTES bytes of the state to out.
* A 240-byte work area (200-byte state preceded by 40 bytes for the C lanes)
* is allocated on the stack. The function always returns 0.
*/
.global crypto_hash // populate.py, please update crypto_hash
crypto_hash: // populate.py, please update crypto_hash
// crypto_hash only registers
// rT1..rT3 are scratch registers and are overwritten by KeccakF;
// rInLen (r22:r23) is not touched by KeccakF in this file.
// sp is the I/O address of the stack pointer low byte (sp+1 is the high byte);
// I/O address 0x3F used below is the status register.
#define rT1 16
#define rT2 17
#define rT3 18
#define rInLen 22 //(2 regs)
#define sp 0x3D
// Save r2-r17 and r28:r29; they are restored in reverse order before returning.
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
// Allocate state (25*8) + C variables (5*8)
// The stack pointer is lowered by 240 with interrupts disabled while the two
// halves are written; the saved status register is restored afterwards.
in rZ, sp
in rZ+1, sp+1
subi rZ, 240
sbci rZ+1, 0
in r0, 0x3F
cli
out sp+1, rZ+1
out sp, rZ
out 0x3F, r0
adiw rZ, 41 // pointer to start of state, end of C, compensate post decrement
push r24 // save out pointer
push r25
movw rpState, rZ
movw rY, r22 //y contains in pointer
// r22:r23 has been copied to Y, so it can now be reused for the length.
movw rInLen, r14
ldi rT3, 5*5*8 //clear state
clearStateLoop:
st z+, zero
dec rT3
brne clearStateLoop
// Full blocks
// Skip straight to the last block when fewer than one full block remains.
cpi rInLen, cKeccakR_SizeInBytes
cpc rInLen+1, zero
brcs ch_lastblock
ch_FullRateLoop:
ldi rT3, cKeccakR_SizeInBytes
movw rZ, rpState
// XOR one full block of input (Y) into the start of the state (Z), byte by byte.
ch_XorLanesLoop:
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
subi rT3, 1
brne ch_XorLanesLoop
// KeccakF uses Y, so the input pointer is saved across the call.
push rY
push rY+1
call KeccakF
pop rY+1
pop rY
// Consume one block and loop while at least one more full block remains.
subi rInLen, cKeccakR_SizeInBytes
sbci rInLen+1, 0
cpi rInLen, cKeccakR_SizeInBytes
cpc rInLen+1, zero
brcc ch_FullRateLoop
ch_lastblock: // XOR last incomplete block into state
movw rZ, rpState
// Only the low byte is tested: the remaining length is below the block size here.
subi rInLen, 0
breq ch_Padding
ch_xorBytesLoop:
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
subi rInLen, 1
brne ch_xorBytesLoop
ch_Padding:
// First padding bit: XOR 0x01 into the byte right after the message.
ldi rT1, 1
ld rT2, Z
eor rT1, rT2
st Z, rT1
// Last padding bit: Z = state + block size - 1, then flip bit 7 of that byte
// (subtracting 0x80 modulo 256 toggles the top bit only).
ldi rZ, cKeccakR_SizeInBytes-1
add rZ, rpState
mov rZ+1, rpState+1
adc rZ+1, zero
ld rT1, Z
subi rT1, 0x80
st Z, rT1
call KeccakF
//output
ldi rT3, crypto_hash_BYTES
movw rY, rpState
pop rZ+1 ; restore out pointer
pop rZ
outputLoop:
ld rT1, Y+
st Z+, rT1
dec rT3
brne outputLoop
// Free state and pop registers
// rpState + 199 is the stack pointer value from before the 240-byte allocation.
ldi rZ, 199
add rpState, rZ
adc rpState+1, zero
in r0, 0x3F
cli
out sp+1, rpState+1
out sp, rpState
out 0x3F, r0
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
// return 0
mov r24, zero
mov r25, zero
#undef rInLen
#undef rT1
#undef rT2
#undef rT3
#undef sp
ret
// Table driving the Rho Pi phase of KeccakF, three bytes per step, read with
// lpm in KeccakRhoPi_Loop:
//   byte 0: ROT_BIT  - bit rotation code for the lane currently held in rTemp;
//           codes 0..4 rotate left by that many bits, codes 5..7 rotate right
//           by 8 minus the code
//   byte 1: ROT_BYTE - starting byte offset used when the next lane is copied
//           into rTempBis (its whole-byte rotation, one more when its bit part
//           is above 4 and is therefore done as a right rotation)
//   byte 2: byte offset in the state of the lane handled in this step
// The ROT_BYTE argument of each row is the rotation amount whose ROT_BIT
// appears on the following row. The last row has index 1 * 8, which is also
// the value that ends the loop.
//#define ROT_BIT(a) (a <= 4) ? ((a == 0) ? 0x80 : (a & 7)) : (0x80 | (8-a))
#define ROT_BIT(a) ((a) & 7)
#define ROT_BYTE(a) (((a)/8 + !!(((a)%8) > 4)) & 7)
KeccakF_RhoPiConstants:
.BYTE ROT_BIT( 1), ROT_BYTE( 3), 10 * 8
.BYTE ROT_BIT( 3), ROT_BYTE( 6), 7 * 8
.BYTE ROT_BIT( 6), ROT_BYTE(10), 11 * 8
.BYTE ROT_BIT(10), ROT_BYTE(15), 17 * 8
.BYTE ROT_BIT(15), ROT_BYTE(21), 18 * 8
.BYTE ROT_BIT(21), ROT_BYTE(28), 3 * 8
.BYTE ROT_BIT(28), ROT_BYTE(36), 5 * 8
.BYTE ROT_BIT(36), ROT_BYTE(45), 16 * 8
.BYTE ROT_BIT(45), ROT_BYTE(55), 8 * 8
.BYTE ROT_BIT(55), ROT_BYTE( 2), 21 * 8
.BYTE ROT_BIT( 2), ROT_BYTE(14), 24 * 8
.BYTE ROT_BIT(14), ROT_BYTE(27), 4 * 8
.BYTE ROT_BIT(27), ROT_BYTE(41), 15 * 8
.BYTE ROT_BIT(41), ROT_BYTE(56), 23 * 8
.BYTE ROT_BIT(56), ROT_BYTE( 8), 19 * 8
.BYTE ROT_BIT( 8), ROT_BYTE(25), 13 * 8
.BYTE ROT_BIT(25), ROT_BYTE(43), 12 * 8
.BYTE ROT_BIT(43), ROT_BYTE(62), 2 * 8
.BYTE ROT_BIT(62), ROT_BYTE(18), 20 * 8
.BYTE ROT_BIT(18), ROT_BYTE(39), 14 * 8
.BYTE ROT_BIT(39), ROT_BYTE(61), 22 * 8
.BYTE ROT_BIT(61), ROT_BYTE(20), 9 * 8
.BYTE ROT_BIT(20), ROT_BYTE(44), 6 * 8
.BYTE ROT_BIT(44), ROT_BYTE( 1), 1 * 8
// The 24 round constants, one row of 8 bytes per round, least significant
// byte first. KeccakF reads them sequentially with lpm and stops when the byte
// following a row is 0xFF, so the terminator row must stay directly after the
// last constant and no constant may start with 0xFF.
KeccakF_RoundConstants:
.BYTE 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x82, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x8a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x8b, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x09, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x09, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x8b, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x89, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x0a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x08, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0xFF, 0 //terminator
.text
// KeccakF
// Not callable from C!
//
// argument rpState is passed in r24:r25
//
// Permutes the 200-byte state at rpState in place. The 40 bytes directly
// below rpState are used as storage for the five 8-byte C lanes (column
// parities). Overwrites r0, r2-r21 and the X, Y and Z pointer registers
// without saving them; rpState itself is left unchanged.
KeccakF:
// Variables used in multiple operations
#define rTemp 2 // 8 regs (2-9)
#define rTempBis 10 // 8 regs (10-17)
#define rTempTer 18 // 2 regs (18-19)
#define pRound 20 // 2 regs (20-21)
// Initial Prepare Theta
// Computes the five C lanes one byte per iteration (5*8 = 40 iterations):
// each C byte is the XOR of the state bytes at offsets +0, +40, +80, +120, +160.
#define TCIPx rTempTer
movw rZ, rpState // Z points to 5 C lanes
sbiw rZ, 40
movw rY, rpState
ldi TCIPx, 5*8
KeccakInitialPrepTheta_Loop:
ld r0, Y
adiw rY, 40
ld rTemp, Y
adiw rY, 40
eor r0, rTemp
ld rTemp, Y
adiw rY, 40
eor r0, rTemp
ld rTemp, Y
eor r0, rTemp
ldd rTemp, Y+40
eor r0, rTemp
st Z+, r0
// Y was advanced by 120 above; going back 119 moves it to the next byte.
subi rY, 119
sbc rY+1, zero
dec TCIPx
brne KeccakInitialPrepTheta_Loop
#undef TCIPx
// pRound walks through the round constants table in program memory.
ldi pRound, lo8(KeccakF_RoundConstants)
ldi pRound+1, hi8(KeccakF_RoundConstants)
Keccak_RoundLoop:
// Theta
// For each of the five columns: rTemp = (C lane at TCplus rotated left by one
// bit) XOR (C lane at TCminus), then rTemp is XORed into the five lanes of
// that column. TCplus starts at the C lane 32 bytes below the state and
// TCminus at the C lane 8 bytes below it; both advance by one lane per column.
// TCcoordX is a bit schedule shifted right once per column: a cleared carry
// means one of the two pointers ran past the C area and is moved back by 40
// bytes (TCminus when the shifted value is 0x0B, TCplus otherwise); a zero
// result with carry set ends the phase.
#define TCplus rX
#define TCminus rZ
#define TCcoordX rTempTer
#define TCcoordY rTempTer+1
movw TCminus, rpState
sbiw TCminus, 1*8
movw TCplus, rpState
sbiw TCplus, 4*8
movw rY, rpState
ldi TCcoordX, 0x16
KeccakTheta_Loop1:
ld rTemp+0, X+
ld rTemp+1, X+
ld rTemp+2, X+
ld rTemp+3, X+
ld rTemp+4, X+
ld rTemp+5, X+
ld rTemp+6, X+
ld rTemp+7, X+
// 64-bit rotate left by one: the bit shifted out of the top byte is added
// back into the bottom byte.
lsl rTemp+0
rol rTemp+1
rol rTemp+2
rol rTemp+3
rol rTemp+4
rol rTemp+5
rol rTemp+6
rol rTemp+7
adc rTemp+0, zero
ld r0, Z+
eor rTemp+0, r0
ld r0, Z+
eor rTemp+1, r0
ld r0, Z+
eor rTemp+2, r0
ld r0, Z+
eor rTemp+3, r0
ld r0, Z+
eor rTemp+4, r0
ld r0, Z+
eor rTemp+5, r0
ld r0, Z+
eor rTemp+6, r0
ld r0, Z+
eor rTemp+7, r0
ldi TCcoordY, 5
// XOR rTemp into the five lanes of the current column (lanes are 40 bytes apart).
KeccakTheta_Loop2:
ld r0, Y
eor r0, rTemp+0
st Y+, r0
ld r0, Y
eor r0, rTemp+1
st Y+, r0
ld r0, Y
eor r0, rTemp+2
st Y+, r0
ld r0, Y
eor r0, rTemp+3
st Y+, r0
ld r0, Y
eor r0, rTemp+4
st Y+, r0
ld r0, Y
eor r0, rTemp+5
st Y+, r0
ld r0, Y
eor r0, rTemp+6
st Y+, r0
ld r0, Y
eor r0, rTemp+7
st Y+, r0
adiw rY, 32
dec TCcoordY
brne KeccakTheta_Loop2
// Y moved forward by 200; going back 192 leaves it on the next column.
subi rY, 200-8
sbc rY+1, zero
lsr TCcoordX
brcc 1f
breq KeccakTheta_End
rjmp KeccakTheta_Loop1
1:
cpi TCcoordX, 0x0B
brne 2f
sbiw TCminus, 40
rjmp KeccakTheta_Loop1
2:
sbiw TCplus, 40
rjmp KeccakTheta_Loop1
KeccakTheta_End:
#undef TCplus
#undef TCminus
#undef TCcoordX
#undef TCcoordY
// Rho Pi
// Moves the lanes along the cycle described by KeccakF_RhoPiConstants.
// rTemp always holds the lane that is travelling; in every step it is rotated
// by the bit code, the lane at the destination index is copied into rTempBis
// (already byte-rotated), rTemp is stored at the destination and rTempBis
// becomes the new rTemp. Y is left 32 bytes past the start of lane 1 by the
// Theta phase, hence the sbiw below before lane 1 is loaded.
#define RPindex rTempTer+0
#define RPTemp rTempTer+1
sbiw rY, 32
ld rTemp+0, Y+
ld rTemp+1, Y+
ld rTemp+2, Y+
ld rTemp+3, Y+
ld rTemp+4, Y+
ld rTemp+5, Y+
ld rTemp+6, Y+
ld rTemp+7, Y+
ldi rZ, lo8(KeccakF_RhoPiConstants)
ldi rZ+1, hi8(KeccakF_RhoPiConstants)
KeccakRhoPi_Loop:
; do bit rotation
lpm RPTemp, Z+ ;get number of bits to rotate
// Codes below 5 are left rotations (0 means none). Codes 5, 6 and 7 are
// turned into right rotations by 3, 2 and 1 bits (negate, keep two low bits).
cpi RPTemp, 5
brcs rotate64_nbit_leftOrNot
neg RPTemp
andi RPTemp, 3
rotate64_nbit_right:
// Bit 0 is kept in the T flag and put back as bit 63 after the shift.
bst rTemp, 0
ror rTemp+7
ror rTemp+6
ror rTemp+5
ror rTemp+4
ror rTemp+3
ror rTemp+2
ror rTemp+1
ror rTemp
bld rTemp+7, 7
dec RPTemp
brne rotate64_nbit_right
rjmp KeccakRhoPi_RhoBitRotateDone
rotate64_nbit_leftOrNot:
tst RPTemp
breq KeccakRhoPi_RhoBitRotateDone
rotate64_nbit_left:
lsl rTemp
rol rTemp+1
rol rTemp+2
rol rTemp+3
rol rTemp+4
rol rTemp+5
rol rTemp+6
rol rTemp+7
adc rTemp, r1
dec RPTemp
brne rotate64_nbit_left
KeccakRhoPi_RhoBitRotateDone:
lpm r0, Z+ ;get number of bytes to rotate
lpm RPindex, Z+ ;get index in state
movw rY, rpState
add rY, RPindex
adc rY+1, zero
// X is used as a data pointer into the register file itself: it starts at
// register number rTempBis + byte offset and wraps back to rTempBis after
// rTempBis+7. NOTE(review): this relies on the CPU registers being reachable
// through data addresses 0-31 - confirm for the target device.
ldi rX, rTempBis
add rX, r0
mov rX+1, zero
ldi RPTemp, 8
KeccakRhoPi_PiByteRotLoop:
ld r0, Y+
st X+, r0
cpi rX, rTempBis+8
brne KeccakRhoPi_PiByteRotFirst
ldi rX, rTempBis
KeccakRhoPi_PiByteRotFirst:
dec RPTemp
brne KeccakRhoPi_PiByteRotLoop
// Store the travelling lane at the destination, then continue with the lane
// that was just picked up from there.
sbiw rY, 8
st Y+, rTemp+0
st Y+, rTemp+1
st Y+, rTemp+2
st Y+, rTemp+3
st Y+, rTemp+4
st Y+, rTemp+5
st Y+, rTemp+6
st Y+, rTemp+7
movw rTemp+0, rTempBis+0
movw rTemp+2, rTempBis+2
movw rTemp+4, rTempBis+4
movw rTemp+6, rTempBis+6
KeccakRhoPi_RhoDone:
// The step whose index is 8 (lane 1) is the last one in the table.
subi RPindex, 8
brne KeccakRhoPi_Loop
#undef RPindex
#undef RPTemp
// Chi Iota prepare Theta
// Works on one byte position (0..7) of all 25 lanes per outer iteration.
// The inner loop applies Chi to the five lanes of each of the five planes and
// accumulates the new column parities in CIPTc0..CIPTc4. After the inner loop
// the round constant byte is XORed into lane 0 and into its parity, and the
// five parity bytes are written to the C lanes below the state for the next
// Theta. X points into the C area, Y into the state and Z into the round
// constants; pRound is used to park Z while Z is borrowed for the C stores.
#define CIPTa0 rTemp
#define CIPTa1 rTemp+1
#define CIPTa2 rTemp+2
#define CIPTa3 rTemp+3
#define CIPTa4 rTemp+4
#define CIPTc0 rTempBis
#define CIPTc1 rTempBis+1
#define CIPTc2 rTempBis+2
#define CIPTc3 rTempBis+3
#define CIPTc4 rTempBis+4
#define CIPTz rTempBis+6
#define CIPTy rTempBis+7
movw rY, rpState
movw rX, rpState ; 5 * C
sbiw rX, 40
movw rZ, pRound
ldi CIPTz, 8
KeccakChiIotaPrepareTheta_zLoop:
// Clear the five parity accumulators (the movw clears c2 and c3 together).
mov CIPTc0, zero
mov CIPTc1, zero
movw CIPTc2, CIPTc0
mov CIPTc4, zero
ldi CIPTy, 5
KeccakChiIotaPrepareTheta_yLoop:
ld CIPTa0, Y
ldd CIPTa1, Y+8
ldd CIPTa2, Y+16
ldd CIPTa3, Y+24
ldd CIPTa4, Y+32
;*p = t = a0 ^ ((~a1) & a2); c0 ^= t;
mov r0, CIPTa1
com r0
and r0, CIPTa2
eor r0, CIPTa0
eor CIPTc0, r0
st Y, r0
;*(p+8) = t = a1 ^ ((~a2) & a3); c1 ^= t;
mov r0, CIPTa2
com r0
and r0, CIPTa3
eor r0, CIPTa1
eor CIPTc1, r0
std Y+8, r0
;*(p+16) = a2 ^= ((~a3) & a4); c2 ^= a2;
mov r0, CIPTa3
com r0
and r0, CIPTa4
eor r0, CIPTa2
eor CIPTc2, r0
std Y+16, r0
;*(p+24) = a3 ^= ((~a4) & a0); c3 ^= a3;
mov r0, CIPTa4
com r0
and r0, CIPTa0
eor r0, CIPTa3
eor CIPTc3, r0
std Y+24, r0
;*(p+32) = a4 ^= ((~a0) & a1); c4 ^= a4;
// a0 is no longer needed after this point, so it is complemented in place.
com CIPTa0
and CIPTa0, CIPTa1
eor CIPTa0, CIPTa4
eor CIPTc4, CIPTa0
std Y+32, CIPTa0
adiw rY, 40
dec CIPTy
brne KeccakChiIotaPrepareTheta_yLoop
// Back to plane 0 (5 * 40 bytes), same byte position.
subi rY, 200
sbc rY+1, zero
lpm r0, Z+ ;Round Constant
ld CIPTa0, Y
eor CIPTa0, r0
st Y+, CIPTa0
movw pRound, rZ
movw rZ, rX
eor CIPTc0, r0
// The Z+ store advances Z by one, hence the displacements 7, 15, 23, 31
// for the same byte position in the other four C lanes.
st Z+, CIPTc0
std Z+7, CIPTc1
std Z+15, CIPTc2
std Z+23, CIPTc3
std Z+31, CIPTc4
movw rX, rZ
movw rZ, pRound
dec CIPTz
brne KeccakChiIotaPrepareTheta_zLoop
#undef CIPTa0
#undef CIPTa1
#undef CIPTa2
#undef CIPTa3
#undef CIPTa4
#undef CIPTc0
#undef CIPTc1
#undef CIPTc2
#undef CIPTc3
#undef CIPTc4
#undef CIPTz
#undef CIPTy
;Check for terminator
// Z points at the first byte after the round constant just used. The table
// ends with 0xFF, which becomes 0 when incremented; any other value starts
// the next round.
lpm r0, Z
inc r0
breq Keccak_Done
rjmp Keccak_RoundLoop
Keccak_Done:
ret
#undef rTemp
#undef rTempBis
#undef rTempTer
#undef pRound
#undef rpState
#undef zero
#undef rX
#undef rY
#undef rZ

934
c_src/KeccakF-1600-avr8asm-fast.s Executable file
View File

@ -0,0 +1,934 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer, hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include "Keccak-avr8-settings.h"
#include "crypto_hash.h"
// Size of one absorbed block in bytes. cKeccakR comes from the included
// settings header - presumably the rate in bits; TODO confirm.
#define cKeccakR_SizeInBytes (cKeccakR/8)
// Number of bytes copied to the output: the fixed output length when one is
// configured, otherwise one full rate block.
#ifndef crypto_hash_BYTES
#ifdef cKeccakFixedOutputLengthInBytes
#define crypto_hash_BYTES cKeccakFixedOutputLengthInBytes
#else
#define crypto_hash_BYTES cKeccakR_SizeInBytes
#endif
#endif
// Registers used in all routines
// zero    : r1, used everywhere as a register that holds 0 (assumed to be
//           zero on entry, as in the avr-gcc calling convention - TODO confirm)
// rpState : r24:r25, pointer to the 200-byte state
// rX/rY/rZ: low registers of the X (r26:r27), Y (r28:r29) and Z (r30:r31) pointer pairs
#define zero 1
#define rpState 24
#define rX 26
#define rY 28
#define rZ 30
/*
* int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen )
*
* argument out is passed in r24:r25
* argument in is passed in r22:r23
* argument inlen is passed in r14:r21, only lowest 16-bits (r14-r15) are used
*
* Absorbs the input in blocks of cKeccakR_SizeInBytes bytes, XORs the final
* partial block into the state, applies the padding (XOR 0x01 right after the
* message, flip bit 7 of the last byte of the rate), runs KeccakF one more
* time and copies crypto_hash_BYTES bytes of the state to out.
* A 240-byte work area (200-byte state preceded by 40 bytes for the C lanes)
* is allocated on the stack. The function always returns 0.
* The unrolled loops assume cKeccakR_SizeInBytes is a multiple of 8 and
* crypto_hash_BYTES a multiple of 4.
*/
.global crypto_hash // populate.py, please update crypto_hash
crypto_hash: // populate.py, please update crypto_hash
// crypto_hash only registers
// rInLen lives in r16:r17, which KeccakF overwrites, so it is pushed around
// the call in the full-block loop. rT1..rT3 are scratch.
// sp is the I/O address of the stack pointer low byte (sp+1 is the high byte);
// I/O address 0x3F used below is the status register.
#define rInLen 16 //(2 regs)
#define rT1 18
#define rT2 19
#define rT3 20
#define sp 0x3D
// Save r2-r17 and r28:r29; they are restored in reverse order before returning.
push r2
push r3
push r4
push r5
push r6
push r7
push r8
push r9
push r10
push r11
push r12
push r13
push r14
push r15
push r16
push r17
push r28
push r29
// Allocate state (25*8) + C variables (5*8)
// The stack pointer is lowered by 240 with interrupts disabled while the two
// halves are written; the saved status register is restored afterwards.
in rZ, sp
in rZ+1, sp+1
subi rZ, 240
sbci rZ+1, 0
in r0, 0x3F
cli
out sp+1, rZ+1
out sp, rZ
out 0x3F, r0
adiw rZ, 41 // pointer to start of state, end of C, compensate post decrement
push r24 // save out pointer
push r25
movw rpState, rZ
movw rY, r22 //y contains in pointer
movw rInLen, r14
ldi rT3, 5*5*2 //clear state (4 bytes each iteration)
clearStateLoop:
st z+, zero
st z+, zero
st z+, zero
st z+, zero
dec rT3
brne clearStateLoop
// Full blocks
// Skip straight to the last block when fewer than one full block remains.
cpi rInLen, cKeccakR_SizeInBytes
cpc rInLen+1, zero
brcs ch_lastblock
ch_FullRateLoop:
ldi rT3, cKeccakR_SizeInBytes/8
movw rZ, rpState
// XOR one full block of input (Y) into the start of the state (Z),
// eight bytes (one lane) per iteration.
ch_XorLanesLoop:
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
subi rT3, 1
brne ch_XorLanesLoop
// KeccakF uses Y and r16:r17, so input pointer and length are saved across the call.
push rY
push rY+1
push rInLen
push rInLen+1
call KeccakF
pop rInLen+1
pop rInLen
pop rY+1
pop rY
// Consume one block and loop while at least one more full block remains.
subi rInLen, cKeccakR_SizeInBytes
sbci rInLen+1, 0
cpi rInLen, cKeccakR_SizeInBytes
cpc rInLen+1, zero
brcc ch_FullRateLoop
ch_lastblock: // XOR last incomplete block into state
movw rZ, rpState
// Halve the remaining length: the carry tells whether a single odd byte has
// to be handled first, the result is the number of byte pairs. The subi of 0
// re-establishes the zero flag for the breq after the eor changed it.
lsr rInLen
brcc ch_xorBytes2
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
subi rInLen, 0
ch_xorBytes2:
breq ch_Padding
ch_xorBytes2Loop:
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
ld rT1, Y+
ld rT2, Z
eor rT1, rT2
st Z+, rT1
subi rInLen, 1
brne ch_xorBytes2Loop
ch_Padding:
// First padding bit: XOR 0x01 into the byte right after the message.
ldi rT1, 1
ld rT2, Z
eor rT1, rT2
st Z, rT1
// Last padding bit: Z = state + block size - 1, then flip bit 7 of that byte
// (subtracting 0x80 modulo 256 toggles the top bit only).
ldi rZ, cKeccakR_SizeInBytes-1
add rZ, rpState
mov rZ+1, rpState+1
adc rZ+1, zero
ld rT1, Z
subi rT1, 0x80
st Z, rT1
call KeccakF
//output
ldi rT3, crypto_hash_BYTES/4 ; copy 4 bytes per iteration
movw rY, rpState
pop rZ+1 ; restore out pointer
pop rZ
outputLoop:
ld rT1, Y+
st Z+, rT1
ld rT1, Y+
st Z+, rT1
ld rT1, Y+
st Z+, rT1
ld rT1, Y+
st Z+, rT1
dec rT3
brne outputLoop
// Free state and pop registers
// rpState + 199 is the stack pointer value from before the 240-byte allocation.
ldi rZ, 199
add rpState, rZ
adc rpState+1, zero
in r0, 0x3F
cli
out sp+1, rpState+1
out sp, rpState
out 0x3F, r0
pop r29
pop r28
pop r17
pop r16
pop r15
pop r14
pop r13
pop r12
pop r11
pop r10
pop r9
pop r8
pop r7
pop r6
pop r5
pop r4
pop r3
pop r2
// return 0
mov r24, zero
mov r25, zero
#undef rInLen
#undef rT1
#undef rT2
#undef rT3
#undef sp
ret
// Table driving the Rho Pi phase of KeccakF, three bytes per step, read with
// lpm in KeccakRhoPi_Loop:
//   byte 0: ROT_BIT  - index into bit_rot_jmp_table selecting the bit
//           rotation applied to the lane currently held in rTemp
//   byte 1: ROT_BYTE - offset added to the address of rotate64_0byte_left to
//           select the byte-rotate routine used to load the next lane; the
//           factor 9 is the length in instructions of each of those routines
//   byte 2: byte offset in the state of the lane handled in this step
// The ROT_BYTE argument of each row is the rotation amount whose ROT_BIT
// appears on the following row. The last row has index 1 * 8, which is also
// the value that ends the loop.
#define ROT_BIT(a) ((a) & 7)
#define ROT_BYTE(a) ((((a)/8 + !!(((a)%8) > 4)) & 7) * 9)
KeccakF_RhoPiConstants:
.BYTE ROT_BIT( 1), ROT_BYTE( 3), 10 * 8
.BYTE ROT_BIT( 3), ROT_BYTE( 6), 7 * 8
.BYTE ROT_BIT( 6), ROT_BYTE(10), 11 * 8
.BYTE ROT_BIT(10), ROT_BYTE(15), 17 * 8
.BYTE ROT_BIT(15), ROT_BYTE(21), 18 * 8
.BYTE ROT_BIT(21), ROT_BYTE(28), 3 * 8
.BYTE ROT_BIT(28), ROT_BYTE(36), 5 * 8
.BYTE ROT_BIT(36), ROT_BYTE(45), 16 * 8
.BYTE ROT_BIT(45), ROT_BYTE(55), 8 * 8
.BYTE ROT_BIT(55), ROT_BYTE( 2), 21 * 8
.BYTE ROT_BIT( 2), ROT_BYTE(14), 24 * 8
.BYTE ROT_BIT(14), ROT_BYTE(27), 4 * 8
.BYTE ROT_BIT(27), ROT_BYTE(41), 15 * 8
.BYTE ROT_BIT(41), ROT_BYTE(56), 23 * 8
.BYTE ROT_BIT(56), ROT_BYTE( 8), 19 * 8
.BYTE ROT_BIT( 8), ROT_BYTE(25), 13 * 8
.BYTE ROT_BIT(25), ROT_BYTE(43), 12 * 8
.BYTE ROT_BIT(43), ROT_BYTE(62), 2 * 8
.BYTE ROT_BIT(62), ROT_BYTE(18), 20 * 8
.BYTE ROT_BIT(18), ROT_BYTE(39), 14 * 8
.BYTE ROT_BIT(39), ROT_BYTE(61), 22 * 8
.BYTE ROT_BIT(61), ROT_BYTE(20), 9 * 8
.BYTE ROT_BIT(20), ROT_BYTE(44), 6 * 8
.BYTE ROT_BIT(44), ROT_BYTE( 1), 1 * 8
// The 24 round constants, one row of 8 bytes per round, least significant
// byte first. KeccakF reads them sequentially with lpm and stops when the byte
// following a row is 0xFF, so the terminator row must stay directly after the
// last constant and no constant may start with 0xFF.
KeccakF_RoundConstants:
.BYTE 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x82, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x8a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x8b, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x09, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x09, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x8b, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x89, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x0a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
.BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80
.BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00
.BYTE 0x08, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80
.BYTE 0xFF, 0 //terminator
.text
// KeccakF
// Not callable from C!
//
// argument rpState is passed in r24:r25
//
// Permutes the 200-byte state at rpState in place. The 40 bytes directly
// below rpState are used as storage for the five 8-byte C lanes (column
// parities). Overwrites r0, r2-r23 and the X, Y and Z pointer registers
// (pRound is saved and restored on the stack during Rho Pi only);
// rpState itself is left unchanged.
KeccakF:
// Variables used in multiple operations
#define rTemp 2 // 8 regs (2-9)
#define rTempBis 10 // 8 regs (10-17)
#define rTempTer 18 // 4 regs (18-21)
#define pRound 22 // 2 regs (22-23)
// Initial Prepare Theta
// Computes the five C lanes one whole lane per iteration (5 iterations):
// C[x] = state[x] ^ state[5+x] ^ state[10+x] ^ state[15+x] ^ state[20+x].
#define TCIPx rTempTer
movw rZ, rpState // Z points to the 5 C lanes (40 bytes below the state)
sbiw rZ, 40
ldi TCIPx, 5
movw rY, rpState
KeccakInitialPrepTheta_Loop:
ld rTemp+0, Y+ ;state[x]
ld rTemp+1, Y+
ld rTemp+2, Y+
ld rTemp+3, Y+
ld rTemp+4, Y+
ld rTemp+5, Y+
ld rTemp+6, Y+
ld rTemp+7, Y+
adiw rY, 32
ld r0, Y+ ;state[5+x]
eor rTemp+0, r0
ld r0, Y+
eor rTemp+1, r0
ld r0, Y+
eor rTemp+2, r0
ld r0, Y+
eor rTemp+3, r0
ld r0, Y+
eor rTemp+4, r0
ld r0, Y+
eor rTemp+5, r0
ld r0, Y+
eor rTemp+6, r0
ld r0, Y+
eor rTemp+7, r0
adiw rY, 32
ld r0, Y+ ;state[10+x]
eor rTemp+0, r0
ld r0, Y+
eor rTemp+1, r0
ld r0, Y+
eor rTemp+2, r0
ld r0, Y+
eor rTemp+3, r0
ld r0, Y+
eor rTemp+4, r0
ld r0, Y+
eor rTemp+5, r0
ld r0, Y+
eor rTemp+6, r0
ld r0, Y+
eor rTemp+7, r0
adiw rY, 32
ld r0, Y+ ;state[15+x]
eor rTemp+0, r0
ld r0, Y+
eor rTemp+1, r0
ld r0, Y+
eor rTemp+2, r0
ld r0, Y+
eor rTemp+3, r0
ld r0, Y+
eor rTemp+4, r0
ld r0, Y+
eor rTemp+5, r0
ld r0, Y+
eor rTemp+6, r0
ld r0, Y+
eor rTemp+7, r0
adiw rY, 32
ld r0, Y+ ;state[20+x]
eor rTemp+0, r0
ld r0, Y+
eor rTemp+1, r0
ld r0, Y+
eor rTemp+2, r0
ld r0, Y+
eor rTemp+3, r0
ld r0, Y+
eor rTemp+4, r0
ld r0, Y+
eor rTemp+5, r0
ld r0, Y+
eor rTemp+6, r0
ld r0, Y+
eor rTemp+7, r0
st Z+, rTemp+0
st Z+, rTemp+1
st Z+, rTemp+2
st Z+, rTemp+3
st Z+, rTemp+4
st Z+, rTemp+5
st Z+, rTemp+6
st Z+, rTemp+7
// Y was advanced by 168 above; going back 160 moves it to the next lane.
subi rY, 160
sbc rY+1, zero
subi TCIPx, 1
breq KeccakInitialPrepTheta_Done
rjmp KeccakInitialPrepTheta_Loop
KeccakInitialPrepTheta_Done:
#undef TCIPx
// pRound walks through the round constants table in program memory.
ldi pRound, lo8(KeccakF_RoundConstants)
ldi pRound+1, hi8(KeccakF_RoundConstants)
Keccak_RoundLoop:
// Theta
// For each of the five columns: rTemp = (C lane at TCplus rotated left by one
// bit) XOR (C lane at TCminus), then rTemp is XORed into the five lanes of
// that column. TCplus starts at the C lane 32 bytes below the state and
// TCminus at the C lane 8 bytes below it; both advance by one lane per column.
// TCcoordX is a bit schedule shifted right once per column: a cleared carry
// means one of the two pointers ran past the C area and is moved back by 40
// bytes (TCminus when the shifted value is 0x0B, TCplus otherwise); a zero
// result with carry set ends the phase.
#define TCplus rX
#define TCminus rZ
#define TCcoordX rTempTer
#define TCcoordY rTempTer+1
movw TCminus, rpState
sbiw TCminus, 1*8
movw TCplus, rpState
sbiw TCplus, 4*8
movw rY, rpState
ldi TCcoordX, 0x16
KeccakTheta_Loop1:
ld rTemp+0, X+
ld rTemp+1, X+
ld rTemp+2, X+
ld rTemp+3, X+
ld rTemp+4, X+
ld rTemp+5, X+
ld rTemp+6, X+
ld rTemp+7, X+
// 64-bit rotate left by one: the bit shifted out of the top byte is added
// back into the bottom byte.
lsl rTemp+0
rol rTemp+1
rol rTemp+2
rol rTemp+3
rol rTemp+4
rol rTemp+5
rol rTemp+6
rol rTemp+7
adc rTemp+0, zero
ld r0, Z+
eor rTemp+0, r0
ld r0, Z+
eor rTemp+1, r0
ld r0, Z+
eor rTemp+2, r0
ld r0, Z+
eor rTemp+3, r0
ld r0, Z+
eor rTemp+4, r0
ld r0, Z+
eor rTemp+5, r0
ld r0, Z+
eor rTemp+6, r0
ld r0, Z+
eor rTemp+7, r0
ldi TCcoordY, 5
// XOR rTemp into the five lanes of the current column (lanes are 40 bytes apart).
KeccakTheta_Loop2:
ld r0, Y
eor r0, rTemp+0
st Y+, r0
ld r0, Y
eor r0, rTemp+1
st Y+, r0
ld r0, Y
eor r0, rTemp+2
st Y+, r0
ld r0, Y
eor r0, rTemp+3
st Y+, r0
ld r0, Y
eor r0, rTemp+4
st Y+, r0
ld r0, Y
eor r0, rTemp+5
st Y+, r0
ld r0, Y
eor r0, rTemp+6
st Y+, r0
ld r0, Y
eor r0, rTemp+7
st Y+, r0
adiw rY, 32
dec TCcoordY
brne KeccakTheta_Loop2
// Y moved forward by 200; going back 192 leaves it on the next column.
subi rY, 200-8
sbc rY+1, zero
lsr TCcoordX
brcc 1f
breq KeccakTheta_End
rjmp KeccakTheta_Loop1
1:
cpi TCcoordX, 0x0B
brne 2f
sbiw TCminus, 40
rjmp KeccakTheta_Loop1
2:
sbiw TCplus, 40
rjmp KeccakTheta_Loop1
KeccakTheta_End:
#undef TCplus
#undef TCminus
#undef TCcoordX
#undef TCcoordY
// Rho Pi
// Moves the lanes along the cycle described by KeccakF_RhoPiConstants.
// rTemp always holds the lane that is travelling; in every step it is rotated
// by a routine selected through bit_rot_jmp_table, the lane at the destination
// index is loaded into rTempBis by one of the byte-rotate routines, rTemp is
// stored at the destination and rTempBis becomes the new rTemp.
//   RPpConst   : pointer into KeccakF_RhoPiConstants (program memory)
//   RPindex    : byte offset of the destination lane in the state
//   RPpBitRot  : word address of bit_rot_jmp_table (kept in X)
//   RPpByteRot : word address of rotate64_0byte_left; it reuses the pRound
//                registers, which is why pRound is pushed and popped here
// Y is left 32 bytes past the start of lane 1 by the Theta phase, hence the
// sbiw below before lane 1 is loaded.
#define RPpConst rTempTer // 2 regs
#define RPindex rTempTer+2
#define RPpBitRot rX
#define RPpByteRot pRound
sbiw rY, 32
ld rTemp+0, Y+
ld rTemp+1, Y+
ld rTemp+2, Y+
ld rTemp+3, Y+
ld rTemp+4, Y+
ld rTemp+5, Y+
ld rTemp+6, Y+
ld rTemp+7, Y+
push pRound
push pRound+1
ldi RPpConst, lo8(KeccakF_RhoPiConstants)
ldi RPpConst+1, hi8(KeccakF_RhoPiConstants)
ldi RPpBitRot, pm_lo8(bit_rot_jmp_table)
ldi RPpBitRot+1, pm_hi8(bit_rot_jmp_table)
ldi RPpByteRot, pm_lo8(rotate64_0byte_left)
ldi RPpByteRot+1, pm_hi8(rotate64_0byte_left)
KeccakRhoPi_Loop:
; get rotation codes and state index
movw rZ, RPpConst
lpm r0, Z+ ;bits
lpm rTempBis, Z+ ;bytes
lpm RPindex, Z+
movw RPpConst, rZ
; do bit rotation
// Indirect jump to entry r0 of bit_rot_jmp_table; every rotation routine
// comes back to KeccakRhoPi_RhoBitRotateDone.
movw rZ, RPpBitRot
add rZ, r0
adc rZ+1, zero
ijmp
KeccakRhoPi_RhoBitRotateDone:
movw rY, rpState
add rY, RPindex
adc rY+1, zero
// Indirect jump to the byte-rotate routine at rotate64_0byte_left plus the
// offset read from the table; the routine loads the lane at Y into rTempBis
// and jumps to KeccakRhoPi_PiStore.
movw rZ, RPpByteRot
add rZ, rTempBis
adc rZ+1, zero
ijmp
KeccakRhoPi_PiStore:
// Store the travelling lane at the destination, then continue with the lane
// that was just picked up from there.
sbiw rY, 8
st Y+, rTemp+0
st Y+, rTemp+1
st Y+, rTemp+2
st Y+, rTemp+3
st Y+, rTemp+4
st Y+, rTemp+5
st Y+, rTemp+6
st Y+, rTemp+7
movw rTemp+0, rTempBis+0
movw rTemp+2, rTempBis+2
movw rTemp+4, rTempBis+4
movw rTemp+6, rTempBis+6
KeccakRhoPi_RhoDone:
// The step whose index is 8 (lane 1) is the last one in the table.
subi RPindex, 8
brne KeccakRhoPi_Loop
pop pRound+1
pop pRound
#undef RPpConst
#undef RPindex
#undef RPpBitRot
#undef RPpByteRot
// Chi Iota prepare Theta
// Works on one byte position (0..7) of all 25 lanes per outer iteration.
// The inner loop applies Chi to the five lanes of each of the five planes and
// accumulates the new column parities in CIPTc0..CIPTc4. After the inner loop
// the round constant byte is XORed into lane 0 and into its parity, and the
// five parity bytes are written to the C lanes below the state for the next
// Theta. X points into the C area, Y into the state and Z into the round
// constants; pRound is used to park Z while Z is borrowed for the C stores.
#define CIPTa0 rTemp
#define CIPTa1 rTemp+1
#define CIPTa2 rTemp+2
#define CIPTa3 rTemp+3
#define CIPTa4 rTemp+4
#define CIPTc0 rTempBis
#define CIPTc1 rTempBis+1
#define CIPTc2 rTempBis+2
#define CIPTc3 rTempBis+3
#define CIPTc4 rTempBis+4
#define CIPTz rTempBis+6
#define CIPTy rTempBis+7
movw rY, rpState
movw rX, rpState ; 5 * C
sbiw rX, 40
movw rZ, pRound
ldi CIPTz, 8
KeccakChiIotaPrepareTheta_zLoop:
// Clear the five parity accumulators (the movw clears c2 and c3 together).
mov CIPTc0, zero
mov CIPTc1, zero
movw CIPTc2, CIPTc0
mov CIPTc4, zero
ldi CIPTy, 5
KeccakChiIotaPrepareTheta_yLoop:
ld CIPTa0, Y
ldd CIPTa1, Y+8
ldd CIPTa2, Y+16
ldd CIPTa3, Y+24
ldd CIPTa4, Y+32
;*p = t = a0 ^ ((~a1) & a2); c0 ^= t;
mov r0, CIPTa1
com r0
and r0, CIPTa2
eor r0, CIPTa0
eor CIPTc0, r0
st Y, r0
;*(p+8) = t = a1 ^ ((~a2) & a3); c1 ^= t;
mov r0, CIPTa2
com r0
and r0, CIPTa3
eor r0, CIPTa1
eor CIPTc1, r0
std Y+8, r0
;*(p+16) = a2 ^= ((~a3) & a4); c2 ^= a2;
mov r0, CIPTa3
com r0
and r0, CIPTa4
eor r0, CIPTa2
eor CIPTc2, r0
std Y+16, r0
;*(p+24) = a3 ^= ((~a4) & a0); c3 ^= a3;
mov r0, CIPTa4
com r0
and r0, CIPTa0
eor r0, CIPTa3
eor CIPTc3, r0
std Y+24, r0
;*(p+32) = a4 ^= ((~a0) & a1); c4 ^= a4;
// a0 is no longer needed after this point, so it is complemented in place.
com CIPTa0
and CIPTa0, CIPTa1
eor CIPTa0, CIPTa4
eor CIPTc4, CIPTa0
std Y+32, CIPTa0
adiw rY, 40
dec CIPTy
brne KeccakChiIotaPrepareTheta_yLoop
// Back to plane 0 (5 * 40 bytes), same byte position.
subi rY, 200
sbc rY+1, zero
lpm r0, Z+ ;Round Constant
ld CIPTa0, Y
eor CIPTa0, r0
st Y+, CIPTa0
movw pRound, rZ
movw rZ, rX
eor CIPTc0, r0
// The Z+ store advances Z by one, hence the displacements 7, 15, 23, 31
// for the same byte position in the other four C lanes.
st Z+, CIPTc0
std Z+7, CIPTc1
std Z+15, CIPTc2
std Z+23, CIPTc3
std Z+31, CIPTc4
movw rX, rZ
movw rZ, pRound
dec CIPTz
brne KeccakChiIotaPrepareTheta_zLoop
#undef CIPTa0
#undef CIPTa1
#undef CIPTa2
#undef CIPTa3
#undef CIPTa4
#undef CIPTc0
#undef CIPTc1
#undef CIPTc2
#undef CIPTc3
#undef CIPTc4
#undef CIPTz
#undef CIPTy
;Check for terminator
lpm r0, Z
inc r0
breq Keccak_Done
rjmp Keccak_RoundLoop
Keccak_Done:
ret
; Dispatch table for the sub-byte part of a lane rotation. It holds eight
; rjmp entries in this order: no rotation, left by 1, 2, 3, 4 bits, then
; right by 3, 2, 1 bits. Every target ends by jumping (or is itself)
; KeccakRhoPi_RhoBitRotateDone.
; NOTE(review): the code that indexes this table lies outside this excerpt;
; presumably it jumps to bit_rot_jmp_table plus a rotation code - confirm
; before reordering or resizing the entries.
bit_rot_jmp_table:
rjmp KeccakRhoPi_RhoBitRotateDone
rjmp rotate64_1bit_left
rjmp rotate64_2bit_left
rjmp rotate64_3bit_left
rjmp rotate64_4bit_left
rjmp rotate64_3bit_right
rjmp rotate64_2bit_right
rjmp rotate64_1bit_right
; Rotate the 64-bit value held in rTemp+0 .. rTemp+7 left by 1 to 4 bits.
; The four entry points fall through into each other, so entering at
; rotate64_Nbit_left executes N identical single-bit steps before the final
; rjmp back to KeccakRhoPi_RhoBitRotateDone.
; One step: lsl shifts byte 0 (bit 0 becomes 0), rol carries the shifted-out
; bit through bytes 1..7, and adc adds the carry that left byte 7 back into
; bit 0 of byte 0, which closes the rotation.
; NOTE(review): adc uses r1 as its zero operand; this presumably relies on
; r1 being kept at 0 (the avr-gcc convention) - confirm.
rotate64_4bit_left:
lsl rTemp
rol rTemp+1
rol rTemp+2
rol rTemp+3
rol rTemp+4
rol rTemp+5
rol rTemp+6
rol rTemp+7
adc rTemp, r1
; falls through: three more single-bit steps
rotate64_3bit_left:
lsl rTemp
rol rTemp+1
rol rTemp+2
rol rTemp+3
rol rTemp+4
rol rTemp+5
rol rTemp+6
rol rTemp+7
adc rTemp, r1
; falls through: two more single-bit steps
rotate64_2bit_left:
lsl rTemp
rol rTemp+1
rol rTemp+2
rol rTemp+3
rol rTemp+4
rol rTemp+5
rol rTemp+6
rol rTemp+7
adc rTemp, r1
; falls through: last single-bit step
rotate64_1bit_left:
lsl rTemp
rol rTemp+1
rol rTemp+2
rol rTemp+3
rol rTemp+4
rol rTemp+5
rol rTemp+6
rol rTemp+7
adc rTemp, r1
rjmp KeccakRhoPi_RhoBitRotateDone
; Rotate the 64-bit value held in rTemp+0 .. rTemp+7 right by 1 to 3 bits.
; The three entry points fall through into each other, so entering at
; rotate64_Nbit_right executes N identical single-bit steps before the final
; rjmp back to KeccakRhoPi_RhoBitRotateDone.
; One step: bst saves bit 0 of byte 0 in the T flag, the ror chain shifts
; bytes 7 down to 0 right through carry, and bld writes the saved bit into
; bit 7 of byte 7. The bld also overwrites whatever the incoming carry
; placed there, so the carry state on entry does not matter.
rotate64_3bit_right:
bst rTemp, 0
ror rTemp+7
ror rTemp+6
ror rTemp+5
ror rTemp+4
ror rTemp+3
ror rTemp+2
ror rTemp+1
ror rTemp
bld rTemp+7, 7
; falls through: two more single-bit steps
rotate64_2bit_right:
bst rTemp, 0
ror rTemp+7
ror rTemp+6
ror rTemp+5
ror rTemp+4
ror rTemp+3
ror rTemp+2
ror rTemp+1
ror rTemp
bld rTemp+7, 7
; falls through: last single-bit step
rotate64_1bit_right:
bst rTemp, 0
ror rTemp+7
ror rTemp+6
ror rTemp+5
ror rTemp+4
ror rTemp+3
ror rTemp+2
ror rTemp+1
ror rTemp
bld rTemp+7, 7
rjmp KeccakRhoPi_RhoBitRotateDone
/*
** Each byte rotate routine must be 9 instructions long.
*/
; Whole-byte rotations performed while loading a lane. Every routine reads
; eight consecutive bytes through Y (post-increment, so Y advances by 8) into
; rTempBis+0 .. rTempBis+7 and then jumps to KeccakRhoPi_PiStore.
; rotate64_Nbyte_left places the first byte read in rTempBis+N and wraps
; around after rTempBis+7, so source byte k ends up in register (k+N) mod 8.
; NOTE(review): the fixed length of 9 instructions (8 loads + 1 rjmp)
; presumably lets the caller compute the entry address from the byte count;
; that code is outside this excerpt - confirm before changing any routine.
rotate64_0byte_left:
ld rTempBis+0, Y+
ld rTempBis+1, Y+
ld rTempBis+2, Y+
ld rTempBis+3, Y+
ld rTempBis+4, Y+
ld rTempBis+5, Y+
ld rTempBis+6, Y+
ld rTempBis+7, Y+
rjmp KeccakRhoPi_PiStore
rotate64_1byte_left:
ld rTempBis+1, Y+
ld rTempBis+2, Y+
ld rTempBis+3, Y+
ld rTempBis+4, Y+
ld rTempBis+5, Y+
ld rTempBis+6, Y+
ld rTempBis+7, Y+
ld rTempBis+0, Y+
rjmp KeccakRhoPi_PiStore
rotate64_2byte_left:
ld rTempBis+2, Y+
ld rTempBis+3, Y+
ld rTempBis+4, Y+
ld rTempBis+5, Y+
ld rTempBis+6, Y+
ld rTempBis+7, Y+
ld rTempBis+0, Y+
ld rTempBis+1, Y+
rjmp KeccakRhoPi_PiStore
rotate64_3byte_left:
ld rTempBis+3, Y+
ld rTempBis+4, Y+
ld rTempBis+5, Y+
ld rTempBis+6, Y+
ld rTempBis+7, Y+
ld rTempBis+0, Y+
ld rTempBis+1, Y+
ld rTempBis+2, Y+
rjmp KeccakRhoPi_PiStore
rotate64_4byte_left:
ld rTempBis+4, Y+
ld rTempBis+5, Y+
ld rTempBis+6, Y+
ld rTempBis+7, Y+
ld rTempBis+0, Y+
ld rTempBis+1, Y+
ld rTempBis+2, Y+
ld rTempBis+3, Y+
rjmp KeccakRhoPi_PiStore
rotate64_5byte_left:
ld rTempBis+5, Y+
ld rTempBis+6, Y+
ld rTempBis+7, Y+
ld rTempBis+0, Y+
ld rTempBis+1, Y+
ld rTempBis+2, Y+
ld rTempBis+3, Y+
ld rTempBis+4, Y+
rjmp KeccakRhoPi_PiStore
rotate64_6byte_left:
ld rTempBis+6, Y+
ld rTempBis+7, Y+
ld rTempBis+0, Y+
ld rTempBis+1, Y+
ld rTempBis+2, Y+
ld rTempBis+3, Y+
ld rTempBis+4, Y+
ld rTempBis+5, Y+
rjmp KeccakRhoPi_PiStore
rotate64_7byte_left:
ld rTempBis+7, Y+
ld rTempBis+0, Y+
ld rTempBis+1, Y+
ld rTempBis+2, Y+
ld rTempBis+3, Y+
ld rTempBis+4, Y+
ld rTempBis+5, Y+
ld rTempBis+6, Y+
rjmp KeccakRhoPi_PiStore
#undef rTemp
#undef rTempBis
#undef rTempTer
#undef pRound
#undef rpState
#undef zero
#undef rX
#undef rY
#undef rZ

View File

@ -0,0 +1,446 @@
@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
@ Michaël Peeters and Gilles Van Assche. For more information, feedback or
@ questions, please refer to our website: http://keccak.noekeon.org/
@
@ Implementation by Ronny Van Keer, hereby denoted as "the implementer".
@
@ To the extent possible under law, the implementer has waived all copyright
@ and related or neighboring rights to the source code in this file.
@ http://creativecommons.org/publicdomain/zero/1.0/
@ This file was created from a .asm file
@ using the ads2gas.pl script.
.equ DO1STROUNDING, 0
@ PRESERVE8
.text
@// --- offsets in state
.equ Aba, 0*8
.equ Aga, 1*8
.equ Aka, 2*8
.equ Ama, 3*8
.equ Asa, 4*8
@// --- macros
@ Applies theta, rho/pi, chi and iota to the five lanes named by the
@ arguments, following the formula comments inside the macro.
@   argA1        : byte offset of the lane kept on the stack (used with sp)
@   argA2..argA5 : D registers holding the other four lanes, updated in place
@ The first half also derives the theta values from d5 and the lane
@ registers; the KeccakThetaRhoPiChi1..4 macros later read them from d7
@ (combined with the stack lane) and d8..d11 (combined with argA2..argA5).
@ One 64-bit round constant is read from [r3], and r3 is post-incremented.
@ On exit d5 holds the new argA1 lane, which the following macros extend
@ into a running XOR (their "Ca ^= argA1" step).
@ The instruction order interleaves independent operations; do not reorder
@ without re-checking every register dependency.
.macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5
@Prepare Theta
@Ca = Aba^Aga^Aka^Ama^Asa@
@Ce = Abe^Age^Ake^Ame^Ase@
@Ci = Abi^Agi^Aki^Ami^Asi@
@Co = Abo^Ago^Ako^Amo^Aso@
@Cu = Abu^Agu^Aku^Amu^Asu@
@De = Ca^ROL64(Ci, 1)@
@Di = Ce^ROL64(Co, 1)@
@Do = Ci^ROL64(Cu, 1)@
@Du = Co^ROL64(Ca, 1)@
@Da = Cu^ROL64(Ce, 1)@
veor.64 q4, q6, q7
veor.64 q5, q9, q10
veor.64 d8, d8, d9
veor.64 d10, d10, d11
veor.64 d1, d8, d16
veor.64 d2, d10, d17
veor.64 q4, q11, q12
veor.64 q5, q14, q15
veor.64 d8, d8, d9
veor.64 d10, d10, d11
veor.64 d3, d8, d26
@ vadd x, x followed by vsri #63 builds a rotate-left-by-1 of each lane
vadd.u64 q4, q1, q1
veor.64 d4, d10, d27
@ d5 carries the Ca value accumulated by the previous macros
vmov.64 d0, d5
vsri.64 q4, q1, #63
vadd.u64 q5, q2, q2
veor.64 q4, q4, q0
vsri.64 q5, q2, #63
vadd.u64 d7, d1, d1
veor.64 \argA2, \argA2, d8
veor.64 q5, q5, q1
vsri.64 d7, d1, #63
vshl.u64 d1, \argA2, #44
veor.64 \argA3, \argA3, d9
veor.64 d7, d7, d4
@Ba = argA1^Da@
@Be = ROL64((argA2^De), 44)@
@Bi = ROL64((argA3^Di), 43)@
@Bo = ROL64((argA4^Do), 21)@
@Bu = ROL64((argA5^Du), 14)@
@argA2 = Be ^((~Bi)& Bo )@
@argA3 = Bi ^((~Bo)& Bu )@
@argA4 = Bo ^((~Bu)& Ba )@
@argA5 = Bu ^((~Ba)& Be )@
@argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@
@ each rotation is a vshl by n plus a vsri by 64-n into the same register
vsri.64 d1, \argA2, #64-44
vshl.u64 d2, \argA3, #43
vldr.64 d0, [sp, #\argA1]
veor.64 \argA4, \argA4, d10
vsri.64 d2, \argA3, #64-43
vshl.u64 d3, \argA4, #21
veor.64 \argA5, \argA5, d11
veor.64 d0, d0, d7
vsri.64 d3, \argA4, #64-21
vbic.64 d5, d2, d1
vshl.u64 d4, \argA5, #14
vbic.64 \argA2, d3, d2
@ fetch the round constant for iota; r3 advances to the next table entry
vld1.64 d6, [r3]!
veor.64 d5, d0
vsri.64 d4, \argA5, #64-14
veor.64 d5, d6
vbic.64 \argA5, d1, d0
vbic.64 \argA3, d4, d3
vbic.64 \argA4, d0, d4
veor.64 \argA2, d1
@ write the new argA1 lane back to its stack slot
vstr.64 d5, [sp, #\argA1]
veor.64 \argA3, d2
veor.64 \argA4, d3
veor.64 \argA5, d4
.endm
@ Applies theta, rho/pi and chi to five lanes using rotation amounts
@ 3, 45, 61, 28 and 20 (see the formula comments below).
@   argA1        : byte offset of the lane kept on the stack (used with sp)
@   argA2..argA5 : D registers holding the other four lanes, updated in place
@ Reads the theta values left in d7 and d8..d11 by KeccakThetaRhoPiChiIota,
@ uses d0..d4 and d6 as scratch, stores the new argA1 lane to the stack and
@ XORs it into the running value in d5.
.macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5
@d2 = ROL64((argA1^Da), 3)@
@d3 = ROL64((argA2^De), 45)@
@d4 = ROL64((argA3^Di), 61)@
@d0 = ROL64((argA4^Do), 28)@
@d1 = ROL64((argA5^Du), 20)@
@argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
@argA2 = Be ^((~Bi)& Bo )@
@argA3 = Bi ^((~Bo)& Bu )@
@argA4 = Bo ^((~Bu)& Ba )@
@argA5 = Bu ^((~Ba)& Be )@
veor.64 \argA2, \argA2, d8
veor.64 \argA3, \argA3, d9
vshl.u64 d3, \argA2, #45
vldr.64 d6, [sp, #\argA1]
vshl.u64 d4, \argA3, #61
veor.64 \argA4, \argA4, d10
vsri.64 d3, \argA2, #64-45
veor.64 \argA5, \argA5, d11
vsri.64 d4, \argA3, #64-61
vshl.u64 d0, \argA4, #28
veor.64 d6, d6, d7
vshl.u64 d1, \argA5, #20
vbic.64 \argA3, d4, d3
vsri.64 d0, \argA4, #64-28
vbic.64 \argA4, d0, d4
vshl.u64 d2, d6, #3
vsri.64 d1, \argA5, #64-20
veor.64 \argA4, d3
vsri.64 d2, d6, #64-3
vbic.64 \argA5, d1, d0
vbic.64 d6, d2, d1
vbic.64 \argA2, d3, d2
veor.64 d6, d0
veor.64 \argA2, d1
@ d6 now holds the new argA1 lane: store it, then fold it into d5
vstr.64 d6, [sp, #\argA1]
veor.64 \argA3, d2
veor.64 d5, d6
veor.64 \argA5, d4
.endm
@ Applies theta, rho/pi and chi to five lanes using rotation amounts
@ 18, 1, 6, 25 and 8 (see the formula comments below).
@   argA1        : byte offset of the lane kept on the stack (used with sp)
@   argA2..argA5 : D registers holding the other four lanes, updated in place
@ Reads the theta values left in d7 and d8..d11 by KeccakThetaRhoPiChiIota,
@ uses d0..d4 and d6 as scratch, stores the new argA1 lane to the stack and
@ XORs it into the running value in d5.
.macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5
@d4 = ROL64((argA1^Da), 18)@
@d0 = ROL64((argA2^De), 1)@
@d1 = ROL64((argA3^Di), 6)@
@d2 = ROL64((argA4^Do), 25)@
@d3 = ROL64((argA5^Du), 8)@
@argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
@argA2 = Be ^((~Bi)& Bo )@
@argA3 = Bi ^((~Bo)& Bu )@
@argA4 = Bo ^((~Bu)& Ba )@
@argA5 = Bu ^((~Ba)& Be )@
veor.64 \argA3, \argA3, d9
veor.64 \argA4, \argA4, d10
vshl.u64 d1, \argA3, #6
vldr.64 d6, [sp, #\argA1]
vshl.u64 d2, \argA4, #25
veor.64 \argA5, \argA5, d11
vsri.64 d1, \argA3, #64-6
veor.64 \argA2, \argA2, d8
vsri.64 d2, \argA4, #64-25
@ the rotation by 8 bits is a whole byte, done with one byte extract
vext.8 d3, \argA5, \argA5, #7
veor.64 d6, d6, d7
vbic.64 \argA3, d2, d1
@ the rotation by 1 uses vadd x, x as the left shift
vadd.u64 d0, \argA2, \argA2
vbic.64 \argA4, d3, d2
vsri.64 d0, \argA2, #64-1
vshl.u64 d4, d6, #18
veor.64 \argA2, d1, \argA4
veor.64 \argA3, d0
vsri.64 d4, d6, #64-18
@ argA3 temporarily holds the new argA1 lane: store it, then fold it into d5
vstr.64 \argA3, [sp, #\argA1]
veor.64 d5, \argA3
vbic.64 \argA5, d1, d0
vbic.64 \argA3, d4, d3
vbic.64 \argA4, d0, d4
veor.64 \argA3, d2
veor.64 \argA4, d3
veor.64 \argA5, d4
.endm
@ Applies theta, rho/pi and chi to five lanes using rotation amounts
@ 36, 10, 15, 56 and 27 (see the formula comments below).
@   argA1        : byte offset of the lane kept on the stack (used with sp)
@   argA2..argA5 : D registers holding the other four lanes, updated in place
@ Reads the theta values left in d7 and d8..d11 by KeccakThetaRhoPiChiIota,
@ uses d0..d4 and d6 as scratch, stores the new argA1 lane to the stack and
@ XORs it into the running value in d5.
.macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5
@d1 = ROL64((argA1^Da), 36)@
@d2 = ROL64((argA2^De), 10)@
@d3 = ROL64((argA3^Di), 15)@
@d4 = ROL64((argA4^Do), 56)@
@d0 = ROL64((argA5^Du), 27)@
@argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
@argA2 = Be ^((~Bi)& Bo )@
@argA3 = Bi ^((~Bo)& Bu )@
@argA4 = Bo ^((~Bu)& Ba )@
@argA5 = Bu ^((~Ba)& Be )@
veor.64 \argA2, \argA2, d8
veor.64 \argA3, \argA3, d9
vshl.u64 d2, \argA2, #10
vldr.64 d6, [sp, #\argA1]
vshl.u64 d3, \argA3, #15
veor.64 \argA4, \argA4, d10
vsri.64 d2, \argA2, #64-10
vsri.64 d3, \argA3, #64-15
veor.64 \argA5, \argA5, d11
@ the rotation by 56 bits is a whole number of bytes, done with one extract
vext.8 d4, \argA4, \argA4, #1
vbic.64 \argA2, d3, d2
vshl.u64 d0, \argA5, #27
veor.64 d6, d6, d7
vbic.64 \argA3, d4, d3
vsri.64 d0, \argA5, #64-27
vshl.u64 d1, d6, #36
veor.64 \argA3, d2
vbic.64 \argA4, d0, d4
vsri.64 d1, d6, #64-36
veor.64 \argA4, d3
vbic.64 d6, d2, d1
vbic.64 \argA5, d1, d0
veor.64 d6, d0
veor.64 \argA2, d1
@ d6 now holds the new argA1 lane: store it, then fold it into d5
vstr.64 d6, [sp, #\argA1]
veor.64 d5, d6
veor.64 \argA5, d4
.endm
@ Applies theta, rho/pi and chi to five lanes using rotation amounts
@ 41, 2, 62, 55 and 39 (see the formula comments below).
@   argA1        : byte offset of the lane kept on the stack (used with sp)
@   argA2..argA5 : D registers holding the other four lanes, updated in place
@ Reads the theta values left in d7 and d8..d11 by KeccakThetaRhoPiChiIota,
@ uses d0..d4 and d6 as scratch, stores the new argA1 lane to the stack and
@ XORs it into the running value in d5.
@ The macro contains only NEON instructions, and the round loop relies on
@ that: it places a cmp before this macro and the matching bne after it.
.macro KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5
@d3 = ROL64((argA1^Da), 41)@
@d4 = ROL64((argA2^De), 2)@
@d0 = ROL64((argA3^Di), 62)@
@d1 = ROL64((argA4^Do), 55)@
@d2 = ROL64((argA5^Du), 39)@
@argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@
@argA2 = Be ^((~Bi)& Bo )@
@argA3 = Bi ^((~Bo)& Bu )@
@argA4 = Bo ^((~Bu)& Ba )@
@argA5 = Bu ^((~Ba)& Be )@
veor.64 \argA2, \argA2, d8
veor.64 \argA3, \argA3, d9
vshl.u64 d4, \argA2, #2
veor.64 \argA5, \argA5, d11
vshl.u64 d0, \argA3, #62
vldr.64 d6, [sp, #\argA1]
vsri.64 d4, \argA2, #64-2
veor.64 \argA4, \argA4, d10
vsri.64 d0, \argA3, #64-62
vshl.u64 d1, \argA4, #55
veor.64 d6, d6, d7
vshl.u64 d2, \argA5, #39
vsri.64 d1, \argA4, #64-55
vbic.64 \argA4, d0, d4
vsri.64 d2, \argA5, #64-39
@ argA2 is reused as a temporary for the next two and-not terms
vbic.64 \argA2, d1, d0
vshl.u64 d3, d6, #41
veor.64 \argA5, d4, \argA2
vbic.64 \argA2, d2, d1
vsri.64 d3, d6, #64-41
veor.64 d6, d0, \argA2
vbic.64 \argA2, d3, d2
vbic.64 \argA3, d4, d3
veor.64 \argA2, d1
@ d6 now holds the new argA1 lane: store it, then fold it into d5
vstr.64 d6, [sp, #\argA1]
veor.64 d5, d6
veor.64 \argA3, d2
veor.64 \argA4, d3
.endm
@// --- constants
@ Table of 24 64-bit round constants followed by an all-ones terminator
@ entry. The round loop reads one constant per KeccakThetaRhoPiChiIota and
@ stops when the low 32 bits of the next entry equal 0xFFFFFFFF, so no real
@ constant may have that value in its low word.
@ NOTE(review): this file was converted with ads2gas.pl. In GNU as for ARM
@ the .align operand is a power of two, so ".align 8" presumably requests
@ 256-byte alignment rather than 8 bytes - confirm whether that padding is
@ intended.
.align 8
.ltorg
KeccakF1600RoundConstantsWithTerminator:
.quad 0x0000000000000001
.quad 0x0000000000008082
.quad 0x800000000000808a
.quad 0x8000000080008000
.quad 0x000000000000808b
.quad 0x0000000080000001
.quad 0x8000000080008081
.quad 0x8000000000008009
.quad 0x000000000000008a
.quad 0x0000000000000088
.quad 0x0000000080008009
.quad 0x000000008000000a
.quad 0x000000008000808b
.quad 0x800000000000008b
.quad 0x8000000000008089
.quad 0x8000000000008003
.quad 0x8000000000008002
.quad 0x8000000000000080
.quad 0x000000000000800a
.quad 0x800000008000000a
.quad 0x8000000080008081
.quad 0x8000000000008080
.quad 0x0000000080000001
.quad 0x8000000080008008
.quad 0xFFFFFFFFFFFFFFFF @//terminator
.align 8
@// --- code
@not callable from C!
@ Core round loop. It expects the register and stack layout prepared by
@ KeccakF_armv7a_neon: five lanes at sp+Aba .. sp+Asa, the other twenty
@ lanes in d12..d31, and the XOR of the five stack lanes in d5.
@ r3 walks the round-constant table. Each KeccakThetaRhoPiChiIota consumes
@ one constant, so one pass through roundLoop (four Iota invocations)
@ performs four rounds. The D-register arguments differ from group to group
@ and return to the starting assignment after the fourth group.
@ Uses r0 and r3 as scratch; returns with bx lr.
.global KeccakF_armv7a_neon_asm
KeccakF_armv7a_neon_asm: @
adr r3, KeccakF1600RoundConstantsWithTerminator
roundLoop:
KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31
KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28
KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30
KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27
KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29
KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29
KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28
KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27
KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31
KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30
KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30
KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28
KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31
KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29
KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27
KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27
KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28
KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29
@ peek at the low word of the next table entry (r3 is not advanced here)
ldr r0, [r3]
KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30
@ terminator test; the flags must survive the Chi4 macro that follows
cmp r0, #0xFFFFFFFF
KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31
bne roundLoop
bx lr
@
.align 8
@//void KeccakF_armv7a( tKeccakLane * state ) callable from C
@ C-callable wrapper around KeccakF_armv7a_neon_asm.
@   r0 : pointer to the state, read and written as 25 consecutive 64-bit lanes
@ Steps:
@   1. save q4-q7 and reserve 5*8 bytes of stack
@   2. load lanes 0, 5, 10, 15, 20 into d0..d4 and the other twenty lanes
@      into d12..d31
@   3. spill d0..d4 to the stack slots Aba..Asa and leave their XOR in d5
@   4. call the round loop, then reload the five stack lanes, write all 25
@      lanes back and restore q4-q7
@ The return address is kept in r2 and the state pointer in r1 across the
@ call, so the round loop must not modify r1 or r2.
.global KeccakF_armv7a_neon
KeccakF_armv7a_neon: @
vpush {q4-q7}
sub sp,sp, #5*8
vldr.64 d0, [r0, #0*8]
vldr.64 d12, [r0, #1*8]
vldr.64 d17, [r0, #2*8]
vldr.64 d22, [r0, #3*8]
vldr.64 d27, [r0, #4*8]
vldr.64 d1, [r0, #5*8]
vldr.64 d13, [r0, #6*8]
vldr.64 d18, [r0, #7*8]
vldr.64 d23, [r0, #8*8]
vldr.64 d28, [r0, #9*8]
vldr.64 d2, [r0, #10*8]
vldr.64 d14, [r0, #11*8]
vldr.64 d19, [r0, #12*8]
vldr.64 d24, [r0, #13*8]
vldr.64 d29, [r0, #14*8]
vldr.64 d3, [r0, #15*8]
vldr.64 d15, [r0, #16*8]
vldr.64 d20, [r0, #17*8]
vldr.64 d25, [r0, #18*8]
vldr.64 d30, [r0, #19*8]
vldr.64 d4, [r0, #20*8]
vldr.64 d16, [r0, #21*8]
vldr.64 d21, [r0, #22*8]
vldr.64 d26, [r0, #23*8]
vldr.64 d31, [r0, #24*8]
@ spill d0..d4 while computing d5 = d0^d1^d2^d3^d4; d0 and d1 are stored
@ before the q0 XOR overwrites them
vstr.64 d0, [sp, #Aba]
@ bl below overwrites lr, so keep the return address in r2
mov r2, lr
vstr.64 d1, [sp, #Aga]
veor.64 q0, q0, q1
vstr.64 d2, [sp, #Aka]
veor.64 d5, d0, d1
vstr.64 d3, [sp, #Ama]
@ the round loop uses r0 as scratch, so keep the state pointer in r1
mov r1, r0
vstr.64 d4, [sp, #Asa]
veor.64 d5, d5, d4
bl KeccakF_armv7a_neon_asm
@ reload the five stack lanes and release the 5*8 bytes reserved above
vpop.64 { d0- d4 }
vstr.64 d0, [r1, #0*8]
vstr.64 d12, [r1, #1*8]
vstr.64 d17, [r1, #2*8]
vstr.64 d22, [r1, #3*8]
vstr.64 d27, [r1, #4*8]
vstr.64 d1, [r1, #5*8]
vstr.64 d13, [r1, #6*8]
vstr.64 d18, [r1, #7*8]
vstr.64 d23, [r1, #8*8]
vstr.64 d28, [r1, #9*8]
vstr.64 d2, [r1, #10*8]
vstr.64 d14, [r1, #11*8]
vstr.64 d19, [r1, #12*8]
vstr.64 d24, [r1, #13*8]
vstr.64 d29, [r1, #14*8]
vstr.64 d3, [r1, #15*8]
vstr.64 d15, [r1, #16*8]
vstr.64 d20, [r1, #17*8]
vstr.64 d25, [r1, #18*8]
vstr.64 d30, [r1, #19*8]
vstr.64 d4, [r1, #20*8]
vstr.64 d16, [r1, #21*8]
vstr.64 d21, [r1, #22*8]
vstr.64 d26, [r1, #23*8]
vstr.64 d31, [r1, #24*8]
vpop {q4-q7}
bx r2
@

6
c_src/KeccakF-1600-int-set.h Executable file
View File

@ -0,0 +1,6 @@
#define ProvideFast576
#define ProvideFast832
#define ProvideFast1024
#define ProvideFast1088
#define ProvideFast1152
#define ProvideFast1344

46
c_src/KeccakF-1600-interface.h Executable file
View File

@ -0,0 +1,46 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakPermutationInterface_h_
#define _KeccakPermutationInterface_h_
/* The ProvideFastNNNN switches that enable the fixed-size entry points below
   come from this header. */
#include "KeccakF-1600-int-set.h"
/* One-time setup of the implementation. In the opt32 implementation this
   builds the interleave tables when UseInterleaveTables is defined and does
   nothing otherwise. */
void KeccakInitialize( void );
/* Resets a state buffer. The opt32 implementation clears 200 bytes, so
   state must point to at least 200 bytes. */
void KeccakInitializeState(unsigned char *state);
/* Applies the permutation to the state in place. */
void KeccakPermutation(unsigned char *state);
/* Fixed-size absorb entry points: XOR the first NNNN bits of data
   (NNNN/64 lanes of 8 bytes) into the state, then run the permutation. */
#ifdef ProvideFast576
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast832
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1024
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1088
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1152
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data);
#endif
#ifdef ProvideFast1344
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data);
#endif
/* General absorb: XOR laneCount lanes (8 bytes each) of data into the state,
   then run the permutation. */
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount);
/* Copies the first 16 lanes (128 bytes) of the state into data. */
#ifdef ProvideFast1024
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data);
#endif
/* Copies the first laneCount lanes (8 bytes each) of the state into data. */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount);
#endif

View File

@ -0,0 +1,4 @@
#define Unrolling 2
//#define UseBebigokimisa
//#define UseInterleaveTables
#define UseSchedule 3

524
c_src/KeccakF-1600-opt32.c Executable file
View File

@ -0,0 +1,524 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "brg_endian.h"
#include "KeccakF-1600-opt32-settings.h"
#include "KeccakF-1600-interface.h"
typedef unsigned char UINT8;
typedef unsigned short UINT16;
typedef unsigned int UINT32;
typedef unsigned long long int UINT64;
#ifdef UseInterleaveTables
int interleaveTablesBuilt = 0;
UINT16 interleaveTable[65536];
UINT16 deinterleaveTable[65536];
void buildInterleaveTables()
{
UINT32 i, j;
UINT16 x;
if (!interleaveTablesBuilt) {
for(i=0; i<65536; i++) {
x = 0;
for(j=0; j<16; j++) {
if (i & (1 << j))
x |= (1 << (j/2 + 8*(j%2)));
}
interleaveTable[i] = x;
deinterleaveTable[x] = (UINT16)i;
}
interleaveTablesBuilt = 1;
}
}
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
#define xor2bytesIntoInterleavedWords(even, odd, source, j) \
i##j = interleaveTable[((const UINT16*)source)[j]]; \
((UINT8*)even)[j] ^= i##j & 0xFF; \
((UINT8*)odd)[j] ^= i##j >> 8;
#define setInterleavedWordsInto2bytes(dest, even, odd, j) \
d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
((UINT16*)dest)[j] = d##j;
#else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
#define xor2bytesIntoInterleavedWords(even, odd, source, j) \
i##j = interleaveTable[source[2*j] ^ ((UINT16)source[2*j+1] << 8)]; \
*even ^= (i##j & 0xFF) << (j*8); \
*odd ^= ((i##j >> 8) & 0xFF) << (j*8);
#define setInterleavedWordsInto2bytes(dest, even, odd, j) \
d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \
dest[2*j] = d##j & 0xFF; \
dest[2*j+1] = d##j >> 8;
#endif // Endianness
/*
 * Table-based variant: XORs the 8 bytes at source into the state words
 * *even and *odd, two source bytes at a time, using interleaveTable.
 */
void xor8bytesIntoInterleavedWords(UINT32 *even, UINT32 *odd, const UINT8* source)
{
// i0..i3 are the temporaries that xor2bytesIntoInterleavedWords assigns to
// (the macro builds the name as i##j), so they must keep these names.
UINT16 i0, i1, i2, i3;
xor2bytesIntoInterleavedWords(even, odd, source, 0)
xor2bytesIntoInterleavedWords(even, odd, source, 1)
xor2bytesIntoInterleavedWords(even, odd, source, 2)
xor2bytesIntoInterleavedWords(even, odd, source, 3)
}
#define xorLanesIntoState(laneCount, state, input) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
xor8bytesIntoInterleavedWords(state+i*2, state+i*2+1, input+i*8); \
}
/*
 * Table-based variant: converts the word pair (even, odd) back into 8 bytes
 * at dest, two destination bytes at a time, using deinterleaveTable.
 */
void setInterleavedWordsInto8bytes(UINT8* dest, UINT32 even, UINT32 odd)
{
// d0..d3 are the temporaries that setInterleavedWordsInto2bytes assigns to
// (the macro builds the name as d##j), so they must keep these names.
UINT16 d0, d1, d2, d3;
setInterleavedWordsInto2bytes(dest, even, odd, 0)
setInterleavedWordsInto2bytes(dest, even, odd, 1)
setInterleavedWordsInto2bytes(dest, even, odd, 2)
setInterleavedWordsInto2bytes(dest, even, odd, 3)
}
#define extractLanes(laneCount, state, data) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
setInterleavedWordsInto8bytes(data+i*8, ((UINT32*)state)[i*2], ((UINT32*)state)[i*2+1]); \
}
#else // No interleaving tables
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
// xorInterleavedLE(rateInLanes, state, input): for each of rateInLanes lanes,
// reads two 32-bit words from input, runs each through four bit-swap stages
// (masks 0x22222222, 0x0C0C0C0C, 0x00F000F0, 0x0000FF00), then XORs the two
// low halves into one state word and the two high halves into the next.
// state must be a UINT32 pointer expression. input is read through a
// UINT32 pointer, and nothing here checks its alignment.
// NOTE(review): the four stages presumably gather the even-numbered bits in
// the low half and the odd-numbered bits in the high half - confirm.
// Do not add // comments inside the macro: every line ends in a continuation.
#define xorInterleavedLE(rateInLanes, state, input) \
{ \
const UINT32 * pI = (const UINT32 *)input; \
UINT32 * pS = state; \
UINT32 t, x0, x1; \
int i; \
for (i = (rateInLanes)-1; i >= 0; --i) \
{ \
x0 = *(pI++); \
t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); \
t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); \
t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); \
t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); \
x1 = *(pI++); \
t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); \
t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); \
t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); \
t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); \
*(pS++) ^= (UINT16)x0 | (x1 << 16); \
*(pS++) ^= (x0 >> 16) | (x1 & 0xFFFF0000); \
} \
}
// Little-endian xorLanesIntoState is the macro above under the common name.
#define xorLanesIntoState(laneCount, state, input) \
xorInterleavedLE(laneCount, state, input)
#else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Permutes the bits of a 64-bit lane into the interleaved layout.
 * It runs five swap stages with distances 1, 2, 4, 8 and 16; each stage
 * exchanges the bit groups selected by its mask with the groups `distance`
 * positions above them. fromInterleaving() runs the same stages in reverse
 * order.
 */
UINT64 toInterleaving(UINT64 x)
{
    static const UINT64 stageMask[5] = {
        0x2222222222222222ULL,
        0x0C0C0C0C0C0C0C0CULL,
        0x00F000F000F000F0ULL,
        0x0000FF000000FF00ULL,
        0x00000000FFFF0000ULL
    };
    unsigned int stage;

    for (stage = 0; stage < 5; stage++) {
        const unsigned int distance = 1u << stage;
        const UINT64 swapped = (x ^ (x >> distance)) & stageMask[stage];
        x ^= swapped ^ (swapped << distance);
    }
    return x;
}
/*
 * Big-endian variant: assembles the 8 bytes at source into one 64-bit lane
 * (source[0] is the least significant byte), converts it with
 * toInterleaving() and XORs the low and high 32 bits of the result into
 * evenAndOdd[0] and evenAndOdd[1].
 */
void xor8bytesIntoInterleavedWords(UINT32* evenAndOdd, const UINT8* source)
{
    // This can be optimized
    UINT64 lane = 0;
    UINT64 interleaved;
    unsigned int k;

    for (k = 0; k < 8; k++)
        lane |= ((UINT64)source[k]) << (8*k);

    interleaved = toInterleaving(lane);
    evenAndOdd[0] ^= (UINT32)interleaved;
    evenAndOdd[1] ^= (UINT32)(interleaved >> 32);
}
#define xorLanesIntoState(laneCount, state, input) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
xor8bytesIntoInterleavedWords(state+i*2, input+i*8); \
}
#endif // Endianness
// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002
/*
 * Inverse of the interleaving permutation. It runs five swap stages with
 * distances 16, 8, 4, 2 and 1; each stage exchanges the bit groups selected
 * by its mask with the groups `distance` positions above them.
 */
UINT64 fromInterleaving(UINT64 x)
{
    static const UINT64 stageMask[5] = {
        0x2222222222222222ULL,
        0x0C0C0C0C0C0C0C0CULL,
        0x00F000F000F000F0ULL,
        0x0000FF000000FF00ULL,
        0x00000000FFFF0000ULL
    };
    unsigned int stage;

    /* walk the stages from distance 16 down to distance 1 */
    for (stage = 5; stage-- > 0; ) {
        const unsigned int distance = 1u << stage;
        const UINT64 swapped = (x ^ (x >> distance)) & stageMask[stage];
        x ^= swapped ^ (swapped << distance);
    }
    return x;
}
/*
 * Converts one lane from the interleaved state layout back to 8 plain bytes.
 *
 * dest       : receives 8 bytes, least significant byte first; may have any
 *              alignment
 * evenAndOdd : the two 32-bit state words of the lane
 *
 * The little-endian path used to load through (UINT64*)evenAndOdd and store
 * through (UINT64*)dest. That accesses UINT32 objects through an
 * incompatible type and performs a 64-bit store to a caller-supplied byte
 * pointer that need not be 8-byte aligned. memcpy moves exactly the same
 * bytes without either problem.
 */
void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd)
{
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    UINT64 interleaved;
    UINT64 lane;

    memcpy(&interleaved, evenAndOdd, sizeof(interleaved));
    lane = fromInterleaving(interleaved);
    memcpy(dest, &lane, sizeof(lane));
#else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN)
    // This can be optimized
    UINT64 evenAndOddWord = (UINT64)evenAndOdd[0] ^ ((UINT64)evenAndOdd[1] << 32);
    UINT64 destWord = fromInterleaving(evenAndOddWord);
    dest[0] = destWord & 0xFF;
    dest[1] = (destWord >> 8) & 0xFF;
    dest[2] = (destWord >> 16) & 0xFF;
    dest[3] = (destWord >> 24) & 0xFF;
    dest[4] = (destWord >> 32) & 0xFF;
    dest[5] = (destWord >> 40) & 0xFF;
    dest[6] = (destWord >> 48) & 0xFF;
    dest[7] = (destWord >> 56) & 0xFF;
#endif // Endianness
}
#define extractLanes(laneCount, state, data) \
{ \
int i; \
for(i=0; i<(laneCount); i++) \
setInterleavedWordsInto8bytes(data+i*8, (UINT32*)state+i*2); \
}
#endif // With or without interleaving tables
/*
 * ROL32(a, offset): rotate the 32-bit value a left by offset bits.
 * The generic form is only defined for 0 < offset < 32, because it shifts
 * by 32-(offset).
 * The macro argument a is parenthesised inside the cast so that a compound
 * argument (for example x + y) is converted to UINT32 as a whole rather
 * than only its first operand.
 */
#if defined(_MSC_VER)
#define ROL32(a, offset) _rotl(a, offset)
#elif (defined (__arm__) && defined(__ARMCC_VERSION))
#define ROL32(a, offset) __ror(a, 32-(offset))
#else
#define ROL32(a, offset) ((((UINT32)(a)) << (offset)) ^ (((UINT32)(a)) >> (32-(offset))))
#endif
#include "KeccakF-1600-unrolling.macros"
#include "KeccakF-1600-32.macros"
#if (UseSchedule == 3)
#ifdef UseBebigokimisa
#error "No lane complementing with schedule 3."
#endif
#if (Unrolling != 2)
#error "Only unrolling 2 is supported by schedule 3."
#endif
/*
 * Schedule-3 entry points. Each body is the `rounds` macro, optionally
 * preceded by xorLanesIntoState. `rounds` comes from the included .macros
 * files, which are not shown here; presumably it expands to the permutation
 * rounds operating on the parameter named `state`, so that name must stay.
 */
/* Runs `rounds` on the state. */
void KeccakPermutationOnWords(UINT32 *state)
{
rounds
}
/* XORs laneCount lanes (8 input bytes each) into the state, then runs
   `rounds`. */
void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
{
xorLanesIntoState(laneCount, state, input)
rounds
}
/* The fixed-size variants below are the same with a constant lane count:
   9, 13, 16, 17, 18 and 21 lanes for 576, 832, 1024, 1088, 1152 and 1344
   bits. */
#ifdef ProvideFast576
void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(9, state, input)
rounds
}
#endif
#ifdef ProvideFast832
void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(13, state, input)
rounds
}
#endif
#ifdef ProvideFast1024
void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(16, state, input)
rounds
}
#endif
#ifdef ProvideFast1088
void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(17, state, input)
rounds
}
#endif
#ifdef ProvideFast1152
void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(18, state, input)
rounds
}
#endif
#ifdef ProvideFast1344
void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
{
xorLanesIntoState(21, state, input)
rounds
}
#endif
#else // (Schedule != 3)
/*
 * Entry points for schedules other than 3. Each body declares working
 * variables with declareABCDE, optionally XORs input lanes into the state,
 * copies the state into A with copyFromState and then expands `rounds`.
 * declareABCDE, copyFromState and `rounds` come from the included .macros
 * files, which are not shown here; presumably they refer to the local names
 * `state`, `A` and `i`, so those names must stay.
 */
/* Runs `rounds` on the state. The loop counter i is only declared when the
   rounds are not fully unrolled (Unrolling != 24). */
void KeccakPermutationOnWords(UINT32 *state)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromState(A, state)
rounds
}
/* XORs laneCount lanes (8 input bytes each) into the state, then runs
   `rounds`. Unlike the function above, i is declared unconditionally here
   and in the fixed-size variants. */
void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount)
{
declareABCDE
unsigned int i;
xorLanesIntoState(laneCount, state, input)
copyFromState(A, state)
rounds
}
/* The fixed-size variants below are the same with a constant lane count:
   9, 13, 16, 17, 18 and 21 lanes for 576, 832, 1024, 1088, 1152 and 1344
   bits. */
#ifdef ProvideFast576
void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(9, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast832
void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(13, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1024
void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(16, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1088
void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(17, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1152
void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(18, state, input)
copyFromState(A, state)
rounds
}
#endif
#ifdef ProvideFast1344
void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input)
{
declareABCDE
unsigned int i;
xorLanesIntoState(21, state, input)
copyFromState(A, state)
rounds
}
#endif
#endif
/*
 * One-time setup of the implementation. Builds the interleave lookup tables
 * when UseInterleaveTables is defined; otherwise there is nothing to do.
 * The parameter list is spelled (void) to match the prototype in
 * KeccakF-1600-interface.h.
 */
void KeccakInitialize(void)
{
#ifdef UseInterleaveTables
    buildInterleaveTables();
#endif
}
/*
 * Resets a 200-byte state buffer to all zero bytes.
 * With UseBebigokimisa defined, the two 32-bit words of lanes 1, 2, 8, 12,
 * 17 and 20 are then set to all ones.
 */
void KeccakInitializeState(unsigned char *state)
{
#ifdef UseBebigokimisa
    static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT32 *words = (UINT32*)state;
    unsigned int k;
#endif

    memset(state, 0, 200);

#ifdef UseBebigokimisa
    /* each lane occupies two consecutive 32-bit words */
    for (k = 0; k < 6; k++) {
        words[2*complementedLanes[k]] = ~(UINT32)0;
        words[2*complementedLanes[k]+1] = ~(UINT32)0;
    }
#endif
}
void KeccakPermutation(unsigned char *state)
{
// We assume the state is always stored as interleaved 32-bit words
KeccakPermutationOnWords((UINT32*)state);
}
#ifdef ProvideFast576
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring576bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast832
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring832bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1024
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1024bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1088
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1088bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1152
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1152bits((UINT32*)state, data);
}
#endif
#ifdef ProvideFast1344
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationOnWordsAfterXoring1344bits((UINT32*)state, data);
}
#endif
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
KeccakPermutationOnWordsAfterXoring((UINT32*)state, data, laneCount);
}
#ifdef ProvideFast1024
/*
 * Copies the first 16 lanes (128 bytes) of the state into data.
 * With UseBebigokimisa defined, the two 32-bit words of lanes 1, 2, 8 and 12
 * are bitwise complemented in the output.
 */
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
    extractLanes(16, state, data)
#ifdef UseBebigokimisa
    {
        static const unsigned int complementedLanes[4] = { 1, 2, 8, 12 };
        UINT32 *words = (UINT32*)data;
        unsigned int k;

        for (k = 0; k < 4; k++) {
            const unsigned int w = 2*complementedLanes[k];
            words[w] = ~words[w];
            words[w+1] = ~words[w+1];
        }
    }
#endif
}
#endif
/*
 * Copies the first laneCount lanes (8 bytes each) of the state into data.
 * With UseBebigokimisa defined, the two 32-bit words of every extracted lane
 * among 1, 2, 8, 12, 17 and 20 are bitwise complemented in the output.
 */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
    extractLanes(laneCount, state, data)
#ifdef UseBebigokimisa
    {
        /* ascending order: stop at the first lane that was not extracted */
        static const unsigned int complementedLanes[6] = { 1, 2, 8, 12, 17, 20 };
        UINT32 *words = (UINT32*)data;
        unsigned int k;

        for (k = 0; k < 6 && laneCount > complementedLanes[k]; k++) {
            const unsigned int w = 2*complementedLanes[k];
            words[w] = ~words[w];
            words[w+1] = ~words[w+1];
        }
    }
#endif
}

View File

@ -0,0 +1,7 @@
#define Unrolling 24
#define UseBebigokimisa
//#define UseSSE
//#define UseOnlySIMD64
//#define UseMMX
//#define UseSHLD
//#define UseXOP

504
c_src/KeccakF-1600-opt64.c Executable file
View File

@ -0,0 +1,504 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "brg_endian.h"
#include "KeccakF-1600-opt64-settings.h"
#include "KeccakF-1600-interface.h"
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
#if defined(UseSSE)
#include <x86intrin.h>
typedef __m128i V64;
typedef __m128i V128;
typedef union {
V128 v128;
UINT64 v64[2];
} V6464;
#define ANDnu64(a, b) _mm_andnot_si128(a, b)
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define ROL64(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define SHUFFLEBYTES128(a, b) _mm_shuffle_epi8(a, b)
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define ROL64in128(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
#define ZERO128() _mm_setzero_si128()
#ifdef UseOnlySIMD64
#include "KeccakF-1600-simd64.macros"
#else
ALIGN const UINT64 rho8_56[2] = {0x0605040302010007, 0x080F0E0D0C0B0A09};
#include "KeccakF-1600-simd128.macros"
#endif
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseSSE"
#endif
#elif defined(UseXOP)
// XOP variant: both V64 and V128 are plain 128-bit SSE registers, and the
// lane rotations use the _mm_roti_epi64 / _mm_rot_epi64 intrinsics.
#include <x86intrin.h>
typedef __m128i V64;
typedef __m128i V128;
#define LOAD64(a) _mm_loadl_epi64((const V64 *)&(a))
#define CONST64(a) _mm_loadl_epi64((const V64 *)&(a))
#define STORE64(a, b) _mm_storel_epi64((V64 *)&(a), b)
#define XOR64(a, b) _mm_xor_si128(a, b)
#define XOReq64(a, b) a = _mm_xor_si128(a, b)
#define ANDnu128(a, b) _mm_andnot_si128(a, b)
#define LOAD6464(a, b) _mm_set_epi64((__m64)(a), (__m64)(b))
#define CONST128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128(a) _mm_load_si128((const V128 *)&(a))
#define LOAD128u(a) _mm_loadu_si128((const V128 *)&(a))
#define STORE128(a, b) _mm_store_si128((V128 *)&(a), b)
#define XOR128(a, b) _mm_xor_si128(a, b)
#define XOReq128(a, b) a = _mm_xor_si128(a, b)
#define ZERO128() _mm_setzero_si128()
#define SWAP64(a) _mm_shuffle_epi32(a, 0x4E)
#define GET64LOLO(a, b) _mm_unpacklo_epi64(a, b)
#define GET64HIHI(a, b) _mm_unpackhi_epi64(a, b)
#define GET64LOHI(a, b) ((__m128i)_mm_blend_pd((__m128d)a, (__m128d)b, 2))
#define GET64HILO(a, b) SWAP64(GET64LOHI(b, a))
#define COPY64HI2LO(a) _mm_shuffle_epi32(a, 0xEE)
#define COPY64LO2HI(a) _mm_shuffle_epi32(a, 0x44)
// ROL6464same rotates both 64-bit halves by the same immediate count.
#define ROL6464same(a, o) _mm_roti_epi64(a, o)
// ROL6464 rotates the two halves by different counts; the counts are read
// from the 16-byte table rot_<r1>_<r2> selected by token pasting, so only the
// (r1, r2) pairs that have a table below can be used.
#define ROL6464(a, r1, r2) _mm_rot_epi64(a, CONST128( rot_##r1##_##r2 ))
// Rotation-count tables for ROL6464: element 0 holds r1, element 1 holds r2.
// They are loaded with CONST128 (an aligned load), hence the ALIGN qualifier.
ALIGN const UINT64 rot_0_20[2] = { 0, 20};
ALIGN const UINT64 rot_44_3[2] = {44, 3};
ALIGN const UINT64 rot_43_45[2] = {43, 45};
ALIGN const UINT64 rot_21_61[2] = {21, 61};
ALIGN const UINT64 rot_14_28[2] = {14, 28};
ALIGN const UINT64 rot_1_36[2] = { 1, 36};
ALIGN const UINT64 rot_6_10[2] = { 6, 10};
ALIGN const UINT64 rot_25_15[2] = {25, 15};
ALIGN const UINT64 rot_8_56[2] = { 8, 56};
ALIGN const UINT64 rot_18_27[2] = {18, 27};
ALIGN const UINT64 rot_62_55[2] = {62, 55};
ALIGN const UINT64 rot_39_41[2] = {39, 41};
#if defined(UseSimulatedXOP)
// For debugging purposes, when XOP is not available
// The two rotate macros are replaced by plain SSE2 shift/or sequences.
#undef ROL6464
#undef ROL6464same
#define ROL6464same(a, o) _mm_or_si128(_mm_slli_epi64(a, o), _mm_srli_epi64(a, 64-(o)))
// Builds the result from the low half of a rotated by r0 and the high half of
// a rotated by r1.
// NOTE(review): ROL64 is not defined anywhere in this UseXOP branch as far as
// is visible here; ROL6464same looks like the intended macro - confirm before
// building with UseSimulatedXOP.
V128 ROL6464(V128 a, int r0, int r1)
{
V128 a0 = ROL64(a, r0);
V128 a1 = COPY64HI2LO(ROL64(a, r1));
return GET64LOLO(a0, a1);
}
#endif
#include "KeccakF-1600-xop.macros"
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseXOP"
#endif
#elif defined(UseMMX)
// MMX variant: every lane is handled as one 64-bit __m64 value. The
// permutation functions below call _mm_empty() after the rounds when UseMMX
// is defined.
#include <mmintrin.h>
typedef __m64 V64;
#define ANDnu64(a, b) _mm_andnot_si64(a, b)
// Two spellings of the UINT64 <-> __m64 conversions: MSVC and the Intel
// compiler go through a pointer cast, other compilers use a direct value cast.
#if (defined(_MSC_VER) || defined (__INTEL_COMPILER))
#define LOAD64(a) *(V64*)&(a)
#define CONST64(a) *(V64*)&(a)
#define STORE64(a, b) *(V64*)&(a) = b
#else
#define LOAD64(a) (V64)a
#define CONST64(a) (V64)a
#define STORE64(a, b) a = (UINT64)b
#endif
// Rotate-left of a 64-bit lane built from two shifts; o must be in 1..63.
#define ROL64(a, o) _mm_or_si64(_mm_slli_si64(a, o), _mm_srli_si64(a, 64-(o)))
#define XOR64(a, b) _mm_xor_si64(a, b)
#define XOReq64(a, b) a = _mm_xor_si64(a, b)
#include "KeccakF-1600-simd64.macros"
#ifdef UseBebigokimisa
#error "UseBebigokimisa cannot be used in combination with UseMMX"
#endif
#else
#if defined(_MSC_VER)
/* MSVC: use the compiler's 64-bit rotate-left intrinsic. */
#define ROL64(a, offset) _rotl64(a, offset)
#elif defined(UseSHLD)
/* GCC statement expression that performs the rotation with one SHLD
   instruction. N must be a compile-time constant ("i" constraint). */
#define ROL64(x,N) ({ \
register UINT64 __out; \
register UINT64 __in = (x); \
__asm__ ("shld %2,%0,%0" : "=r"(__out) : "0"(__in), "i"(N)); \
__out; \
})
#else
/* Portable rotate-left of a 64-bit lane. Both arguments are fully
   parenthesized so that expression arguments such as ROL64(x, a|b) expand
   correctly. offset must be in 1..63: a shift by 64 is undefined in C. */
#define ROL64(a, offset) ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset))))
#endif
#include "KeccakF-1600-64.macros"
#endif
#include "KeccakF-1600-unrolling.macros"
// Applies the permutation to a state given as an array of UINT64 lanes.
// All of the work is done by the declareABCDE / copyFromState / rounds macros
// supplied by the included .macros files. The loop counter i is only declared
// when Unrolling != 24 (presumably because `rounds` then expands to a loop
// that uses it - confirm in KeccakF-1600-unrolling.macros).
void KeccakPermutationOnWords(UINT64 *state)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromState(A, state)
rounds
#if defined(UseMMX)
// Leave MMX state so that later floating-point code can run.
_mm_empty();
#endif
}
// XORs the first laneCount lanes of input into state, then runs the same
// macro sequence as KeccakPermutationOnWords on the updated state.
// state: lanes of the permutation state, modified in place.
// input: at least laneCount lanes to be XORed in.
void KeccakPermutationOnWordsAfterXoring(UINT64 *state, const UINT64 *input, unsigned int laneCount)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
unsigned int j;
// The XOR is done directly on the stored state before it is copied into the
// working variables.
for(j=0; j<laneCount; j++)
state[j] ^= input[j];
copyFromState(A, state)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
// Fixed-size variants of KeccakPermutationOnWordsAfterXoring. Each one is
// compiled only when the matching ProvideFast<N> macro is defined, and uses
// the copyFromStateAndXor<N>bits macro, which combines loading the state into
// the working variables with XORing in N bits of input.
#ifdef ProvideFast576
// XOR 576 bits (9 lanes) of input into state, then permute.
void KeccakPermutationOnWordsAfterXoring576bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor576bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast832
// XOR 832 bits (13 lanes) of input into state, then permute.
void KeccakPermutationOnWordsAfterXoring832bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor832bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1024
// XOR 1024 bits (16 lanes) of input into state, then permute.
void KeccakPermutationOnWordsAfterXoring1024bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor1024bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1088
// XOR 1088 bits (17 lanes) of input into state, then permute.
void KeccakPermutationOnWordsAfterXoring1088bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor1088bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1152
// XOR 1152 bits (18 lanes) of input into state, then permute.
void KeccakPermutationOnWordsAfterXoring1152bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor1152bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
#ifdef ProvideFast1344
// XOR 1344 bits (21 lanes) of input into state, then permute.
void KeccakPermutationOnWordsAfterXoring1344bits(UINT64 *state, const UINT64 *input)
{
declareABCDE
#if (Unrolling != 24)
unsigned int i;
#endif
copyFromStateAndXor1344bits(A, state, input)
rounds
#if defined(UseMMX)
_mm_empty();
#endif
}
#endif
// Does nothing in this implementation; the function body is empty.
void KeccakInitialize()
{
}
/*
 * Resets a 200-byte permutation state to its initial value.
 *
 * Every byte is cleared. When UseBebigokimisa is defined, lanes 1, 2, 8, 12,
 * 17 and 20 are then set to all-ones; these are the same lanes that
 * KeccakExtract inverts again on output.
 */
void KeccakInitializeState(unsigned char *state)
{
#ifdef UseBebigokimisa
    static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT64 *lanes = (UINT64*)state;
    unsigned int k;
#endif

    memset(state, 0, 200);
#ifdef UseBebigokimisa
    for (k = 0; k < 6; ++k)
        lanes[invertedLanes[k]] = ~(UINT64)0;
#endif
}
// Byte-oriented entry point: reinterprets the state buffer as UINT64 lanes
// and forwards to KeccakPermutationOnWords. No byte-order conversion is done
// here.
void KeccakPermutation(unsigned char *state)
{
// We assume the state is always stored as words
KeccakPermutationOnWords((UINT64*)state);
}
/*
 * Assembles one 64-bit lane from 8 bytes, least significant byte first
 * (bytes[0] becomes bits 0..7), independently of the host byte order.
 */
void fromBytesToWord(UINT64 *word, const UINT8 *bytes)
{
    unsigned int shift;

    *word = 0;
    for (shift = 0; shift < 64; shift += 8)
        *word |= (UINT64)(bytes[shift / 8]) << shift;
}
/*
 * Fixed-rate absorb functions, each compiled only when the matching
 * ProvideFast<N> macro is defined. They XOR N bits of data into the state and
 * permute it. On a little-endian host the data bytes are used directly as
 * lanes; otherwise they are first converted lane by lane with
 * fromBytesToWord.
 */
#ifdef ProvideFast576
/* Absorb 576 bits = 9 lanes. */
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[9];
    unsigned int lane;

    for (lane = 0; lane < 9; ++lane)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring576bits((UINT64*)state, (const UINT64*)data);
#endif
}
#endif
#ifdef ProvideFast832
/* Absorb 832 bits = 13 lanes. */
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[13];
    unsigned int lane;

    for (lane = 0; lane < 13; ++lane)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring832bits((UINT64*)state, (const UINT64*)data);
#endif
}
#endif
#ifdef ProvideFast1024
/* Absorb 1024 bits = 16 lanes. */
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[16];
    unsigned int lane;

    for (lane = 0; lane < 16; ++lane)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1024bits((UINT64*)state, (const UINT64*)data);
#endif
}
#endif
#ifdef ProvideFast1088
/* Absorb 1088 bits = 17 lanes. */
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[17];
    unsigned int lane;

    for (lane = 0; lane < 17; ++lane)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1088bits((UINT64*)state, (const UINT64*)data);
#endif
}
#endif
#ifdef ProvideFast1152
/* Absorb 1152 bits = 18 lanes. */
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[18];
    unsigned int lane;

    for (lane = 0; lane < 18; ++lane)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1152bits((UINT64*)state, (const UINT64*)data);
#endif
}
#endif
#ifdef ProvideFast1344
/* Absorb 1344 bits = 21 lanes. */
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[21];
    unsigned int lane;

    for (lane = 0; lane < 21; ++lane)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, lanes);
#else
    KeccakPermutationOnWordsAfterXoring1344bits((UINT64*)state, (const UINT64*)data);
#endif
}
#endif
/*
 * XORs laneCount 64-bit lanes of data into the state and permutes it.
 *
 * On a little-endian host the data bytes are used directly as lanes.
 * Otherwise they are converted into a 25-lane scratch array first, so
 * laneCount must not exceed 25 there.
 */
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    UINT64 lanes[25];
    unsigned int lane;

    for (lane = 0; lane < laneCount; ++lane)
        fromBytesToWord(&lanes[lane], &data[lane * 8]);
    KeccakPermutationOnWordsAfterXoring((UINT64*)state, lanes, laneCount);
#else
    KeccakPermutationOnWordsAfterXoring((UINT64*)state, (const UINT64*)data, laneCount);
#endif
}
/*
 * Writes one 64-bit lane as 8 bytes, least significant byte first
 * (bits 0..7 go to bytes[0]), independently of the host byte order.
 */
void fromWordToBytes(UINT8 *bytes, const UINT64 word)
{
    unsigned int shift;

    for (shift = 0; shift < 64; shift += 8)
        bytes[shift / 8] = (UINT8)((word >> shift) & 0xFF);
}
#ifdef ProvideFast1024
/*
 * Copies the first 1024 bits (16 lanes, 128 bytes) of the state to data,
 * least significant byte of each lane first. When UseBebigokimisa is
 * defined, output lanes 1, 2, 8 and 12 are bitwise inverted after the copy.
 */
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
#ifdef UseBebigokimisa
    UINT64 *out = (UINT64*)data;
#endif
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    const UINT64 *lanes = (const UINT64*)state;
    unsigned int lane;

    for (lane = 0; lane < 16; ++lane)
        fromWordToBytes(&data[lane * 8], lanes[lane]);
#else
    memcpy(data, state, 128);
#endif
#ifdef UseBebigokimisa
    out[ 1] = ~out[ 1];
    out[ 2] = ~out[ 2];
    out[ 8] = ~out[ 8];
    out[12] = ~out[12];
#endif
}
#endif
/*
 * Copies the first laneCount lanes (laneCount*8 bytes) of the state to data,
 * least significant byte of each lane first.
 *
 * When UseBebigokimisa is defined, those of the output lanes 1, 2, 8, 12, 17
 * and 20 that lie below laneCount are bitwise inverted after the copy.
 */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#ifdef UseBebigokimisa
    /* Ascending order, so the scan can stop at the first lane that was not
       extracted. */
    static const unsigned int invertedLanes[6] = { 1, 2, 8, 12, 17, 20 };
    UINT64 *out = (UINT64*)data;
    unsigned int k;
#endif
#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN)
    const UINT64 *lanes = (const UINT64*)state;
    unsigned int lane;

    for (lane = 0; lane < laneCount; ++lane)
        fromWordToBytes(&data[lane * 8], lanes[lane]);
#else
    memcpy(data, state, laneCount*8);
#endif
#ifdef UseBebigokimisa
    for (k = 0; k < 6 && laneCount > invertedLanes[k]; ++k)
        out[invertedLanes[k]] = ~out[invertedLanes[k]];
#endif
}

300
c_src/KeccakF-1600-reference.c Executable file
View File

@ -0,0 +1,300 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <stdio.h>
#include <string.h>
#include "brg_endian.h"
#include "displayIntermediateValues.h"
#include "KeccakNISTInterface.h"
#include "KeccakF-1600-interface.h"
/* Fixed-width helper types used throughout this file. */
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;
/* Number of rounds of the permutation, and the per-round constants that
   KeccakInitializeRoundConstants fills in and iota XORs into lane (0,0). */
#define nrRounds 24
UINT64 KeccakRoundConstants[nrRounds];
/* Number of lanes in the state, and the per-lane rotation offsets that
   KeccakInitializeRhoOffsets fills in and rho applies. */
#define nrLanes 25
unsigned int KeccakRhoOffsets[nrLanes];
/* Forward declarations of the permutation and its five step mappings. */
void KeccakPermutationOnWords(UINT64 *state);
void theta(UINT64 *A);
void rho(UINT64 *A);
void pi(UINT64 *A);
void chi(UINT64 *A);
void iota(UINT64 *A, unsigned int indexRound);
/*
 * Converts the byte-array state into KeccakPermutationSize/64 lanes, reading
 * each lane from 8 consecutive bytes, least significant byte first.
 */
void fromBytesToWords(UINT64 *stateAsWords, const unsigned char *state)
{
    unsigned int lane, shift;

    for (lane = 0; lane < (KeccakPermutationSize/64); lane++) {
        const unsigned char *src = state + lane*(64/8);

        stateAsWords[lane] = 0;
        for (shift = 0; shift < 64; shift += 8)
            stateAsWords[lane] |= (UINT64)(src[shift/8]) << shift;
    }
}
/*
 * Converts KeccakPermutationSize/64 lanes back into the byte-array state,
 * writing each lane as 8 consecutive bytes, least significant byte first.
 */
void fromWordsToBytes(unsigned char *state, const UINT64 *stateAsWords)
{
    unsigned int lane, shift;

    for (lane = 0; lane < (KeccakPermutationSize/64); lane++) {
        unsigned char *dst = state + lane*(64/8);

        for (shift = 0; shift < 64; shift += 8)
            dst[shift/8] = (unsigned char)((stateAsWords[lane] >> shift) & 0xFF);
    }
}
/*
 * Applies the permutation to a state stored as bytes, reporting the state
 * before and after through displayStateAsBytes.
 *
 * On a little-endian host the bytes are permuted in place as lanes; on other
 * hosts they are converted to lanes, permuted, and converted back.
 */
void KeccakPermutation(unsigned char *state)
{
    displayStateAsBytes(1, "Input of permutation", state);
#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN)
    KeccakPermutationOnWords((UINT64*)state);
#else
    {
        UINT64 lanes[KeccakPermutationSize/64];

        fromBytesToWords(lanes, state);
        KeccakPermutationOnWords(lanes);
        fromWordsToBytes(state, lanes);
    }
#endif
    displayStateAsBytes(1, "State after permutation", state);
}
/*
 * XORs the first dataLengthInBytes bytes of data into the state, then
 * applies KeccakPermutation to it.
 */
void KeccakPermutationAfterXor(unsigned char *state, const unsigned char *data, unsigned int dataLengthInBytes)
{
    unsigned int offset = 0;

    while (offset < dataLengthInBytes) {
        state[offset] ^= data[offset];
        offset++;
    }
    KeccakPermutation(state);
}
/*
 * Runs all nrRounds rounds on a state given as 64-bit lanes. Each round
 * applies theta, rho, pi, chi and iota in that order, and the state is
 * reported through the display helpers after every step.
 */
void KeccakPermutationOnWords(UINT64 *state)
{
    unsigned int round = 0;

    displayStateAs64bitWords(3, "Same, with lanes as 64-bit words", state);
    while (round < nrRounds) {
        displayRoundNumber(3, round);

        theta(state);
        displayStateAs64bitWords(3, "After theta", state);

        rho(state);
        displayStateAs64bitWords(3, "After rho", state);

        pi(state);
        displayStateAs64bitWords(3, "After pi", state);

        chi(state);
        displayStateAs64bitWords(3, "After chi", state);

        iota(state, round);
        displayStateAs64bitWords(3, "After iota", state);

        round++;
    }
}
/* Position of lane (x, y) in the 25-lane state array; both coordinates are
   reduced modulo 5. */
#define index(x, y) (((x)%5)+5*((y)%5))
/* Rotate the 64-bit value a left by offset bits, for offset in 0..63.
   offset == 0 is special-cased because a shift by 64 is undefined in C.
   All arguments are parenthesized so that expression arguments expand
   correctly; note that offset is evaluated more than once. */
#define ROL64(a, offset) (((offset) != 0) ? ((((UINT64)(a)) << (offset)) ^ (((UINT64)(a)) >> (64-(offset)))) : (UINT64)(a))
/*
 * Theta step: XORs into every lane of column x the value
 * ROL64(parity[x+1], 1) ^ parity[x+4], where parity[] holds the XOR of the
 * five lanes of each column (indices taken modulo 5).
 */
void theta(UINT64 *A)
{
    UINT64 parity[5], mix[5];
    unsigned int col, row;

    for (col = 0; col < 5; col++) {
        UINT64 acc = 0;

        for (row = 0; row < 5; row++)
            acc ^= A[index(col, row)];
        parity[col] = acc;
    }

    for (col = 0; col < 5; col++)
        mix[col] = ROL64(parity[(col+1)%5], 1) ^ parity[(col+4)%5];

    for (row = 0; row < 5; row++)
        for (col = 0; col < 5; col++)
            A[index(col, row)] ^= mix[col];
}
/*
 * Rho step: rotates every lane left by its entry in KeccakRhoOffsets.
 */
void rho(UINT64 *A)
{
    unsigned int col, row, lane;

    for (col = 0; col < 5; col++) {
        for (row = 0; row < 5; row++) {
            lane = index(col, row);
            A[lane] = ROL64(A[lane], KeccakRhoOffsets[lane]);
        }
    }
}
/*
 * Pi step: moves the lane at (x, y) to position (y, 2x+3y), coordinates
 * taken modulo 5. A snapshot of the state is taken first so that every lane
 * is read before any is overwritten.
 */
void pi(UINT64 *A)
{
    UINT64 snapshot[25];
    unsigned int col, row;

    memcpy(snapshot, A, sizeof(snapshot));
    for (col = 0; col < 5; col++)
        for (row = 0; row < 5; row++)
            A[index(row, 2*col+3*row)] = snapshot[index(col, row)];
}
/*
 * Chi step: within each row of five lanes, every lane is XORed with
 * (NOT next lane) AND (lane after next), positions taken modulo 5. The new
 * row is computed into a scratch array before being written back.
 */
void chi(UINT64 *A)
{
    UINT64 out[5];
    unsigned int col, row;

    for (row = 0; row < 5; row++) {
        UINT64 *plane = A + 5*row;  /* plane[x] is A[index(x, row)] */

        for (col = 0; col < 5; col++)
            out[col] = plane[col] ^ ((~plane[(col+1)%5]) & plane[(col+2)%5]);
        for (col = 0; col < 5; col++)
            plane[col] = out[col];
    }
}
/*
 * Iota step: XORs the constant of round indexRound into lane (0, 0).
 */
void iota(UINT64 *A, unsigned int indexRound)
{
    const UINT64 roundConstant = KeccakRoundConstants[indexRound];

    A[index(0, 0)] ^= roundConstant;
}
/*
 * Advances the 8-bit LFSR by one step and returns the bit that was in
 * position 0 before the step (0 or 1).
 *
 * The register is shifted left by one; if bit 7 was set, 0x71 is XORed in
 * (primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1).
 */
int LFSR86540(UINT8 *LFSR)
{
    const UINT8 current = *LFSR;
    UINT8 next = (UINT8)(current << 1);

    if ((current & 0x80) != 0)
        next ^= 0x71;
    *LFSR = next;
    return (current & 0x01) != 0;
}
void KeccakInitializeRoundConstants()
{
UINT8 LFSRstate = 0x01;
unsigned int i, j, bitPosition;
for(i=0; i<nrRounds; i++) {
KeccakRoundConstants[i] = 0;
for(j=0; j<7; j++) {
bitPosition = (1<<j)-1; //2^j-1
if (LFSR86540(&LFSRstate))
KeccakRoundConstants[i] ^= (UINT64)1<<bitPosition;
}
}
}
/*
 * Fills KeccakRhoOffsets. Lane (0, 0) gets offset 0. Starting from
 * (x, y) = (1, 0), the t-th visited lane (t = 1..24) gets offset
 * t*(t+1)/2 mod 64, and the walk moves on to (y, 2x+3y mod 5).
 */
void KeccakInitializeRhoOffsets()
{
    unsigned int x = 1, y = 0, step;

    KeccakRhoOffsets[index(0, 0)] = 0;
    for (step = 1; step <= 24; step++) {
        const unsigned int nextX = y % 5;
        const unsigned int nextY = (2*x + 3*y) % 5;

        KeccakRhoOffsets[index(x, y)] = (step*(step+1)/2) % 64;
        x = nextX;
        y = nextY;
    }
}
/* Fills the two run-time tables used by the permutation:
   KeccakRoundConstants (read by iota) and KeccakRhoOffsets (read by rho). */
void KeccakInitialize()
{
KeccakInitializeRoundConstants();
KeccakInitializeRhoOffsets();
}
/*
 * Writes the nrRounds round constants to f, one per line, as
 * "RC[nn][0][0] = " followed by 16 upper-case hex digits, then a blank line.
 *
 * The round index is unsigned, so it is printed with %u; passing it to a
 * signed %i conversion is a type mismatch.
 */
void displayRoundConstants(FILE *f)
{
    unsigned int i;

    for(i=0; i<nrRounds; i++) {
        /* The 64-bit constant is printed as two 32-bit halves, high first. */
        fprintf(f, "RC[%02u][0][0] = ", i);
        fprintf(f, "%08X", (unsigned int)(KeccakRoundConstants[i] >> 32));
        fprintf(f, "%08X", (unsigned int)(KeccakRoundConstants[i] & 0xFFFFFFFFULL));
        fprintf(f, "\n");
    }
    fprintf(f, "\n");
}
/*
 * Writes the 25 rho offsets to f, one per line, as "RhoOffset[x][y] = nn",
 * iterating y in the outer loop and x in the inner loop, then a blank line.
 *
 * The coordinates and offsets are unsigned, so they are printed with %u;
 * passing them to a signed %i conversion is a type mismatch.
 */
void displayRhoOffsets(FILE *f)
{
    unsigned int x, y;

    for(y=0; y<5; y++) for(x=0; x<5; x++) {
        fprintf(f, "RhoOffset[%u][%u] = ", x, y);
        fprintf(f, "%2u", KeccakRhoOffsets[index(x, y)]);
        fprintf(f, "\n");
    }
    fprintf(f, "\n");
}
/* Clears all KeccakPermutationSizeInBytes bytes of the state. */
void KeccakInitializeState(unsigned char *state)
{
memset(state, 0, KeccakPermutationSizeInBytes);
}
/* Fixed-rate absorb functions, each compiled only when the matching
   ProvideFast<N> macro is defined. Each one XORs N/8 bytes of data into the
   state and permutes it via KeccakPermutationAfterXor. */
#ifdef ProvideFast576
/* 576 bits = 72 bytes. */
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 72);
}
#endif
#ifdef ProvideFast832
/* 832 bits = 104 bytes. */
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 104);
}
#endif
#ifdef ProvideFast1024
/* 1024 bits = 128 bytes. */
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 128);
}
#endif
#ifdef ProvideFast1088
/* 1088 bits = 136 bytes. */
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 136);
}
#endif
#ifdef ProvideFast1152
/* 1152 bits = 144 bytes. */
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 144);
}
#endif
#ifdef ProvideFast1344
/* 1344 bits = 168 bytes. */
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 168);
}
#endif
/* XORs laneCount lanes (laneCount*8 bytes) of data into the state and
   permutes it. */
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
KeccakPermutationAfterXor(state, data, laneCount*8);
}
#ifdef ProvideFast1024
/* Copies the first 1024 bits (128 bytes) of the state to data. */
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
memcpy(data, state, 128);
}
#endif
/* Copies the first laneCount lanes (laneCount*8 bytes) of the state to
   data. */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
memcpy(data, state, laneCount*8);
}

20
c_src/KeccakF-1600-reference.h Executable file
View File

@ -0,0 +1,20 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakPermutationReference_h_
#define _KeccakPermutationReference_h_

/* FILE is used in the prototypes below; including <stdio.h> here keeps the
   header usable regardless of what the including file has already pulled
   in. */
#include <stdio.h>

/* Writes the round constants of the reference implementation to f. */
void displayRoundConstants(FILE *f);
/* Writes the rho rotation offsets of the reference implementation to f. */
void displayRhoOffsets(FILE *f);

#endif

View File

@ -0,0 +1,371 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <stdio.h>
#include <string.h>
#include "brg_endian.h"
#include "displayIntermediateValues.h"
#include "KeccakNISTInterface.h"
#include "KeccakF-1600-interface.h"
/* Fixed-width helper types used throughout this file. */
typedef unsigned char UINT8;
typedef unsigned int UINT32;
/* Number of rounds, and the per-round constants. Each constant is stored as
   two 32-bit words produced by toBitInterleaving: [0] is the even word,
   [1] the odd word. */
#define nrRounds 24
UINT32 KeccakRoundConstants[nrRounds][2];
/* Number of lanes, and the per-lane rotation offsets applied by rho. */
#define nrLanes 25
unsigned int KeccakRhoOffsets[nrLanes];
/* Forward declarations of the permutation and its five step mappings; the
   state they work on holds two 32-bit words per lane. */
void KeccakPermutationOnWords(UINT32 *state);
void theta(UINT32 *A);
void rho(UINT32 *A);
void pi(UINT32 *A);
void chi(UINT32 *A);
void iota(UINT32 *A, unsigned int indexRound);
/*
 * Splits a 64-bit lane, given as its low and high 32-bit halves, into its
 * bit-interleaved form: *even receives lane bits 0, 2, 4, ... and *odd
 * receives lane bits 1, 3, 5, ..., each packed from bit 0 upwards.
 */
void toBitInterleaving(UINT32 low, UINT32 high, UINT32 *even, UINT32 *odd)
{
    unsigned int pair;

    *even = 0;
    *odd = 0;
    /* pair p covers lane bits 2p and 2p+1; pairs 0..15 come from low and
       pairs 16..31 from high. */
    for (pair = 0; pair < 32; pair++) {
        const UINT32 half = (pair < 16) ? low : high;
        const unsigned int pos = (2*pair) % 32;

        *even |= ((half >> pos) & 1) << pair;
        *odd |= ((half >> (pos+1)) & 1) << pair;
    }
}
/*
 * Inverse of toBitInterleaving: rebuilds the low and high 32-bit halves of a
 * lane from its even-bit and odd-bit words. Bit p of even becomes lane bit
 * 2p, and bit p of odd becomes lane bit 2p+1.
 */
void fromBitInterleaving(UINT32 even, UINT32 odd, UINT32 *low, UINT32 *high)
{
    unsigned int pair;

    *low = 0;
    *high = 0;
    /* Pairs 0..15 land in low, pairs 16..31 in high. */
    for (pair = 0; pair < 32; pair++) {
        UINT32 *half = (pair < 16) ? low : high;
        const unsigned int pos = (2*pair) % 32;

        *half |= ((even >> pair) & 1) << pos;
        *half |= ((odd >> pair) & 1) << (pos+1);
    }
}
/*
 * Converts the byte-array state into bit-interleaved form. Each lane is read
 * from 8 consecutive bytes, least significant byte first, and stored as two
 * 32-bit words: the even word at index 2*lane, the odd word at 2*lane+1.
 */
void fromBytesToWords(UINT32 *stateAsWords, const unsigned char *state)
{
    unsigned int lane, k;

    for (lane = 0; lane < (KeccakPermutationSize/64); lane++) {
        const unsigned char *src = state + lane*(64/8);
        UINT32 low = 0, high = 0;
        UINT32 even, odd;

        for (k = 0; k < (32/8); k++) {
            low |= (UINT32)(src[k]) << (8*k);
            high |= (UINT32)(src[(32/8)+k]) << (8*k);
        }
        toBitInterleaving(low, high, &even, &odd);
        stateAsWords[2*lane+0] = even;
        stateAsWords[2*lane+1] = odd;
    }
}
/*
 * Converts the bit-interleaved state back into bytes. Each pair of words is
 * de-interleaved into the low and high halves of a lane, which are written
 * as 8 consecutive bytes, least significant byte first.
 */
void fromWordsToBytes(unsigned char *state, const UINT32 *stateAsWords)
{
    unsigned int lane, k;

    for (lane = 0; lane < (KeccakPermutationSize/64); lane++) {
        unsigned char *dst = state + lane*(64/8);
        UINT32 low, high;

        fromBitInterleaving(stateAsWords[2*lane+0], stateAsWords[2*lane+1], &low, &high);
        for (k = 0; k < (32/8); k++)
            dst[k] = (unsigned char)((low >> (8*k)) & 0xFF);
        for (k = 0; k < (32/8); k++)
            dst[(32/8)+k] = (unsigned char)((high >> (8*k)) & 0xFF);
    }
}
/*
 * Applies the permutation to a state stored as bytes: the bytes are
 * converted to bit-interleaved 32-bit words, permuted, and converted back.
 * The state is reported through displayStateAsBytes before and after.
 */
void KeccakPermutation(unsigned char *state)
{
    UINT32 interleaved[KeccakPermutationSize/32];

    displayStateAsBytes(1, "Input of permutation", state);

    fromBytesToWords(interleaved, state);
    KeccakPermutationOnWords(interleaved);
    fromWordsToBytes(state, interleaved);

    displayStateAsBytes(1, "State after permutation", state);
}
/*
 * XORs the first dataLengthInBytes bytes of data into the state, then
 * applies KeccakPermutation to it.
 */
void KeccakPermutationAfterXor(unsigned char *state, const unsigned char *data, unsigned int dataLengthInBytes)
{
    unsigned int offset = 0;

    while (offset < dataLengthInBytes) {
        state[offset] ^= data[offset];
        offset++;
    }
    KeccakPermutation(state);
}
/*
 * Runs all nrRounds rounds on a bit-interleaved state. Each round applies
 * theta, rho, pi, chi and iota in that order, and the state is reported
 * through the display helpers after every step.
 */
void KeccakPermutationOnWords(UINT32 *state)
{
    unsigned int round = 0;

    displayStateAs32bitWords(3, "Same, with lanes as pairs of 32-bit words (bit interleaving)", state);
    while (round < nrRounds) {
        displayRoundNumber(3, round);

        theta(state);
        displayStateAs32bitWords(3, "After theta", state);

        rho(state);
        displayStateAs32bitWords(3, "After rho", state);

        pi(state);
        displayStateAs32bitWords(3, "After pi", state);

        chi(state);
        displayStateAs32bitWords(3, "After chi", state);

        iota(state, round);
        displayStateAs32bitWords(3, "After iota", state);

        round++;
    }
}
/* Position in the 50-word state array of word z (0 = even bits, 1 = odd
   bits) of lane (x, y); x and y are reduced modulo 5. All arguments are
   parenthesized so that expression arguments expand correctly. */
#define index(x, y,z) ((((x)%5)+5*((y)%5))*2 + (z))
/* Rotate the 32-bit value a left by offset bits. The count is reduced modulo
   32 and a zero count returns a unchanged, so offset == 32 (which ROL64
   would request for a 64-bit rotation by 63) is well defined instead of
   shifting by the full word width. offset is evaluated more than once. */
#define ROL32(a, offset) ((((offset) & 31) != 0) ? ((((UINT32)(a)) << ((offset) & 31)) ^ (((UINT32)(a)) >> (32-((offset) & 31)))) : (UINT32)(a))
/*
 * Rotates a bit-interleaved 64-bit lane left by offset bits.
 *
 * For an even offset both words are rotated by offset/2. For an odd offset
 * the words swap roles: the new even word is the old odd word rotated by
 * (offset+1)/2 and the new odd word is the old even word rotated by
 * (offset-1)/2.
 */
void ROL64(UINT32 inEven, UINT32 inOdd, UINT32 *outEven, UINT32 *outOdd, unsigned int offset)
{
    const unsigned int half = offset/2;

    if ((offset % 2) != 0) {
        /* For odd offset, (offset+1)/2 == half+1 and (offset-1)/2 == half. */
        const unsigned int halfUp = half + 1;

        *outEven = ROL32(inOdd, halfUp);
        *outOdd = ROL32(inEven, half);
    }
    else {
        *outEven = ROL32(inEven, half);
        *outOdd = ROL32(inOdd, half);
    }
}
/*
 * Theta step on the bit-interleaved state: XORs into every lane of column x
 * the value ROL64(parity[x+1], 1) ^ parity[x+4], where parity[] holds the
 * XOR of the five lanes of each column (indices taken modulo 5). Every lane
 * is handled as its even and odd 32-bit words.
 */
void theta(UINT32 *A)
{
    UINT32 parity[5][2], mix[5][2];
    unsigned int col, row, word;

    for (col = 0; col < 5; col++) {
        for (word = 0; word < 2; word++) {
            UINT32 acc = 0;

            for (row = 0; row < 5; row++)
                acc ^= A[index(col, row, word)];
            parity[col][word] = acc;
        }
    }

    for (col = 0; col < 5; col++) {
        const unsigned int next = (col+1)%5;
        const unsigned int prev = (col+4)%5;

        ROL64(parity[next][0], parity[next][1], &(mix[col][0]), &(mix[col][1]), 1);
        mix[col][0] ^= parity[prev][0];
        mix[col][1] ^= parity[prev][1];
    }

    for (row = 0; row < 5; row++)
        for (col = 0; col < 5; col++)
            for (word = 0; word < 2; word++)
                A[index(col, row, word)] ^= mix[col][word];
}
/*
 * Rho step on the bit-interleaved state: rotates every lane left by its
 * entry in KeccakRhoOffsets (indexed by 5*y+x).
 */
void rho(UINT32 *A)
{
    unsigned int col, row;

    for (col = 0; col < 5; col++) {
        for (row = 0; row < 5; row++) {
            /* lane[0] is the even word, lane[1] the odd word. */
            UINT32 *lane = &(A[index(col, row, 0)]);

            ROL64(lane[0], lane[1], &(lane[0]), &(lane[1]), KeccakRhoOffsets[5*row+col]);
        }
    }
}
/*
 * Pi step on the bit-interleaved state: moves both words of the lane at
 * (x, y) to position (y, 2x+3y), coordinates taken modulo 5. A snapshot of
 * the state is taken first so that every word is read before any is
 * overwritten.
 */
void pi(UINT32 *A)
{
    UINT32 snapshot[50];
    unsigned int col, row, word;

    memcpy(snapshot, A, sizeof(snapshot));
    for (col = 0; col < 5; col++)
        for (row = 0; row < 5; row++)
            for (word = 0; word < 2; word++)
                A[index(row, 2*col+3*row, word)] = snapshot[index(col, row, word)];
}
/*
 * Chi step on the bit-interleaved state: within each row of five lanes,
 * every word is XORed with (NOT same word of the next lane) AND (same word
 * of the lane after next), lane positions taken modulo 5. The new row is
 * computed into a scratch array before being written back.
 */
void chi(UINT32 *A)
{
    UINT32 out[5][2];
    unsigned int col, row, word;

    for (row = 0; row < 5; row++) {
        UINT32 *plane = A + 10*row;  /* plane[2*x+z] is A[index(x, row, z)] */

        for (col = 0; col < 5; col++)
            for (word = 0; word < 2; word++)
                out[col][word] = plane[2*col+word]
                    ^ ((~plane[2*((col+1)%5)+word]) & plane[2*((col+2)%5)+word]);
        for (col = 0; col < 5; col++)
            for (word = 0; word < 2; word++)
                plane[2*col+word] = out[col][word];
    }
}
/*
 * Iota step: XORs both words of the constant of round indexRound into the
 * two words of lane (0, 0).
 */
void iota(UINT32 *A, unsigned int indexRound)
{
    const UINT32 *roundConstant = KeccakRoundConstants[indexRound];

    A[index(0, 0, 0)] ^= roundConstant[0];
    A[index(0, 0, 1)] ^= roundConstant[1];
}
/*
 * Advances the 8-bit LFSR by one step and returns the bit that was in
 * position 0 before the step (0 or 1).
 *
 * The register is shifted left by one; if bit 7 was set, 0x71 is XORed in
 * (primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1).
 */
int LFSR86540(UINT8 *LFSR)
{
    const UINT8 current = *LFSR;
    UINT8 next = (UINT8)(current << 1);

    if ((current & 0x80) != 0)
        next ^= 0x71;
    *LFSR = next;
    return (current & 0x01) != 0;
}
void KeccakInitializeRoundConstants()
{
UINT8 LFSRstate = 0x01;
unsigned int i, j, bitPosition;
UINT32 low, high;
for(i=0; i<nrRounds; i++) {
low = high = 0;
for(j=0; j<7; j++) {
bitPosition = (1<<j)-1; //2^j-1
if (LFSR86540(&LFSRstate)) {
if (bitPosition < 32)
low ^= (UINT32)1 << bitPosition;
else
high ^= (UINT32)1 << (bitPosition-32);
}
}
toBitInterleaving(low, high, &(KeccakRoundConstants[i][0]), &(KeccakRoundConstants[i][1]));
}
}
/*
 * Fills KeccakRhoOffsets (indexed by 5*y+x). Lane (0, 0) gets offset 0.
 * Starting from (x, y) = (1, 0), the t-th visited lane (t = 1..24) gets
 * offset t*(t+1)/2 mod 64, and the walk moves on to (y, 2x+3y mod 5).
 */
void KeccakInitializeRhoOffsets()
{
    unsigned int x = 1, y = 0, step;

    KeccakRhoOffsets[0] = 0;
    for (step = 1; step <= 24; step++) {
        const unsigned int nextX = y % 5;
        const unsigned int nextY = (2*x + 3*y) % 5;

        KeccakRhoOffsets[5*y+x] = (step*(step+1)/2) % 64;
        x = nextX;
        y = nextY;
    }
}
/* Fills the two run-time tables used by the permutation:
   KeccakRoundConstants (read by iota) and KeccakRhoOffsets (read by rho). */
void KeccakInitialize()
{
KeccakInitializeRoundConstants();
KeccakInitializeRhoOffsets();
}
/*
 * Writes the nrRounds round constants to f, one per line, as
 * "RC[nn][0][0] = " followed by the two stored 32-bit words in hex separated
 * by a colon (word [0] first), then a blank line.
 *
 * The round index is unsigned, so it is printed with %u; passing it to a
 * signed %i conversion is a type mismatch.
 */
void displayRoundConstants(FILE *f)
{
    unsigned int i;

    for(i=0; i<nrRounds; i++) {
        fprintf(f, "RC[%02u][0][0] = ", i);
        fprintf(f, "%08X:%08X", (unsigned int)(KeccakRoundConstants[i][0]), (unsigned int)(KeccakRoundConstants[i][1]));
        fprintf(f, "\n");
    }
    fprintf(f, "\n");
}
/*
 * Writes the 25 rho offsets to f, one per line, as "RhoOffset[x][y] = nn",
 * iterating y in the outer loop and x in the inner loop, then a blank line.
 *
 * The coordinates and offsets are unsigned, so they are printed with %u;
 * passing them to a signed %i conversion is a type mismatch.
 */
void displayRhoOffsets(FILE *f)
{
    unsigned int x, y;

    for(y=0; y<5; y++) for(x=0; x<5; x++) {
        fprintf(f, "RhoOffset[%u][%u] = ", x, y);
        fprintf(f, "%2u", KeccakRhoOffsets[5*y+x]);
        fprintf(f, "\n");
    }
    fprintf(f, "\n");
}
/* Clears all KeccakPermutationSizeInBytes bytes of the state. */
void KeccakInitializeState(unsigned char *state)
{
memset(state, 0, KeccakPermutationSizeInBytes);
}
/* Fixed-rate absorb functions, each compiled only when the matching
   ProvideFast<N> macro is defined. Each one XORs N/8 bytes of data into the
   state and permutes it via KeccakPermutationAfterXor. */
#ifdef ProvideFast576
/* 576 bits = 72 bytes. */
void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 72);
}
#endif
#ifdef ProvideFast832
/* 832 bits = 104 bytes. */
void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 104);
}
#endif
#ifdef ProvideFast1024
/* 1024 bits = 128 bytes. */
void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 128);
}
#endif
#ifdef ProvideFast1088
/* 1088 bits = 136 bytes. */
void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 136);
}
#endif
#ifdef ProvideFast1152
/* 1152 bits = 144 bytes. */
void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 144);
}
#endif
#ifdef ProvideFast1344
/* 1344 bits = 168 bytes. */
void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data)
{
KeccakPermutationAfterXor(state, data, 168);
}
#endif
/* XORs laneCount lanes (laneCount*8 bytes) of data into the state and
   permutes it. */
void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount)
{
KeccakPermutationAfterXor(state, data, laneCount*8);
}
#ifdef ProvideFast1024
/* Copies the first 1024 bits (128 bytes) of the state to data. */
void KeccakExtract1024bits(const unsigned char *state, unsigned char *data)
{
memcpy(data, state, 128);
}
#endif
/* Copies the first laneCount lanes (laneCount*8 bytes) of the state to
   data. */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
memcpy(data, state, laneCount*8);
}

651
c_src/KeccakF-1600-simd128.macros Executable file
View File

@ -0,0 +1,651 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
// Declares every working variable used by the round macros in this file.
// A* and E* are the two state copies named by the A and E parameters of the
// round macro, B* are the values after rotation, C* the column parities and
// D* the theta values. V6464 variables are accessed through .v128 and
// .v64[] members, holding two lanes each; V64 and V128 variables hold one
// lane or a pair of lanes in a SIMD register.
#define declareABCDE \
V6464 Abage, Abegi, Abigo, Abogu, Abuga; \
V6464 Akame, Akemi, Akimo, Akomu, Akuma; \
V6464 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio, Asae, Asio; \
V64 Aba, Abe, Abi, Abo, Abu; \
V64 Aga, Age, Agi, Ago, Agu; \
V64 Aka, Ake, Aki, Ako, Aku; \
V64 Ama, Ame, Ami, Amo, Amu; \
V64 Asa, Ase, Asi, Aso, Asu; \
V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
V64 Bba, Bbe, Bbi, Bbo, Bbu; \
V64 Bga, Bge, Bgi, Bgo, Bgu; \
V64 Bka, Bke, Bki, Bko, Bku; \
V64 Bma, Bme, Bmi, Bmo, Bmu; \
V64 Bsa, Bse, Bsi, Bso, Bsu; \
V128 Cae, Cei, Cio, Cou, Cua, Dei, Dou; \
V64 Ca, Ce, Ci, Co, Cu; \
V64 Da, De, Di, Do, Du; \
V6464 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
V6464 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
V64 Eba, Ebe, Ebi, Ebo, Ebu; \
V64 Ega, Ege, Egi, Ego, Egu; \
V64 Eka, Eke, Eki, Eko, Eku; \
V64 Ema, Eme, Emi, Emo, Emu; \
V64 Esa, Ese, Esi, Eso, Esu; \
V128 Zero;
// prepareTheta expands to nothing in this variant.
#define prepareTheta
// Computes the five theta values Da..Du from the column parities, two lanes
// at a time. On entry Cae holds (Ca, Ce) and Cio holds (Ci, Co) as
// (low, high) halves, and Cu holds Cu in its low half. Each D is the parity
// of the previous column XOR the parity of the next column rotated left by 1:
//   Da = Cu ^ ROL(Ce, 1)   De = Ca ^ ROL(Ci, 1)   Di = Ce ^ ROL(Co, 1)
//   Do = Ci ^ ROL(Cu, 1)   Du = Co ^ ROL(Ca, 1)
// De and Do are the low halves of Dei and Dou; Di and Du are their high
// halves copied down with COPY64HI2LO.
#define computeD \
Cua = GET64LOLO(Cu, Cae); \
Dei = XOR128(Cae, ROL64in128(Cio, 1)); \
Dou = XOR128(Cio, ROL64in128(Cua, 1)); \
Da = XOR64(Cu, ROL64in128(COPY64HI2LO(Cae), 1)); \
De = Dei; \
Di = COPY64HI2LO(Dei); \
Do = Dou; \
Du = COPY64HI2LO(Dou);
// --- Theta Rho Pi Chi Iota Prepare-theta
// --- 64-bit lanes mapped to 64-bit and 128-bit words
// One full round (theta, rho, pi, chi, iota) that also accumulates the
// column parities needed by the next round's theta.
//   i: round number, used to index KeccakF1600RoundConstants.
//   A: prefix of the input state variables (read, and XORed with D in place).
//   E: prefix of the output state variables (written).
// On entry Cae, Cio and Cu must hold the column parities of the A state (see
// computeD); on exit they hold the column parities of the E state.
// The macro is written as three groups of lanes: the first two are processed
// as 128-bit pairs, the last one as single 64-bit lanes, followed by a final
// fold of the partial parities.
// The last line deliberately has no trailing backslash, so the definition
// ends there and cannot absorb whatever line follows it.
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
computeD \
\
A##ba = LOAD64(A##bage.v64[0]); \
XOReq64(A##ba, Da); \
Bba = A##ba; \
XOReq64(A##gu, Du); \
Bge = ROL64(A##gu, 20); \
Bbage = GET64LOLO(Bba, Bge); \
A##ge = LOAD64(A##bage.v64[1]); \
XOReq64(A##ge, De); \
Bbe = ROL64(A##ge, 44); \
A##ka = LOAD64(A##kame.v64[0]); \
XOReq64(A##ka, Da); \
Bgi = ROL64(A##ka, 3); \
Bbegi = GET64LOLO(Bbe, Bgi); \
XOReq64(A##ki, Di); \
Bbi = ROL64(A##ki, 43); \
A##me = LOAD64(A##kame.v64[1]); \
XOReq64(A##me, De); \
Bgo = ROL64(A##me, 45); \
Bbigo = GET64LOLO(Bbi, Bgo); \
E##bage.v128 = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
XOReq128(E##bage.v128, CONST64(KeccakF1600RoundConstants[i])); \
Cae = E##bage.v128; \
XOReq64(A##mo, Do); \
Bbo = ROL64(A##mo, 21); \
XOReq64(A##si, Di); \
Bgu = ROL64(A##si, 61); \
Bbogu = GET64LOLO(Bbo, Bgu); \
E##begi.v128 = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
Cei = E##begi.v128; \
XOReq64(A##su, Du); \
Bbu = ROL64(A##su, 14); \
XOReq64(A##bo, Do); \
Bga = ROL64(A##bo, 28); \
Bbuga = GET64LOLO(Bbu, Bga); \
E##bigo.v128 = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
E##bi = E##bigo.v128; \
E##go = GET64HIHI(E##bigo.v128, E##bigo.v128); \
Cio = E##bigo.v128; \
E##bogu.v128 = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
E##bo = E##bogu.v128; \
E##gu = GET64HIHI(E##bogu.v128, E##bogu.v128); \
Cou = E##bogu.v128; \
E##buga.v128 = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
E##bu = E##buga.v128; \
E##ga = GET64HIHI(E##buga.v128, E##buga.v128); \
Cua = E##buga.v128; \
\
A##be = LOAD64(A##begi.v64[0]); \
XOReq64(A##be, De); \
Bka = ROL64(A##be, 1); \
XOReq64(A##ga, Da); \
Bme = ROL64(A##ga, 36); \
Bkame = GET64LOLO(Bka, Bme); \
A##gi = LOAD64(A##begi.v64[1]); \
XOReq64(A##gi, Di); \
Bke = ROL64(A##gi, 6); \
A##ke = LOAD64(A##kemi.v64[0]); \
XOReq64(A##ke, De); \
Bmi = ROL64(A##ke, 10); \
Bkemi = GET64LOLO(Bke, Bmi); \
XOReq64(A##ko, Do); \
Bki = ROL64(A##ko, 25); \
A##mi = LOAD64(A##kemi.v64[1]); \
XOReq64(A##mi, Di); \
Bmo = ROL64(A##mi, 15); \
Bkimo = GET64LOLO(Bki, Bmo); \
E##kame.v128 = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
XOReq128(Cae, E##kame.v128); \
Bkomu = GET64LOLO(XOR64(A##mu, Du), XOR64(A##so, Do)); \
Bkomu = SHUFFLEBYTES128(Bkomu, CONST128(rho8_56)); \
E##kemi.v128 = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
XOReq128(Cei, E##kemi.v128); \
XOReq64(A##sa, Da); \
Bku = ROL64(A##sa, 18); \
XOReq64(A##bu, Du); \
Bma = ROL64(A##bu, 27); \
Bkuma = GET64LOLO(Bku, Bma); \
E##kimo.v128 = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
E##ki = E##kimo.v128; \
E##mo = GET64HIHI(E##kimo.v128, E##kimo.v128); \
XOReq128(Cio, E##kimo.v128); \
E##komu.v128 = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
E##ko = E##komu.v128; \
E##mu = GET64HIHI(E##komu.v128, E##komu.v128); \
XOReq128(Cou, E##komu.v128); \
E##kuma.v128 = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
E##ku = E##kuma.v128; \
E##ma = GET64HIHI(E##kuma.v128, E##kuma.v128); \
XOReq128(Cua, E##kuma.v128); \
\
XOReq64(A##bi, Di); \
Bsa = ROL64(A##bi, 62); \
XOReq64(A##go, Do); \
Bse = ROL64(A##go, 55); \
XOReq64(A##ku, Du); \
Bsi = ROL64(A##ku, 39); \
E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
Ca = E##sa; \
XOReq64(A##ma, Da); \
Bso = ROL64(A##ma, 41); \
E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
Ce = E##se; \
XOReq128(Cae, GET64LOLO(Ca, Ce)); \
XOReq64(A##se, De); \
Bsu = ROL64(A##se, 2); \
E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
Ci = E##si; \
E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
Co = E##so; \
XOReq128(Cio, GET64LOLO(Ci, Co)); \
E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
Cu = E##su; \
\
Zero = ZERO128(); \
XOReq128(Cae, GET64HIHI(Cua, Zero)); \
XOReq128(Cae, GET64LOLO(Zero, Cei)); \
XOReq128(Cio, GET64HIHI(Cei, Zero)); \
XOReq128(Cio, GET64LOLO(Zero, Cou)); \
XOReq128(Cua, GET64HIHI(Cou, Zero)); \
XOReq64(Cu, Cua);
// --- Theta Rho Pi Chi Iota
// --- 64-bit lanes mapped to 64-bit and 128-bit words
// In this variant the plain round is the same macro as the round that also
// prepares the next theta; it simply forwards its three arguments.
#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
// Round constants, one 64-bit value per round. thetaRhoPiChiIotaPrepareTheta
// reads entry i with CONST64 and XORs it into the first lane pair of the
// output state.
const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
/*
 * copyFromStateAndXor576bits(X, state, input) -- 128-bit code path.
 *
 * Loads state[0..24] into the working variables whose names start with X,
 * XORing input[0..8] (9 lanes = 576 bits) into lanes 0..8 on the way;
 * lanes 9..24 come from state unchanged.
 *
 * Lanes are handled in pairs through the X##..ae / X##..io unions (.v128):
 * the first lane's variable is assigned the pair value, the second is taken
 * with GET64HIHI, and X##bage, X##begi, X##kame, X##kemi are packed with
 * GET64LOLO.  Cae, Cio and Cu are initialised from row b and then have rows
 * g, k, m and s XORed in.
 *
 * Every variable written here (X-prefixed lanes, Cae, Cio, Cu) must be
 * declared by the caller.
 * NOTE(review): LOAD128 is used at even lane indices and LOAD128u at odd
 * ones, presumably aligned vs. unaligned loads -- confirm against the SIMD
 * helper definitions.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = LOAD64(state[ 9]); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = LOAD128(state[10]); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = LOAD128(state[12]); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromStateAndXor832bits(X, state, input) -- 128-bit code path.
 *
 * Loads state[0..24] into the working variables whose names start with X,
 * XORing input[0..12] (13 lanes = 832 bits) into lanes 0..12 on the way;
 * lanes 13..24 come from state unchanged.
 *
 * Lanes are handled in pairs through the X##..ae / X##..io unions (.v128):
 * the first lane's variable is assigned the pair value, the second is taken
 * with GET64HIHI, and X##bage, X##begi, X##kame, X##kemi are packed with
 * GET64LOLO.  Cae, Cio and Cu are initialised from row b and then have rows
 * g, k, m and s XORed in.
 *
 * Every variable written here (X-prefixed lanes, Cae, Cio, Cu) must be
 * declared by the caller.
 * NOTE(review): lane 12 is XORed as XOR128(LOAD128(state[12]),
 * LOAD64(input[12])); this presumably relies on LOAD64 yielding a value
 * whose upper 64 bits are zero so that lane 13 is left untouched -- confirm
 * against the LOAD64 definition.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromStateAndXor1024bits(X, state, input) -- 128-bit code path.
 *
 * Loads state[0..24] into the working variables whose names start with X,
 * XORing input[0..15] (16 lanes = 1024 bits) into lanes 0..15 on the way;
 * lanes 16..24 come from state unchanged.
 *
 * Lanes are handled in pairs through the X##..ae / X##..io unions (.v128):
 * the first lane's variable is assigned the pair value, the second is taken
 * with GET64HIHI, and X##bage, X##begi, X##kame, X##kemi are packed with
 * GET64LOLO.  Cae, Cio and Cu are initialised from row b and then have rows
 * g, k, m and s XORed in.
 *
 * Every variable written here (X-prefixed lanes, Cae, Cio, Cu) must be
 * declared by the caller.
 * NOTE(review): lane 15 is XORed as XOR128(LOAD128u(state[15]),
 * LOAD64(input[15])); this presumably relies on LOAD64 yielding a value
 * whose upper 64 bits are zero so that lane 16 is left untouched -- confirm
 * against the LOAD64 definition.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD64(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromStateAndXor1088bits(X, state, input) -- 128-bit code path.
 *
 * Loads state[0..24] into the working variables whose names start with X,
 * XORing input[0..16] (17 lanes = 1088 bits) into lanes 0..16 on the way;
 * lanes 17..24 come from state unchanged.
 *
 * Lanes are handled in pairs through the X##..ae / X##..io unions (.v128):
 * the first lane's variable is assigned the pair value, the second is taken
 * with GET64HIHI, and X##bage, X##begi, X##kame, X##kemi are packed with
 * GET64LOLO.  Cae, Cio and Cu are initialised from row b and then have rows
 * g, k, m and s XORed in.
 *
 * Every variable written here (X-prefixed lanes, Cae, Cio, Cu) must be
 * declared by the caller.
 * NOTE(review): LOAD128 is used at even lane indices and LOAD128u at odd
 * ones, presumably aligned vs. unaligned loads -- confirm against the SIMD
 * helper definitions.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromStateAndXor1152bits(X, state, input) -- 128-bit code path.
 *
 * Loads state[0..24] into the working variables whose names start with X,
 * XORing input[0..17] (18 lanes = 1152 bits) into lanes 0..17 on the way;
 * lanes 18..24 come from state unchanged.
 *
 * Lanes are handled in pairs through the X##..ae / X##..io unions (.v128):
 * the first lane's variable is assigned the pair value, the second is taken
 * with GET64HIHI, and X##bage, X##begi, X##kame, X##kemi are packed with
 * GET64LOLO.  Cae, Cio and Cu are initialised from row b and then have rows
 * g, k, m and s XORed in.
 *
 * Every variable written here (X-prefixed lanes, Cae, Cio, Cu) must be
 * declared by the caller.
 * NOTE(review): lane 17 is XORed as XOR128(LOAD128u(state[17]),
 * LOAD64(input[17])); this presumably relies on LOAD64 yielding a value
 * whose upper 64 bits are zero so that lane 18 is left untouched -- confirm
 * against the LOAD64 definition.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromStateAndXor1344bits(X, state, input) -- 128-bit code path.
 *
 * Loads state[0..24] into the working variables whose names start with X,
 * XORing input[0..20] (21 lanes = 1344 bits) into lanes 0..20 on the way;
 * lanes 21..24 come from state unchanged.
 *
 * Lanes are handled in pairs through the X##..ae / X##..io unions (.v128):
 * the first lane's variable is assigned the pair value, the second is taken
 * with GET64HIHI, and X##bage, X##begi, X##kame, X##kemi are packed with
 * GET64LOLO.  Cae, Cio and Cu are initialised from row b and then have rows
 * g, k, m and s XORed in.
 *
 * Every variable written here (X-prefixed lanes, Cae, Cio, Cu) must be
 * declared by the caller.
 * NOTE(review): lane 20 is XORed as XOR128(LOAD128(state[20]),
 * LOAD64(input[20])); this presumably relies on LOAD64 yielding a value
 * whose upper 64 bits are zero so that lane 21 is left untouched -- confirm
 * against the LOAD64 definition.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##bae.v128 = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cu = X##bu; \
    X##gae.v128 = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = XOR128(LOAD128u(state[15]), LOAD128u(input[15])); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyFromState(X, state) -- 128-bit code path.
 *
 * Loads state[0..24] into the working variables whose names start with X,
 * without mixing in any input.
 *
 * Lanes are handled in pairs through the X##..ae / X##..io unions (.v128):
 * the first lane's variable is assigned the pair value, the second is taken
 * with GET64HIHI, and X##bage, X##begi, X##kame, X##kemi are packed with
 * GET64LOLO.  Cae, Cio and Cu are initialised from row b and then have rows
 * g, k, m and s XORed in.
 *
 * Every variable written here (X-prefixed lanes, Cae, Cio, Cu) must be
 * declared by the caller.
 * NOTE(review): LOAD128 is used at even lane indices and LOAD128u at odd
 * ones, presumably aligned vs. unaligned loads -- confirm against the SIMD
 * helper definitions.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromState(X, state) \
    X##bae.v128 = LOAD128(state[ 0]); \
    X##ba = X##bae.v128; \
    X##be = GET64HIHI(X##bae.v128, X##bae.v128); \
    Cae = X##bae.v128; \
    X##bio.v128 = LOAD128(state[ 2]); \
    X##bi = X##bio.v128; \
    X##bo = GET64HIHI(X##bio.v128, X##bio.v128); \
    Cio = X##bio.v128; \
    X##bu = LOAD64(state[ 4]); \
    Cu = X##bu; \
    X##gae.v128 = LOAD128u(state[ 5]); \
    X##ga = X##gae.v128; \
    X##ge = GET64HIHI(X##gae.v128, X##gae.v128); \
    X##bage.v128 = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae.v128); \
    X##gio.v128 = LOAD128u(state[ 7]); \
    X##gi = X##gio.v128; \
    X##begi.v128 = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio.v128, X##gio.v128); \
    XOReq128(Cio, X##gio.v128); \
    X##gu = LOAD64(state[ 9]); \
    XOReq64(Cu, X##gu); \
    X##kae.v128 = LOAD128(state[10]); \
    X##ka = X##kae.v128; \
    X##ke = GET64HIHI(X##kae.v128, X##kae.v128); \
    XOReq128(Cae, X##kae.v128); \
    X##kio.v128 = LOAD128(state[12]); \
    X##ki = X##kio.v128; \
    X##ko = GET64HIHI(X##kio.v128, X##kio.v128); \
    XOReq128(Cio, X##kio.v128); \
    X##ku = LOAD64(state[14]); \
    XOReq64(Cu, X##ku); \
    X##mae.v128 = LOAD128u(state[15]); \
    X##ma = X##mae.v128; \
    X##me = GET64HIHI(X##mae.v128, X##mae.v128); \
    X##kame.v128 = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, X##mae.v128); \
    X##mio.v128 = LOAD128u(state[17]); \
    X##mi = X##mio.v128; \
    X##kemi.v128 = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio.v128, X##mio.v128); \
    XOReq128(Cio, X##mio.v128); \
    X##mu = LOAD64(state[19]); \
    XOReq64(Cu, X##mu); \
    X##sae.v128 = LOAD128(state[20]); \
    X##sa = X##sae.v128; \
    X##se = GET64HIHI(X##sae.v128, X##sae.v128); \
    XOReq128(Cae, X##sae.v128); \
    X##sio.v128 = LOAD128(state[22]); \
    X##si = X##sio.v128; \
    X##so = GET64HIHI(X##sio.v128, X##sio.v128); \
    XOReq128(Cio, X##sio.v128); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cu, X##su);
/*
 * copyToState(state, X) -- 128-bit code path.
 *
 * Writes the 25 lanes held in the X-prefixed working variables back to
 * state[0..24].
 *
 *   state - destination array of at least 25 lanes
 *   X     - name prefix of the source variables (token-pasted)
 *
 * Lanes 0, 1, 6, 7 are read from the packed pairs X##bage / X##begi and
 * lanes 10, 11, 16, 17 from X##kame / X##kemi (element v64[0] or v64[1]);
 * every other lane is written with STORE64 from its own variable.
 *
 * The bage/begi pairs are selected through X like every other source, so
 * the macro honours its prefix argument instead of always reading Abage /
 * Abegi; with X = A the expansion is the same as before.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyToState(state, X) \
    state[ 0] = X##bage.v64[0]; \
    state[ 1] = X##begi.v64[0]; \
    STORE64(state[ 2], X##bi); \
    STORE64(state[ 3], X##bo); \
    STORE64(state[ 4], X##bu); \
    STORE64(state[ 5], X##ga); \
    state[ 6] = X##bage.v64[1]; \
    state[ 7] = X##begi.v64[1]; \
    STORE64(state[ 8], X##go); \
    STORE64(state[ 9], X##gu); \
    state[10] = X##kame.v64[0]; \
    state[11] = X##kemi.v64[0]; \
    STORE64(state[12], X##ki); \
    STORE64(state[13], X##ko); \
    STORE64(state[14], X##ku); \
    STORE64(state[15], X##ma); \
    state[16] = X##kame.v64[1]; \
    state[17] = X##kemi.v64[1]; \
    STORE64(state[18], X##mo); \
    STORE64(state[19], X##mu); \
    STORE64(state[20], X##sa); \
    STORE64(state[21], X##se); \
    STORE64(state[22], X##si); \
    STORE64(state[23], X##so); \
    STORE64(state[24], X##su);
/*
 * copyStateVariables(X, Y) -- 128-bit code path.
 *
 * Assigns every Y-prefixed working variable to the X-prefixed variable of
 * the same name: the packed pairs bage, begi, kame, kemi and the 17
 * individually held lanes.  Both sets must be declared by the caller.
 */
#define copyStateVariables(X, Y) \
    X##bage = Y##bage; X##begi = Y##begi; \
    X##bi = Y##bi; X##bo = Y##bo; X##bu = Y##bu; \
    X##ga = Y##ga; X##go = Y##go; X##gu = Y##gu; \
    X##kame = Y##kame; X##kemi = Y##kemi; \
    X##ki = Y##ki; X##ko = Y##ko; X##ku = Y##ku; \
    X##ma = Y##ma; X##mo = Y##mo; X##mu = Y##mu; \
    X##sa = Y##sa; X##se = Y##se; X##si = Y##si; X##so = Y##so; X##su = Y##su;

517
c_src/KeccakF-1600-simd64.macros Executable file
View File

@ -0,0 +1,517 @@
/*
Code automatically generated by KeccakTools!
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/*
 * declareABCDE -- declares the V64 working variables used by the 64-bit
 * round macros:
 *   A.. and E..  two sets of 25 lane variables (Aba..Asu, Eba..Esu)
 *   B..          25 temporaries (Bba..Bsu)
 *   C., D.       five variables each (Ca..Cu, Da..Du)
 *
 * Expands to declarations only, so it has to be used where declarations
 * are allowed.  The definition ends on its last declaration (no trailing
 * backslash), so it cannot absorb the line that follows it.
 */
#define declareABCDE \
    V64 Aba, Abe, Abi, Abo, Abu; \
    V64 Aga, Age, Agi, Ago, Agu; \
    V64 Aka, Ake, Aki, Ako, Aku; \
    V64 Ama, Ame, Ami, Amo, Amu; \
    V64 Asa, Ase, Asi, Aso, Asu; \
    V64 Bba, Bbe, Bbi, Bbo, Bbu; \
    V64 Bga, Bge, Bgi, Bgo, Bgu; \
    V64 Bka, Bke, Bki, Bko, Bku; \
    V64 Bma, Bme, Bmi, Bmo, Bmu; \
    V64 Bsa, Bse, Bsi, Bso, Bsu; \
    V64 Ca, Ce, Ci, Co, Cu; \
    V64 Da, De, Di, Do, Du; \
    V64 Eba, Ebe, Ebi, Ebo, Ebu; \
    V64 Ega, Ege, Egi, Ego, Egu; \
    V64 Eka, Eke, Eki, Eko, Eku; \
    V64 Ema, Eme, Emi, Emo, Emu; \
    V64 Esa, Ese, Esi, Eso, Esu;
/*
 * prepareTheta -- sets Ca, Ce, Ci, Co, Cu from the A lane variables: each
 * C variable is the XOR64 combination of the five A lanes whose name ends
 * in the same letter (e.g. Ca from Aba, Aga, Aka, Ama, Asa).
 * Takes no arguments; it always reads the variables prefixed with A.
 */
#define prepareTheta \
Ca = XOR64(Aba, XOR64(Aga, XOR64(Aka, XOR64(Ama, Asa)))); \
Ce = XOR64(Abe, XOR64(Age, XOR64(Ake, XOR64(Ame, Ase)))); \
Ci = XOR64(Abi, XOR64(Agi, XOR64(Aki, XOR64(Ami, Asi)))); \
Co = XOR64(Abo, XOR64(Ago, XOR64(Ako, XOR64(Amo, Aso)))); \
Cu = XOR64(Abu, XOR64(Agu, XOR64(Aku, XOR64(Amu, Asu)))); \
// --- Code for round, with prepare-theta
// --- 64-bit lanes mapped to 64-bit words
/*
 * thetaRhoPiChiIotaPrepareTheta(i, A, E) -- one round that also prepares
 * the C values for the following round.
 *
 *   i - index into KeccakF1600RoundConstants
 *   A - name prefix of the 25 input lane variables; they are modified
 *       (the matching D value is XORed into each of them)
 *   E - name prefix of the 25 output lane variables
 *
 * Uses the temporaries Bba..Bsu, Ca..Cu and Da..Du, which must be in scope.
 * On entry Ca..Cu are read to form Da..Du; during the round they are
 * overwritten with the first output row and then have every further E lane
 * XORed into the C variable with the same final letter.
 *
 * Each of the five statement groups produces one output row: five A lanes
 * are XORed with D and rotated with ROL64 by fixed amounts into B, then
 * each E lane is XOR64(B, ANDnu64(next B, B after next)).  The round
 * constant is XORed into E##ba only.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
Da = XOR64(Cu, ROL64(Ce, 1)); \
De = XOR64(Ca, ROL64(Ci, 1)); \
Di = XOR64(Ce, ROL64(Co, 1)); \
Do = XOR64(Ci, ROL64(Cu, 1)); \
Du = XOR64(Co, ROL64(Ca, 1)); \
\
XOReq64(A##ba, Da); \
Bba = A##ba; \
XOReq64(A##ge, De); \
Bbe = ROL64(A##ge, 44); \
XOReq64(A##ki, Di); \
Bbi = ROL64(A##ki, 43); \
E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
Ca = E##ba; \
XOReq64(A##mo, Do); \
Bbo = ROL64(A##mo, 21); \
E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
Ce = E##be; \
XOReq64(A##su, Du); \
Bbu = ROL64(A##su, 14); \
E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
Ci = E##bi; \
E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
Co = E##bo; \
E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
Cu = E##bu; \
\
XOReq64(A##bo, Do); \
Bga = ROL64(A##bo, 28); \
XOReq64(A##gu, Du); \
Bge = ROL64(A##gu, 20); \
XOReq64(A##ka, Da); \
Bgi = ROL64(A##ka, 3); \
E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
XOReq64(Ca, E##ga); \
XOReq64(A##me, De); \
Bgo = ROL64(A##me, 45); \
E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
XOReq64(Ce, E##ge); \
XOReq64(A##si, Di); \
Bgu = ROL64(A##si, 61); \
E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
XOReq64(Ci, E##gi); \
E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
XOReq64(Co, E##go); \
E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
XOReq64(Cu, E##gu); \
\
XOReq64(A##be, De); \
Bka = ROL64(A##be, 1); \
XOReq64(A##gi, Di); \
Bke = ROL64(A##gi, 6); \
XOReq64(A##ko, Do); \
Bki = ROL64(A##ko, 25); \
E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
XOReq64(Ca, E##ka); \
XOReq64(A##mu, Du); \
Bko = ROL64(A##mu, 8); \
E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
XOReq64(Ce, E##ke); \
XOReq64(A##sa, Da); \
Bku = ROL64(A##sa, 18); \
E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
XOReq64(Ci, E##ki); \
E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
XOReq64(Co, E##ko); \
E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
XOReq64(Cu, E##ku); \
\
XOReq64(A##bu, Du); \
Bma = ROL64(A##bu, 27); \
XOReq64(A##ga, Da); \
Bme = ROL64(A##ga, 36); \
XOReq64(A##ke, De); \
Bmi = ROL64(A##ke, 10); \
E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
XOReq64(Ca, E##ma); \
XOReq64(A##mi, Di); \
Bmo = ROL64(A##mi, 15); \
E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
XOReq64(Ce, E##me); \
XOReq64(A##so, Do); \
Bmu = ROL64(A##so, 56); \
E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
XOReq64(Ci, E##mi); \
E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
XOReq64(Co, E##mo); \
E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
XOReq64(Cu, E##mu); \
\
XOReq64(A##bi, Di); \
Bsa = ROL64(A##bi, 62); \
XOReq64(A##go, Do); \
Bse = ROL64(A##go, 55); \
XOReq64(A##ku, Du); \
Bsi = ROL64(A##ku, 39); \
E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
XOReq64(Ca, E##sa); \
XOReq64(A##ma, Da); \
Bso = ROL64(A##ma, 41); \
E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
XOReq64(Ce, E##se); \
XOReq64(A##se, De); \
Bsu = ROL64(A##se, 2); \
E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
XOReq64(Ci, E##si); \
E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
XOReq64(Co, E##so); \
E##su = XOR64(Bsu, ANDnu64(Bsa, Bse)); \
XOReq64(Cu, E##su); \
\
// --- Code for round
// --- 64-bit lanes mapped to 64-bit words
/*
 * thetaRhoPiChiIota(i, A, E) -- one round, without preparing the C values
 * for a following round.
 *
 *   i - index into KeccakF1600RoundConstants
 *   A - name prefix of the 25 input lane variables; they are modified
 *       (the matching D value is XORed into each of them)
 *   E - name prefix of the 25 output lane variables
 *
 * Reads Ca..Cu (left unchanged) and uses the temporaries Bba..Bsu and
 * Da..Du; all of them must be in scope.
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define thetaRhoPiChiIota(i, A, E) \
    /* D values from the current C values */ \
    Da = XOR64(Cu, ROL64(Ce, 1)); \
    De = XOR64(Ca, ROL64(Ci, 1)); \
    Di = XOR64(Ce, ROL64(Co, 1)); \
    Do = XOR64(Ci, ROL64(Cu, 1)); \
    Du = XOR64(Co, ROL64(Ca, 1)); \
    \
    /* output row b; the round constant goes into E##ba only */ \
    XOReq64(A##ba, Da); \
    Bba = A##ba; \
    XOReq64(A##ge, De); \
    Bbe = ROL64(A##ge, 44); \
    XOReq64(A##ki, Di); \
    Bbi = ROL64(A##ki, 43); \
    E##ba = XOR64(Bba, ANDnu64(Bbe, Bbi)); \
    XOReq64(E##ba, CONST64(KeccakF1600RoundConstants[i])); \
    XOReq64(A##mo, Do); \
    Bbo = ROL64(A##mo, 21); \
    E##be = XOR64(Bbe, ANDnu64(Bbi, Bbo)); \
    XOReq64(A##su, Du); \
    Bbu = ROL64(A##su, 14); \
    E##bi = XOR64(Bbi, ANDnu64(Bbo, Bbu)); \
    E##bo = XOR64(Bbo, ANDnu64(Bbu, Bba)); \
    E##bu = XOR64(Bbu, ANDnu64(Bba, Bbe)); \
    \
    /* output row g */ \
    XOReq64(A##bo, Do); \
    Bga = ROL64(A##bo, 28); \
    XOReq64(A##gu, Du); \
    Bge = ROL64(A##gu, 20); \
    XOReq64(A##ka, Da); \
    Bgi = ROL64(A##ka, 3); \
    E##ga = XOR64(Bga, ANDnu64(Bge, Bgi)); \
    XOReq64(A##me, De); \
    Bgo = ROL64(A##me, 45); \
    E##ge = XOR64(Bge, ANDnu64(Bgi, Bgo)); \
    XOReq64(A##si, Di); \
    Bgu = ROL64(A##si, 61); \
    E##gi = XOR64(Bgi, ANDnu64(Bgo, Bgu)); \
    E##go = XOR64(Bgo, ANDnu64(Bgu, Bga)); \
    E##gu = XOR64(Bgu, ANDnu64(Bga, Bge)); \
    \
    /* output row k */ \
    XOReq64(A##be, De); \
    Bka = ROL64(A##be, 1); \
    XOReq64(A##gi, Di); \
    Bke = ROL64(A##gi, 6); \
    XOReq64(A##ko, Do); \
    Bki = ROL64(A##ko, 25); \
    E##ka = XOR64(Bka, ANDnu64(Bke, Bki)); \
    XOReq64(A##mu, Du); \
    Bko = ROL64(A##mu, 8); \
    E##ke = XOR64(Bke, ANDnu64(Bki, Bko)); \
    XOReq64(A##sa, Da); \
    Bku = ROL64(A##sa, 18); \
    E##ki = XOR64(Bki, ANDnu64(Bko, Bku)); \
    E##ko = XOR64(Bko, ANDnu64(Bku, Bka)); \
    E##ku = XOR64(Bku, ANDnu64(Bka, Bke)); \
    \
    /* output row m */ \
    XOReq64(A##bu, Du); \
    Bma = ROL64(A##bu, 27); \
    XOReq64(A##ga, Da); \
    Bme = ROL64(A##ga, 36); \
    XOReq64(A##ke, De); \
    Bmi = ROL64(A##ke, 10); \
    E##ma = XOR64(Bma, ANDnu64(Bme, Bmi)); \
    XOReq64(A##mi, Di); \
    Bmo = ROL64(A##mi, 15); \
    E##me = XOR64(Bme, ANDnu64(Bmi, Bmo)); \
    XOReq64(A##so, Do); \
    Bmu = ROL64(A##so, 56); \
    E##mi = XOR64(Bmi, ANDnu64(Bmo, Bmu)); \
    E##mo = XOR64(Bmo, ANDnu64(Bmu, Bma)); \
    E##mu = XOR64(Bmu, ANDnu64(Bma, Bme)); \
    \
    /* output row s */ \
    XOReq64(A##bi, Di); \
    Bsa = ROL64(A##bi, 62); \
    XOReq64(A##go, Do); \
    Bse = ROL64(A##go, 55); \
    XOReq64(A##ku, Du); \
    Bsi = ROL64(A##ku, 39); \
    E##sa = XOR64(Bsa, ANDnu64(Bse, Bsi)); \
    XOReq64(A##ma, Da); \
    Bso = ROL64(A##ma, 41); \
    E##se = XOR64(Bse, ANDnu64(Bsi, Bso)); \
    XOReq64(A##se, De); \
    Bsu = ROL64(A##se, 2); \
    E##si = XOR64(Bsi, ANDnu64(Bso, Bsu)); \
    E##so = XOR64(Bso, ANDnu64(Bsu, Bsa)); \
    E##su = XOR64(Bsu, ANDnu64(Bsa, Bse));
/*
 * Round-constant table: 24 64-bit entries, listed in index order
 * (two entries per line; the comment gives their indices).  The round
 * macros in this file read entry [i] through CONST64.
 */
const UINT64 KeccakF1600RoundConstants[24] = {
    0x0000000000000001ULL, 0x0000000000008082ULL,   /*  0,  1 */
    0x800000000000808aULL, 0x8000000080008000ULL,   /*  2,  3 */
    0x000000000000808bULL, 0x0000000080000001ULL,   /*  4,  5 */
    0x8000000080008081ULL, 0x8000000000008009ULL,   /*  6,  7 */
    0x000000000000008aULL, 0x0000000000000088ULL,   /*  8,  9 */
    0x0000000080008009ULL, 0x000000008000000aULL,   /* 10, 11 */
    0x000000008000808bULL, 0x800000000000008bULL,   /* 12, 13 */
    0x8000000000008089ULL, 0x8000000000008003ULL,   /* 14, 15 */
    0x8000000000008002ULL, 0x8000000000000080ULL,   /* 16, 17 */
    0x000000000000800aULL, 0x800000008000000aULL,   /* 18, 19 */
    0x8000000080008081ULL, 0x8000000000008080ULL,   /* 20, 21 */
    0x0000000080000001ULL, 0x8000000080008008ULL    /* 22, 23 */
};
/*
 * copyFromStateAndXor576bits(X, state, input) -- 64-bit code path.
 *
 * Fills the 25 lane variables X##ba .. X##su from state[0..24].  Lanes
 * 0..8 (9 lanes = 576 bits) are loaded as state[k] XOR input[k]; lanes
 * 9..24 are loaded from state unchanged.
 *
 *   X     - name prefix of the destination lane variables (token-pasted)
 *   state - array of at least 25 lanes, read through LOAD64
 *   input - array of at least 9 lanes, read through LOAD64
 *
 * Expands to plain statements; the lane variables must already be declared.
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = LOAD64(state[ 9]); \
    X##ka = LOAD64(state[10]); \
    X##ke = LOAD64(state[11]); \
    X##ki = LOAD64(state[12]); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor832bits(X, state, input) -- 64-bit code path.
 *
 * Fills the 25 lane variables X##ba .. X##su from state[0..24].  Lanes
 * 0..12 (13 lanes = 832 bits) are loaded as state[k] XOR input[k]; lanes
 * 13..24 are loaded from state unchanged.
 *
 *   X     - name prefix of the destination lane variables (token-pasted)
 *   state - array of at least 25 lanes, read through LOAD64
 *   input - array of at least 13 lanes, read through LOAD64
 *
 * Expands to plain statements; the lane variables must already be declared.
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1024bits(X, state, input) -- 64-bit code path.
 *
 * Fills the 25 lane variables X##ba .. X##su from state[0..24].  Lanes
 * 0..15 (16 lanes = 1024 bits) are loaded as state[k] XOR input[k]; lanes
 * 16..24 are loaded from state unchanged.
 *
 *   X     - name prefix of the destination lane variables (token-pasted)
 *   state - array of at least 25 lanes, read through LOAD64
 *   input - array of at least 16 lanes, read through LOAD64
 *
 * Expands to plain statements; the lane variables must already be declared.
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1088bits(X, state, input) -- 64-bit code path.
 *
 * Fills the 25 lane variables X##ba .. X##su from state[0..24].  Lanes
 * 0..16 (17 lanes = 1088 bits) are loaded as state[k] XOR input[k]; lanes
 * 17..24 are loaded from state unchanged.
 *
 *   X     - name prefix of the destination lane variables (token-pasted)
 *   state - array of at least 25 lanes, read through LOAD64
 *   input - array of at least 17 lanes, read through LOAD64
 *
 * Expands to plain statements; the lane variables must already be declared.
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1152bits(X, state, input) -- 64-bit code path.
 *
 * Fills the 25 lane variables X##ba .. X##su from state[0..24].  Lanes
 * 0..17 (18 lanes = 1152 bits) are loaded as state[k] XOR input[k]; lanes
 * 18..24 are loaded from state unchanged.
 *
 *   X     - name prefix of the destination lane variables (token-pasted)
 *   state - array of at least 25 lanes, read through LOAD64
 *   input - array of at least 18 lanes, read through LOAD64
 *
 * Expands to plain statements; the lane variables must already be declared.
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromStateAndXor1344bits(X, state, input) -- 64-bit code path.
 *
 * Fills the 25 lane variables X##ba .. X##su from state[0..24].  Lanes
 * 0..20 (21 lanes = 1344 bits) are loaded as state[k] XOR input[k]; lanes
 * 21..24 are loaded from state unchanged.
 *
 *   X     - name prefix of the destination lane variables (token-pasted)
 *   state - array of at least 25 lanes, read through LOAD64
 *   input - array of at least 21 lanes, read through LOAD64
 *
 * Expands to plain statements; the lane variables must already be declared.
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##ba = XOR64(LOAD64(state[ 0]), LOAD64(input[ 0])); \
    X##be = XOR64(LOAD64(state[ 1]), LOAD64(input[ 1])); \
    X##bi = XOR64(LOAD64(state[ 2]), LOAD64(input[ 2])); \
    X##bo = XOR64(LOAD64(state[ 3]), LOAD64(input[ 3])); \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    X##ga = XOR64(LOAD64(state[ 5]), LOAD64(input[ 5])); \
    X##ge = XOR64(LOAD64(state[ 6]), LOAD64(input[ 6])); \
    X##gi = XOR64(LOAD64(state[ 7]), LOAD64(input[ 7])); \
    X##go = XOR64(LOAD64(state[ 8]), LOAD64(input[ 8])); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##ka = XOR64(LOAD64(state[10]), LOAD64(input[10])); \
    X##ke = XOR64(LOAD64(state[11]), LOAD64(input[11])); \
    X##ki = XOR64(LOAD64(state[12]), LOAD64(input[12])); \
    X##ko = XOR64(LOAD64(state[13]), LOAD64(input[13])); \
    X##ku = XOR64(LOAD64(state[14]), LOAD64(input[14])); \
    X##ma = XOR64(LOAD64(state[15]), LOAD64(input[15])); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##mi = XOR64(LOAD64(state[17]), LOAD64(input[17])); \
    X##mo = XOR64(LOAD64(state[18]), LOAD64(input[18])); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    X##sa = XOR64(LOAD64(state[20]), LOAD64(input[20])); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyFromState(X, state) -- 64-bit code path.
 *
 * Fills the 25 lane variables X##ba .. X##su from state[0..24], one
 * LOAD64 per lane, without mixing in any input.
 *
 *   X     - name prefix of the destination lane variables (token-pasted)
 *   state - array of at least 25 lanes
 *
 * Expands to plain statements; the lane variables must already be declared.
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyFromState(X, state) \
    X##ba = LOAD64(state[ 0]); \
    X##be = LOAD64(state[ 1]); \
    X##bi = LOAD64(state[ 2]); \
    X##bo = LOAD64(state[ 3]); \
    X##bu = LOAD64(state[ 4]); \
    X##ga = LOAD64(state[ 5]); \
    X##ge = LOAD64(state[ 6]); \
    X##gi = LOAD64(state[ 7]); \
    X##go = LOAD64(state[ 8]); \
    X##gu = LOAD64(state[ 9]); \
    X##ka = LOAD64(state[10]); \
    X##ke = LOAD64(state[11]); \
    X##ki = LOAD64(state[12]); \
    X##ko = LOAD64(state[13]); \
    X##ku = LOAD64(state[14]); \
    X##ma = LOAD64(state[15]); \
    X##me = LOAD64(state[16]); \
    X##mi = LOAD64(state[17]); \
    X##mo = LOAD64(state[18]); \
    X##mu = LOAD64(state[19]); \
    X##sa = LOAD64(state[20]); \
    X##se = LOAD64(state[21]); \
    X##si = LOAD64(state[22]); \
    X##so = LOAD64(state[23]); \
    X##su = LOAD64(state[24]);
/*
 * copyToState(state, X) -- 64-bit code path.
 *
 * Writes the 25 lane variables X##ba .. X##su to state[0..24], one STORE64
 * per lane.
 *
 *   state - destination array of at least 25 lanes
 *   X     - name prefix of the source lane variables (token-pasted)
 *
 * The definition ends on its last statement (no trailing backslash), so it
 * cannot absorb the line that follows it.
 */
#define copyToState(state, X) \
    STORE64(state[ 0], X##ba); \
    STORE64(state[ 1], X##be); \
    STORE64(state[ 2], X##bi); \
    STORE64(state[ 3], X##bo); \
    STORE64(state[ 4], X##bu); \
    STORE64(state[ 5], X##ga); \
    STORE64(state[ 6], X##ge); \
    STORE64(state[ 7], X##gi); \
    STORE64(state[ 8], X##go); \
    STORE64(state[ 9], X##gu); \
    STORE64(state[10], X##ka); \
    STORE64(state[11], X##ke); \
    STORE64(state[12], X##ki); \
    STORE64(state[13], X##ko); \
    STORE64(state[14], X##ku); \
    STORE64(state[15], X##ma); \
    STORE64(state[16], X##me); \
    STORE64(state[17], X##mi); \
    STORE64(state[18], X##mo); \
    STORE64(state[19], X##mu); \
    STORE64(state[20], X##sa); \
    STORE64(state[21], X##se); \
    STORE64(state[22], X##si); \
    STORE64(state[23], X##so); \
    STORE64(state[24], X##su);
/* copyStateVariables(X, Y): assign each of the 25 variables Y##ba .. Y##su to
 * the matching X##ba .. X##su.  The expansion is a run of complete
 * statements.
 * The last line carries no line continuation on purpose: a trailing
 * backslash there would splice whatever line follows into this macro. */
#define copyStateVariables(X, Y) \
    X##ba = Y##ba; \
    X##be = Y##be; \
    X##bi = Y##bi; \
    X##bo = Y##bo; \
    X##bu = Y##bu; \
    X##ga = Y##ga; \
    X##ge = Y##ge; \
    X##gi = Y##gi; \
    X##go = Y##go; \
    X##gu = Y##gu; \
    X##ka = Y##ka; \
    X##ke = Y##ke; \
    X##ki = Y##ki; \
    X##ko = Y##ko; \
    X##ku = Y##ku; \
    X##ma = Y##ma; \
    X##me = Y##me; \
    X##mi = Y##mi; \
    X##mo = Y##mo; \
    X##mu = Y##mu; \
    X##sa = Y##sa; \
    X##se = Y##se; \
    X##si = Y##si; \
    X##so = Y##so; \
    X##su = Y##su;

View File

@ -0,0 +1,124 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/* Defines the `rounds` macro: prepareTheta, then 24 round-macro invocations
 * (round indices 0..23), then copyToState(state, A).  The Unrolling macro
 * selects how many rounds are written out per loop iteration (24, 12, 8, 6,
 * 4, 3, 2 or 1); any other value is a compile-time error.  Successive rounds
 * alternate between the A and E variable sets.  Every variant except
 * Unrolling == 24 uses a loop counter `i` that is not declared here and must
 * be provided by the code that expands `rounds`. */
#if (Unrolling == 24)
/* Fully unrolled: no loop, and round 23 uses thetaRhoPiChiIota (the variant
 * without PrepareTheta) because no further round follows. */
#define rounds \
prepareTheta \
thetaRhoPiChiIotaPrepareTheta( 0, A, E) \
thetaRhoPiChiIotaPrepareTheta( 1, E, A) \
thetaRhoPiChiIotaPrepareTheta( 2, A, E) \
thetaRhoPiChiIotaPrepareTheta( 3, E, A) \
thetaRhoPiChiIotaPrepareTheta( 4, A, E) \
thetaRhoPiChiIotaPrepareTheta( 5, E, A) \
thetaRhoPiChiIotaPrepareTheta( 6, A, E) \
thetaRhoPiChiIotaPrepareTheta( 7, E, A) \
thetaRhoPiChiIotaPrepareTheta( 8, A, E) \
thetaRhoPiChiIotaPrepareTheta( 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(10, A, E) \
thetaRhoPiChiIotaPrepareTheta(11, E, A) \
thetaRhoPiChiIotaPrepareTheta(12, A, E) \
thetaRhoPiChiIotaPrepareTheta(13, E, A) \
thetaRhoPiChiIotaPrepareTheta(14, A, E) \
thetaRhoPiChiIotaPrepareTheta(15, E, A) \
thetaRhoPiChiIotaPrepareTheta(16, A, E) \
thetaRhoPiChiIotaPrepareTheta(17, E, A) \
thetaRhoPiChiIotaPrepareTheta(18, A, E) \
thetaRhoPiChiIotaPrepareTheta(19, E, A) \
thetaRhoPiChiIotaPrepareTheta(20, A, E) \
thetaRhoPiChiIotaPrepareTheta(21, E, A) \
thetaRhoPiChiIotaPrepareTheta(22, A, E) \
thetaRhoPiChiIota(23, E, A) \
copyToState(state, A)
#elif (Unrolling == 12)
#define rounds \
prepareTheta \
for(i=0; i<24; i+=12) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 7, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+ 8, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+ 9, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+10, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+11, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 8)
#define rounds \
prepareTheta \
for(i=0; i<24; i+=8) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+6, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+7, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 6)
#define rounds \
prepareTheta \
for(i=0; i<24; i+=6) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+4, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+5, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 4)
#define rounds \
prepareTheta \
for(i=0; i<24; i+=4) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
thetaRhoPiChiIotaPrepareTheta(i+3, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 3)
/* An odd number of rounds per iteration leaves the newest values in the E
 * set, so they are copied back into A before the next iteration. */
#define rounds \
prepareTheta \
for(i=0; i<24; i+=3) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
thetaRhoPiChiIotaPrepareTheta(i+2, A, E) \
copyStateVariables(A, E) \
} \
copyToState(state, A)
#elif (Unrolling == 2)
#define rounds \
prepareTheta \
for(i=0; i<24; i+=2) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
thetaRhoPiChiIotaPrepareTheta(i+1, E, A) \
} \
copyToState(state, A)
#elif (Unrolling == 1)
/* One round per iteration: the result lands in E and is copied back into A
 * every time. */
#define rounds \
prepareTheta \
for(i=0; i<24; i++) { \
thetaRhoPiChiIotaPrepareTheta(i , A, E) \
copyStateVariables(A, E) \
} \
copyToState(state, A)
#else
#error "Unrolling is not correctly specified!"
#endif

62
c_src/KeccakF-1600-x86-64-asm.c Executable file
View File

@ -0,0 +1,62 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by Ronny Van Keer,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakF-1600-interface.h"
#define UseBebigokimisa
typedef unsigned char UINT8;
typedef unsigned long long int UINT64;
/*
 * KeccakInitialize: no-op in this implementation; the body is empty.
 * Declared with (void) so the definition is a real prototype and calls that
 * pass arguments are rejected at compile time.
 */
void KeccakInitialize(void)
{
}
/*
 * KeccakExtract: copy the first laneCount 64-bit lanes (laneCount * 8 bytes)
 * of state into data.
 *
 * When UseBebigokimisa is defined, lanes 1, 2, 8, 12, 17 and 20 of the copy
 * are bit-complemented, each one only if it lies inside the extracted range
 * (lane index < laneCount).  The source state is never modified.
 *
 * The complement is done byte by byte: data is an unsigned char pointer, so
 * it carries no 8-byte alignment guarantee and must not be accessed through
 * a 64-bit lvalue.
 */
void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount)
{
#ifdef UseBebigokimisa
    /* Ascending list of the lanes that are complemented in the output. */
    static const unsigned char complementedLanes[] = { 1, 2, 8, 12, 17, 20 };
    unsigned int k, b;
#endif

    memcpy(data, state, (size_t)laneCount * 8);
#ifdef UseBebigokimisa
    for (k = 0; k < sizeof(complementedLanes); k++) {
        unsigned char *lane;

        /* The table is ascending, so the first lane outside the extracted
           range ends the scan. */
        if (complementedLanes[k] >= laneCount)
            break;
        lane = data + 8u * complementedLanes[k];
        for (b = 0; b < 8; b++)
            lane[b] = (unsigned char)~lane[b];
    }
#endif
}

766
c_src/KeccakF-1600-x86-64-gas.s Executable file
View File

@ -0,0 +1,766 @@
#
# The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
# Michaël Peeters and Gilles Van Assche. For more information, feedback or
# questions, please refer to our website: http://keccak.noekeon.org/
#
# Implementation by Ronny Van Keer,
# hereby denoted as "the implementer".
#
# To the extent possible under law, the implementer has waived all copyright
# and related or neighboring rights to the source code in this file.
# http://creativecommons.org/publicdomain/zero/1.0/
#
.text
#// --- defines
# Assembly-time switch read by the .if UseSIMD tests in this file: 0 selects
# the general-purpose-register code paths, any other value the xmm ones.
.equ UseSIMD, 1
# Byte offsets of the 25 lanes (8 bytes each) inside a state buffer.
.equ _ba, 0*8
.equ _be, 1*8
.equ _bi, 2*8
.equ _bo, 3*8
.equ _bu, 4*8
.equ _ga, 5*8
.equ _ge, 6*8
.equ _gi, 7*8
.equ _go, 8*8
.equ _gu, 9*8
.equ _ka, 10*8
.equ _ke, 11*8
.equ _ki, 12*8
.equ _ko, 13*8
.equ _ku, 14*8
.equ _ma, 15*8
.equ _me, 16*8
.equ _mi, 17*8
.equ _mo, 18*8
.equ _mu, 19*8
.equ _sa, 20*8
.equ _se, 21*8
.equ _si, 22*8
.equ _so, 23*8
.equ _su, 24*8
# arguments
.equ apState, %rdi
.equ apInput, %rsi
.equ aNbrWords, %rdx
# xor input into state section
.equ xpState, %r9
# round vars
# NOTE: several names share one register (rpState/apState, rCa/apInput,
# rDi/aNbrWords, rDu/xpState, rCi/rBi, rCo/rBo), so the argument names are
# only valid until the permutation starts.
.equ rT1, %rax
.equ rpState, %rdi
.equ rpStack, %rsp
.equ rDa, %rbx
.equ rDe, %rcx
.equ rDi, %rdx
.equ rDo, %r8
.equ rDu, %r9
.equ rBa, %r10
.equ rBe, %r11
.equ rBi, %r12
.equ rBo, %r13
.equ rBu, %r14
.equ rCa, %rsi
.equ rCe, %rbp
.equ rCi, rBi
.equ rCo, rBo
.equ rCu, %r15
# mKeccakRound iState, oState, rc, lastRound
#   iState    - base register of the 25-lane buffer read by this round
#   oState    - base register of the 25-lane buffer written by this round
#   rc        - 64-bit constant xored into output lane _ba
#   lastRound - when 0, rCa, rCe and rCu are additionally updated with the
#               xor of the a, e and u output lanes for the next invocation;
#               otherwise that bookkeeping is skipped
# On entry rCa, rCe and rCu must hold the xor of the five a, e and u lanes of
# iState, and rDi, rDo must hold lanes _si and _so of iState.  On exit rDi and
# rDo hold the _si and _so lanes just written to oState.
# Rotations use rolq.  The instruction order is hand-scheduled; keep it.
.macro mKeccakRound iState, oState, rc, lastRound
# Column terms: rDa = rol(rCe,1)^rCu, rDe = rol(rCi,1)^rCa,
# rDi = rol(rCo,1)^rCe, rDo = rol(rCu,1)^rCi, rDu = rol(rCa,1)^rCo.
# rCi and rCo are completed here from rDi, rDo and the i and o input lanes.
movq rCe, rDa
rolq rDa
movq _bi(\iState), rCi
xorq _gi(\iState), rDi
xorq rCu, rDa
xorq _ki(\iState), rCi
xorq _mi(\iState), rDi
xorq rDi, rCi
movq rCi, rDe
rolq rDe
movq _bo(\iState), rCo
xorq _go(\iState), rDo
xorq rCa, rDe
xorq _ko(\iState), rCo
xorq _mo(\iState), rDo
xorq rDo, rCo
movq rCo, rDi
rolq rDi
movq rCu, rDo
xorq rCe, rDi
rolq rDo
movq rCa, rDu
xorq rCi, rDo
rolq rDu
# Output lanes _ba, _be, _bi, _bo, _bu (rc is xored into _ba).
movq _ba(\iState), rBa
movq _ge(\iState), rBe
xorq rCo, rDu
movq _ki(\iState), rBi
movq _mo(\iState), rBo
movq _su(\iState), rBu
xorq rDe, rBe
rolq $44, rBe
xorq rDi, rBi
xorq rDa, rBa
rolq $43, rBi
movq rBe, rCa
movq $\rc, rT1
orq rBi, rCa
xorq rBa, rT1
xorq rT1, rCa
movq rCa, _ba(\oState)
xorq rDu, rBu
rolq $14, rBu
movq rBa, rCu
andq rBe, rCu
xorq rBu, rCu
movq rCu, _bu(\oState)
xorq rDo, rBo
rolq $21, rBo
movq rBo, rT1
andq rBu, rT1
xorq rBi, rT1
movq rT1, _bi(\oState)
notq rBi
orq rBa, rBu
orq rBo, rBi
xorq rBo, rBu
xorq rBe, rBi
movq rBu, _bo(\oState)
movq rBi, _be(\oState)
.if \lastRound == 0
movq rBi, rCe
.endif
# Output lanes _ga, _ge, _gi, _go, _gu.
movq _gu(\iState), rBe
xorq rDu, rBe
movq _ka(\iState), rBi
rolq $20, rBe
xorq rDa, rBi
rolq $3, rBi
movq _bo(\iState), rBa
movq rBe, rT1
orq rBi, rT1
xorq rDo, rBa
movq _me(\iState), rBo
movq _si(\iState), rBu
rolq $28, rBa
xorq rBa, rT1
movq rT1, _ga(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDe, rBo
rolq $45, rBo
movq rBi, rT1
andq rBo, rT1
xorq rBe, rT1
movq rT1, _ge(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDi, rBu
rolq $61, rBu
movq rBu, rT1
orq rBa, rT1
xorq rBo, rT1
movq rT1, _go(\oState)
andq rBe, rBa
xorq rBu, rBa
movq rBa, _gu(\oState)
notq rBu
.if \lastRound == 0
xorq rBa, rCu
.endif
orq rBu, rBo
xorq rBi, rBo
movq rBo, _gi(\oState)
# Output lanes _ka, _ke, _ki, _ko, _ku.
movq _be(\iState), rBa
movq _gi(\iState), rBe
movq _ko(\iState), rBi
movq _mu(\iState), rBo
movq _sa(\iState), rBu
xorq rDi, rBe
rolq $6, rBe
xorq rDo, rBi
rolq $25, rBi
movq rBe, rT1
orq rBi, rT1
xorq rDe, rBa
rolq $1, rBa
xorq rBa, rT1
movq rT1, _ka(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDu, rBo
rolq $8, rBo
movq rBi, rT1
andq rBo, rT1
xorq rBe, rT1
movq rT1, _ke(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDa, rBu
rolq $18, rBu
notq rBo
movq rBo, rT1
andq rBu, rT1
xorq rBi, rT1
movq rT1, _ki(\oState)
movq rBu, rT1
orq rBa, rT1
xorq rBo, rT1
movq rT1, _ko(\oState)
andq rBe, rBa
xorq rBu, rBa
movq rBa, _ku(\oState)
.if \lastRound == 0
xorq rBa, rCu
.endif
# Output lanes _ma, _me, _mi, _mo, _mu.
movq _ga(\iState), rBe
xorq rDa, rBe
movq _ke(\iState), rBi
rolq $36, rBe
xorq rDe, rBi
movq _bu(\iState), rBa
rolq $10, rBi
movq rBe, rT1
movq _mi(\iState), rBo
andq rBi, rT1
xorq rDu, rBa
movq _so(\iState), rBu
rolq $27, rBa
xorq rBa, rT1
movq rT1, _ma(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDi, rBo
rolq $15, rBo
movq rBi, rT1
orq rBo, rT1
xorq rBe, rT1
movq rT1, _me(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDo, rBu
rolq $56, rBu
notq rBo
movq rBo, rT1
orq rBu, rT1
xorq rBi, rT1
movq rT1, _mi(\oState)
orq rBa, rBe
xorq rBu, rBe
movq rBe, _mu(\oState)
andq rBa, rBu
xorq rBo, rBu
movq rBu, _mo(\oState)
.if \lastRound == 0
xorq rBe, rCu
.endif
# Output lanes _sa, _se, _si, _so, _su.  From here rDu, rDa, rDe, rDi and rDo
# are reused as temporaries; rDi and rDo end up holding the new _si and _so.
movq _bi(\iState), rBa
movq _go(\iState), rBe
movq _ku(\iState), rBi
xorq rDi, rBa
movq _ma(\iState), rBo
rolq $62, rBa
xorq rDo, rBe
movq _se(\iState), rBu
rolq $55, rBe
xorq rDu, rBi
movq rBa, rDu
xorq rDe, rBu
rolq $2, rBu
andq rBe, rDu
xorq rBu, rDu
movq rDu, _su(\oState)
rolq $39, rBi
.if \lastRound == 0
xorq rDu, rCu
.endif
notq rBe
xorq rDa, rBo
movq rBe, rDa
andq rBi, rDa
xorq rBa, rDa
movq rDa, _sa(\oState)
.if \lastRound == 0
xor rDa, rCa
.endif
rolq $41, rBo
movq rBi, rDe
orq rBo, rDe
xorq rBe, rDe
movq rDe, _se(\oState)
.if \lastRound == 0
xorq rDe, rCe
.endif
movq rBo, rDi
movq rBu, rDo
andq rBu, rDi
orq rBa, rDo
xorq rBi, rDi
xorq rBo, rDo
movq rDi, _si(\oState)
movq rDo, _so(\oState)
.endm
# mKeccakPermutation
# Applies mKeccakRound 24 times to the state addressed by rpState.  A 25-lane
# scratch buffer is reserved on the stack; rounds alternate state -> stack and
# stack -> state, so the final round (lastRound = 1) writes its result back
# into the state buffer.  Every register in the round-variable list is
# overwritten; the functions in this file wrap the macro in mPushRegs and
# mPopRegs.
.macro mKeccakPermutation
subq $8*25, %rsp
# Seed the round inputs: rCa, rCe, rCu get the xor of the five a, e and u
# lanes; rDi and rDo get lanes _si and _so.
movq _ba(rpState), rCa
movq _be(rpState), rCe
movq _bu(rpState), rCu
xorq _ga(rpState), rCa
xorq _ge(rpState), rCe
xorq _gu(rpState), rCu
xorq _ka(rpState), rCa
xorq _ke(rpState), rCe
xorq _ku(rpState), rCu
xorq _ma(rpState), rCa
xorq _me(rpState), rCe
xorq _mu(rpState), rCu
xorq _sa(rpState), rCa
xorq _se(rpState), rCe
movq _si(rpState), rDi
movq _so(rpState), rDo
xorq _su(rpState), rCu
# The rounds, each with its own constant.
mKeccakRound rpState, rpStack, 0x0000000000000001, 0
mKeccakRound rpStack, rpState, 0x0000000000008082, 0
mKeccakRound rpState, rpStack, 0x800000000000808a, 0
mKeccakRound rpStack, rpState, 0x8000000080008000, 0
mKeccakRound rpState, rpStack, 0x000000000000808b, 0
mKeccakRound rpStack, rpState, 0x0000000080000001, 0
mKeccakRound rpState, rpStack, 0x8000000080008081, 0
mKeccakRound rpStack, rpState, 0x8000000000008009, 0
mKeccakRound rpState, rpStack, 0x000000000000008a, 0
mKeccakRound rpStack, rpState, 0x0000000000000088, 0
mKeccakRound rpState, rpStack, 0x0000000080008009, 0
mKeccakRound rpStack, rpState, 0x000000008000000a, 0
mKeccakRound rpState, rpStack, 0x000000008000808b, 0
mKeccakRound rpStack, rpState, 0x800000000000008b, 0
mKeccakRound rpState, rpStack, 0x8000000000008089, 0
mKeccakRound rpStack, rpState, 0x8000000000008003, 0
mKeccakRound rpState, rpStack, 0x8000000000008002, 0
mKeccakRound rpStack, rpState, 0x8000000000000080, 0
mKeccakRound rpState, rpStack, 0x000000000000800a, 0
mKeccakRound rpStack, rpState, 0x800000008000000a, 0
mKeccakRound rpState, rpStack, 0x8000000080008081, 0
mKeccakRound rpStack, rpState, 0x8000000000008080, 0
mKeccakRound rpState, rpStack, 0x0000000080000001, 0
mKeccakRound rpStack, rpState, 0x8000000080008008, 1
# Release the scratch buffer.
addq $8*25, %rsp
.endm
# mPushRegs
# Pushes rbx, rbp and r12-r15 (six registers, 48 bytes of stack).  Must be
# paired with mPopRegs, which pops them in the reverse order.
.macro mPushRegs
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.endm
# mPopRegs
# Pops r15-r12, rbp and rbx: the exact reverse of the push order used by
# mPushRegs.
.macro mPopRegs
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.endm
# mXorState128 input, state, offset
# Xors the 16 bytes at offset(input) into the 16 bytes at offset(state).
# UseSIMD == 0: overwrites rax and rcx.  Otherwise: overwrites xmm0.
# NOTE(review): in the SIMD path the input is read with movdqu, but the state
# is a direct pxor memory operand, which for non-VEX SSE requires a 16-byte
# aligned address - confirm callers guarantee that for state+offset.
.macro mXorState128 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %rcx
xorq %rax, \offset(\state)
xorq %rcx, \offset+8(\state)
.else
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu %xmm0, \offset(\state)
.endif
.endm
# mXorState256 input, state, offset
# Xors the 32 bytes at offset(input) into the 32 bytes at offset(state).
# UseSIMD == 0: overwrites rax, r10, rcx and r8.  Otherwise: overwrites xmm0
# and xmm1.
# NOTE(review): in the SIMD path the state is a direct pxor memory operand,
# which for non-VEX SSE requires a 16-byte aligned address - confirm callers
# guarantee that for state+offset.
.macro mXorState256 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %r10
movq \offset+16(\input), %rcx
movq \offset+24(\input), %r8
xorq %rax, \offset(\state)
xorq %r10, \offset+8(\state)
xorq %rcx, \offset+16(\state)
xorq %r8, \offset+24(\state)
.else
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu %xmm1, \offset+16(\state)
.endif
.endm
# mXorState512 input, state, offset
# Xors the 64 bytes at offset(input) into the 64 bytes at offset(state).
# UseSIMD == 0: expands mXorState256 twice (offset and offset+32).
# Otherwise: overwrites xmm0-xmm3, with loads, xors and stores interleaved.
# NOTE(review): in the SIMD path the state is a direct pxor memory operand,
# which for non-VEX SSE requires a 16-byte aligned address - confirm callers
# guarantee that for state+offset.
.macro mXorState512 input, state, offset
.if UseSIMD == 0
mXorState256 \input, \state, \offset
mXorState256 \input, \state, \offset+32
.else
movdqu \offset(\input), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset(\state), %xmm0
movdqu \offset+32(\input), %xmm2
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu \offset+48(\input), %xmm3
pxor \offset+32(\state), %xmm2
movdqu %xmm1, \offset+16(\state)
pxor \offset+48(\state), %xmm3
movdqu %xmm2, \offset+32(\state)
movdqu %xmm3, \offset+48(\state)
.endif
.endm
# -------------------------------------------------------------------------
# KeccakPermutation(state)
#   state (rdi): 25 lanes of 8 bytes, permuted in place.
# rbx, rbp and r12-r15 are saved and restored around the permutation.
    .align 2
    .global KeccakPermutation
    .type KeccakPermutation, %function
KeccakPermutation:
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakPermutation, .-KeccakPermutation
# -------------------------------------------------------------------------
# KeccakAbsorb576bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 72 bytes (9 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb576bits
    .type KeccakAbsorb576bits, %function
KeccakAbsorb576bits:
    mXorState512 apInput, apState, 0
    movq 64(apInput), %rax
    xorq %rax, 64(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb576bits, .-KeccakAbsorb576bits
# -------------------------------------------------------------------------
# KeccakAbsorb832bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 104 bytes (13 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb832bits
    .type KeccakAbsorb832bits, %function
KeccakAbsorb832bits:
    mXorState512 apInput, apState, 0
    mXorState256 apInput, apState, 64
    movq 96(apInput), %rax
    xorq %rax, 96(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb832bits, .-KeccakAbsorb832bits
# -------------------------------------------------------------------------
# KeccakAbsorb1024bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 128 bytes (16 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1024bits
    .type KeccakAbsorb1024bits, %function
KeccakAbsorb1024bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1024bits, .-KeccakAbsorb1024bits
# -------------------------------------------------------------------------
# KeccakAbsorb1088bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 136 bytes (17 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1088bits
    .type KeccakAbsorb1088bits, %function
KeccakAbsorb1088bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    movq 128(apInput), %rax
    xorq %rax, 128(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1088bits, .-KeccakAbsorb1088bits
# -------------------------------------------------------------------------
# KeccakAbsorb1152bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 144 bytes (18 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1152bits
    .type KeccakAbsorb1152bits, %function
KeccakAbsorb1152bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    mXorState128 apInput, apState, 128
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1152bits, .-KeccakAbsorb1152bits
# -------------------------------------------------------------------------
# KeccakAbsorb1344bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 168 bytes (21 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1344bits
    .type KeccakAbsorb1344bits, %function
KeccakAbsorb1344bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    mXorState256 apInput, apState, 128
    movq 160(apInput), %rax
    xorq %rax, 160(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1344bits, .-KeccakAbsorb1344bits
# -------------------------------------------------------------------------
# KeccakAbsorb(state, input, laneCount)
#   state     (rdi): 25-lane state, updated in place.
#   input     (rsi): laneCount lanes of 8 bytes, xored into the start of the
#                    state before the permutation runs.
#   laneCount (rdx): only bits 16, 8, 4, 2 and 1 are examined; each set bit
#                    xors a chunk of that many lanes, largest chunk first.
# xpState (r9) walks through the state while apState keeps the base address
# that the permutation needs afterwards.
    .align 2
    .global KeccakAbsorb
    .type KeccakAbsorb, %function
KeccakAbsorb:
    movq apState, xpState
    # chunk of 16 lanes (128 bytes)
    test $16, aNbrWords
    jz xorInputToState8
    mXorState512 apInput, xpState, 0
    mXorState512 apInput, xpState, 64
    addq $128, apInput
    addq $128, xpState
xorInputToState8:
    # chunk of 8 lanes (64 bytes)
    test $8, aNbrWords
    jz xorInputToState4
    mXorState512 apInput, xpState, 0
    addq $64, apInput
    addq $64, xpState
xorInputToState4:
    # chunk of 4 lanes (32 bytes)
    test $4, aNbrWords
    jz xorInputToState2
    mXorState256 apInput, xpState, 0
    addq $32, apInput
    addq $32, xpState
xorInputToState2:
    # chunk of 2 lanes (16 bytes)
    test $2, aNbrWords
    jz xorInputToState1
    mXorState128 apInput, xpState, 0
    addq $16, apInput
    addq $16, xpState
xorInputToState1:
    # single trailing lane
    test $1, aNbrWords
    jz xorInputToStateDone
    movq (apInput), %rax
    xorq %rax, (xpState)
xorInputToStateDone:
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb, .-KeccakAbsorb
# -------------------------------------------------------------------------
# KeccakInitializeState(state)
#   state (rdi): 25-lane buffer to initialise.
# Lanes 1, 2, 8, 12, 17 and 20 are set to all-ones and every other lane to
# zero.  Both code paths write the same values; the SIMD path stores pairs of
# zero lanes with one 16-byte movdqu.  Overwrites rax, rcx and (SIMD) xmm0.
    .align 2
    .global KeccakInitializeState
    .type KeccakInitializeState, %function
KeccakInitializeState:
    # rax = 0, rcx = all-ones
    xorq %rax, %rax
    xorq %rcx, %rcx
    notq %rcx
    .if UseSIMD == 0
    movq %rax, 0*8(apState)
    movq %rcx, 1*8(apState)
    movq %rcx, 2*8(apState)
    movq %rax, 3*8(apState)
    movq %rax, 4*8(apState)
    movq %rax, 5*8(apState)
    movq %rax, 6*8(apState)
    movq %rax, 7*8(apState)
    movq %rcx, 8*8(apState)
    movq %rax, 9*8(apState)
    movq %rax, 10*8(apState)
    movq %rax, 11*8(apState)
    movq %rcx, 12*8(apState)
    movq %rax, 13*8(apState)
    movq %rax, 14*8(apState)
    movq %rax, 15*8(apState)
    movq %rax, 16*8(apState)
    movq %rcx, 17*8(apState)
    movq %rax, 18*8(apState)
    movq %rax, 19*8(apState)
    movq %rcx, 20*8(apState)
    movq %rax, 21*8(apState)
    movq %rax, 22*8(apState)
    movq %rax, 23*8(apState)
    movq %rax, 24*8(apState)
    .else
    pxor %xmm0, %xmm0
    movq %rax, 0*8(apState)
    movq %rcx, 1*8(apState)
    movq %rcx, 2*8(apState)
    movq %rax, 3*8(apState)
    movdqu %xmm0, 4*8(apState)
    movdqu %xmm0, 6*8(apState)
    movq %rcx, 8*8(apState)
    movq %rax, 9*8(apState)
    movdqu %xmm0, 10*8(apState)
    movq %rcx, 12*8(apState)
    movq %rax, 13*8(apState)
    movdqu %xmm0, 14*8(apState)
    movq %rax, 16*8(apState)
    movq %rcx, 17*8(apState)
    movdqu %xmm0, 18*8(apState)
    movq %rcx, 20*8(apState)
    movq %rax, 21*8(apState)
    movdqu %xmm0, 22*8(apState)
    movq %rax, 24*8(apState)
    .endif
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakInitializeState, .-KeccakInitializeState
# -------------------------------------------------------------------------
# KeccakExtract1024bits(state, data)
#   state (rdi): source state; lanes 0-15 are read, nothing is written to it.
#   data  (rsi): destination, receives 128 bytes (16 lanes).
# Lanes 1, 2, 8 and 12 are bit-complemented on the way out; the other lanes
# are copied unchanged.  Overwrites rax, rcx, rdx and r8.
    .align 2
    .global KeccakExtract1024bits
    .type KeccakExtract1024bits, %function
KeccakExtract1024bits:
    # lanes 0-3 (1 and 2 complemented)
    movq 0*8(apState), %rax
    movq 1*8(apState), %rcx
    movq 2*8(apState), %rdx
    movq 3*8(apState), %r8
    notq %rcx
    notq %rdx
    movq %rax, 0*8(%rsi)
    movq %rcx, 1*8(%rsi)
    movq %rdx, 2*8(%rsi)
    movq %r8, 3*8(%rsi)
    # lanes 4-7
    movq 4*8(apState), %rax
    movq 5*8(apState), %rcx
    movq 6*8(apState), %rdx
    movq 7*8(apState), %r8
    movq %rax, 4*8(%rsi)
    movq %rcx, 5*8(%rsi)
    movq %rdx, 6*8(%rsi)
    movq %r8, 7*8(%rsi)
    # lanes 8-11 (8 complemented)
    movq 8*8(apState), %rax
    movq 9*8(apState), %rcx
    movq 10*8(apState), %rdx
    movq 11*8(apState), %r8
    notq %rax
    movq %rax, 8*8(%rsi)
    movq %rcx, 9*8(%rsi)
    movq %rdx, 10*8(%rsi)
    movq %r8, 11*8(%rsi)
    # lanes 12-15 (12 complemented)
    movq 12*8(apState), %rax
    movq 13*8(apState), %rcx
    movq 14*8(apState), %rdx
    movq 15*8(apState), %r8
    notq %rax
    movq %rax, 12*8(%rsi)
    movq %rcx, 13*8(%rsi)
    movq %rdx, 14*8(%rsi)
    movq %r8, 15*8(%rsi)
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakExtract1024bits, .-KeccakExtract1024bits

View File

@ -0,0 +1,766 @@
#
# The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
# Michaël Peeters and Gilles Van Assche. For more information, feedback or
# questions, please refer to our website: http://keccak.noekeon.org/
#
# Implementation by Ronny Van Keer,
# hereby denoted as "the implementer".
#
# To the extent possible under law, the implementer has waived all copyright
# and related or neighboring rights to the source code in this file.
# http://creativecommons.org/publicdomain/zero/1.0/
#
.text
#// --- defines
# Assembly-time switch read by the .if UseSIMD tests in this file: 0 selects
# the general-purpose-register code paths, any other value the xmm ones.
.equ UseSIMD, 1
# Byte offsets of the 25 lanes (8 bytes each) inside a state buffer.
.equ _ba, 0*8
.equ _be, 1*8
.equ _bi, 2*8
.equ _bo, 3*8
.equ _bu, 4*8
.equ _ga, 5*8
.equ _ge, 6*8
.equ _gi, 7*8
.equ _go, 8*8
.equ _gu, 9*8
.equ _ka, 10*8
.equ _ke, 11*8
.equ _ki, 12*8
.equ _ko, 13*8
.equ _ku, 14*8
.equ _ma, 15*8
.equ _me, 16*8
.equ _mi, 17*8
.equ _mo, 18*8
.equ _mu, 19*8
.equ _sa, 20*8
.equ _se, 21*8
.equ _si, 22*8
.equ _so, 23*8
.equ _su, 24*8
# arguments
.equ apState, %rdi
.equ apInput, %rsi
.equ aNbrWords, %rdx
# xor input into state section
.equ xpState, %r9
# round vars
# NOTE: several names share one register (rpState/apState, rCa/apInput,
# rDi/aNbrWords, rDu/xpState, rCi/rBi, rCo/rBo), so the argument names are
# only valid until the permutation starts.
.equ rT1, %rax
.equ rpState, %rdi
.equ rpStack, %rsp
.equ rDa, %rbx
.equ rDe, %rcx
.equ rDi, %rdx
.equ rDo, %r8
.equ rDu, %r9
.equ rBa, %r10
.equ rBe, %r11
.equ rBi, %r12
.equ rBo, %r13
.equ rBu, %r14
.equ rCa, %rsi
.equ rCe, %rbp
.equ rCi, rBi
.equ rCo, rBo
.equ rCu, %r15
# mKeccakRound iState, oState, rc, lastRound
#   iState    - base register of the 25-lane buffer read by this round
#   oState    - base register of the 25-lane buffer written by this round
#   rc        - 64-bit constant xored into output lane _ba
#   lastRound - when 0, rCa, rCe and rCu are additionally updated with the
#               xor of the a, e and u output lanes for the next invocation;
#               otherwise that bookkeeping is skipped
# On entry rCa, rCe and rCu must hold the xor of the five a, e and u lanes of
# iState, and rDi, rDo must hold lanes _si and _so of iState.  On exit rDi and
# rDo hold the _si and _so lanes just written to oState.
# Rotations are written as shld with the same register as both operands,
# which rotates that register left.  The instruction order is hand-scheduled;
# keep it.
.macro mKeccakRound iState, oState, rc, lastRound
# Column terms: rDa = rol(rCe,1)^rCu, rDe = rol(rCi,1)^rCa,
# rDi = rol(rCo,1)^rCe, rDo = rol(rCu,1)^rCi, rDu = rol(rCa,1)^rCo.
# rCi and rCo are completed here from rDi, rDo and the i and o input lanes.
movq rCe, rDa
shld $1, rDa, rDa
movq _bi(\iState), rCi
xorq _gi(\iState), rDi
xorq _ki(\iState), rCi
xorq rCu, rDa
xorq _mi(\iState), rDi
xorq rDi, rCi
movq rCi, rDe
shld $1, rDe, rDe
movq _bo(\iState), rCo
xorq _go(\iState), rDo
xorq _ko(\iState), rCo
xorq rCa, rDe
xorq _mo(\iState), rDo
xorq rDo, rCo
movq rCo, rDi
shld $1, rDi, rDi
movq rCu, rDo
xorq rCe, rDi
shld $1, rDo, rDo
movq rCa, rDu
xorq rCi, rDo
shld $1, rDu, rDu
# Output lanes _ba, _be, _bi, _bo, _bu (rc is xored into _ba).
movq _ba(\iState), rBa
movq _ge(\iState), rBe
xorq rCo, rDu
movq _ki(\iState), rBi
movq _mo(\iState), rBo
movq _su(\iState), rBu
xorq rDe, rBe
shld $44, rBe, rBe
xorq rDi, rBi
xorq rDa, rBa
shld $43, rBi, rBi
movq rBe, rCa
movq $\rc, rT1
orq rBi, rCa
xorq rBa, rT1
xorq rT1, rCa
movq rCa, _ba(\oState)
xorq rDu, rBu
shld $14, rBu, rBu
movq rBa, rCu
andq rBe, rCu
xorq rBu, rCu
movq rCu, _bu(\oState)
xorq rDo, rBo
shld $21, rBo, rBo
movq rBo, rT1
andq rBu, rT1
xorq rBi, rT1
movq rT1, _bi(\oState)
notq rBi
orq rBa, rBu
orq rBo, rBi
xorq rBo, rBu
xorq rBe, rBi
movq rBu, _bo(\oState)
movq rBi, _be(\oState)
.if \lastRound == 0
movq rBi, rCe
.endif
# Output lanes _ga, _ge, _gi, _go, _gu.
movq _gu(\iState), rBe
xorq rDu, rBe
movq _ka(\iState), rBi
shld $20, rBe, rBe
xorq rDa, rBi
shld $3, rBi, rBi
movq _bo(\iState), rBa
movq rBe, rT1
orq rBi, rT1
xorq rDo, rBa
movq _me(\iState), rBo
movq _si(\iState), rBu
shld $28, rBa, rBa
xorq rBa, rT1
movq rT1, _ga(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDe, rBo
shld $45, rBo, rBo
movq rBi, rT1
andq rBo, rT1
xorq rBe, rT1
movq rT1, _ge(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDi, rBu
shld $61, rBu, rBu
movq rBu, rT1
orq rBa, rT1
xorq rBo, rT1
movq rT1, _go(\oState)
andq rBe, rBa
xorq rBu, rBa
movq rBa, _gu(\oState)
notq rBu
.if \lastRound == 0
xorq rBa, rCu
.endif
orq rBu, rBo
xorq rBi, rBo
movq rBo, _gi(\oState)
# Output lanes _ka, _ke, _ki, _ko, _ku.
movq _be(\iState), rBa
movq _gi(\iState), rBe
movq _ko(\iState), rBi
movq _mu(\iState), rBo
movq _sa(\iState), rBu
xorq rDi, rBe
shld $6, rBe, rBe
xorq rDo, rBi
shld $25, rBi, rBi
movq rBe, rT1
orq rBi, rT1
xorq rDe, rBa
shld $1, rBa, rBa
xorq rBa, rT1
movq rT1, _ka(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDu, rBo
shld $8, rBo, rBo
movq rBi, rT1
andq rBo, rT1
xorq rBe, rT1
movq rT1, _ke(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDa, rBu
shld $18, rBu, rBu
notq rBo
movq rBo, rT1
andq rBu, rT1
xorq rBi, rT1
movq rT1, _ki(\oState)
movq rBu, rT1
orq rBa, rT1
xorq rBo, rT1
movq rT1, _ko(\oState)
andq rBe, rBa
xorq rBu, rBa
movq rBa, _ku(\oState)
.if \lastRound == 0
xorq rBa, rCu
.endif
# Output lanes _ma, _me, _mi, _mo, _mu.
movq _ga(\iState), rBe
xorq rDa, rBe
movq _ke(\iState), rBi
shld $36, rBe, rBe
xorq rDe, rBi
movq _bu(\iState), rBa
shld $10, rBi, rBi
movq rBe, rT1
movq _mi(\iState), rBo
andq rBi, rT1
xorq rDu, rBa
movq _so(\iState), rBu
shld $27, rBa, rBa
xorq rBa, rT1
movq rT1, _ma(\oState)
.if \lastRound == 0
xor rT1, rCa
.endif
xorq rDi, rBo
shld $15, rBo, rBo
movq rBi, rT1
orq rBo, rT1
xorq rBe, rT1
movq rT1, _me(\oState)
.if \lastRound == 0
xorq rT1, rCe
.endif
xorq rDo, rBu
shld $56, rBu, rBu
notq rBo
movq rBo, rT1
orq rBu, rT1
xorq rBi, rT1
movq rT1, _mi(\oState)
orq rBa, rBe
xorq rBu, rBe
movq rBe, _mu(\oState)
andq rBa, rBu
xorq rBo, rBu
movq rBu, _mo(\oState)
.if \lastRound == 0
xorq rBe, rCu
.endif
# Output lanes _sa, _se, _si, _so, _su.  From here rDu, rDa, rDe, rDi and rDo
# are reused as temporaries; rDi and rDo end up holding the new _si and _so.
movq _bi(\iState), rBa
movq _go(\iState), rBe
movq _ku(\iState), rBi
xorq rDi, rBa
movq _ma(\iState), rBo
shld $62, rBa, rBa
xorq rDo, rBe
movq _se(\iState), rBu
shld $55, rBe, rBe
xorq rDu, rBi
movq rBa, rDu
xorq rDe, rBu
shld $2, rBu, rBu
andq rBe, rDu
xorq rBu, rDu
movq rDu, _su(\oState)
shld $39, rBi, rBi
.if \lastRound == 0
xorq rDu, rCu
.endif
notq rBe
xorq rDa, rBo
movq rBe, rDa
andq rBi, rDa
xorq rBa, rDa
movq rDa, _sa(\oState)
.if \lastRound == 0
xor rDa, rCa
.endif
shld $41, rBo, rBo
movq rBi, rDe
orq rBo, rDe
xorq rBe, rDe
movq rDe, _se(\oState)
.if \lastRound == 0
xorq rDe, rCe
.endif
movq rBo, rDi
movq rBu, rDo
andq rBu, rDi
orq rBa, rDo
xorq rBi, rDi
xorq rBo, rDo
movq rDi, _si(\oState)
movq rDo, _so(\oState)
.endm
# mKeccakPermutation
# Applies mKeccakRound 24 times to the state addressed by rpState.  A 25-lane
# scratch buffer is reserved on the stack; rounds alternate state -> stack and
# stack -> state, so the final round (lastRound = 1) writes its result back
# into the state buffer.  Every register in the round-variable list is
# overwritten; the functions in this file wrap the macro in mPushRegs and
# mPopRegs.
.macro mKeccakPermutation
subq $8*25, %rsp
# Seed the round inputs: rCa, rCe, rCu get the xor of the five a, e and u
# lanes; rDi and rDo get lanes _si and _so.
movq _ba(rpState), rCa
movq _be(rpState), rCe
movq _bu(rpState), rCu
xorq _ga(rpState), rCa
xorq _ge(rpState), rCe
xorq _gu(rpState), rCu
xorq _ka(rpState), rCa
xorq _ke(rpState), rCe
xorq _ku(rpState), rCu
xorq _ma(rpState), rCa
xorq _me(rpState), rCe
xorq _mu(rpState), rCu
xorq _sa(rpState), rCa
xorq _se(rpState), rCe
movq _si(rpState), rDi
movq _so(rpState), rDo
xorq _su(rpState), rCu
# The rounds, each with its own constant.
mKeccakRound rpState, rpStack, 0x0000000000000001, 0
mKeccakRound rpStack, rpState, 0x0000000000008082, 0
mKeccakRound rpState, rpStack, 0x800000000000808a, 0
mKeccakRound rpStack, rpState, 0x8000000080008000, 0
mKeccakRound rpState, rpStack, 0x000000000000808b, 0
mKeccakRound rpStack, rpState, 0x0000000080000001, 0
mKeccakRound rpState, rpStack, 0x8000000080008081, 0
mKeccakRound rpStack, rpState, 0x8000000000008009, 0
mKeccakRound rpState, rpStack, 0x000000000000008a, 0
mKeccakRound rpStack, rpState, 0x0000000000000088, 0
mKeccakRound rpState, rpStack, 0x0000000080008009, 0
mKeccakRound rpStack, rpState, 0x000000008000000a, 0
mKeccakRound rpState, rpStack, 0x000000008000808b, 0
mKeccakRound rpStack, rpState, 0x800000000000008b, 0
mKeccakRound rpState, rpStack, 0x8000000000008089, 0
mKeccakRound rpStack, rpState, 0x8000000000008003, 0
mKeccakRound rpState, rpStack, 0x8000000000008002, 0
mKeccakRound rpStack, rpState, 0x8000000000000080, 0
mKeccakRound rpState, rpStack, 0x000000000000800a, 0
mKeccakRound rpStack, rpState, 0x800000008000000a, 0
mKeccakRound rpState, rpStack, 0x8000000080008081, 0
mKeccakRound rpStack, rpState, 0x8000000000008080, 0
mKeccakRound rpState, rpStack, 0x0000000080000001, 0
mKeccakRound rpStack, rpState, 0x8000000080008008, 1
# Release the scratch buffer.
addq $8*25, %rsp
.endm
# mPushRegs
# Pushes rbx, rbp and r12-r15 (six registers, 48 bytes of stack).  Must be
# paired with mPopRegs, which pops them in the reverse order.
.macro mPushRegs
pushq %rbx
pushq %rbp
pushq %r12
pushq %r13
pushq %r14
pushq %r15
.endm
# mPopRegs
# Pops r15-r12, rbp and rbx: the exact reverse of the push order used by
# mPushRegs.
.macro mPopRegs
popq %r15
popq %r14
popq %r13
popq %r12
popq %rbp
popq %rbx
.endm
# mXorState128 input, state, offset
# Xors the 16 bytes at offset(input) into the 16 bytes at offset(state).
# UseSIMD == 0: overwrites rax and rcx.  Otherwise: overwrites xmm0.
# NOTE(review): in the SIMD path the input is read with movdqu, but the state
# is a direct pxor memory operand, which for non-VEX SSE requires a 16-byte
# aligned address - confirm callers guarantee that for state+offset.
.macro mXorState128 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %rcx
xorq %rax, \offset(\state)
xorq %rcx, \offset+8(\state)
.else
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu %xmm0, \offset(\state)
.endif
.endm
# mXorState256 input, state, offset
# Xors the 32 bytes at offset(input) into the 32 bytes at offset(state).
# UseSIMD == 0: overwrites rax, r10, rcx and r8.  Otherwise: overwrites xmm0
# and xmm1.
# NOTE(review): in the SIMD path the state is a direct pxor memory operand,
# which for non-VEX SSE requires a 16-byte aligned address - confirm callers
# guarantee that for state+offset.
.macro mXorState256 input, state, offset
.if UseSIMD == 0
movq \offset(\input), %rax
movq \offset+8(\input), %r10
movq \offset+16(\input), %rcx
movq \offset+24(\input), %r8
xorq %rax, \offset(\state)
xorq %r10, \offset+8(\state)
xorq %rcx, \offset+16(\state)
xorq %r8, \offset+24(\state)
.else
movdqu \offset(\input), %xmm0
pxor \offset(\state), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu %xmm1, \offset+16(\state)
.endif
.endm
# mXorState512 input, state, offset
# Xors the 64 bytes at offset(input) into the 64 bytes at offset(state).
# UseSIMD == 0: expands mXorState256 twice (offset and offset+32).
# Otherwise: overwrites xmm0-xmm3, with loads, xors and stores interleaved.
# NOTE(review): in the SIMD path the state is a direct pxor memory operand,
# which for non-VEX SSE requires a 16-byte aligned address - confirm callers
# guarantee that for state+offset.
.macro mXorState512 input, state, offset
.if UseSIMD == 0
mXorState256 \input, \state, \offset
mXorState256 \input, \state, \offset+32
.else
movdqu \offset(\input), %xmm0
movdqu \offset+16(\input), %xmm1
pxor \offset(\state), %xmm0
movdqu \offset+32(\input), %xmm2
pxor \offset+16(\state), %xmm1
movdqu %xmm0, \offset(\state)
movdqu \offset+48(\input), %xmm3
pxor \offset+32(\state), %xmm2
movdqu %xmm1, \offset+16(\state)
pxor \offset+48(\state), %xmm3
movdqu %xmm2, \offset+32(\state)
movdqu %xmm3, \offset+48(\state)
.endif
.endm
# -------------------------------------------------------------------------
# KeccakPermutation(state)
#   state (rdi): 25 lanes of 8 bytes, permuted in place.
# rbx, rbp and r12-r15 are saved and restored around the permutation.
    .align 2
    .global KeccakPermutation
    .type KeccakPermutation, %function
KeccakPermutation:
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakPermutation, .-KeccakPermutation
# -------------------------------------------------------------------------
# KeccakAbsorb576bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 72 bytes (9 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb576bits
    .type KeccakAbsorb576bits, %function
KeccakAbsorb576bits:
    mXorState512 apInput, apState, 0
    movq 64(apInput), %rax
    xorq %rax, 64(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb576bits, .-KeccakAbsorb576bits
# -------------------------------------------------------------------------
# KeccakAbsorb832bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 104 bytes (13 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb832bits
    .type KeccakAbsorb832bits, %function
KeccakAbsorb832bits:
    mXorState512 apInput, apState, 0
    mXorState256 apInput, apState, 64
    movq 96(apInput), %rax
    xorq %rax, 96(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb832bits, .-KeccakAbsorb832bits
# -------------------------------------------------------------------------
# KeccakAbsorb1024bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 128 bytes (16 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1024bits
    .type KeccakAbsorb1024bits, %function
KeccakAbsorb1024bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1024bits, .-KeccakAbsorb1024bits
# -------------------------------------------------------------------------
# KeccakAbsorb1088bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 136 bytes (17 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1088bits
    .type KeccakAbsorb1088bits, %function
KeccakAbsorb1088bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    movq 128(apInput), %rax
    xorq %rax, 128(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1088bits, .-KeccakAbsorb1088bits
# -------------------------------------------------------------------------
# KeccakAbsorb1152bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 144 bytes (18 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1152bits
    .type KeccakAbsorb1152bits, %function
KeccakAbsorb1152bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    mXorState128 apInput, apState, 128
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1152bits, .-KeccakAbsorb1152bits
# -------------------------------------------------------------------------
# KeccakAbsorb1344bits(state, input)
#   state (rdi): 25-lane state, updated in place.
#   input (rsi): 168 bytes (21 lanes) that are xored into the start of the
#                state before the permutation runs.
    .align 2
    .global KeccakAbsorb1344bits
    .type KeccakAbsorb1344bits, %function
KeccakAbsorb1344bits:
    mXorState512 apInput, apState, 0
    mXorState512 apInput, apState, 64
    mXorState256 apInput, apState, 128
    movq 160(apInput), %rax
    xorq %rax, 160(apState)
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb1344bits, .-KeccakAbsorb1344bits
# -------------------------------------------------------------------------
# KeccakAbsorb(state, input, laneCount)
#   state     (rdi): 25-lane state, updated in place.
#   input     (rsi): laneCount lanes of 8 bytes, xored into the start of the
#                    state before the permutation runs.
#   laneCount (rdx): only bits 16, 8, 4, 2 and 1 are examined; each set bit
#                    xors a chunk of that many lanes, largest chunk first.
# xpState (r9) walks through the state while apState keeps the base address
# that the permutation needs afterwards.
    .align 2
    .global KeccakAbsorb
    .type KeccakAbsorb, %function
KeccakAbsorb:
    movq apState, xpState
    # chunk of 16 lanes (128 bytes)
    test $16, aNbrWords
    jz xorInputToState8
    mXorState512 apInput, xpState, 0
    mXorState512 apInput, xpState, 64
    addq $128, apInput
    addq $128, xpState
xorInputToState8:
    # chunk of 8 lanes (64 bytes)
    test $8, aNbrWords
    jz xorInputToState4
    mXorState512 apInput, xpState, 0
    addq $64, apInput
    addq $64, xpState
xorInputToState4:
    # chunk of 4 lanes (32 bytes)
    test $4, aNbrWords
    jz xorInputToState2
    mXorState256 apInput, xpState, 0
    addq $32, apInput
    addq $32, xpState
xorInputToState2:
    # chunk of 2 lanes (16 bytes)
    test $2, aNbrWords
    jz xorInputToState1
    mXorState128 apInput, xpState, 0
    addq $16, apInput
    addq $16, xpState
xorInputToState1:
    # single trailing lane
    test $1, aNbrWords
    jz xorInputToStateDone
    movq (apInput), %rax
    xorq %rax, (xpState)
xorInputToStateDone:
    mPushRegs
    mKeccakPermutation
    mPopRegs
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakAbsorb, .-KeccakAbsorb
# -------------------------------------------------------------------------
# KeccakInitializeState(state)
#   state (rdi): 25-lane buffer to initialise.
# Lanes 1, 2, 8, 12, 17 and 20 are set to all-ones and every other lane to
# zero.  Both code paths write the same values; the SIMD path stores pairs of
# zero lanes with one 16-byte movdqu.  Overwrites rax, rcx and (SIMD) xmm0.
    .align 2
    .global KeccakInitializeState
    .type KeccakInitializeState, %function
KeccakInitializeState:
    # rax = 0, rcx = all-ones
    xorq %rax, %rax
    xorq %rcx, %rcx
    notq %rcx
    .if UseSIMD == 0
    movq %rax, 0*8(apState)
    movq %rcx, 1*8(apState)
    movq %rcx, 2*8(apState)
    movq %rax, 3*8(apState)
    movq %rax, 4*8(apState)
    movq %rax, 5*8(apState)
    movq %rax, 6*8(apState)
    movq %rax, 7*8(apState)
    movq %rcx, 8*8(apState)
    movq %rax, 9*8(apState)
    movq %rax, 10*8(apState)
    movq %rax, 11*8(apState)
    movq %rcx, 12*8(apState)
    movq %rax, 13*8(apState)
    movq %rax, 14*8(apState)
    movq %rax, 15*8(apState)
    movq %rax, 16*8(apState)
    movq %rcx, 17*8(apState)
    movq %rax, 18*8(apState)
    movq %rax, 19*8(apState)
    movq %rcx, 20*8(apState)
    movq %rax, 21*8(apState)
    movq %rax, 22*8(apState)
    movq %rax, 23*8(apState)
    movq %rax, 24*8(apState)
    .else
    pxor %xmm0, %xmm0
    movq %rax, 0*8(apState)
    movq %rcx, 1*8(apState)
    movq %rcx, 2*8(apState)
    movq %rax, 3*8(apState)
    movdqu %xmm0, 4*8(apState)
    movdqu %xmm0, 6*8(apState)
    movq %rcx, 8*8(apState)
    movq %rax, 9*8(apState)
    movdqu %xmm0, 10*8(apState)
    movq %rcx, 12*8(apState)
    movq %rax, 13*8(apState)
    movdqu %xmm0, 14*8(apState)
    movq %rax, 16*8(apState)
    movq %rcx, 17*8(apState)
    movdqu %xmm0, 18*8(apState)
    movq %rcx, 20*8(apState)
    movq %rax, 21*8(apState)
    movdqu %xmm0, 22*8(apState)
    movq %rax, 24*8(apState)
    .endif
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakInitializeState, .-KeccakInitializeState
# -------------------------------------------------------------------------
# KeccakExtract1024bits(state, data)
#   state (rdi): source state; lanes 0-15 are read, nothing is written to it.
#   data  (rsi): destination, receives 128 bytes (16 lanes).
# Lanes 1, 2, 8 and 12 are bit-complemented on the way out; the other lanes
# are copied unchanged.  Overwrites rax, rcx, rdx and r8.
    .align 2
    .global KeccakExtract1024bits
    .type KeccakExtract1024bits, %function
KeccakExtract1024bits:
    # lanes 0-3 (1 and 2 complemented)
    movq 0*8(apState), %rax
    movq 1*8(apState), %rcx
    movq 2*8(apState), %rdx
    movq 3*8(apState), %r8
    notq %rcx
    notq %rdx
    movq %rax, 0*8(%rsi)
    movq %rcx, 1*8(%rsi)
    movq %rdx, 2*8(%rsi)
    movq %r8, 3*8(%rsi)
    # lanes 4-7
    movq 4*8(apState), %rax
    movq 5*8(apState), %rcx
    movq 6*8(apState), %rdx
    movq 7*8(apState), %r8
    movq %rax, 4*8(%rsi)
    movq %rcx, 5*8(%rsi)
    movq %rdx, 6*8(%rsi)
    movq %r8, 7*8(%rsi)
    # lanes 8-11 (8 complemented)
    movq 8*8(apState), %rax
    movq 9*8(apState), %rcx
    movq 10*8(apState), %rdx
    movq 11*8(apState), %r8
    notq %rax
    movq %rax, 8*8(%rsi)
    movq %rcx, 9*8(%rsi)
    movq %rdx, 10*8(%rsi)
    movq %r8, 11*8(%rsi)
    # lanes 12-15 (12 complemented)
    movq 12*8(apState), %rax
    movq 13*8(apState), %rcx
    movq 14*8(apState), %rdx
    movq 15*8(apState), %r8
    notq %rax
    movq %rax, 12*8(%rsi)
    movq %rcx, 13*8(%rsi)
    movq %rdx, 14*8(%rsi)
    movq %r8, 15*8(%rsi)
    ret
# .size has to follow the body: it measures from the label to this point.
    .size KeccakExtract1024bits, .-KeccakExtract1024bits

573
c_src/KeccakF-1600-xop.macros Executable file
View File

@ -0,0 +1,573 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
/* Declares every local working variable that the other macros in this file
 * read or write: the A* and E* state variables, the B* temporaries, the
 * C* and D* values used by computeD, and Zero.
 * Expand it once at the top of the enclosing function, before any other
 * macro from this file. None of the variables is initialised here.
 * V128 and V64 are types supplied by the including source file.
 */
#define declareABCDE \
V128 Abage, Abegi, Abigo, Abogu, Abuga; \
V128 Akame, Akemi, Akimo, Akomu, Akuma; \
V128 Abae, Abio, Agae, Agio, Akae, Akio, Amae, Amio; \
V64 Aba, Abe, Abi, Abo, Abu; \
V64 Aga, Age, Agi, Ago, Agu; \
V64 Aka, Ake, Aki, Ako, Aku; \
V64 Ama, Ame, Ami, Amo, Amu; \
V128 Asase, Asiso; \
V64 Asu; \
V128 Bbage, Bbegi, Bbigo, Bbogu, Bbuga; \
V128 Bkame, Bkemi, Bkimo, Bkomu, Bkuma; \
V128 Bsase, Bsesi, Bsiso, Bsosu, Bsusa; \
V128 Cae, Cei, Cio, Cou, Cua; \
V128 Dau, Dea, Die, Doi, Duo; \
V128 Dua, Dae, Dei, Dio, Dou; \
V128 Ebage, Ebegi, Ebigo, Ebogu, Ebuga; \
V128 Ekame, Ekemi, Ekimo, Ekomu, Ekuma; \
V128 Esase, Esiso; \
V64 Esu; \
V128 Zero;
/* Intentionally expands to nothing in this implementation.
 * NOTE(review): presumably kept so this file offers the same macro set as
 * the other KeccakF-1600 macro files; the Cae/Cio/Cua values are already
 * produced by the copyFromState* and round macros -- confirm with the includer.
 */
#define prepareTheta
/* Derives the D* values consumed by thetaRhoPiChiIotaPrepareTheta from the
 * current Cae, Cio and Cua.
 * Each of Dei, Dou and Dae is one C value XORed with another C value passed
 * through ROL6464same(x, 1) (presumably a rotate-left by 1 of both 64-bit
 * halves -- confirm against the macro's definition). Dau, Dea, Die, Doi and
 * Duo are then built from those three by swapping/recombining 64-bit halves.
 * Side effects: overwrites Cua and Cei. Statement order matters because
 * later lines read values assigned by earlier ones.
 */
#define computeD \
Cua = GET64LOLO(Cua, Cae); \
Dei = XOR128(Cae, ROL6464same(Cio, 1)); \
Dou = XOR128(Cio, ROL6464same(Cua, 1)); \
Cei = GET64HILO(Cae, Cio); \
Dae = XOR128(Cua, ROL6464same(Cei, 1)); \
Dau = GET64LOHI(Dae, Dou); \
Dea = SWAP64(Dae); \
Die = SWAP64(Dei); \
Doi = GET64LOLO(Dou, Die); \
Duo = SWAP64(Dou);
/* --- Theta Rho Pi Chi Iota Prepare-theta
 * --- 64-bit lanes mapped to 64-bit and 128-bit words
 *
 * One Keccak-f[1600] round: reads the state held in the A-prefixed
 * variables, writes the next state to the E-prefixed variables, and leaves
 * the XOR accumulators Cae, Cio, Cua (with Cei, Cou as temporaries) ready
 * for the computeD of the following round.
 *   i - round index, selects KeccakF1600RoundConstants[i]
 *   A - prefix of the input state variables
 *   E - prefix of the output state variables
 * Requires declareABCDE in scope and Cae/Cio/Cua valid on entry.
 *
 * Zero is assigned before its first use (it used to be assigned only after
 * E##su had already read it), and the last line carries no line
 * continuation, so the macro does not depend on the line that follows it.
 */
#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \
    computeD \
    \
    Bbage = XOR128(GET64LOHI(A##bage, A##bogu), Dau); \
    Bbage = ROL6464(Bbage, 0, 20); \
    Bbegi = XOR128(GET64HILO(A##bage, A##kame), Dea); \
    Bbegi = ROL6464(Bbegi, 44, 3); \
    Bbigo = XOR128(GET64LOHI(A##kimo, A##kame), Die); \
    Bbigo = ROL6464(Bbigo, 43, 45); \
    E##bage = XOR128(Bbage, ANDnu128(Bbegi, Bbigo)); \
    XOReq128(E##bage, CONST64(KeccakF1600RoundConstants[i])); \
    Cae = E##bage; \
    Bbogu = XOR128(GET64HILO(A##kimo, A##siso), Doi); \
    Bbogu = ROL6464(Bbogu, 21, 61); \
    E##begi = XOR128(Bbegi, ANDnu128(Bbigo, Bbogu)); \
    Cei = E##begi; \
    Bbuga = XOR128(GET64LOLO(A##su, A##bogu), Duo); \
    Bbuga = ROL6464(Bbuga, 14, 28); \
    E##bigo = XOR128(Bbigo, ANDnu128(Bbogu, Bbuga)); \
    Cio = E##bigo; \
    E##bogu = XOR128(Bbogu, ANDnu128(Bbuga, Bbage)); \
    Cou = E##bogu; \
    E##buga = XOR128(Bbuga, ANDnu128(Bbage, Bbegi)); \
    Cua = E##buga; \
    \
    Bkame = XOR128(GET64LOHI(A##begi, A##buga), Dea); \
    Bkame = ROL6464(Bkame, 1, 36); \
    Bkemi = XOR128(GET64HILO(A##begi, A##kemi), Die); \
    Bkemi = ROL6464(Bkemi, 6, 10); \
    Bkimo = XOR128(GET64LOHI(A##komu, A##kemi), Doi); \
    Bkimo = ROL6464(Bkimo, 25, 15); \
    E##kame = XOR128(Bkame, ANDnu128(Bkemi, Bkimo)); \
    XOReq128(Cae, E##kame); \
    Bkomu = XOR128(GET64HIHI(A##komu, A##siso), Duo); \
    Bkomu = ROL6464(Bkomu, 8, 56); \
    E##kemi = XOR128(Bkemi, ANDnu128(Bkimo, Bkomu)); \
    XOReq128(Cei, E##kemi); \
    Bkuma = XOR128(GET64LOLO(A##sase, A##buga), Dau); \
    Bkuma = ROL6464(Bkuma, 18, 27); \
    E##kimo = XOR128(Bkimo, ANDnu128(Bkomu, Bkuma)); \
    XOReq128(Cio, E##kimo); \
    E##komu = XOR128(Bkomu, ANDnu128(Bkuma, Bkame)); \
    XOReq128(Cou, E##komu); \
    E##kuma = XOR128(Bkuma, ANDnu128(Bkame, Bkemi)); \
    XOReq128(Cua, E##kuma); \
    \
    Bsase = XOR128(A##bigo, SWAP64(Doi)); \
    Bsase = ROL6464(Bsase, 62, 55); \
    Bsiso = XOR128(A##kuma, SWAP64(Dau)); \
    Bsiso = ROL6464(Bsiso, 39, 41); \
    Bsusa = XOR64(COPY64HI2LO(A##sase), Dei); \
    Bsusa = ROL6464same(Bsusa, 2); \
    Bsusa = GET64LOLO(Bsusa, Bsase); \
    Bsesi = GET64HILO(Bsase, Bsiso); \
    Bsosu = GET64HILO(Bsiso, Bsusa); \
    E##sase = XOR128(Bsase, ANDnu128(Bsesi, Bsiso)); \
    XOReq128(Cae, E##sase); \
    E##siso = XOR128(Bsiso, ANDnu128(Bsosu, Bsusa)); \
    XOReq128(Cio, E##siso); \
    /* Zero must hold a defined value before E##su reads it. */ \
    Zero = ZERO128(); \
    E##su = GET64LOLO(XOR128(Bsusa, ANDnu128(Bsase, Bsesi)), Zero); \
    XOReq128(Cua, E##su); \
    \
    XOReq128(Cae, GET64HIHI(Cua, Zero)); \
    XOReq128(Cae, GET64LOLO(Zero, Cei)); \
    XOReq128(Cio, GET64HIHI(Cei, Zero)); \
    XOReq128(Cio, GET64LOLO(Zero, Cou)); \
    XOReq128(Cua, GET64HIHI(Cou, Zero));
// --- Theta Rho Pi Chi Iota
// --- 64-bit lanes mapped to 64-bit and 128-bit words
// In this implementation the plain round is the same macro as the
// "prepare theta" round: it simply forwards its three arguments.
#define thetaRhoPiChiIota(i, A, E) thetaRhoPiChiIotaPrepareTheta(i, A, E)
/* The 24 round constants of Keccak-f[1600], one 64-bit word per round.
 * Entry i is XORed into the state by the round macro invoked with index i
 * (see the CONST64(KeccakF1600RoundConstants[i]) use in this file).
 * These values are fixed by the algorithm and must not be edited.
 */
const UINT64 KeccakF1600RoundConstants[24] = {
0x0000000000000001ULL,
0x0000000000008082ULL,
0x800000000000808aULL,
0x8000000080008000ULL,
0x000000000000808bULL,
0x0000000080000001ULL,
0x8000000080008081ULL,
0x8000000000008009ULL,
0x000000000000008aULL,
0x0000000000000088ULL,
0x0000000080008009ULL,
0x000000008000000aULL,
0x000000008000808bULL,
0x800000000000008bULL,
0x8000000000008089ULL,
0x8000000000008003ULL,
0x8000000000008002ULL,
0x8000000000000080ULL,
0x000000000000800aULL,
0x800000008000000aULL,
0x8000000080008081ULL,
0x8000000000008080ULL,
0x0000000080000001ULL,
0x8000000080008008ULL };
/* Loads the 25 state lanes from `state` into the X-prefixed working
 * variables, XORing input lanes 0..8 (9 lanes = 576 bits) into the matching
 * state lanes on the way, and accumulates the XOR sums Cae, Cio and Cua
 * that computeD consumes.
 *   X     - prefix of the working variables to fill (see declareABCDE)
 *   state - array of 64-bit lanes; even lane pairs are read with the aligned
 *           LOAD128, odd-offset pairs with LOAD128u
 *   input - array of 64-bit lanes, always read with LOAD128u/LOAD64
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyFromStateAndXor576bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = LOAD64(state[ 9]); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = LOAD128(state[10]); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = LOAD128(state[12]); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/* Loads the 25 state lanes from `state` into the X-prefixed working
 * variables, XORing input lanes 0..12 (13 lanes = 832 bits) into the
 * matching state lanes on the way, and accumulates the XOR sums Cae, Cio
 * and Cua that computeD consumes.
 *   X     - prefix of the working variables to fill (see declareABCDE)
 *   state - array of 64-bit lanes
 *   input - array of 64-bit lanes; only lane 12 of the pair 12/13 is read
 *           (LOAD64), so nothing beyond the 832-bit block is touched
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyFromStateAndXor832bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD64(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/* Loads the 25 state lanes from `state` into the X-prefixed working
 * variables, XORing input lanes 0..15 (16 lanes = 1024 bits) into the
 * matching state lanes on the way, and accumulates the XOR sums Cae, Cio
 * and Cua that computeD consumes.
 *   X     - prefix of the working variables to fill (see declareABCDE)
 *   state - array of 64-bit lanes
 *   input - array of 64-bit lanes
 * input[14] is read with LOAD128u like every other 128-bit input load: the
 * message buffer comes from the caller and, unlike `state`, is not
 * guaranteed to be 16-byte aligned.
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyFromStateAndXor1024bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/* Loads the 25 state lanes from `state` into the X-prefixed working
 * variables, XORing input lanes 0..16 (17 lanes = 1088 bits) into the
 * matching state lanes on the way, and accumulates the XOR sums Cae, Cio
 * and Cua that computeD consumes.
 *   X     - prefix of the working variables to fill (see declareABCDE)
 *   state - array of 64-bit lanes
 *   input - array of 64-bit lanes
 * input[14] is read with LOAD128u like every other 128-bit input load: the
 * message buffer comes from the caller and, unlike `state`, is not
 * guaranteed to be 16-byte aligned.
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyFromStateAndXor1088bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/* Loads the 25 state lanes from `state` into the X-prefixed working
 * variables, XORing input lanes 0..17 (18 lanes = 1152 bits) into the
 * matching state lanes on the way, and accumulates the XOR sums Cae, Cio
 * and Cua that computeD consumes.
 *   X     - prefix of the working variables to fill (see declareABCDE)
 *   state - array of 64-bit lanes
 *   input - array of 64-bit lanes; only lane 17 of the pair 17/18 is read
 *           (LOAD64), so nothing beyond the 1152-bit block is touched
 * input[14] is read with LOAD128u like every other 128-bit input load: the
 * message buffer comes from the caller and, unlike `state`, is not
 * guaranteed to be 16-byte aligned.
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyFromStateAndXor1152bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = XOR128(LOAD128u(state[17]), LOAD64(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/* Loads the 25 state lanes from `state` into the X-prefixed working
 * variables, XORing input lanes 0..20 (21 lanes = 1344 bits) into the
 * matching state lanes on the way, and accumulates the XOR sums Cae, Cio
 * and Cua that computeD consumes.
 *   X     - prefix of the working variables to fill (see declareABCDE)
 *   state - array of 64-bit lanes
 *   input - array of 64-bit lanes; only lane 20 of the pair 20/21 is read
 *           (LOAD64), so nothing beyond the 1344-bit block is touched
 * input[14] is read with LOAD128u like every other 128-bit input load: the
 * message buffer comes from the caller and, unlike `state`, is not
 * guaranteed to be 16-byte aligned.
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyFromStateAndXor1344bits(X, state, input) \
    X##bae = XOR128(LOAD128(state[ 0]), LOAD128u(input[ 0])); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = XOR128(LOAD128(state[ 2]), LOAD128u(input[ 2])); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = XOR64(LOAD64(state[ 4]), LOAD64(input[ 4])); \
    Cua = X##bu; \
    X##gae = XOR128(LOAD128u(state[ 5]), LOAD128u(input[ 5])); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = XOR128(LOAD128u(state[ 7]), LOAD128u(input[ 7])); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = XOR64(LOAD64(state[ 9]), LOAD64(input[ 9])); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = XOR128(LOAD128(state[10]), LOAD128u(input[10])); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = XOR128(LOAD128(state[12]), LOAD128u(input[12])); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = XOR128(LOAD128(state[14]), LOAD128u(input[14])); \
    XOReq64(Cua, X##kuma); \
    X##me = XOR64(LOAD64(state[16]), LOAD64(input[16])); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = XOR128(LOAD128u(state[17]), LOAD128u(input[17])); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = XOR64(LOAD64(state[19]), LOAD64(input[19])); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = XOR128(LOAD128(state[20]), LOAD64(input[20])); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/* Loads the 25 state lanes from `state` into the X-prefixed working
 * variables without XORing any input, and accumulates the XOR sums Cae,
 * Cio and Cua that computeD consumes.
 *   X     - prefix of the working variables to fill (see declareABCDE)
 *   state - array of 64-bit lanes; even lane pairs are read with the aligned
 *           LOAD128, odd-offset pairs with LOAD128u
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyFromState(X, state) \
    X##bae = LOAD128(state[ 0]); \
    X##ba = X##bae; \
    X##be = GET64HIHI(X##bae, X##bae); \
    Cae = X##bae; \
    X##bio = LOAD128(state[ 2]); \
    X##bi = X##bio; \
    X##bo = GET64HIHI(X##bio, X##bio); \
    Cio = X##bio; \
    X##bu = LOAD64(state[ 4]); \
    Cua = X##bu; \
    X##gae = LOAD128u(state[ 5]); \
    X##ga = X##gae; \
    X##buga = GET64LOLO(X##bu, X##ga); \
    X##ge = GET64HIHI(X##gae, X##gae); \
    X##bage = GET64LOLO(X##ba, X##ge); \
    XOReq128(Cae, X##gae); \
    X##gio = LOAD128u(state[ 7]); \
    X##gi = X##gio; \
    X##begi = GET64LOLO(X##be, X##gi); \
    X##go = GET64HIHI(X##gio, X##gio); \
    X##bigo = GET64LOLO(X##bi, X##go); \
    XOReq128(Cio, X##gio); \
    X##gu = LOAD64(state[ 9]); \
    X##bogu = GET64LOLO(X##bo, X##gu); \
    XOReq64(Cua, X##gu); \
    X##kae = LOAD128(state[10]); \
    X##ka = X##kae; \
    X##ke = GET64HIHI(X##kae, X##kae); \
    XOReq128(Cae, X##kae); \
    X##kio = LOAD128(state[12]); \
    X##ki = X##kio; \
    X##ko = GET64HIHI(X##kio, X##kio); \
    XOReq128(Cio, X##kio); \
    X##kuma = LOAD128(state[14]); \
    XOReq64(Cua, X##kuma); \
    X##me = LOAD64(state[16]); \
    X##kame = GET64LOLO(X##ka, X##me); \
    XOReq128(Cae, GET64HIHI(X##kuma, X##kame)); \
    X##mio = LOAD128u(state[17]); \
    X##mi = X##mio; \
    X##kemi = GET64LOLO(X##ke, X##mi); \
    X##mo = GET64HIHI(X##mio, X##mio); \
    X##kimo = GET64LOLO(X##ki, X##mo); \
    XOReq128(Cio, X##mio); \
    X##mu = LOAD64(state[19]); \
    X##komu = GET64LOLO(X##ko, X##mu); \
    XOReq64(Cua, X##mu); \
    X##sase = LOAD128(state[20]); \
    XOReq128(Cae, X##sase); \
    X##siso = LOAD128(state[22]); \
    XOReq128(Cio, X##siso); \
    X##su = LOAD64(state[24]); \
    XOReq64(Cua, X##su);
/* Writes the X-prefixed working variables back to the 25-lane `state`
 * array. Paired variables (e.g. X##bage) hold two lanes: the low half goes
 * to one lane via STORE64 and the high half, moved down with COPY64HI2LO,
 * to another; X##buga, X##kuma, X##sase and X##siso are stored as whole
 * 128-bit values covering two adjacent lanes.
 *   state - destination array of 64-bit lanes
 *   X     - prefix of the working variables to store
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyToState(state, X) \
    STORE64(state[ 0], X##bage); \
    STORE64(state[ 1], X##begi); \
    STORE64(state[ 2], X##bigo); \
    STORE64(state[ 3], X##bogu); \
    STORE128(state[ 4], X##buga); \
    STORE64(state[ 6], COPY64HI2LO(X##bage)); \
    STORE64(state[ 7], COPY64HI2LO(X##begi)); \
    STORE64(state[ 8], COPY64HI2LO(X##bigo)); \
    STORE64(state[ 9], COPY64HI2LO(X##bogu)); \
    STORE64(state[10], X##kame); \
    STORE64(state[11], X##kemi); \
    STORE64(state[12], X##kimo); \
    STORE64(state[13], X##komu); \
    STORE128(state[14], X##kuma); \
    STORE64(state[16], COPY64HI2LO(X##kame)); \
    STORE64(state[17], COPY64HI2LO(X##kemi)); \
    STORE64(state[18], COPY64HI2LO(X##kimo)); \
    STORE64(state[19], COPY64HI2LO(X##komu)); \
    STORE128(state[20], X##sase); \
    STORE128(state[22], X##siso); \
    STORE64(state[24], X##su);
/* Copies the thirteen packed state variables with prefix Y into the
 * corresponding variables with prefix X (bage ... siso, su).
 *   X - destination prefix
 *   Y - source prefix
 * The last line carries no line continuation, so the macro does not depend
 * on the line that follows it.
 */
#define copyStateVariables(X, Y) \
    X##bage = Y##bage; \
    X##begi = Y##begi; \
    X##bigo = Y##bigo; \
    X##bogu = Y##bogu; \
    X##buga = Y##buga; \
    X##kame = Y##kame; \
    X##kemi = Y##kemi; \
    X##kimo = Y##kimo; \
    X##komu = Y##komu; \
    X##kuma = Y##kuma; \
    X##sase = Y##sase; \
    X##siso = Y##siso; \
    X##su = Y##su;

81
c_src/KeccakNISTInterface.c Executable file
View File

@ -0,0 +1,81 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakNISTInterface.h"
#include "KeccakF-1600-interface.h"
/*
 * Set up `state` for a digest of `hashbitlen` bits.
 * Accepted values are 0 (default parameters, arbitrary-length output),
 * 224, 256, 384 and 512; anything else yields BAD_HASHLEN and leaves
 * `state` untouched. On success the sponge is initialised with the rate
 * chosen below and capacity 1600 - rate, `fixedOutputLength` is set to
 * `hashbitlen`, and SUCCESS is returned.
 */
HashReturn Init(hashState *state, int hashbitlen)
{
    unsigned int rate;

    if (hashbitlen == 0)
        rate = 1024;    // Default parameters, arbitrary length output
    else if (hashbitlen == 224)
        rate = 1152;
    else if (hashbitlen == 256)
        rate = 1088;
    else if (hashbitlen == 384)
        rate = 832;
    else if (hashbitlen == 512)
        rate = 576;
    else
        return BAD_HASHLEN;

    // The capacity is whatever remains of the 1600-bit permutation width.
    InitSponge((spongeState*)state, rate, 1600 - rate);
    state->fixedOutputLength = hashbitlen;
    return SUCCESS;
}
/*
 * Feed `databitlen` bits of `data` to the sponge.
 * Whole bytes are passed straight to Absorb(). When the length is not a
 * multiple of 8, the trailing bits are expected in the most significant
 * bits of the last byte; they are shifted down to the least significant
 * bits and absorbed in a second call.
 * Returns whatever Absorb() returns (SUCCESS on success).
 */
HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen)
{
    const unsigned int trailingBits = (unsigned int)(databitlen % 8);
    HashReturn status;
    unsigned char tail;

    if (trailingBits == 0)
        return Absorb((spongeState*)state, data, databitlen);

    status = Absorb((spongeState*)state, data, databitlen - trailingBits);
    if (status != SUCCESS)
        return status;

    // Align the last partial byte to the least significant bits
    tail = data[databitlen/8] >> (8 - trailingBits);
    return Absorb((spongeState*)state, &tail, trailingBits);
}
/*
 * Produce the digest: squeeze `fixedOutputLength` bits (as recorded by
 * Init()) from the sponge into `hashval` and return Squeeze()'s status.
 */
HashReturn Final(hashState *state, BitSequence *hashval)
{
    const unsigned int outputBits = state->fixedOutputLength;

    return Squeeze(state, hashval, outputBits);
}
/*
 * One-shot hash: Init(), Update() and Final() on a local state.
 * Only the four fixed output lengths (224, 256, 384, 512) are accepted;
 * any other `hashbitlen` returns BAD_HASHLEN. The first step that fails
 * determines the return value; otherwise the result of Final() is returned.
 */
HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval)
{
    hashState ctx;
    HashReturn status;

    switch (hashbitlen) {
    case 224:
    case 256:
    case 384:
    case 512:
        break;
    default:
        return BAD_HASHLEN; // Only the four fixed output lengths available through this API
    }

    status = Init(&ctx, hashbitlen);
    if (status == SUCCESS)
        status = Update(&ctx, data, databitlen);
    if (status == SUCCESS)
        status = Final(&ctx, hashval);
    return status;
}

70
c_src/KeccakNISTInterface.h Executable file
View File

@ -0,0 +1,70 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakNISTInterface_h_
#define _KeccakNISTInterface_h_
#include "KeccakSponge.h"
/* One byte of message input or digest output. */
typedef unsigned char BitSequence;
/* A message length, expressed in bits. */
typedef unsigned long long DataLength;
/* Status codes returned by Init(), Update(), Final() and Hash(). */
typedef enum { SUCCESS = 0, FAIL = 1, BAD_HASHLEN = 2 } HashReturn;
/* The hash state is the sponge state itself (see KeccakSponge.h). */
typedef spongeState hashState;
/**
* Function to initialize the state of the Keccak[r, c] sponge function.
* The rate r and capacity c values are determined from @a hashbitlen.
* @param state Pointer to the state of the sponge function to be initialized.
* @param hashbitlen The desired number of output bits,
* or 0 for Keccak[] with default parameters
* and arbitrarily-long output.
* @pre The value of hashbitlen must be one of 0, 224, 256, 384 and 512.
* @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
*/
HashReturn Init(hashState *state, int hashbitlen);
/**
* Function to give input data for the sponge function to absorb.
* @param state Pointer to the state of the sponge function initialized by Init().
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the most significant bits of the last byte.
* @param databitlen The number of input bits provided in the input data.
* @pre In the previous call to Update(), databitlen was a multiple of 8.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Update(hashState *state, const BitSequence *data, DataLength databitlen);
/**
* Function to squeeze output data from the sponge function.
* If @a hashbitlen was not 0 in the call to Init(), the number of output bits is equal to @a hashbitlen.
* If @a hashbitlen was 0 in the call to Init(), the output bits must be extracted using the Squeeze() function.
* @param state Pointer to the state of the sponge function initialized by Init().
* @param hashval Pointer to the buffer where to store the output data.
* @return SUCCESS if successful, FAIL otherwise.
*/
HashReturn Final(hashState *state, BitSequence *hashval);
/**
* Function to compute a hash using the Keccak[r, c] sponge function.
* The rate r and capacity c values are determined from @a hashbitlen.
* @param hashbitlen The desired number of output bits.
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the most significant bits of the last byte.
* @param databitlen The number of input bits provided in the input data.
* @param hashval Pointer to the buffer where to store the output data.
* @pre The value of hashbitlen must be one of 224, 256, 384 and 512.
* @return SUCCESS if successful, BAD_HASHLEN if the value of hashbitlen is incorrect.
*/
HashReturn Hash(int hashbitlen, const BitSequence *data, DataLength databitlen, BitSequence *hashval);
#endif

266
c_src/KeccakSponge.c Executable file
View File

@ -0,0 +1,266 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <string.h>
#include "KeccakSponge.h"
#include "KeccakF-1600-interface.h"
#ifdef KeccakReference
#include "displayIntermediateValues.h"
#endif
/*
 * Put `state` into a fresh absorbing phase for Keccak[rate, capacity].
 * Returns 1 without touching `state` unless rate + capacity == 1600 and
 * rate is a non-zero multiple of 64 below 1600; returns 0 on success.
 */
int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity)
{
    const int widthMatches = (rate + capacity == 1600);
    const int rateUsable = (rate > 0) && (rate < 1600) && ((rate % 64) == 0);

    if (!widthMatches)
        return 1;
    if (!rateUsable)
        return 1;

    KeccakInitialize();
    KeccakInitializeState(state->state);
    memset(state->dataQueue, 0, KeccakMaximumRateInBytes);

    state->rate = rate;
    state->capacity = capacity;
    state->fixedOutputLength = 0;
    state->bitsInQueue = 0;
    state->squeezing = 0;
    state->bitsAvailableForSqueezing = 0;
    return 0;
}
/*
 * Absorb the block currently held in state->dataQueue and mark the queue
 * as empty. The caller must have filled the queue completely
 * (state->bitsInQueue == state->rate). A rate-specific routine is used
 * when the matching ProvideFastNNN macro is defined; every other rate goes
 * through the generic KeccakAbsorb().
 */
void AbsorbQueue(spongeState *state)
{
#ifdef KeccakReference
    displayBytes(1, "Block to be absorbed", state->dataQueue, state->rate/8);
#endif
    switch (state->rate) {
#ifdef ProvideFast576
    case 576:
        KeccakAbsorb576bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast832
    case 832:
        KeccakAbsorb832bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1024
    case 1024:
        KeccakAbsorb1024bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1088
    case 1088:
        KeccakAbsorb1088bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1152
    case 1152:
        KeccakAbsorb1152bits(state->state, state->dataQueue);
        break;
#endif
#ifdef ProvideFast1344
    case 1344:
        KeccakAbsorb1344bits(state->state, state->dataQueue);
        break;
#endif
    default:
        KeccakAbsorb(state->state, state->dataQueue, state->rate/64);
        break;
    }
    state->bitsInQueue = 0;
}
/*
 * Absorb `databitlen` bits of `data` into the sponge.
 *
 * While the queue is empty and at least one full block of input remains,
 * whole blocks are absorbed directly from `data`. Otherwise input is copied
 * into state->dataQueue, which is absorbed as soon as it holds `rate` bits.
 * A trailing partial byte (databitlen not a multiple of 8) is taken from
 * the least significant bits of the last byte and is only allowed in the
 * final call.
 *
 * Returns 0 on success, 1 if a previous call left a partial byte in the
 * queue or if the sponge has already switched to squeezing.
 *
 * The amount copied into the queue is clamped in 64-bit arithmetic before
 * it is narrowed to unsigned int. Narrowing first let a remaining length of
 * 2^32 bits or more truncate or wrap, which could copy far more than the
 * queue holds or stop the loop from advancing.
 */
int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen)
{
    unsigned long long i, j, wholeBlocks;
    unsigned int partialBlock, partialByte;
    const unsigned char *curData;

    if ((state->bitsInQueue % 8) != 0)
        return 1; // Only the last call may contain a partial byte
    if (state->squeezing)
        return 1; // Too late for additional input

    i = 0;
    while(i < databitlen) {
        if ((state->bitsInQueue == 0) && (databitlen >= state->rate) && (i <= (databitlen-state->rate))) {
            // Fast path: absorb whole blocks straight from the caller's buffer
            wholeBlocks = (databitlen-i)/state->rate;
            curData = data+i/8;
#ifdef ProvideFast576
            if (state->rate == 576) {
                for(j=0; j<wholeBlocks; j++, curData+=576/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb576bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast832
            if (state->rate == 832) {
                for(j=0; j<wholeBlocks; j++, curData+=832/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb832bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1024
            if (state->rate == 1024) {
                for(j=0; j<wholeBlocks; j++, curData+=1024/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1024bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1088
            if (state->rate == 1088) {
                for(j=0; j<wholeBlocks; j++, curData+=1088/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1088bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1152
            if (state->rate == 1152) {
                for(j=0; j<wholeBlocks; j++, curData+=1152/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1152bits(state->state, curData);
                }
            }
            else
#endif
#ifdef ProvideFast1344
            if (state->rate == 1344) {
                for(j=0; j<wholeBlocks; j++, curData+=1344/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb1344bits(state->state, curData);
                }
            }
            else
#endif
            {
                for(j=0; j<wholeBlocks; j++, curData+=state->rate/8) {
#ifdef KeccakReference
                    displayBytes(1, "Block to be absorbed", curData, state->rate/8);
#endif
                    KeccakAbsorb(state->state, curData, state->rate/64);
                }
            }
            i += wholeBlocks*state->rate;
        }
        else {
            // Slow path: top up the queue with at most the space it has left.
            // Clamp while still in 64 bits so the narrowing below cannot
            // truncate or wrap for very large inputs.
            unsigned long long remaining = databitlen - i;
            unsigned int room = state->rate - state->bitsInQueue;

            partialBlock = (remaining > room) ? room : (unsigned int)remaining;
            partialByte = partialBlock % 8;
            partialBlock -= partialByte;
            memcpy(state->dataQueue+state->bitsInQueue/8, data+i/8, partialBlock/8);
            state->bitsInQueue += partialBlock;
            i += partialBlock;
            if (state->bitsInQueue == state->rate)
                AbsorbQueue(state);
            if (partialByte > 0) {
                // Keep only the valid low-order bits of the last, partial byte
                unsigned char mask = (1 << partialByte)-1;
                state->dataQueue[state->bitsInQueue/8] = data[i/8] & mask;
                state->bitsInQueue += partialByte;
                i += partialByte;
            }
        }
    }
    return 0;
}
/*
 * Finish the absorbing phase: append the padding (a 1 bit right after the
 * message, a 1 bit in the last position of the block, zeroes in between),
 * absorb the final block(s), extract the first output block into
 * state->dataQueue and flag the sponge as squeezing.
 * If only one free bit is left in the block, the first padding bit fills
 * it, that block is absorbed, and the closing bit goes into a further,
 * otherwise all-zero block.
 */
void PadAndSwitchToSqueezingPhase(spongeState *state)
{
    // Note: the bits are numbered from 0=LSB to 7=MSB
    const unsigned int queuedBits = state->bitsInQueue;
    const unsigned int rateInBytes = state->rate/8;
    const unsigned int firstPadByte = queuedBits/8;
    const unsigned int firstPadBit = queuedBits % 8;

    if (queuedBits + 1 != state->rate) {
        const unsigned int usedBytes = (queuedBits+7)/8;

        // Clear everything after the message, then set the first padding bit
        memset(state->dataQueue + usedBytes, 0, rateInBytes - usedBytes);
        state->dataQueue[firstPadByte] |= 1 << firstPadBit;
    }
    else {
        // The first padding bit completes this block; pad on in a new one
        state->dataQueue[firstPadByte] |= 1 << firstPadBit;
        AbsorbQueue(state);
        memset(state->dataQueue, 0, rateInBytes);
    }
    state->dataQueue[(state->rate-1)/8] |= 1 << ((state->rate-1) % 8);
    AbsorbQueue(state);

#ifdef KeccakReference
    displayText(1, "--- Switching to squeezing phase ---");
#endif
#ifdef ProvideFast1024
    if (state->rate == 1024) {
        KeccakExtract1024bits(state->state, state->dataQueue);
        state->bitsAvailableForSqueezing = 1024;
    }
    else
#endif
    {
        KeccakExtract(state->state, state->dataQueue, state->rate/64);
        state->bitsAvailableForSqueezing = state->rate;
    }
#ifdef KeccakReference
    displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
#endif
    state->squeezing = 1;
}
/*
 * Write `outputLength` bits of sponge output to `output`.
 * The first call pads and switches the sponge to the squeezing phase; this
 * happens before the length is validated. Output is served from
 * state->dataQueue, which is refilled by running the permutation and
 * extracting a new block whenever it runs dry.
 * Returns 1 if `outputLength` is not a multiple of 8, 0 otherwise.
 */
int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength)
{
    unsigned long long produced = 0;
    unsigned long long wanted;
    unsigned int chunk;

    if (!state->squeezing)
        PadAndSwitchToSqueezingPhase(state);
    if ((outputLength % 8) != 0)
        return 1; // Only multiple of 8 bits are allowed, truncation can be done at user level

    while(produced < outputLength) {
        if (state->bitsAvailableForSqueezing == 0) {
            // Queue exhausted: permute and extract the next output block
            KeccakPermutation(state->state);
#ifdef ProvideFast1024
            if (state->rate == 1024) {
                KeccakExtract1024bits(state->state, state->dataQueue);
                state->bitsAvailableForSqueezing = 1024;
            }
            else
#endif
            {
                KeccakExtract(state->state, state->dataQueue, state->rate/64);
                state->bitsAvailableForSqueezing = state->rate;
            }
#ifdef KeccakReference
            displayBytes(1, "Block available for squeezing", state->dataQueue, state->bitsAvailableForSqueezing/8);
#endif
        }

        // Hand out the smaller of "what is queued" and "what is still wanted"
        wanted = outputLength - produced;
        chunk = state->bitsAvailableForSqueezing;
        if ((unsigned long long)chunk > wanted)
            chunk = (unsigned int)wanted;

        memcpy(output+produced/8, state->dataQueue+(state->rate-state->bitsAvailableForSqueezing)/8, chunk/8);
        state->bitsAvailableForSqueezing -= chunk;
        produced += chunk;
    }
    return 0;
}

76
c_src/KeccakSponge.h Executable file
View File

@ -0,0 +1,76 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _KeccakSponge_h_
#define _KeccakSponge_h_
/* Width of the permutation state, in bits and in bytes. */
#define KeccakPermutationSize 1600
#define KeccakPermutationSizeInBytes (KeccakPermutationSize/8)
/* Size, in bits and in bytes, of the dataQueue buffer in spongeState. */
#define KeccakMaximumRate 1536
#define KeccakMaximumRateInBytes (KeccakMaximumRate/8)
/*
 * ALIGN requests 32-byte alignment using the GCC or MSVC extension;
 * on any other compiler it expands to nothing.
 */
#if defined(__GNUC__)
#define ALIGN __attribute__ ((aligned(32)))
#elif defined(_MSC_VER)
#define ALIGN __declspec(align(32))
#else
#define ALIGN
#endif
/*
 * Complete state of one sponge instance, as used by InitSponge(), Absorb()
 * and Squeeze().
 */
ALIGN typedef struct spongeStateStruct {
/* Permutation state, KeccakPermutationSizeInBytes bytes. */
ALIGN unsigned char state[KeccakPermutationSizeInBytes];
/*
 * Block buffer. While squeezing it holds the block most recently extracted
 * from state, from which output bytes are copied.
 * NOTE(review): presumably also buffers input while absorbing - confirm in KeccakSponge.c.
 */
ALIGN unsigned char dataQueue[KeccakMaximumRateInBytes];
/* Rate r in bits (Squeeze uses rate/64 as a lane count and rate/8 as a byte count). */
unsigned int rate;
/* Capacity c in bits; InitSponge() documents the precondition r+c=1600. */
unsigned int capacity;
/* NOTE(review): presumably the number of input bits currently held in dataQueue - confirm. */
unsigned int bitsInQueue;
/* NOTE(review): not used by the sponge code visible here; presumably set by the hash interface - confirm. */
unsigned int fixedOutputLength;
/* Non-zero once the sponge has been switched to the squeezing phase. */
int squeezing;
/* Number of bits in dataQueue that Squeeze() has not yet handed out. */
unsigned int bitsAvailableForSqueezing;
} spongeState;
/**
* Function to initialize the state of the Keccak[r, c] sponge function.
* The sponge function is set to the absorbing phase.
* @param state Pointer to the state of the sponge function to be initialized.
* @param rate The value of the rate r.
* @param capacity The value of the capacity c.
* @pre One must have r+c=1600 and the rate a multiple of 64 bits in this implementation.
* @return Zero if successful, 1 otherwise.
*/
int InitSponge(spongeState *state, unsigned int rate, unsigned int capacity);
/**
* Function to give input data for the sponge function to absorb.
* @param state Pointer to the state of the sponge function initialized by InitSponge().
* @param data Pointer to the input data.
* When @a databitlen is not a multiple of 8, the last bits of data must be
* in the least significant bits of the last byte.
* @param databitlen The number of input bits provided in the input data.
* @pre In the previous call to Absorb(), databitlen was a multiple of 8.
* @pre The sponge function must be in the absorbing phase,
* i.e., Squeeze() must not have been called before.
* @return Zero if successful, 1 otherwise.
*/
int Absorb(spongeState *state, const unsigned char *data, unsigned long long databitlen);
/**
* Function to squeeze output data from the sponge function.
* If the sponge function was in the absorbing phase, this function
* switches it to the squeezing phase. The switch is performed before
* @a outputLength is checked, so it also takes place when the call fails.
* @param state Pointer to the state of the sponge function initialized by InitSponge().
* @param output Pointer to the buffer where to store the output data.
* It must have room for @a outputLength / 8 bytes.
* @param outputLength The number of output bits desired.
* It must be a multiple of 8.
* @return Zero if successful, 1 if @a outputLength is not a multiple of 8.
*/
int Squeeze(spongeState *state, unsigned char *output, unsigned long long outputLength);
#endif

142
c_src/brg_endian.h Executable file
View File

@ -0,0 +1,142 @@
/*
---------------------------------------------------------------------------
Copyright (c) 1998-2008, Brian Gladman, Worcester, UK. All rights reserved.
LICENSE TERMS
The redistribution and use of this software (with or without changes)
is allowed without the payment of fees or royalties provided that:
1. source code distributions include the above copyright notice, this
list of conditions and the following disclaimer;
2. binary distributions include the above copyright notice, this list
of conditions and the following disclaimer in their documentation;
3. the name of the copyright holder is not used to endorse products
built using this software without specific written permission.
DISCLAIMER
This software is provided 'as is' with no explicit or implied warranties
in respect of its properties, including, but not limited to, correctness
and/or fitness for purpose.
---------------------------------------------------------------------------
Issue Date: 20/12/2007
Changes for ARM 9/9/2010
*/
#ifndef _BRG_ENDIAN_H
#define _BRG_ENDIAN_H
#define IS_BIG_ENDIAN 4321 /* byte 0 is most significant (mc68k) */
#define IS_LITTLE_ENDIAN 1234 /* byte 0 is least significant (i386) */
/*
 * The whole include block below is compiled out by "#if 0": this header
 * pulls in no system header itself, so the detection that follows can only
 * see macros predefined by the compiler or defined by headers that were
 * included before this one.
 */
#if 0
/* Include files where endian defines and byteswap functions may reside */
#if defined( __sun )
# include <sys/isa_defs.h>
#elif defined( __FreeBSD__ ) || defined( __OpenBSD__ ) || defined( __NetBSD__ )
# include <sys/endian.h>
#elif defined( BSD ) && ( BSD >= 199103 ) || defined( __APPLE__ ) || \
defined( __CYGWIN32__ ) || defined( __DJGPP__ ) || defined( __osf__ )
# include <machine/endian.h>
#elif defined( __linux__ ) || defined( __GNUC__ ) || defined( __GNU_LIBRARY__ )
# if !defined( __MINGW32__ ) && !defined( _AIX )
# include <endian.h>
# if !defined( __BEOS__ )
# include <byteswap.h>
# endif
# endif
#endif
#endif
/* Now attempt to set the define for platform byte order using any */
/* of the four forms SYMBOL, _SYMBOL, __SYMBOL & __SYMBOL__, which */
/* seem to encompass most endian symbol definitions */
#if defined( BIG_ENDIAN ) && defined( LITTLE_ENDIAN )
# if defined( BYTE_ORDER ) && BYTE_ORDER == BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( BYTE_ORDER ) && BYTE_ORDER == LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( _BIG_ENDIAN ) && defined( _LITTLE_ENDIAN )
# if defined( _BYTE_ORDER ) && _BYTE_ORDER == _BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( _BYTE_ORDER ) && _BYTE_ORDER == _LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( _BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( _LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN ) && defined( __LITTLE_ENDIAN )
# if defined( __BYTE_ORDER ) && __BYTE_ORDER == __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER ) && __BYTE_ORDER == __LITTLE_ENDIAN
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
#if defined( __BIG_ENDIAN__ ) && defined( __LITTLE_ENDIAN__ )
# if defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __BIG_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# elif defined( __BYTE_ORDER__ ) && __BYTE_ORDER__ == __LITTLE_ENDIAN__
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
#elif defined( __BIG_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#elif defined( __LITTLE_ENDIAN__ )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#endif
/* if the platform byte order could not be determined, then try to */
/* set this define using common machine defines */
/* Only reached when none of the *_ENDIAN / *_BYTE_ORDER checks above set PLATFORM_BYTE_ORDER. */
#if !defined(PLATFORM_BYTE_ORDER)
/* Machine/compiler macros that are taken to imply a little-endian target. */
#if defined( __alpha__ ) || defined( __alpha ) || defined( i386 ) || \
defined( __i386__ ) || defined( _M_I86 ) || defined( _M_IX86 ) || \
defined( __OS2__ ) || defined( sun386 ) || defined( __TURBOC__ ) || \
defined( vax ) || defined( vms ) || defined( VMS ) || \
defined( __VMS ) || defined( _M_X64 )
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
/* Machine/compiler macros that are taken to imply a big-endian target. */
#elif defined( AMIGA ) || defined( applec ) || defined( __AS400__ ) || \
defined( _CRAY ) || defined( __hppa ) || defined( __hp9000 ) || \
defined( ibm370 ) || defined( mc68000 ) || defined( m68k ) || \
defined( __MRC__ ) || defined( __MVS__ ) || defined( __MWERKS__ ) || \
defined( sparc ) || defined( __sparc) || defined( SYMANTEC_C ) || \
defined( __VOS__ ) || defined( __TIGCC__ ) || defined( __TANDEM ) || \
defined( THINK_C ) || defined( __VMCMS__ ) || defined( _AIX )
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
/*
 * ARM: big-endian if __BIG_ENDIAN is defined, little-endian otherwise.
 * NOTE(review): this tests only whether the macro exists, not its value - confirm
 * this is what the targeted ARM toolchains expect.
 */
#elif defined(__arm__)
# ifdef __BIG_ENDIAN
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
# else
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
# endif
/*
 * Unconditional default: every platform not matched above is assumed to be
 * little-endian. Because this branch is always taken, the big-endian
 * alternative and the #error below are never reached unless the constants
 * on these two #elif lines are edited by hand.
 */
#elif 1 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_LITTLE_ENDIAN
#elif 0 /* **** EDIT HERE IF NECESSARY **** */
# define PLATFORM_BYTE_ORDER IS_BIG_ENDIAN
#else
# error Please edit lines 132 or 134 in brg_endian.h to set the platform byte order
#endif
#endif
#endif

117
c_src/displayIntermediateValues.c Executable file
View File

@ -0,0 +1,117 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#include <stdio.h>
#include "displayIntermediateValues.h"
#include "KeccakNISTInterface.h"
FILE *intermediateValueFile = 0;
int displayLevel = 0;
/*
 * Select the stream that the display* functions in this file write to.
 * Those functions print nothing while the stream is null, so passing a
 * null pointer turns the output off.
 */
void displaySetIntermediateValueFile(FILE *f)
{
intermediateValueFile = f;
}
/*
 * Set the verbosity threshold: a display* call produces output only when
 * the level passed to it is less than or equal to this value.
 */
void displaySetLevel(int level)
{
displayLevel = level;
}
/*
 * Dump size bytes as two-digit upper-case hex values separated by spaces,
 * preceded by "text:" on its own line and followed by an empty line.
 * Nothing is written when no output stream is set or when level exceeds
 * the current display level.
 */
void displayBytes(int level, const char *text, const unsigned char *bytes, unsigned int size)
{
    FILE *out = intermediateValueFile;
    unsigned int idx = 0;

    if (!out || level > displayLevel)
        return;

    fprintf(out, "%s:\n", text);
    while (idx < size) {
        fprintf(out, "%02X ", bytes[idx]);
        ++idx;
    }
    fprintf(out, "\n");
    fprintf(out, "\n");
}
/*
 * Dump the first size bits of data as a row of 0/1 digits, preceded by
 * "text:" on its own line and followed by an empty line.
 * With MSBfirst non-zero the bits of each byte are listed from the most
 * significant one down; otherwise from the least significant one up.
 * Nothing is written when no output stream is set or when level exceeds
 * the current display level.
 */
void displayBits(int level, const char *text, const unsigned char *data, unsigned int size, int MSBfirst)
{
    FILE *out = intermediateValueFile;
    unsigned int pos;

    if (!out || level > displayLevel)
        return;

    fprintf(out, "%s:\n", text);
    for (pos = 0; pos < size; ++pos) {
        const unsigned char byte = data[pos / 8];
        const unsigned int shift = pos % 8;
        int bit;

        if (MSBfirst)
            bit = ((byte << shift) & 0x80) != 0;
        else
            bit = ((byte >> shift) & 0x01) != 0;
        fprintf(out, "%d ", bit);
    }
    fprintf(out, "\n");
    fprintf(out, "\n");
}
/*
 * Dump a complete permutation state as hex bytes: a thin wrapper around
 * displayBytes() that always prints KeccakPermutationSizeInBytes bytes,
 * so state must point to at least that many.
 */
void displayStateAsBytes(int level, const char *text, const unsigned char *state)
{
displayBytes(level, text, state, KeccakPermutationSizeInBytes);
}
/*
 * Dump a permutation state given as 32-bit words. Words are printed in
 * consecutive pairs as "XXXXXXXX:XXXXXXXX", KeccakPermutationSize/64 pairs
 * in total, five pairs per output line.
 * Nothing is written when no output stream is set or when level exceeds
 * the current display level.
 */
void displayStateAs32bitWords(int level, const char *text, const unsigned int *state)
{
    FILE *out = intermediateValueFile;
    unsigned int lane;

    if (!out || level > displayLevel)
        return;

    fprintf(out, "%s:\n", text);
    for (lane = 0; lane < KeccakPermutationSize/64; ++lane) {
        const unsigned int *pair = state + 2*lane;

        fprintf(out, "%08X:%08X", (unsigned int)pair[0], (unsigned int)pair[1]);
        /* Every fifth pair ends the line; the others are separated by a space. */
        fputs((lane % 5) == 4 ? "\n" : " ", out);
    }
}
/*
 * Dump a permutation state given as 64-bit words. Each of the
 * KeccakPermutationSize/64 words is printed as 16 upper-case hex digits
 * (high half first), five words per output line.
 * Nothing is written when no output stream is set or when level exceeds
 * the current display level.
 */
void displayStateAs64bitWords(int level, const char *text, const unsigned long long int *state)
{
    FILE *out = intermediateValueFile;
    unsigned int lane;

    if (!out || level > displayLevel)
        return;

    fprintf(out, "%s:\n", text);
    for (lane = 0; lane < KeccakPermutationSize/64; ++lane) {
        const unsigned long long int word = state[lane];

        /* Printed as two 32-bit halves so that only the %X conversion is needed. */
        fprintf(out, "%08X", (unsigned int)(word >> 32));
        fprintf(out, "%08X", (unsigned int)(word & 0xFFFFFFFFULL));
        fputs((lane % 5) == 4 ? "\n" : " ", out);
    }
}
/*
 * Write a "--- Round <i> ---" banner surrounded by empty lines.
 * Nothing is written when no output stream is set or when level exceeds
 * the current display level.
 */
void displayRoundNumber(int level, unsigned int i)
{
    if ((intermediateValueFile) && (level <= displayLevel)) {
        fprintf(intermediateValueFile, "\n");
        /* i is unsigned, so it is converted with %u rather than %d. */
        fprintf(intermediateValueFile, "--- Round %u ---\n", i);
        fprintf(intermediateValueFile, "\n");
    }
}
/*
 * Write text on its own line followed by an empty line.
 * Nothing is written when no output stream is set or when level exceeds
 * the current display level.
 */
void displayText(int level, const char *text)
{
    if ((intermediateValueFile) && (level <= displayLevel)) {
        /*
         * text is data, not a format: print it through "%s" so that any '%'
         * it contains is emitted literally instead of being interpreted as a
         * conversion specification.
         */
        fprintf(intermediateValueFile, "%s", text);
        fprintf(intermediateValueFile, "\n");
        fprintf(intermediateValueFile, "\n");
    }
}

View File

@ -0,0 +1,29 @@
/*
The Keccak sponge function, designed by Guido Bertoni, Joan Daemen,
Michaël Peeters and Gilles Van Assche. For more information, feedback or
questions, please refer to our website: http://keccak.noekeon.org/
Implementation by the designers,
hereby denoted as "the implementer".
To the extent possible under law, the implementer has waived all copyright
and related or neighboring rights to the source code in this file.
http://creativecommons.org/publicdomain/zero/1.0/
*/
#ifndef _displayIntermediateValues_h_
#define _displayIntermediateValues_h_
#include <stdio.h>
void displaySetIntermediateValueFile(FILE *f);
void displaySetLevel(int level);
void displayBytes(int level, const char *text, const unsigned char *bytes, unsigned int size);
void displayBits(int level, const char *text, const unsigned char *data, unsigned int size, int MSBfirst);
void displayStateAsBytes(int level, const char *text, const unsigned char *state);
void displayStateAs32bitWords(int level, const char *text, const unsigned int *state);
void displayStateAs64bitWords(int level, const char *text, const unsigned long long int *state);
void displayRoundNumber(int level, unsigned int i);
void displayText(int level, const char *text);
#endif

144
c_src/sha3_nif.c Normal file
View File

@ -0,0 +1,144 @@
#include "erl_nif.h"
#include "KeccakNISTInterface.h"
/*
 * Payload of the NIF resource handed to Erlang as a hashing context:
 * the digest length chosen at hash_init time plus the hash state itself.
 */
typedef struct nif_hash_context nif_hash_context;
struct nif_hash_context {
/* Digest length in bits (224, 256, 384 or 512); hash_final allocates bitlen / 8 output bytes. */
int bitlen;
/* State passed to Init(), Update() and Final(). */
hashState state;
};
static void sha3_resource_cleanup(ErlNifEnv* env, void* arg);
static ERL_NIF_TERM nif_hash_init(ErlNifEnv* env, int argc,
const ERL_NIF_TERM argv[]);
static ERL_NIF_TERM nif_hash_update(ErlNifEnv* env, int argc,
const ERL_NIF_TERM argv[]);
static ERL_NIF_TERM nif_hash_final(ErlNifEnv* env, int argc,
const ERL_NIF_TERM argv[]);
static ERL_NIF_TERM nif_hash(ErlNifEnv* env, int argc,
const ERL_NIF_TERM argv[]);
static ErlNifFunc nif_funcs[] =
{
{"hash_init", 1, nif_hash_init},
{"hash_update", 2, nif_hash_update},
{"hash_final", 1, nif_hash_final},
{"hash", 2, nif_hash}
};
static ErlNifResourceType *sha3_resource_type;
/*
 * Destructor registered for the "sha3_resource" resource type in on_load().
 * Intentionally empty: the context is stored by value inside the resource.
 * NOTE(review): this assumes hashState owns no separately allocated memory - confirm.
 */
static void
sha3_resource_cleanup(ErlNifEnv* env, void* arg)
{
/* do nothing */
}
/*
 * sha3:hash_init/1 - create a new hashing context.
 *
 * argv[0] is the digest length in bits and must be 224, 256, 384 or 512.
 * Returns a resource term wrapping a freshly initialised nif_hash_context.
 * Raises badarg when the argument is not one of the accepted integers or
 * when the resource cannot be allocated.
 */
static ERL_NIF_TERM
nif_hash_init(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ERL_NIF_TERM ctxt_term;
    nif_hash_context *ctxt;
    int bitlen;

    /* A NIF must return a valid term or raise; a literal 0 is neither. */
    if (!enif_get_int(env, argv[0], &bitlen))
        return enif_make_badarg(env);
    if (bitlen != 224 && bitlen != 256 && bitlen != 384 && bitlen != 512)
        return enif_make_badarg(env);

    ctxt = enif_alloc_resource(sha3_resource_type, sizeof(nif_hash_context));
    if (ctxt == NULL)
        return enif_make_badarg(env);

    ctxt->bitlen = bitlen;
    /* NOTE(review): Init()'s status is ignored; presumably it cannot fail for
     * the digest lengths accepted above - confirm against its definition. */
    Init(&ctxt->state, bitlen);

    /* The term keeps the resource alive; drop the reference held by this call. */
    ctxt_term = enif_make_resource(env, ctxt);
    enif_release_resource(ctxt);
    return ctxt_term;
}
/*
 * sha3:hash_update/2 - absorb more data.
 *
 * argv[0] is a context created by hash_init/hash_update, argv[1] a binary.
 * The given context is left untouched: its contents are copied into a new
 * resource, the data is absorbed into that copy, and the copy is returned.
 * Raises badarg when an argument has the wrong type or when the new
 * resource cannot be allocated.
 */
static ERL_NIF_TERM
nif_hash_update(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ERL_NIF_TERM ctxt_term;
    ErlNifBinary src_bin;
    nif_hash_context *ctxt, *next;

    /* A NIF must return a valid term or raise; a literal 0 is neither. */
    if (!enif_get_resource(env, argv[0], sha3_resource_type, (void **)&ctxt) ||
        !enif_inspect_binary(env, argv[1], &src_bin))
        return enif_make_badarg(env);

    next = enif_alloc_resource(sha3_resource_type, sizeof(nif_hash_context));
    if (next == NULL)
        return enif_make_badarg(env);

    /* Copy digest length and state, then update only the copy. */
    *next = *ctxt;
    /* Widen before multiplying so the bit count cannot wrap a narrower size type. */
    Update(&next->state, src_bin.data, (unsigned long long)src_bin.size * 8);

    /* The term keeps the resource alive; drop the reference held by this call. */
    ctxt_term = enif_make_resource(env, next);
    enif_release_resource(next);
    return ctxt_term;
}
/*
 * sha3:hash_final/1 - produce the digest for a context.
 *
 * argv[0] is a context created by hash_init/hash_update. Finalisation runs
 * on a local copy of the state, so the context itself is not modified.
 * Returns a binary of bitlen / 8 bytes. Raises badarg when the argument is
 * not a context or when the output binary cannot be allocated.
 */
static ERL_NIF_TERM
nif_hash_final(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ErlNifBinary digest_bin;
    nif_hash_context *ctxt;
    hashState state;

    /* A NIF must return a valid term or raise; a literal 0 is neither. */
    if (!enif_get_resource(env, argv[0], sha3_resource_type, (void **)&ctxt))
        return enif_make_badarg(env);
    if (!enif_alloc_binary(ctxt->bitlen / 8, &digest_bin))
        return enif_make_badarg(env);

    state = ctxt->state;
    Final(&state, digest_bin.data);

    /* enif_make_binary() takes ownership of the buffer, so it must not be
     * released here afterwards. */
    return enif_make_binary(env, &digest_bin);
}
/*
 * sha3:hash/2 - one-shot digest.
 *
 * argv[0] is the digest length in bits (224, 256, 384 or 512), argv[1] the
 * binary to hash. Returns a binary of bitlen / 8 bytes. Raises badarg when
 * an argument has the wrong type or value or when the output binary cannot
 * be allocated.
 */
static ERL_NIF_TERM
nif_hash(ErlNifEnv* env, int argc, const ERL_NIF_TERM argv[])
{
    ErlNifBinary src_bin, digest_bin;
    int bitlen;

    /* A NIF must return a valid term or raise; a literal 0 is neither. */
    if (!enif_get_int(env, argv[0], &bitlen) ||
        !enif_inspect_binary(env, argv[1], &src_bin))
        return enif_make_badarg(env);
    if (bitlen != 224 && bitlen != 256 && bitlen != 384 && bitlen != 512)
        return enif_make_badarg(env);
    if (!enif_alloc_binary(bitlen / 8, &digest_bin))
        return enif_make_badarg(env);

    /* Widen before multiplying so the bit count cannot wrap a narrower size type. */
    Hash(bitlen, src_bin.data, (unsigned long long)src_bin.size * 8, digest_bin.data);

    /* enif_make_binary() takes ownership of the buffer, so it must not be
     * released here afterwards. */
    return enif_make_binary(env, &digest_bin);
}
/*
 * Load callback: opens (or takes over) the "sha3_resource" resource type
 * used for hashing contexts and stores it in sha3_resource_type.
 * Returns 0 on success and -1 when the resource type cannot be opened.
 */
static int
on_load(ErlNifEnv* env, void** priv_data, ERL_NIF_TERM load_info)
{
    sha3_resource_type = enif_open_resource_type(
        env, NULL, "sha3_resource", &sha3_resource_cleanup,
        (ErlNifResourceFlags)(ERL_NIF_RT_CREATE | ERL_NIF_RT_TAKEOVER), NULL);

    return (sha3_resource_type == NULL) ? -1 : 0;
}
/*
 * Unload callback passed to ERL_NIF_INIT. Intentionally empty: on_load()
 * does not store anything through priv_data, so there is nothing to free.
 */
static void
on_unload(ErlNifEnv* env, void* priv_data)
{
/* do nothing */
}
ERL_NIF_INIT(sha3, nif_funcs, &on_load, NULL, NULL, &on_unload);

8
doc/overview.edoc Normal file
View File

@ -0,0 +1,8 @@
@title SHA-3 for Erlang
@author SUZUKI Tetsuya <tetsuya.suzuki@gmail.com>
@copyright 2012- SUZUKI Tetsuya
@version 0.1.0
@reference <a href="http://en.wikipedia.org/wiki/SHA-3">Wikipedia: SHA-3</a>
@reference <a href="http://csrc.nist.gov/groups/ST/hash/sha-3/index.html">NIST: Cryptographic Hash Algorithm Competition</a>
@reference <a href="http://keccak.noekeon.org/">The Keccak sponge function family</a>

BIN
rebar vendored Executable file

Binary file not shown.

33
rebar.config Normal file
View File

@ -0,0 +1,33 @@
{erl_opts, [{i, "src"},
warnings_as_errors,
{w, all},
warn_export_all]}.
{clean_files, [".eunit",
"ebin/*.beam"]}.
{port_env, [{"CFLAGS", "$CFLAGS -O2 -finline-functions -fomit-frame-pointer -fno-strict-aliasing -Wmissing-prototypes -Wall -std=c99"}]}.
{port_specs, [
% TODO: support optimization
% {"i386", "priv/sha3_nif.so", ["c_src/sha3_nif.c",
% "c_src/KeccakNISTInterface.c",
% "c_src/KeccakSponge.c",
% "c_src/KeccakF-1600-opt32.c",
% "c_src/displayIntermediateValues.c"]},
% {"x86_64", "priv/sha3_nif.so", ["c_src/sha3_nif.c",
% "c_src/KeccakNISTInterface.c",
% "c_src/KeccakSponge.c",
% "c_src/KeccakF-1600-opt64.c",
% "c_src/displayIntermediateValues.c"]},
{"priv/sha3_nif.so", ["c_src/sha3_nif.c",
"c_src/KeccakNISTInterface.c",
"c_src/KeccakSponge.c",
"c_src/KeccakF-1600-reference.c",
"c_src/displayIntermediateValues.c"]}
]}.
{eunit_opts, [{report,{eunit_surefire,[{dir,"."}]}}]}.
{xref_checks, [fail_on_warning, undefined_function_calls]}.

12
src/sha3.app.src Normal file
View File

@ -0,0 +1,12 @@
{application, sha3,
[
{description, ""},
{vsn, "0.1.0"},
{registered, []},
{applications, [
kernel,
stdlib
]},
{modules, [sha3]},
{env, []}
]}.

41
src/sha3.erl Normal file
View File

@ -0,0 +1,41 @@
-module(sha3).
-export([hash_init/1, hash_update/2, hash_final/1, hash/2]).
-on_load(init/0).
-type bitlen() :: 224 | 256 | 384 | 512.
-type context() :: binary().
-type digest() :: <<_:224>> | <<_:256>> | <<_:384>> | <<_:512>>.
-define(nif_stub, nif_stub_error(?LINE)).
%% Raises the error used by every NIF placeholder in this module (via the
%% ?nif_stub macro). The reason carries the module name and the source line
%% of the stub that was reached, which identifies the function whose native
%% implementation was not loaded.
nif_stub_error(Line) ->
erlang:nif_error({nif_not_loaded,module,?MODULE,line,Line}).
%% -on_load hook: loads the sha3_nif library from the application's priv
%% directory and returns the result of erlang:load_nif/2.
init() ->
    PrivDir = resolve_priv_dir(code:priv_dir(?MODULE)),
    erlang:load_nif(filename:join(PrivDir, sha3_nif), 0).

%% Returns the priv directory reported by the code server. When the code
%% server does not know the application (bad_name), falls back to the "priv"
%% directory next to the directory holding this module's beam file.
resolve_priv_dir({error, bad_name}) ->
    BeamDir = filename:dirname(code:which(?MODULE)),
    filename:join(filename:dirname(BeamDir), "priv");
resolve_priv_dir(Dir) ->
    Dir.
%% @doc Creates a hashing context for a digest of `BitLen' bits
%% (224, 256, 384 or 512).
%%
%% The Erlang body is only a placeholder that raises `nif_not_loaded' when
%% the native library has not been loaded; the real implementation is
%% `nif_hash_init' in c_src/sha3_nif.c.
-spec hash_init(bitlen()) -> context().
hash_init(_BitLen) ->
?nif_stub.
%% @doc Absorbs `Binary' and returns the resulting context.
%%
%% The native implementation works on a copy of the given context, so the
%% context passed in can still be used afterwards.
%% The Erlang body is only a placeholder that raises `nif_not_loaded' when
%% the native library has not been loaded; the real implementation is
%% `nif_hash_update' in c_src/sha3_nif.c.
-spec hash_update(context(), binary()) -> context().
hash_update(_Context, _Binary) ->
?nif_stub.
%% @doc Returns the digest of everything absorbed into `Context'.
%%
%% The digest length is the one given to hash_init/1. The native
%% implementation finalises a copy of the state, so `Context' is not modified.
%% The Erlang body is only a placeholder that raises `nif_not_loaded' when
%% the native library has not been loaded; the real implementation is
%% `nif_hash_final' in c_src/sha3_nif.c.
-spec hash_final(context()) -> digest().
hash_final(_Context) ->
?nif_stub.
%% @doc Computes the `BitLen'-bit digest (224, 256, 384 or 512) of `Binary'
%% in one call.
%%
%% The Erlang body is only a placeholder that raises `nif_not_loaded' when
%% the native library has not been loaded; the real implementation is
%% `nif_hash' in c_src/sha3_nif.c.
-spec hash(bitlen(), binary()) -> digest().
hash(_BitLen, _Binary) ->
?nif_stub.

48
test/sha3_tests.erl Normal file
View File

@ -0,0 +1,48 @@
-module(sha3_tests).
-include_lib("eunit/include/eunit.hrl").
%% 16-byte input message shared by the tests below.
simple_data() ->
<<16#00112233445566778899AABBCCDDEEFF:128>>.
%% Expected 224-bit digest of simple_data().
simple_digest() ->
<<16#038907E89C919CD8F90A7FBC5A88FF9278108DAEF3EBCDA0CEB383E1:224>>.
%% One-shot hashing of the sample message yields the sample digest.
simple_test() ->
    ?assertEqual(simple_digest(), sha3:hash(224, simple_data())).
%% The init/update/final sequence yields the same digest as one-shot hashing.
update_test() ->
    Fed = sha3:hash_update(sha3:hash_init(224), simple_data()),
    ?assertEqual(simple_digest(), sha3:hash_final(Fed)).
%% Updating does not modify the context it is given: two updates that start
%% from the same initial context both produce the sample digest.
update_context_test() ->
    Base = sha3:hash_init(224),
    [BranchA, BranchB] = [sha3:hash_update(Base, simple_data()) || _ <- [a, b]],
    DigestA = sha3:hash_final(BranchA),
    DigestB = sha3:hash_final(BranchB),
    Expected = simple_digest(),
    ?assertEqual(Expected, DigestA),
    ?assertEqual(Expected, DigestB).
%% Known answer for the 224-bit digest of the 16-byte sample message.
hash_224_test() ->
    Input = <<16#00112233445566778899AABBCCDDEEFF:128>>,
    Expected = <<16#038907E89C919CD8F90A7FBC5A88FF9278108DAEF3EBCDA0CEB383E1:224>>,
    ?assertEqual(Expected, sha3:hash(224, Input)).
%% Known answer for the 256-bit digest of the 16-byte sample message.
hash_256_test() ->
    Input = <<16#00112233445566778899AABBCCDDEEFF:128>>,
    Expected = <<16#22BCE46032802AF0ABFACF3768F7BE04A34F5F01DF60F44FFD52D3CA937350C0:256>>,
    ?assertEqual(Expected, sha3:hash(256, Input)).
%% Known answer for the 384-bit digest of the 16-byte sample message.
hash_384_test() ->
    Input = <<16#00112233445566778899AABBCCDDEEFF:128>>,
    Expected = <<16#25FAC1ADECBE1B254976FE32C2FE78829B23D7D84316141ECD208D6806A9DB4352A014ADA4106BA0D210DDA0FD18E150:384>>,
    ?assertEqual(Expected, sha3:hash(384, Input)).
%% Known answer for the 512-bit digest of the 16-byte sample message.
hash_512_test() ->
    Input = <<16#00112233445566778899AABBCCDDEEFF:128>>,
    Expected = <<16#94EE7851163C39C3489373AA0BF885D95925EAD7484C586D2E0D01D9C8069D3C30E2EEA2DC63A91B517FE53E43A31D764A2154A2DA92876366B138ABC4406805:512>>,
    ?assertEqual(Expected, sha3:hash(512, Input)).