From 9bd90833708fcd0351e2e424f502ad1423278981 Mon Sep 17 00:00:00 2001 From: SUZUKI Tetsuya Date: Sun, 7 Oct 2012 16:45:43 +0900 Subject: [PATCH] remove unused files --- c_src/AVR8-rotate64.h | 27 - c_src/AVR8-rotate64.s | 285 ---- c_src/Keccak-avr8-settings.h | 2 - c_src/KeccakF-1600-32-rvk.macros | 555 -------- c_src/KeccakF-1600-32-s1.macros | 1187 ----------------- c_src/KeccakF-1600-32-s2.macros | 1187 ----------------- c_src/KeccakF-1600-32.macros | 26 - c_src/KeccakF-1600-arm.c | 123 -- c_src/KeccakF-1600-armcc.s | 653 --------- c_src/KeccakF-1600-armgcc.s | 686 ---------- c_src/KeccakF-1600-avr8.c | 163 --- c_src/KeccakF-1600-avr8asm-compact.s | 647 --------- c_src/KeccakF-1600-avr8asm-fast.s | 934 ------------- .../KeccakF-1600-inplace-armgcc-ARMv7A-NEON.s | 446 ------- c_src/KeccakF-1600-opt32-settings.h | 4 - c_src/KeccakF-1600-opt32.c | 524 -------- c_src/KeccakF-1600-reference.c | 300 ----- c_src/KeccakF-1600-reference.h | 20 - c_src/KeccakF-1600-reference32BI.c | 371 ------ c_src/KeccakF-1600-x86-64-asm.c | 62 - c_src/KeccakF-1600-x86-64-gas.s | 766 ----------- c_src/KeccakF-1600-x86-64-shld-gas.s | 766 ----------- rebar.config | 14 +- 23 files changed, 2 insertions(+), 9746 deletions(-) delete mode 100755 c_src/AVR8-rotate64.h delete mode 100755 c_src/AVR8-rotate64.s delete mode 100755 c_src/Keccak-avr8-settings.h delete mode 100755 c_src/KeccakF-1600-32-rvk.macros delete mode 100755 c_src/KeccakF-1600-32-s1.macros delete mode 100755 c_src/KeccakF-1600-32-s2.macros delete mode 100755 c_src/KeccakF-1600-32.macros delete mode 100755 c_src/KeccakF-1600-arm.c delete mode 100755 c_src/KeccakF-1600-armcc.s delete mode 100755 c_src/KeccakF-1600-armgcc.s delete mode 100755 c_src/KeccakF-1600-avr8.c delete mode 100755 c_src/KeccakF-1600-avr8asm-compact.s delete mode 100755 c_src/KeccakF-1600-avr8asm-fast.s delete mode 100755 c_src/KeccakF-1600-inplace-armgcc-ARMv7A-NEON.s delete mode 100755 c_src/KeccakF-1600-opt32-settings.h delete mode 100755 c_src/KeccakF-1600-opt32.c delete mode 100755 c_src/KeccakF-1600-reference.c delete mode 100755 c_src/KeccakF-1600-reference.h delete mode 100755 c_src/KeccakF-1600-reference32BI.c delete mode 100755 c_src/KeccakF-1600-x86-64-asm.c delete mode 100755 c_src/KeccakF-1600-x86-64-gas.s delete mode 100755 c_src/KeccakF-1600-x86-64-shld-gas.s diff --git a/c_src/AVR8-rotate64.h b/c_src/AVR8-rotate64.h deleted file mode 100755 index 4f921b9..0000000 --- a/c_src/AVR8-rotate64.h +++ /dev/null @@ -1,27 +0,0 @@ -/* -File: AVR8-rotate64.h - -This code is originally by Daniel Otte (daniel.otte@rub.de) in 2006-2010 as part of the AVR-Crypto-Lib, and was then improved by Ronny Van Keer, STMicroelectronics, in 2010. - -Implementation by Daniel Otte and Ronny Van Keer, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#ifndef ROTATE64_H_ -#define ROTATE64_H_ - -#include - - -#define ROT_CODE(a) ((((a)/8+((((a)%8)>4)?1:0))<<4) | ((a) & 7)) - -uint64_t rotate64_1bit_left(uint64_t a); -uint64_t rotate64_1bit_right(uint64_t a); -uint64_t rotate64left_code(uint64_t a, int8_t code); - -#endif /* ROTATE64_H_ */ - diff --git a/c_src/AVR8-rotate64.s b/c_src/AVR8-rotate64.s deleted file mode 100755 index f30d030..0000000 --- a/c_src/AVR8-rotate64.s +++ /dev/null @@ -1,285 +0,0 @@ -/* -File: AVR8-rotate64.s - -This code is originally by Daniel Otte (daniel.otte@rub.de) in 2006-2010 as part of the AVR-Crypto-Lib, and was then improved by Ronny Van Keer, STMicroelectronics, in 2010. - -Implementation by Daniel Otte and Ronny Van Keer, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -.global rotate64_1bit_left -rotate64_4bit_left: - lsl r18 - rol r19 - rol r20 - rol r21 - rol r22 - rol r23 - rol r24 - rol r25 - adc r18, r1 -rotate64_3bit_left: - lsl r18 - rol r19 - rol r20 - rol r21 - rol r22 - rol r23 - rol r24 - rol r25 - adc r18, r1 -rotate64_2bit_left: - lsl r18 - rol r19 - rol r20 - rol r21 - rol r22 - rol r23 - rol r24 - rol r25 - adc r18, r1 -rotate64_1bit_left: - lsl r18 - rol r19 - rol r20 - rol r21 - rol r22 - rol r23 - rol r24 - rol r25 - adc r18, r1 - ret - -.global rotate64_1bit_right -rotate64_3bit_right: - bst r18, 0 - ror r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - bld r25, 7 -rotate64_2bit_right: - bst r18, 0 - ror r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - bld r25, 7 -rotate64_1bit_right: - bst r18, 0 - ror r25 - ror r24 - ror r23 - ror r22 - ror r21 - ror r20 - ror r19 - ror r18 - bld r25, 7 - ret - -/* -** Each byte rotate routine must be 16 instructions long. -*/ -rotate64_0byte_left: - andi r16, 0x07 - ldi r30, pm_lo8(bit_rot_jmp_table) - ldi r31, pm_hi8(bit_rot_jmp_table) - add r30, r16 - - adc r31, r1 - ijmp - nop - nop - - nop - nop - nop - nop - - nop - nop - nop - nop - -rotate64_1byte_left: - mov r0, r25 - mov r25, r24 - mov r24, r23 - mov r23, r22 - - mov r22, r21 - mov r21, r20 - mov r20, r19 - mov r19, r18 - - mov r18, r0 - andi r16, 0x07 - ldi r30, pm_lo8(bit_rot_jmp_table) - ldi r31, pm_hi8(bit_rot_jmp_table) - - add r30, r16 - adc r31, r1 - ijmp - nop - -rotate64_2byte_left: - movw r0, r24 - movw r24, r22 - movw r22, r20 - movw r20, r18 - - movw r18, r0 - clr r1 - andi r16, 0x07 - ldi r30, pm_lo8(bit_rot_jmp_table) - - ldi r31, pm_hi8(bit_rot_jmp_table) - add r30, r16 - adc r31, r1 - ijmp - - nop - nop - nop - nop - -rotate64_3byte_left: - mov r0, r25 - mov r25, r22 - mov r22, r19 - mov r19, r24 - - mov r24, r21 - mov r21, r18 - mov r18, r23 - mov r23, r20 - - mov r20, r0 - andi r16, 0x07 - ldi r30, pm_lo8(bit_rot_jmp_table) - ldi r31, pm_hi8(bit_rot_jmp_table) - - add r30, r16 - adc r31, r1 - ijmp - nop - -rotate64_4byte_left: - movw r0, r24 - movw r24, r20 - movw r20, r0 - movw r0, r22 - - movw r22, r18 - movw r18, r0 - clr r1 - andi r16, 0x07 - - ldi r30, pm_lo8(bit_rot_jmp_table) - ldi r31, pm_hi8(bit_rot_jmp_table) - add r30, r16 - adc r31, r1 - - ijmp - nop - nop - nop - -rotate64_5byte_left: - mov r0, r25 - mov r25, r20 - mov r20, r23 - mov r23, r18 - - mov r18, r21 - mov r21, r24 - mov r24, r19 - mov r19, r22 - - mov r22, r0 - andi r16, 0x07 - ldi r30, pm_lo8(bit_rot_jmp_table) - ldi r31, pm_hi8(bit_rot_jmp_table) - - add r30, r16 - adc r31, r1 - ijmp - nop - -rotate64_6byte_left: - movw r0, r18 - movw r18, r20 - movw r20, r22 - movw r22, r24 - - movw r24, r0 - clr r1 - andi r16, 0x07 - ldi r30, pm_lo8(bit_rot_jmp_table) - - ldi r31, pm_hi8(bit_rot_jmp_table) - add r30, r16 - adc r31, r1 - ijmp - - nop - nop - nop - nop - -rotate64_7byte_left: - mov r0, r18 - mov r18, r19 - mov r19, r20 - mov r20, r21 - - mov r21, r22 - mov r22, r23 - mov r23, r24 - mov r24, r25 - - mov r25, r0 - andi r16, 0x07 - ldi r30, pm_lo8(bit_rot_jmp_table) - ldi r31, pm_hi8(bit_rot_jmp_table) - - add r30, r16 - adc r31, r1 - ijmp - nop - - -bit_rot_jmp_table: - ret - rjmp rotate64_1bit_left - rjmp rotate64_2bit_left - rjmp rotate64_3bit_left - rjmp rotate64_4bit_left - rjmp rotate64_3bit_right - rjmp rotate64_2bit_right - rjmp rotate64_1bit_right - -.global rotate64left_code -rotate64left_code: - ldi r30, pm_lo8(rotate64_0byte_left) - ldi r31, pm_hi8(rotate64_0byte_left) - mov r0, r16 - andi r16, 0x70 - add r30, r16 - adc r31, r1 - mov r16, r0 - ijmp - \ No newline at end of file diff --git a/c_src/Keccak-avr8-settings.h b/c_src/Keccak-avr8-settings.h deleted file mode 100755 index 030e8eb..0000000 --- a/c_src/Keccak-avr8-settings.h +++ /dev/null @@ -1,2 +0,0 @@ -#define cKeccakR 1088 -#define cKeccakFixedOutputLengthInBytes 32 diff --git a/c_src/KeccakF-1600-32-rvk.macros b/c_src/KeccakF-1600-32-rvk.macros deleted file mode 100755 index c0c9029..0000000 --- a/c_src/KeccakF-1600-32-rvk.macros +++ /dev/null @@ -1,555 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by Ronny Van Keer, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -static const UINT32 KeccakF1600RoundConstants_int2[2*24] = -{ - 0x00000001UL, 0x00000000UL, - 0x00000000UL, 0x00000089UL, - 0x00000000UL, 0x8000008bUL, - 0x00000000UL, 0x80008080UL, - 0x00000001UL, 0x0000008bUL, - 0x00000001UL, 0x00008000UL, - 0x00000001UL, 0x80008088UL, - 0x00000001UL, 0x80000082UL, - 0x00000000UL, 0x0000000bUL, - 0x00000000UL, 0x0000000aUL, - 0x00000001UL, 0x00008082UL, - 0x00000000UL, 0x00008003UL, - 0x00000001UL, 0x0000808bUL, - 0x00000001UL, 0x8000000bUL, - 0x00000001UL, 0x8000008aUL, - 0x00000001UL, 0x80000081UL, - 0x00000000UL, 0x80000081UL, - 0x00000000UL, 0x80000008UL, - 0x00000000UL, 0x00000083UL, - 0x00000000UL, 0x80008003UL, - 0x00000001UL, 0x80008088UL, - 0x00000000UL, 0x80000088UL, - 0x00000001UL, 0x00008000UL, - 0x00000000UL, 0x80008082UL -}; - -#undef rounds - -#define rounds \ -{ \ - UINT32 Da0, De0, Di0, Do0, Du0; \ - UINT32 Da1, De1, Di1, Do1, Du1; \ - UINT32 Ba, Be, Bi, Bo, Bu; \ - UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ - UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ - UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ - UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ - UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ - UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ - UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ - UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ - UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ - UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ - UINT32 Cw, Cx, Cy, Cz; \ - UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ - UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ - UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ - UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ - UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ - UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ - UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ - UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ - UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ - UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ - const UINT32 * pRoundConstants = KeccakF1600RoundConstants_int2; \ - UINT32 i; \ -\ - copyFromState(A, state) \ -\ - for( i = 12; i != 0; --i ) { \ - Cx = Abu0^Agu0^Aku0^Amu0^Asu0; \ - Du1 = Abe1^Age1^Ake1^Ame1^Ase1; \ - Da0 = Cx^ROL32(Du1, 1); \ - Cz = Abu1^Agu1^Aku1^Amu1^Asu1; \ - Du0 = Abe0^Age0^Ake0^Ame0^Ase0; \ - Da1 = Cz^Du0; \ -\ - Cw = Abi0^Agi0^Aki0^Ami0^Asi0; \ - Do0 = Cw^ROL32(Cz, 1); \ - Cy = Abi1^Agi1^Aki1^Ami1^Asi1; \ - Do1 = Cy^Cx; \ -\ - Cx = Aba0^Aga0^Aka0^Ama0^Asa0; \ - De0 = Cx^ROL32(Cy, 1); \ - Cz = Aba1^Aga1^Aka1^Ama1^Asa1; \ - De1 = Cz^Cw; \ -\ - Cy = Abo1^Ago1^Ako1^Amo1^Aso1; \ - Di0 = Du0^ROL32(Cy, 1); \ - Cw = Abo0^Ago0^Ako0^Amo0^Aso0; \ - Di1 = Du1^Cw; \ -\ - Du0 = Cw^ROL32(Cz, 1); \ - Du1 = Cy^Cx; \ -\ - Aba0 ^= Da0; \ - Ba = Aba0; \ - Age0 ^= De0; \ - Be = ROL32(Age0, 22); \ - Aki1 ^= Di1; \ - Bi = ROL32(Aki1, 22); \ - Amo1 ^= Do1; \ - Bo = ROL32(Amo1, 11); \ - Asu0 ^= Du0; \ - Bu = ROL32(Asu0, 7); \ - Eba0 = Ba ^((~Be)& Bi ) ^ *(pRoundConstants++); \ - Ebe0 = Be ^((~Bi)& Bo ); \ - Ebi0 = Bi ^((~Bo)& Bu ); \ - Ebo0 = Bo ^((~Bu)& Ba ); \ - Ebu0 = Bu ^((~Ba)& Be ); \ -\ - Abo0 ^= Do0; \ - Ba = ROL32(Abo0, 14); \ - Agu0 ^= Du0; \ - Be = ROL32(Agu0, 10); \ - Aka1 ^= Da1; \ - Bi = ROL32(Aka1, 2); \ - Ame1 ^= De1; \ - Bo = ROL32(Ame1, 23); \ - Asi1 ^= Di1; \ - Bu = ROL32(Asi1, 31); \ - Ega0 = Ba ^((~Be)& Bi ); \ - Ege0 = Be ^((~Bi)& Bo ); \ - Egi0 = Bi ^((~Bo)& Bu ); \ - Ego0 = Bo ^((~Bu)& Ba ); \ - Egu0 = Bu ^((~Ba)& Be ); \ -\ - Abe1 ^= De1; \ - Ba = ROL32(Abe1, 1); \ - Agi0 ^= Di0; \ - Be = ROL32(Agi0, 3); \ - Ako1 ^= Do1; \ - Bi = ROL32(Ako1, 13); \ - Amu0 ^= Du0; \ - Bo = ROL32(Amu0, 4); \ - Asa0 ^= Da0; \ - Bu = ROL32(Asa0, 9); \ - Eka0 = Ba ^((~Be)& Bi ); \ - Eke0 = Be ^((~Bi)& Bo ); \ - Eki0 = Bi ^((~Bo)& Bu ); \ - Eko0 = Bo ^((~Bu)& Ba ); \ - Eku0 = Bu ^((~Ba)& Be ); \ -\ - Abu1 ^= Du1; \ - Ba = ROL32(Abu1, 14); \ - Aga0 ^= Da0; \ - Be = ROL32(Aga0, 18); \ - Ake0 ^= De0; \ - Bi = ROL32(Ake0, 5); \ - Ami1 ^= Di1; \ - Bo = ROL32(Ami1, 8); \ - Aso0 ^= Do0; \ - Bu = ROL32(Aso0, 28); \ - Ema0 = Ba ^((~Be)& Bi ); \ - Eme0 = Be ^((~Bi)& Bo ); \ - Emi0 = Bi ^((~Bo)& Bu ); \ - Emo0 = Bo ^((~Bu)& Ba ); \ - Emu0 = Bu ^((~Ba)& Be ); \ -\ - Abi0 ^= Di0; \ - Ba = ROL32(Abi0, 31); \ - Ago1 ^= Do1; \ - Be = ROL32(Ago1, 28); \ - Aku1 ^= Du1; \ - Bi = ROL32(Aku1, 20); \ - Ama1 ^= Da1; \ - Bo = ROL32(Ama1, 21); \ - Ase0 ^= De0; \ - Bu = ROL32(Ase0, 1); \ - Esa0 = Ba ^((~Be)& Bi ); \ - Ese0 = Be ^((~Bi)& Bo ); \ - Esi0 = Bi ^((~Bo)& Bu ); \ - Eso0 = Bo ^((~Bu)& Ba ); \ - Esu0 = Bu ^((~Ba)& Be ); \ -\ - Aba1 ^= Da1; \ - Ba = Aba1; \ - Age1 ^= De1; \ - Be = ROL32(Age1, 22); \ - Aki0 ^= Di0; \ - Bi = ROL32(Aki0, 21); \ - Amo0 ^= Do0; \ - Bo = ROL32(Amo0, 10); \ - Asu1 ^= Du1; \ - Bu = ROL32(Asu1, 7); \ - Eba1 = Ba ^((~Be)& Bi ); \ - Eba1 ^= *(pRoundConstants++); \ - Ebe1 = Be ^((~Bi)& Bo ); \ - Ebi1 = Bi ^((~Bo)& Bu ); \ - Ebo1 = Bo ^((~Bu)& Ba ); \ - Ebu1 = Bu ^((~Ba)& Be ); \ -\ - Abo1 ^= Do1; \ - Ba = ROL32(Abo1, 14); \ - Agu1 ^= Du1; \ - Be = ROL32(Agu1, 10); \ - Aka0 ^= Da0; \ - Bi = ROL32(Aka0, 1); \ - Ame0 ^= De0; \ - Bo = ROL32(Ame0, 22); \ - Asi0 ^= Di0; \ - Bu = ROL32(Asi0, 30); \ - Ega1 = Ba ^((~Be)& Bi ); \ - Ege1 = Be ^((~Bi)& Bo ); \ - Egi1 = Bi ^((~Bo)& Bu ); \ - Ego1 = Bo ^((~Bu)& Ba ); \ - Egu1 = Bu ^((~Ba)& Be ); \ -\ - Abe0 ^= De0; \ - Ba = Abe0; \ - Agi1 ^= Di1; \ - Be = ROL32(Agi1, 3); \ - Ako0 ^= Do0; \ - Bi = ROL32(Ako0, 12); \ - Amu1 ^= Du1; \ - Bo = ROL32(Amu1, 4); \ - Asa1 ^= Da1; \ - Bu = ROL32(Asa1, 9); \ - Eka1 = Ba ^((~Be)& Bi ); \ - Eke1 = Be ^((~Bi)& Bo ); \ - Eki1 = Bi ^((~Bo)& Bu ); \ - Eko1 = Bo ^((~Bu)& Ba ); \ - Eku1 = Bu ^((~Ba)& Be ); \ -\ - Abu0 ^= Du0; \ - Ba = ROL32(Abu0, 13); \ - Aga1 ^= Da1; \ - Be = ROL32(Aga1, 18); \ - Ake1 ^= De1; \ - Bi = ROL32(Ake1, 5); \ - Ami0 ^= Di0; \ - Bo = ROL32(Ami0, 7); \ - Aso1 ^= Do1; \ - Bu = ROL32(Aso1, 28); \ - Ema1 = Ba ^((~Be)& Bi ); \ - Eme1 = Be ^((~Bi)& Bo ); \ - Emi1 = Bi ^((~Bo)& Bu ); \ - Emo1 = Bo ^((~Bu)& Ba ); \ - Emu1 = Bu ^((~Ba)& Be ); \ -\ - Abi1 ^= Di1; \ - Ba = ROL32(Abi1, 31); \ - Ago0 ^= Do0; \ - Be = ROL32(Ago0, 27); \ - Aku0 ^= Du0; \ - Bi = ROL32(Aku0, 19); \ - Ama0 ^= Da0; \ - Bo = ROL32(Ama0, 20); \ - Ase1 ^= De1; \ - Bu = ROL32(Ase1, 1); \ - Esa1 = Ba ^((~Be)& Bi ); \ - Ese1 = Be ^((~Bi)& Bo ); \ - Esi1 = Bi ^((~Bo)& Bu ); \ - Eso1 = Bo ^((~Bu)& Ba ); \ - Esu1 = Bu ^((~Ba)& Be ); \ -\ - Cx = Ebu0^Egu0^Eku0^Emu0^Esu0; \ - Du1 = Ebe1^Ege1^Eke1^Eme1^Ese1; \ - Da0 = Cx^ROL32(Du1, 1); \ - Cz = Ebu1^Egu1^Eku1^Emu1^Esu1; \ - Du0 = Ebe0^Ege0^Eke0^Eme0^Ese0; \ - Da1 = Cz^Du0; \ -\ - Cw = Ebi0^Egi0^Eki0^Emi0^Esi0; \ - Do0 = Cw^ROL32(Cz, 1); \ - Cy = Ebi1^Egi1^Eki1^Emi1^Esi1; \ - Do1 = Cy^Cx; \ -\ - Cx = Eba0^Ega0^Eka0^Ema0^Esa0; \ - De0 = Cx^ROL32(Cy, 1); \ - Cz = Eba1^Ega1^Eka1^Ema1^Esa1; \ - De1 = Cz^Cw; \ -\ - Cy = Ebo1^Ego1^Eko1^Emo1^Eso1; \ - Di0 = Du0^ROL32(Cy, 1); \ - Cw = Ebo0^Ego0^Eko0^Emo0^Eso0; \ - Di1 = Du1^Cw; \ -\ - Du0 = Cw^ROL32(Cz, 1); \ - Du1 = Cy^Cx; \ -\ - Eba0 ^= Da0; \ - Ba = Eba0; \ - Ege0 ^= De0; \ - Be = ROL32(Ege0, 22); \ - Eki1 ^= Di1; \ - Bi = ROL32(Eki1, 22); \ - Emo1 ^= Do1; \ - Bo = ROL32(Emo1, 11); \ - Esu0 ^= Du0; \ - Bu = ROL32(Esu0, 7); \ - Aba0 = Ba ^((~Be)& Bi ); \ - Aba0 ^= *(pRoundConstants++); \ - Abe0 = Be ^((~Bi)& Bo ); \ - Abi0 = Bi ^((~Bo)& Bu ); \ - Abo0 = Bo ^((~Bu)& Ba ); \ - Abu0 = Bu ^((~Ba)& Be ); \ -\ - Ebo0 ^= Do0; \ - Ba = ROL32(Ebo0, 14); \ - Egu0 ^= Du0; \ - Be = ROL32(Egu0, 10); \ - Eka1 ^= Da1; \ - Bi = ROL32(Eka1, 2); \ - Eme1 ^= De1; \ - Bo = ROL32(Eme1, 23); \ - Esi1 ^= Di1; \ - Bu = ROL32(Esi1, 31); \ - Aga0 = Ba ^((~Be)& Bi ); \ - Age0 = Be ^((~Bi)& Bo ); \ - Agi0 = Bi ^((~Bo)& Bu ); \ - Ago0 = Bo ^((~Bu)& Ba ); \ - Agu0 = Bu ^((~Ba)& Be ); \ -\ - Ebe1 ^= De1; \ - Ba = ROL32(Ebe1, 1); \ - Egi0 ^= Di0; \ - Be = ROL32(Egi0, 3); \ - Eko1 ^= Do1; \ - Bi = ROL32(Eko1, 13); \ - Emu0 ^= Du0; \ - Bo = ROL32(Emu0, 4); \ - Esa0 ^= Da0; \ - Bu = ROL32(Esa0, 9); \ - Aka0 = Ba ^((~Be)& Bi ); \ - Ake0 = Be ^((~Bi)& Bo ); \ - Aki0 = Bi ^((~Bo)& Bu ); \ - Ako0 = Bo ^((~Bu)& Ba ); \ - Aku0 = Bu ^((~Ba)& Be ); \ -\ - Ebu1 ^= Du1; \ - Ba = ROL32(Ebu1, 14); \ - Ega0 ^= Da0; \ - Be = ROL32(Ega0, 18); \ - Eke0 ^= De0; \ - Bi = ROL32(Eke0, 5); \ - Emi1 ^= Di1; \ - Bo = ROL32(Emi1, 8); \ - Eso0 ^= Do0; \ - Bu = ROL32(Eso0, 28); \ - Ama0 = Ba ^((~Be)& Bi ); \ - Ame0 = Be ^((~Bi)& Bo ); \ - Ami0 = Bi ^((~Bo)& Bu ); \ - Amo0 = Bo ^((~Bu)& Ba ); \ - Amu0 = Bu ^((~Ba)& Be ); \ -\ - Ebi0 ^= Di0; \ - Ba = ROL32(Ebi0, 31); \ - Ego1 ^= Do1; \ - Be = ROL32(Ego1, 28); \ - Eku1 ^= Du1; \ - Bi = ROL32(Eku1, 20); \ - Ema1 ^= Da1; \ - Bo = ROL32(Ema1, 21); \ - Ese0 ^= De0; \ - Bu = ROL32(Ese0, 1); \ - Asa0 = Ba ^((~Be)& Bi ); \ - Ase0 = Be ^((~Bi)& Bo ); \ - Asi0 = Bi ^((~Bo)& Bu ); \ - Aso0 = Bo ^((~Bu)& Ba ); \ - Asu0 = Bu ^((~Ba)& Be ); \ -\ - Eba1 ^= Da1; \ - Ba = Eba1; \ - Ege1 ^= De1; \ - Be = ROL32(Ege1, 22); \ - Eki0 ^= Di0; \ - Bi = ROL32(Eki0, 21); \ - Emo0 ^= Do0; \ - Bo = ROL32(Emo0, 10); \ - Esu1 ^= Du1; \ - Bu = ROL32(Esu1, 7); \ - Aba1 = Ba ^((~Be)& Bi ); \ - Aba1 ^= *(pRoundConstants++); \ - Abe1 = Be ^((~Bi)& Bo ); \ - Abi1 = Bi ^((~Bo)& Bu ); \ - Abo1 = Bo ^((~Bu)& Ba ); \ - Abu1 = Bu ^((~Ba)& Be ); \ -\ - Ebo1 ^= Do1; \ - Ba = ROL32(Ebo1, 14); \ - Egu1 ^= Du1; \ - Be = ROL32(Egu1, 10); \ - Eka0 ^= Da0; \ - Bi = ROL32(Eka0, 1); \ - Eme0 ^= De0; \ - Bo = ROL32(Eme0, 22); \ - Esi0 ^= Di0; \ - Bu = ROL32(Esi0, 30); \ - Aga1 = Ba ^((~Be)& Bi ); \ - Age1 = Be ^((~Bi)& Bo ); \ - Agi1 = Bi ^((~Bo)& Bu ); \ - Ago1 = Bo ^((~Bu)& Ba ); \ - Agu1 = Bu ^((~Ba)& Be ); \ -\ - Ebe0 ^= De0; \ - Ba = Ebe0; \ - Egi1 ^= Di1; \ - Be = ROL32(Egi1, 3); \ - Eko0 ^= Do0; \ - Bi = ROL32(Eko0, 12); \ - Emu1 ^= Du1; \ - Bo = ROL32(Emu1, 4); \ - Esa1 ^= Da1; \ - Bu = ROL32(Esa1, 9); \ - Aka1 = Ba ^((~Be)& Bi ); \ - Ake1 = Be ^((~Bi)& Bo ); \ - Aki1 = Bi ^((~Bo)& Bu ); \ - Ako1 = Bo ^((~Bu)& Ba ); \ - Aku1 = Bu ^((~Ba)& Be ); \ -\ - Ebu0 ^= Du0; \ - Ba = ROL32(Ebu0, 13); \ - Ega1 ^= Da1; \ - Be = ROL32(Ega1, 18); \ - Eke1 ^= De1; \ - Bi = ROL32(Eke1, 5); \ - Emi0 ^= Di0; \ - Bo = ROL32(Emi0, 7); \ - Eso1 ^= Do1; \ - Bu = ROL32(Eso1, 28); \ - Ama1 = Ba ^((~Be)& Bi ); \ - Ame1 = Be ^((~Bi)& Bo ); \ - Ami1 = Bi ^((~Bo)& Bu ); \ - Amo1 = Bo ^((~Bu)& Ba ); \ - Amu1 = Bu ^((~Ba)& Be ); \ -\ - Ebi1 ^= Di1; \ - Ba = ROL32(Ebi1, 31); \ - Ego0 ^= Do0; \ - Be = ROL32(Ego0, 27); \ - Eku0 ^= Du0; \ - Bi = ROL32(Eku0, 19); \ - Ema0 ^= Da0; \ - Bo = ROL32(Ema0, 20); \ - Ese1 ^= De1; \ - Bu = ROL32(Ese1, 1); \ - Asa1 = Ba ^((~Be)& Bi ); \ - Ase1 = Be ^((~Bi)& Bo ); \ - Asi1 = Bi ^((~Bo)& Bu ); \ - Aso1 = Bo ^((~Bu)& Ba ); \ - Asu1 = Bu ^((~Ba)& Be ); \ - } \ - copyToState(state, A) \ -} - -#define copyFromState(X, state) \ - X##ba0 = state[ 0]; \ - X##ba1 = state[ 1]; \ - X##be0 = state[ 2]; \ - X##be1 = state[ 3]; \ - X##bi0 = state[ 4]; \ - X##bi1 = state[ 5]; \ - X##bo0 = state[ 6]; \ - X##bo1 = state[ 7]; \ - X##bu0 = state[ 8]; \ - X##bu1 = state[ 9]; \ - X##ga0 = state[10]; \ - X##ga1 = state[11]; \ - X##ge0 = state[12]; \ - X##ge1 = state[13]; \ - X##gi0 = state[14]; \ - X##gi1 = state[15]; \ - X##go0 = state[16]; \ - X##go1 = state[17]; \ - X##gu0 = state[18]; \ - X##gu1 = state[19]; \ - X##ka0 = state[20]; \ - X##ka1 = state[21]; \ - X##ke0 = state[22]; \ - X##ke1 = state[23]; \ - X##ki0 = state[24]; \ - X##ki1 = state[25]; \ - X##ko0 = state[26]; \ - X##ko1 = state[27]; \ - X##ku0 = state[28]; \ - X##ku1 = state[29]; \ - X##ma0 = state[30]; \ - X##ma1 = state[31]; \ - X##me0 = state[32]; \ - X##me1 = state[33]; \ - X##mi0 = state[34]; \ - X##mi1 = state[35]; \ - X##mo0 = state[36]; \ - X##mo1 = state[37]; \ - X##mu0 = state[38]; \ - X##mu1 = state[39]; \ - X##sa0 = state[40]; \ - X##sa1 = state[41]; \ - X##se0 = state[42]; \ - X##se1 = state[43]; \ - X##si0 = state[44]; \ - X##si1 = state[45]; \ - X##so0 = state[46]; \ - X##so1 = state[47]; \ - X##su0 = state[48]; \ - X##su1 = state[49]; \ - -#define copyToState(state, X) \ - state[ 0] = X##ba0; \ - state[ 1] = X##ba1; \ - state[ 2] = X##be0; \ - state[ 3] = X##be1; \ - state[ 4] = X##bi0; \ - state[ 5] = X##bi1; \ - state[ 6] = X##bo0; \ - state[ 7] = X##bo1; \ - state[ 8] = X##bu0; \ - state[ 9] = X##bu1; \ - state[10] = X##ga0; \ - state[11] = X##ga1; \ - state[12] = X##ge0; \ - state[13] = X##ge1; \ - state[14] = X##gi0; \ - state[15] = X##gi1; \ - state[16] = X##go0; \ - state[17] = X##go1; \ - state[18] = X##gu0; \ - state[19] = X##gu1; \ - state[20] = X##ka0; \ - state[21] = X##ka1; \ - state[22] = X##ke0; \ - state[23] = X##ke1; \ - state[24] = X##ki0; \ - state[25] = X##ki1; \ - state[26] = X##ko0; \ - state[27] = X##ko1; \ - state[28] = X##ku0; \ - state[29] = X##ku1; \ - state[30] = X##ma0; \ - state[31] = X##ma1; \ - state[32] = X##me0; \ - state[33] = X##me1; \ - state[34] = X##mi0; \ - state[35] = X##mi1; \ - state[36] = X##mo0; \ - state[37] = X##mo1; \ - state[38] = X##mu0; \ - state[39] = X##mu1; \ - state[40] = X##sa0; \ - state[41] = X##sa1; \ - state[42] = X##se0; \ - state[43] = X##se1; \ - state[44] = X##si0; \ - state[45] = X##si1; \ - state[46] = X##so0; \ - state[47] = X##so1; \ - state[48] = X##su0; \ - state[49] = X##su1; \ - diff --git a/c_src/KeccakF-1600-32-s1.macros b/c_src/KeccakF-1600-32-s1.macros deleted file mode 100755 index 973cc19..0000000 --- a/c_src/KeccakF-1600-32-s1.macros +++ /dev/null @@ -1,1187 +0,0 @@ -/* -Code automatically generated by KeccakTools! - -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by the designers, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#define declareABCDE \ - UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ - UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ - UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ - UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ - UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ - UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ - UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ - UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ - UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ - UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ - UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; \ - UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \ - UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \ - UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \ - UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \ - UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \ - UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \ - UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \ - UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \ - UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \ - UINT32 Ca0, Ce0, Ci0, Co0, Cu0; \ - UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \ - UINT32 Da0, De0, Di0, Do0, Du0; \ - UINT32 Da1, De1, Di1, Do1, Du1; \ - UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ - UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ - UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ - UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ - UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ - UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ - UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ - UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ - UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ - UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ - -#define prepareTheta \ - Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \ - Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \ - Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \ - Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \ - Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \ - Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \ - Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \ - Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \ - Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \ - Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \ - -#ifdef UseBebigokimisa -// --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - Ca0 = E##ba0; \ - E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ - Ce0 = E##be0; \ - E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ - Ci0 = E##bi0; \ - E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ - Co0 = E##bo0; \ - E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ - Cu0 = E##bu0; \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - Ca1 = E##ba1; \ - E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ - Ce1 = E##be1; \ - E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ - Ci1 = E##bi1; \ - E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ - Co1 = E##bo1; \ - E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ - Cu1 = E##bu1; \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ - Ca0 ^= E##ga0; \ - E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ - Ce0 ^= E##ge0; \ - E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ - Ci0 ^= E##gi0; \ - E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ - Co0 ^= E##go0; \ - E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ - Cu0 ^= E##gu0; \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ - Ca1 ^= E##ga1; \ - E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ - Ce1 ^= E##ge1; \ - E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ - Ci1 ^= E##gi1; \ - E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ - Co1 ^= E##go1; \ - E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ - Cu1 ^= E##gu1; \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ - Ca0 ^= E##ka0; \ - E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ - Ce0 ^= E##ke0; \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - Ci0 ^= E##ki0; \ - E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ - Co0 ^= E##ko0; \ - E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ - Cu0 ^= E##ku0; \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ - Ca1 ^= E##ka1; \ - E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ - Ce1 ^= E##ke1; \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - Ci1 ^= E##ki1; \ - E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ - Co1 ^= E##ko1; \ - E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ - Cu1 ^= E##ku1; \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ - Ca0 ^= E##ma0; \ - E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ - Ce0 ^= E##me0; \ - E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ - Ci0 ^= E##mi0; \ - E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ - Co0 ^= E##mo0; \ - E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ - Cu0 ^= E##mu0; \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ - Ca1 ^= E##ma1; \ - E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ - Ce1 ^= E##me1; \ - E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ - Ci1 ^= E##mi1; \ - E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ - Co1 ^= E##mo1; \ - E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ - Cu1 ^= E##mu1; \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - Ca0 ^= E##sa0; \ - E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ - Ce0 ^= E##se0; \ - E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ - Ci0 ^= E##si0; \ - E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ - Co0 ^= E##so0; \ - E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ - Cu0 ^= E##su0; \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - Ca1 ^= E##sa1; \ - E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ - Ce1 ^= E##se1; \ - E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ - Ci1 ^= E##si1; \ - E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ - Co1 ^= E##so1; \ - E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ - Cu1 ^= E##su1; \ -\ - -// --- Code for round (lane complementing pattern 'bebigokimisa') -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIota(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ - E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ - E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ - E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ - E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ - E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ - E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ - E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ - E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ - E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ - E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ - E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ - E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ - E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ - E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ - E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ - E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ - E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ - E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ - E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ - E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ - E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ - E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ - E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ - E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ - E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ - E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ - E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ - E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ - E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ - E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ - E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ - E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ -\ - -#else // UseBebigokimisa -// --- Code for round, with prepare-theta -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - Ca0 = E##ba0; \ - E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ - Ce0 = E##be0; \ - E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ - Ci0 = E##bi0; \ - E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ - Co0 = E##bo0; \ - E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ - Cu0 = E##bu0; \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - Ca1 = E##ba1; \ - E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ - Ce1 = E##be1; \ - E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ - Ci1 = E##bi1; \ - E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ - Co1 = E##bo1; \ - E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ - Cu1 = E##bu1; \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ - Ca0 ^= E##ga0; \ - E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ - Ce0 ^= E##ge0; \ - E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ - Ci0 ^= E##gi0; \ - E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ - Co0 ^= E##go0; \ - E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ - Cu0 ^= E##gu0; \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ - Ca1 ^= E##ga1; \ - E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ - Ce1 ^= E##ge1; \ - E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ - Ci1 ^= E##gi1; \ - E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ - Co1 ^= E##go1; \ - E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ - Cu1 ^= E##gu1; \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ - Ca0 ^= E##ka0; \ - E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ - Ce0 ^= E##ke0; \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - Ci0 ^= E##ki0; \ - E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ - Co0 ^= E##ko0; \ - E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ - Cu0 ^= E##ku0; \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ - Ca1 ^= E##ka1; \ - E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ - Ce1 ^= E##ke1; \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - Ci1 ^= E##ki1; \ - E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ - Co1 ^= E##ko1; \ - E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ - Cu1 ^= E##ku1; \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ - Ca0 ^= E##ma0; \ - E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ - Ce0 ^= E##me0; \ - E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ - Ci0 ^= E##mi0; \ - E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ - Co0 ^= E##mo0; \ - E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ - Cu0 ^= E##mu0; \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ - Ca1 ^= E##ma1; \ - E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ - Ce1 ^= E##me1; \ - E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ - Ci1 ^= E##mi1; \ - E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ - Co1 ^= E##mo1; \ - E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ - Cu1 ^= E##mu1; \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - Ca0 ^= E##sa0; \ - E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ - Ce0 ^= E##se0; \ - E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ - Ci0 ^= E##si0; \ - E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ - Co0 ^= E##so0; \ - E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ - Cu0 ^= E##su0; \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - Ca1 ^= E##sa1; \ - E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ - Ce1 ^= E##se1; \ - E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ - Ci1 ^= E##si1; \ - E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ - Co1 ^= E##so1; \ - E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ - Cu1 ^= E##su1; \ -\ - -// --- Code for round -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIota(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ - E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ - E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ - E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ - E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ - E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ - E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ - E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ - E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ - E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ - E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ - E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ - E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ - E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ - E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ - E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ - E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ - E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ - E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ - E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ - E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ - E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ - E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ - E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ - E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ - E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ - E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ - E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ - E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ - E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ - E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ - E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ - E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ -\ - -#endif // UseBebigokimisa - -const UINT32 KeccakF1600RoundConstants_int2_0[24] = { - 0x00000001UL, - 0x00000000UL, - 0x00000000UL, - 0x00000000UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000000UL, - 0x00000000UL, - 0x00000001UL, - 0x00000000UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000000UL, - 0x00000000UL, - 0x00000000UL, - 0x00000000UL, - 0x00000001UL, - 0x00000000UL, - 0x00000001UL, - 0x00000000UL }; - -const UINT32 KeccakF1600RoundConstants_int2_1[24] = { - 0x00000000UL, - 0x00000089UL, - 0x8000008bUL, - 0x80008080UL, - 0x0000008bUL, - 0x00008000UL, - 0x80008088UL, - 0x80000082UL, - 0x0000000bUL, - 0x0000000aUL, - 0x00008082UL, - 0x00008003UL, - 0x0000808bUL, - 0x8000000bUL, - 0x8000008aUL, - 0x80000081UL, - 0x80000081UL, - 0x80000008UL, - 0x00000083UL, - 0x80008003UL, - 0x80008088UL, - 0x80000088UL, - 0x00008000UL, - 0x80008082UL }; - -#define copyFromStateAndXor1024bits(X, state, input) \ - X##ba0 = state[ 0]^input[ 0]; \ - X##ba1 = state[ 1]^input[ 1]; \ - X##be0 = state[ 2]^input[ 2]; \ - X##be1 = state[ 3]^input[ 3]; \ - X##bi0 = state[ 4]^input[ 4]; \ - X##bi1 = state[ 5]^input[ 5]; \ - X##bo0 = state[ 6]^input[ 6]; \ - X##bo1 = state[ 7]^input[ 7]; \ - X##bu0 = state[ 8]^input[ 8]; \ - X##bu1 = state[ 9]^input[ 9]; \ - X##ga0 = state[10]^input[10]; \ - X##ga1 = state[11]^input[11]; \ - X##ge0 = state[12]^input[12]; \ - X##ge1 = state[13]^input[13]; \ - X##gi0 = state[14]^input[14]; \ - X##gi1 = state[15]^input[15]; \ - X##go0 = state[16]^input[16]; \ - X##go1 = state[17]^input[17]; \ - X##gu0 = state[18]^input[18]; \ - X##gu1 = state[19]^input[19]; \ - X##ka0 = state[20]^input[20]; \ - X##ka1 = state[21]^input[21]; \ - X##ke0 = state[22]^input[22]; \ - X##ke1 = state[23]^input[23]; \ - X##ki0 = state[24]^input[24]; \ - X##ki1 = state[25]^input[25]; \ - X##ko0 = state[26]^input[26]; \ - X##ko1 = state[27]^input[27]; \ - X##ku0 = state[28]^input[28]; \ - X##ku1 = state[29]^input[29]; \ - X##ma0 = state[30]^input[30]; \ - X##ma1 = state[31]^input[31]; \ - X##me0 = state[32]; \ - X##me1 = state[33]; \ - X##mi0 = state[34]; \ - X##mi1 = state[35]; \ - X##mo0 = state[36]; \ - X##mo1 = state[37]; \ - X##mu0 = state[38]; \ - X##mu1 = state[39]; \ - X##sa0 = state[40]; \ - X##sa1 = state[41]; \ - X##se0 = state[42]; \ - X##se1 = state[43]; \ - X##si0 = state[44]; \ - X##si1 = state[45]; \ - X##so0 = state[46]; \ - X##so1 = state[47]; \ - X##su0 = state[48]; \ - X##su1 = state[49]; \ - -#define copyFromStateAndXor1088bits(X, state, input) \ - X##ba0 = state[ 0]^input[ 0]; \ - X##ba1 = state[ 1]^input[ 1]; \ - X##be0 = state[ 2]^input[ 2]; \ - X##be1 = state[ 3]^input[ 3]; \ - X##bi0 = state[ 4]^input[ 4]; \ - X##bi1 = state[ 5]^input[ 5]; \ - X##bo0 = state[ 6]^input[ 6]; \ - X##bo1 = state[ 7]^input[ 7]; \ - X##bu0 = state[ 8]^input[ 8]; \ - X##bu1 = state[ 9]^input[ 9]; \ - X##ga0 = state[10]^input[10]; \ - X##ga1 = state[11]^input[11]; \ - X##ge0 = state[12]^input[12]; \ - X##ge1 = state[13]^input[13]; \ - X##gi0 = state[14]^input[14]; \ - X##gi1 = state[15]^input[15]; \ - X##go0 = state[16]^input[16]; \ - X##go1 = state[17]^input[17]; \ - X##gu0 = state[18]^input[18]; \ - X##gu1 = state[19]^input[19]; \ - X##ka0 = state[20]^input[20]; \ - X##ka1 = state[21]^input[21]; \ - X##ke0 = state[22]^input[22]; \ - X##ke1 = state[23]^input[23]; \ - X##ki0 = state[24]^input[24]; \ - X##ki1 = state[25]^input[25]; \ - X##ko0 = state[26]^input[26]; \ - X##ko1 = state[27]^input[27]; \ - X##ku0 = state[28]^input[28]; \ - X##ku1 = state[29]^input[29]; \ - X##ma0 = state[30]^input[30]; \ - X##ma1 = state[31]^input[31]; \ - X##me0 = state[32]^input[32]; \ - X##me1 = state[33]^input[33]; \ - X##mi0 = state[34]; \ - X##mi1 = state[35]; \ - X##mo0 = state[36]; \ - X##mo1 = state[37]; \ - X##mu0 = state[38]; \ - X##mu1 = state[39]; \ - X##sa0 = state[40]; \ - X##sa1 = state[41]; \ - X##se0 = state[42]; \ - X##se1 = state[43]; \ - X##si0 = state[44]; \ - X##si1 = state[45]; \ - X##so0 = state[46]; \ - X##so1 = state[47]; \ - X##su0 = state[48]; \ - X##su1 = state[49]; \ - -#define copyFromState(X, state) \ - X##ba0 = state[ 0]; \ - X##ba1 = state[ 1]; \ - X##be0 = state[ 2]; \ - X##be1 = state[ 3]; \ - X##bi0 = state[ 4]; \ - X##bi1 = state[ 5]; \ - X##bo0 = state[ 6]; \ - X##bo1 = state[ 7]; \ - X##bu0 = state[ 8]; \ - X##bu1 = state[ 9]; \ - X##ga0 = state[10]; \ - X##ga1 = state[11]; \ - X##ge0 = state[12]; \ - X##ge1 = state[13]; \ - X##gi0 = state[14]; \ - X##gi1 = state[15]; \ - X##go0 = state[16]; \ - X##go1 = state[17]; \ - X##gu0 = state[18]; \ - X##gu1 = state[19]; \ - X##ka0 = state[20]; \ - X##ka1 = state[21]; \ - X##ke0 = state[22]; \ - X##ke1 = state[23]; \ - X##ki0 = state[24]; \ - X##ki1 = state[25]; \ - X##ko0 = state[26]; \ - X##ko1 = state[27]; \ - X##ku0 = state[28]; \ - X##ku1 = state[29]; \ - X##ma0 = state[30]; \ - X##ma1 = state[31]; \ - X##me0 = state[32]; \ - X##me1 = state[33]; \ - X##mi0 = state[34]; \ - X##mi1 = state[35]; \ - X##mo0 = state[36]; \ - X##mo1 = state[37]; \ - X##mu0 = state[38]; \ - X##mu1 = state[39]; \ - X##sa0 = state[40]; \ - X##sa1 = state[41]; \ - X##se0 = state[42]; \ - X##se1 = state[43]; \ - X##si0 = state[44]; \ - X##si1 = state[45]; \ - X##so0 = state[46]; \ - X##so1 = state[47]; \ - X##su0 = state[48]; \ - X##su1 = state[49]; \ - -#define copyToState(state, X) \ - state[ 0] = X##ba0; \ - state[ 1] = X##ba1; \ - state[ 2] = X##be0; \ - state[ 3] = X##be1; \ - state[ 4] = X##bi0; \ - state[ 5] = X##bi1; \ - state[ 6] = X##bo0; \ - state[ 7] = X##bo1; \ - state[ 8] = X##bu0; \ - state[ 9] = X##bu1; \ - state[10] = X##ga0; \ - state[11] = X##ga1; \ - state[12] = X##ge0; \ - state[13] = X##ge1; \ - state[14] = X##gi0; \ - state[15] = X##gi1; \ - state[16] = X##go0; \ - state[17] = X##go1; \ - state[18] = X##gu0; \ - state[19] = X##gu1; \ - state[20] = X##ka0; \ - state[21] = X##ka1; \ - state[22] = X##ke0; \ - state[23] = X##ke1; \ - state[24] = X##ki0; \ - state[25] = X##ki1; \ - state[26] = X##ko0; \ - state[27] = X##ko1; \ - state[28] = X##ku0; \ - state[29] = X##ku1; \ - state[30] = X##ma0; \ - state[31] = X##ma1; \ - state[32] = X##me0; \ - state[33] = X##me1; \ - state[34] = X##mi0; \ - state[35] = X##mi1; \ - state[36] = X##mo0; \ - state[37] = X##mo1; \ - state[38] = X##mu0; \ - state[39] = X##mu1; \ - state[40] = X##sa0; \ - state[41] = X##sa1; \ - state[42] = X##se0; \ - state[43] = X##se1; \ - state[44] = X##si0; \ - state[45] = X##si1; \ - state[46] = X##so0; \ - state[47] = X##so1; \ - state[48] = X##su0; \ - state[49] = X##su1; \ - -#define copyStateVariables(X, Y) \ - X##ba0 = Y##ba0; \ - X##ba1 = Y##ba1; \ - X##be0 = Y##be0; \ - X##be1 = Y##be1; \ - X##bi0 = Y##bi0; \ - X##bi1 = Y##bi1; \ - X##bo0 = Y##bo0; \ - X##bo1 = Y##bo1; \ - X##bu0 = Y##bu0; \ - X##bu1 = Y##bu1; \ - X##ga0 = Y##ga0; \ - X##ga1 = Y##ga1; \ - X##ge0 = Y##ge0; \ - X##ge1 = Y##ge1; \ - X##gi0 = Y##gi0; \ - X##gi1 = Y##gi1; \ - X##go0 = Y##go0; \ - X##go1 = Y##go1; \ - X##gu0 = Y##gu0; \ - X##gu1 = Y##gu1; \ - X##ka0 = Y##ka0; \ - X##ka1 = Y##ka1; \ - X##ke0 = Y##ke0; \ - X##ke1 = Y##ke1; \ - X##ki0 = Y##ki0; \ - X##ki1 = Y##ki1; \ - X##ko0 = Y##ko0; \ - X##ko1 = Y##ko1; \ - X##ku0 = Y##ku0; \ - X##ku1 = Y##ku1; \ - X##ma0 = Y##ma0; \ - X##ma1 = Y##ma1; \ - X##me0 = Y##me0; \ - X##me1 = Y##me1; \ - X##mi0 = Y##mi0; \ - X##mi1 = Y##mi1; \ - X##mo0 = Y##mo0; \ - X##mo1 = Y##mo1; \ - X##mu0 = Y##mu0; \ - X##mu1 = Y##mu1; \ - X##sa0 = Y##sa0; \ - X##sa1 = Y##sa1; \ - X##se0 = Y##se0; \ - X##se1 = Y##se1; \ - X##si0 = Y##si0; \ - X##si1 = Y##si1; \ - X##so0 = Y##so0; \ - X##so1 = Y##so1; \ - X##su0 = Y##su0; \ - X##su1 = Y##su1; \ - diff --git a/c_src/KeccakF-1600-32-s2.macros b/c_src/KeccakF-1600-32-s2.macros deleted file mode 100755 index 3c27a34..0000000 --- a/c_src/KeccakF-1600-32-s2.macros +++ /dev/null @@ -1,1187 +0,0 @@ -/* -Code automatically generated by KeccakTools! - -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by the designers, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#define declareABCDE \ - UINT32 Aba0, Abe0, Abi0, Abo0, Abu0; \ - UINT32 Aba1, Abe1, Abi1, Abo1, Abu1; \ - UINT32 Aga0, Age0, Agi0, Ago0, Agu0; \ - UINT32 Aga1, Age1, Agi1, Ago1, Agu1; \ - UINT32 Aka0, Ake0, Aki0, Ako0, Aku0; \ - UINT32 Aka1, Ake1, Aki1, Ako1, Aku1; \ - UINT32 Ama0, Ame0, Ami0, Amo0, Amu0; \ - UINT32 Ama1, Ame1, Ami1, Amo1, Amu1; \ - UINT32 Asa0, Ase0, Asi0, Aso0, Asu0; \ - UINT32 Asa1, Ase1, Asi1, Aso1, Asu1; \ - UINT32 Bba0, Bbe0, Bbi0, Bbo0, Bbu0; \ - UINT32 Bba1, Bbe1, Bbi1, Bbo1, Bbu1; \ - UINT32 Bga0, Bge0, Bgi0, Bgo0, Bgu0; \ - UINT32 Bga1, Bge1, Bgi1, Bgo1, Bgu1; \ - UINT32 Bka0, Bke0, Bki0, Bko0, Bku0; \ - UINT32 Bka1, Bke1, Bki1, Bko1, Bku1; \ - UINT32 Bma0, Bme0, Bmi0, Bmo0, Bmu0; \ - UINT32 Bma1, Bme1, Bmi1, Bmo1, Bmu1; \ - UINT32 Bsa0, Bse0, Bsi0, Bso0, Bsu0; \ - UINT32 Bsa1, Bse1, Bsi1, Bso1, Bsu1; \ - UINT32 Ca0, Ce0, Ci0, Co0, Cu0; \ - UINT32 Ca1, Ce1, Ci1, Co1, Cu1; \ - UINT32 Da0, De0, Di0, Do0, Du0; \ - UINT32 Da1, De1, Di1, Do1, Du1; \ - UINT32 Eba0, Ebe0, Ebi0, Ebo0, Ebu0; \ - UINT32 Eba1, Ebe1, Ebi1, Ebo1, Ebu1; \ - UINT32 Ega0, Ege0, Egi0, Ego0, Egu0; \ - UINT32 Ega1, Ege1, Egi1, Ego1, Egu1; \ - UINT32 Eka0, Eke0, Eki0, Eko0, Eku0; \ - UINT32 Eka1, Eke1, Eki1, Eko1, Eku1; \ - UINT32 Ema0, Eme0, Emi0, Emo0, Emu0; \ - UINT32 Ema1, Eme1, Emi1, Emo1, Emu1; \ - UINT32 Esa0, Ese0, Esi0, Eso0, Esu0; \ - UINT32 Esa1, Ese1, Esi1, Eso1, Esu1; \ - -#define prepareTheta \ - Ca0 = Aba0^Aga0^Aka0^Ama0^Asa0; \ - Ca1 = Aba1^Aga1^Aka1^Ama1^Asa1; \ - Ce0 = Abe0^Age0^Ake0^Ame0^Ase0; \ - Ce1 = Abe1^Age1^Ake1^Ame1^Ase1; \ - Ci0 = Abi0^Agi0^Aki0^Ami0^Asi0; \ - Ci1 = Abi1^Agi1^Aki1^Ami1^Asi1; \ - Co0 = Abo0^Ago0^Ako0^Amo0^Aso0; \ - Co1 = Abo1^Ago1^Ako1^Amo1^Aso1; \ - Cu0 = Abu0^Agu0^Aku0^Amu0^Asu0; \ - Cu1 = Abu1^Agu1^Aku1^Amu1^Asu1; \ - -#ifdef UseBebigokimisa -// --- Code for round, with prepare-theta (lane complementing pattern 'bebigokimisa') -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - Ca0 = E##ba0; \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ - Ce0 = E##be0; \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ - Ci0 = E##bi0; \ - E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ - Co0 = E##bo0; \ - E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ - Cu0 = E##bu0; \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - Ca1 = E##ba1; \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ - Ce1 = E##be1; \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ - Ci1 = E##bi1; \ - E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ - Co1 = E##bo1; \ - E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ - Cu1 = E##bu1; \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ - Ca0 ^= E##ga0; \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ - Ce0 ^= E##ge0; \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ - Ci0 ^= E##gi0; \ - E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ - Co0 ^= E##go0; \ - E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ - Cu0 ^= E##gu0; \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ - Ca1 ^= E##ga1; \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ - Ce1 ^= E##ge1; \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ - Ci1 ^= E##gi1; \ - E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ - Co1 ^= E##go1; \ - E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ - Cu1 ^= E##gu1; \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ - Ca0 ^= E##ka0; \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ - Ce0 ^= E##ke0; \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - Ci0 ^= E##ki0; \ - E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ - Co0 ^= E##ko0; \ - E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ - Cu0 ^= E##ku0; \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ - Ca1 ^= E##ka1; \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ - Ce1 ^= E##ke1; \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - Ci1 ^= E##ki1; \ - E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ - Co1 ^= E##ko1; \ - E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ - Cu1 ^= E##ku1; \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ - Ca0 ^= E##ma0; \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ - Ce0 ^= E##me0; \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ - Ci0 ^= E##mi0; \ - E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ - Co0 ^= E##mo0; \ - E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ - Cu0 ^= E##mu0; \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ - Ca1 ^= E##ma1; \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ - Ce1 ^= E##me1; \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ - Ci1 ^= E##mi1; \ - E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ - Co1 ^= E##mo1; \ - E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ - Cu1 ^= E##mu1; \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - Ca0 ^= E##sa0; \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ - Ce0 ^= E##se0; \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ - Ci0 ^= E##si0; \ - E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ - Co0 ^= E##so0; \ - E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ - Cu0 ^= E##su0; \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - Ca1 ^= E##sa1; \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ - Ce1 ^= E##se1; \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ - Ci1 ^= E##si1; \ - E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ - Co1 ^= E##so1; \ - E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ - Cu1 ^= E##su1; \ -\ - -// --- Code for round (lane complementing pattern 'bebigokimisa') -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIota(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - E##ba0 = Bba0 ^( Bbe0 | Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - E##be0 = Bbe0 ^((~Bbi0)| Bbo0 ); \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##bi0 = Bbi0 ^( Bbo0 & Bbu0 ); \ - E##bo0 = Bbo0 ^( Bbu0 | Bba0 ); \ - E##bu0 = Bbu0 ^( Bba0 & Bbe0 ); \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - E##ba1 = Bba1 ^( Bbe1 | Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - E##be1 = Bbe1 ^((~Bbi1)| Bbo1 ); \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##bi1 = Bbi1 ^( Bbo1 & Bbu1 ); \ - E##bo1 = Bbo1 ^( Bbu1 | Bba1 ); \ - E##bu1 = Bbu1 ^( Bba1 & Bbe1 ); \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - E##ga0 = Bga0 ^( Bge0 | Bgi0 ); \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - E##ge0 = Bge0 ^( Bgi0 & Bgo0 ); \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##gi0 = Bgi0 ^( Bgo0 |(~Bgu0)); \ - E##go0 = Bgo0 ^( Bgu0 | Bga0 ); \ - E##gu0 = Bgu0 ^( Bga0 & Bge0 ); \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - E##ga1 = Bga1 ^( Bge1 | Bgi1 ); \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - E##ge1 = Bge1 ^( Bgi1 & Bgo1 ); \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##gi1 = Bgi1 ^( Bgo1 |(~Bgu1)); \ - E##go1 = Bgo1 ^( Bgu1 | Bga1 ); \ - E##gu1 = Bgu1 ^( Bga1 & Bge1 ); \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - E##ka0 = Bka0 ^( Bke0 | Bki0 ); \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - E##ke0 = Bke0 ^( Bki0 & Bko0 ); \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - E##ko0 = (~Bko0)^( Bku0 | Bka0 ); \ - E##ku0 = Bku0 ^( Bka0 & Bke0 ); \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - E##ka1 = Bka1 ^( Bke1 | Bki1 ); \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - E##ke1 = Bke1 ^( Bki1 & Bko1 ); \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - E##ko1 = (~Bko1)^( Bku1 | Bka1 ); \ - E##ku1 = Bku1 ^( Bka1 & Bke1 ); \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - E##ma0 = Bma0 ^( Bme0 & Bmi0 ); \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - E##me0 = Bme0 ^( Bmi0 | Bmo0 ); \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##mi0 = Bmi0 ^((~Bmo0)| Bmu0 ); \ - E##mo0 = (~Bmo0)^( Bmu0 & Bma0 ); \ - E##mu0 = Bmu0 ^( Bma0 | Bme0 ); \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - E##ma1 = Bma1 ^( Bme1 & Bmi1 ); \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - E##me1 = Bme1 ^( Bmi1 | Bmo1 ); \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##mi1 = Bmi1 ^((~Bmo1)| Bmu1 ); \ - E##mo1 = (~Bmo1)^( Bmu1 & Bma1 ); \ - E##mu1 = Bmu1 ^( Bma1 | Bme1 ); \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - E##se0 = (~Bse0)^( Bsi0 | Bso0 ); \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##si0 = Bsi0 ^( Bso0 & Bsu0 ); \ - E##so0 = Bso0 ^( Bsu0 | Bsa0 ); \ - E##su0 = Bsu0 ^( Bsa0 & Bse0 ); \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - E##se1 = (~Bse1)^( Bsi1 | Bso1 ); \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##si1 = Bsi1 ^( Bso1 & Bsu1 ); \ - E##so1 = Bso1 ^( Bsu1 | Bsa1 ); \ - E##su1 = Bsu1 ^( Bsa1 & Bse1 ); \ -\ - -#else // UseBebigokimisa -// --- Code for round, with prepare-theta -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIotaPrepareTheta(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - Ca0 = E##ba0; \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ - Ce0 = E##be0; \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ - Ci0 = E##bi0; \ - E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ - Co0 = E##bo0; \ - E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ - Cu0 = E##bu0; \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - Ca1 = E##ba1; \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ - Ce1 = E##be1; \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ - Ci1 = E##bi1; \ - E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ - Co1 = E##bo1; \ - E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ - Cu1 = E##bu1; \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ - Ca0 ^= E##ga0; \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ - Ce0 ^= E##ge0; \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ - Ci0 ^= E##gi0; \ - E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ - Co0 ^= E##go0; \ - E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ - Cu0 ^= E##gu0; \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ - Ca1 ^= E##ga1; \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ - Ce1 ^= E##ge1; \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ - Ci1 ^= E##gi1; \ - E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ - Co1 ^= E##go1; \ - E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ - Cu1 ^= E##gu1; \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ - Ca0 ^= E##ka0; \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ - Ce0 ^= E##ke0; \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - Ci0 ^= E##ki0; \ - E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ - Co0 ^= E##ko0; \ - E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ - Cu0 ^= E##ku0; \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ - Ca1 ^= E##ka1; \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ - Ce1 ^= E##ke1; \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - Ci1 ^= E##ki1; \ - E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ - Co1 ^= E##ko1; \ - E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ - Cu1 ^= E##ku1; \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ - Ca0 ^= E##ma0; \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ - Ce0 ^= E##me0; \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ - Ci0 ^= E##mi0; \ - E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ - Co0 ^= E##mo0; \ - E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ - Cu0 ^= E##mu0; \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ - Ca1 ^= E##ma1; \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ - Ce1 ^= E##me1; \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ - Ci1 ^= E##mi1; \ - E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ - Co1 ^= E##mo1; \ - E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ - Cu1 ^= E##mu1; \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - Ca0 ^= E##sa0; \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ - Ce0 ^= E##se0; \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ - Ci0 ^= E##si0; \ - E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ - Co0 ^= E##so0; \ - E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ - Cu0 ^= E##su0; \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - Ca1 ^= E##sa1; \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ - Ce1 ^= E##se1; \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ - Ci1 ^= E##si1; \ - E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ - Co1 ^= E##so1; \ - E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ - Cu1 ^= E##su1; \ -\ - -// --- Code for round -// --- using factor 2 interleaving, 64-bit lanes mapped to 32-bit words -#define thetaRhoPiChiIota(i, A, E) \ - Da0 = Cu0^ROL32(Ce1, 1); \ - Da1 = Cu1^Ce0; \ - De0 = Ca0^ROL32(Ci1, 1); \ - De1 = Ca1^Ci0; \ - Di0 = Ce0^ROL32(Co1, 1); \ - Di1 = Ce1^Co0; \ - Do0 = Ci0^ROL32(Cu1, 1); \ - Do1 = Ci1^Cu0; \ - Du0 = Co0^ROL32(Ca1, 1); \ - Du1 = Co1^Ca0; \ -\ - A##ba0 ^= Da0; \ - Bba0 = A##ba0; \ - A##ge0 ^= De0; \ - Bbe0 = ROL32(A##ge0, 22); \ - A##ki1 ^= Di1; \ - Bbi0 = ROL32(A##ki1, 22); \ - E##ba0 = Bba0 ^((~Bbe0)& Bbi0 ); \ - E##ba0 ^= KeccakF1600RoundConstants_int2_0[i]; \ - A##mo1 ^= Do1; \ - Bbo0 = ROL32(A##mo1, 11); \ - E##be0 = Bbe0 ^((~Bbi0)& Bbo0 ); \ - A##su0 ^= Du0; \ - Bbu0 = ROL32(A##su0, 7); \ - E##bi0 = Bbi0 ^((~Bbo0)& Bbu0 ); \ - E##bo0 = Bbo0 ^((~Bbu0)& Bba0 ); \ - E##bu0 = Bbu0 ^((~Bba0)& Bbe0 ); \ -\ - A##ba1 ^= Da1; \ - Bba1 = A##ba1; \ - A##ge1 ^= De1; \ - Bbe1 = ROL32(A##ge1, 22); \ - A##ki0 ^= Di0; \ - Bbi1 = ROL32(A##ki0, 21); \ - E##ba1 = Bba1 ^((~Bbe1)& Bbi1 ); \ - E##ba1 ^= KeccakF1600RoundConstants_int2_1[i]; \ - A##mo0 ^= Do0; \ - Bbo1 = ROL32(A##mo0, 10); \ - E##be1 = Bbe1 ^((~Bbi1)& Bbo1 ); \ - A##su1 ^= Du1; \ - Bbu1 = ROL32(A##su1, 7); \ - E##bi1 = Bbi1 ^((~Bbo1)& Bbu1 ); \ - E##bo1 = Bbo1 ^((~Bbu1)& Bba1 ); \ - E##bu1 = Bbu1 ^((~Bba1)& Bbe1 ); \ -\ - A##bo0 ^= Do0; \ - Bga0 = ROL32(A##bo0, 14); \ - A##gu0 ^= Du0; \ - Bge0 = ROL32(A##gu0, 10); \ - A##ka1 ^= Da1; \ - Bgi0 = ROL32(A##ka1, 2); \ - E##ga0 = Bga0 ^((~Bge0)& Bgi0 ); \ - A##me1 ^= De1; \ - Bgo0 = ROL32(A##me1, 23); \ - E##ge0 = Bge0 ^((~Bgi0)& Bgo0 ); \ - A##si1 ^= Di1; \ - Bgu0 = ROL32(A##si1, 31); \ - E##gi0 = Bgi0 ^((~Bgo0)& Bgu0 ); \ - E##go0 = Bgo0 ^((~Bgu0)& Bga0 ); \ - E##gu0 = Bgu0 ^((~Bga0)& Bge0 ); \ -\ - A##bo1 ^= Do1; \ - Bga1 = ROL32(A##bo1, 14); \ - A##gu1 ^= Du1; \ - Bge1 = ROL32(A##gu1, 10); \ - A##ka0 ^= Da0; \ - Bgi1 = ROL32(A##ka0, 1); \ - E##ga1 = Bga1 ^((~Bge1)& Bgi1 ); \ - A##me0 ^= De0; \ - Bgo1 = ROL32(A##me0, 22); \ - E##ge1 = Bge1 ^((~Bgi1)& Bgo1 ); \ - A##si0 ^= Di0; \ - Bgu1 = ROL32(A##si0, 30); \ - E##gi1 = Bgi1 ^((~Bgo1)& Bgu1 ); \ - E##go1 = Bgo1 ^((~Bgu1)& Bga1 ); \ - E##gu1 = Bgu1 ^((~Bga1)& Bge1 ); \ -\ - A##be1 ^= De1; \ - Bka0 = ROL32(A##be1, 1); \ - A##gi0 ^= Di0; \ - Bke0 = ROL32(A##gi0, 3); \ - A##ko1 ^= Do1; \ - Bki0 = ROL32(A##ko1, 13); \ - E##ka0 = Bka0 ^((~Bke0)& Bki0 ); \ - A##mu0 ^= Du0; \ - Bko0 = ROL32(A##mu0, 4); \ - E##ke0 = Bke0 ^((~Bki0)& Bko0 ); \ - A##sa0 ^= Da0; \ - Bku0 = ROL32(A##sa0, 9); \ - E##ki0 = Bki0 ^((~Bko0)& Bku0 ); \ - E##ko0 = Bko0 ^((~Bku0)& Bka0 ); \ - E##ku0 = Bku0 ^((~Bka0)& Bke0 ); \ -\ - A##be0 ^= De0; \ - Bka1 = A##be0; \ - A##gi1 ^= Di1; \ - Bke1 = ROL32(A##gi1, 3); \ - A##ko0 ^= Do0; \ - Bki1 = ROL32(A##ko0, 12); \ - E##ka1 = Bka1 ^((~Bke1)& Bki1 ); \ - A##mu1 ^= Du1; \ - Bko1 = ROL32(A##mu1, 4); \ - E##ke1 = Bke1 ^((~Bki1)& Bko1 ); \ - A##sa1 ^= Da1; \ - Bku1 = ROL32(A##sa1, 9); \ - E##ki1 = Bki1 ^((~Bko1)& Bku1 ); \ - E##ko1 = Bko1 ^((~Bku1)& Bka1 ); \ - E##ku1 = Bku1 ^((~Bka1)& Bke1 ); \ -\ - A##bu1 ^= Du1; \ - Bma0 = ROL32(A##bu1, 14); \ - A##ga0 ^= Da0; \ - Bme0 = ROL32(A##ga0, 18); \ - A##ke0 ^= De0; \ - Bmi0 = ROL32(A##ke0, 5); \ - E##ma0 = Bma0 ^((~Bme0)& Bmi0 ); \ - A##mi1 ^= Di1; \ - Bmo0 = ROL32(A##mi1, 8); \ - E##me0 = Bme0 ^((~Bmi0)& Bmo0 ); \ - A##so0 ^= Do0; \ - Bmu0 = ROL32(A##so0, 28); \ - E##mi0 = Bmi0 ^((~Bmo0)& Bmu0 ); \ - E##mo0 = Bmo0 ^((~Bmu0)& Bma0 ); \ - E##mu0 = Bmu0 ^((~Bma0)& Bme0 ); \ -\ - A##bu0 ^= Du0; \ - Bma1 = ROL32(A##bu0, 13); \ - A##ga1 ^= Da1; \ - Bme1 = ROL32(A##ga1, 18); \ - A##ke1 ^= De1; \ - Bmi1 = ROL32(A##ke1, 5); \ - E##ma1 = Bma1 ^((~Bme1)& Bmi1 ); \ - A##mi0 ^= Di0; \ - Bmo1 = ROL32(A##mi0, 7); \ - E##me1 = Bme1 ^((~Bmi1)& Bmo1 ); \ - A##so1 ^= Do1; \ - Bmu1 = ROL32(A##so1, 28); \ - E##mi1 = Bmi1 ^((~Bmo1)& Bmu1 ); \ - E##mo1 = Bmo1 ^((~Bmu1)& Bma1 ); \ - E##mu1 = Bmu1 ^((~Bma1)& Bme1 ); \ -\ - A##bi0 ^= Di0; \ - Bsa0 = ROL32(A##bi0, 31); \ - A##go1 ^= Do1; \ - Bse0 = ROL32(A##go1, 28); \ - A##ku1 ^= Du1; \ - Bsi0 = ROL32(A##ku1, 20); \ - E##sa0 = Bsa0 ^((~Bse0)& Bsi0 ); \ - A##ma1 ^= Da1; \ - Bso0 = ROL32(A##ma1, 21); \ - E##se0 = Bse0 ^((~Bsi0)& Bso0 ); \ - A##se0 ^= De0; \ - Bsu0 = ROL32(A##se0, 1); \ - E##si0 = Bsi0 ^((~Bso0)& Bsu0 ); \ - E##so0 = Bso0 ^((~Bsu0)& Bsa0 ); \ - E##su0 = Bsu0 ^((~Bsa0)& Bse0 ); \ -\ - A##bi1 ^= Di1; \ - Bsa1 = ROL32(A##bi1, 31); \ - A##go0 ^= Do0; \ - Bse1 = ROL32(A##go0, 27); \ - A##ku0 ^= Du0; \ - Bsi1 = ROL32(A##ku0, 19); \ - E##sa1 = Bsa1 ^((~Bse1)& Bsi1 ); \ - A##ma0 ^= Da0; \ - Bso1 = ROL32(A##ma0, 20); \ - E##se1 = Bse1 ^((~Bsi1)& Bso1 ); \ - A##se1 ^= De1; \ - Bsu1 = ROL32(A##se1, 1); \ - E##si1 = Bsi1 ^((~Bso1)& Bsu1 ); \ - E##so1 = Bso1 ^((~Bsu1)& Bsa1 ); \ - E##su1 = Bsu1 ^((~Bsa1)& Bse1 ); \ -\ - -#endif // UseBebigokimisa - -const UINT32 KeccakF1600RoundConstants_int2_0[24] = { - 0x00000001UL, - 0x00000000UL, - 0x00000000UL, - 0x00000000UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000000UL, - 0x00000000UL, - 0x00000001UL, - 0x00000000UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000001UL, - 0x00000000UL, - 0x00000000UL, - 0x00000000UL, - 0x00000000UL, - 0x00000001UL, - 0x00000000UL, - 0x00000001UL, - 0x00000000UL }; - -const UINT32 KeccakF1600RoundConstants_int2_1[24] = { - 0x00000000UL, - 0x00000089UL, - 0x8000008bUL, - 0x80008080UL, - 0x0000008bUL, - 0x00008000UL, - 0x80008088UL, - 0x80000082UL, - 0x0000000bUL, - 0x0000000aUL, - 0x00008082UL, - 0x00008003UL, - 0x0000808bUL, - 0x8000000bUL, - 0x8000008aUL, - 0x80000081UL, - 0x80000081UL, - 0x80000008UL, - 0x00000083UL, - 0x80008003UL, - 0x80008088UL, - 0x80000088UL, - 0x00008000UL, - 0x80008082UL }; - -#define copyFromStateAndXor1024bits(X, state, input) \ - X##ba0 = state[ 0]^input[ 0]; \ - X##ba1 = state[ 1]^input[ 1]; \ - X##be0 = state[ 2]^input[ 2]; \ - X##be1 = state[ 3]^input[ 3]; \ - X##bi0 = state[ 4]^input[ 4]; \ - X##bi1 = state[ 5]^input[ 5]; \ - X##bo0 = state[ 6]^input[ 6]; \ - X##bo1 = state[ 7]^input[ 7]; \ - X##bu0 = state[ 8]^input[ 8]; \ - X##bu1 = state[ 9]^input[ 9]; \ - X##ga0 = state[10]^input[10]; \ - X##ga1 = state[11]^input[11]; \ - X##ge0 = state[12]^input[12]; \ - X##ge1 = state[13]^input[13]; \ - X##gi0 = state[14]^input[14]; \ - X##gi1 = state[15]^input[15]; \ - X##go0 = state[16]^input[16]; \ - X##go1 = state[17]^input[17]; \ - X##gu0 = state[18]^input[18]; \ - X##gu1 = state[19]^input[19]; \ - X##ka0 = state[20]^input[20]; \ - X##ka1 = state[21]^input[21]; \ - X##ke0 = state[22]^input[22]; \ - X##ke1 = state[23]^input[23]; \ - X##ki0 = state[24]^input[24]; \ - X##ki1 = state[25]^input[25]; \ - X##ko0 = state[26]^input[26]; \ - X##ko1 = state[27]^input[27]; \ - X##ku0 = state[28]^input[28]; \ - X##ku1 = state[29]^input[29]; \ - X##ma0 = state[30]^input[30]; \ - X##ma1 = state[31]^input[31]; \ - X##me0 = state[32]; \ - X##me1 = state[33]; \ - X##mi0 = state[34]; \ - X##mi1 = state[35]; \ - X##mo0 = state[36]; \ - X##mo1 = state[37]; \ - X##mu0 = state[38]; \ - X##mu1 = state[39]; \ - X##sa0 = state[40]; \ - X##sa1 = state[41]; \ - X##se0 = state[42]; \ - X##se1 = state[43]; \ - X##si0 = state[44]; \ - X##si1 = state[45]; \ - X##so0 = state[46]; \ - X##so1 = state[47]; \ - X##su0 = state[48]; \ - X##su1 = state[49]; \ - -#define copyFromStateAndXor1088bits(X, state, input) \ - X##ba0 = state[ 0]^input[ 0]; \ - X##ba1 = state[ 1]^input[ 1]; \ - X##be0 = state[ 2]^input[ 2]; \ - X##be1 = state[ 3]^input[ 3]; \ - X##bi0 = state[ 4]^input[ 4]; \ - X##bi1 = state[ 5]^input[ 5]; \ - X##bo0 = state[ 6]^input[ 6]; \ - X##bo1 = state[ 7]^input[ 7]; \ - X##bu0 = state[ 8]^input[ 8]; \ - X##bu1 = state[ 9]^input[ 9]; \ - X##ga0 = state[10]^input[10]; \ - X##ga1 = state[11]^input[11]; \ - X##ge0 = state[12]^input[12]; \ - X##ge1 = state[13]^input[13]; \ - X##gi0 = state[14]^input[14]; \ - X##gi1 = state[15]^input[15]; \ - X##go0 = state[16]^input[16]; \ - X##go1 = state[17]^input[17]; \ - X##gu0 = state[18]^input[18]; \ - X##gu1 = state[19]^input[19]; \ - X##ka0 = state[20]^input[20]; \ - X##ka1 = state[21]^input[21]; \ - X##ke0 = state[22]^input[22]; \ - X##ke1 = state[23]^input[23]; \ - X##ki0 = state[24]^input[24]; \ - X##ki1 = state[25]^input[25]; \ - X##ko0 = state[26]^input[26]; \ - X##ko1 = state[27]^input[27]; \ - X##ku0 = state[28]^input[28]; \ - X##ku1 = state[29]^input[29]; \ - X##ma0 = state[30]^input[30]; \ - X##ma1 = state[31]^input[31]; \ - X##me0 = state[32]^input[32]; \ - X##me1 = state[33]^input[33]; \ - X##mi0 = state[34]; \ - X##mi1 = state[35]; \ - X##mo0 = state[36]; \ - X##mo1 = state[37]; \ - X##mu0 = state[38]; \ - X##mu1 = state[39]; \ - X##sa0 = state[40]; \ - X##sa1 = state[41]; \ - X##se0 = state[42]; \ - X##se1 = state[43]; \ - X##si0 = state[44]; \ - X##si1 = state[45]; \ - X##so0 = state[46]; \ - X##so1 = state[47]; \ - X##su0 = state[48]; \ - X##su1 = state[49]; \ - -#define copyFromState(X, state) \ - X##ba0 = state[ 0]; \ - X##ba1 = state[ 1]; \ - X##be0 = state[ 2]; \ - X##be1 = state[ 3]; \ - X##bi0 = state[ 4]; \ - X##bi1 = state[ 5]; \ - X##bo0 = state[ 6]; \ - X##bo1 = state[ 7]; \ - X##bu0 = state[ 8]; \ - X##bu1 = state[ 9]; \ - X##ga0 = state[10]; \ - X##ga1 = state[11]; \ - X##ge0 = state[12]; \ - X##ge1 = state[13]; \ - X##gi0 = state[14]; \ - X##gi1 = state[15]; \ - X##go0 = state[16]; \ - X##go1 = state[17]; \ - X##gu0 = state[18]; \ - X##gu1 = state[19]; \ - X##ka0 = state[20]; \ - X##ka1 = state[21]; \ - X##ke0 = state[22]; \ - X##ke1 = state[23]; \ - X##ki0 = state[24]; \ - X##ki1 = state[25]; \ - X##ko0 = state[26]; \ - X##ko1 = state[27]; \ - X##ku0 = state[28]; \ - X##ku1 = state[29]; \ - X##ma0 = state[30]; \ - X##ma1 = state[31]; \ - X##me0 = state[32]; \ - X##me1 = state[33]; \ - X##mi0 = state[34]; \ - X##mi1 = state[35]; \ - X##mo0 = state[36]; \ - X##mo1 = state[37]; \ - X##mu0 = state[38]; \ - X##mu1 = state[39]; \ - X##sa0 = state[40]; \ - X##sa1 = state[41]; \ - X##se0 = state[42]; \ - X##se1 = state[43]; \ - X##si0 = state[44]; \ - X##si1 = state[45]; \ - X##so0 = state[46]; \ - X##so1 = state[47]; \ - X##su0 = state[48]; \ - X##su1 = state[49]; \ - -#define copyToState(state, X) \ - state[ 0] = X##ba0; \ - state[ 1] = X##ba1; \ - state[ 2] = X##be0; \ - state[ 3] = X##be1; \ - state[ 4] = X##bi0; \ - state[ 5] = X##bi1; \ - state[ 6] = X##bo0; \ - state[ 7] = X##bo1; \ - state[ 8] = X##bu0; \ - state[ 9] = X##bu1; \ - state[10] = X##ga0; \ - state[11] = X##ga1; \ - state[12] = X##ge0; \ - state[13] = X##ge1; \ - state[14] = X##gi0; \ - state[15] = X##gi1; \ - state[16] = X##go0; \ - state[17] = X##go1; \ - state[18] = X##gu0; \ - state[19] = X##gu1; \ - state[20] = X##ka0; \ - state[21] = X##ka1; \ - state[22] = X##ke0; \ - state[23] = X##ke1; \ - state[24] = X##ki0; \ - state[25] = X##ki1; \ - state[26] = X##ko0; \ - state[27] = X##ko1; \ - state[28] = X##ku0; \ - state[29] = X##ku1; \ - state[30] = X##ma0; \ - state[31] = X##ma1; \ - state[32] = X##me0; \ - state[33] = X##me1; \ - state[34] = X##mi0; \ - state[35] = X##mi1; \ - state[36] = X##mo0; \ - state[37] = X##mo1; \ - state[38] = X##mu0; \ - state[39] = X##mu1; \ - state[40] = X##sa0; \ - state[41] = X##sa1; \ - state[42] = X##se0; \ - state[43] = X##se1; \ - state[44] = X##si0; \ - state[45] = X##si1; \ - state[46] = X##so0; \ - state[47] = X##so1; \ - state[48] = X##su0; \ - state[49] = X##su1; \ - -#define copyStateVariables(X, Y) \ - X##ba0 = Y##ba0; \ - X##ba1 = Y##ba1; \ - X##be0 = Y##be0; \ - X##be1 = Y##be1; \ - X##bi0 = Y##bi0; \ - X##bi1 = Y##bi1; \ - X##bo0 = Y##bo0; \ - X##bo1 = Y##bo1; \ - X##bu0 = Y##bu0; \ - X##bu1 = Y##bu1; \ - X##ga0 = Y##ga0; \ - X##ga1 = Y##ga1; \ - X##ge0 = Y##ge0; \ - X##ge1 = Y##ge1; \ - X##gi0 = Y##gi0; \ - X##gi1 = Y##gi1; \ - X##go0 = Y##go0; \ - X##go1 = Y##go1; \ - X##gu0 = Y##gu0; \ - X##gu1 = Y##gu1; \ - X##ka0 = Y##ka0; \ - X##ka1 = Y##ka1; \ - X##ke0 = Y##ke0; \ - X##ke1 = Y##ke1; \ - X##ki0 = Y##ki0; \ - X##ki1 = Y##ki1; \ - X##ko0 = Y##ko0; \ - X##ko1 = Y##ko1; \ - X##ku0 = Y##ku0; \ - X##ku1 = Y##ku1; \ - X##ma0 = Y##ma0; \ - X##ma1 = Y##ma1; \ - X##me0 = Y##me0; \ - X##me1 = Y##me1; \ - X##mi0 = Y##mi0; \ - X##mi1 = Y##mi1; \ - X##mo0 = Y##mo0; \ - X##mo1 = Y##mo1; \ - X##mu0 = Y##mu0; \ - X##mu1 = Y##mu1; \ - X##sa0 = Y##sa0; \ - X##sa1 = Y##sa1; \ - X##se0 = Y##se0; \ - X##se1 = Y##se1; \ - X##si0 = Y##si0; \ - X##si1 = Y##si1; \ - X##so0 = Y##so0; \ - X##so1 = Y##so1; \ - X##su0 = Y##su0; \ - X##su1 = Y##su1; \ - diff --git a/c_src/KeccakF-1600-32.macros b/c_src/KeccakF-1600-32.macros deleted file mode 100755 index 9ade600..0000000 --- a/c_src/KeccakF-1600-32.macros +++ /dev/null @@ -1,26 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by the designers, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#ifdef UseSchedule - #if (UseSchedule == 1) - #include "KeccakF-1600-32-s1.macros" - #elif (UseSchedule == 2) - #include "KeccakF-1600-32-s2.macros" - #elif (UseSchedule == 3) - #include "KeccakF-1600-32-rvk.macros" - #else - #error "This schedule is not supported." - #endif -#else - #include "KeccakF-1600-32-s1.macros" -#endif diff --git a/c_src/KeccakF-1600-arm.c b/c_src/KeccakF-1600-arm.c deleted file mode 100755 index abd6dc9..0000000 --- a/c_src/KeccakF-1600-arm.c +++ /dev/null @@ -1,123 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by Ronny Van Keer, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include "KeccakF-1600-interface.h" -#include "KeccakSponge.h" -#include - -typedef unsigned char UINT8; -typedef unsigned short UINT16; -typedef unsigned int UINT32; -typedef unsigned long long int UINT64; - -void KeccakPermutationOnWordsAfterXoring_ARM_asm(UINT32 *state, const UINT8 *input, int laneCount); - -void KeccakInitialize( void ) -{ -} - -void KeccakInitializeState(unsigned char *state) -{ - memset(state, 0, KeccakPermutationSizeInBytes); -} - -void KeccakPermutation(unsigned char *state) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, 0, 0); -} - -#ifdef ProvideFast576 -void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, 9); -} -#endif - -#ifdef ProvideFast832 -void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, 13); -} -#endif - -#ifdef ProvideFast1024 -void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, 16); -} -#endif - -#ifdef ProvideFast1088 -void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, 17); -} -#endif - -#ifdef ProvideFast1152 -void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, 18); -} -#endif - -#ifdef ProvideFast1344 -void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, 21); -} -#endif - - -void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount) -{ - KeccakPermutationOnWordsAfterXoring_ARM_asm((UINT32*)state, data, laneCount); -} - -// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -UINT64 fromInterleaving(UINT64 x) -{ - UINT64 t; - - t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16); - t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8); - t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4); - t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2); - t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1); - - return x; -} - -void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd) -{ - ((UINT64*)dest)[0] = fromInterleaving(*(UINT64*)evenAndOdd); -} - -#define extractLanes(laneCount, state, data) \ - { \ - int i; \ - for(i=0; i<(laneCount); i++) \ - setInterleavedWordsInto8bytes(data+i*8, (UINT32*)state+i*2); \ - } - -#ifdef ProvideFast1024 -void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) -{ - extractLanes(16, state, data) -} -#endif - -void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) -{ - extractLanes(laneCount, state, data) -} diff --git a/c_src/KeccakF-1600-armcc.s b/c_src/KeccakF-1600-armcc.s deleted file mode 100755 index b87d0ba..0000000 --- a/c_src/KeccakF-1600-armcc.s +++ /dev/null @@ -1,653 +0,0 @@ -;// The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -;// Michaël Peeters and Gilles Van Assche. For more information, feedback or -;// questions, please refer to our website: http://keccak.noekeon.org/ -;// -;// Implementation by Ronny Van Keer, -;// hereby denoted as "the implementer". -;// -;// To the extent possible under law, the implementer has waived all copyright -;// and related or neighboring rights to the source code in this file. -;// http://creativecommons.org/publicdomain/zero/1.0/ - - - PRESERVE8 - THUMB - AREA |.text|, CODE, READONLY - -;// --- defines - -_ba0 equ 0*4 -_ba1 equ 1*4 -_be0 equ 2*4 -_be1 equ 3*4 -_bi0 equ 4*4 -_bi1 equ 5*4 -_bo0 equ 6*4 -_bo1 equ 7*4 -_bu0 equ 8*4 -_bu1 equ 9*4 -_ga0 equ 10*4 -_ga1 equ 11*4 -_ge0 equ 12*4 -_ge1 equ 13*4 -_gi0 equ 14*4 -_gi1 equ 15*4 -_go0 equ 16*4 -_go1 equ 17*4 -_gu0 equ 18*4 -_gu1 equ 19*4 -_ka0 equ 20*4 -_ka1 equ 21*4 -_ke0 equ 22*4 -_ke1 equ 23*4 -_ki0 equ 24*4 -_ki1 equ 25*4 -_ko0 equ 26*4 -_ko1 equ 27*4 -_ku0 equ 28*4 -_ku1 equ 29*4 -_ma0 equ 30*4 -_ma1 equ 31*4 -_me0 equ 32*4 -_me1 equ 33*4 -_mi0 equ 34*4 -_mi1 equ 35*4 -_mo0 equ 36*4 -_mo1 equ 37*4 -_mu0 equ 38*4 -_mu1 equ 39*4 -_sa0 equ 40*4 -_sa1 equ 41*4 -_se0 equ 42*4 -_se1 equ 43*4 -_si0 equ 44*4 -_si1 equ 45*4 -_so0 equ 46*4 -_so1 equ 47*4 -_su0 equ 48*4 -_su1 equ 49*4 - -mDe1 equ 50*4 -mDi0 equ 51*4 -mDo0 equ 52*4 -mDo1 equ 53*4 - -;// --- macros - - MACRO - xor5 $result,$ptr,$b,$g,$k,$m,$s - - ldr $result, [$ptr, #$b] - ldr r1, [$ptr, #$g] - ldr r2, [$ptr, #$k] - eor $result, $result, r1 - ldr r1, [$ptr, #$m] - eor $result, $result, r2 - ldr r2, [$ptr, #$s] - eor $result, $result, r1 - eor $result, $result, r2 - MEND - - MACRO - xorrol $b, $yy, $rr - - eor $b, $b, $yy - ror $b, #32-$rr - MEND - - - MACRO - xandnot $resptr, $resofs, $aa, $bb, $cc - - bic r1, $cc, $bb - eor r1, r1, $aa - str r1, [$resptr, #$resofs] - MEND - - MACRO - xandnotRC $resptr, $resofs, $aa, $bb, $cc - - ldr r1, [r3], #4 - bic $cc, $cc, $bb - eor $cc, $cc, r1 - eor $cc, $cc, $aa - str $cc, [$resptr, #$resofs] - MEND - - - EXPORT KeccakPermutationOnWordsAfterXoring_ARM_asm -KeccakPermutationOnWordsAfterXoring_ARM_asm PROC - - push {r4-r12,lr} - sub sp,sp,#4*(50+4) - - movs r9, r2 - beq interleaveDone - mov r8,r0 -interleaveLoop - - ldr r4, [r1], #4 - ldr r5, [r1], #4 - ldrd r6, r7, [r8] - - ;// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 - and r3,r4,#0x55555555 - orr r3,r3,r3, LSR #1 - and r3,r3,#0x33333333 - orr r3,r3,r3, LSR #2 - and r3,r3,#0x0F0F0F0F - orr r3,r3,r3, LSR #4 - and r3,r3,#0x00FF00FF - bfi r3,r3,#8, #8 - eor r6,r6,r3, LSR #8 - - and r3,r5,#0x55555555 - orr r3,r3,r3, LSR #1 - and r3,r3,#0x33333333 - orr r3,r3,r3, LSR #2 - and r3,r3,#0x0F0F0F0F - orr r3,r3,r3, LSR #4 - and r3,r3,#0x00FF00FF - orr r3,r3,r3, LSR #8 - eor r6,r6,r3, LSL #16 - - and r3,r4,#0xAAAAAAAA - orr r3,r3,r3, LSL #1 - and r3,r3,#0xCCCCCCCC - orr r3,r3,r3, LSL #2 - and r3,r3,#0xF0F0F0F0 - orr r3,r3,r3, LSL #4 - and r3,r3,#0xFF00FF00 - orr r3,r3,r3, LSL #8 - eor r7,r7,r3, LSR #16 - - and r3,r5,#0xAAAAAAAA - orr r3,r3,r3, LSL #1 - and r3,r3,#0xCCCCCCCC - orr r3,r3,r3, LSL #2 - and r3,r3,#0xF0F0F0F0 - orr r3,r3,r3, LSL #4 - and r3,r3,#0xFF00FF00 - orr r3,r3,r3, LSL #8 - bfc r3, #0, #16 - eor r7,r7,r3 - - strd r6,r7,[r8], #8 - - subs r9,r9,#1 - bne interleaveLoop - -interleaveDone - - ldr r3, =KeccakF1600RoundConstantsWithTerminator - b roundLoop ;//jump over the table - LTORG - - ALIGN - -KeccakF1600RoundConstantsWithTerminator - ;// 0 1 - dcd 0x00000001, 0x00000000 - dcd 0x00000000, 0x00000089 - dcd 0x00000000, 0x8000008b - dcd 0x00000000, 0x80008080 - dcd 0x00000001, 0x0000008b - dcd 0x00000001, 0x00008000 - dcd 0x00000001, 0x80008088 - dcd 0x00000001, 0x80000082 - dcd 0x00000000, 0x0000000b - dcd 0x00000000, 0x0000000a - dcd 0x00000001, 0x00008082 - dcd 0x00000000, 0x00008003 - dcd 0x00000001, 0x0000808b - dcd 0x00000001, 0x8000000b - dcd 0x00000001, 0x8000008a - dcd 0x00000001, 0x80000081 - dcd 0x00000000, 0x80000081 - dcd 0x00000000, 0x80000008 - dcd 0x00000000, 0x00000083 - dcd 0x00000000, 0x80008003 - dcd 0x00000001, 0x80008088 - dcd 0x00000000, 0x80000088 - dcd 0x00000001, 0x00008000 - dcd 0x00000000, 0x80008082 - dcd 0xFFFFFFFF ;//terminator - -roundLoop - - ;//prepTheta A - xor5 r10, r0,_bu0, _gu0, _ku0, _mu0, _su0 - xor5 r6, r0,_be1, _ge1, _ke1, _me1, _se1 - eor r5, r10, r6, ROR #31 - xor5 r11, r0,_bu1, _gu1, _ku1, _mu1, _su1 - xor5 r7, r0,_be0, _ge0, _ke0, _me0, _se0 - eor r4, r11, r7 - - xor5 r8, r0,_bi0, _gi0, _ki0, _mi0, _si0 - eor r1, r8, r11, ROR #31 - str r1, [sp, #mDo0] - xor5 r9, r0,_bi1, _gi1, _ki1, _mi1, _si1 - eor r1, r9, r10 - str r1, [sp, #mDo1] - - xor5 r10, r0,_ba0, _ga0, _ka0, _ma0, _sa0 - eor lr, r10, r9, ROR #31 - xor5 r11, r0,_ba1, _ga1, _ka1, _ma1, _sa1 - eor r1, r11, r8 - str r1, [sp, #mDe1] - - xor5 r9, r0,_bo1, _go1, _ko1, _mo1, _so1 - eor r1, r7, r9, ROR #31 - str r1, [sp, #mDi0] - xor5 r8, r0,_bo0, _go0, _ko0, _mo0, _so0 - eor r2, r6, r8 - - eor r7, r8, r11, ROR #31 - eor r6, r9, r10 - - ;//thetaRhoPiChiIota 0, in A, out E - ldr r8, [r0, #_ba0] - ldr r9, [r0, #_ge0] - ldr r10, [r0, #_ki1] - ldr r11, [r0, #_mo1] - ldr r12, [r0, #_su0] - ldr r1, [sp, #mDo1] - eor r8, r8, r5 - xorrol r9, lr, 22 - xorrol r10, r2, 22 - xorrol r11, r1, 11 - xorrol r12, r7, 7 - xandnot sp, _be0, r9, r10, r11 - xandnot sp, _bi0, r10, r11, r12 - xandnot sp, _bo0, r11, r12, r8 - xandnot sp, _bu0, r12, r8, r9 - xandnotRC sp, _ba0, r8, r9, r10 - - ldr r8, [r0, #_bo0] - ldr r1, [sp, #mDo0] - ldr r9, [r0, #_gu0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDe1] - ldr r10, [r0, #_ka1] - ldr r11, [r0, #_me1] - ldr r12, [r0, #_si1] - xorrol r9, r7, 10 - xorrol r10, r4, 2 - xorrol r11, r1, 23 - xorrol r12, r2, 31 - xandnot sp, _ga0, r8, r9, r10 - xandnot sp, _ge0, r9, r10, r11 - xandnot sp, _gi0, r10, r11, r12 - xandnot sp, _go0, r11, r12, r8 - xandnot sp, _gu0, r12, r8, r9 - - ldr r8, [r0, #_be1] - ldr r1, [sp, #mDe1] - ldr r9, [r0, #_gi0] - xorrol r8, r1, 1 - ldr r1, [sp, #mDi0] - ldr r10, [r0, #_ko1] - xorrol r9, r1, 3 - ldr r1, [sp, #mDo1] - ldr r11, [r0, #_mu0] - ldr r12, [r0, #_sa0] - xorrol r10, r1, 13 - xorrol r11, r7, 4 - xorrol r12, r5, 9 - xandnot sp, _ka0, r8, r9, r10 - xandnot sp, _ke0, r9, r10, r11 - xandnot sp, _ki0, r10, r11, r12 - xandnot sp, _ko0, r11, r12, r8 - xandnot sp, _ku0, r12, r8, r9 - - ldr r8, [r0, #_bu1] - ldr r9, [r0, #_ga0] - ldr r10, [r0, #_ke0] - ldr r11, [r0, #_mi1] - ldr r12, [r0, #_so0] - ldr r1, [sp, #mDo0] - xorrol r8, r6, 14 - xorrol r9, r5, 18 - xorrol r10, lr, 5 - xorrol r11, r2, 8 - xorrol r12, r1, 28 - xandnot sp, _ma0, r8, r9, r10 - xandnot sp, _me0, r9, r10, r11 - xandnot sp, _mi0, r10, r11, r12 - xandnot sp, _mo0, r11, r12, r8 - xandnot sp, _mu0, r12, r8, r9 - - ldr r1, [sp, #mDi0] - ldr r8, [r0, #_bi0] - ldr r9, [r0, #_go1] - xorrol r8, r1, 31 - ldr r1, [sp, #mDo1] - ldr r10, [r0, #_ku1] - xorrol r9, r1, 28 - ldr r11, [r0, #_ma1] - ldr r12, [r0, #_se0] - xorrol r10, r6, 20 - xorrol r11, r4, 21 - xorrol r12, lr, 1 - xandnot sp, _sa0, r8, r9, r10 - xandnot sp, _se0, r9, r10, r11 - xandnot sp, _si0, r10, r11, r12 - xandnot sp, _so0, r11, r12, r8 - xandnot sp, _su0, r12, r8, r9 - - ;// thetaRhoPiChiIota 1, in A, out E - ldr r1, [sp, #mDe1] - ldr r9, [r0, #_ge1] - ldr r8, [r0, #_ba1] - xorrol r9, r1, 22 - ldr r1, [sp, #mDi0] - ldr r10, [r0, #_ki0] - eor r8, r8, r4 - xorrol r10, r1, 21 - ldr r1, [sp, #mDo0] - ldr r11, [r0, #_mo0] - ldr r12, [r0, #_su1] - xorrol r11, r1, 10 - xorrol r12, r6, 7 - xandnot sp, _be1, r9, r10, r11 - xandnot sp, _bi1, r10, r11, r12 - xandnot sp, _bo1, r11, r12, r8 - xandnot sp, _bu1, r12, r8, r9 - xandnotRC sp, _ba1, r8, r9, r10 - - ldr r1, [sp, #mDo1] - ldr r8, [r0, #_bo1] - ldr r12, [r0, #_si0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDi0] - ldr r9, [r0, #_gu1] - xorrol r12, r1, 30 - ldr r10, [r0, #_ka0] - ldr r11, [r0, #_me0] - xorrol r9, r6, 10 - xorrol r10, r5, 1 - xorrol r11, lr, 22 - xandnot sp, _ga1, r8, r9, r10 - xandnot sp, _ge1, r9, r10, r11 - xandnot sp, _gi1, r10, r11, r12 - xandnot sp, _go1, r11, r12, r8 - xandnot sp, _gu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r10, [r0, #_ko0] - ldr r8, [r0, #_be0] - xorrol r10, r1, 12 - ldr r9, [r0, #_gi1] - ldr r11, [r0, #_mu1] - ldr r12, [r0, #_sa1] - eor r8, r8, lr - xorrol r9, r2, 3 - xorrol r11, r6, 4 - xorrol r12, r4, 9 - xandnot sp, _ka1, r8, r9, r10 - xandnot sp, _ke1, r9, r10, r11 - xandnot sp, _ki1, r10, r11, r12 - xandnot sp, _ko1, r11, r12, r8 - xandnot sp, _ku1, r12, r8, r9 - - ldr r1, [sp, #mDe1] - ldr r10, [r0, #_ke1] - ldr r11, [r0, #_mi0] - xorrol r10, r1, 5 - ldr r1, [sp, #mDi0] - ldr r12, [r0, #_so1] - xorrol r11, r1, 7 - ldr r1, [sp, #mDo1] - ldr r8, [r0, #_bu0] - ldr r9, [r0, #_ga1] - xorrol r8, r7, 13 - xorrol r9, r4, 18 - xorrol r12, r1, 28 - xandnot sp, _ma1, r8, r9, r10 - xandnot sp, _me1, r9, r10, r11 - xandnot sp, _mi1, r10, r11, r12 - xandnot sp, _mo1, r11, r12, r8 - xandnot sp, _mu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r9, [r0, #_go0] - ldr r8, [r0, #_bi1] - xorrol r9, r1, 27 - ldr r10, [r0, #_ku0] - ldr r11, [r0, #_ma0] - ldr r12, [r0, #_se1] - ldr r1, [sp, #mDe1] - xorrol r8, r2, 31 - xorrol r10, r7, 19 - xorrol r11, r5, 20 - xorrol r12, r1, 1 - xandnot sp, _sa1, r8, r9, r10 - xandnot sp, _se1, r9, r10, r11 - xandnot sp, _si1, r10, r11, r12 - xandnot sp, _so1, r11, r12, r8 - xandnot sp, _su1, r12, r8, r9 - - ;//prepTheta E - xor5 r10, sp,_bu0, _gu0, _ku0, _mu0, _su0 - xor5 r6, sp,_be1, _ge1, _ke1, _me1, _se1 - eor r5, r10, r6, ROR #31 - xor5 r11, sp,_bu1, _gu1, _ku1, _mu1, _su1 - xor5 r7, sp,_be0, _ge0, _ke0, _me0, _se0 - eor r4, r11, r7 - - xor5 r8, sp,_bi0, _gi0, _ki0, _mi0, _si0 - eor r1, r8, r11, ROR #31 - str r1, [sp, #mDo0] - xor5 r9, sp,_bi1, _gi1, _ki1, _mi1, _si1 - eor r1, r9, r10 - str r1, [sp, #mDo1] - - xor5 r10, sp,_ba0, _ga0, _ka0, _ma0, _sa0 - eor lr, r10, r9, ROR #31 - xor5 r11, sp,_ba1, _ga1, _ka1, _ma1, _sa1 - eor r1, r11, r8 - str r1, [sp, #mDe1] - - xor5 r9, sp,_bo1, _go1, _ko1, _mo1, _so1 - eor r1, r7, r9, ROR #31 - str r1, [sp, #mDi0] - xor5 r8, sp,_bo0, _go0, _ko0, _mo0, _so0 - eor r2, r6, r8 - - eor r7, r8, r11, ROR #31 - eor r6, r9, r10 - - ;//thetaRhoPiChiIota 0, in E, out A - ldr r8, [sp, #_ba0] - ldr r9, [sp, #_ge0] - ldr r10, [sp, #_ki1] - ldr r11, [sp, #_mo1] - ldr r12, [sp, #_su0] - ldr r1, [sp, #mDo1] - eor r8, r8, r5 - xorrol r9, lr, 22 - xorrol r10, r2, 22 - xorrol r11, r1, 11 - xorrol r12, r7, 7 - xandnot r0, _be0, r9, r10, r11 - xandnot r0, _bi0, r10, r11, r12 - xandnot r0, _bo0, r11, r12, r8 - xandnot r0, _bu0, r12, r8, r9 - xandnotRC r0, _ba0, r8, r9, r10 - - ldr r8, [sp, #_bo0] - ldr r1, [sp, #mDo0] - ldr r9, [sp, #_gu0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDe1] - ldr r10, [sp, #_ka1] - ldr r11, [sp, #_me1] - ldr r12, [sp, #_si1] - xorrol r9, r7, 10 - xorrol r10, r4, 2 - xorrol r11, r1, 23 - xorrol r12, r2, 31 - xandnot r0, _ga0, r8, r9, r10 - xandnot r0, _ge0, r9, r10, r11 - xandnot r0, _gi0, r10, r11, r12 - xandnot r0, _go0, r11, r12, r8 - xandnot r0, _gu0, r12, r8, r9 - - ldr r8, [sp, #_be1] - ldr r1, [sp, #mDe1] - ldr r9, [sp, #_gi0] - xorrol r8, r1, 1 - ldr r1, [sp, #mDi0] - ldr r10, [sp, #_ko1] - xorrol r9, r1, 3 - ldr r1, [sp, #mDo1] - ldr r11, [sp, #_mu0] - ldr r12, [sp, #_sa0] - xorrol r10, r1, 13 - xorrol r11, r7, 4 - xorrol r12, r5, 9 - xandnot r0, _ka0, r8, r9, r10 - xandnot r0, _ke0, r9, r10, r11 - xandnot r0, _ki0, r10, r11, r12 - xandnot r0, _ko0, r11, r12, r8 - xandnot r0, _ku0, r12, r8, r9 - - ldr r8, [sp, #_bu1] - ldr r9, [sp, #_ga0] - ldr r10, [sp, #_ke0] - ldr r11, [sp, #_mi1] - ldr r12, [sp, #_so0] - ldr r1, [sp, #mDo0] - xorrol r8, r6, 14 - xorrol r9, r5, 18 - xorrol r10, lr, 5 - xorrol r11, r2, 8 - xorrol r12, r1, 28 - xandnot r0, _ma0, r8, r9, r10 - xandnot r0, _me0, r9, r10, r11 - xandnot r0, _mi0, r10, r11, r12 - xandnot r0, _mo0, r11, r12, r8 - xandnot r0, _mu0, r12, r8, r9 - - ldr r1, [sp, #mDi0] - ldr r8, [sp, #_bi0] - ldr r9, [sp, #_go1] - xorrol r8, r1, 31 - ldr r1, [sp, #mDo1] - ldr r10, [sp, #_ku1] - xorrol r9, r1, 28 - ldr r11, [sp, #_ma1] - ldr r12, [sp, #_se0] - xorrol r10, r6, 20 - xorrol r11, r4, 21 - xorrol r12, lr, 1 - xandnot r0, _sa0, r8, r9, r10 - xandnot r0, _se0, r9, r10, r11 - xandnot r0, _si0, r10, r11, r12 - xandnot r0, _so0, r11, r12, r8 - xandnot r0, _su0, r12, r8, r9 - - ;// thetaRhoPiChiIota 1, in A, out E - ldr r1, [sp, #mDe1] - ldr r9, [sp, #_ge1] - ldr r8, [sp, #_ba1] - xorrol r9, r1, 22 - ldr r1, [sp, #mDi0] - ldr r10, [sp, #_ki0] - eor r8, r8, r4 - xorrol r10, r1, 21 - ldr r1, [sp, #mDo0] - ldr r11, [sp, #_mo0] - ldr r12, [sp, #_su1] - xorrol r11, r1, 10 - xorrol r12, r6, 7 - xandnot r0, _be1, r9, r10, r11 - xandnot r0, _bi1, r10, r11, r12 - xandnot r0, _bo1, r11, r12, r8 - xandnot r0, _bu1, r12, r8, r9 - xandnotRC r0, _ba1, r8, r9, r10 - - ldr r1, [sp, #mDo1] - ldr r8, [sp, #_bo1] - ldr r12, [sp, #_si0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDi0] - ldr r9, [sp, #_gu1] - xorrol r12, r1, 30 - ldr r10, [sp, #_ka0] - ldr r11, [sp, #_me0] - xorrol r9, r6, 10 - xorrol r10, r5, 1 - xorrol r11, lr, 22 - xandnot r0, _ga1, r8, r9, r10 - xandnot r0, _ge1, r9, r10, r11 - xandnot r0, _gi1, r10, r11, r12 - xandnot r0, _go1, r11, r12, r8 - xandnot r0, _gu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r10, [sp, #_ko0] - ldr r8, [sp, #_be0] - xorrol r10, r1, 12 - ldr r9, [sp, #_gi1] - ldr r11, [sp, #_mu1] - ldr r12, [sp, #_sa1] - eor r8, r8, lr - xorrol r9, r2, 3 - xorrol r11, r6, 4 - xorrol r12, r4, 9 - xandnot r0, _ka1, r8, r9, r10 - xandnot r0, _ke1, r9, r10, r11 - xandnot r0, _ki1, r10, r11, r12 - xandnot r0, _ko1, r11, r12, r8 - xandnot r0, _ku1, r12, r8, r9 - - ldr r1, [sp, #mDe1] - ldr r10, [sp, #_ke1] - ldr r11, [sp, #_mi0] - xorrol r10, r1, 5 - ldr r1, [sp, #mDi0] - ldr r12, [sp, #_so1] - xorrol r11, r1, 7 - ldr r1, [sp, #mDo1] - ldr r8, [sp, #_bu0] - ldr r9, [sp, #_ga1] - xorrol r8, r7, 13 - xorrol r9, r4, 18 - xorrol r12, r1, 28 - xandnot r0, _ma1, r8, r9, r10 - xandnot r0, _me1, r9, r10, r11 - xandnot r0, _mi1, r10, r11, r12 - xandnot r0, _mo1, r11, r12, r8 - xandnot r0, _mu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r9, [sp, #_go0] - ldr r8, [sp, #_bi1] - xorrol r9, r1, 27 - ldr r10, [sp, #_ku0] - ldr r11, [sp, #_ma0] - ldr r12, [sp, #_se1] - ldr r1, [sp, #mDe1] - xorrol r8, r2, 31 - xorrol r10, r7, 19 - xorrol r11, r5, 20 - xorrol r12, r1, 1 - xandnot r0, _sa1, r8, r9, r10 - xandnot r0, _se1, r9, r10, r11 - xandnot r0, _si1, r10, r11, r12 - xandnot r0, _so1, r11, r12, r8 - ldr r10, [r3] - xandnot r0, _su1, r12, r8, r9 - - cmp r10, #0xFFFFFFFF - bne roundLoop - - add sp,sp,#4*(50+4) - pop {r4-r12,pc} - - ENDP - - ALIGN - - END diff --git a/c_src/KeccakF-1600-armgcc.s b/c_src/KeccakF-1600-armgcc.s deleted file mode 100755 index d16594b..0000000 --- a/c_src/KeccakF-1600-armgcc.s +++ /dev/null @@ -1,686 +0,0 @@ -@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -@ Michaël Peeters and Gilles Van Assche. For more information, feedback or -@ questions, please refer to our website: http://keccak.noekeon.org/ -@ -@ Implementation by Ronny Van Keer, -@ hereby denoted as "the implementer". -@ -@ To the extent possible under law, the implementer has waived all copyright -@ and related or neighboring rights to the source code in this file. -@ http://creativecommons.org/publicdomain/zero/1.0/ - -@ This file was created from a .asm file -@ using the ads2gas.pl script. - .equ DO1STROUNDING, 0 - - @ PRESERVE8 - @ THUMB - .syntax unified - .cpu cortex-m3 - .thumb - - -@// --- defines - -.equ _ba0 , 0*4 -.equ _ba1 , 1*4 -.equ _be0 , 2*4 -.equ _be1 , 3*4 -.equ _bi0 , 4*4 -.equ _bi1 , 5*4 -.equ _bo0 , 6*4 -.equ _bo1 , 7*4 -.equ _bu0 , 8*4 -.equ _bu1 , 9*4 -.equ _ga0 , 10*4 -.equ _ga1 , 11*4 -.equ _ge0 , 12*4 -.equ _ge1 , 13*4 -.equ _gi0 , 14*4 -.equ _gi1 , 15*4 -.equ _go0 , 16*4 -.equ _go1 , 17*4 -.equ _gu0 , 18*4 -.equ _gu1 , 19*4 -.equ _ka0 , 20*4 -.equ _ka1 , 21*4 -.equ _ke0 , 22*4 -.equ _ke1 , 23*4 -.equ _ki0 , 24*4 -.equ _ki1 , 25*4 -.equ _ko0 , 26*4 -.equ _ko1 , 27*4 -.equ _ku0 , 28*4 -.equ _ku1 , 29*4 -.equ _ma0 , 30*4 -.equ _ma1 , 31*4 -.equ _me0 , 32*4 -.equ _me1 , 33*4 -.equ _mi0 , 34*4 -.equ _mi1 , 35*4 -.equ _mo0 , 36*4 -.equ _mo1 , 37*4 -.equ _mu0 , 38*4 -.equ _mu1 , 39*4 -.equ _sa0 , 40*4 -.equ _sa1 , 41*4 -.equ _se0 , 42*4 -.equ _se1 , 43*4 -.equ _si0 , 44*4 -.equ _si1 , 45*4 -.equ _so0 , 46*4 -.equ _so1 , 47*4 -.equ _su0 , 48*4 -.equ _su1 , 49*4 - -.equ mDe1 , 50*4 -.equ mDi0 , 51*4 -.equ mDo0 , 52*4 -.equ mDo1 , 53*4 - -@// --- macros - -.macro xor5 result,ptr,b,g,k,m,s - - ldr \result, [\ptr, #\b] - ldr r1, [\ptr, #\g] - ldr r2, [\ptr, #\k] - eor \result, \result, r1 - ldr r1, [\ptr, #\m] - eor \result, \result, r2 - ldr r2, [\ptr, #\s] - eor \result, \result, r1 - eor \result, \result, r2 - .endm - -.macro xorrol b, yy, rr - - eor \b, \b, \yy - ror \b, #32-\rr - .endm - - -.macro xandnot resptr, resofs, aa, bb, cc - - bic r1, \cc, \bb - eor r1, r1, \aa - str r1, [\resptr, #\resofs] - .endm - -.macro xandnotRC resptr, resofs, aa, bb, cc - - ldr r1, [r3], #4 - bic \cc, \cc, \bb - eor \cc, \cc, r1 - eor \cc, \cc, \aa - str \cc, [\resptr, #\resofs] - .endm - - - .size KeccakPermutationOnWords, .-KeccakPermutationOnWords - .align 2 - .global KeccakPermutationOnWordsAfterXoring_ARM_asm - .thumb - .thumb_func - .type KeccakPermutationOnWordsAfterXoring_ARM_asm, %function -KeccakPermutationOnWordsAfterXoring_ARM_asm: - @ args = 0, pretend = 0, frame = 408 - @ frame_needed = 0, uses_anonymous_args = 0 - @ link register save eliminated. - - push {r4-r12,lr} - sub sp,sp,#4*(50+4) - - movs r9, r2 - beq interleaveDone - mov r8,r0 -interleaveLoop: - - ldr r4, [r1], #4 - ldr r5, [r1], #4 - ldrd r6, r7, [r8] - - @// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 - and r3,r4,#0x55555555 - orr r3,r3,r3, LSR #1 - and r3,r3,#0x33333333 - orr r3,r3,r3, LSR #2 - and r3,r3,#0x0F0F0F0F - orr r3,r3,r3, LSR #4 - and r3,r3,#0x00FF00FF - bfi r3,r3,#8, #8 - eor r6,r6,r3, LSR #8 - - and r3,r5,#0x55555555 - orr r3,r3,r3, LSR #1 - and r3,r3,#0x33333333 - orr r3,r3,r3, LSR #2 - and r3,r3,#0x0F0F0F0F - orr r3,r3,r3, LSR #4 - and r3,r3,#0x00FF00FF - orr r3,r3,r3, LSR #8 - eor r6,r6,r3, LSL #16 - - and r3,r4,#0xAAAAAAAA - orr r3,r3,r3, LSL #1 - and r3,r3,#0xCCCCCCCC - orr r3,r3,r3, LSL #2 - and r3,r3,#0xF0F0F0F0 - orr r3,r3,r3, LSL #4 - and r3,r3,#0xFF00FF00 - orr r3,r3,r3, LSL #8 - eor r7,r7,r3, LSR #16 - - and r3,r5,#0xAAAAAAAA - orr r3,r3,r3, LSL #1 - and r3,r3,#0xCCCCCCCC - orr r3,r3,r3, LSL #2 - and r3,r3,#0xF0F0F0F0 - orr r3,r3,r3, LSL #4 - and r3,r3,#0xFF00FF00 - orr r3,r3,r3, LSL #8 - bfc r3, #0, #16 - eor r7,r7,r3 - - strd r6,r7,[r8], #8 - - subs r9,r9,#1 - bne interleaveLoop - -interleaveDone: - - ldr r3, =KeccakF1600RoundConstantsWithTerminator - b roundLoop @//jump over the table - .ltorg - - @ ALIGN - -KeccakF1600RoundConstantsWithTerminator: - @// 0 1 - .word 0x00000001 - .word 0x00000000 - .word 0x00000000 - .word 0x00000089 - .word 0x00000000 - .word 0x8000008b - .word 0x00000000 - .word 0x80008080 - .word 0x00000001 - .word 0x0000008b - .word 0x00000001 - .word 0x00008000 - .word 0x00000001 - .word 0x80008088 - .word 0x00000001 - .word 0x80000082 - .word 0x00000000 - .word 0x0000000b - .word 0x00000000 - .word 0x0000000a - .word 0x00000001 - .word 0x00008082 - .word 0x00000000 - .word 0x00008003 - .word 0x00000001 - .word 0x0000808b - .word 0x00000001 - .word 0x8000000b - .word 0x00000001 - .word 0x8000008a - .word 0x00000001 - .word 0x80000081 - .word 0x00000000 - .word 0x80000081 - .word 0x00000000 - .word 0x80000008 - .word 0x00000000 - .word 0x00000083 - .word 0x00000000 - .word 0x80008003 - .word 0x00000001 - .word 0x80008088 - .word 0x00000000 - .word 0x80000088 - .word 0x00000001 - .word 0x00008000 - .word 0x00000000 - .word 0x80008082 - .word 0xFFFFFFFF @//terminator - -roundLoop: - - @//prepTheta A - xor5 r10, r0,_bu0, _gu0, _ku0, _mu0, _su0 - xor5 r6, r0,_be1, _ge1, _ke1, _me1, _se1 - eor r5, r10, r6, ROR #31 - xor5 r11, r0,_bu1, _gu1, _ku1, _mu1, _su1 - xor5 r7, r0,_be0, _ge0, _ke0, _me0, _se0 - eor r4, r11, r7 - - xor5 r8, r0,_bi0, _gi0, _ki0, _mi0, _si0 - eor r1, r8, r11, ROR #31 - str r1, [sp, #mDo0] - xor5 r9, r0,_bi1, _gi1, _ki1, _mi1, _si1 - eor r1, r9, r10 - str r1, [sp, #mDo1] - - xor5 r10, r0,_ba0, _ga0, _ka0, _ma0, _sa0 - eor lr, r10, r9, ROR #31 - xor5 r11, r0,_ba1, _ga1, _ka1, _ma1, _sa1 - eor r1, r11, r8 - str r1, [sp, #mDe1] - - xor5 r9, r0,_bo1, _go1, _ko1, _mo1, _so1 - eor r1, r7, r9, ROR #31 - str r1, [sp, #mDi0] - xor5 r8, r0,_bo0, _go0, _ko0, _mo0, _so0 - eor r2, r6, r8 - - eor r7, r8, r11, ROR #31 - eor r6, r9, r10 - - @//thetaRhoPiChiIota 0, in A, out E - ldr r8, [r0, #_ba0] - ldr r9, [r0, #_ge0] - ldr r10, [r0, #_ki1] - ldr r11, [r0, #_mo1] - ldr r12, [r0, #_su0] - ldr r1, [sp, #mDo1] - eor r8, r8, r5 - xorrol r9, lr, 22 - xorrol r10, r2, 22 - xorrol r11, r1, 11 - xorrol r12, r7, 7 - xandnot sp, _be0, r9, r10, r11 - xandnot sp, _bi0, r10, r11, r12 - xandnot sp, _bo0, r11, r12, r8 - xandnot sp, _bu0, r12, r8, r9 - xandnotRC sp, _ba0, r8, r9, r10 - - ldr r8, [r0, #_bo0] - ldr r1, [sp, #mDo0] - ldr r9, [r0, #_gu0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDe1] - ldr r10, [r0, #_ka1] - ldr r11, [r0, #_me1] - ldr r12, [r0, #_si1] - xorrol r9, r7, 10 - xorrol r10, r4, 2 - xorrol r11, r1, 23 - xorrol r12, r2, 31 - xandnot sp, _ga0, r8, r9, r10 - xandnot sp, _ge0, r9, r10, r11 - xandnot sp, _gi0, r10, r11, r12 - xandnot sp, _go0, r11, r12, r8 - xandnot sp, _gu0, r12, r8, r9 - - ldr r8, [r0, #_be1] - ldr r1, [sp, #mDe1] - ldr r9, [r0, #_gi0] - xorrol r8, r1, 1 - ldr r1, [sp, #mDi0] - ldr r10, [r0, #_ko1] - xorrol r9, r1, 3 - ldr r1, [sp, #mDo1] - ldr r11, [r0, #_mu0] - ldr r12, [r0, #_sa0] - xorrol r10, r1, 13 - xorrol r11, r7, 4 - xorrol r12, r5, 9 - xandnot sp, _ka0, r8, r9, r10 - xandnot sp, _ke0, r9, r10, r11 - xandnot sp, _ki0, r10, r11, r12 - xandnot sp, _ko0, r11, r12, r8 - xandnot sp, _ku0, r12, r8, r9 - - ldr r8, [r0, #_bu1] - ldr r9, [r0, #_ga0] - ldr r10, [r0, #_ke0] - ldr r11, [r0, #_mi1] - ldr r12, [r0, #_so0] - ldr r1, [sp, #mDo0] - xorrol r8, r6, 14 - xorrol r9, r5, 18 - xorrol r10, lr, 5 - xorrol r11, r2, 8 - xorrol r12, r1, 28 - xandnot sp, _ma0, r8, r9, r10 - xandnot sp, _me0, r9, r10, r11 - xandnot sp, _mi0, r10, r11, r12 - xandnot sp, _mo0, r11, r12, r8 - xandnot sp, _mu0, r12, r8, r9 - - ldr r1, [sp, #mDi0] - ldr r8, [r0, #_bi0] - ldr r9, [r0, #_go1] - xorrol r8, r1, 31 - ldr r1, [sp, #mDo1] - ldr r10, [r0, #_ku1] - xorrol r9, r1, 28 - ldr r11, [r0, #_ma1] - ldr r12, [r0, #_se0] - xorrol r10, r6, 20 - xorrol r11, r4, 21 - xorrol r12, lr, 1 - xandnot sp, _sa0, r8, r9, r10 - xandnot sp, _se0, r9, r10, r11 - xandnot sp, _si0, r10, r11, r12 - xandnot sp, _so0, r11, r12, r8 - xandnot sp, _su0, r12, r8, r9 - - @// thetaRhoPiChiIota 1, in A, out E - ldr r1, [sp, #mDe1] - ldr r9, [r0, #_ge1] - ldr r8, [r0, #_ba1] - xorrol r9, r1, 22 - ldr r1, [sp, #mDi0] - ldr r10, [r0, #_ki0] - eor r8, r8, r4 - xorrol r10, r1, 21 - ldr r1, [sp, #mDo0] - ldr r11, [r0, #_mo0] - ldr r12, [r0, #_su1] - xorrol r11, r1, 10 - xorrol r12, r6, 7 - xandnot sp, _be1, r9, r10, r11 - xandnot sp, _bi1, r10, r11, r12 - xandnot sp, _bo1, r11, r12, r8 - xandnot sp, _bu1, r12, r8, r9 - xandnotRC sp, _ba1, r8, r9, r10 - - ldr r1, [sp, #mDo1] - ldr r8, [r0, #_bo1] - ldr r12, [r0, #_si0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDi0] - ldr r9, [r0, #_gu1] - xorrol r12, r1, 30 - ldr r10, [r0, #_ka0] - ldr r11, [r0, #_me0] - xorrol r9, r6, 10 - xorrol r10, r5, 1 - xorrol r11, lr, 22 - xandnot sp, _ga1, r8, r9, r10 - xandnot sp, _ge1, r9, r10, r11 - xandnot sp, _gi1, r10, r11, r12 - xandnot sp, _go1, r11, r12, r8 - xandnot sp, _gu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r10, [r0, #_ko0] - ldr r8, [r0, #_be0] - xorrol r10, r1, 12 - ldr r9, [r0, #_gi1] - ldr r11, [r0, #_mu1] - ldr r12, [r0, #_sa1] - eor r8, r8, lr - xorrol r9, r2, 3 - xorrol r11, r6, 4 - xorrol r12, r4, 9 - xandnot sp, _ka1, r8, r9, r10 - xandnot sp, _ke1, r9, r10, r11 - xandnot sp, _ki1, r10, r11, r12 - xandnot sp, _ko1, r11, r12, r8 - xandnot sp, _ku1, r12, r8, r9 - - ldr r1, [sp, #mDe1] - ldr r10, [r0, #_ke1] - ldr r11, [r0, #_mi0] - xorrol r10, r1, 5 - ldr r1, [sp, #mDi0] - ldr r12, [r0, #_so1] - xorrol r11, r1, 7 - ldr r1, [sp, #mDo1] - ldr r8, [r0, #_bu0] - ldr r9, [r0, #_ga1] - xorrol r8, r7, 13 - xorrol r9, r4, 18 - xorrol r12, r1, 28 - xandnot sp, _ma1, r8, r9, r10 - xandnot sp, _me1, r9, r10, r11 - xandnot sp, _mi1, r10, r11, r12 - xandnot sp, _mo1, r11, r12, r8 - xandnot sp, _mu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r9, [r0, #_go0] - ldr r8, [r0, #_bi1] - xorrol r9, r1, 27 - ldr r10, [r0, #_ku0] - ldr r11, [r0, #_ma0] - ldr r12, [r0, #_se1] - ldr r1, [sp, #mDe1] - xorrol r8, r2, 31 - xorrol r10, r7, 19 - xorrol r11, r5, 20 - xorrol r12, r1, 1 - xandnot sp, _sa1, r8, r9, r10 - xandnot sp, _se1, r9, r10, r11 - xandnot sp, _si1, r10, r11, r12 - xandnot sp, _so1, r11, r12, r8 - xandnot sp, _su1, r12, r8, r9 - - @//prepTheta E - xor5 r10, sp,_bu0, _gu0, _ku0, _mu0, _su0 - xor5 r6, sp,_be1, _ge1, _ke1, _me1, _se1 - eor r5, r10, r6, ROR #31 - xor5 r11, sp,_bu1, _gu1, _ku1, _mu1, _su1 - xor5 r7, sp,_be0, _ge0, _ke0, _me0, _se0 - eor r4, r11, r7 - - xor5 r8, sp,_bi0, _gi0, _ki0, _mi0, _si0 - eor r1, r8, r11, ROR #31 - str r1, [sp, #mDo0] - xor5 r9, sp,_bi1, _gi1, _ki1, _mi1, _si1 - eor r1, r9, r10 - str r1, [sp, #mDo1] - - xor5 r10, sp,_ba0, _ga0, _ka0, _ma0, _sa0 - eor lr, r10, r9, ROR #31 - xor5 r11, sp,_ba1, _ga1, _ka1, _ma1, _sa1 - eor r1, r11, r8 - str r1, [sp, #mDe1] - - xor5 r9, sp,_bo1, _go1, _ko1, _mo1, _so1 - eor r1, r7, r9, ROR #31 - str r1, [sp, #mDi0] - xor5 r8, sp,_bo0, _go0, _ko0, _mo0, _so0 - eor r2, r6, r8 - - eor r7, r8, r11, ROR #31 - eor r6, r9, r10 - - @//thetaRhoPiChiIota 0, in E, out A - ldr r8, [sp, #_ba0] - ldr r9, [sp, #_ge0] - ldr r10, [sp, #_ki1] - ldr r11, [sp, #_mo1] - ldr r12, [sp, #_su0] - ldr r1, [sp, #mDo1] - eor r8, r8, r5 - xorrol r9, lr, 22 - xorrol r10, r2, 22 - xorrol r11, r1, 11 - xorrol r12, r7, 7 - xandnot r0, _be0, r9, r10, r11 - xandnot r0, _bi0, r10, r11, r12 - xandnot r0, _bo0, r11, r12, r8 - xandnot r0, _bu0, r12, r8, r9 - xandnotRC r0, _ba0, r8, r9, r10 - - ldr r8, [sp, #_bo0] - ldr r1, [sp, #mDo0] - ldr r9, [sp, #_gu0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDe1] - ldr r10, [sp, #_ka1] - ldr r11, [sp, #_me1] - ldr r12, [sp, #_si1] - xorrol r9, r7, 10 - xorrol r10, r4, 2 - xorrol r11, r1, 23 - xorrol r12, r2, 31 - xandnot r0, _ga0, r8, r9, r10 - xandnot r0, _ge0, r9, r10, r11 - xandnot r0, _gi0, r10, r11, r12 - xandnot r0, _go0, r11, r12, r8 - xandnot r0, _gu0, r12, r8, r9 - - ldr r8, [sp, #_be1] - ldr r1, [sp, #mDe1] - ldr r9, [sp, #_gi0] - xorrol r8, r1, 1 - ldr r1, [sp, #mDi0] - ldr r10, [sp, #_ko1] - xorrol r9, r1, 3 - ldr r1, [sp, #mDo1] - ldr r11, [sp, #_mu0] - ldr r12, [sp, #_sa0] - xorrol r10, r1, 13 - xorrol r11, r7, 4 - xorrol r12, r5, 9 - xandnot r0, _ka0, r8, r9, r10 - xandnot r0, _ke0, r9, r10, r11 - xandnot r0, _ki0, r10, r11, r12 - xandnot r0, _ko0, r11, r12, r8 - xandnot r0, _ku0, r12, r8, r9 - - ldr r8, [sp, #_bu1] - ldr r9, [sp, #_ga0] - ldr r10, [sp, #_ke0] - ldr r11, [sp, #_mi1] - ldr r12, [sp, #_so0] - ldr r1, [sp, #mDo0] - xorrol r8, r6, 14 - xorrol r9, r5, 18 - xorrol r10, lr, 5 - xorrol r11, r2, 8 - xorrol r12, r1, 28 - xandnot r0, _ma0, r8, r9, r10 - xandnot r0, _me0, r9, r10, r11 - xandnot r0, _mi0, r10, r11, r12 - xandnot r0, _mo0, r11, r12, r8 - xandnot r0, _mu0, r12, r8, r9 - - ldr r1, [sp, #mDi0] - ldr r8, [sp, #_bi0] - ldr r9, [sp, #_go1] - xorrol r8, r1, 31 - ldr r1, [sp, #mDo1] - ldr r10, [sp, #_ku1] - xorrol r9, r1, 28 - ldr r11, [sp, #_ma1] - ldr r12, [sp, #_se0] - xorrol r10, r6, 20 - xorrol r11, r4, 21 - xorrol r12, lr, 1 - xandnot r0, _sa0, r8, r9, r10 - xandnot r0, _se0, r9, r10, r11 - xandnot r0, _si0, r10, r11, r12 - xandnot r0, _so0, r11, r12, r8 - xandnot r0, _su0, r12, r8, r9 - - @// thetaRhoPiChiIota 1, in A, out E - ldr r1, [sp, #mDe1] - ldr r9, [sp, #_ge1] - ldr r8, [sp, #_ba1] - xorrol r9, r1, 22 - ldr r1, [sp, #mDi0] - ldr r10, [sp, #_ki0] - eor r8, r8, r4 - xorrol r10, r1, 21 - ldr r1, [sp, #mDo0] - ldr r11, [sp, #_mo0] - ldr r12, [sp, #_su1] - xorrol r11, r1, 10 - xorrol r12, r6, 7 - xandnot r0, _be1, r9, r10, r11 - xandnot r0, _bi1, r10, r11, r12 - xandnot r0, _bo1, r11, r12, r8 - xandnot r0, _bu1, r12, r8, r9 - xandnotRC r0, _ba1, r8, r9, r10 - - ldr r1, [sp, #mDo1] - ldr r8, [sp, #_bo1] - ldr r12, [sp, #_si0] - xorrol r8, r1, 14 - ldr r1, [sp, #mDi0] - ldr r9, [sp, #_gu1] - xorrol r12, r1, 30 - ldr r10, [sp, #_ka0] - ldr r11, [sp, #_me0] - xorrol r9, r6, 10 - xorrol r10, r5, 1 - xorrol r11, lr, 22 - xandnot r0, _ga1, r8, r9, r10 - xandnot r0, _ge1, r9, r10, r11 - xandnot r0, _gi1, r10, r11, r12 - xandnot r0, _go1, r11, r12, r8 - xandnot r0, _gu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r10, [sp, #_ko0] - ldr r8, [sp, #_be0] - xorrol r10, r1, 12 - ldr r9, [sp, #_gi1] - ldr r11, [sp, #_mu1] - ldr r12, [sp, #_sa1] - eor r8, r8, lr - xorrol r9, r2, 3 - xorrol r11, r6, 4 - xorrol r12, r4, 9 - xandnot r0, _ka1, r8, r9, r10 - xandnot r0, _ke1, r9, r10, r11 - xandnot r0, _ki1, r10, r11, r12 - xandnot r0, _ko1, r11, r12, r8 - xandnot r0, _ku1, r12, r8, r9 - - ldr r1, [sp, #mDe1] - ldr r10, [sp, #_ke1] - ldr r11, [sp, #_mi0] - xorrol r10, r1, 5 - ldr r1, [sp, #mDi0] - ldr r12, [sp, #_so1] - xorrol r11, r1, 7 - ldr r1, [sp, #mDo1] - ldr r8, [sp, #_bu0] - ldr r9, [sp, #_ga1] - xorrol r8, r7, 13 - xorrol r9, r4, 18 - xorrol r12, r1, 28 - xandnot r0, _ma1, r8, r9, r10 - xandnot r0, _me1, r9, r10, r11 - xandnot r0, _mi1, r10, r11, r12 - xandnot r0, _mo1, r11, r12, r8 - xandnot r0, _mu1, r12, r8, r9 - - ldr r1, [sp, #mDo0] - ldr r9, [sp, #_go0] - ldr r8, [sp, #_bi1] - xorrol r9, r1, 27 - ldr r10, [sp, #_ku0] - ldr r11, [sp, #_ma0] - ldr r12, [sp, #_se1] - ldr r1, [sp, #mDe1] - xorrol r8, r2, 31 - xorrol r10, r7, 19 - xorrol r11, r5, 20 - xorrol r12, r1, 1 - xandnot r0, _sa1, r8, r9, r10 - xandnot r0, _se1, r9, r10, r11 - xandnot r0, _si1, r10, r11, r12 - xandnot r0, _so1, r11, r12, r8 - ldr r10, [r3] - xandnot r0, _su1, r12, r8, r9 - - cmp r10, #0xFFFFFFFF - bne roundLoop - - add sp,sp,#4*(50+4) - pop {r4-r12,pc} - - @ - - @ ALIGN - diff --git a/c_src/KeccakF-1600-avr8.c b/c_src/KeccakF-1600-avr8.c deleted file mode 100755 index 7ea2679..0000000 --- a/c_src/KeccakF-1600-avr8.c +++ /dev/null @@ -1,163 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by Ronny Van Keer, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include -#include -#include "AVR8-rotate64.h" - -typedef unsigned char UINT8; -typedef UINT8 tSmallUInt; -typedef unsigned long long UINT64; -typedef UINT64 tKeccakLane; - -#define cKeccakLaneSizeInBits (sizeof(tKeccakLane) * 8) - -#define cKeccakNumberOfRounds 24 - -static tKeccakLane KeccakF_RoundConstants[cKeccakNumberOfRounds] PROGMEM = -{ - (tKeccakLane)0x0000000000000001ULL, - (tKeccakLane)0x0000000000008082ULL, - (tKeccakLane)0x800000000000808aULL, - (tKeccakLane)0x8000000080008000ULL, - (tKeccakLane)0x000000000000808bULL, - (tKeccakLane)0x0000000080000001ULL, - (tKeccakLane)0x8000000080008081ULL, - (tKeccakLane)0x8000000000008009ULL, - (tKeccakLane)0x000000000000008aULL, - (tKeccakLane)0x0000000000000088ULL, - (tKeccakLane)0x0000000080008009ULL, - (tKeccakLane)0x000000008000000aULL, - (tKeccakLane)0x000000008000808bULL, - (tKeccakLane)0x800000000000008bULL, - (tKeccakLane)0x8000000000008089ULL, - (tKeccakLane)0x8000000000008003ULL, - (tKeccakLane)0x8000000000008002ULL, - (tKeccakLane)0x8000000000000080ULL, - (tKeccakLane)0x000000000000800aULL, - (tKeccakLane)0x800000008000000aULL, - (tKeccakLane)0x8000000080008081ULL, - (tKeccakLane)0x8000000000008080ULL, - (tKeccakLane)0x0000000080000001ULL, - (tKeccakLane)0x8000000080008008ULL -}; - -static tSmallUInt KeccakF_RotationConstants[24] PROGMEM = -{ - ROT_CODE( 1), ROT_CODE( 3), ROT_CODE( 6), ROT_CODE(10), ROT_CODE(15), - ROT_CODE(21), ROT_CODE(28), ROT_CODE(36), ROT_CODE(45), ROT_CODE(55), - ROT_CODE( 2), ROT_CODE(14), ROT_CODE(27), ROT_CODE(41), ROT_CODE(56), - ROT_CODE( 8), ROT_CODE(25), ROT_CODE(43), ROT_CODE(62), ROT_CODE(18), - ROT_CODE(39), ROT_CODE(61), ROT_CODE(20), ROT_CODE(44) -}; - -static tSmallUInt KeccakF_PiLane[24] PROGMEM = -{ - 10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4, 15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1 -}; - -static tSmallUInt KeccakF_Mod5[10] PROGMEM = -{ - 0, 1, 2, 3, 4, 0, 1, 2, 3, 4 -}; - - -void KeccakF( tKeccakLane * state ) -{ - tSmallUInt round; - tKeccakLane C[5]; - - // prepare Theta - { - tSmallUInt x; - tKeccakLane * pC; - for ( x = 0, pC = C; x < 5; ++x, ++pC ) - { - *pC = state[x] ^ state[5 + x] ^ state[10 + x] ^ state[15 + x] ^ state[20 + x]; - } - } - - for( round = 0; round < cKeccakNumberOfRounds; ++round ) - { - // Theta - { - tSmallUInt x; - for ( x = 0; x < 5; ++x ) - { - tKeccakLane temp; - tSmallUInt y; - temp = rotate64_1bit_left( C[pgm_read_byte((KeccakF_Mod5+1)+x)] ); - temp ^= C[pgm_read_byte((KeccakF_Mod5+4)+x)]; - for ( y = 0; y < 25; y += 5 ) - { - state[y + x] ^= temp; - } - } - } - - // Rho Pi - { - tKeccakLane temp; - tSmallUInt x; - - temp = state[1]; - for ( x = 0; x < 24; ++x ) - { - tSmallUInt t; - tKeccakLane T[1]; - t = pgm_read_byte(&KeccakF_PiLane[x]); - T[0] = state[t]; - state[t] = rotate64left_code( temp, pgm_read_byte(&KeccakF_RotationConstants[x]) ); - temp = T[0]; - } - } - - // Chi Iota Prepare Theta - { - tSmallUInt z; - UINT8 * p = (unsigned char *)state; - UINT8 * pC = (unsigned char *)C; - - for( z = 0; z < 8; ++z, ++p, ++pC ) - { - tSmallUInt y; - UINT8 c0, c1, c2, c3, c4, t; - - c0 = c1 = c2 = c3 = c4 = 0; - for( y = 5; y != 0; --y, p += 40 ) - { - UINT8 a0 = *p; - UINT8 a1 = *(p+8); - UINT8 a2 = *(p+16); - UINT8 a3 = *(p+24); - UINT8 a4 = *(p+32); - - *p = t = a0 ^ ((~a1) & a2); c0 ^= t; - *(p+8) = t = a1 ^ ((~a2) & a3); c1 ^= t; - *(p+16) = a2 ^= ((~a3) & a4); c2 ^= a2; - *(p+24) = a3 ^= ((~a4) & a0); c3 ^= a3; - *(p+32) = a4 ^= ((~a0) & a1); c4 ^= a4; - } - p -= 5 * 5 * 8; - y = pgm_read_byte( (UINT8 *)(KeccakF_RoundConstants+round) + z ); - *p ^= y; - *pC = c0 ^ y; - *(pC+ 8) = c1; - *(pC+16) = c2; - *(pC+24) = c3; - *(pC+32) = c4; - } - } - } - -} diff --git a/c_src/KeccakF-1600-avr8asm-compact.s b/c_src/KeccakF-1600-avr8asm-compact.s deleted file mode 100755 index c87920f..0000000 --- a/c_src/KeccakF-1600-avr8asm-compact.s +++ /dev/null @@ -1,647 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by Ronny Van Keer, hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include "Keccak-avr8-settings.h" -#include "crypto_hash.h" - -#define cKeccakR_SizeInBytes (cKeccakR/8) - -#ifndef crypto_hash_BYTES - #ifdef cKeccakFixedOutputLengthInBytes - #define crypto_hash_BYTES cKeccakFixedOutputLengthInBytes - #else - #define crypto_hash_BYTES cKeccakR_SizeInBytes - #endif -#endif - -// Registers used in all routines -#define zero 1 -#define rpState 24 -#define rX 26 -#define rY 28 -#define rZ 30 - - -/* - * int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen ) - * - * argument out is passed in r24:r25 - * argument in is passed in r22:r23 - * argument inlen is passed in r14:r21, only lowest 16-bits (r14-r15) are used - */ -.global crypto_hash // populate.py, please update crypto_hash -crypto_hash: // populate.py, please update crypto_hash - - // crypto_hash only registers - #define rT1 16 - #define rT2 17 - #define rT3 18 - #define rInLen 22 //(2 regs) - #define sp 0x3D - - push r2 - push r3 - push r4 - push r5 - push r6 - push r7 - push r8 - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push r16 - push r17 - push r28 - push r29 - - // Allocate state (25*8) + C variables (5*8) - in rZ, sp - in rZ+1, sp+1 - subi rZ, 240 - sbci rZ+1, 0 - in r0, 0x3F - cli - out sp+1, rZ+1 - out sp, rZ - out 0x3F, r0 - adiw rZ, 41 // pointer to start of state, end of C, compensate post decrement - - push r24 // save out pointer - push r25 - - movw rpState, rZ - movw rY, r22 //y contains in pointer - movw rInLen, r14 - - ldi rT3, 5*5*8 //clear state -clearStateLoop: - st z+, zero - dec rT3 - brne clearStateLoop - - // Full blocks - cpi rInLen, cKeccakR_SizeInBytes - cpc rInLen+1, zero - brcs ch_lastblock - -ch_FullRateLoop: - ldi rT3, cKeccakR_SizeInBytes - movw rZ, rpState -ch_XorLanesLoop: - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - subi rT3, 1 - brne ch_XorLanesLoop - - push rY - push rY+1 - call KeccakF - pop rY+1 - pop rY - - subi rInLen, cKeccakR_SizeInBytes - sbci rInLen+1, 0 - cpi rInLen, cKeccakR_SizeInBytes - cpc rInLen+1, zero - brcc ch_FullRateLoop - -ch_lastblock: // XOR last uncomplete block into state - movw rZ, rpState - - subi rInLen, 0 - breq ch_Padding -ch_xorBytesLoop: - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - subi rInLen, 1 - brne ch_xorBytesLoop - -ch_Padding: - ldi rT1, 1 - ld rT2, Z - eor rT1, rT2 - st Z, rT1 - - ldi rZ, cKeccakR_SizeInBytes-1 - add rZ, rpState - mov rZ+1, rpState+1 - adc rZ+1, zero - ld rT1, Z - subi rT1, 0x80 - st Z, rT1 - - call KeccakF - - //output - ldi rT3, crypto_hash_BYTES - movw rY, rpState - pop rZ+1 ; restore out pointer - pop rZ -outputLoop: - ld rT1, Y+ - st Z+, rT1 - dec rT3 - brne outputLoop - - - // Free state and pop registers - ldi rZ, 199 - add rpState, rZ - adc rpState+1, zero - in r0, 0x3F - cli - out sp+1, rpState+1 - out sp, rpState - out 0x3F, r0 - - pop r29 - pop r28 - pop r17 - pop r16 - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop r8 - pop r7 - pop r6 - pop r5 - pop r4 - pop r3 - pop r2 - - // return 0 - mov r24, zero - mov r25, zero - - #undef rInLen - #undef rT1 - #undef rT2 - #undef rT3 - #undef sp - - ret - - -//#define ROT_BIT(a) (a <= 4) ? ((a == 0) ? 0x80 : (a & 7)) : (0x80 | (8-a)) - -#define ROT_BIT(a) ((a) & 7) -#define ROT_BYTE(a) (((a)/8 + !!(((a)%8) > 4)) & 7) - -KeccakF_RhoPiConstants: - .BYTE ROT_BIT( 1), ROT_BYTE( 3), 10 * 8 - .BYTE ROT_BIT( 3), ROT_BYTE( 6), 7 * 8 - .BYTE ROT_BIT( 6), ROT_BYTE(10), 11 * 8 - .BYTE ROT_BIT(10), ROT_BYTE(15), 17 * 8 - .BYTE ROT_BIT(15), ROT_BYTE(21), 18 * 8 - .BYTE ROT_BIT(21), ROT_BYTE(28), 3 * 8 - .BYTE ROT_BIT(28), ROT_BYTE(36), 5 * 8 - .BYTE ROT_BIT(36), ROT_BYTE(45), 16 * 8 - .BYTE ROT_BIT(45), ROT_BYTE(55), 8 * 8 - .BYTE ROT_BIT(55), ROT_BYTE( 2), 21 * 8 - .BYTE ROT_BIT( 2), ROT_BYTE(14), 24 * 8 - .BYTE ROT_BIT(14), ROT_BYTE(27), 4 * 8 - .BYTE ROT_BIT(27), ROT_BYTE(41), 15 * 8 - .BYTE ROT_BIT(41), ROT_BYTE(56), 23 * 8 - .BYTE ROT_BIT(56), ROT_BYTE( 8), 19 * 8 - .BYTE ROT_BIT( 8), ROT_BYTE(25), 13 * 8 - .BYTE ROT_BIT(25), ROT_BYTE(43), 12 * 8 - .BYTE ROT_BIT(43), ROT_BYTE(62), 2 * 8 - .BYTE ROT_BIT(62), ROT_BYTE(18), 20 * 8 - .BYTE ROT_BIT(18), ROT_BYTE(39), 14 * 8 - .BYTE ROT_BIT(39), ROT_BYTE(61), 22 * 8 - .BYTE ROT_BIT(61), ROT_BYTE(20), 9 * 8 - .BYTE ROT_BIT(20), ROT_BYTE(44), 6 * 8 - .BYTE ROT_BIT(44), ROT_BYTE( 1), 1 * 8 - - -KeccakF_RoundConstants: - .BYTE 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x82, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x8a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x8b, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x09, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x09, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x8b, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x89, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x0a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x08, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0xFF, 0 //terminator - - .text - - - -// KeccakF -// Not callable from C! -// -// argument rpState is passed in r24:r25 -// -KeccakF: - - // Variables used in multiple operations - #define rTemp 2 // 8 regs (2-9) - #define rTempBis 10 // 8 regs (10-17) - #define rTempTer 18 // 2 regs (18-19) - #define pRound 20 // 2 regs (20-21) - - // Initial Prepare Theta - #define TCIPx rTempTer - - movw rZ, rpState // Z points to 5 C lanes - sbiw rZ, 40 - movw rY, rpState - ldi TCIPx, 5*8 -KeccakInitialPrepTheta_Loop: - ld r0, Y - adiw rY, 40 - ld rTemp, Y - adiw rY, 40 - eor r0, rTemp - ld rTemp, Y - adiw rY, 40 - eor r0, rTemp - ld rTemp, Y - eor r0, rTemp - ldd rTemp, Y+40 - eor r0, rTemp - st Z+, r0 - subi rY, 119 - sbc rY+1, zero - dec TCIPx - brne KeccakInitialPrepTheta_Loop - #undef TCIPx - - ldi pRound, lo8(KeccakF_RoundConstants) - ldi pRound+1, hi8(KeccakF_RoundConstants) -Keccak_RoundLoop: - - // Theta - #define TCplus rX - #define TCminus rZ - #define TCcoordX rTempTer - #define TCcoordY rTempTer+1 - - movw TCminus, rpState - sbiw TCminus, 1*8 - movw TCplus, rpState - sbiw TCplus, 4*8 - movw rY, rpState - - ldi TCcoordX, 0x16 -KeccakTheta_Loop1: - ld rTemp+0, X+ - ld rTemp+1, X+ - ld rTemp+2, X+ - ld rTemp+3, X+ - ld rTemp+4, X+ - ld rTemp+5, X+ - ld rTemp+6, X+ - ld rTemp+7, X+ - - lsl rTemp+0 - rol rTemp+1 - rol rTemp+2 - rol rTemp+3 - rol rTemp+4 - rol rTemp+5 - rol rTemp+6 - rol rTemp+7 - adc rTemp+0, zero - - ld r0, Z+ - eor rTemp+0, r0 - ld r0, Z+ - eor rTemp+1, r0 - ld r0, Z+ - eor rTemp+2, r0 - ld r0, Z+ - eor rTemp+3, r0 - ld r0, Z+ - eor rTemp+4, r0 - ld r0, Z+ - eor rTemp+5, r0 - ld r0, Z+ - eor rTemp+6, r0 - ld r0, Z+ - eor rTemp+7, r0 - - ldi TCcoordY, 5 -KeccakTheta_Loop2: - ld r0, Y - eor r0, rTemp+0 - st Y+, r0 - ld r0, Y - eor r0, rTemp+1 - st Y+, r0 - ld r0, Y - eor r0, rTemp+2 - st Y+, r0 - ld r0, Y - eor r0, rTemp+3 - st Y+, r0 - ld r0, Y - eor r0, rTemp+4 - st Y+, r0 - ld r0, Y - eor r0, rTemp+5 - st Y+, r0 - ld r0, Y - eor r0, rTemp+6 - st Y+, r0 - ld r0, Y - eor r0, rTemp+7 - st Y+, r0 - adiw rY, 32 - - dec TCcoordY - brne KeccakTheta_Loop2 - - subi rY, 200-8 - sbc rY+1, zero - - lsr TCcoordX - brcc 1f - breq KeccakTheta_End - rjmp KeccakTheta_Loop1 -1: - cpi TCcoordX, 0x0B - brne 2f - sbiw TCminus, 40 - rjmp KeccakTheta_Loop1 -2: - sbiw TCplus, 40 - rjmp KeccakTheta_Loop1 - -KeccakTheta_End: - #undef TCplus - #undef TCminus - #undef TCcoordX - #undef TCcoordY - - - // Rho Pi - #define RPindex rTempTer+0 - #define RPTemp rTempTer+1 - - sbiw rY, 32 - - ld rTemp+0, Y+ - ld rTemp+1, Y+ - ld rTemp+2, Y+ - ld rTemp+3, Y+ - ld rTemp+4, Y+ - ld rTemp+5, Y+ - ld rTemp+6, Y+ - ld rTemp+7, Y+ - - ldi rZ, lo8(KeccakF_RhoPiConstants) - ldi rZ+1, hi8(KeccakF_RhoPiConstants) - -KeccakRhoPi_Loop: - ; do bit rotation - lpm RPTemp, Z+ ;get nuber of bits to rotate - cpi RPTemp, 5 - brcs rotate64_nbit_leftOrNot - neg RPTemp - andi RPTemp, 3 - -rotate64_nbit_right: - bst rTemp, 0 - ror rTemp+7 - ror rTemp+6 - ror rTemp+5 - ror rTemp+4 - ror rTemp+3 - ror rTemp+2 - ror rTemp+1 - ror rTemp - bld rTemp+7, 7 - dec RPTemp - brne rotate64_nbit_right - rjmp KeccakRhoPi_RhoBitRotateDone - -rotate64_nbit_leftOrNot: - tst RPTemp - breq KeccakRhoPi_RhoBitRotateDone -rotate64_nbit_left: - lsl rTemp - rol rTemp+1 - rol rTemp+2 - rol rTemp+3 - rol rTemp+4 - rol rTemp+5 - rol rTemp+6 - rol rTemp+7 - adc rTemp, r1 - dec RPTemp - brne rotate64_nbit_left - -KeccakRhoPi_RhoBitRotateDone: - lpm r0, Z+ ;get number of bytes to rotate - lpm RPindex, Z+ ;get index in state - movw rY, rpState - add rY, RPindex - adc rY+1, zero - - ldi rX, rTempBis - add rX, r0 - mov rX+1, zero - ldi RPTemp, 8 -KeccakRhoPi_PiByteRotLoop: - ld r0, Y+ - st X+, r0 - cpi rX, rTempBis+8 - brne KeccakRhoPi_PiByteRotFirst - ldi rX, rTempBis -KeccakRhoPi_PiByteRotFirst: - dec RPTemp - brne KeccakRhoPi_PiByteRotLoop - - sbiw rY, 8 - st Y+, rTemp+0 - st Y+, rTemp+1 - st Y+, rTemp+2 - st Y+, rTemp+3 - st Y+, rTemp+4 - st Y+, rTemp+5 - st Y+, rTemp+6 - st Y+, rTemp+7 - - movw rTemp+0, rTempBis+0 - movw rTemp+2, rTempBis+2 - movw rTemp+4, rTempBis+4 - movw rTemp+6, rTempBis+6 -KeccakRhoPi_RhoDone: - subi RPindex, 8 - brne KeccakRhoPi_Loop - - #undef RPindex - #undef RPTemp - - - // Chi Iota prepare Theta - #define CIPTa0 rTemp - #define CIPTa1 rTemp+1 - #define CIPTa2 rTemp+2 - #define CIPTa3 rTemp+3 - #define CIPTa4 rTemp+4 - #define CIPTc0 rTempBis - #define CIPTc1 rTempBis+1 - #define CIPTc2 rTempBis+2 - #define CIPTc3 rTempBis+3 - #define CIPTc4 rTempBis+4 - #define CIPTz rTempBis+6 - #define CIPTy rTempBis+7 - - movw rY, rpState - movw rX, rpState ; 5 * C - sbiw rX, 40 - movw rZ, pRound - - ldi CIPTz, 8 -KeccakChiIotaPrepareTheta_zLoop: - mov CIPTc0, zero - mov CIPTc1, zero - movw CIPTc2, CIPTc0 - mov CIPTc4, zero - - ldi CIPTy, 5 -KeccakChiIotaPrepareTheta_yLoop: - ld CIPTa0, Y - ldd CIPTa1, Y+8 - ldd CIPTa2, Y+16 - ldd CIPTa3, Y+24 - ldd CIPTa4, Y+32 - - ;*p = t = a0 ^ ((~a1) & a2); c0 ^= t; - mov r0, CIPTa1 - com r0 - and r0, CIPTa2 - eor r0, CIPTa0 - eor CIPTc0, r0 - st Y, r0 - - ;*(p+8) = t = a1 ^ ((~a2) & a3); c1 ^= t; - mov r0, CIPTa2 - com r0 - and r0, CIPTa3 - eor r0, CIPTa1 - eor CIPTc1, r0 - std Y+8, r0 - - ;*(p+16) = a2 ^= ((~a3) & a4); c2 ^= a2; - mov r0, CIPTa3 - com r0 - and r0, CIPTa4 - eor r0, CIPTa2 - eor CIPTc2, r0 - std Y+16, r0 - - ;*(p+24) = a3 ^= ((~a4) & a0); c3 ^= a3; - mov r0, CIPTa4 - com r0 - and r0, CIPTa0 - eor r0, CIPTa3 - eor CIPTc3, r0 - std Y+24, r0 - - ;*(p+32) = a4 ^= ((~a0) & a1); c4 ^= a4; - com CIPTa0 - and CIPTa0, CIPTa1 - eor CIPTa0, CIPTa4 - eor CIPTc4, CIPTa0 - std Y+32, CIPTa0 - - adiw rY, 40 - dec CIPTy - brne KeccakChiIotaPrepareTheta_yLoop - - subi rY, 200 - sbc rY+1, zero - - lpm r0, Z+ ;Round Constant - ld CIPTa0, Y - eor CIPTa0, r0 - st Y+, CIPTa0 - - movw pRound, rZ - movw rZ, rX - eor CIPTc0, r0 - st Z+, CIPTc0 - std Z+7, CIPTc1 - std Z+15, CIPTc2 - std Z+23, CIPTc3 - std Z+31, CIPTc4 - movw rX, rZ - movw rZ, pRound - - dec CIPTz - brne KeccakChiIotaPrepareTheta_zLoop - - #undef CIPTa0 - #undef CIPTa1 - #undef CIPTa2 - #undef CIPTa3 - #undef CIPTa4 - #undef CIPTc0 - #undef CIPTc1 - #undef CIPTc2 - #undef CIPTc3 - #undef CIPTc4 - #undef CIPTz - #undef CIPTy - - - ;Check for terminator - lpm r0, Z - inc r0 - breq Keccak_Done - rjmp Keccak_RoundLoop -Keccak_Done: - ret - - #undef rTemp - #undef rTempBis - #undef rTempTer - #undef pRound - - #undef rpState - #undef zero - #undef rX - #undef rY - #undef rZ diff --git a/c_src/KeccakF-1600-avr8asm-fast.s b/c_src/KeccakF-1600-avr8asm-fast.s deleted file mode 100755 index e27f174..0000000 --- a/c_src/KeccakF-1600-avr8asm-fast.s +++ /dev/null @@ -1,934 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by Ronny Van Keer, hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include "Keccak-avr8-settings.h" -#include "crypto_hash.h" - -#define cKeccakR_SizeInBytes (cKeccakR/8) - -#ifndef crypto_hash_BYTES - #ifdef cKeccakFixedOutputLengthInBytes - #define crypto_hash_BYTES cKeccakFixedOutputLengthInBytes - #else - #define crypto_hash_BYTES cKeccakR_SizeInBytes - #endif -#endif - -// Registers used in all routines -#define zero 1 -#define rpState 24 -#define rX 26 -#define rY 28 -#define rZ 30 - - -/* - * int crypto_hash( unsigned char *out, const unsigned char *in, unsigned long long inlen ) - * - * argument out is passed in r24:r25 - * argument in is passed in r22:r23 - * argument inlen is passed in r14:r21, only lowest 16-bits (r14-r15) are used - */ -.global crypto_hash // populate.py, please update crypto_hash -crypto_hash: // populate.py, please update crypto_hash - - // crypto_hash only registers - #define rInLen 16 //(2 regs) - #define rT1 18 - #define rT2 19 - #define rT3 20 - #define sp 0x3D - - push r2 - push r3 - push r4 - push r5 - push r6 - push r7 - push r8 - push r9 - push r10 - push r11 - push r12 - push r13 - push r14 - push r15 - push r16 - push r17 - push r28 - push r29 - - // Allocate state (25*8) + C variables (5*8) - in rZ, sp - in rZ+1, sp+1 - subi rZ, 240 - sbci rZ+1, 0 - in r0, 0x3F - cli - out sp+1, rZ+1 - out sp, rZ - out 0x3F, r0 - adiw rZ, 41 // pointer to start of state, end of C, compensate post decrement - - push r24 // save out pointer - push r25 - - movw rpState, rZ - movw rY, r22 //y contains in pointer - movw rInLen, r14 - - ldi rT3, 5*5*2 //clear state (4 bytes each iteration) -clearStateLoop: - st z+, zero - st z+, zero - st z+, zero - st z+, zero - dec rT3 - brne clearStateLoop - - // Full blocks - cpi rInLen, cKeccakR_SizeInBytes - cpc rInLen+1, zero - brcs ch_lastblock - -ch_FullRateLoop: - ldi rT3, cKeccakR_SizeInBytes/8 - movw rZ, rpState -ch_XorLanesLoop: - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - - subi rT3, 1 - brne ch_XorLanesLoop - - push rY - push rY+1 - push rInLen - push rInLen+1 - call KeccakF - pop rInLen+1 - pop rInLen - pop rY+1 - pop rY - - subi rInLen, cKeccakR_SizeInBytes - sbci rInLen+1, 0 - cpi rInLen, cKeccakR_SizeInBytes - cpc rInLen+1, zero - brcc ch_FullRateLoop - -ch_lastblock: // XOR last uncomplete block into state - movw rZ, rpState - - lsr rInLen - brcc ch_xorBytes2 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - subi rInLen, 0 -ch_xorBytes2: - breq ch_Padding -ch_xorBytes2Loop: - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - ld rT1, Y+ - ld rT2, Z - eor rT1, rT2 - st Z+, rT1 - subi rInLen, 1 - brne ch_xorBytes2Loop - -ch_Padding: - ldi rT1, 1 - ld rT2, Z - eor rT1, rT2 - st Z, rT1 - - ldi rZ, cKeccakR_SizeInBytes-1 - add rZ, rpState - mov rZ+1, rpState+1 - adc rZ+1, zero - ld rT1, Z - subi rT1, 0x80 - st Z, rT1 - - call KeccakF - - //output - ldi rT3, crypto_hash_BYTES/4 ; copy 4 bytes per iteration - movw rY, rpState - pop rZ+1 ; restore out pointer - pop rZ -outputLoop: - ld rT1, Y+ - st Z+, rT1 - ld rT1, Y+ - st Z+, rT1 - ld rT1, Y+ - st Z+, rT1 - ld rT1, Y+ - st Z+, rT1 - dec rT3 - brne outputLoop - - - // Free state and pop registers - ldi rZ, 199 - add rpState, rZ - adc rpState+1, zero - in r0, 0x3F - cli - out sp+1, rpState+1 - out sp, rpState - out 0x3F, r0 - - pop r29 - pop r28 - pop r17 - pop r16 - pop r15 - pop r14 - pop r13 - pop r12 - pop r11 - pop r10 - pop r9 - pop r8 - pop r7 - pop r6 - pop r5 - pop r4 - pop r3 - pop r2 - - // return 0 - mov r24, zero - mov r25, zero - - #undef rInLen - #undef rT1 - #undef rT2 - #undef rT3 - #undef sp - - ret - - -#define ROT_BIT(a) ((a) & 7) -#define ROT_BYTE(a) ((((a)/8 + !!(((a)%8) > 4)) & 7) * 9) - -KeccakF_RhoPiConstants: - .BYTE ROT_BIT( 1), ROT_BYTE( 3), 10 * 8 - .BYTE ROT_BIT( 3), ROT_BYTE( 6), 7 * 8 - .BYTE ROT_BIT( 6), ROT_BYTE(10), 11 * 8 - .BYTE ROT_BIT(10), ROT_BYTE(15), 17 * 8 - .BYTE ROT_BIT(15), ROT_BYTE(21), 18 * 8 - .BYTE ROT_BIT(21), ROT_BYTE(28), 3 * 8 - .BYTE ROT_BIT(28), ROT_BYTE(36), 5 * 8 - .BYTE ROT_BIT(36), ROT_BYTE(45), 16 * 8 - .BYTE ROT_BIT(45), ROT_BYTE(55), 8 * 8 - .BYTE ROT_BIT(55), ROT_BYTE( 2), 21 * 8 - .BYTE ROT_BIT( 2), ROT_BYTE(14), 24 * 8 - .BYTE ROT_BIT(14), ROT_BYTE(27), 4 * 8 - .BYTE ROT_BIT(27), ROT_BYTE(41), 15 * 8 - .BYTE ROT_BIT(41), ROT_BYTE(56), 23 * 8 - .BYTE ROT_BIT(56), ROT_BYTE( 8), 19 * 8 - .BYTE ROT_BIT( 8), ROT_BYTE(25), 13 * 8 - .BYTE ROT_BIT(25), ROT_BYTE(43), 12 * 8 - .BYTE ROT_BIT(43), ROT_BYTE(62), 2 * 8 - .BYTE ROT_BIT(62), ROT_BYTE(18), 20 * 8 - .BYTE ROT_BIT(18), ROT_BYTE(39), 14 * 8 - .BYTE ROT_BIT(39), ROT_BYTE(61), 22 * 8 - .BYTE ROT_BIT(61), ROT_BYTE(20), 9 * 8 - .BYTE ROT_BIT(20), ROT_BYTE(44), 6 * 8 - .BYTE ROT_BIT(44), ROT_BYTE( 1), 1 * 8 - - -KeccakF_RoundConstants: - .BYTE 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x82, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x8a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x00, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x8b, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x09, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x8a, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x88, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x09, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x8b, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x8b, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x89, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x03, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x02, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x0a, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x0a, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x81, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x80, 0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 - .BYTE 0x01, 0x00, 0x00, 0x80, 0x00, 0x00, 0x00, 0x00 - .BYTE 0x08, 0x80, 0x00, 0x80, 0x00, 0x00, 0x00, 0x80 - .BYTE 0xFF, 0 //terminator - - .text - - - -// KeccakF -// Not callable from C! -// -// argument rpState is passed in r24:r25 -// -KeccakF: - - // Variables used in multiple operations - #define rTemp 2 // 8 regs (2-9) - #define rTempBis 10 // 8 regs (10-17) - #define rTempTer 18 // 4 regs (18-21) - #define pRound 22 // 2 regs (22-23) - - // Initial Prepare Theta - #define TCIPx rTempTer - - movw rZ, rpState // Z points to 8 C - sbiw rZ, 40 - ldi TCIPx, 5 - movw rY, rpState -KeccakInitialPrepTheta_Loop: - ld rTemp+0, Y+ ;state[x] - ld rTemp+1, Y+ - ld rTemp+2, Y+ - ld rTemp+3, Y+ - ld rTemp+4, Y+ - ld rTemp+5, Y+ - ld rTemp+6, Y+ - ld rTemp+7, Y+ - - adiw rY, 32 - ld r0, Y+ ;state[5+x] - eor rTemp+0, r0 - ld r0, Y+ - eor rTemp+1, r0 - ld r0, Y+ - eor rTemp+2, r0 - ld r0, Y+ - eor rTemp+3, r0 - ld r0, Y+ - eor rTemp+4, r0 - ld r0, Y+ - eor rTemp+5, r0 - ld r0, Y+ - eor rTemp+6, r0 - ld r0, Y+ - eor rTemp+7, r0 - - adiw rY, 32 - ld r0, Y+ ;state[10+x] - eor rTemp+0, r0 - ld r0, Y+ - eor rTemp+1, r0 - ld r0, Y+ - eor rTemp+2, r0 - ld r0, Y+ - eor rTemp+3, r0 - ld r0, Y+ - eor rTemp+4, r0 - ld r0, Y+ - eor rTemp+5, r0 - ld r0, Y+ - eor rTemp+6, r0 - ld r0, Y+ - eor rTemp+7, r0 - - adiw rY, 32 - ld r0, Y+ ;state[15+x] - eor rTemp+0, r0 - ld r0, Y+ - eor rTemp+1, r0 - ld r0, Y+ - eor rTemp+2, r0 - ld r0, Y+ - eor rTemp+3, r0 - ld r0, Y+ - eor rTemp+4, r0 - ld r0, Y+ - eor rTemp+5, r0 - ld r0, Y+ - eor rTemp+6, r0 - ld r0, Y+ - eor rTemp+7, r0 - - adiw rY, 32 - ld r0, Y+ ;state[20+x] - eor rTemp+0, r0 - ld r0, Y+ - eor rTemp+1, r0 - ld r0, Y+ - eor rTemp+2, r0 - ld r0, Y+ - eor rTemp+3, r0 - ld r0, Y+ - eor rTemp+4, r0 - ld r0, Y+ - eor rTemp+5, r0 - ld r0, Y+ - eor rTemp+6, r0 - ld r0, Y+ - eor rTemp+7, r0 - - st Z+, rTemp+0 - st Z+, rTemp+1 - st Z+, rTemp+2 - st Z+, rTemp+3 - st Z+, rTemp+4 - st Z+, rTemp+5 - st Z+, rTemp+6 - st Z+, rTemp+7 - - subi rY, 160 - sbc rY+1, zero - - subi TCIPx, 1 - breq KeccakInitialPrepTheta_Done - rjmp KeccakInitialPrepTheta_Loop -KeccakInitialPrepTheta_Done: - #undef TCIPx - - ldi pRound, lo8(KeccakF_RoundConstants) - ldi pRound+1, hi8(KeccakF_RoundConstants) -Keccak_RoundLoop: - - // Theta - #define TCplus rX - #define TCminus rZ - #define TCcoordX rTempTer - #define TCcoordY rTempTer+1 - - movw TCminus, rpState - sbiw TCminus, 1*8 - movw TCplus, rpState - sbiw TCplus, 4*8 - movw rY, rpState - - ldi TCcoordX, 0x16 -KeccakTheta_Loop1: - ld rTemp+0, X+ - ld rTemp+1, X+ - ld rTemp+2, X+ - ld rTemp+3, X+ - ld rTemp+4, X+ - ld rTemp+5, X+ - ld rTemp+6, X+ - ld rTemp+7, X+ - - lsl rTemp+0 - rol rTemp+1 - rol rTemp+2 - rol rTemp+3 - rol rTemp+4 - rol rTemp+5 - rol rTemp+6 - rol rTemp+7 - adc rTemp+0, zero - - ld r0, Z+ - eor rTemp+0, r0 - ld r0, Z+ - eor rTemp+1, r0 - ld r0, Z+ - eor rTemp+2, r0 - ld r0, Z+ - eor rTemp+3, r0 - ld r0, Z+ - eor rTemp+4, r0 - ld r0, Z+ - eor rTemp+5, r0 - ld r0, Z+ - eor rTemp+6, r0 - ld r0, Z+ - eor rTemp+7, r0 - - ldi TCcoordY, 5 -KeccakTheta_Loop2: - ld r0, Y - eor r0, rTemp+0 - st Y+, r0 - ld r0, Y - eor r0, rTemp+1 - st Y+, r0 - ld r0, Y - eor r0, rTemp+2 - st Y+, r0 - ld r0, Y - eor r0, rTemp+3 - st Y+, r0 - ld r0, Y - eor r0, rTemp+4 - st Y+, r0 - ld r0, Y - eor r0, rTemp+5 - st Y+, r0 - ld r0, Y - eor r0, rTemp+6 - st Y+, r0 - ld r0, Y - eor r0, rTemp+7 - st Y+, r0 - adiw rY, 32 - - dec TCcoordY - brne KeccakTheta_Loop2 - - subi rY, 200-8 - sbc rY+1, zero - - lsr TCcoordX - brcc 1f - breq KeccakTheta_End - rjmp KeccakTheta_Loop1 -1: - cpi TCcoordX, 0x0B - brne 2f - sbiw TCminus, 40 - rjmp KeccakTheta_Loop1 -2: - sbiw TCplus, 40 - rjmp KeccakTheta_Loop1 - -KeccakTheta_End: - #undef TCplus - #undef TCminus - #undef TCcoordX - #undef TCcoordY - - - // Rho Pi - #define RPpConst rTempTer // 2 regs - #define RPindex rTempTer+2 - #define RPpBitRot rX - #define RPpByteRot pRound - - sbiw rY, 32 - - ld rTemp+0, Y+ - ld rTemp+1, Y+ - ld rTemp+2, Y+ - ld rTemp+3, Y+ - ld rTemp+4, Y+ - ld rTemp+5, Y+ - ld rTemp+6, Y+ - ld rTemp+7, Y+ - - push pRound - push pRound+1 - ldi RPpConst, lo8(KeccakF_RhoPiConstants) - ldi RPpConst+1, hi8(KeccakF_RhoPiConstants) - ldi RPpBitRot, pm_lo8(bit_rot_jmp_table) - ldi RPpBitRot+1, pm_hi8(bit_rot_jmp_table) - ldi RPpByteRot, pm_lo8(rotate64_0byte_left) - ldi RPpByteRot+1, pm_hi8(rotate64_0byte_left) - -KeccakRhoPi_Loop: - ; get rotation codes and state index - movw rZ, RPpConst - lpm r0, Z+ ;bits - lpm rTempBis, Z+ ;bytes - lpm RPindex, Z+ - movw RPpConst, rZ - - ; do bit rotation - movw rZ, RPpBitRot - add rZ, r0 - adc rZ+1, zero - ijmp - -KeccakRhoPi_RhoBitRotateDone: - movw rY, rpState - add rY, RPindex - adc rY+1, zero - - movw rZ, RPpByteRot - add rZ, rTempBis - adc rZ+1, zero - ijmp - -KeccakRhoPi_PiStore: - sbiw rY, 8 - st Y+, rTemp+0 - st Y+, rTemp+1 - st Y+, rTemp+2 - st Y+, rTemp+3 - st Y+, rTemp+4 - st Y+, rTemp+5 - st Y+, rTemp+6 - st Y+, rTemp+7 - - movw rTemp+0, rTempBis+0 - movw rTemp+2, rTempBis+2 - movw rTemp+4, rTempBis+4 - movw rTemp+6, rTempBis+6 -KeccakRhoPi_RhoDone: - subi RPindex, 8 - brne KeccakRhoPi_Loop - pop pRound+1 - pop pRound - - #undef RPpConst - #undef RPindex - #undef RPpBitRot - #undef RPpByteRot - - - // Chi Iota prepare Theta - #define CIPTa0 rTemp - #define CIPTa1 rTemp+1 - #define CIPTa2 rTemp+2 - #define CIPTa3 rTemp+3 - #define CIPTa4 rTemp+4 - #define CIPTc0 rTempBis - #define CIPTc1 rTempBis+1 - #define CIPTc2 rTempBis+2 - #define CIPTc3 rTempBis+3 - #define CIPTc4 rTempBis+4 - #define CIPTz rTempBis+6 - #define CIPTy rTempBis+7 - - movw rY, rpState - movw rX, rpState ; 5 * C - sbiw rX, 40 - movw rZ, pRound - - ldi CIPTz, 8 -KeccakChiIotaPrepareTheta_zLoop: - mov CIPTc0, zero - mov CIPTc1, zero - movw CIPTc2, CIPTc0 - mov CIPTc4, zero - - ldi CIPTy, 5 -KeccakChiIotaPrepareTheta_yLoop: - ld CIPTa0, Y - ldd CIPTa1, Y+8 - ldd CIPTa2, Y+16 - ldd CIPTa3, Y+24 - ldd CIPTa4, Y+32 - - ;*p = t = a0 ^ ((~a1) & a2); c0 ^= t; - mov r0, CIPTa1 - com r0 - and r0, CIPTa2 - eor r0, CIPTa0 - eor CIPTc0, r0 - st Y, r0 - - ;*(p+8) = t = a1 ^ ((~a2) & a3); c1 ^= t; - mov r0, CIPTa2 - com r0 - and r0, CIPTa3 - eor r0, CIPTa1 - eor CIPTc1, r0 - std Y+8, r0 - - ;*(p+16) = a2 ^= ((~a3) & a4); c2 ^= a2; - mov r0, CIPTa3 - com r0 - and r0, CIPTa4 - eor r0, CIPTa2 - eor CIPTc2, r0 - std Y+16, r0 - - ;*(p+24) = a3 ^= ((~a4) & a0); c3 ^= a3; - mov r0, CIPTa4 - com r0 - and r0, CIPTa0 - eor r0, CIPTa3 - eor CIPTc3, r0 - std Y+24, r0 - - ;*(p+32) = a4 ^= ((~a0) & a1); c4 ^= a4; - com CIPTa0 - and CIPTa0, CIPTa1 - eor CIPTa0, CIPTa4 - eor CIPTc4, CIPTa0 - std Y+32, CIPTa0 - - adiw rY, 40 - dec CIPTy - brne KeccakChiIotaPrepareTheta_yLoop - - subi rY, 200 - sbc rY+1, zero - - lpm r0, Z+ ;Round Constant - ld CIPTa0, Y - eor CIPTa0, r0 - st Y+, CIPTa0 - - movw pRound, rZ - movw rZ, rX - eor CIPTc0, r0 - st Z+, CIPTc0 - std Z+7, CIPTc1 - std Z+15, CIPTc2 - std Z+23, CIPTc3 - std Z+31, CIPTc4 - movw rX, rZ - movw rZ, pRound - - dec CIPTz - brne KeccakChiIotaPrepareTheta_zLoop - - #undef CIPTa0 - #undef CIPTa1 - #undef CIPTa2 - #undef CIPTa3 - #undef CIPTa4 - #undef CIPTc0 - #undef CIPTc1 - #undef CIPTc2 - #undef CIPTc3 - #undef CIPTc4 - #undef CIPTz - #undef CIPTy - - - ;Check for terminator - lpm r0, Z - inc r0 - breq Keccak_Done - rjmp Keccak_RoundLoop -Keccak_Done: - ret - - -bit_rot_jmp_table: - rjmp KeccakRhoPi_RhoBitRotateDone - rjmp rotate64_1bit_left - rjmp rotate64_2bit_left - rjmp rotate64_3bit_left - rjmp rotate64_4bit_left - rjmp rotate64_3bit_right - rjmp rotate64_2bit_right - rjmp rotate64_1bit_right - -rotate64_4bit_left: - lsl rTemp - rol rTemp+1 - rol rTemp+2 - rol rTemp+3 - rol rTemp+4 - rol rTemp+5 - rol rTemp+6 - rol rTemp+7 - adc rTemp, r1 -rotate64_3bit_left: - lsl rTemp - rol rTemp+1 - rol rTemp+2 - rol rTemp+3 - rol rTemp+4 - rol rTemp+5 - rol rTemp+6 - rol rTemp+7 - adc rTemp, r1 -rotate64_2bit_left: - lsl rTemp - rol rTemp+1 - rol rTemp+2 - rol rTemp+3 - rol rTemp+4 - rol rTemp+5 - rol rTemp+6 - rol rTemp+7 - adc rTemp, r1 -rotate64_1bit_left: - lsl rTemp - rol rTemp+1 - rol rTemp+2 - rol rTemp+3 - rol rTemp+4 - rol rTemp+5 - rol rTemp+6 - rol rTemp+7 - adc rTemp, r1 - rjmp KeccakRhoPi_RhoBitRotateDone - -rotate64_3bit_right: - bst rTemp, 0 - ror rTemp+7 - ror rTemp+6 - ror rTemp+5 - ror rTemp+4 - ror rTemp+3 - ror rTemp+2 - ror rTemp+1 - ror rTemp - bld rTemp+7, 7 -rotate64_2bit_right: - bst rTemp, 0 - ror rTemp+7 - ror rTemp+6 - ror rTemp+5 - ror rTemp+4 - ror rTemp+3 - ror rTemp+2 - ror rTemp+1 - ror rTemp - bld rTemp+7, 7 -rotate64_1bit_right: - bst rTemp, 0 - ror rTemp+7 - ror rTemp+6 - ror rTemp+5 - ror rTemp+4 - ror rTemp+3 - ror rTemp+2 - ror rTemp+1 - ror rTemp - bld rTemp+7, 7 - rjmp KeccakRhoPi_RhoBitRotateDone - -/* -** Each byte rotate routine must be 9 instructions long. -*/ -rotate64_0byte_left: - ld rTempBis+0, Y+ - ld rTempBis+1, Y+ - ld rTempBis+2, Y+ - ld rTempBis+3, Y+ - ld rTempBis+4, Y+ - ld rTempBis+5, Y+ - ld rTempBis+6, Y+ - ld rTempBis+7, Y+ - rjmp KeccakRhoPi_PiStore - -rotate64_1byte_left: - ld rTempBis+1, Y+ - ld rTempBis+2, Y+ - ld rTempBis+3, Y+ - ld rTempBis+4, Y+ - ld rTempBis+5, Y+ - ld rTempBis+6, Y+ - ld rTempBis+7, Y+ - ld rTempBis+0, Y+ - rjmp KeccakRhoPi_PiStore - -rotate64_2byte_left: - ld rTempBis+2, Y+ - ld rTempBis+3, Y+ - ld rTempBis+4, Y+ - ld rTempBis+5, Y+ - ld rTempBis+6, Y+ - ld rTempBis+7, Y+ - ld rTempBis+0, Y+ - ld rTempBis+1, Y+ - rjmp KeccakRhoPi_PiStore - -rotate64_3byte_left: - ld rTempBis+3, Y+ - ld rTempBis+4, Y+ - ld rTempBis+5, Y+ - ld rTempBis+6, Y+ - ld rTempBis+7, Y+ - ld rTempBis+0, Y+ - ld rTempBis+1, Y+ - ld rTempBis+2, Y+ - rjmp KeccakRhoPi_PiStore - -rotate64_4byte_left: - ld rTempBis+4, Y+ - ld rTempBis+5, Y+ - ld rTempBis+6, Y+ - ld rTempBis+7, Y+ - ld rTempBis+0, Y+ - ld rTempBis+1, Y+ - ld rTempBis+2, Y+ - ld rTempBis+3, Y+ - rjmp KeccakRhoPi_PiStore - -rotate64_5byte_left: - ld rTempBis+5, Y+ - ld rTempBis+6, Y+ - ld rTempBis+7, Y+ - ld rTempBis+0, Y+ - ld rTempBis+1, Y+ - ld rTempBis+2, Y+ - ld rTempBis+3, Y+ - ld rTempBis+4, Y+ - rjmp KeccakRhoPi_PiStore - -rotate64_6byte_left: - ld rTempBis+6, Y+ - ld rTempBis+7, Y+ - ld rTempBis+0, Y+ - ld rTempBis+1, Y+ - ld rTempBis+2, Y+ - ld rTempBis+3, Y+ - ld rTempBis+4, Y+ - ld rTempBis+5, Y+ - rjmp KeccakRhoPi_PiStore - -rotate64_7byte_left: - ld rTempBis+7, Y+ - ld rTempBis+0, Y+ - ld rTempBis+1, Y+ - ld rTempBis+2, Y+ - ld rTempBis+3, Y+ - ld rTempBis+4, Y+ - ld rTempBis+5, Y+ - ld rTempBis+6, Y+ - rjmp KeccakRhoPi_PiStore - - #undef rTemp - #undef rTempBis - #undef rTempTer - #undef pRound - - #undef rpState - #undef zero - #undef rX - #undef rY - #undef rZ diff --git a/c_src/KeccakF-1600-inplace-armgcc-ARMv7A-NEON.s b/c_src/KeccakF-1600-inplace-armgcc-ARMv7A-NEON.s deleted file mode 100755 index 539e8ea..0000000 --- a/c_src/KeccakF-1600-inplace-armgcc-ARMv7A-NEON.s +++ /dev/null @@ -1,446 +0,0 @@ -@ The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -@ Michaël Peeters and Gilles Van Assche. For more information, feedback or -@ questions, please refer to our website: http://keccak.noekeon.org/ -@ -@ Implementation by Ronny Van Keer, hereby denoted as "the implementer". -@ -@ To the extent possible under law, the implementer has waived all copyright -@ and related or neighboring rights to the source code in this file. -@ http://creativecommons.org/publicdomain/zero/1.0/ - -@ This file was created from a .asm file -@ using the ads2gas.pl script. -.equ DO1STROUNDING, 0 - - @ PRESERVE8 -.text - -@// --- offsets in state -.equ Aba, 0*8 -.equ Aga, 1*8 -.equ Aka, 2*8 -.equ Ama, 3*8 -.equ Asa, 4*8 - -@// --- macros - -.macro KeccakThetaRhoPiChiIota argA1, argA2, argA3, argA4, argA5 - - @Prepare Theta - @Ca = Aba^Aga^Aka^Ama^Asa@ - @Ce = Abe^Age^Ake^Ame^Ase@ - @Ci = Abi^Agi^Aki^Ami^Asi@ - @Co = Abo^Ago^Ako^Amo^Aso@ - @Cu = Abu^Agu^Aku^Amu^Asu@ - @De = Ca^ROL64(Ci, 1)@ - @Di = Ce^ROL64(Co, 1)@ - @Do = Ci^ROL64(Cu, 1)@ - @Du = Co^ROL64(Ca, 1)@ - @Da = Cu^ROL64(Ce, 1)@ - - veor.64 q4, q6, q7 - veor.64 q5, q9, q10 - veor.64 d8, d8, d9 - veor.64 d10, d10, d11 - veor.64 d1, d8, d16 - veor.64 d2, d10, d17 - - veor.64 q4, q11, q12 - veor.64 q5, q14, q15 - veor.64 d8, d8, d9 - veor.64 d10, d10, d11 - veor.64 d3, d8, d26 - - vadd.u64 q4, q1, q1 - veor.64 d4, d10, d27 - vmov.64 d0, d5 - vsri.64 q4, q1, #63 - - vadd.u64 q5, q2, q2 - veor.64 q4, q4, q0 - vsri.64 q5, q2, #63 - vadd.u64 d7, d1, d1 - veor.64 \argA2, \argA2, d8 - veor.64 q5, q5, q1 - - vsri.64 d7, d1, #63 - vshl.u64 d1, \argA2, #44 - veor.64 \argA3, \argA3, d9 - veor.64 d7, d7, d4 - - @Ba = argA1^Da@ - @Be = ROL64((argA2^De), 44)@ - @Bi = ROL64((argA3^Di), 43)@ - @Bo = ROL64((argA4^Do), 21)@ - @Bu = ROL64((argA5^Du), 14)@ - @argA2 = Be ^((~Bi)& Bo )@ - @argA3 = Bi ^((~Bo)& Bu )@ - @argA4 = Bo ^((~Bu)& Ba )@ - @argA5 = Bu ^((~Ba)& Be )@ - @argA1 = Ba ^((~Be)& Bi )@ argA1 ^= KeccakF1600RoundConstants[i+round]@ - vsri.64 d1, \argA2, #64-44 - vshl.u64 d2, \argA3, #43 - vldr.64 d0, [sp, #\argA1] - veor.64 \argA4, \argA4, d10 - vsri.64 d2, \argA3, #64-43 - vshl.u64 d3, \argA4, #21 - veor.64 \argA5, \argA5, d11 - veor.64 d0, d0, d7 - vsri.64 d3, \argA4, #64-21 - vbic.64 d5, d2, d1 - vshl.u64 d4, \argA5, #14 - vbic.64 \argA2, d3, d2 - vld1.64 d6, [r3]! - veor.64 d5, d0 - vsri.64 d4, \argA5, #64-14 - veor.64 d5, d6 - vbic.64 \argA5, d1, d0 - vbic.64 \argA3, d4, d3 - vbic.64 \argA4, d0, d4 - veor.64 \argA2, d1 - vstr.64 d5, [sp, #\argA1] - veor.64 \argA3, d2 - veor.64 \argA4, d3 - veor.64 \argA5, d4 - - .endm - -.macro KeccakThetaRhoPiChi1 argA1, argA2, argA3, argA4, argA5 - - @d2 = ROL64((argA1^Da), 3)@ - @d3 = ROL64((argA2^De), 45)@ - @d4 = ROL64((argA3^Di), 61)@ - @d0 = ROL64((argA4^Do), 28)@ - @d1 = ROL64((argA5^Du), 20)@ - @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ - @argA2 = Be ^((~Bi)& Bo )@ - @argA3 = Bi ^((~Bo)& Bu )@ - @argA4 = Bo ^((~Bu)& Ba )@ - @argA5 = Bu ^((~Ba)& Be )@ - - veor.64 \argA2, \argA2, d8 - veor.64 \argA3, \argA3, d9 - vshl.u64 d3, \argA2, #45 - vldr.64 d6, [sp, #\argA1] - vshl.u64 d4, \argA3, #61 - veor.64 \argA4, \argA4, d10 - vsri.64 d3, \argA2, #64-45 - veor.64 \argA5, \argA5, d11 - vsri.64 d4, \argA3, #64-61 - vshl.u64 d0, \argA4, #28 - veor.64 d6, d6, d7 - vshl.u64 d1, \argA5, #20 - vbic.64 \argA3, d4, d3 - vsri.64 d0, \argA4, #64-28 - vbic.64 \argA4, d0, d4 - vshl.u64 d2, d6, #3 - vsri.64 d1, \argA5, #64-20 - veor.64 \argA4, d3 - vsri.64 d2, d6, #64-3 - vbic.64 \argA5, d1, d0 - vbic.64 d6, d2, d1 - vbic.64 \argA2, d3, d2 - veor.64 d6, d0 - veor.64 \argA2, d1 - vstr.64 d6, [sp, #\argA1] - veor.64 \argA3, d2 - veor.64 d5, d6 - veor.64 \argA5, d4 - - .endm - -.macro KeccakThetaRhoPiChi2 argA1, argA2, argA3, argA4, argA5 - - @d4 = ROL64((argA1^Da), 18)@ - @d0 = ROL64((argA2^De), 1)@ - @d1 = ROL64((argA3^Di), 6)@ - @d2 = ROL64((argA4^Do), 25)@ - @d3 = ROL64((argA5^Du), 8)@ - @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ - @argA2 = Be ^((~Bi)& Bo )@ - @argA3 = Bi ^((~Bo)& Bu )@ - @argA4 = Bo ^((~Bu)& Ba )@ - @argA5 = Bu ^((~Ba)& Be )@ - - veor.64 \argA3, \argA3, d9 - veor.64 \argA4, \argA4, d10 - vshl.u64 d1, \argA3, #6 - vldr.64 d6, [sp, #\argA1] - vshl.u64 d2, \argA4, #25 - veor.64 \argA5, \argA5, d11 - vsri.64 d1, \argA3, #64-6 - veor.64 \argA2, \argA2, d8 - vsri.64 d2, \argA4, #64-25 - vext.8 d3, \argA5, \argA5, #7 - veor.64 d6, d6, d7 - vbic.64 \argA3, d2, d1 - vadd.u64 d0, \argA2, \argA2 - vbic.64 \argA4, d3, d2 - vsri.64 d0, \argA2, #64-1 - vshl.u64 d4, d6, #18 - veor.64 \argA2, d1, \argA4 - veor.64 \argA3, d0 - vsri.64 d4, d6, #64-18 - vstr.64 \argA3, [sp, #\argA1] - veor.64 d5, \argA3 - vbic.64 \argA5, d1, d0 - vbic.64 \argA3, d4, d3 - vbic.64 \argA4, d0, d4 - veor.64 \argA3, d2 - veor.64 \argA4, d3 - veor.64 \argA5, d4 - - .endm - -.macro KeccakThetaRhoPiChi3 argA1, argA2, argA3, argA4, argA5 - - @d1 = ROL64((argA1^Da), 36)@ - @d2 = ROL64((argA2^De), 10)@ - @d3 = ROL64((argA3^Di), 15)@ - @d4 = ROL64((argA4^Do), 56)@ - @d0 = ROL64((argA5^Du), 27)@ - @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ - @argA2 = Be ^((~Bi)& Bo )@ - @argA3 = Bi ^((~Bo)& Bu )@ - @argA4 = Bo ^((~Bu)& Ba )@ - @argA5 = Bu ^((~Ba)& Be )@ - - veor.64 \argA2, \argA2, d8 - veor.64 \argA3, \argA3, d9 - vshl.u64 d2, \argA2, #10 - vldr.64 d6, [sp, #\argA1] - vshl.u64 d3, \argA3, #15 - veor.64 \argA4, \argA4, d10 - vsri.64 d2, \argA2, #64-10 - vsri.64 d3, \argA3, #64-15 - veor.64 \argA5, \argA5, d11 - vext.8 d4, \argA4, \argA4, #1 - vbic.64 \argA2, d3, d2 - vshl.u64 d0, \argA5, #27 - veor.64 d6, d6, d7 - vbic.64 \argA3, d4, d3 - vsri.64 d0, \argA5, #64-27 - vshl.u64 d1, d6, #36 - veor.64 \argA3, d2 - vbic.64 \argA4, d0, d4 - vsri.64 d1, d6, #64-36 - - veor.64 \argA4, d3 - vbic.64 d6, d2, d1 - vbic.64 \argA5, d1, d0 - veor.64 d6, d0 - veor.64 \argA2, d1 - vstr.64 d6, [sp, #\argA1] - veor.64 d5, d6 - veor.64 \argA5, d4 - - .endm - -.macro KeccakThetaRhoPiChi4 argA1, argA2, argA3, argA4, argA5 - - @d3 = ROL64((argA1^Da), 41)@ - @d4 = ROL64((argA2^De), 2)@ - @d0 = ROL64((argA3^Di), 62)@ - @d1 = ROL64((argA4^Do), 55)@ - @d2 = ROL64((argA5^Du), 39)@ - @argA1 = Ba ^((~Be)& Bi )@ Ca ^= argA1@ - @argA2 = Be ^((~Bi)& Bo )@ - @argA3 = Bi ^((~Bo)& Bu )@ - @argA4 = Bo ^((~Bu)& Ba )@ - @argA5 = Bu ^((~Ba)& Be )@ - - veor.64 \argA2, \argA2, d8 - veor.64 \argA3, \argA3, d9 - vshl.u64 d4, \argA2, #2 - veor.64 \argA5, \argA5, d11 - vshl.u64 d0, \argA3, #62 - vldr.64 d6, [sp, #\argA1] - vsri.64 d4, \argA2, #64-2 - veor.64 \argA4, \argA4, d10 - vsri.64 d0, \argA3, #64-62 - - vshl.u64 d1, \argA4, #55 - veor.64 d6, d6, d7 - vshl.u64 d2, \argA5, #39 - vsri.64 d1, \argA4, #64-55 - vbic.64 \argA4, d0, d4 - vsri.64 d2, \argA5, #64-39 - vbic.64 \argA2, d1, d0 - vshl.u64 d3, d6, #41 - veor.64 \argA5, d4, \argA2 - vbic.64 \argA2, d2, d1 - vsri.64 d3, d6, #64-41 - veor.64 d6, d0, \argA2 - - vbic.64 \argA2, d3, d2 - vbic.64 \argA3, d4, d3 - veor.64 \argA2, d1 - vstr.64 d6, [sp, #\argA1] - veor.64 d5, d6 - veor.64 \argA3, d2 - veor.64 \argA4, d3 - - .endm - -@// --- constants - - - .align 8 - .ltorg -KeccakF1600RoundConstantsWithTerminator: - .quad 0x0000000000000001 - .quad 0x0000000000008082 - .quad 0x800000000000808a - .quad 0x8000000080008000 - .quad 0x000000000000808b - .quad 0x0000000080000001 - .quad 0x8000000080008081 - .quad 0x8000000000008009 - .quad 0x000000000000008a - .quad 0x0000000000000088 - .quad 0x0000000080008009 - .quad 0x000000008000000a - .quad 0x000000008000808b - .quad 0x800000000000008b - .quad 0x8000000000008089 - .quad 0x8000000000008003 - .quad 0x8000000000008002 - .quad 0x8000000000000080 - .quad 0x000000000000800a - .quad 0x800000008000000a - .quad 0x8000000080008081 - .quad 0x8000000000008080 - .quad 0x0000000080000001 - .quad 0x8000000080008008 - .quad 0xFFFFFFFFFFFFFFFF @//terminator - - .align 8 - -@// --- code - -@not callable from C! -.global KeccakF_armv7a_neon_asm -KeccakF_armv7a_neon_asm: @ - - adr r3, KeccakF1600RoundConstantsWithTerminator -roundLoop: - - KeccakThetaRhoPiChiIota Aba, d13, d19, d25, d31 - KeccakThetaRhoPiChi1 Aka, d15, d21, d22, d28 - KeccakThetaRhoPiChi2 Asa, d12, d18, d24, d30 - KeccakThetaRhoPiChi3 Aga, d14, d20, d26, d27 - KeccakThetaRhoPiChi4 Ama, d16, d17, d23, d29 - - KeccakThetaRhoPiChiIota Aba, d15, d18, d26, d29 - KeccakThetaRhoPiChi1 Asa, d14, d17, d25, d28 - KeccakThetaRhoPiChi2 Ama, d13, d21, d24, d27 - KeccakThetaRhoPiChi3 Aka, d12, d20, d23, d31 - KeccakThetaRhoPiChi4 Aga, d16, d19, d22, d30 - - KeccakThetaRhoPiChiIota Aba, d14, d21, d23, d30 - KeccakThetaRhoPiChi1 Ama, d12, d19, d26, d28 - KeccakThetaRhoPiChi2 Aga, d15, d17, d24, d31 - KeccakThetaRhoPiChi3 Asa, d13, d20, d22, d29 - KeccakThetaRhoPiChi4 Aka, d16, d18, d25, d27 - - KeccakThetaRhoPiChiIota Aba, d12, d17, d22, d27 - KeccakThetaRhoPiChi1 Aga, d13, d18, d23, d28 - KeccakThetaRhoPiChi2 Aka, d14, d19, d24, d29 - ldr r0, [r3] - KeccakThetaRhoPiChi3 Ama, d15, d20, d25, d30 - cmp r0, #0xFFFFFFFF - KeccakThetaRhoPiChi4 Asa, d16, d21, d26, d31 - - bne roundLoop - bx lr - - @ - .align 8 - -@//void KeccakF_armv7a( tKeccakLane * state ) callable from C -.global KeccakF_armv7a_neon -KeccakF_armv7a_neon: @ - - vpush {q4-q7} - sub sp,sp, #5*8 - - vldr.64 d0, [r0, #0*8] - vldr.64 d12, [r0, #1*8] - vldr.64 d17, [r0, #2*8] - vldr.64 d22, [r0, #3*8] - vldr.64 d27, [r0, #4*8] - - vldr.64 d1, [r0, #5*8] - vldr.64 d13, [r0, #6*8] - vldr.64 d18, [r0, #7*8] - vldr.64 d23, [r0, #8*8] - vldr.64 d28, [r0, #9*8] - - vldr.64 d2, [r0, #10*8] - vldr.64 d14, [r0, #11*8] - vldr.64 d19, [r0, #12*8] - vldr.64 d24, [r0, #13*8] - vldr.64 d29, [r0, #14*8] - - vldr.64 d3, [r0, #15*8] - vldr.64 d15, [r0, #16*8] - vldr.64 d20, [r0, #17*8] - vldr.64 d25, [r0, #18*8] - vldr.64 d30, [r0, #19*8] - - vldr.64 d4, [r0, #20*8] - vldr.64 d16, [r0, #21*8] - vldr.64 d21, [r0, #22*8] - vldr.64 d26, [r0, #23*8] - vldr.64 d31, [r0, #24*8] - - vstr.64 d0, [sp, #Aba] - mov r2, lr - vstr.64 d1, [sp, #Aga] - veor.64 q0, q0, q1 - vstr.64 d2, [sp, #Aka] - veor.64 d5, d0, d1 - vstr.64 d3, [sp, #Ama] - mov r1, r0 - vstr.64 d4, [sp, #Asa] - veor.64 d5, d5, d4 - - bl KeccakF_armv7a_neon_asm - - vpop.64 { d0- d4 } - - vstr.64 d0, [r1, #0*8] - vstr.64 d12, [r1, #1*8] - vstr.64 d17, [r1, #2*8] - vstr.64 d22, [r1, #3*8] - vstr.64 d27, [r1, #4*8] - - vstr.64 d1, [r1, #5*8] - vstr.64 d13, [r1, #6*8] - vstr.64 d18, [r1, #7*8] - vstr.64 d23, [r1, #8*8] - vstr.64 d28, [r1, #9*8] - - vstr.64 d2, [r1, #10*8] - vstr.64 d14, [r1, #11*8] - vstr.64 d19, [r1, #12*8] - vstr.64 d24, [r1, #13*8] - vstr.64 d29, [r1, #14*8] - - vstr.64 d3, [r1, #15*8] - vstr.64 d15, [r1, #16*8] - vstr.64 d20, [r1, #17*8] - vstr.64 d25, [r1, #18*8] - vstr.64 d30, [r1, #19*8] - - vstr.64 d4, [r1, #20*8] - vstr.64 d16, [r1, #21*8] - vstr.64 d21, [r1, #22*8] - vstr.64 d26, [r1, #23*8] - vstr.64 d31, [r1, #24*8] - - vpop {q4-q7} - bx r2 - - @ - diff --git a/c_src/KeccakF-1600-opt32-settings.h b/c_src/KeccakF-1600-opt32-settings.h deleted file mode 100755 index b135918..0000000 --- a/c_src/KeccakF-1600-opt32-settings.h +++ /dev/null @@ -1,4 +0,0 @@ -#define Unrolling 2 -//#define UseBebigokimisa -//#define UseInterleaveTables -#define UseSchedule 3 diff --git a/c_src/KeccakF-1600-opt32.c b/c_src/KeccakF-1600-opt32.c deleted file mode 100755 index aded3a9..0000000 --- a/c_src/KeccakF-1600-opt32.c +++ /dev/null @@ -1,524 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by the designers, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include -#include "brg_endian.h" -#include "KeccakF-1600-opt32-settings.h" -#include "KeccakF-1600-interface.h" - -typedef unsigned char UINT8; -typedef unsigned short UINT16; -typedef unsigned int UINT32; -typedef unsigned long long int UINT64; - -#ifdef UseInterleaveTables -int interleaveTablesBuilt = 0; -UINT16 interleaveTable[65536]; -UINT16 deinterleaveTable[65536]; - -void buildInterleaveTables() -{ - UINT32 i, j; - UINT16 x; - - if (!interleaveTablesBuilt) { - for(i=0; i<65536; i++) { - x = 0; - for(j=0; j<16; j++) { - if (i & (1 << j)) - x |= (1 << (j/2 + 8*(j%2))); - } - interleaveTable[i] = x; - deinterleaveTable[x] = (UINT16)i; - } - interleaveTablesBuilt = 1; - } -} - -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - -#define xor2bytesIntoInterleavedWords(even, odd, source, j) \ - i##j = interleaveTable[((const UINT16*)source)[j]]; \ - ((UINT8*)even)[j] ^= i##j & 0xFF; \ - ((UINT8*)odd)[j] ^= i##j >> 8; - -#define setInterleavedWordsInto2bytes(dest, even, odd, j) \ - d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \ - ((UINT16*)dest)[j] = d##j; - -#else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) - -#define xor2bytesIntoInterleavedWords(even, odd, source, j) \ - i##j = interleaveTable[source[2*j] ^ ((UINT16)source[2*j+1] << 8)]; \ - *even ^= (i##j & 0xFF) << (j*8); \ - *odd ^= ((i##j >> 8) & 0xFF) << (j*8); - -#define setInterleavedWordsInto2bytes(dest, even, odd, j) \ - d##j = deinterleaveTable[((even >> (j*8)) & 0xFF) ^ (((odd >> (j*8)) & 0xFF) << 8)]; \ - dest[2*j] = d##j & 0xFF; \ - dest[2*j+1] = d##j >> 8; - -#endif // Endianness - -void xor8bytesIntoInterleavedWords(UINT32 *even, UINT32 *odd, const UINT8* source) -{ - UINT16 i0, i1, i2, i3; - - xor2bytesIntoInterleavedWords(even, odd, source, 0) - xor2bytesIntoInterleavedWords(even, odd, source, 1) - xor2bytesIntoInterleavedWords(even, odd, source, 2) - xor2bytesIntoInterleavedWords(even, odd, source, 3) -} - -#define xorLanesIntoState(laneCount, state, input) \ - { \ - int i; \ - for(i=0; i<(laneCount); i++) \ - xor8bytesIntoInterleavedWords(state+i*2, state+i*2+1, input+i*8); \ - } - -void setInterleavedWordsInto8bytes(UINT8* dest, UINT32 even, UINT32 odd) -{ - UINT16 d0, d1, d2, d3; - - setInterleavedWordsInto2bytes(dest, even, odd, 0) - setInterleavedWordsInto2bytes(dest, even, odd, 1) - setInterleavedWordsInto2bytes(dest, even, odd, 2) - setInterleavedWordsInto2bytes(dest, even, odd, 3) -} - -#define extractLanes(laneCount, state, data) \ - { \ - int i; \ - for(i=0; i<(laneCount); i++) \ - setInterleavedWordsInto8bytes(data+i*8, ((UINT32*)state)[i*2], ((UINT32*)state)[i*2+1]); \ - } - -#else // No interleaving tables - -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - -// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -#define xorInterleavedLE(rateInLanes, state, input) \ - { \ - const UINT32 * pI = (const UINT32 *)input; \ - UINT32 * pS = state; \ - UINT32 t, x0, x1; \ - int i; \ - for (i = (rateInLanes)-1; i >= 0; --i) \ - { \ - x0 = *(pI++); \ - t = (x0 ^ (x0 >> 1)) & 0x22222222UL; x0 = x0 ^ t ^ (t << 1); \ - t = (x0 ^ (x0 >> 2)) & 0x0C0C0C0CUL; x0 = x0 ^ t ^ (t << 2); \ - t = (x0 ^ (x0 >> 4)) & 0x00F000F0UL; x0 = x0 ^ t ^ (t << 4); \ - t = (x0 ^ (x0 >> 8)) & 0x0000FF00UL; x0 = x0 ^ t ^ (t << 8); \ - x1 = *(pI++); \ - t = (x1 ^ (x1 >> 1)) & 0x22222222UL; x1 = x1 ^ t ^ (t << 1); \ - t = (x1 ^ (x1 >> 2)) & 0x0C0C0C0CUL; x1 = x1 ^ t ^ (t << 2); \ - t = (x1 ^ (x1 >> 4)) & 0x00F000F0UL; x1 = x1 ^ t ^ (t << 4); \ - t = (x1 ^ (x1 >> 8)) & 0x0000FF00UL; x1 = x1 ^ t ^ (t << 8); \ - *(pS++) ^= (UINT16)x0 | (x1 << 16); \ - *(pS++) ^= (x0 >> 16) | (x1 & 0xFFFF0000); \ - } \ - } - -#define xorLanesIntoState(laneCount, state, input) \ - xorInterleavedLE(laneCount, state, input) - -#else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) - -// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -UINT64 toInterleaving(UINT64 x) -{ - UINT64 t; - - t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1); - t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2); - t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4); - t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8); - t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16); - - return x; -} - -void xor8bytesIntoInterleavedWords(UINT32* evenAndOdd, const UINT8* source) -{ - // This can be optimized - UINT64 sourceWord = - (UINT64)source[0] - ^ (((UINT64)source[1]) << 8) - ^ (((UINT64)source[2]) << 16) - ^ (((UINT64)source[3]) << 24) - ^ (((UINT64)source[4]) << 32) - ^ (((UINT64)source[5]) << 40) - ^ (((UINT64)source[6]) << 48) - ^ (((UINT64)source[7]) << 56); - UINT64 evenAndOddWord = toInterleaving(sourceWord); - evenAndOdd[0] ^= (UINT32)evenAndOddWord; - evenAndOdd[1] ^= (UINT32)(evenAndOddWord >> 32); -} - -#define xorLanesIntoState(laneCount, state, input) \ - { \ - int i; \ - for(i=0; i<(laneCount); i++) \ - xor8bytesIntoInterleavedWords(state+i*2, input+i*8); \ - } - -#endif // Endianness - -// Credit: Henry S. Warren, Hacker's Delight, Addison-Wesley, 2002 -UINT64 fromInterleaving(UINT64 x) -{ - UINT64 t; - - t = (x ^ (x >> 16)) & 0x00000000FFFF0000ULL; x = x ^ t ^ (t << 16); - t = (x ^ (x >> 8)) & 0x0000FF000000FF00ULL; x = x ^ t ^ (t << 8); - t = (x ^ (x >> 4)) & 0x00F000F000F000F0ULL; x = x ^ t ^ (t << 4); - t = (x ^ (x >> 2)) & 0x0C0C0C0C0C0C0C0CULL; x = x ^ t ^ (t << 2); - t = (x ^ (x >> 1)) & 0x2222222222222222ULL; x = x ^ t ^ (t << 1); - - return x; -} - -void setInterleavedWordsInto8bytes(UINT8* dest, UINT32* evenAndOdd) -{ -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - ((UINT64*)dest)[0] = fromInterleaving(*(UINT64*)evenAndOdd); -#else // (PLATFORM_BYTE_ORDER == IS_BIG_ENDIAN) - // This can be optimized - UINT64 evenAndOddWord = (UINT64)evenAndOdd[0] ^ ((UINT64)evenAndOdd[1] << 32); - UINT64 destWord = fromInterleaving(evenAndOddWord); - dest[0] = destWord & 0xFF; - dest[1] = (destWord >> 8) & 0xFF; - dest[2] = (destWord >> 16) & 0xFF; - dest[3] = (destWord >> 24) & 0xFF; - dest[4] = (destWord >> 32) & 0xFF; - dest[5] = (destWord >> 40) & 0xFF; - dest[6] = (destWord >> 48) & 0xFF; - dest[7] = (destWord >> 56) & 0xFF; -#endif // Endianness -} - -#define extractLanes(laneCount, state, data) \ - { \ - int i; \ - for(i=0; i<(laneCount); i++) \ - setInterleavedWordsInto8bytes(data+i*8, (UINT32*)state+i*2); \ - } - -#endif // With or without interleaving tables - -#if defined(_MSC_VER) -#define ROL32(a, offset) _rotl(a, offset) -#elif (defined (__arm__) && defined(__ARMCC_VERSION)) -#define ROL32(a, offset) __ror(a, 32-(offset)) -#else -#define ROL32(a, offset) ((((UINT32)a) << (offset)) ^ (((UINT32)a) >> (32-(offset)))) -#endif - -#include "KeccakF-1600-unrolling.macros" -#include "KeccakF-1600-32.macros" - -#if (UseSchedule == 3) - -#ifdef UseBebigokimisa -#error "No lane complementing with schedule 3." -#endif - -#if (Unrolling != 2) -#error "Only unrolling 2 is supported by schedule 3." -#endif - -void KeccakPermutationOnWords(UINT32 *state) -{ - rounds -} - -void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount) -{ - xorLanesIntoState(laneCount, state, input) - rounds -} - -#ifdef ProvideFast576 -void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input) -{ - xorLanesIntoState(9, state, input) - rounds -} -#endif - -#ifdef ProvideFast832 -void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input) -{ - xorLanesIntoState(13, state, input) - rounds -} -#endif - -#ifdef ProvideFast1024 -void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input) -{ - xorLanesIntoState(16, state, input) - rounds -} -#endif - -#ifdef ProvideFast1088 -void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input) -{ - xorLanesIntoState(17, state, input) - rounds -} -#endif - -#ifdef ProvideFast1152 -void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input) -{ - xorLanesIntoState(18, state, input) - rounds -} -#endif - -#ifdef ProvideFast1344 -void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input) -{ - xorLanesIntoState(21, state, input) - rounds -} -#endif - -#else // (Schedule != 3) - -void KeccakPermutationOnWords(UINT32 *state) -{ - declareABCDE -#if (Unrolling != 24) - unsigned int i; -#endif - - copyFromState(A, state) - rounds -} - -void KeccakPermutationOnWordsAfterXoring(UINT32 *state, const UINT8 *input, unsigned int laneCount) -{ - declareABCDE - unsigned int i; - - xorLanesIntoState(laneCount, state, input) - copyFromState(A, state) - rounds -} - -#ifdef ProvideFast576 -void KeccakPermutationOnWordsAfterXoring576bits(UINT32 *state, const UINT8 *input) -{ - declareABCDE - unsigned int i; - - xorLanesIntoState(9, state, input) - copyFromState(A, state) - rounds -} -#endif - -#ifdef ProvideFast832 -void KeccakPermutationOnWordsAfterXoring832bits(UINT32 *state, const UINT8 *input) -{ - declareABCDE - unsigned int i; - - xorLanesIntoState(13, state, input) - copyFromState(A, state) - rounds -} -#endif - -#ifdef ProvideFast1024 -void KeccakPermutationOnWordsAfterXoring1024bits(UINT32 *state, const UINT8 *input) -{ - declareABCDE - unsigned int i; - - xorLanesIntoState(16, state, input) - copyFromState(A, state) - rounds -} -#endif - -#ifdef ProvideFast1088 -void KeccakPermutationOnWordsAfterXoring1088bits(UINT32 *state, const UINT8 *input) -{ - declareABCDE - unsigned int i; - - xorLanesIntoState(17, state, input) - copyFromState(A, state) - rounds -} -#endif - -#ifdef ProvideFast1152 -void KeccakPermutationOnWordsAfterXoring1152bits(UINT32 *state, const UINT8 *input) -{ - declareABCDE - unsigned int i; - - xorLanesIntoState(18, state, input) - copyFromState(A, state) - rounds -} -#endif - -#ifdef ProvideFast1344 -void KeccakPermutationOnWordsAfterXoring1344bits(UINT32 *state, const UINT8 *input) -{ - declareABCDE - unsigned int i; - - xorLanesIntoState(21, state, input) - copyFromState(A, state) - rounds -} -#endif - -#endif - -void KeccakInitialize() -{ -#ifdef UseInterleaveTables - buildInterleaveTables(); -#endif -} - -void KeccakInitializeState(unsigned char *state) -{ - memset(state, 0, 200); -#ifdef UseBebigokimisa - ((UINT32*)state)[ 2] = ~(UINT32)0; - ((UINT32*)state)[ 3] = ~(UINT32)0; - ((UINT32*)state)[ 4] = ~(UINT32)0; - ((UINT32*)state)[ 5] = ~(UINT32)0; - ((UINT32*)state)[16] = ~(UINT32)0; - ((UINT32*)state)[17] = ~(UINT32)0; - ((UINT32*)state)[24] = ~(UINT32)0; - ((UINT32*)state)[25] = ~(UINT32)0; - ((UINT32*)state)[34] = ~(UINT32)0; - ((UINT32*)state)[35] = ~(UINT32)0; - ((UINT32*)state)[40] = ~(UINT32)0; - ((UINT32*)state)[41] = ~(UINT32)0; -#endif -} - -void KeccakPermutation(unsigned char *state) -{ - // We assume the state is always stored as interleaved 32-bit words - KeccakPermutationOnWords((UINT32*)state); -} - -#ifdef ProvideFast576 -void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring576bits((UINT32*)state, data); -} -#endif - -#ifdef ProvideFast832 -void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring832bits((UINT32*)state, data); -} -#endif - -#ifdef ProvideFast1024 -void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring1024bits((UINT32*)state, data); -} -#endif - -#ifdef ProvideFast1088 -void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring1088bits((UINT32*)state, data); -} -#endif - -#ifdef ProvideFast1152 -void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring1152bits((UINT32*)state, data); -} -#endif - -#ifdef ProvideFast1344 -void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationOnWordsAfterXoring1344bits((UINT32*)state, data); -} -#endif - -void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount) -{ - KeccakPermutationOnWordsAfterXoring((UINT32*)state, data, laneCount); -} - -#ifdef ProvideFast1024 -void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) -{ - extractLanes(16, state, data) -#ifdef UseBebigokimisa - ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2]; - ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3]; - ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4]; - ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5]; - ((UINT32*)data)[16] = ~((UINT32*)data)[16]; - ((UINT32*)data)[17] = ~((UINT32*)data)[17]; - ((UINT32*)data)[24] = ~((UINT32*)data)[24]; - ((UINT32*)data)[25] = ~((UINT32*)data)[25]; -#endif -} -#endif - -void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) -{ - extractLanes(laneCount, state, data) -#ifdef UseBebigokimisa - if (laneCount > 1) { - ((UINT32*)data)[ 2] = ~((UINT32*)data)[ 2]; - ((UINT32*)data)[ 3] = ~((UINT32*)data)[ 3]; - if (laneCount > 2) { - ((UINT32*)data)[ 4] = ~((UINT32*)data)[ 4]; - ((UINT32*)data)[ 5] = ~((UINT32*)data)[ 5]; - if (laneCount > 8) { - ((UINT32*)data)[16] = ~((UINT32*)data)[16]; - ((UINT32*)data)[17] = ~((UINT32*)data)[17]; - if (laneCount > 12) { - ((UINT32*)data)[24] = ~((UINT32*)data)[24]; - ((UINT32*)data)[25] = ~((UINT32*)data)[25]; - if (laneCount > 17) { - ((UINT32*)data)[34] = ~((UINT32*)data)[34]; - ((UINT32*)data)[35] = ~((UINT32*)data)[35]; - if (laneCount > 20) { - ((UINT32*)data)[40] = ~((UINT32*)data)[40]; - ((UINT32*)data)[41] = ~((UINT32*)data)[41]; - } - } - } - } - } - } -#endif -} diff --git a/c_src/KeccakF-1600-reference.c b/c_src/KeccakF-1600-reference.c deleted file mode 100755 index 628f710..0000000 --- a/c_src/KeccakF-1600-reference.c +++ /dev/null @@ -1,300 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by the designers, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include -#include -#include "brg_endian.h" -#include "displayIntermediateValues.h" -#include "KeccakNISTInterface.h" -#include "KeccakF-1600-interface.h" - -typedef unsigned char UINT8; -typedef unsigned long long int UINT64; - -#define nrRounds 24 -UINT64 KeccakRoundConstants[nrRounds]; -#define nrLanes 25 -unsigned int KeccakRhoOffsets[nrLanes]; - -void KeccakPermutationOnWords(UINT64 *state); -void theta(UINT64 *A); -void rho(UINT64 *A); -void pi(UINT64 *A); -void chi(UINT64 *A); -void iota(UINT64 *A, unsigned int indexRound); - -void fromBytesToWords(UINT64 *stateAsWords, const unsigned char *state) -{ - unsigned int i, j; - - for(i=0; i<(KeccakPermutationSize/64); i++) { - stateAsWords[i] = 0; - for(j=0; j<(64/8); j++) - stateAsWords[i] |= (UINT64)(state[i*(64/8)+j]) << (8*j); - } -} - -void fromWordsToBytes(unsigned char *state, const UINT64 *stateAsWords) -{ - unsigned int i, j; - - for(i=0; i<(KeccakPermutationSize/64); i++) - for(j=0; j<(64/8); j++) - state[i*(64/8)+j] = (stateAsWords[i] >> (8*j)) & 0xFF; -} - -void KeccakPermutation(unsigned char *state) -{ -#if (PLATFORM_BYTE_ORDER != IS_LITTLE_ENDIAN) - UINT64 stateAsWords[KeccakPermutationSize/64]; -#endif - - displayStateAsBytes(1, "Input of permutation", state); -#if (PLATFORM_BYTE_ORDER == IS_LITTLE_ENDIAN) - KeccakPermutationOnWords((UINT64*)state); -#else - fromBytesToWords(stateAsWords, state); - KeccakPermutationOnWords(stateAsWords); - fromWordsToBytes(state, stateAsWords); -#endif - displayStateAsBytes(1, "State after permutation", state); -} - -void KeccakPermutationAfterXor(unsigned char *state, const unsigned char *data, unsigned int dataLengthInBytes) -{ - unsigned int i; - - for(i=0; i> (64-offset))) : a) - -void theta(UINT64 *A) -{ - unsigned int x, y; - UINT64 C[5], D[5]; - - for(x=0; x<5; x++) { - C[x] = 0; - for(y=0; y<5; y++) - C[x] ^= A[index(x, y)]; - } - for(x=0; x<5; x++) - D[x] = ROL64(C[(x+1)%5], 1) ^ C[(x+4)%5]; - for(x=0; x<5; x++) - for(y=0; y<5; y++) - A[index(x, y)] ^= D[x]; -} - -void rho(UINT64 *A) -{ - unsigned int x, y; - - for(x=0; x<5; x++) for(y=0; y<5; y++) - A[index(x, y)] = ROL64(A[index(x, y)], KeccakRhoOffsets[index(x, y)]); -} - -void pi(UINT64 *A) -{ - unsigned int x, y; - UINT64 tempA[25]; - - for(x=0; x<5; x++) for(y=0; y<5; y++) - tempA[index(x, y)] = A[index(x, y)]; - for(x=0; x<5; x++) for(y=0; y<5; y++) - A[index(0*x+1*y, 2*x+3*y)] = tempA[index(x, y)]; -} - -void chi(UINT64 *A) -{ - unsigned int x, y; - UINT64 C[5]; - - for(y=0; y<5; y++) { - for(x=0; x<5; x++) - C[x] = A[index(x, y)] ^ ((~A[index(x+1, y)]) & A[index(x+2, y)]); - for(x=0; x<5; x++) - A[index(x, y)] = C[x]; - } -} - -void iota(UINT64 *A, unsigned int indexRound) -{ - A[index(0, 0)] ^= KeccakRoundConstants[indexRound]; -} - -int LFSR86540(UINT8 *LFSR) -{ - int result = ((*LFSR) & 0x01) != 0; - if (((*LFSR) & 0x80) != 0) - // Primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1 - (*LFSR) = ((*LFSR) << 1) ^ 0x71; - else - (*LFSR) <<= 1; - return result; -} - -void KeccakInitializeRoundConstants() -{ - UINT8 LFSRstate = 0x01; - unsigned int i, j, bitPosition; - - for(i=0; i> 32)); - fprintf(f, "%08X", (unsigned int)(KeccakRoundConstants[i] & 0xFFFFFFFFULL)); - fprintf(f, "\n"); - } - fprintf(f, "\n"); -} - -void displayRhoOffsets(FILE *f) -{ - unsigned int x, y; - - for(y=0; y<5; y++) for(x=0; x<5; x++) { - fprintf(f, "RhoOffset[%i][%i] = ", x, y); - fprintf(f, "%2i", KeccakRhoOffsets[index(x, y)]); - fprintf(f, "\n"); - } - fprintf(f, "\n"); -} - -void KeccakInitializeState(unsigned char *state) -{ - memset(state, 0, KeccakPermutationSizeInBytes); -} - -#ifdef ProvideFast576 -void KeccakAbsorb576bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationAfterXor(state, data, 72); -} -#endif - -#ifdef ProvideFast832 -void KeccakAbsorb832bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationAfterXor(state, data, 104); -} -#endif - -#ifdef ProvideFast1024 -void KeccakAbsorb1024bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationAfterXor(state, data, 128); -} -#endif - -#ifdef ProvideFast1088 -void KeccakAbsorb1088bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationAfterXor(state, data, 136); -} -#endif - -#ifdef ProvideFast1152 -void KeccakAbsorb1152bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationAfterXor(state, data, 144); -} -#endif - -#ifdef ProvideFast1344 -void KeccakAbsorb1344bits(unsigned char *state, const unsigned char *data) -{ - KeccakPermutationAfterXor(state, data, 168); -} -#endif - -void KeccakAbsorb(unsigned char *state, const unsigned char *data, unsigned int laneCount) -{ - KeccakPermutationAfterXor(state, data, laneCount*8); -} - -#ifdef ProvideFast1024 -void KeccakExtract1024bits(const unsigned char *state, unsigned char *data) -{ - memcpy(data, state, 128); -} -#endif - -void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) -{ - memcpy(data, state, laneCount*8); -} diff --git a/c_src/KeccakF-1600-reference.h b/c_src/KeccakF-1600-reference.h deleted file mode 100755 index 698bab8..0000000 --- a/c_src/KeccakF-1600-reference.h +++ /dev/null @@ -1,20 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by the designers, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#ifndef _KeccakPermutationReference_h_ -#define _KeccakPermutationReference_h_ - -void displayRoundConstants(FILE *f); -void displayRhoOffsets(FILE *f); - -#endif diff --git a/c_src/KeccakF-1600-reference32BI.c b/c_src/KeccakF-1600-reference32BI.c deleted file mode 100755 index 1ec4c23..0000000 --- a/c_src/KeccakF-1600-reference32BI.c +++ /dev/null @@ -1,371 +0,0 @@ -/* -The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -Michaël Peeters and Gilles Van Assche. For more information, feedback or -questions, please refer to our website: http://keccak.noekeon.org/ - -Implementation by the designers, -hereby denoted as "the implementer". - -To the extent possible under law, the implementer has waived all copyright -and related or neighboring rights to the source code in this file. -http://creativecommons.org/publicdomain/zero/1.0/ -*/ - -#include -#include -#include "brg_endian.h" -#include "displayIntermediateValues.h" -#include "KeccakNISTInterface.h" -#include "KeccakF-1600-interface.h" - -typedef unsigned char UINT8; -typedef unsigned int UINT32; - -#define nrRounds 24 -UINT32 KeccakRoundConstants[nrRounds][2]; -#define nrLanes 25 -unsigned int KeccakRhoOffsets[nrLanes]; - -void KeccakPermutationOnWords(UINT32 *state); -void theta(UINT32 *A); -void rho(UINT32 *A); -void pi(UINT32 *A); -void chi(UINT32 *A); -void iota(UINT32 *A, unsigned int indexRound); - -void toBitInterleaving(UINT32 low, UINT32 high, UINT32 *even, UINT32 *odd) -{ - unsigned int i; - - *even = 0; - *odd = 0; - for(i=0; i<64; i++) { - unsigned int inBit; - if (i < 32) - inBit = (low >> i) & 1; - else - inBit = (high >> (i-32)) & 1; - if ((i % 2) == 0) - *even |= inBit << (i/2); - else - *odd |= inBit << ((i-1)/2); - } -} - -void fromBitInterleaving(UINT32 even, UINT32 odd, UINT32 *low, UINT32 *high) -{ - unsigned int i; - - *low = 0; - *high = 0; - for(i=0; i<64; i++) { - unsigned int inBit; - if ((i % 2) == 0) - inBit = (even >> (i/2)) & 1; - else - inBit = (odd >> ((i-1)/2)) & 1; - if (i < 32) - *low |= inBit << i; - else - *high |= inBit << (i-32); - } -} - -void fromBytesToWords(UINT32 *stateAsWords, const unsigned char *state) -{ - unsigned int i, j; - UINT32 low, high; - UINT32 even, odd; - - for(i=0; i<(KeccakPermutationSize/64); i++) { - low = 0; - high = 0; - for(j=0; j<(32/8); j++) - low |= (UINT32)(state[i*(64/8)+j]) << (8*j); - for(j=(32/8); j<(64/8); j++) - high |= (UINT32)(state[i*(64/8)+j]) << (8*j-32); - toBitInterleaving(low, high, &even, &odd); - stateAsWords[2*i+0] = even; - stateAsWords[2*i+1] = odd; - } -} - -void fromWordsToBytes(unsigned char *state, const UINT32 *stateAsWords) -{ - unsigned int i, j; - UINT32 low, high; - - for(i=0; i<(KeccakPermutationSize/64); i++) { - fromBitInterleaving(stateAsWords[2*i+0], stateAsWords[2*i+1], &low, &high); - for(j=0; j<(32/8); j++) - state[i*(64/8)+j] = (low >> (8*j)) & 0xFF; - for(j=32/8; j<(64/8); j++) - state[i*(64/8)+j] = (high >> (8*j-32)) & 0xFF; - } -} - -void KeccakPermutation(unsigned char *state) -{ - UINT32 stateAsWords[KeccakPermutationSize/32]; - - displayStateAsBytes(1, "Input of permutation", state); - fromBytesToWords(stateAsWords, state); - KeccakPermutationOnWords(stateAsWords); - fromWordsToBytes(state, stateAsWords); - displayStateAsBytes(1, "State after permutation", state); -} - -void KeccakPermutationAfterXor(unsigned char *state, const unsigned char *data, unsigned int dataLengthInBytes) -{ - unsigned int i; - - for(i=0; i> (32-offset))) : a) - -void ROL64(UINT32 inEven, UINT32 inOdd, UINT32 *outEven, UINT32 *outOdd, unsigned int offset) -{ - if ((offset % 2) == 0) { - *outEven = ROL32(inEven, offset/2); - *outOdd = ROL32(inOdd, offset/2); - } - else { - *outEven = ROL32(inOdd, (offset+1)/2); - *outOdd = ROL32(inEven, (offset-1)/2); - } -} - -void theta(UINT32 *A) -{ - unsigned int x, y, z; - UINT32 C[5][2], D[5][2]; - - for(x=0; x<5; x++) { - for(z=0; z<2; z++) { - C[x][z] = 0; - for(y=0; y<5; y++) - C[x][z] ^= A[index(x, y, z)]; - } - } - for(x=0; x<5; x++) { - ROL64(C[(x+1)%5][0], C[(x+1)%5][1], &(D[x][0]), &(D[x][1]), 1); - for(z=0; z<2; z++) - D[x][z] ^= C[(x+4)%5][z]; - } - for(x=0; x<5; x++) - for(y=0; y<5; y++) - for(z=0; z<2; z++) - A[index(x, y, z)] ^= D[x][z]; -} - -void rho(UINT32 *A) -{ - unsigned int x, y; - - for(x=0; x<5; x++) for(y=0; y<5; y++) - ROL64(A[index(x, y, 0)], A[index(x, y, 1)], &(A[index(x, y, 0)]), &(A[index(x, y, 1)]), KeccakRhoOffsets[5*y+x]); -} - -void pi(UINT32 *A) -{ - unsigned int x, y, z; - UINT32 tempA[50]; - - for(x=0; x<5; x++) for(y=0; y<5; y++) for(z=0; z<2; z++) - tempA[index(x, y, z)] = A[index(x, y, z)]; - for(x=0; x<5; x++) for(y=0; y<5; y++) for(z=0; z<2; z++) - A[index(0*x+1*y, 2*x+3*y, z)] = tempA[index(x, y, z)]; -} - -void chi(UINT32 *A) -{ - unsigned int x, y, z; - UINT32 C[5][2]; - - for(y=0; y<5; y++) { - for(x=0; x<5; x++) - for(z=0; z<2; z++) - C[x][z] = A[index(x, y, z)] ^ ((~A[index(x+1, y, z)]) & A[index(x+2, y, z)]); - for(x=0; x<5; x++) - for(z=0; z<2; z++) - A[index(x, y, z)] = C[x][z]; - } -} - -void iota(UINT32 *A, unsigned int indexRound) -{ - A[index(0, 0, 0)] ^= KeccakRoundConstants[indexRound][0]; - A[index(0, 0, 1)] ^= KeccakRoundConstants[indexRound][1]; -} - -int LFSR86540(UINT8 *LFSR) -{ - int result = ((*LFSR) & 0x01) != 0; - if (((*LFSR) & 0x80) != 0) - // Primitive polynomial over GF(2): x^8+x^6+x^5+x^4+1 - (*LFSR) = ((*LFSR) << 1) ^ 0x71; - else - (*LFSR) <<= 1; - return result; -} - -void KeccakInitializeRoundConstants() -{ - UINT8 LFSRstate = 0x01; - unsigned int i, j, bitPosition; - UINT32 low, high; - - for(i=0; i -#include "KeccakF-1600-interface.h" - -#define UseBebigokimisa - -typedef unsigned char UINT8; -typedef unsigned long long int UINT64; - -void KeccakInitialize() -{ -} - -void KeccakExtract(const unsigned char *state, unsigned char *data, unsigned int laneCount) -{ - memcpy(data, state, laneCount*8); -#ifdef UseBebigokimisa - if (laneCount > 8) - { - ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; - ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; - ((UINT64*)data)[ 8] = ~((UINT64*)data)[ 8]; - - if (laneCount > 12) - { - ((UINT64*)data)[12] = ~((UINT64*)data)[12]; - if (laneCount > 17) - { - ((UINT64*)data)[17] = ~((UINT64*)data)[17]; - if (laneCount > 20) - { - ((UINT64*)data)[20] = ~((UINT64*)data)[20]; - } - } - } - } - else - { - if (laneCount > 1) - { - ((UINT64*)data)[ 1] = ~((UINT64*)data)[ 1]; - if (laneCount > 2) - { - ((UINT64*)data)[ 2] = ~((UINT64*)data)[ 2]; - } - } - } - -#endif -} diff --git a/c_src/KeccakF-1600-x86-64-gas.s b/c_src/KeccakF-1600-x86-64-gas.s deleted file mode 100755 index 289a84e..0000000 --- a/c_src/KeccakF-1600-x86-64-gas.s +++ /dev/null @@ -1,766 +0,0 @@ -# -# The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -# Michaël Peeters and Gilles Van Assche. For more information, feedback or -# questions, please refer to our website: http://keccak.noekeon.org/ -# -# Implementation by Ronny Van Keer, -# hereby denoted as "the implementer". -# -# To the extent possible under law, the implementer has waived all copyright -# and related or neighboring rights to the source code in this file. -# http://creativecommons.org/publicdomain/zero/1.0/ -# - - .text - - -#// --- defines - -.equ UseSIMD, 1 - - -.equ _ba, 0*8 -.equ _be, 1*8 -.equ _bi, 2*8 -.equ _bo, 3*8 -.equ _bu, 4*8 -.equ _ga, 5*8 -.equ _ge, 6*8 -.equ _gi, 7*8 -.equ _go, 8*8 -.equ _gu, 9*8 -.equ _ka, 10*8 -.equ _ke, 11*8 -.equ _ki, 12*8 -.equ _ko, 13*8 -.equ _ku, 14*8 -.equ _ma, 15*8 -.equ _me, 16*8 -.equ _mi, 17*8 -.equ _mo, 18*8 -.equ _mu, 19*8 -.equ _sa, 20*8 -.equ _se, 21*8 -.equ _si, 22*8 -.equ _so, 23*8 -.equ _su, 24*8 - - -# arguments -.equ apState, %rdi -.equ apInput, %rsi -.equ aNbrWords, %rdx - -# xor input into state section -.equ xpState, %r9 - -# round vars -.equ rT1, %rax -.equ rpState, %rdi -.equ rpStack, %rsp - -.equ rDa, %rbx -.equ rDe, %rcx -.equ rDi, %rdx -.equ rDo, %r8 -.equ rDu, %r9 - -.equ rBa, %r10 -.equ rBe, %r11 -.equ rBi, %r12 -.equ rBo, %r13 -.equ rBu, %r14 - -.equ rCa, %rsi -.equ rCe, %rbp -.equ rCi, rBi -.equ rCo, rBo -.equ rCu, %r15 - -.macro mKeccakRound iState, oState, rc, lastRound - - movq rCe, rDa - rolq rDa - - movq _bi(\iState), rCi - xorq _gi(\iState), rDi - xorq rCu, rDa - xorq _ki(\iState), rCi - xorq _mi(\iState), rDi - xorq rDi, rCi - - movq rCi, rDe - rolq rDe - - movq _bo(\iState), rCo - xorq _go(\iState), rDo - xorq rCa, rDe - xorq _ko(\iState), rCo - xorq _mo(\iState), rDo - xorq rDo, rCo - - movq rCo, rDi - rolq rDi - - movq rCu, rDo - xorq rCe, rDi - rolq rDo - - movq rCa, rDu - xorq rCi, rDo - rolq rDu - - movq _ba(\iState), rBa - movq _ge(\iState), rBe - xorq rCo, rDu - movq _ki(\iState), rBi - movq _mo(\iState), rBo - movq _su(\iState), rBu - xorq rDe, rBe - rolq $44, rBe - xorq rDi, rBi - xorq rDa, rBa - rolq $43, rBi - - movq rBe, rCa - movq $\rc, rT1 - orq rBi, rCa - xorq rBa, rT1 - xorq rT1, rCa - movq rCa, _ba(\oState) - - xorq rDu, rBu - rolq $14, rBu - movq rBa, rCu - andq rBe, rCu - xorq rBu, rCu - movq rCu, _bu(\oState) - - xorq rDo, rBo - rolq $21, rBo - movq rBo, rT1 - andq rBu, rT1 - xorq rBi, rT1 - movq rT1, _bi(\oState) - - notq rBi - orq rBa, rBu - orq rBo, rBi - xorq rBo, rBu - xorq rBe, rBi - movq rBu, _bo(\oState) - movq rBi, _be(\oState) - .if \lastRound == 0 - movq rBi, rCe - .endif - - - movq _gu(\iState), rBe - xorq rDu, rBe - movq _ka(\iState), rBi - rolq $20, rBe - xorq rDa, rBi - rolq $3, rBi - movq _bo(\iState), rBa - movq rBe, rT1 - orq rBi, rT1 - xorq rDo, rBa - movq _me(\iState), rBo - movq _si(\iState), rBu - rolq $28, rBa - xorq rBa, rT1 - movq rT1, _ga(\oState) - .if \lastRound == 0 - xor rT1, rCa - .endif - - xorq rDe, rBo - rolq $45, rBo - movq rBi, rT1 - andq rBo, rT1 - xorq rBe, rT1 - movq rT1, _ge(\oState) - .if \lastRound == 0 - xorq rT1, rCe - .endif - - xorq rDi, rBu - rolq $61, rBu - movq rBu, rT1 - orq rBa, rT1 - xorq rBo, rT1 - movq rT1, _go(\oState) - - andq rBe, rBa - xorq rBu, rBa - movq rBa, _gu(\oState) - notq rBu - .if \lastRound == 0 - xorq rBa, rCu - .endif - - orq rBu, rBo - xorq rBi, rBo - movq rBo, _gi(\oState) - - - movq _be(\iState), rBa - movq _gi(\iState), rBe - movq _ko(\iState), rBi - movq _mu(\iState), rBo - movq _sa(\iState), rBu - xorq rDi, rBe - rolq $6, rBe - xorq rDo, rBi - rolq $25, rBi - movq rBe, rT1 - orq rBi, rT1 - xorq rDe, rBa - rolq $1, rBa - xorq rBa, rT1 - movq rT1, _ka(\oState) - .if \lastRound == 0 - xor rT1, rCa - .endif - - xorq rDu, rBo - rolq $8, rBo - movq rBi, rT1 - andq rBo, rT1 - xorq rBe, rT1 - movq rT1, _ke(\oState) - .if \lastRound == 0 - xorq rT1, rCe - .endif - - xorq rDa, rBu - rolq $18, rBu - notq rBo - movq rBo, rT1 - andq rBu, rT1 - xorq rBi, rT1 - movq rT1, _ki(\oState) - - movq rBu, rT1 - orq rBa, rT1 - xorq rBo, rT1 - movq rT1, _ko(\oState) - - andq rBe, rBa - xorq rBu, rBa - movq rBa, _ku(\oState) - .if \lastRound == 0 - xorq rBa, rCu - .endif - - movq _ga(\iState), rBe - xorq rDa, rBe - movq _ke(\iState), rBi - rolq $36, rBe - xorq rDe, rBi - movq _bu(\iState), rBa - rolq $10, rBi - movq rBe, rT1 - movq _mi(\iState), rBo - andq rBi, rT1 - xorq rDu, rBa - movq _so(\iState), rBu - rolq $27, rBa - xorq rBa, rT1 - movq rT1, _ma(\oState) - .if \lastRound == 0 - xor rT1, rCa - .endif - - xorq rDi, rBo - rolq $15, rBo - movq rBi, rT1 - orq rBo, rT1 - xorq rBe, rT1 - movq rT1, _me(\oState) - .if \lastRound == 0 - xorq rT1, rCe - .endif - - xorq rDo, rBu - rolq $56, rBu - notq rBo - movq rBo, rT1 - orq rBu, rT1 - xorq rBi, rT1 - movq rT1, _mi(\oState) - - orq rBa, rBe - xorq rBu, rBe - movq rBe, _mu(\oState) - - andq rBa, rBu - xorq rBo, rBu - movq rBu, _mo(\oState) - .if \lastRound == 0 - xorq rBe, rCu - .endif - - - movq _bi(\iState), rBa - movq _go(\iState), rBe - movq _ku(\iState), rBi - xorq rDi, rBa - movq _ma(\iState), rBo - rolq $62, rBa - xorq rDo, rBe - movq _se(\iState), rBu - rolq $55, rBe - - xorq rDu, rBi - movq rBa, rDu - xorq rDe, rBu - rolq $2, rBu - andq rBe, rDu - xorq rBu, rDu - movq rDu, _su(\oState) - - rolq $39, rBi - .if \lastRound == 0 - xorq rDu, rCu - .endif - notq rBe - xorq rDa, rBo - movq rBe, rDa - andq rBi, rDa - xorq rBa, rDa - movq rDa, _sa(\oState) - .if \lastRound == 0 - xor rDa, rCa - .endif - - rolq $41, rBo - movq rBi, rDe - orq rBo, rDe - xorq rBe, rDe - movq rDe, _se(\oState) - .if \lastRound == 0 - xorq rDe, rCe - .endif - - movq rBo, rDi - movq rBu, rDo - andq rBu, rDi - orq rBa, rDo - xorq rBi, rDi - xorq rBo, rDo - movq rDi, _si(\oState) - movq rDo, _so(\oState) - - .endm - -.macro mKeccakPermutation - - subq $8*25, %rsp - - movq _ba(rpState), rCa - movq _be(rpState), rCe - movq _bu(rpState), rCu - - xorq _ga(rpState), rCa - xorq _ge(rpState), rCe - xorq _gu(rpState), rCu - - xorq _ka(rpState), rCa - xorq _ke(rpState), rCe - xorq _ku(rpState), rCu - - xorq _ma(rpState), rCa - xorq _me(rpState), rCe - xorq _mu(rpState), rCu - - xorq _sa(rpState), rCa - xorq _se(rpState), rCe - movq _si(rpState), rDi - movq _so(rpState), rDo - xorq _su(rpState), rCu - - - mKeccakRound rpState, rpStack, 0x0000000000000001, 0 - mKeccakRound rpStack, rpState, 0x0000000000008082, 0 - mKeccakRound rpState, rpStack, 0x800000000000808a, 0 - mKeccakRound rpStack, rpState, 0x8000000080008000, 0 - mKeccakRound rpState, rpStack, 0x000000000000808b, 0 - mKeccakRound rpStack, rpState, 0x0000000080000001, 0 - - mKeccakRound rpState, rpStack, 0x8000000080008081, 0 - mKeccakRound rpStack, rpState, 0x8000000000008009, 0 - mKeccakRound rpState, rpStack, 0x000000000000008a, 0 - mKeccakRound rpStack, rpState, 0x0000000000000088, 0 - mKeccakRound rpState, rpStack, 0x0000000080008009, 0 - mKeccakRound rpStack, rpState, 0x000000008000000a, 0 - - mKeccakRound rpState, rpStack, 0x000000008000808b, 0 - mKeccakRound rpStack, rpState, 0x800000000000008b, 0 - mKeccakRound rpState, rpStack, 0x8000000000008089, 0 - mKeccakRound rpStack, rpState, 0x8000000000008003, 0 - mKeccakRound rpState, rpStack, 0x8000000000008002, 0 - mKeccakRound rpStack, rpState, 0x8000000000000080, 0 - - mKeccakRound rpState, rpStack, 0x000000000000800a, 0 - mKeccakRound rpStack, rpState, 0x800000008000000a, 0 - mKeccakRound rpState, rpStack, 0x8000000080008081, 0 - mKeccakRound rpStack, rpState, 0x8000000000008080, 0 - mKeccakRound rpState, rpStack, 0x0000000080000001, 0 - mKeccakRound rpStack, rpState, 0x8000000080008008, 1 - - addq $8*25, %rsp - - .endm - -.macro mPushRegs - - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - .endm - - -.macro mPopRegs - - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx - - .endm - - -.macro mXorState128 input, state, offset - .if UseSIMD == 0 - movq \offset(\input), %rax - movq \offset+8(\input), %rcx - xorq %rax, \offset(\state) - xorq %rcx, \offset+8(\state) - .else - movdqu \offset(\input), %xmm0 - pxor \offset(\state), %xmm0 - movdqu %xmm0, \offset(\state) - .endif - .endm - -.macro mXorState256 input, state, offset - .if UseSIMD == 0 - movq \offset(\input), %rax - movq \offset+8(\input), %r10 - movq \offset+16(\input), %rcx - movq \offset+24(\input), %r8 - xorq %rax, \offset(\state) - xorq %r10, \offset+8(\state) - xorq %rcx, \offset+16(\state) - xorq %r8, \offset+24(\state) - .else - movdqu \offset(\input), %xmm0 - pxor \offset(\state), %xmm0 - movdqu \offset+16(\input), %xmm1 - pxor \offset+16(\state), %xmm1 - movdqu %xmm0, \offset(\state) - movdqu %xmm1, \offset+16(\state) - .endif - .endm - -.macro mXorState512 input, state, offset - .if UseSIMD == 0 - mXorState256 \input, \state, \offset - mXorState256 \input, \state, \offset+32 - .else - movdqu \offset(\input), %xmm0 - movdqu \offset+16(\input), %xmm1 - pxor \offset(\state), %xmm0 - movdqu \offset+32(\input), %xmm2 - pxor \offset+16(\state), %xmm1 - movdqu %xmm0, \offset(\state) - movdqu \offset+48(\input), %xmm3 - pxor \offset+32(\state), %xmm2 - movdqu %xmm1, \offset+16(\state) - pxor \offset+48(\state), %xmm3 - movdqu %xmm2, \offset+32(\state) - movdqu %xmm3, \offset+48(\state) - .endif - .endm - -# ------------------------------------------------------------------------- - - .size KeccakPermutation, .-KeccakPermutation - .align 2 - .global KeccakPermutation - .type KeccakPermutation, %function -KeccakPermutation: - - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb576bits, .-KeccakAbsorb576bits - .align 2 - .global KeccakAbsorb576bits - .type KeccakAbsorb576bits, %function -KeccakAbsorb576bits: - - mXorState512 apInput, apState, 0 - movq 64(apInput), %rax - xorq %rax, 64(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb832bits, .-KeccakAbsorb832bits - .align 2 - .global KeccakAbsorb832bits - .type KeccakAbsorb832bits, %function -KeccakAbsorb832bits: - - mXorState512 apInput, apState, 0 - mXorState256 apInput, apState, 64 - movq 96(apInput), %rax - xorq %rax, 96(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1024bits, .-KeccakAbsorb1024bits - .align 2 - .global KeccakAbsorb1024bits - .type KeccakAbsorb1024bits, %function -KeccakAbsorb1024bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1088bits, .-KeccakAbsorb1088bits - .align 2 - .global KeccakAbsorb1088bits - .type KeccakAbsorb1088bits, %function -KeccakAbsorb1088bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - movq 128(apInput), %rax - xorq %rax, 128(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1152bits, .-KeccakAbsorb1152bits - .align 2 - .global KeccakAbsorb1152bits - .type KeccakAbsorb1152bits, %function -KeccakAbsorb1152bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - mXorState128 apInput, apState, 128 - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1344bits, .-KeccakAbsorb1344bits - .align 2 - .global KeccakAbsorb1344bits - .type KeccakAbsorb1344bits, %function -KeccakAbsorb1344bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - mXorState256 apInput, apState, 128 - movq 160(apInput), %rax - xorq %rax, 160(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb, .-KeccakAbsorb - .align 2 - .global KeccakAbsorb - .type KeccakAbsorb, %function -KeccakAbsorb: - - movq apState, xpState - - test $16, aNbrWords - jz xorInputToState8 - mXorState512 apInput, xpState, 0 - mXorState512 apInput, xpState, 64 - addq $128, apInput - addq $128, xpState - -xorInputToState8: - test $8, aNbrWords - jz xorInputToState4 - mXorState512 apInput, xpState, 0 - addq $64, apInput - addq $64, xpState - -xorInputToState4: - test $4, aNbrWords - jz xorInputToState2 - mXorState256 apInput, xpState, 0 - addq $32, apInput - addq $32, xpState - -xorInputToState2: - test $2, aNbrWords - jz xorInputToState1 - mXorState128 apInput, xpState, 0 - addq $16, apInput - addq $16, xpState - -xorInputToState1: - test $1, aNbrWords - jz xorInputToStateDone - movq (apInput), %rax - xorq %rax, (xpState) - -xorInputToStateDone: - - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakInitializeState, .-KeccakInitializeState - .align 2 - .global KeccakInitializeState - .type KeccakInitializeState, %function -KeccakInitializeState: - xorq %rax, %rax - xorq %rcx, %rcx - notq %rcx - - .if UseSIMD == 0 - movq %rax, 0*8(apState) - movq %rcx, 1*8(apState) - movq %rcx, 2*8(apState) - movq %rax, 3*8(apState) - movq %rax, 4*8(apState) - movq %rax, 5*8(apState) - movq %rax, 6*8(apState) - movq %rax, 7*8(apState) - movq %rcx, 8*8(apState) - movq %rax, 9*8(apState) - movq %rax, 10*8(apState) - movq %rax, 11*8(apState) - movq %rcx, 12*8(apState) - movq %rax, 13*8(apState) - movq %rax, 14*8(apState) - movq %rax, 15*8(apState) - movq %rax, 16*8(apState) - movq %rcx, 17*8(apState) - movq %rax, 18*8(apState) - movq %rax, 19*8(apState) - movq %rcx, 20*8(apState) - movq %rax, 21*8(apState) - movq %rax, 22*8(apState) - movq %rax, 23*8(apState) - movq %rax, 24*8(apState) - .else - pxor %xmm0, %xmm0 - - movq %rax, 0*8(apState) - movq %rcx, 1*8(apState) - movq %rcx, 2*8(apState) - movq %rax, 3*8(apState) - movdqu %xmm0, 4*8(apState) - movdqu %xmm0, 6*8(apState) - movq %rcx, 8*8(apState) - movq %rax, 9*8(apState) - movdqu %xmm0, 10*8(apState) - movq %rcx, 12*8(apState) - movq %rax, 13*8(apState) - movdqu %xmm0, 14*8(apState) - movq %rax, 16*8(apState) - movq %rcx, 17*8(apState) - movdqu %xmm0, 18*8(apState) - movq %rcx, 20*8(apState) - movq %rax, 21*8(apState) - movdqu %xmm0, 22*8(apState) - movq %rax, 24*8(apState) - .endif - ret - -# ------------------------------------------------------------------------- - - .size KeccakExtract1024bits, .-KeccakExtract1024bits - .align 2 - .global KeccakExtract1024bits - .type KeccakExtract1024bits, %function -KeccakExtract1024bits: - - movq 0*8(apState), %rax - movq 1*8(apState), %rcx - movq 2*8(apState), %rdx - movq 3*8(apState), %r8 - notq %rcx - notq %rdx - movq %rax, 0*8(%rsi) - movq %rcx, 1*8(%rsi) - movq %rdx, 2*8(%rsi) - movq %r8, 3*8(%rsi) - - movq 4*8(apState), %rax - movq 5*8(apState), %rcx - movq 6*8(apState), %rdx - movq 7*8(apState), %r8 - movq %rax, 4*8(%rsi) - movq %rcx, 5*8(%rsi) - movq %rdx, 6*8(%rsi) - movq %r8, 7*8(%rsi) - - movq 8*8(apState), %rax - movq 9*8(apState), %rcx - movq 10*8(apState), %rdx - movq 11*8(apState), %r8 - notq %rax - movq %rax, 8*8(%rsi) - movq %rcx, 9*8(%rsi) - movq %rdx, 10*8(%rsi) - movq %r8, 11*8(%rsi) - - movq 12*8(apState), %rax - movq 13*8(apState), %rcx - movq 14*8(apState), %rdx - movq 15*8(apState), %r8 - notq %rax - movq %rax, 12*8(%rsi) - movq %rcx, 13*8(%rsi) - movq %rdx, 14*8(%rsi) - movq %r8, 15*8(%rsi) - ret - diff --git a/c_src/KeccakF-1600-x86-64-shld-gas.s b/c_src/KeccakF-1600-x86-64-shld-gas.s deleted file mode 100755 index bc84762..0000000 --- a/c_src/KeccakF-1600-x86-64-shld-gas.s +++ /dev/null @@ -1,766 +0,0 @@ -# -# The Keccak sponge function, designed by Guido Bertoni, Joan Daemen, -# Michaël Peeters and Gilles Van Assche. For more information, feedback or -# questions, please refer to our website: http://keccak.noekeon.org/ -# -# Implementation by Ronny Van Keer, -# hereby denoted as "the implementer". -# -# To the extent possible under law, the implementer has waived all copyright -# and related or neighboring rights to the source code in this file. -# http://creativecommons.org/publicdomain/zero/1.0/ -# - - .text - - -#// --- defines - -.equ UseSIMD, 1 - - -.equ _ba, 0*8 -.equ _be, 1*8 -.equ _bi, 2*8 -.equ _bo, 3*8 -.equ _bu, 4*8 -.equ _ga, 5*8 -.equ _ge, 6*8 -.equ _gi, 7*8 -.equ _go, 8*8 -.equ _gu, 9*8 -.equ _ka, 10*8 -.equ _ke, 11*8 -.equ _ki, 12*8 -.equ _ko, 13*8 -.equ _ku, 14*8 -.equ _ma, 15*8 -.equ _me, 16*8 -.equ _mi, 17*8 -.equ _mo, 18*8 -.equ _mu, 19*8 -.equ _sa, 20*8 -.equ _se, 21*8 -.equ _si, 22*8 -.equ _so, 23*8 -.equ _su, 24*8 - - -# arguments -.equ apState, %rdi -.equ apInput, %rsi -.equ aNbrWords, %rdx - -# xor input into state section -.equ xpState, %r9 - -# round vars -.equ rT1, %rax -.equ rpState, %rdi -.equ rpStack, %rsp - -.equ rDa, %rbx -.equ rDe, %rcx -.equ rDi, %rdx -.equ rDo, %r8 -.equ rDu, %r9 - -.equ rBa, %r10 -.equ rBe, %r11 -.equ rBi, %r12 -.equ rBo, %r13 -.equ rBu, %r14 - -.equ rCa, %rsi -.equ rCe, %rbp -.equ rCi, rBi -.equ rCo, rBo -.equ rCu, %r15 - -.macro mKeccakRound iState, oState, rc, lastRound - - movq rCe, rDa - shld $1, rDa, rDa - - movq _bi(\iState), rCi - xorq _gi(\iState), rDi - xorq _ki(\iState), rCi - xorq rCu, rDa - xorq _mi(\iState), rDi - xorq rDi, rCi - - movq rCi, rDe - shld $1, rDe, rDe - - movq _bo(\iState), rCo - xorq _go(\iState), rDo - xorq _ko(\iState), rCo - xorq rCa, rDe - xorq _mo(\iState), rDo - xorq rDo, rCo - - movq rCo, rDi - shld $1, rDi, rDi - - movq rCu, rDo - xorq rCe, rDi - shld $1, rDo, rDo - - movq rCa, rDu - xorq rCi, rDo - shld $1, rDu, rDu - - movq _ba(\iState), rBa - movq _ge(\iState), rBe - xorq rCo, rDu - movq _ki(\iState), rBi - movq _mo(\iState), rBo - movq _su(\iState), rBu - xorq rDe, rBe - shld $44, rBe, rBe - xorq rDi, rBi - xorq rDa, rBa - shld $43, rBi, rBi - - movq rBe, rCa - movq $\rc, rT1 - orq rBi, rCa - xorq rBa, rT1 - xorq rT1, rCa - movq rCa, _ba(\oState) - - xorq rDu, rBu - shld $14, rBu, rBu - movq rBa, rCu - andq rBe, rCu - xorq rBu, rCu - movq rCu, _bu(\oState) - - xorq rDo, rBo - shld $21, rBo, rBo - movq rBo, rT1 - andq rBu, rT1 - xorq rBi, rT1 - movq rT1, _bi(\oState) - - notq rBi - orq rBa, rBu - orq rBo, rBi - xorq rBo, rBu - xorq rBe, rBi - movq rBu, _bo(\oState) - movq rBi, _be(\oState) - .if \lastRound == 0 - movq rBi, rCe - .endif - - - movq _gu(\iState), rBe - xorq rDu, rBe - movq _ka(\iState), rBi - shld $20, rBe, rBe - xorq rDa, rBi - shld $3, rBi, rBi - movq _bo(\iState), rBa - movq rBe, rT1 - orq rBi, rT1 - xorq rDo, rBa - movq _me(\iState), rBo - movq _si(\iState), rBu - shld $28, rBa, rBa - xorq rBa, rT1 - movq rT1, _ga(\oState) - .if \lastRound == 0 - xor rT1, rCa - .endif - - xorq rDe, rBo - shld $45, rBo, rBo - movq rBi, rT1 - andq rBo, rT1 - xorq rBe, rT1 - movq rT1, _ge(\oState) - .if \lastRound == 0 - xorq rT1, rCe - .endif - - xorq rDi, rBu - shld $61, rBu, rBu - movq rBu, rT1 - orq rBa, rT1 - xorq rBo, rT1 - movq rT1, _go(\oState) - - andq rBe, rBa - xorq rBu, rBa - movq rBa, _gu(\oState) - notq rBu - .if \lastRound == 0 - xorq rBa, rCu - .endif - - orq rBu, rBo - xorq rBi, rBo - movq rBo, _gi(\oState) - - - movq _be(\iState), rBa - movq _gi(\iState), rBe - movq _ko(\iState), rBi - movq _mu(\iState), rBo - movq _sa(\iState), rBu - xorq rDi, rBe - shld $6, rBe, rBe - xorq rDo, rBi - shld $25, rBi, rBi - movq rBe, rT1 - orq rBi, rT1 - xorq rDe, rBa - shld $1, rBa, rBa - xorq rBa, rT1 - movq rT1, _ka(\oState) - .if \lastRound == 0 - xor rT1, rCa - .endif - - xorq rDu, rBo - shld $8, rBo, rBo - movq rBi, rT1 - andq rBo, rT1 - xorq rBe, rT1 - movq rT1, _ke(\oState) - .if \lastRound == 0 - xorq rT1, rCe - .endif - - xorq rDa, rBu - shld $18, rBu, rBu - notq rBo - movq rBo, rT1 - andq rBu, rT1 - xorq rBi, rT1 - movq rT1, _ki(\oState) - - movq rBu, rT1 - orq rBa, rT1 - xorq rBo, rT1 - movq rT1, _ko(\oState) - - andq rBe, rBa - xorq rBu, rBa - movq rBa, _ku(\oState) - .if \lastRound == 0 - xorq rBa, rCu - .endif - - movq _ga(\iState), rBe - xorq rDa, rBe - movq _ke(\iState), rBi - shld $36, rBe, rBe - xorq rDe, rBi - movq _bu(\iState), rBa - shld $10, rBi, rBi - movq rBe, rT1 - movq _mi(\iState), rBo - andq rBi, rT1 - xorq rDu, rBa - movq _so(\iState), rBu - shld $27, rBa, rBa - xorq rBa, rT1 - movq rT1, _ma(\oState) - .if \lastRound == 0 - xor rT1, rCa - .endif - - xorq rDi, rBo - shld $15, rBo, rBo - movq rBi, rT1 - orq rBo, rT1 - xorq rBe, rT1 - movq rT1, _me(\oState) - .if \lastRound == 0 - xorq rT1, rCe - .endif - - xorq rDo, rBu - shld $56, rBu, rBu - notq rBo - movq rBo, rT1 - orq rBu, rT1 - xorq rBi, rT1 - movq rT1, _mi(\oState) - - orq rBa, rBe - xorq rBu, rBe - movq rBe, _mu(\oState) - - andq rBa, rBu - xorq rBo, rBu - movq rBu, _mo(\oState) - .if \lastRound == 0 - xorq rBe, rCu - .endif - - - movq _bi(\iState), rBa - movq _go(\iState), rBe - movq _ku(\iState), rBi - xorq rDi, rBa - movq _ma(\iState), rBo - shld $62, rBa, rBa - xorq rDo, rBe - movq _se(\iState), rBu - shld $55, rBe, rBe - - xorq rDu, rBi - movq rBa, rDu - xorq rDe, rBu - shld $2, rBu, rBu - andq rBe, rDu - xorq rBu, rDu - movq rDu, _su(\oState) - - shld $39, rBi, rBi - .if \lastRound == 0 - xorq rDu, rCu - .endif - notq rBe - xorq rDa, rBo - movq rBe, rDa - andq rBi, rDa - xorq rBa, rDa - movq rDa, _sa(\oState) - .if \lastRound == 0 - xor rDa, rCa - .endif - - shld $41, rBo, rBo - movq rBi, rDe - orq rBo, rDe - xorq rBe, rDe - movq rDe, _se(\oState) - .if \lastRound == 0 - xorq rDe, rCe - .endif - - movq rBo, rDi - movq rBu, rDo - andq rBu, rDi - orq rBa, rDo - xorq rBi, rDi - xorq rBo, rDo - movq rDi, _si(\oState) - movq rDo, _so(\oState) - - .endm - -.macro mKeccakPermutation - - subq $8*25, %rsp - - movq _ba(rpState), rCa - movq _be(rpState), rCe - movq _bu(rpState), rCu - - xorq _ga(rpState), rCa - xorq _ge(rpState), rCe - xorq _gu(rpState), rCu - - xorq _ka(rpState), rCa - xorq _ke(rpState), rCe - xorq _ku(rpState), rCu - - xorq _ma(rpState), rCa - xorq _me(rpState), rCe - xorq _mu(rpState), rCu - - xorq _sa(rpState), rCa - xorq _se(rpState), rCe - movq _si(rpState), rDi - movq _so(rpState), rDo - xorq _su(rpState), rCu - - - mKeccakRound rpState, rpStack, 0x0000000000000001, 0 - mKeccakRound rpStack, rpState, 0x0000000000008082, 0 - mKeccakRound rpState, rpStack, 0x800000000000808a, 0 - mKeccakRound rpStack, rpState, 0x8000000080008000, 0 - mKeccakRound rpState, rpStack, 0x000000000000808b, 0 - mKeccakRound rpStack, rpState, 0x0000000080000001, 0 - - mKeccakRound rpState, rpStack, 0x8000000080008081, 0 - mKeccakRound rpStack, rpState, 0x8000000000008009, 0 - mKeccakRound rpState, rpStack, 0x000000000000008a, 0 - mKeccakRound rpStack, rpState, 0x0000000000000088, 0 - mKeccakRound rpState, rpStack, 0x0000000080008009, 0 - mKeccakRound rpStack, rpState, 0x000000008000000a, 0 - - mKeccakRound rpState, rpStack, 0x000000008000808b, 0 - mKeccakRound rpStack, rpState, 0x800000000000008b, 0 - mKeccakRound rpState, rpStack, 0x8000000000008089, 0 - mKeccakRound rpStack, rpState, 0x8000000000008003, 0 - mKeccakRound rpState, rpStack, 0x8000000000008002, 0 - mKeccakRound rpStack, rpState, 0x8000000000000080, 0 - - mKeccakRound rpState, rpStack, 0x000000000000800a, 0 - mKeccakRound rpStack, rpState, 0x800000008000000a, 0 - mKeccakRound rpState, rpStack, 0x8000000080008081, 0 - mKeccakRound rpStack, rpState, 0x8000000000008080, 0 - mKeccakRound rpState, rpStack, 0x0000000080000001, 0 - mKeccakRound rpStack, rpState, 0x8000000080008008, 1 - - addq $8*25, %rsp - - .endm - -.macro mPushRegs - - pushq %rbx - pushq %rbp - pushq %r12 - pushq %r13 - pushq %r14 - pushq %r15 - - .endm - - -.macro mPopRegs - - popq %r15 - popq %r14 - popq %r13 - popq %r12 - popq %rbp - popq %rbx - - .endm - - -.macro mXorState128 input, state, offset - .if UseSIMD == 0 - movq \offset(\input), %rax - movq \offset+8(\input), %rcx - xorq %rax, \offset(\state) - xorq %rcx, \offset+8(\state) - .else - movdqu \offset(\input), %xmm0 - pxor \offset(\state), %xmm0 - movdqu %xmm0, \offset(\state) - .endif - .endm - -.macro mXorState256 input, state, offset - .if UseSIMD == 0 - movq \offset(\input), %rax - movq \offset+8(\input), %r10 - movq \offset+16(\input), %rcx - movq \offset+24(\input), %r8 - xorq %rax, \offset(\state) - xorq %r10, \offset+8(\state) - xorq %rcx, \offset+16(\state) - xorq %r8, \offset+24(\state) - .else - movdqu \offset(\input), %xmm0 - pxor \offset(\state), %xmm0 - movdqu \offset+16(\input), %xmm1 - pxor \offset+16(\state), %xmm1 - movdqu %xmm0, \offset(\state) - movdqu %xmm1, \offset+16(\state) - .endif - .endm - -.macro mXorState512 input, state, offset - .if UseSIMD == 0 - mXorState256 \input, \state, \offset - mXorState256 \input, \state, \offset+32 - .else - movdqu \offset(\input), %xmm0 - movdqu \offset+16(\input), %xmm1 - pxor \offset(\state), %xmm0 - movdqu \offset+32(\input), %xmm2 - pxor \offset+16(\state), %xmm1 - movdqu %xmm0, \offset(\state) - movdqu \offset+48(\input), %xmm3 - pxor \offset+32(\state), %xmm2 - movdqu %xmm1, \offset+16(\state) - pxor \offset+48(\state), %xmm3 - movdqu %xmm2, \offset+32(\state) - movdqu %xmm3, \offset+48(\state) - .endif - .endm - -# ------------------------------------------------------------------------- - - .size KeccakPermutation, .-KeccakPermutation - .align 2 - .global KeccakPermutation - .type KeccakPermutation, %function -KeccakPermutation: - - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb576bits, .-KeccakAbsorb576bits - .align 2 - .global KeccakAbsorb576bits - .type KeccakAbsorb576bits, %function -KeccakAbsorb576bits: - - mXorState512 apInput, apState, 0 - movq 64(apInput), %rax - xorq %rax, 64(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb832bits, .-KeccakAbsorb832bits - .align 2 - .global KeccakAbsorb832bits - .type KeccakAbsorb832bits, %function -KeccakAbsorb832bits: - - mXorState512 apInput, apState, 0 - mXorState256 apInput, apState, 64 - movq 96(apInput), %rax - xorq %rax, 96(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1024bits, .-KeccakAbsorb1024bits - .align 2 - .global KeccakAbsorb1024bits - .type KeccakAbsorb1024bits, %function -KeccakAbsorb1024bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1088bits, .-KeccakAbsorb1088bits - .align 2 - .global KeccakAbsorb1088bits - .type KeccakAbsorb1088bits, %function -KeccakAbsorb1088bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - movq 128(apInput), %rax - xorq %rax, 128(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1152bits, .-KeccakAbsorb1152bits - .align 2 - .global KeccakAbsorb1152bits - .type KeccakAbsorb1152bits, %function -KeccakAbsorb1152bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - mXorState128 apInput, apState, 128 - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb1344bits, .-KeccakAbsorb1344bits - .align 2 - .global KeccakAbsorb1344bits - .type KeccakAbsorb1344bits, %function -KeccakAbsorb1344bits: - - mXorState512 apInput, apState, 0 - mXorState512 apInput, apState, 64 - mXorState256 apInput, apState, 128 - movq 160(apInput), %rax - xorq %rax, 160(apState) - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakAbsorb, .-KeccakAbsorb - .align 2 - .global KeccakAbsorb - .type KeccakAbsorb, %function -KeccakAbsorb: - - movq apState, xpState - - test $16, aNbrWords - jz xorInputToState8 - mXorState512 apInput, xpState, 0 - mXorState512 apInput, xpState, 64 - addq $128, apInput - addq $128, xpState - -xorInputToState8: - test $8, aNbrWords - jz xorInputToState4 - mXorState512 apInput, xpState, 0 - addq $64, apInput - addq $64, xpState - -xorInputToState4: - test $4, aNbrWords - jz xorInputToState2 - mXorState256 apInput, xpState, 0 - addq $32, apInput - addq $32, xpState - -xorInputToState2: - test $2, aNbrWords - jz xorInputToState1 - mXorState128 apInput, xpState, 0 - addq $16, apInput - addq $16, xpState - -xorInputToState1: - test $1, aNbrWords - jz xorInputToStateDone - movq (apInput), %rax - xorq %rax, (xpState) - -xorInputToStateDone: - - mPushRegs - mKeccakPermutation - mPopRegs - ret - -# ------------------------------------------------------------------------- - - .size KeccakInitializeState, .-KeccakInitializeState - .align 2 - .global KeccakInitializeState - .type KeccakInitializeState, %function -KeccakInitializeState: - xorq %rax, %rax - xorq %rcx, %rcx - notq %rcx - - .if UseSIMD == 0 - movq %rax, 0*8(apState) - movq %rcx, 1*8(apState) - movq %rcx, 2*8(apState) - movq %rax, 3*8(apState) - movq %rax, 4*8(apState) - movq %rax, 5*8(apState) - movq %rax, 6*8(apState) - movq %rax, 7*8(apState) - movq %rcx, 8*8(apState) - movq %rax, 9*8(apState) - movq %rax, 10*8(apState) - movq %rax, 11*8(apState) - movq %rcx, 12*8(apState) - movq %rax, 13*8(apState) - movq %rax, 14*8(apState) - movq %rax, 15*8(apState) - movq %rax, 16*8(apState) - movq %rcx, 17*8(apState) - movq %rax, 18*8(apState) - movq %rax, 19*8(apState) - movq %rcx, 20*8(apState) - movq %rax, 21*8(apState) - movq %rax, 22*8(apState) - movq %rax, 23*8(apState) - movq %rax, 24*8(apState) - .else - pxor %xmm0, %xmm0 - - movq %rax, 0*8(apState) - movq %rcx, 1*8(apState) - movq %rcx, 2*8(apState) - movq %rax, 3*8(apState) - movdqu %xmm0, 4*8(apState) - movdqu %xmm0, 6*8(apState) - movq %rcx, 8*8(apState) - movq %rax, 9*8(apState) - movdqu %xmm0, 10*8(apState) - movq %rcx, 12*8(apState) - movq %rax, 13*8(apState) - movdqu %xmm0, 14*8(apState) - movq %rax, 16*8(apState) - movq %rcx, 17*8(apState) - movdqu %xmm0, 18*8(apState) - movq %rcx, 20*8(apState) - movq %rax, 21*8(apState) - movdqu %xmm0, 22*8(apState) - movq %rax, 24*8(apState) - .endif - ret - -# ------------------------------------------------------------------------- - - .size KeccakExtract1024bits, .-KeccakExtract1024bits - .align 2 - .global KeccakExtract1024bits - .type KeccakExtract1024bits, %function -KeccakExtract1024bits: - - movq 0*8(apState), %rax - movq 1*8(apState), %rcx - movq 2*8(apState), %rdx - movq 3*8(apState), %r8 - notq %rcx - notq %rdx - movq %rax, 0*8(%rsi) - movq %rcx, 1*8(%rsi) - movq %rdx, 2*8(%rsi) - movq %r8, 3*8(%rsi) - - movq 4*8(apState), %rax - movq 5*8(apState), %rcx - movq 6*8(apState), %rdx - movq 7*8(apState), %r8 - movq %rax, 4*8(%rsi) - movq %rcx, 5*8(%rsi) - movq %rdx, 6*8(%rsi) - movq %r8, 7*8(%rsi) - - movq 8*8(apState), %rax - movq 9*8(apState), %rcx - movq 10*8(apState), %rdx - movq 11*8(apState), %r8 - notq %rax - movq %rax, 8*8(%rsi) - movq %rcx, 9*8(%rsi) - movq %rdx, 10*8(%rsi) - movq %r8, 11*8(%rsi) - - movq 12*8(apState), %rax - movq 13*8(apState), %rcx - movq 14*8(apState), %rdx - movq 15*8(apState), %r8 - notq %rax - movq %rax, 12*8(%rsi) - movq %rcx, 13*8(%rsi) - movq %rdx, 14*8(%rsi) - movq %r8, 15*8(%rsi) - ret - diff --git a/rebar.config b/rebar.config index 16377e2..ec59721 100644 --- a/rebar.config +++ b/rebar.config @@ -9,21 +9,11 @@ {port_env, [{"CFLAGS", "$CFLAGS -O2 -finline-functions -fomit-frame-pointer -fno-strict-aliasing -Wmissing-prototypes -Wall -std=c99"}]}. {port_specs, [ - % TODO: support optimization - % {"i386", "priv/sha3_nif.so", ["c_src/sha3_nif.c", - % "c_src/KeccakNISTInterface.c", - % "c_src/KeccakSponge.c", - % "c_src/KeccakF-1600-opt32.c", - % "c_src/displayIntermediateValues.c"]}, - % {"x86_64", "priv/sha3_nif.so", ["c_src/sha3_nif.c", - % "c_src/KeccakNISTInterface.c", - % "c_src/KeccakSponge.c", - % "c_src/KeccakF-1600-opt64.c", - % "c_src/displayIntermediateValues.c"]}, + % 64 bit only {"priv/sha3_nif.so", ["c_src/sha3_nif.c", "c_src/KeccakNISTInterface.c", "c_src/KeccakSponge.c", - "c_src/KeccakF-1600-reference.c", + "c_src/KeccakF-1600-opt64.c", "c_src/displayIntermediateValues.c"]} ]}.