/* blob: 50640656c10dfe615eb292c3682db28e9ef49153 [file] [log] [blame] */
#include <linux/bitops.h>
#include "threefish_api.h"
/**
 * threefish_encrypt_256() - encrypt one four-word block (Threefish-256)
 * @key_ctx: key context; key[0..4] and tweak[0..2] are read, never written.
 *	     NOTE(review): key[4] and tweak[2] are presumably the derived
 *	     extra key-schedule words prepared by the key setup code; this
 *	     function only consumes them - confirm against the caller.
 * @input:   four 64-bit words to encrypt
 * @output:  receives the four encrypted 64-bit words
 *
 * Runs 18 iterations of "add subkey s, then four mix rounds" (72 rounds in
 * total) and finishes by adding subkey 18.  All arithmetic is modulo 2^64.
 * Every word of @input is loaded before the first store to @output.
 */
void threefish_encrypt_256(struct threefish_key *key_ctx, u64 *input,
			   u64 *output)
{
	/*
	 * Rotation amounts, one row per round of the repeating 8-round
	 * cycle.  Column 0 is used by the mix that accumulates into word 0,
	 * column 1 by the mix that accumulates into word 2.
	 */
	static const unsigned int rot[8][2] = {
		{ 14, 16 }, { 52, 57 }, { 23, 40 }, {  5, 37 },
		{ 25, 33 }, { 46, 12 }, { 58, 22 }, { 32, 32 },
	};
	const u64 ks[5] = {
		key_ctx->key[0], key_ctx->key[1], key_ctx->key[2],
		key_ctx->key[3], key_ctx->key[4],
	};
	const u64 ts[3] = {
		key_ctx->tweak[0], key_ctx->tweak[1], key_ctx->tweak[2],
	};
	u64 w0 = input[0];
	u64 w1 = input[1];
	u64 w2 = input[2];
	u64 w3 = input[3];
	unsigned int s;

	for (s = 0; s < 18; s++) {
		/* Even s uses rows 0-3 of the table, odd s rows 4-7. */
		const unsigned int (*r)[2] = &rot[(s & 1) * 4];

		/*
		 * Subkey s: key words rotate through ks[] modulo 5, tweak
		 * words through ts[] modulo 3, and the subkey number itself
		 * is added to the last word.
		 */
		w0 += ks[s % 5];
		w1 += ks[(s + 1) % 5] + ts[s % 3];
		w2 += ks[(s + 2) % 5] + ts[(s + 1) % 3];
		w3 += ks[(s + 3) % 5] + s;

		/* Round 4s: pairs (0,1) and (2,3). */
		w0 += w1;
		w1 = rol64(w1, r[0][0]) ^ w0;
		w2 += w3;
		w3 = rol64(w3, r[0][1]) ^ w2;

		/* Round 4s+1: pairs (0,3) and (2,1). */
		w0 += w3;
		w3 = rol64(w3, r[1][0]) ^ w0;
		w2 += w1;
		w1 = rol64(w1, r[1][1]) ^ w2;

		/* Round 4s+2: pairs (0,1) and (2,3). */
		w0 += w1;
		w1 = rol64(w1, r[2][0]) ^ w0;
		w2 += w3;
		w3 = rol64(w3, r[2][1]) ^ w2;

		/* Round 4s+3: pairs (0,3) and (2,1). */
		w0 += w3;
		w3 = rol64(w3, r[3][0]) ^ w0;
		w2 += w1;
		w1 = rol64(w1, r[3][1]) ^ w2;
	}

	/* Final subkey, number 18 (18 % 5 == 3, 18 % 3 == 0). */
	output[0] = w0 + ks[3];
	output[1] = w1 + ks[4] + ts[0];
	output[2] = w2 + ks[0] + ts[1];
	output[3] = w3 + ks[1] + 18;
}
/**
 * threefish_decrypt_256() - decrypt one four-word block (Threefish-256)
 * @key_ctx: key context; key[0..4] and tweak[0..2] are read, never written.
 *	     NOTE(review): key[4] and tweak[2] are presumably the derived
 *	     extra key-schedule words prepared by the key setup code; this
 *	     function only consumes them - confirm against the caller.
 * @input:   four 64-bit words to decrypt
 * @output:  receives the four decrypted 64-bit words
 *
 * Subtracts subkey 18, then for s = 17 down to 0 undoes four mix rounds and
 * subtracts subkey s.  All arithmetic is modulo 2^64.  Every word of @input
 * is loaded before the first store to @output.
 */
void threefish_decrypt_256(struct threefish_key *key_ctx, u64 *input,
			   u64 *output)
{
	/*
	 * Rotation amounts, one row per round of the repeating 8-round
	 * cycle.  Column 0 is used by the mix paired with word 0, column 1
	 * by the mix paired with word 2.
	 */
	static const unsigned int rot[8][2] = {
		{ 14, 16 }, { 52, 57 }, { 23, 40 }, {  5, 37 },
		{ 25, 33 }, { 46, 12 }, { 58, 22 }, { 32, 32 },
	};
	const u64 ks[5] = {
		key_ctx->key[0], key_ctx->key[1], key_ctx->key[2],
		key_ctx->key[3], key_ctx->key[4],
	};
	const u64 ts[3] = {
		key_ctx->tweak[0], key_ctx->tweak[1], key_ctx->tweak[2],
	};
	u64 w0 = input[0];
	u64 w1 = input[1];
	u64 w2 = input[2];
	u64 w3 = input[3];
	unsigned int s = 18;

	/* Strip subkey 18 (18 % 5 == 3, 18 % 3 == 0). */
	w0 -= ks[3];
	w1 -= ks[4] + ts[0];
	w2 -= ks[0] + ts[1];
	w3 -= ks[1] + 18;

	/* Inside the body s runs 17, 16, ..., 0. */
	while (s-- > 0) {
		/* Even s uses rows 0-3 of the table, odd s rows 4-7. */
		const unsigned int (*r)[2] = &rot[(s & 1) * 4];

		/* Undo round 4s+3: pairs (0,3) and (2,1). */
		w3 = ror64(w3 ^ w0, r[3][0]);
		w0 -= w3;
		w1 = ror64(w1 ^ w2, r[3][1]);
		w2 -= w1;

		/* Undo round 4s+2: pairs (0,1) and (2,3). */
		w1 = ror64(w1 ^ w0, r[2][0]);
		w0 -= w1;
		w3 = ror64(w3 ^ w2, r[2][1]);
		w2 -= w3;

		/* Undo round 4s+1: pairs (0,3) and (2,1). */
		w3 = ror64(w3 ^ w0, r[1][0]);
		w0 -= w3;
		w1 = ror64(w1 ^ w2, r[1][1]);
		w2 -= w1;

		/* Undo round 4s: pairs (0,1) and (2,3). */
		w1 = ror64(w1 ^ w0, r[0][0]);
		w0 -= w1;
		w3 = ror64(w3 ^ w2, r[0][1]);
		w2 -= w3;

		/*
		 * Remove subkey s: key words rotate through ks[] modulo 5,
		 * tweak words through ts[] modulo 3, and the subkey number
		 * is part of the last word.
		 */
		w0 -= ks[s % 5];
		w1 -= ks[(s + 1) % 5] + ts[s % 3];
		w2 -= ks[(s + 2) % 5] + ts[(s + 1) % 3];
		w3 -= ks[(s + 3) % 5] + s;
	}

	output[0] = w0;
	output[1] = w1;
	output[2] = w2;
	output[3] = w3;
}
/**
 * threefish_encrypt_512() - encrypt one eight-word block (Threefish-512)
 * @key_ctx: key context; key[0..8] and tweak[0..2] are read, never written.
 *	     NOTE(review): key[8] and tweak[2] are presumably the derived
 *	     extra key-schedule words prepared by the key setup code; this
 *	     function only consumes them - confirm against the caller.
 * @input:   eight 64-bit words to encrypt
 * @output:  receives the eight encrypted 64-bit words
 *
 * Runs 18 iterations of "add subkey s, then four mix rounds" (72 rounds in
 * total) and finishes by adding subkey 18.  All arithmetic is modulo 2^64.
 * Every word of @input is loaded before the first store to @output.
 */
void threefish_encrypt_512(struct threefish_key *key_ctx, u64 *input,
			   u64 *output)
{
	/*
	 * Rotation amounts, one row per round of the repeating 8-round
	 * cycle; column m belongs to the m-th mix listed in lane[][] below.
	 */
	static const unsigned int rot[8][4] = {
		{ 46, 36, 19, 37 },
		{ 33, 27, 14, 42 },
		{ 17, 49, 36, 39 },
		{ 44,  9, 54, 56 },
		{ 39, 30, 34, 24 },
		{ 13, 50, 10, 17 },
		{ 25, 29, 39, 43 },
		{  8, 35, 56, 22 },
	};
	/*
	 * Word pairing for each round of the repeating 4-round cycle, as
	 * { accumulator word, rotated word }.  The four mixes of a round
	 * touch disjoint words.
	 */
	static const unsigned int lane[4][4][2] = {
		{ { 0, 1 }, { 2, 3 }, { 4, 5 }, { 6, 7 } },
		{ { 2, 1 }, { 4, 7 }, { 6, 5 }, { 0, 3 } },
		{ { 4, 1 }, { 6, 3 }, { 0, 5 }, { 2, 7 } },
		{ { 6, 1 }, { 0, 7 }, { 2, 5 }, { 4, 3 } },
	};
	u64 v[8];
	u64 ks[9];
	u64 ts[3];
	unsigned int s, d, m, i;

	for (i = 0; i < 8; i++)
		v[i] = input[i];
	for (i = 0; i < 9; i++)
		ks[i] = key_ctx->key[i];
	for (i = 0; i < 3; i++)
		ts[i] = key_ctx->tweak[i];

	for (s = 0; s < 18; s++) {
		/*
		 * Subkey s: key words rotate through ks[] modulo 9; words 5
		 * and 6 additionally take tweak words (modulo 3) and word 7
		 * takes the subkey number.
		 */
		for (i = 0; i < 8; i++)
			v[i] += ks[(s + i) % 9];
		v[5] += ts[s % 3];
		v[6] += ts[(s + 1) % 3];
		v[7] += s;

		/* Rounds 4s .. 4s+3; even s uses rot rows 0-3, odd s 4-7. */
		for (d = 0; d < 4; d++) {
			const unsigned int *r = rot[(s & 1) * 4 + d];

			for (m = 0; m < 4; m++) {
				u64 *sum = &v[lane[d][m][0]];
				u64 *mix = &v[lane[d][m][1]];

				*sum += *mix;
				*mix = rol64(*mix, r[m]) ^ *sum;
			}
		}
	}

	/* Final subkey, number 18: (18 + i) % 9 == i and 18 % 3 == 0. */
	for (i = 0; i < 8; i++)
		v[i] += ks[i];
	v[5] += ts[0];
	v[6] += ts[1];
	v[7] += 18;

	for (i = 0; i < 8; i++)
		output[i] = v[i];
}
void threefish_decrypt_512(struct threefish_key *key_ctx, u64 *input,
u64 *output)
{
u64 b0 = input[0], b1 = input[1],
b2 = input[2], b3 = input[3],
b4 = input[4], b5 = input[5],
b6 = input[6], b7 = input[7];
u64 k0 = key_ctx->key[0], k1 = key_ctx->key[1],
k2 = key_ctx->key[2], k3 = key_ctx->key[3],
k4 = key_ctx->key[4], k5 = key_ctx->key[5],
k6 = key_ctx->key[6], k7 = key_ctx->key[7],
k8 = key_ctx->key[8];
u64 t0 = key_ctx->tweak[0], t1 = key_ctx->tweak[1],
t2 = key_ctx->tweak[2];
u64 tmp;
b0 -= k0;
b1 -= k1;
b2 -= k2;
b3 -= k3;
b4 -= k4;
b5 -= k5 + t0;
b6 -= k6 + t1;
b7 -= k7 + 18;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 10);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 50);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 13);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 24);
b6 -= b7 + k5 + t0;
b7 -= k6 + 17;
tmp = b5 ^ b4;
b5 = ror64(tmp, 34);
b4 -= b5 + k3;
b5 -= k4 + t2;
tmp = b3 ^ b2;
b3 = ror64(tmp, 30);
b2 -= b3 + k1;
b3 -= k2;
tmp = b1 ^ b0;
b1 = ror64(tmp, 39);
b0 -= b1 + k8;
b1 -= k0;
tmp = b3 ^ b4;
b3 = ror64(tmp, 56);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 54);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 9);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 44);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 39);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 36);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 49);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 17);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 42);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 14);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 27);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 33);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 37);
b6 -= b7 + k4 + t2;
b7 -= k5 + 16;
tmp = b5 ^ b4;
b5 = ror64(tmp, 19);
b4 -= b5 + k2;
b5 -= k3 + t1;
tmp = b3 ^ b2;
b3 = ror64(tmp, 36);
b2 -= b3 + k0;
b3 -= k1;
tmp = b1 ^ b0;
b1 = ror64(tmp, 46);
b0 -= b1 + k7;
b1 -= k8;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 10);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 50);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 13);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 24);
b6 -= b7 + k3 + t1;
b7 -= k4 + 15;
tmp = b5 ^ b4;
b5 = ror64(tmp, 34);
b4 -= b5 + k1;
b5 -= k2 + t0;
tmp = b3 ^ b2;
b3 = ror64(tmp, 30);
b2 -= b3 + k8;
b3 -= k0;
tmp = b1 ^ b0;
b1 = ror64(tmp, 39);
b0 -= b1 + k6;
b1 -= k7;
tmp = b3 ^ b4;
b3 = ror64(tmp, 56);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 54);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 9);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 44);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 39);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 36);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 49);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 17);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 42);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 14);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 27);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 33);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 37);
b6 -= b7 + k2 + t0;
b7 -= k3 + 14;
tmp = b5 ^ b4;
b5 = ror64(tmp, 19);
b4 -= b5 + k0;
b5 -= k1 + t2;
tmp = b3 ^ b2;
b3 = ror64(tmp, 36);
b2 -= b3 + k7;
b3 -= k8;
tmp = b1 ^ b0;
b1 = ror64(tmp, 46);
b0 -= b1 + k5;
b1 -= k6;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 10);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 50);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 13);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 24);
b6 -= b7 + k1 + t2;
b7 -= k2 + 13;
tmp = b5 ^ b4;
b5 = ror64(tmp, 34);
b4 -= b5 + k8;
b5 -= k0 + t1;
tmp = b3 ^ b2;
b3 = ror64(tmp, 30);
b2 -= b3 + k6;
b3 -= k7;
tmp = b1 ^ b0;
b1 = ror64(tmp, 39);
b0 -= b1 + k4;
b1 -= k5;
tmp = b3 ^ b4;
b3 = ror64(tmp, 56);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 54);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 9);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 44);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 39);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 36);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 49);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 17);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 42);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 14);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 27);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 33);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 37);
b6 -= b7 + k0 + t1;
b7 -= k1 + 12;
tmp = b5 ^ b4;
b5 = ror64(tmp, 19);
b4 -= b5 + k7;
b5 -= k8 + t0;
tmp = b3 ^ b2;
b3 = ror64(tmp, 36);
b2 -= b3 + k5;
b3 -= k6;
tmp = b1 ^ b0;
b1 = ror64(tmp, 46);
b0 -= b1 + k3;
b1 -= k4;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 10);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 50);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 13);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 24);
b6 -= b7 + k8 + t0;
b7 -= k0 + 11;
tmp = b5 ^ b4;
b5 = ror64(tmp, 34);
b4 -= b5 + k6;
b5 -= k7 + t2;
tmp = b3 ^ b2;
b3 = ror64(tmp, 30);
b2 -= b3 + k4;
b3 -= k5;
tmp = b1 ^ b0;
b1 = ror64(tmp, 39);
b0 -= b1 + k2;
b1 -= k3;
tmp = b3 ^ b4;
b3 = ror64(tmp, 56);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 54);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 9);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 44);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 39);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 36);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 49);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 17);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 42);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 14);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 27);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 33);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 37);
b6 -= b7 + k7 + t2;
b7 -= k8 + 10;
tmp = b5 ^ b4;
b5 = ror64(tmp, 19);
b4 -= b5 + k5;
b5 -= k6 + t1;
tmp = b3 ^ b2;
b3 = ror64(tmp, 36);
b2 -= b3 + k3;
b3 -= k4;
tmp = b1 ^ b0;
b1 = ror64(tmp, 46);
b0 -= b1 + k1;
b1 -= k2;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 10);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 50);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 13);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 24);
b6 -= b7 + k6 + t1;
b7 -= k7 + 9;
tmp = b5 ^ b4;
b5 = ror64(tmp, 34);
b4 -= b5 + k4;
b5 -= k5 + t0;
tmp = b3 ^ b2;
b3 = ror64(tmp, 30);
b2 -= b3 + k2;
b3 -= k3;
tmp = b1 ^ b0;
b1 = ror64(tmp, 39);
b0 -= b1 + k0;
b1 -= k1;
tmp = b3 ^ b4;
b3 = ror64(tmp, 56);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 54);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 9);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 44);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 39);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 36);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 49);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 17);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 42);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 14);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 27);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 33);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 37);
b6 -= b7 + k5 + t0;
b7 -= k6 + 8;
tmp = b5 ^ b4;
b5 = ror64(tmp, 19);
b4 -= b5 + k3;
b5 -= k4 + t2;
tmp = b3 ^ b2;
b3 = ror64(tmp, 36);
b2 -= b3 + k1;
b3 -= k2;
tmp = b1 ^ b0;
b1 = ror64(tmp, 46);
b0 -= b1 + k8;
b1 -= k0;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 10);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 50);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 13);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 24);
b6 -= b7 + k4 + t2;
b7 -= k5 + 7;
tmp = b5 ^ b4;
b5 = ror64(tmp, 34);
b4 -= b5 + k2;
b5 -= k3 + t1;
tmp = b3 ^ b2;
b3 = ror64(tmp, 30);
b2 -= b3 + k0;
b3 -= k1;
tmp = b1 ^ b0;
b1 = ror64(tmp, 39);
b0 -= b1 + k7;
b1 -= k8;
tmp = b3 ^ b4;
b3 = ror64(tmp, 56);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 54);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 9);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 44);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 39);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 36);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 49);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 17);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 42);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 14);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 27);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 33);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 37);
b6 -= b7 + k3 + t1;
b7 -= k4 + 6;
tmp = b5 ^ b4;
b5 = ror64(tmp, 19);
b4 -= b5 + k1;
b5 -= k2 + t0;
tmp = b3 ^ b2;
b3 = ror64(tmp, 36);
b2 -= b3 + k8;
b3 -= k0;
tmp = b1 ^ b0;
b1 = ror64(tmp, 46);
b0 -= b1 + k6;
b1 -= k7;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 10);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 50);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 13);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 24);
b6 -= b7 + k2 + t0;
b7 -= k3 + 5;
tmp = b5 ^ b4;
b5 = ror64(tmp, 34);
b4 -= b5 + k0;
b5 -= k1 + t2;
tmp = b3 ^ b2;
b3 = ror64(tmp, 30);
b2 -= b3 + k7;
b3 -= k8;
tmp = b1 ^ b0;
b1 = ror64(tmp, 39);
b0 -= b1 + k5;
b1 -= k6;
tmp = b3 ^ b4;
b3 = ror64(tmp, 56);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 54);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 9);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 44);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 39);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 36);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 49);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 17);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 42);
b0 -= b3;
tmp = b5 ^ b6;
b5 = ror64(tmp, 14);
b6 -= b5;
tmp = b7 ^ b4;
b7 = ror64(tmp, 27);
b4 -= b7;
tmp = b1 ^ b2;
b1 = ror64(tmp, 33);
b2 -= b1;
tmp = b7 ^ b6;
b7 = ror64(tmp, 37);
b6 -= b7 + k1 + t2;
b7 -= k2 + 4;
tmp = b5 ^ b4;
b5 = ror64(tmp, 19);
b4 -= b5 + k8;
b5 -= k0 + t1;
tmp = b3 ^ b2;
b3 = ror64(tmp, 36);
b2 -= b3 + k6;
b3 -= k7;
tmp = b1 ^ b0;
b1 = ror64(tmp, 46);
b0 -= b1 + k4;
b1 -= k5;
tmp = b3 ^ b4;
b3 = ror64(tmp, 22);
b4 -= b3;
tmp = b5 ^ b2;
b5 = ror64(tmp, 56);
b2 -= b5;
tmp = b7 ^ b0;
b7 = ror64(tmp, 35);
b0 -= b7;
tmp = b1 ^ b6;
b1 = ror64(tmp, 8);
b6 -= b1;
tmp = b7 ^ b2;
b7 = ror64(tmp, 43);
b2 -= b7;
tmp = b5 ^ b0;
b5 = ror64(tmp, 39);
b0 -= b5;
tmp = b3 ^ b6;
b3 = ror64(tmp, 29);
b6 -= b3;
tmp = b1 ^ b4;
b1 = ror64(tmp, 25);
b4 -= b1;
tmp = b3 ^ b0;
b3 = ror64(tmp, 17);
b0 -= b3;
tmp