Program Listing for File keccak.cuh#
↰ Return to documentation for file (src/keccak.cuh
)
// cuEVM: CUDA Ethereum Virtual Machine implementation
// Copyright 2023 Stefan-Dan Ciocirlan (SBIP - Singapore Blockchain Innovation Programme)
// Author: Stefan-Dan Ciocirlan
// Data: 2023-11-30
// SPDX-License-Identifier: MIT
#ifndef _KECCAK_H_
#define _KECCAK_H_
#include "utils.h"
#ifndef ROTL64
#define ROTL64(x, y) (((x) << (y)) | ((x) >> (64 - (y))))
#endif
namespace keccak
{
const uint64_t keccakf_rndc[24] = {
0x0000000000000001, 0x0000000000008082, 0x800000000000808a,
0x8000000080008000, 0x000000000000808b, 0x0000000080000001,
0x8000000080008081, 0x8000000000008009, 0x000000000000008a,
0x0000000000000088, 0x0000000080008009, 0x000000008000000a,
0x000000008000808b, 0x800000000000008b, 0x8000000000008089,
0x8000000000008003, 0x8000000000008002, 0x8000000000000080,
0x000000000000800a, 0x800000008000000a, 0x8000000080008081,
0x8000000000008080, 0x0000000080000001, 0x8000000080008008};
const int keccakf_rotc[24] = {
1, 3, 6, 10, 15, 21, 28, 36, 45, 55, 2, 14,
27, 41, 56, 8, 25, 43, 62, 18, 39, 61, 20, 44};
const int keccakf_piln[24] = {
10, 7, 11, 17, 18, 3, 5, 16, 8, 21, 24, 4,
15, 23, 19, 13, 12, 2, 20, 14, 22, 9, 6, 1};
class keccak_t
{
public:
static const uint32_t KECCAK_ROUNDS = 24;
typedef struct alignas(32)
{
union
{ // state:
uint8_t b[200]; // 8-bit bytes
uint64_t q[25]; // 64-bit words
} st;
int pt, rsiz, mdlen; // these don't overflow
} sha3_ctx_t;
typedef struct
{
uint64_t *rndc;
int *rotc;
int *piln;
} sha3_parameters_t;
sha3_parameters_t *_parameters;
sha3_ctx_t *_content;
__host__ __device__ __forceinline__ keccak_t(
sha3_parameters_t *parameters) : _parameters(parameters)
{
SHARED_MEMORY sha3_ctx_t *content;
ONE_THREAD_PER_INSTANCE(
content = new sha3_ctx_t;)
_content = content;
}
__host__ keccak_t()
{
#ifndef ONLY_CPU
CUDA_CHECK(cudaMallocManaged(
(void **)&(_parameters),
sizeof(sha3_parameters_t)));
CUDA_CHECK(cudaMallocManaged(
(void **)&(_parameters->rndc),
sizeof(uint64_t) * 24));
CUDA_CHECK(cudaMallocManaged(
(void **)&(_parameters->rotc),
sizeof(int) * 24));
CUDA_CHECK(cudaMallocManaged(
(void **)&(_parameters->piln),
sizeof(int) * 24));
#else
_parameters = new sha3_parameters_t;
_parameters->rndc = new uint64_t[24];
_parameters->rotc = new int[24];
_parameters->piln = new int[24];
#endif
memcpy(_parameters->rndc, keccakf_rndc, sizeof(uint64_t) * 24);
memcpy(_parameters->rotc, keccakf_rotc, sizeof(int) * 24);
memcpy(_parameters->piln, keccakf_piln, sizeof(int) * 24);
_content = new sha3_ctx_t;
}
__host__ __device__ __forceinline__ ~keccak_t()
{
ONE_THREAD_PER_INSTANCE(
delete _content;)
_content = NULL;
_parameters = NULL;
}
__host__ void free_parameters()
{
#ifndef ONLY_CPU
CUDA_CHECK(cudaFree(_parameters->rndc));
CUDA_CHECK(cudaFree(_parameters->rotc));
CUDA_CHECK(cudaFree(_parameters->piln));
CUDA_CHECK(cudaFree(_parameters));
#else
delete[] _parameters->rndc;
delete[] _parameters->rotc;
delete[] _parameters->piln;
delete _parameters;
#endif
}
__host__ __device__ __forceinline__ void sha3_keccakf(uint64_t st[25])
{
// variables
int i, j, r;
uint64_t t, bc[5];
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
uint8_t *v;
// endianess conversion. this is redundant on little-endian targets
for (i = 0; i < 25; i++)
{
v = (uint8_t *)&st[i];
st[i] = ((uint64_t)v[0]) | (((uint64_t)v[1]) << 8) |
(((uint64_t)v[2]) << 16) | (((uint64_t)v[3]) << 24) |
(((uint64_t)v[4]) << 32) | (((uint64_t)v[5]) << 40) |
(((uint64_t)v[6]) << 48) | (((uint64_t)v[7]) << 56);
}
#endif
// actual iteration
for (r = 0; r < KECCAK_ROUNDS; r++)
{
// Theta
for (i = 0; i < 5; i++)
bc[i] = st[i] ^ st[i + 5] ^ st[i + 10] ^ st[i + 15] ^ st[i + 20];
for (i = 0; i < 5; i++)
{
t = bc[(i + 4) % 5] ^ ROTL64(bc[(i + 1) % 5], 1);
for (j = 0; j < 25; j += 5)
st[j + i] ^= t;
}
// Rho Pi
t = st[1];
for (i = 0; i < 24; i++)
{
j = _parameters->piln[i];
bc[0] = st[j];
st[j] = ROTL64(t, _parameters->rotc[i]);
t = bc[0];
}
// Chi
for (j = 0; j < 25; j += 5)
{
for (i = 0; i < 5; i++)
bc[i] = st[j + i];
for (i = 0; i < 5; i++)
st[j + i] ^= (~bc[(i + 1) % 5]) & bc[(i + 2) % 5];
}
// Iota
st[0] ^= _parameters->rndc[r];
}
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
// endianess conversion. this is redundant on little-endian targets
for (i = 0; i < 25; i++)
{
v = (uint8_t *)&st[i];
t = st[i];
v[0] = t & 0xFF;
v[1] = (t >> 8) & 0xFF;
v[2] = (t >> 16) & 0xFF;
v[3] = (t >> 24) & 0xFF;
v[4] = (t >> 32) & 0xFF;
v[5] = (t >> 40) & 0xFF;
v[6] = (t >> 48) & 0xFF;
v[7] = (t >> 56) & 0xFF;
}
#endif
}
__host__ __device__ void sha3_init(int mdlen)
{
uint32_t idx;
for (idx = 0; idx < 25; idx++)
_content->st.q[idx] = 0;
_content->mdlen = mdlen;
_content->rsiz = 200 - 2 * mdlen;
_content->pt = 0;
}
__host__ __device__ void sha3_update(const uint8_t *data, size_t len)
{
size_t idx;
int j;
j = _content->pt;
for (idx = 0; idx < len; idx++)
{
_content->st.b[j++] ^= data[idx];
if (j >= _content->rsiz)
{
sha3_keccakf(_content->st.q);
j = 0;
}
}
_content->pt = j;
}
__host__ __device__ void sha3_final(uint8_t *md)
{
int idx;
// why not _parameters->rndc[0]? 0x06 for sha3, 0x1F for shake, 0x01 for keccak
_content->st.b[_content->pt] ^= 0x01;
_content->st.b[_content->rsiz - 1] ^= 0x80;
sha3_keccakf(_content->st.q);
for (idx = 0; idx < _content->mdlen; idx++)
md[idx] = _content->st.b[idx];
}
__host__ __device__ void sha3(const uint8_t *in, size_t inlen, uint8_t *md, int mdlen)
{
sha3_init(mdlen);
sha3_update(in, inlen);
sha3_final(md);
}
// SHAKE128 and SHAKE256 extensible-output functions
__host__ __device__ void shake_xof(uint8_t *md, int len)
{
_content->st.b[_content->pt] ^= 0x1F;
_content->st.b[_content->rsiz - 1] ^= 0x80;
sha3_keccakf(_content->st.q);
_content->pt = 0;
}
__host__ __device__ void shake_out(uint8_t *out, size_t len)
{
size_t idx;
int j;
j = _content->pt;
for (idx = 0; idx < len; idx++)
{
if (j >= _content->rsiz)
{
sha3_keccakf(_content->st.q);
j = 0;
}
((uint8_t *)out)[idx] = _content->st.b[j++];
}
_content->pt = j;
}
__host__ __device__ void shae128_init()
{
sha3_init(16);
}
__host__ __device__ void shake256_init()
{
sha3_init(32);
}
__host__ __device__ void shake_update(const uint8_t *in, size_t inlen)
{
sha3_update(in, inlen);
}
__host__ __device__ void sha3_256(const uint8_t *in, size_t inlen, uint8_t *md)
{
sha3(in, inlen, md, 32);
}
__host__ __device__ void sha3_512(const uint8_t *in, size_t inlen, uint8_t *md)
{
sha3(in, inlen, md, 64);
}
};
}
#endif