Skip to content

Commit 660197d

Browse files
committed
implemented rv64. did not use intrinsics due to [this issue](rust-lang/stdarch#1453 (comment))
1 parent 95a946e commit 660197d

File tree

3 files changed

+303
-9
lines changed

3 files changed

+303
-9
lines changed

README.md

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,29 +1,31 @@
11
This is a pure-Rust platform-agnostic [AES](https://nvlpubs.nist.gov/nistpubs/FIPS/NIST.FIPS.197-upd1.pdf) library, that
22
is focused on reusability and optimal performance.
33

4-
This library guarantees the best performance on the `target_cpu` (if correctly specified). This currently has 5
5-
implementations, among which it automatically decides the best (most performant) using Cargo's `target_feature` flags.
4+
This library guarantees the best performance on the `target-cpu` (if correctly specified). This currently has 6
5+
implementations, among which it automatically decides the best (most performant) using Cargo's `target-feature` flags.
66

77
# The implementations and their requirements are:
88

99
- AES-NI (with Vector AES for 2- and 4- blocks) => requires a Nightly Compiler, the `nightly` feature to be enabled, and
10-
compiling for x86(64) with the `avx512f` and `vaes` target_feature flags set.
10+
compiling for x86(64) with the `avx512f` and `vaes` target-feature flags set.
1111
- AES-NI (with Vector AES for 2-blocks) => requires a Nightly Compiler, the `nightly` feature to be enabled, and
12-
compiling for x86(64) with the `vaes` target_feature flag set. (although `vaes` is a AVX-512 feature, some AlderLake
12+
compiling for x86(64) with the `vaes` target-feature flag set. (although `vaes` is an AVX-512 feature, some Alder Lake
1313
CPUs have `vaes` without AVX-512 support)
14-
- AES-NI => requires compiling for x86(64) with the `sse4.1` and `aes` target_feature flags set.
15-
- AES-Neon => requires compiling for AArch64 or ARM64EC or ARM-v8 with the `aes` target_feature flag set (ARM-v8
14+
- AES-NI => requires compiling for x86(64) with the `sse4.1` and `aes` target-feature flags set.
15+
- AES-Neon => requires compiling for AArch64 or ARM64EC or ARM-v8 with the `aes` target-feature flag set (ARM-v8
1616
requires a Nightly compiler and the `nightly` feature to be enabled).
17+
- AES-RV64 => requires a Nightly compiler, the `nightly` feature to be enabled, and compiling for RISC-V RV64 with
18+
the `zkne` and `zknd` target-feature flags set.
1719
- Software AES => fallback implementation based on Rijmen and Daemen's `optimized` implementation (available
1820
on [their website](https://web.archive.org/web/20050828204927/http://www.iaik.tu-graz.ac.at/research/krypto/AES/old/%7Erijmen/rijndael/))
1921

20-
If you are unsure about the target_feature flags to set, use `target_cpu=native` (if not cross-compiling) in
22+
If you are unsure about the target-feature flags to set, use `-C target-cpu=native` (if not cross-compiling) in
2123
the `RUSTFLAGS` environment variable, and use the `nightly` feature only if you are using a nightly compiler.
2224

2325
# Warning
2426

25-
Using the wrong `target_feature` flags may lead to the binary crashing due to an "Unknown Instruction" error. This
26-
library uses these flags to use the CPU intrinsics to maximize performance. If you are unsure what `target_feature`s are
27+
Using the wrong `target-feature` flags may lead to the binary crashing due to an "Unknown Instruction" error. This
28+
library uses these flags to use the CPU intrinsics to maximize performance. If you are unsure what `target-feature`s are
2729
supported on your CPU, use the command
2830

2931
````bash

src/aes_riscv64.rs

Lines changed: 283 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,283 @@
1+
use core::arch::asm;
2+
use core::ops::{BitAnd, BitOr, BitXor, Not};
3+
use core::{mem, slice};
4+
5+
macro_rules! _asm {
6+
(asm: $assembly:expr, $rs1:expr $(, $rs2:expr)?) => {{
7+
let value: u64;
8+
unsafe {
9+
asm!(
10+
$assembly,
11+
rd = lateout(reg) value,
12+
rs1 = in(reg) $rs1,
13+
$(rs2 = in(reg) $rs2, )?
14+
options(pure, nomem, nostack)
15+
)
16+
}
17+
value
18+
}};
19+
($instruction:literal, $rs1:expr) => {
20+
_asm!(asm: concat!($instruction, " {rd},{rs1}"), $rs1)
21+
};
22+
($instruction:literal, $rs1:expr, $rs2:expr) => {
23+
_asm!(asm: concat!($instruction, " {rd},{rs1},{rs2}"), $rs1, $rs2)
24+
};
25+
}
26+
27+
#[inline(always)]
28+
fn aes64esm(rs1: u64, rs2: u64) -> u64 {
29+
_asm!("aes64esm", rs1, rs2)
30+
}
31+
32+
#[inline(always)]
33+
fn aes64es(rs1: u64, rs2: u64) -> u64 {
34+
_asm!("aes64es", rs1, rs2)
35+
}
36+
37+
#[inline(always)]
38+
fn aes64dsm(rs1: u64, rs2: u64) -> u64 {
39+
_asm!("aes64dsm", rs1, rs2)
40+
}
41+
42+
#[inline(always)]
43+
fn aes64ds(rs1: u64, rs2: u64) -> u64 {
44+
_asm!("aes64ds", rs1, rs2)
45+
}
46+
47+
#[inline(always)]
48+
fn aes64im(rs1: u64) -> u64 {
49+
_asm!("aes64im", rs1)
50+
}
51+
52+
#[inline(always)]
53+
fn aes64ks1i(rs1: u64, rnum: u8) -> u64 {
54+
macro_rules! case {
55+
($imm_0_until_10:expr) => {
56+
_asm!(asm: concat!("aes64ks1i {rd},{rs1},", $imm_0_until_10), rs1)
57+
}
58+
}
59+
match rnum {
60+
0 => case!(0),
61+
1 => case!(1),
62+
2 => case!(2),
63+
3 => case!(3),
64+
4 => case!(4),
65+
5 => case!(5),
66+
6 => case!(6),
67+
7 => case!(7),
68+
8 => case!(8),
69+
9 => case!(9),
70+
10 => case!(10),
71+
_ => unreachable!(),
72+
}
73+
}
74+
75+
#[inline(always)]
76+
fn aes64ks2(rs1: u64, rs2: u64) -> u64 {
77+
_asm!("aes64ks2", rs1, rs2)
78+
}
79+
80+
#[derive(Copy, Clone, Eq, PartialEq)]
81+
#[repr(C, align(16))]
82+
pub struct AesBlock(u64, u64);
83+
84+
impl From<[u8; 16]> for AesBlock {
85+
#[inline]
86+
fn from(value: [u8; 16]) -> Self {
87+
Self::new(value)
88+
}
89+
}
90+
91+
impl BitAnd for AesBlock {
92+
type Output = Self;
93+
94+
#[inline]
95+
fn bitand(self, rhs: Self) -> Self::Output {
96+
Self(self.0 & rhs.0, self.1 & rhs.1)
97+
}
98+
}
99+
100+
impl BitOr for AesBlock {
101+
type Output = Self;
102+
103+
#[inline]
104+
fn bitor(self, rhs: Self) -> Self::Output {
105+
Self(self.0 | rhs.0, self.1 | rhs.1)
106+
}
107+
}
108+
109+
impl BitXor for AesBlock {
110+
type Output = Self;
111+
112+
#[inline]
113+
fn bitxor(self, rhs: Self) -> Self::Output {
114+
Self(self.0 ^ rhs.0, self.1 ^ rhs.1)
115+
}
116+
}
117+
118+
impl Not for AesBlock {
119+
type Output = Self;
120+
121+
#[inline]
122+
fn not(self) -> Self::Output {
123+
Self(!self.0, !self.1)
124+
}
125+
}
126+
127+
impl AesBlock {
128+
#[inline]
129+
pub const fn new(value: [u8; 16]) -> Self {
130+
unsafe { mem::transmute(value) }
131+
}
132+
133+
#[inline]
134+
pub fn store_to(self, dst: &mut [u8]) {
135+
assert!(dst.len() >= 16);
136+
unsafe {
137+
*dst.as_mut_ptr().cast::<[u8; 16]>() = mem::transmute(self);
138+
}
139+
}
140+
141+
#[inline]
142+
pub fn zero() -> Self {
143+
Self(0, 0)
144+
}
145+
146+
#[inline]
147+
pub fn is_zero(self) -> bool {
148+
(self.0 | self.1) == 0
149+
}
150+
151+
/// Performs one round of AES encryption function (ShiftRows->SubBytes->MixColumns->AddRoundKey)
152+
#[inline]
153+
pub fn enc(self, round_key: Self) -> Self {
154+
Self(
155+
aes64esm(self.0, self.1) ^ round_key.0,
156+
aes64esm(self.1, self.0) ^ round_key.1,
157+
)
158+
}
159+
160+
/// Performs one round of AES decryption function (InvShiftRows->InvSubBytes->InvMixColumns->AddRoundKey)
161+
#[inline]
162+
pub fn dec(self, round_key: Self) -> Self {
163+
Self(
164+
aes64dsm(self.0, self.1) ^ round_key.0,
165+
aes64dsm(self.1, self.0) ^ round_key.1,
166+
)
167+
}
168+
169+
/// Performs one round of AES encryption function without MixColumns (ShiftRows->SubBytes->AddRoundKey)
170+
#[inline]
171+
pub fn enc_last(self, round_key: Self) -> Self {
172+
Self(
173+
aes64es(self.0, self.1) ^ round_key.0,
174+
aes64es(self.1, self.0) ^ round_key.1,
175+
)
176+
}
177+
178+
/// Performs one round of AES decryption function without InvMixColumns (InvShiftRows->InvSubBytes->AddRoundKey)
179+
#[inline]
180+
pub fn dec_last(self, round_key: Self) -> Self {
181+
Self(
182+
aes64ds(self.0, self.1) ^ round_key.0,
183+
aes64ds(self.1, self.0) ^ round_key.1,
184+
)
185+
}
186+
187+
/// Performs the MixColumns operation
188+
#[inline]
189+
pub fn mc(self) -> Self {
190+
let (tmp0, tmp1) = (aes64ds(self.0, self.1), aes64ds(self.1, self.0));
191+
Self(aes64esm(tmp0, tmp1), aes64esm(tmp1, tmp0))
192+
}
193+
194+
/// Performs the InvMixColumns operation
195+
#[inline]
196+
pub fn imc(self) -> Self {
197+
Self(aes64im(self.0), aes64im(self.1))
198+
}
199+
}
200+
201+
#[inline(always)]
202+
fn keyexp_128(prev: AesBlock, rnum: u8) -> AesBlock {
203+
let tmp = aes64ks2(aes64ks1i(prev.1, rnum), prev.0);
204+
AesBlock(tmp, aes64ks2(tmp, prev.1))
205+
}
206+
207+
#[inline(always)]
208+
fn keyexp_256_1(prev0: AesBlock, prev1: AesBlock, rnum: u8) -> AesBlock {
209+
let tmp = aes64ks2(aes64ks1i(prev1.1, rnum), prev0.0);
210+
AesBlock(tmp, aes64ks2(tmp, prev0.1))
211+
}
212+
213+
#[inline(always)]
214+
fn keyexp_256_2(prev0: AesBlock, prev1: AesBlock) -> AesBlock {
215+
let tmp = aes64ks2(aes64ks1i(prev1.1, 10), prev0.0);
216+
AesBlock(tmp, aes64ks2(tmp, prev0.1))
217+
}
218+
219+
pub(super) fn keygen_128(key: [u8; 16]) -> [AesBlock; 11] {
220+
let key0 = AesBlock::from(key);
221+
let key1 = keyexp_128(key0, 0);
222+
let key2 = keyexp_128(key1, 1);
223+
let key3 = keyexp_128(key2, 2);
224+
let key4 = keyexp_128(key3, 3);
225+
let key5 = keyexp_128(key4, 4);
226+
let key6 = keyexp_128(key5, 5);
227+
let key7 = keyexp_128(key6, 6);
228+
let key8 = keyexp_128(key7, 7);
229+
let key9 = keyexp_128(key8, 8);
230+
let key10 = keyexp_128(key9, 9);
231+
232+
[
233+
key0, key1, key2, key3, key4, key5, key6, key7, key8, key9, key10,
234+
]
235+
}
236+
237+
pub(super) fn keygen_192(key: [u8; 24]) -> [AesBlock; 13] {
238+
unsafe {
239+
let mut expanded_keys: [AesBlock; 13] = mem::zeroed();
240+
241+
let keys_ptr: *mut u64 = expanded_keys.as_mut_ptr().cast();
242+
let columns = slice::from_raw_parts_mut(keys_ptr, 26);
243+
244+
for (i, chunk) in key.chunks_exact(8).enumerate() {
245+
columns[i] = u64::from_ne_bytes(chunk.try_into().unwrap());
246+
}
247+
248+
for i in (0..21).step_by(3) {
249+
columns[i + 3] = aes64ks2(aes64ks1i(columns[i + 2], (i / 3) as u8), columns[i + 0]);
250+
columns[i + 4] = aes64ks2(columns[i + 3], columns[i + 1]);
251+
columns[i + 5] = aes64ks2(columns[i + 4], columns[i + 2]);
252+
}
253+
254+
columns[24] = aes64ks2(aes64ks1i(columns[23], 7), columns[21]);
255+
columns[25] = aes64ks2(columns[24], columns[22]);
256+
257+
expanded_keys
258+
}
259+
}
260+
261+
pub(super) fn keygen_256(key: [u8; 32]) -> [AesBlock; 15] {
262+
let key0 = AesBlock::try_from(&key[..16]).unwrap();
263+
let key1 = AesBlock::try_from(&key[16..]).unwrap();
264+
265+
let key2 = keyexp_256_1(key0, key1, 0);
266+
let key3 = keyexp_256_2(key1, key2);
267+
let key4 = keyexp_256_1(key2, key3, 1);
268+
let key5 = keyexp_256_2(key3, key4);
269+
let key6 = keyexp_256_1(key4, key5, 2);
270+
let key7 = keyexp_256_2(key5, key6);
271+
let key8 = keyexp_256_1(key6, key7, 3);
272+
let key9 = keyexp_256_2(key7, key8);
273+
let key10 = keyexp_256_1(key8, key9, 4);
274+
let key11 = keyexp_256_2(key9, key10);
275+
let key12 = keyexp_256_1(key10, key11, 5);
276+
let key13 = keyexp_256_2(key11, key12);
277+
let key14 = keyexp_256_1(key12, key13, 6);
278+
279+
[
280+
key0, key1, key2, key3, key4, key5, key6, key7, key8, key9, key10, key11, key12, key13,
281+
key14,
282+
]
283+
}

src/lib.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,15 @@ cfg_if! {
4343
mod aes_arm;
4444
pub use aes_arm::AesBlock;
4545
use aes_arm::*;
46+
} else if #[cfg(all(
47+
feature = "nightly",
48+
target_arch = "riscv64",
49+
target_feature = "zkne",
50+
target_feature = "zknd"
51+
))] {
52+
mod aes_riscv64;
53+
pub use aes_riscv64::AesBlock;
54+
use aes_riscv64::*;
4655
} else {
4756
mod aes_default;
4857
pub use aes_default::AesBlock;

0 commit comments

Comments
 (0)