quantica/ml_kem/masked.rs
1//! First-order arithmetic masking for ML-KEM polynomials
2//! (**countermeasure: DPA / DEMA / CPA on the K-PKE secret ``s``**).
3//!
4//! ## Principle
5//!
6//! Every secret polynomial is kept as two additive shares modulo `q`:
7//! `s = s_0 + s_1 (mod q)`. Shares are drawn uniformly at random and
8//! each operation consuming the secret (NTT, pointwise multiplication
9//! with a public polynomial, additions / subtractions) is rewritten
10//! to operate on the shares without ever materialising the unmasked
11//! `s`. Fresh-randomness refreshes (`MaskedPoly::refresh`) break
12//! cross-operation correlations that a high-order DPA could otherwise
13//! exploit.
14//!
15//! Because the unmasked `s` never exists as a single value in memory,
16//! the Hamming-weight / Hamming-distance hypothesis at the core of
17//! CPA becomes non-identifiable: correlating the power trace with a
18//! guess of any byte of `s` produces no peak since each share alone
19//! is uniform.
20//!
21//! ## Available operations
22//!
23//! | Function | Description |
24//! |----------|-------------|
25//! | `MaskedPoly::mask` | Split a plaintext polynomial into two shares |
26//! | `MaskedPoly::unmask` | Reconstruct the polynomial from shares |
27//! | `MaskedPoly::refresh` | Re-randomize shares (prevents correlation buildup) |
28//! | `masked_ntt` / `masked_ntt_inv` | NTT on each share independently |
29//! | `masked_multiply_public` | Multiply masked poly by a public poly |
30//! | `masked_add` / `masked_sub` | Add/subtract two masked polys |
31//! | `masked_multiply_accumulate` | Fused multiply-add with a public poly |
32//!
33//! ## References
34//!
35//! * *Side-channel analysis of the ML-KEM pointwise multiplication*
36//! (IACR ePrint 2025, `doc/papers/eprint2025_sca_mlkem_pointwise.pdf`)
37//! — identifies the pointwise multiplication as the key DPA
38//! target; masking the shares defeats the published attack.
39//! * *ML-KEM and ML-DSA on OpenTitan: side-channel evaluation*
40//! (cryptojedi, 2024) — independent evaluation of the trace-count
41//! blow-up induced by masking.
42//!
43//! ## Where to look next
44//!
45//! * Countermeasure description and threat analysis:
46//! `doc/sca/countermeasures/ml_kem.rst`, section
47//! *DPA / DEMA / CPA — first-order masking of the KPKE secret*.
48//! * Call sites: `keygen_internal_sca`,
49//! `decaps_internal_sca`,
50//! [`crate::ml_kem::kpke::decrypt_sca`].
51//!
52//! ## Scope and residual risk
53//!
54//! Masking is **first-order**. A higher-order DPA that combines two
55//! independent time samples can still recover the secret with
56//! substantially more traces. Tier-4 item `K-SCA3` (not yet
57//! scheduled) would extend this to a 3-share scheme (CC EAL4+-grade).
58
59use super::MlKemError;
60use super::ntt::{self, Q};
61use super::params::N;
62use super::rng::CryptoRng;
63
64/// A polynomial split into two additive shares modulo q.
65///
66/// Maintains the invariant `real_value[i] = (share0[i] + share1[i]) mod q`
67/// for all `i` in `0..256`. Neither share alone reveals any information
68/// about the underlying polynomial.
69///
70/// Both shares have coefficients in `[0, q-1]`.
71pub struct MaskedPoly {
72 /// First additive share of the polynomial.
73 pub share0: [i16; N],
74 /// Second additive share of the polynomial.
75 pub share1: [i16; N],
76}
77
78impl MaskedPoly {
79 /// Split a plaintext polynomial into two random additive shares.
80 ///
81 /// Generates a uniformly random `share1` from the RNG, then computes
82 /// `share0 = poly - share1 mod q`. The random bytes are zeroized after use.
83 ///
84 /// # Arguments
85 ///
86 /// * `poly` - The secret polynomial to mask (coefficients in `[0, q-1]`).
87 /// * `rng` - A cryptographic RNG for generating the random share.
88 ///
89 /// # Errors
90 ///
91 /// Returns [`MlKemError::RngFailure`] if the RNG fails.
92 pub fn mask(poly: &[i16; N], rng: &mut impl CryptoRng) -> Result<Self, MlKemError> {
93 let mut share1 = [0i16; N];
94 let mut rand_bytes = [0u8; N * 2];
95 rng.fill_bytes(&mut rand_bytes)?;
96
97 for i in 0..N {
98 // Random value mod q from 2 random bytes
99 let r = u16::from_le_bytes([rand_bytes[2 * i], rand_bytes[2 * i + 1]]);
100 share1[i] = (r % (Q as u16)) as i16;
101 }
102 ntt::zeroize_bytes(&mut rand_bytes);
103
104 let mut share0 = [0i16; N];
105 for i in 0..N {
106 let mut r = (poly[i] as i32 - share1[i] as i32) % (Q as i32);
107 r += (Q as i32) & (r >> 31);
108 share0[i] = r as i16;
109 }
110
111 Ok(MaskedPoly { share0, share1 })
112 }
113
114 /// Reconstruct the plaintext polynomial from the two shares.
115 ///
116 /// Computes `(share0[i] + share1[i]) mod q` for each coefficient.
117 /// The resulting polynomial has coefficients in `[0, q-1]`.
118 ///
119 /// # Security note
120 ///
121 /// The returned polynomial is unmasked and should be handled as secret
122 /// data (zeroized after use).
123 pub fn unmask(&self) -> [i16; N] {
124 let mut result = [0i16; N];
125 for i in 0..N {
126 let mut r = (self.share0[i] as i32 + self.share1[i] as i32) % (Q as i32);
127 r += (Q as i32) & (r >> 31);
128 result[i] = r as i16;
129 }
130 result
131 }
132
133 /// Securely erase both shares via volatile writes.
134 ///
135 /// Delegates to [`super::ntt::zeroize_poly`] for each share to ensure
136 /// the optimizer cannot elide the writes.
137 pub fn zeroize(&mut self) {
138 ntt::zeroize_poly(&mut self.share0);
139 ntt::zeroize_poly(&mut self.share1);
140 }
141
142 /// Re-randomize the shares without changing the unmasked value.
143 ///
144 /// Draws a fresh random polynomial `r` and updates the shares:
145 /// `share0' = share0 - r mod q`, `share1' = share1 + r mod q`.
146 /// The sum is preserved: `share0' + share1' = share0 + share1`.
147 ///
148 /// Refreshing prevents higher-order correlation buildup when the same
149 /// masked polynomial is used in multiple operations.
150 ///
151 /// # Errors
152 ///
153 /// Returns [`MlKemError::RngFailure`] if the RNG fails.
154 pub fn refresh(&mut self, rng: &mut impl CryptoRng) -> Result<(), MlKemError> {
155 let mut rand_bytes = [0u8; N * 2];
156 rng.fill_bytes(&mut rand_bytes)?;
157
158 for i in 0..N {
159 let r = (u16::from_le_bytes([rand_bytes[2 * i], rand_bytes[2 * i + 1]]) % (Q as u16)) as i16;
160 let mut s0 = (self.share0[i] as i32 - r as i32) % (Q as i32);
161 s0 += (Q as i32) & (s0 >> 31);
162 self.share0[i] = s0 as i16;
163
164 let mut s1 = (self.share1[i] as i32 + r as i32) % (Q as i32);
165 s1 += (Q as i32) & (s1 >> 31);
166 self.share1[i] = s1 as i16;
167 }
168 ntt::zeroize_bytes(&mut rand_bytes);
169 Ok(())
170 }
171}
172
173// ---- Masked NTT operations ----
174// NTT is linear: NTT(s₀ + s₁) = NTT(s₀) + NTT(s₁)
175// So we NTT each share independently.
176
177/// Apply the forward NTT to each share of a [`MaskedPoly`] independently.
178///
179/// Exploits the linearity of the NTT: `NTT(s0 + s1) = NTT(s0) + NTT(s1)`.
180/// Each share is transformed independently so the masking invariant is preserved.
181pub fn masked_ntt(m: &mut MaskedPoly) {
182 ntt::ntt(&mut m.share0);
183 ntt::ntt(&mut m.share1);
184}
185
186/// Apply the inverse NTT to each share of a [`MaskedPoly`] independently.
187///
188/// The inverse of `masked_ntt`. Transforms both shares from the NTT domain
189/// back to the standard domain while preserving the masking invariant.
190pub fn masked_ntt_inv(m: &mut MaskedPoly) {
191 ntt::ntt_inv(&mut m.share0);
192 ntt::ntt_inv(&mut m.share1);
193}
194
195/// Multiply a masked polynomial by a **public** polynomial in NTT domain.
196///
197/// Computes `(s0 + s1) * p = s0*p + s1*p` by multiplying each share
198/// independently. This is secure because `p` is public data -- there is
199/// no secret-times-secret interaction that would require second-order masking.
200///
201/// # Arguments
202///
203/// * `masked` - The masked secret polynomial (NTT domain).
204/// * `public` - A public polynomial (NTT domain).
205/// * `out` - Output masked polynomial for the product.
206pub fn masked_multiply_public(masked: &MaskedPoly, public: &[i16; N], out: &mut MaskedPoly) {
207 ntt::multiply_ntts(&masked.share0, public, &mut out.share0);
208 ntt::multiply_ntts(&masked.share1, public, &mut out.share1);
209}
210
211/// Add a **public** polynomial to a masked polynomial.
212///
213/// Adds the public value only to `share0`. Since the public polynomial
214/// is not secret, it does not need to be split across shares.
215///
216/// # Arguments
217///
218/// * `m` - The masked polynomial to modify in place.
219/// * `public` - A public polynomial to add.
220pub fn masked_add_public(m: &mut MaskedPoly, public: &[i16; N]) {
221 ntt::poly_add(&m.share0, public, &mut m.share0.clone());
222 let old = m.share0;
223 ntt::poly_add(&old, public, &mut m.share0);
224}
225
226/// Add two masked polynomials share-wise.
227///
228/// Computes `c.share0 = a.share0 + b.share0` and
229/// `c.share1 = a.share1 + b.share1`, so
230/// `unmask(c) = unmask(a) + unmask(b) mod q`.
231pub fn masked_add(a: &MaskedPoly, b: &MaskedPoly, c: &mut MaskedPoly) {
232 ntt::poly_add(&a.share0, &b.share0, &mut c.share0);
233 ntt::poly_add(&a.share1, &b.share1, &mut c.share1);
234}
235
236/// Subtract two masked polynomials share-wise.
237///
238/// Computes `c.share0 = a.share0 - b.share0` and
239/// `c.share1 = a.share1 - b.share1`, so
240/// `unmask(c) = unmask(a) - unmask(b) mod q`.
241pub fn masked_sub(a: &MaskedPoly, b: &MaskedPoly, c: &mut MaskedPoly) {
242 ntt::poly_sub(&a.share0, &b.share0, &mut c.share0);
243 ntt::poly_sub(&a.share1, &b.share1, &mut c.share1);
244}
245
246/// Fused multiply-accumulate: `out += masked * public` (NTT domain).
247///
248/// Multiplies a masked polynomial by a public polynomial and adds the
249/// result to the existing shares in `out`. Useful for computing matrix-vector
250/// products share-wise without allocating temporaries for each column.
251///
252/// # Arguments
253///
254/// * `out` - Accumulator masked polynomial (modified in place).
255/// * `masked` - The masked secret polynomial (NTT domain).
256/// * `public` - A public polynomial (NTT domain).
257pub fn masked_multiply_accumulate(out: &mut MaskedPoly, masked: &MaskedPoly, public: &[i16; N]) {
258 let mut tmp0 = [0i16; N];
259 let mut tmp1 = [0i16; N];
260 ntt::multiply_ntts(&masked.share0, public, &mut tmp0);
261 ntt::multiply_ntts(&masked.share1, public, &mut tmp1);
262 for i in 0..N {
263 out.share0[i] = ntt::barrett_reduce((out.share0[i] as i32 + tmp0[i] as i32) as i16);
264 out.share1[i] = ntt::barrett_reduce((out.share1[i] as i32 + tmp1[i] as i32) as i16);
265 }
266}