quantica/ml_dsa/masked.rs
1//! First-order arithmetic masking for ML-DSA polynomials.
2//!
3//! Same idea as the ML-KEM `masked` module but adapted to the
4//! ML-DSA arithmetic (q = 8 380 417, polynomial coefficients held as
5//! `i32`, NTT goes all the way down to length-1 components).
6//!
7//! Each secret polynomial is represented as two additive shares modulo
8//! `q`: `s = (s₀ + s₁) mod q`. All operations on secret data manipulate
9//! the shares independently, so a first-order side-channel attacker
10//! observing one share at a time learns nothing about the unmasked
11//! value.
12//!
13//! ## Sensitive operations protected
14//!
15//! In `dsa::sign_internal`, the secret polynomials `s1`, `s2`, `t0`
16//! are NTT-transformed once before the rejection-sampling loop, then
17//! multiplied by the per-iteration challenge polynomial `c` (which is
18//! public — verifier recomputes it):
19//!
20//! ```text
21//! ŝ1, ŝ2, t̂0 ← NTT(s1), NTT(s2), NTT(t0)
22//! for each rejection iteration:
23//! ĉ ← NTT(c)
24//! cs1[i] ← ĉ · ŝ1[i] // secret × public
25//! cs2[i] ← ĉ · ŝ2[i] // secret × public
26//! ct0[i] ← ĉ · t̂0[i] // secret × public
27//! ```
28//!
29//! The masked variants in this module replace `ŝ1`, `ŝ2`, `t̂0` with
30//! `MaskedPoly` containers and provide a `pointwise_mul_public`
31//! that multiplies each share independently. Because `ĉ` is public,
32//! no secret×secret multiplication occurs and first-order masking is
33//! sufficient.
34//!
35//! ## Available operations
36//!
37//! | Function | Description |
38//! |--------------------------------------|------------------------------------------------------------|
39//! | `MaskedPoly::mask` | Split a plaintext polynomial into two shares |
40//! | `MaskedPoly::unmask` | Reconstruct the polynomial from shares |
41//! | `MaskedPoly::refresh` | Re-randomize shares (prevents correlation buildup) |
42//! | `MaskedPoly::zeroize` | DSE-resistant wipe of both shares |
43//! | `masked_ntt` | Forward NTT applied to each share |
44//! | `masked_ntt_inv` | Inverse NTT applied to each share |
45//! | `masked_pointwise_mul_public` | Masked × public pointwise mul (returns a `MaskedPoly`) |
46//!
47//! ## Masked `y` pipeline (`sca-masked-y`)
48//!
//! `MaskedPoly::sample_expand_mask` samples `y` directly as two shares
50//! drawn from SHAKE256. The shares propagate through
51//! `masked_ntt` and `masked_mat_vec_mul` / `masked_mat_vec_mul_lazy`
52//! so that the intermediate `w = A·y` stays in masked form until the
53//! rejection loop commits to emitting it. This closes the DPA
54//! recovery of `s1` from `z = y + c·s1` that exists on any unmasked
55//! implementation. See *Side-channel analysis of masked y-sampling
56//! in ML-DSA* (IACR ePrint 2025/276) and the countermeasure chapter
57//! at `doc/sca/countermeasures/ml_dsa.rst`, section *DPA on `y` —
58//! the `sca-masked-y` pipeline*.
59//!
60//! ## References
61//!
62//! * *Hardware masking of ML-DSA* (IACR ePrint 2024,
63//! `doc/papers/eprint2024_mldsa_hw_masking.pdf`) — reference
64//! construction, we follow the same share topology.
65//! * *Side-channel analysis of masked y-sampling in ML-DSA*
66//! (IACR ePrint 2025/276) — basis for
//!   `MaskedPoly::sample_expand_mask` + propagation through the linear
68//! stage.
69//! * *Physical security considerations for ML-DSA* (NIST, 2025) —
70//! masking recommendation for high-assurance profiles.
71//!
72//! ## Where to look next
73//!
74//! * Countermeasure description and threat analysis:
75//! `doc/sca/countermeasures/ml_dsa.rst`, sections *DPA — first-
76//! order masking of secret polynomials* and *DPA on `y` — the
77//! `sca-masked-y` pipeline*.
78//! * Call sites: [`crate::ml_dsa::dsa::sign_internal`] (look for
79//! `#[cfg(feature = "sca-protected")]` and
80//! `#[cfg(feature = "sca-masked-y")]` blocks).
81//!
82//! ## Scope and residual risk
83//!
84//! Masking here is **first-order**. The shipped Tier-1 item
85//! `T1-A` (A3, refresh shares at the start of every rejection
86//! iteration, head-of-loop refresh block in `dsa.rs`) raises the
87//! effort required by a higher-order DPA that combines leakage
88//! across iterations. Going beyond first-order (full higher-order
89//! masking) is tracked as Tier-4 `T4-C`.
90
91use super::MlDsaError;
92use super::ntt::{self, mod_q};
93use super::params::{N, Q};
94use super::rng::CryptoRng;
95
96/// A polynomial split into two additive shares modulo `q`.
97///
98/// Maintains the invariant `unmask()[i] = (share0[i] + share1[i]) mod q`
99/// for all `i in 0..N`. Both shares are stored with coefficients in
100/// `[0, q-1]`. Neither share alone reveals any information about the
101/// underlying polynomial.
102pub struct MaskedPoly {
103 /// First additive share.
104 pub share0: [i32; N],
105 /// Second additive share.
106 pub share1: [i32; N],
107}
108
109impl MaskedPoly {
110 /// Build an all-zero `MaskedPoly`. Both shares are zero, so
111 /// `unmask()` returns the zero polynomial. Useful as a stack
112 /// initializer for fixed-size arrays of masked polynomials.
113 pub const fn zero() -> Self {
114 Self {
115 share0: [0i32; N],
116 share1: [0i32; N],
117 }
118 }
119
120 /// Masked sampling of a masking vector polynomial from a SHAKE256
121 /// stream — the DPA-safe replacement for `sample::expand_mask`.
122 ///
123 /// Implements ExpandMask (FIPS 204 Algorithm 34) but produces a
124 /// two-share arithmetic representation `(share0, share1)` directly,
125 /// without ever materializing the unmasked y coefficient in a stack
126 /// or heap slot.
127 ///
128 /// ## Threat model
129 ///
130 /// Boolean-masked y is attackable with ~300 traces per the
131 /// Hermelink-Ning-Petri result (ePrint 2025/276). Arithmetic
132 /// masking is more robust but still requires careful
133 /// implementation: the key invariant is that the unmasked
134 /// coefficient value must only exist transiently in a CPU
135 /// register, never be written to RAM.
136 ///
137 /// ## Implementation
138 ///
139 /// For each coefficient:
140 /// 1. Decode the unmasked `y_i` from SHAKE256 output bytes
141 /// into a stack-local `let y_i: i32 = ...` (register-scoped).
142 /// 2. Draw a fresh random mask `r_i` from the same SHAKE256
143 /// stream (a separate squeeze block).
144 /// 3. Compute `share1_i = r_i mod q`, `share0_i = (y_i - r_i) mod q`.
145 /// 4. Write both shares to the output `MaskedPoly`.
146 /// 5. `y_i` and `r_i` go out of scope immediately.
147 ///
148 /// The two SHAKE256 streams (y bits and mask bits) are drawn from
149 /// the same state: we first squeeze the packed-y bytes, then
150 /// squeeze additional bytes for the mask. This keeps the function
151 /// deterministic for a given `rho'' || nonce`, so the signature
152 /// remains reproducible (ACVP-compatible).
153 ///
154 /// # Arguments
155 ///
156 /// * `rho_double_prime` — 64-byte seed (FIPS 204).
157 /// * `nonce` — the per-polynomial nonce (`kappa + r`).
158 /// * `gamma1` — the Γ₁ parameter for the current ML-DSA level.
159 /// * `bitlen_gamma1_minus1` — bit length used by ExpandMask (17 or 19).
160 pub fn sample_expand_mask(
161 rho_double_prime: &[u8; 64],
162 nonce: u16,
163 gamma1: i32,
164 bitlen_gamma1_minus1: usize,
165 ) -> Self {
166 use super::sha3;
167 let c = bitlen_gamma1_minus1 + 1; // bits per coefficient
168 let poly_bytes = 32 * c; // packed y bytes for 256 coeffs
169 let mask_bytes = N * 4; // 1024 bytes of mask randomness
170
171 let mut state = sha3::shake256();
172 state.absorb(rho_double_prime);
173 state.absorb(&nonce.to_le_bytes());
174
175 // Two-phase squeeze: y packed bytes, then mask bytes.
176 let mut y_buf = [0u8; 640]; // 32 * 20 = max poly_bytes
177 state.squeeze(&mut y_buf[..poly_bytes]);
178 let mut mask_buf = [0u8; N * 4];
179 state.squeeze(&mut mask_buf);
180
181 let mut mp = Self::zero();
182 // Tight per-coefficient loop — the unmasked y_i only exists
183 // in registers between the two writes to mp.share0 and
184 // mp.share1 (the compiler optimizer is free to keep it in
185 // registers; no intermediate array is used).
186 let mut bit_pos = 0usize;
187 let b = gamma1 as u32;
188 let range = (gamma1 as u32 - 1) + gamma1 as u32; // a + b (bit_unpack semantics)
189 let bits = 32 - range.leading_zeros() as usize;
190 debug_assert_eq!(bits, c);
191
192 for i in 0..N {
193 // 1. Decode y_i (register-scoped, never on the stack as a poly).
194 let mut val = 0u32;
195 for bit in 0..bits {
196 if (y_buf[bit_pos / 8] >> (bit_pos % 8)) & 1 == 1 {
197 val |= 1 << bit;
198 }
199 bit_pos += 1;
200 }
201 let y_i = b as i32 - val as i32;
202
203 // 2. Fresh mask r_i from the 4-byte mask chunk.
204 let r = u32::from_le_bytes([
205 mask_buf[4 * i],
206 mask_buf[4 * i + 1],
207 mask_buf[4 * i + 2],
208 mask_buf[4 * i + 3],
209 ]);
210 let r_i = (r % (Q as u32)) as i32;
211
212 // 3. share1 = r, share0 = y - r (mod q).
213 mp.share1[i] = r_i;
214 mp.share0[i] = mod_q(y_i - r_i);
215 // y_i and r_i fall out of scope here.
216 }
217 zeroize_bytes(&mut y_buf);
218 zeroize_bytes(&mut mask_buf);
219 mp
220 }
221
222 /// Split a plaintext polynomial into two random additive shares.
223 ///
224 /// Generates a uniformly distributed `share1 ∈ [0, q-1]^N` from the
225 /// RNG, then sets `share0 = poly - share1 mod q`. The intermediate
226 /// random bytes are zeroized after use.
227 ///
228 /// # Errors
229 ///
230 /// Returns [`MlDsaError::RngFailure`] if the RNG fails.
231 pub fn mask(poly: &[i32; N], rng: &mut dyn CryptoRng) -> Result<Self, MlDsaError> {
232 let mut share0 = [0i32; N];
233 let mut share1 = [0i32; N];
234 // q < 2^23 so 4 bytes is enough; we keep rejection-bias-low by
235 // taking each 32-bit word and reducing mod q.
236 let mut rand_bytes = [0u8; N * 4];
237 rng.fill_bytes(&mut rand_bytes)?;
238
239 for i in 0..N {
240 let r = u32::from_le_bytes([
241 rand_bytes[4 * i],
242 rand_bytes[4 * i + 1],
243 rand_bytes[4 * i + 2],
244 rand_bytes[4 * i + 3],
245 ]);
246 // Reduce a 32-bit word mod q. Bias is 2^32 mod q ≈ 2^9 over
247 // 2^32, ~10^-7 — negligible for masking purposes (security
248 // properties of masking only require the share to be
249 // statistically close to uniform).
250 share1[i] = (r % (Q as u32)) as i32;
251 }
252 zeroize_bytes(&mut rand_bytes);
253
254 for i in 0..N {
255 share0[i] = mod_q(poly[i] - share1[i]);
256 }
257
258 Ok(MaskedPoly { share0, share1 })
259 }
260
261 /// Reconstruct the plaintext polynomial from the two shares.
262 ///
263 /// Result coefficients are in `[0, q-1]`. The returned polynomial
264 /// is unmasked secret data and should be zeroized after use.
265 pub fn unmask(&self) -> [i32; N] {
266 let mut out = [0i32; N];
267 for i in 0..N {
268 out[i] = mod_q(self.share0[i] + self.share1[i]);
269 }
270 out
271 }
272
273 /// Securely erase both shares via volatile writes.
274 pub fn zeroize(&mut self) {
275 zeroize_poly(&mut self.share0);
276 zeroize_poly(&mut self.share1);
277 }
278
279 /// Re-randomize the shares without changing the unmasked value.
280 ///
281 /// Draws a fresh random polynomial `r` and updates the shares as
282 /// `share0' = share0 - r mod q`, `share1' = share1 + r mod q`.
283 /// The sum is preserved: `share0' + share1' ≡ share0 + share1`.
284 /// Refreshing prevents higher-order correlation buildup when the
285 /// same masked polynomial is reused across multiple operations.
286 pub fn refresh(&mut self, rng: &mut dyn CryptoRng) -> Result<(), MlDsaError> {
287 let mut rand_bytes = [0u8; N * 4];
288 rng.fill_bytes(&mut rand_bytes)?;
289 for i in 0..N {
290 let r = (u32::from_le_bytes([
291 rand_bytes[4 * i],
292 rand_bytes[4 * i + 1],
293 rand_bytes[4 * i + 2],
294 rand_bytes[4 * i + 3],
295 ]) % (Q as u32)) as i32;
296 self.share0[i] = mod_q(self.share0[i] - r);
297 self.share1[i] = mod_q(self.share1[i] + r);
298 }
299 zeroize_bytes(&mut rand_bytes);
300 Ok(())
301 }
302}
303
304// =====================================================================
305// Linear masked operations
306// =====================================================================
307//
308// The NTT and pointwise-multiplication-by-public-data are linear, so
309// we can apply them to each share independently and the masking
310// invariant is preserved:
311//
312// NTT(s₀ + s₁) = NTT(s₀) + NTT(s₁)
313// p · (s₀ + s₁) = p·s₀ + p·s₁
314
315/// Apply the forward NTT to each share independently.
316pub fn masked_ntt(m: &mut MaskedPoly) {
317 ntt::ntt(&mut m.share0);
318 ntt::ntt(&mut m.share1);
319}
320
321/// Apply the inverse NTT to each share independently.
322pub fn masked_ntt_inv(m: &mut MaskedPoly) {
323 ntt::ntt_inv(&mut m.share0);
324 ntt::ntt_inv(&mut m.share1);
325}
326
327/// Pointwise multiply a masked polynomial by a **public** polynomial
328/// in NTT domain. Returns a fresh `MaskedPoly` holding the product.
329///
330/// `c_hat` (the per-iteration challenge polynomial in NTT form) is
331/// public — the verifier recomputes it, and any side-channel
332/// observation of it does not help an attacker recover the secret
333/// shares.
334pub fn masked_pointwise_mul_public(masked: &MaskedPoly, c_hat: &[i32; N]) -> MaskedPoly {
335 MaskedPoly {
336 share0: ntt::pointwise_mul(&masked.share0, c_hat),
337 share1: ntt::pointwise_mul(&masked.share1, c_hat),
338 }
339}
340
341/// Masked matrix-vector multiplication in the NTT domain: for each
342/// output row `i`, compute `sum_j (A_hat[i][j] · y_hat_m[j])` as a
343/// masked accumulator.
344///
345/// Since `A_hat` is public (derived from the public seed `rho`) and
346/// the NTT is linear, each share is multiplied independently by the
347/// same public matrix and accumulated into the corresponding output
348/// share — no secret×secret operation occurs, so first-order shares
349/// remain sufficient.
350///
351/// `y_hat_m` must have length ≥ `l` (the first `l` masked polynomials
352/// are consumed). `out` must have length ≥ `k`.
353pub fn masked_mat_vec_mul(
354 a_hat: &[[[i32; N]; super::params::MAX_L]; super::params::MAX_K],
355 y_hat_m: &[MaskedPoly],
356 k: usize,
357 l: usize,
358 out: &mut [MaskedPoly],
359) {
360 for i in 0..k {
361 out[i].share0 = [0i32; N];
362 out[i].share1 = [0i32; N];
363 for j in 0..l {
364 let prod0 = ntt::pointwise_mul(&a_hat[i][j], &y_hat_m[j].share0);
365 let prod1 = ntt::pointwise_mul(&a_hat[i][j], &y_hat_m[j].share1);
366 for n in 0..N {
367 out[i].share0[n] = mod_q(out[i].share0[n] + prod0[n]);
368 out[i].share1[n] = mod_q(out[i].share1[n] + prod1[n]);
369 }
370 }
371 }
372}
373
374/// Low-memory variant of `masked_mat_vec_mul`: recomputes each
375/// `a_hat[i][j]` polynomial on-the-fly from the public seed `rho`
376/// via SHAKE128 instead of holding the full k×l matrix in memory.
377///
378/// Trade-off identical to `mat_vec_mul_lazy` (dsa.rs): saves up to
379/// 57 KB of stack for the `a_hat` matrix at the cost of repeated
380/// SHAKE128 invocations. Called from `sign_internal` when both
381/// `sca-masked-y` and `low-mem` are enabled.
382pub fn masked_mat_vec_mul_lazy(rho: &[u8; 32], y_hat_m: &[MaskedPoly], k: usize, l: usize, out: &mut [MaskedPoly]) {
383 use super::sample;
384 for i in 0..k {
385 out[i].share0 = [0i32; N];
386 out[i].share1 = [0i32; N];
387 for j in 0..l {
388 let a_ij = sample::rej_ntt_poly(rho, j as u8, i as u8);
389 let prod0 = ntt::pointwise_mul(&a_ij, &y_hat_m[j].share0);
390 let prod1 = ntt::pointwise_mul(&a_ij, &y_hat_m[j].share1);
391 for n in 0..N {
392 out[i].share0[n] = mod_q(out[i].share0[n] + prod0[n]);
393 out[i].share1[n] = mod_q(out[i].share1[n] + prod1[n]);
394 }
395 }
396 }
397}
398
399// =====================================================================
400// Local DSE-resistant zeroization helpers
401// =====================================================================
402
403/// Securely erase a polynomial buffer (write_volatile + compiler fence).
404fn zeroize_poly(p: &mut [i32; N]) {
405 for c in p.iter_mut() {
406 unsafe { core::ptr::write_volatile(c, 0) };
407 }
408 core::sync::atomic::compiler_fence(core::sync::atomic::Ordering::SeqCst);
409}
410
/// Overwrite every byte of `b` with zero.
///
/// Each store is a volatile write so it cannot be elided as a dead
/// store, and a `SeqCst` compiler fence follows the loop so the compiler
/// does not move the wipe past later code. An empty slice is a no-op
/// apart from the fence.
fn zeroize_bytes(b: &mut [u8]) {
    use core::sync::atomic::{compiler_fence, Ordering};

    b.iter_mut().for_each(|slot| {
        // SAFETY: `slot` comes from `iter_mut`, so it is a valid,
        // aligned, exclusively borrowed `u8` for the whole write.
        unsafe { core::ptr::write_volatile(slot, 0) }
    });
    compiler_fence(Ordering::SeqCst);
}
418
#[cfg(test)]
mod tests {
    use super::super::ntt;
    use super::*;

    /// Deterministic xorshift-style generator (same test PRNG as in
    /// `shuffle.rs::tests`). Not cryptographically secure; it only makes
    /// the masking tests reproducible.
    struct TestRng(u64);

    impl CryptoRng for TestRng {
        fn fill_bytes(&mut self, dest: &mut [u8]) -> Result<(), MlDsaError> {
            for chunk in dest.chunks_mut(8) {
                self.0 ^= self.0 << 13;
                self.0 ^= self.0 >> 7;
                self.0 ^= self.0 << 17;
                // The last chunk may be shorter than 8 bytes.
                let word = self.0.to_le_bytes();
                let len = chunk.len();
                chunk.copy_from_slice(&word[..len]);
            }
            Ok(())
        }
    }

    /// Fixed, non-trivial polynomial with coefficients in `[0, q-1]`.
    fn fixture_poly() -> [i32; N] {
        core::array::from_fn(|i| (i as i32 * 12345 + 7).rem_euclid(Q))
    }

    /// `masked_mat_vec_mul` followed by `unmask` must equal the plain
    /// NTT-domain product `A·y`.
    #[test]
    fn masked_mat_vec_mul_matches_unmasked() {
        use super::super::params::{MAX_K, MAX_L, MlDsa65, Params as _};
        use super::super::sample;

        let (k, l) = (MlDsa65::K, MlDsa65::L);
        let rho = [0x17u8; 32];

        // Reference: unmasked A·y.
        let a_hat = sample::expand_a::<MlDsa65>(&rho);
        let mut y = [[0i32; N]; MAX_L];
        for (j, y_j) in y.iter_mut().enumerate().take(l) {
            for (n, coeff) in y_j.iter_mut().enumerate() {
                *coeff = ((j as i32 * 100 + n as i32) % Q).abs();
            }
            ntt::ntt(y_j);
        }
        let mut w_ref = [[0i32; N]; MAX_K];
        for i in 0..k {
            for j in 0..l {
                let prod = ntt::pointwise_mul(&a_hat[i][j], &y[j]);
                for (acc, term) in w_ref[i].iter_mut().zip(prod.iter()) {
                    *acc = mod_q(*acc + *term);
                }
            }
        }

        // Masked path: mask y, multiply, unmask each row.
        let mut rng = TestRng(0xFEEDC0DEBADCAFEu64);
        let mut y_m: [MaskedPoly; MAX_L] = core::array::from_fn(|_| MaskedPoly::zero());
        for (slot, y_j) in y_m.iter_mut().zip(y.iter()).take(l) {
            *slot = MaskedPoly::mask(y_j, &mut rng).unwrap();
        }
        let mut w_m: [MaskedPoly; MAX_K] = core::array::from_fn(|_| MaskedPoly::zero());
        masked_mat_vec_mul(&a_hat, &y_m, k, l, &mut w_m);

        for (i, (row_m, row_ref)) in w_m.iter().zip(w_ref.iter()).enumerate().take(k) {
            let w_got = row_m.unmask();
            assert_eq!(w_got, *row_ref, "mismatch at row {}", i);
        }
    }

    /// `sample_expand_mask` must represent the same polynomial as the
    /// unmasked `sample::expand_mask` for the same seed and nonce.
    #[test]
    fn masked_expand_mask_matches_unmasked_expand_mask() {
        use super::super::params::MlDsa65;
        use super::super::params::Params as _;
        use super::super::sample;

        let rho = [0x42u8; 64];
        let kappa = 0u16;

        // Reference: unmasked ExpandMask; row 0 uses nonce `kappa`.
        let y_ref = sample::expand_mask::<MlDsa65>(&rho, kappa);

        // Masked path: sample as two shares, then recombine.
        let unmasked = MaskedPoly::sample_expand_mask(
            &rho,
            kappa,
            MlDsa65::GAMMA1,
            MlDsa65::BITLEN_GAMMA1_MINUS1,
        )
        .unmask();

        // The reference is centered, `unmask()` returns values in
        // [0, q-1]; bring the latter into the centered range first.
        for (i, (&r, &raw)) in y_ref[0].iter().zip(unmasked.iter()).enumerate() {
            let u = if raw > Q / 2 { raw - Q } else { raw };
            assert_eq!(r, u, "mismatch at i={}: ref={}, masked={}", i, r, u);
        }
    }

    /// Masking then unmasking returns the original polynomial.
    #[test]
    fn mask_unmask_roundtrip() {
        let p = fixture_poly();
        let mut rng = TestRng(0xCAFEF00DDEADBEEF);
        let recovered = MaskedPoly::mask(&p, &mut rng).unwrap().unmask();
        assert_eq!(recovered, p);
    }

    /// Refreshing (twice) must not change the represented polynomial.
    #[test]
    fn refresh_preserves_unmasked_value() {
        let p = fixture_poly();
        let mut rng = TestRng(0x1234567890ABCDEF);
        let mut mp = MaskedPoly::mask(&p, &mut rng).unwrap();
        for _ in 0..2 {
            mp.refresh(&mut rng).unwrap();
        }
        assert_eq!(mp.unmask(), p);
    }

    /// The share-wise NTT must agree with the NTT of the plain polynomial.
    #[test]
    fn masked_ntt_matches_regular_ntt() {
        let p = fixture_poly();
        let mut expected = p;
        ntt::ntt(&mut expected);

        let mut rng = TestRng(0x0123456789ABCDEF);
        let mut mp = MaskedPoly::mask(&p, &mut rng).unwrap();
        masked_ntt(&mut mp);

        // share0 + share1 mod q must equal NTT(p) coefficient-wise.
        let shares = mp.share0.iter().zip(mp.share1.iter());
        for (i, ((s0, s1), want)) in shares.zip(expected.iter()).enumerate() {
            let got = mod_q(*s0 + *s1);
            assert_eq!(got, *want, "mismatch at i={}", i);
        }
    }

    /// Masked × public pointwise multiplication must agree with the
    /// plain `ntt::pointwise_mul` once both are reduced to [0, q-1].
    #[test]
    fn masked_pointwise_mul_public_matches_unmasked() {
        // Bring the secret and an arbitrary "public" polynomial into
        // NTT domain.
        let mut secret_ntt = fixture_poly();
        ntt::ntt(&mut secret_ntt);
        let mut public_ntt: [i32; N] =
            core::array::from_fn(|i| (i as i32 * 991 + 13).rem_euclid(Q));
        ntt::ntt(&mut public_ntt);

        // Reference: regular pointwise_mul. Output is in /R Montgomery
        // domain and may be in [-q, q]; normalize for comparison.
        let expected = ntt::pointwise_mul(&secret_ntt, &public_ntt).map(mod_q);

        // Masked path: mask the NTT-domain secret, multiply, unmask.
        // `unmask()` already normalizes to [0, q-1].
        let mut rng = TestRng(0xFEEDFACE12345678);
        let mp_secret = MaskedPoly::mask(&secret_ntt, &mut rng).unwrap();
        let got = masked_pointwise_mul_public(&mp_secret, &public_ntt).unmask();

        assert_eq!(got, expected);
    }
}