GLV impl for all SW curves except MNT (#26)
* add glv for all SW curves (except MNT)

* fix suboptimal lattice basis generation

* Rigorous bounds -> justify impl, update bw6 params to use general script

* fix formatting issues

* improve comments, glv checks, swap voracious for core::unstable_sort..

* add glv mul test

* remove unused comment

Co-authored-by: Kobi Gurkan <[email protected]>
jon-chuang and kobigurk authored Nov 6, 2020
1 parent c075ce0 commit c894564
Showing 23 changed files with 596 additions and 304 deletions.
3 changes: 1 addition & 2 deletions algebra-core/Cargo.toml
@@ -30,7 +30,6 @@ rand = { version = "0.7", default-features = false }
rayon = { version = "1", optional = true }
unroll = { version = "=0.1.4" }
itertools = { version = "0.9.0", default-features = false }
voracious_radix_sort = { version = "0.1.0", optional = true }
either = { version = "1.6.0", default-features = false }
thread-id = { version = "3.3.0", optional = true }
backtrace = { version = "0.3", optional = true }
@@ -46,7 +45,7 @@ rand_xorshift = "0.2"

[features]
default = [ "std", "rand/default" ]
std = [ "voracious_radix_sort" ]
std = []
parallel = [ "std", "rayon", "rand/default" ]
derive = [ "algebra-core-derive" ]
prefetch = [ "std" ]
20 changes: 12 additions & 8 deletions algebra-core/src/curves/batch_arith.rs
@@ -7,6 +7,9 @@ use num_traits::Zero;
/// inversion close to zero while not straining the CPU cache by generating and
/// fetching from large w-NAF tables and slices [G]
pub const BATCH_SIZE: usize = 4096;

/// The endomorphism to apply is encoded in the second operand for the
/// `batch_add_in_place_read_only` method used in the
/// `batch_scalar_mul_in_place` method:
/// 0 == Identity; 1 == Neg; 2 == GLV; 3 == GLV + Neg
pub const ENDO_CODING_BITS: usize = 2;
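
As a hedged sketch of this convention (the helpers below are hypothetical, not crate APIs, and assume the two coding bits sit in the top of a `u32` index):

const INDEX_SHIFT: usize = 32 - ENDO_CODING_BITS;

/// Pack an endomorphism code (0..=3) into the high bits of an index.
fn encode_endo(index: u32, endo: u32) -> u32 {
    debug_assert!(endo < 4 && index < (1u32 << INDEX_SHIFT));
    (endo << INDEX_SHIFT) | index
}

/// Recover (endo_code, index) from a packed operand.
fn decode_endo(packed: u32) -> (u32, u32) {
    (packed >> INDEX_SHIFT, packed & ((1u32 << INDEX_SHIFT) - 1))
}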

@@ -29,8 +32,9 @@
// Refer to e.g. Improved Techniques for Fast Exponentiation, Section 4
// Bodo Möller, 2002. https://www.bmoeller.de/pdf/fastexp-icisc2002.pdf

/// Computes [[p, 3 * p, ..., (2^w - 1) * p], ..., [q, 3* q, ..., ]]
/// We need to manipulate the offsets when using the table
/// Computes [[p_1, 3 * p_1, ..., (2^w - 1) * p_1], ..., [p_n, 3*p_n, ...,
/// (2^w - 1) p_n]] We need to manipulate the offsets when using the
/// table
fn batch_wnaf_tables(bases: &[Self], w: usize) -> Vec<Self> {
let half_size = 1 << (w - 1);
let batch_size = bases.len();
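
A sketch of the offset manipulation the doc comment mentions, assuming the base-major layout described above (`table_offset` is a hypothetical helper, not a crate API):

// Entry j of base i holds (2j + 1) * p_i, for j in 0..half_size.
fn table_offset(base_idx: usize, odd_multiple: usize, w: usize) -> usize {
    let half_size = 1 << (w - 1); // number of odd multiples stored per base
    debug_assert!(odd_multiple % 2 == 1 && odd_multiple < (1 << w));
    base_idx * half_size + (odd_multiple - 1) / 2
}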
@@ -156,9 +160,9 @@
/// or simply writes them to new_elems, using scratch space to store
/// intermediate values. Scratch space is always cleared after use.

/// No-ops, or copies of the elem in the slice `lookup` in the position of
/// the index of the first operand to the new_elems vector, are encoded
/// as !0u32 in the index for the second operand.
fn batch_add_write(
lookup: &[Self],
index: &[(u32, u32)],
@@ -169,9 +173,9 @@
/// Similar to batch_add_write, only that the lookup for the first operand
/// is performed in new_elems rather than lookup

/// No-ops, or copies of the elem in the slice `lookup` in the position of
/// the index of the first operand to the new_elems vector, are encoded
/// as !0u32 in the index for the second operand.
fn batch_add_write_read_self(
lookup: &[Self],
index: &[(u32, u32)],
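A hedged sketch of the sentinel convention shared by `batch_add_write` and `batch_add_write_read_self` (`push_add_or_copy` is illustrative, not a crate API): the pair (i, j) requests `lookup[i] + lookup[j]`, while (i, !0u32) requests a plain copy of `lookup[i]` into `new_elems`:

fn push_add_or_copy(instr: &mut Vec<(u32, u32)>, first: u32, second: Option<u32>) {
    match second {
        Some(j) => instr.push((first, j)),  // add two elements
        None => instr.push((first, !0u32)), // no-op: copy `lookup[first]`
    }
}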
157 changes: 50 additions & 107 deletions algebra-core/src/curves/bucketed_add.rs
@@ -3,42 +3,32 @@ use crate::{
AffineCurve, Vec,
};

#[cfg(feature = "std")]
use {core::cmp::Ordering, voracious_radix_sort::*};

#[cfg(not(feature = "std"))]
use crate::log2;

#[derive(Copy, Clone, Debug)]
pub struct BucketPosition {
pub bucket: u32,
pub position: u32,
}

#[cfg(feature = "std")]
impl PartialOrd for BucketPosition {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
self.bucket.partial_cmp(&other.bucket)
}
}

#[cfg(feature = "std")]
impl Radixable<u32> for BucketPosition {
type Key = u32;
#[inline]
fn key(&self) -> Self::Key {
self.bucket
}
}

impl PartialEq for BucketPosition {
fn eq(&self, other: &Self) -> bool {
self.bucket == other.bucket
}
}

/// The objective of this function is to identify an addition tree of
/// independent elliptic curve group additions for each bucket, and to batch the
/// independent additions using the batch affine inversion method.

/// The strategy taken is to sort a list of bucket assignments of all the
/// elements (which we can, for most intents and purposes, think of as being
/// uniformly random) by bucket, so that indices corresponding to elements that
/// must be added together are physically collocated in memory. Then, in the
/// first round, we proceed to perform independent additions producing
/// intermediate results at the greatest depth for each addition tree (each
/// corresponding to a bucket), and write the result to a new vector. We do so
/// to improve cache locality for future rounds, and take advantage of the
/// CPU-intensive nature of elliptic curve operations along with prefetching to
/// hide the latency of reading from essentially random locations in memory.

/// Subsequently, we perform the additions in place, and the second operands
/// become junk data. Finally, when we only have the buckets left (no more
/// additions left to perform), we copy the result into a destination `res`
/// slice.
#[inline]
#[cfg(feature = "std")]
pub fn batch_bucketed_add<C: AffineCurve>(
buckets: usize,
elems: &[C],
@@ -48,8 +38,11 @@
assert!(elems.len() > 0);

let _now = timer!();
dlsd_radixsort(bucket_positions, 8);
timer_println!(_now, "radixsort");
// We sort the bucket positions so that indices of elements assigned
// to the same bucket are contiguous. This way, we can easily identify
// how to construct the addition tree for that bucket.
bucket_positions.sort_unstable_by_key(|x| x.bucket);
timer_println!(_now, "sort");

let mut len = bucket_positions.len();
let mut all_ones = true;
@@ -68,29 +61,45 @@
// Subsequently, we perform all the operations in place
while glob < len {
let current_bucket = bucket_positions[glob].bucket;
// We are iterating over elements using a global `glob` counter, and counting
// how many in a row are being assigned to the same bucket, using the `loc`
// counter.
while glob + 1 < len && bucket_positions[glob + 1].bucket == current_bucket {
glob += 1;
loc += 1;
}
// If the current bucket is out of range (i.e. >= buckets), it encodes a no-op
if current_bucket >= buckets as u32 {
loc = 1;
} else if loc > 1 {
// `all_ones` becomes false if some addition tree has more than 2
// elements in the current round.

// In other words, we have not yet reached the terminating condition
// that, after the current round of additions, there is only one
// element left in each addition tree. That condition holds exactly
// when each addition tree had at most 2 elements in this round.
if loc > 2 {
all_ones = false;
}
let is_odd = loc % 2 == 1;
let half = loc / 2;
// We encode instructions to add adjacent elements
for i in 0..half {
instr.push((
bucket_positions[glob - (loc - 1) + 2 * i].position,
bucket_positions[glob - (loc - 1) + 2 * i + 1].position,
));
// Compactification of buckets
bucket_positions[new_len + i] = BucketPosition {
bucket: current_bucket,
position: (new_len + i) as u32,
};
}
// If there is an odd number of elements, the lone element
// without a partner is copied over to the `new_elems`
// vector, a no-op which is encoded as !0u32.
if is_odd {
instr.push((bucket_positions[glob].position, !0u32));
bucket_positions[new_len + half] = BucketPosition {
@@ -99,6 +108,13 @@
};
}
// Reset the local counter and update state.

// We compactify the `bucket_positions` data by shifting left;
// `new_len` is the length of the current compactified vector.

// We also update the `batch` counter to decide when it is
// optimal to invoke the batch inversion, i.e. when we have
// accumulated enough independent additions.
new_len += half + (loc % 2);
batch += half;
loc = 1;
@@ -131,6 +147,9 @@
len = new_len;
new_len = 0;

// We repeat the above procedure, except, since we are performing the addition
// trees in place, we do not need to encode no-ops to force a copy to a new
// vector.
while !all_ones {
all_ones = true;
while glob < len {
@@ -197,79 +216,3 @@
timer_println!(_now, "reassign");
res
}
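
The "batch affine inversion method" named above is Montgomery's trick: the field inversion in each affine addition is amortised into one inversion plus a handful of multiplications per batch. A minimal self-contained sketch, using `f64` as a stand-in for a nonzero field element purely so the snippet runs on its own (the crate applies the same idea to its `Fp` types):

// Montgomery's trick: invert n nonzero elements with one division and
// 3(n - 1) multiplications instead of n divisions.
fn batch_inverse(elems: &mut [f64]) {
    if elems.is_empty() {
        return;
    }
    // Forward pass: prefix[i] = elems[0] * ... * elems[i].
    let mut prefix = Vec::with_capacity(elems.len());
    let mut acc = 1.0;
    for &e in elems.iter() {
        acc *= e;
        prefix.push(acc);
    }
    // A single inversion of the full running product.
    let mut inv = 1.0 / acc;
    // Backward pass: peel one factor off at a time.
    for i in (1..elems.len()).rev() {
        let e = elems[i];
        elems[i] = inv * prefix[i - 1]; // = 1 / elems[i]
        inv *= e; // now inv = 1 / (elems[0] * ... * elems[i - 1])
    }
    elems[0] = inv;
}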

#[cfg(not(feature = "std"))]
pub fn batch_bucketed_add<C: AffineCurve>(
buckets: usize,
elems: &[C],
bucket_assign: &[BucketPosition],
) -> Vec<C> {
let mut elems = elems.to_vec();
let num_split = 2i32.pow(log2(buckets) / 2 + 2) as usize;
let split_size = (buckets - 1) / num_split + 1;
let ratio = elems.len() / buckets * 2;
// Get the inverted index for the positions assigning to each bucket
let mut bucket_split = vec![vec![]; num_split];
let mut index = vec![Vec::with_capacity(ratio); buckets];

for bucket_pos in bucket_assign.iter() {
let (bucket, position) = (bucket_pos.bucket as usize, bucket_pos.position as usize);
// Check the bucket assignment is valid
if bucket < buckets {
// index[bucket].push(position);
bucket_split[bucket / split_size].push((bucket, position));
}
}

for split in bucket_split {
for (bucket, position) in split {
index[bucket].push(position as u32);
}
}

// Instructions for indexes for the in place addition tree
let mut instr: Vec<Vec<(u32, u32)>> = vec![];
// Find the maximum depth of the addition tree
let max_depth = index.iter()
// log_2
.map(|x| log2(x.len()))
.max().unwrap();

// Generate in-place addition instructions that implement the addition tree
// for each bucket from the leaves to the root
for i in 0..max_depth {
let mut instr_row = Vec::<(u32, u32)>::with_capacity(buckets);
for to_add in index.iter_mut() {
if to_add.len() > 1 << (max_depth - i - 1) {
let mut new_to_add = vec![];
for j in 0..(to_add.len() / 2) {
new_to_add.push(to_add[2 * j]);
instr_row.push((to_add[2 * j], to_add[2 * j + 1]));
}
if to_add.len() % 2 == 1 {
new_to_add.push(*to_add.last().unwrap());
}
*to_add = new_to_add;
}
}
instr.push(instr_row);
}

for instr_row in instr.iter() {
for instr in C::get_chunked_instr::<(u32, u32)>(&instr_row[..], BATCH_SIZE).iter() {
elems[..].batch_add_in_place_same_slice(&instr[..]);
}
}

let zero = C::zero();
let mut res = vec![zero; buckets];

for (i, to_add) in index.iter().enumerate() {
if to_add.len() == 1 {
res[i] = elems[to_add[0] as usize];
} else if to_add.len() > 1 {
debug_assert!(false, "Did not successfully reduce to_add");
}
}
res
}
39 changes: 34 additions & 5 deletions algebra-core/src/curves/glv.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use core::ops::Neg;

/// The GLV parameters here require the following conditions to be satisfied:
/// 1. MODULUS_BITS < NUM_LIMBS * 64 - 1. So 2 * n < 1 << (64 * NUM_LIMBS)
/// We also assume that |b1| * |b2| < 2 * n
/// We also assume that (|b1| + 2) * (|b2| + 2) < 2 * n
/// We also know that either B1 or B2 is negative.
pub trait GLVParameters: Send + Sync + 'static + ModelParameters {
type WideBigInt: BigInteger;
@@ -53,9 +53,12 @@ pub trait GLVParameters: Send + Sync + 'static + ModelParameters {
let c2 = &c2_wide.as_ref()[..limbs];

// We first assume that the final 2 bits of the representation for the modulus
// are not set, so that 2 * n < R = 1 << (64 * NUM_LIMBS). Then, since we
// know that |b_i| < \sqrt{2n}, wlog k|b1|/n * |b2| < 2 * k < 2 * n < R.
// are not set, so that 2 * n < R = 1 << (64 * NUM_LIMBS).

// wlog c1 = round(k * round(|b_1| * R / n) / R) < ceil(k * ceil(|b_1| * R / n) / R)
// < k * (|b_1| * R / n + 1) / R + 1 < |b_1| * k / n + 2 < |b_1| + 2, so a
// bound like (|b1| + 2) * (|b2| + 2) < 2 * n is good enough for wlog
// d1 < 2 * n.
let mut d1 =
<Self::ScalarField as PrimeField>::BigInt::mul_no_reduce_lo(&c1, Self::B1.as_ref());
if d1 > modulus {
@@ -66,7 +69,8 @@ pub trait GLVParameters: Send + Sync + 'static + ModelParameters {
if d2 > modulus {
d2.sub_noborrow(&modulus);
}
// We compute k_2 = -(c1.b1 + c2.b2) = sign(b1) * (c2|b2| - c1|b1|)
// = sign(b1) * (d2 - d1)
let k2_field = if !Self::B1_IS_NEG {
Self::ScalarField::from(d2) - &Self::ScalarField::from(d1)
} else {
@@ -91,3 +95,28 @@
((neg1, k1), (neg2, k2))
}
}
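
For orientation, the decomposition the comments above justify can be written out as follows — a sketch in standard GLV notation rather than a verbatim transcription of the code, with R = 1 << (64 * NUM_LIMBS), lattice vectors (a_1, b_1), (a_2, b_2) satisfying a_i + b_i \lambda \equiv 0 \pmod{n}, and one of b_1, b_2 negative:

c_i \approx \Big\lfloor \frac{k}{R} \Big\lfloor \frac{|b_i| \, R}{n} \Big\rceil \Big\rceil,
\qquad
k_2 = -(c_1 b_1 + c_2 b_2),
\qquad
k_1 \equiv k - k_2 \lambda \pmod{n},

so that k \equiv k_1 + k_2 \lambda \pmod{n} with both |k_1| and |k_2| on the order of \sqrt{n}, halving the scalar length handled by each of the two multiplications.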

#[macro_export]
macro_rules! impl_glv_for_sw {
() => {
#[inline(always)]
fn has_glv() -> bool {
true
}

#[inline(always)]
fn glv_endomorphism_in_place(elem: &mut Self::BaseField) {
*elem *= &<Self as GLVParameters>::OMEGA;
}

#[inline]
fn glv_scalar_decomposition(
k: <Self::ScalarField as PrimeField>::BigInt,
) -> (
(bool, <Self::ScalarField as PrimeField>::BigInt),
(bool, <Self::ScalarField as PrimeField>::BigInt),
) {
<Self as GLVParameters>::glv_scalar_decomposition_inner(k)
}
};
}
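
A hypothetical usage sketch (the name `FooCurveParameters` is made up; real invocations live in the per-curve parameter files): once a curve supplies a `GLVParameters` impl with `OMEGA`, `B1`, `B2` and friends, the macro forwards the model's GLV hooks to it:

impl SWModelParameters for FooCurveParameters {
    // ... COEFF_A, COEFF_B, cofactor, generator, etc.

    // Wires has_glv, glv_endomorphism_in_place and
    // glv_scalar_decomposition to the GLVParameters impl.
    impl_glv_for_sw!();
}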
3 changes: 3 additions & 0 deletions algebra-core/src/curves/mod.rs
@@ -20,6 +20,7 @@ pub use self::batch_arith::*;
pub mod bucketed_add;
pub use self::bucketed_add::*;

#[macro_use]
pub mod glv;
pub use self::glv::*;

@@ -202,6 +203,8 @@ pub trait ProjectiveCurve:
self = res;
self
}

fn get_x(&mut self) -> &mut Self::BaseField;
}

/// Affine representation of an elliptic curve point guaranteed to be
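To see why the new `get_x` accessor earns its place on the trait, a hedged sketch (`apply_glv_endo` is hypothetical, not a crate function): the GLV endomorphism used here scales only the x-coordinate by `OMEGA`, typically a nontrivial cube root of unity, so batched code can apply it to a projective point in place:

// Illustrative only. In Jacobian coordinates (X, Y, Z), affine x = X / Z^2,
// so scaling X by omega realises (x, y) -> (omega * x, y) on the curve
// without touching y or z.
fn apply_glv_endo<G: ProjectiveCurve>(p: &mut G, omega: &G::BaseField) {
    *p.get_x() *= omega;
}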
4 changes: 4 additions & 0 deletions algebra-core/src/curves/models/short_weierstrass_jacobian.rs
@@ -367,6 +367,10 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
self
}
}

fn get_x(&mut self) -> &mut Self::BaseField {
&mut self.x
}
}

impl<P: Parameters> Neg for GroupProjective<P> {
@@ -141,6 +141,10 @@ impl<P: Parameters> ProjectiveCurve for GroupProjective<P> {
type ScalarField = P::ScalarField;
type Affine = GroupAffine<P>;

fn get_x(&mut self) -> &mut Self::BaseField {
&mut self.x
}

#[inline]
fn prime_subgroup_generator() -> Self {
GroupAffine::prime_subgroup_generator().into()