From fb17c36a42c2fe7b958d9f18f4aea24b72586457 Mon Sep 17 00:00:00 2001 From: Dave Collins Date: Sun, 14 Jul 2024 14:50:47 -0500 Subject: [PATCH] blake256: Add _asm note about AVX2 attempts. --- .../blake256/internal/_asm/gen_compress_asm_amd64.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/crypto/blake256/internal/_asm/gen_compress_asm_amd64.go b/crypto/blake256/internal/_asm/gen_compress_asm_amd64.go index 7915d18ee..16003fc85 100644 --- a/crypto/blake256/internal/_asm/gen_compress_asm_amd64.go +++ b/crypto/blake256/internal/_asm/gen_compress_asm_amd64.go @@ -1102,6 +1102,18 @@ func blocksAVX() { } func main() { + // ------------------------------------------------------------------------- + // NOTE: Various attempts to optimize using the larger 256-bit registers + // provided by AVX2 were made, but since only 4 columns can be computed in + // parallel, it turns out that the extra overhead of shuffling data around + // offsets any gains made in the few places where the larger registers + // actually help. That includes things such as converting the message to + // big endian using 2x256-bit registers and freeing up registers by packing + // more data into the larger registers and then making use of the extra + // freed-up registers to cache the results of xoring the message and + // constants to reuse in final rounds where they are the same. + // ------------------------------------------------------------------------- + // Ideally this would just reference the compress package with the struct // definition, but avo doesn't seem to have a way to specify a build tag // for this statement and the compress package is unable to build before the