vendor: Update vendor dir from a80c0fda

2024-12-23 03:18:59 +00:00 · 2019-01-20 08:50:40 +01:00 · 2019-01-20 08:50:40 +01:00 · 29e4b417f2
commit 29e4b417f2
parent 00fa77dd47
8 changed files with 68 additions and 47 deletions
--- a/vendor/github.com/minio/sha256-simd/.travis.yml
+++ b/vendor/github.com/minio/sha256-simd/.travis.yml
@ -21,3 +21,4 @@ matrix:
 script:
 - diff -au <(gofmt -d .) <(printf "")
 - go test -race -v ./...
+- go tool vet -asmdecl .
--- a/vendor/github.com/minio/sha256-simd/README.md
+++ b/vendor/github.com/minio/sha256-simd/README.md
@ -1,6 +1,6 @@
 # sha256-simd

-Accelerate SHA256 computations in pure Go using AVX512 and AVX2 for Intel and ARM64 for ARM. On AVX512 it provides an up to 8x improvement (over 3 GB/s per core) in comparison to AVX2.
+Accelerate SHA256 computations in pure Go using AVX512, SHA Extensions and AVX2 for Intel and ARM64 for ARM. On AVX512 it provides an up to 8x improvement (over 3 GB/s per core) in comparison to AVX2. SHA Extensions give a performance boost of close to 4x over AVX2.

 ## Introduction

@ -8,7 +8,19 @@ This package is designed as a replacement for `crypto/sha256`. For Intel CPUs it

 This package uses Golang assembly. The AVX512 version is based on the Intel's "multi-buffer crypto library for IPSec" whereas the other Intel implementations are described in "Fast SHA-256 Implementations on Intel Architecture Processors" by J. Guilford et al.

-## New: Support for AVX512
+## New: Support for Intel SHA Extensions
+
+Support for the Intel SHA Extensions has been added by Kristofer Peterson (@svenski123), originally developed for spacemeshos [here](https://github.com/spacemeshos/POET/issues/23). On CPUs that support them (known so far: Intel Celeron J3455 and AMD Ryzen), they give a significant boost in performance (with thanks to @AudriusButkevicius for reporting the results; full results [here](https://github.com/minio/sha256-simd/pull/37#issuecomment-451607827)).
+
+```
+$ benchcmp avx2.txt sha-ext.txt
+benchmark           AVX2 MB/s    SHA Ext MB/s  speedup
+BenchmarkHash5M     514.40       1975.17       3.84x
+```
+
+Thanks to Kristofer Peterson, we also added further performance changes, such as optimized padding and endian conversions, which sped up all implementations: the Intel SHA Extensions alone doubled performance for small sizes, while the other changes increased everything by roughly 50%.
+
+## Support for AVX512

 We have added support for AVX512 which results in an up to 8x performance improvement over AVX2 (3.0 GHz Xeon Platinum 8124M CPU):

@ -66,6 +78,7 @@ Below is the speed in MB/s for a single core (ranked fast to slow) for blocks la
 | Processor                         | SIMD    | Speed (MB/s) |
 | --------------------------------- | ------- | ------------:|
 | 3.0 GHz Intel Xeon Platinum 8124M | AVX512  |         3498 |
+| 3.7 GHz AMD Ryzen 7 2700X         | SHA Ext |         1979 |
 | 1.2 GHz ARM Cortex-A53            | ARM64   |          638 |
 | 3.0 GHz Intel Xeon Platinum 8124M | AVX2    |          449 |
 | 3.1 GHz Intel Core i7             | AVX     |          362 |
@ -84,7 +97,7 @@ Other applications that can benefit from enhanced SHA256 performance are dedupli
 ## ARM SHA Extensions

 The 64-bit ARMv8 core has introduced new instructions for SHA1 and SHA2 acceleration as part of the [Cryptography Extensions](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0501f/CHDFJBCJ.html). Below you can see a small excerpt highlighting one of the rounds as is done for the SHA256 calculation process (for full code see [sha256block_arm64.s](https://github.com/minio/sha256-simd/blob/master/sha256block_arm64.s)).
- 
+
 ```
 sha256h    q2, q3, v9.4s
 sha256h2   q3, q4, v9.4s
@ -100,7 +113,7 @@ The 64-bit ARMv8 core has introduced new instructions for SHA1 and SHA2 accelera

 ### Detailed benchmarks

-Benchmarks generated on a 1.2 Ghz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/). 
+Benchmarks generated on a 1.2 Ghz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/).

 ```
 minio@minio-arm:$ benchcmp golang.txt arm64.txt
--- a/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s
+++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s
@ -32,8 +32,6 @@
 // equivalents
 //

-#include "textflag.h"
-
 DATA K256<>+0x000(SB)/8, $0x71374491428a2f98
 DATA K256<>+0x008(SB)/8, $0xe9b5dba5b5c0fbcf
 DATA K256<>+0x010(SB)/8, $0x71374491428a2f98
@ -114,16 +112,25 @@ DATA K256<>+0x258(SB)/8, $0x0b0a090803020100

 GLOBL K256<>(SB), 8, $608

-// func blockAvx2(h []uint32, message []uint8)
-TEXT ·blockAvx2(SB), 7, $0
+// We need 0x220 bytes of stack space aligned on a 512-byte boundary, so for
+// the worst-case alignment of SP we need twice this amount, i.e. 1088 (=0x440) bytes
+//
+// SP        aligned   end-aligned  stacksize
+// 100013d0  10001400  10001620     592
+// 100013d8  10001400  10001620     584
+// 100013e0  10001600  10001820     1088
+// 100013e8  10001600  10001820     1080

-	MOVQ ctx+0(FP), DI           // DI: &h
-	MOVQ inp+24(FP), SI          // SI: &message
-	MOVQ inplength+32(FP), DX    // len(message)
+// func blockAvx2(h []uint32, message []uint8)
+TEXT ·blockAvx2(SB),$1088-48
+
+	MOVQ h+0(FP), DI             // DI: &h
+	MOVQ message_base+24(FP), SI // SI: &message
+	MOVQ message_len+32(FP), DX  // len(message)
 	ADDQ SI, DX                  // end pointer of input
 	MOVQ SP, R11                 // copy stack pointer
-	SUBQ $0x220, SP              // sp -= 0x220
-	ANDQ $0xfffffffffffffc00, SP // align stack frame
+	ADDQ $0x220, SP              // sp += 0x220
+	ANDQ $0xfffffffffffffe00, SP // align stack frame
 	ADDQ $0x1c0, SP
 	MOVQ DI, 0x40(SP)            // save ctx
 	MOVQ SI, 0x48(SP)            // save input
@ -1435,7 +1442,7 @@ loop2:

 done:
 	MOVQ BP, SP
-	MOVQ 0x58(SP), SP
+	MOVQ 0x58(SP), SP        // restore saved stack pointer
 	WORD $0xf8c5; BYTE $0x77 // vzeroupper

 	RET
--- a/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
+++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
@ -2,7 +2,7 @@ TEXT ·sha256X16Avx512(SB), 7, $0
 	MOVQ  digests+0(FP), DI
 	MOVQ  scratch+8(FP), R12
 	MOVQ  mask_len+32(FP), SI
-	MOVQ  r14+24(FP), R13
+	MOVQ  mask_base+24(FP), R13
 	MOVQ  (R13), R14
 	LONG  $0x92fbc1c4; BYTE $0xce
 	LEAQ  inputs+48(FP), AX
--- a/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s
+++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s
@ -232,15 +232,15 @@
 	ADDL R13, h                // h = h + S1 + CH + k + w + S0 + MAJ

 // func blockAvx(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64)
-TEXT ·blockAvx(SB), 7, $0
+TEXT ·blockAvx(SB), 7, $0-80

-	MOVQ h+0(FP), SI           // SI: &h
-	MOVQ message+24(FP), R8    // &message
-	MOVQ lenmessage+32(FP), R9 // length of message
+	MOVQ h+0(FP), SI             // SI: &h
+	MOVQ message_base+24(FP), R8 // &message
+	MOVQ message_len+32(FP), R9  // length of message
 	CMPQ R9, $0
 	JEQ  done_hash
 	ADDQ R8, R9
-	MOVQ R9, _inp_end+64(FP)   // store end of message
+	MOVQ R9, reserved2+64(FP)    // store end of message

 	// Register definition
 	//  a -->  eax
@ -269,7 +269,7 @@ TEXT ·blockAvx(SB), 7, $0
 	MOVOU shuf00BA<>(SB), X10  // shuffle xBxA -> 00BA
 	MOVOU shufDC00<>(SB), X12  // shuffle xDxC -> DC00

-	MOVQ message+24(FP), SI // SI: &message
+	MOVQ message_base+24(FP), SI // SI: &message

 loop0:
 	LEAQ constants<>(SB), BP
@ -284,25 +284,25 @@ loop0:
 	MOVOU 3*16(SI), X7
 	LONG  $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13

-	MOVQ SI, _inp+72(FP)
+	MOVQ SI, reserved3+72(FP)
 	MOVD $0x3, DI

 	// schedule 48 input dwords, by doing 3 rounds of 16 each
 loop1:
 	LONG  $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP]   /* Add 1st constant to first part of message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	FOUR_ROUNDS_AND_SCHED(AX, BX,  CX,  R8, DX, R9, R10, R11)

 	LONG  $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP]   /* Add 2nd constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX,  CX,  R8)

 	LONG  $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP]   /* Add 3rd constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	FOUR_ROUNDS_AND_SCHED(AX, BX,  CX,  R8, DX, R9, R10, R11)

 	LONG  $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP]   /* Add 4th constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	ADDQ  $64, BP
 	FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX,  CX,  R8)

@ -313,14 +313,14 @@ loop1:

 loop2:
 	LONG  $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP]   /* Add 1st constant to first part of message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	DO_ROUND( AX,  BX,  CX,  R8,  DX,  R9, R10, R11, 48)
 	DO_ROUND(R11,  AX,  BX,  CX,  R8,  DX,  R9, R10, 52)
 	DO_ROUND(R10, R11,  AX,  BX,  CX,  R8,  DX,  R9, 56)
 	DO_ROUND( R9, R10, R11,  AX,  BX,  CX,  R8,  DX, 60)

 	LONG  $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP]   /* Add 2nd constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	ADDQ  $32, BP
 	DO_ROUND( DX,  R9, R10, R11,  AX,  BX,  CX,  R8, 48)
 	DO_ROUND( R8,  DX,  R9, R10, R11,  AX,  BX,  CX, 52)
@ -351,9 +351,9 @@ loop2:
 	ADDL (7*4)(SI), R11 // H7 = h + H7
 	MOVL R11, (7*4)(SI)

-	MOVQ _inp+72(FP), SI
+	MOVQ reserved3+72(FP), SI
 	ADDQ $64, SI
-	CMPQ _inp_end+64(FP), SI
+	CMPQ reserved2+64(FP), SI
 	JNE  loop0

 done_hash:
--- a/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s
+++ b/vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s
@ -2,7 +2,7 @@

 // SHA intrinsic version of SHA256

-// Minio Cloud Storage, (C) 2018 Minio, Inc.
+// Kristofer Peterson, (C) 2018.
 //
 // Licensed under the Apache License, Version 2.0 (the "License");
 // you may not use this file except in compliance with the License.
--- a/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s
+++ b/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s
@ -244,15 +244,15 @@
 	ADDL R13, h                // h = h + S1 + CH + k + w + S0 + MAJ

 // func blockSsse(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64)
-TEXT ·blockSsse(SB), 7, $0
+TEXT ·blockSsse(SB), 7, $0-80

-	MOVQ h+0(FP), SI           // SI: &h
-	MOVQ message+24(FP), R8    // &message
-	MOVQ lenmessage+32(FP), R9 // length of message
+	MOVQ h+0(FP), SI             // SI: &h
+	MOVQ message_base+24(FP), R8 // &message
+	MOVQ message_len+32(FP), R9  // length of message
 	CMPQ R9, $0
 	JEQ  done_hash
 	ADDQ R8, R9
-	MOVQ R9, _inp_end+64(FP)   // store end of message
+	MOVQ R9, reserved2+64(FP)    // store end of message

 	// Register definition
 	//  a -->  eax
@ -281,7 +281,7 @@ TEXT ·blockSsse(SB), 7, $0
 	MOVOU shuf00BA<>(SB), X10  // shuffle xBxA -> 00BA
 	MOVOU shufDC00<>(SB), X12  // shuffle xDxC -> DC00

-	MOVQ message+24(FP), SI // SI: &message
+	MOVQ message_base+24(FP), SI // SI: &message

 loop0:
 	LEAQ constants<>(SB), BP
@ -296,7 +296,7 @@ loop0:
 	MOVOU 3*16(SI), X7
 	LONG  $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13

-	MOVQ SI, _inp+72(FP)
+	MOVQ SI, reserved3+72(FP)
 	MOVD $0x3, DI

 	// Align
@ -306,22 +306,22 @@ loop0:
 loop1:
 	MOVOU X4, X9
 	LONG  $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP]   /* Add 1st constant to first part of message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	FOUR_ROUNDS_AND_SCHED(AX, BX,  CX,  R8, DX, R9, R10, R11)

 	MOVOU X4, X9
 	LONG  $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP]   /* Add 2nd constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX,  CX,  R8)

 	MOVOU X4, X9
 	LONG  $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP]   /* Add 3rd constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	FOUR_ROUNDS_AND_SCHED(AX, BX,  CX,  R8, DX, R9, R10, R11)

 	MOVOU X4, X9
 	LONG  $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP]   /* Add 4th constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	ADDQ  $64, BP
 	FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX,  CX,  R8)

@ -333,7 +333,7 @@ loop1:
 loop2:
 	MOVOU X4, X9
 	LONG  $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP]   /* Add 1st constant to first part of message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	DO_ROUND( AX,  BX,  CX,  R8,  DX,  R9, R10, R11, 48)
 	DO_ROUND(R11,  AX,  BX,  CX,  R8,  DX,  R9, R10, 52)
 	DO_ROUND(R10, R11,  AX,  BX,  CX,  R8,  DX,  R9, 56)
@ -341,7 +341,7 @@ loop2:

 	MOVOU X5, X9
 	LONG  $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP]   /* Add 2nd constant to message */
-	MOVOU X9, _xfer+48(FP)
+	MOVOU X9, reserved0+48(FP)
 	ADDQ  $32, BP
 	DO_ROUND( DX,  R9, R10, R11,  AX,  BX,  CX,  R8, 48)
 	DO_ROUND( R8,  DX,  R9, R10, R11,  AX,  BX,  CX, 52)
@ -372,9 +372,9 @@ loop2:
 	ADDL (7*4)(SI), R11 // H7 = h + H7
 	MOVL R11, (7*4)(SI)

-	MOVQ _inp+72(FP), SI
+	MOVQ reserved3+72(FP), SI
 	ADDQ $64, SI
-	CMPQ _inp_end+64(FP), SI
+	CMPQ reserved2+64(FP), SI
 	JNE  loop0

 done_hash:
--- a/vendor/modules.txt
+++ b/vendor/modules.txt
@ -95,7 +95,7 @@ github.com/lib/pq
 github.com/lib/pq/oid
 # github.com/matttproud/golang_protobuf_extensions v1.0.1
 github.com/matttproud/golang_protobuf_extensions/pbutil
-# github.com/minio/sha256-simd v0.0.0-20190104231041-e529fa194128
+# github.com/minio/sha256-simd v0.0.0-20190117184323-cc1980cb0338
 github.com/minio/sha256-simd
 # github.com/oschwald/geoip2-golang v1.1.0
 github.com/oschwald/geoip2-golang