mirror of
https://github.com/octoleo/syncthing.git
synced 2024-11-09 14:50:56 +00:00
vendor: Update vendor dir from a80c0fda
This commit is contained in:
parent
00fa77dd47
commit
29e4b417f2
1
vendor/github.com/minio/sha256-simd/.travis.yml
generated
vendored
1
vendor/github.com/minio/sha256-simd/.travis.yml
generated
vendored
@ -21,3 +21,4 @@ matrix:
|
||||
script:
|
||||
- diff -au <(gofmt -d .) <(printf "")
|
||||
- go test -race -v ./...
|
||||
- go tool vet -asmdecl .
|
||||
|
21
vendor/github.com/minio/sha256-simd/README.md
generated
vendored
21
vendor/github.com/minio/sha256-simd/README.md
generated
vendored
@ -1,6 +1,6 @@
|
||||
# sha256-simd
|
||||
|
||||
Accelerate SHA256 computations in pure Go using AVX512 and AVX2 for Intel and ARM64 for ARM. On AVX512 it provides an up to 8x improvement (over 3 GB/s per core) in comparison to AVX2.
|
||||
Accelerate SHA256 computations in pure Go using AVX512, SHA Extensions and AVX2 for Intel and ARM64 for ARM. On AVX512 it provides an up to 8x improvement (over 3 GB/s per core) in comparison to AVX2. SHA Extensions give a performance boost of close to 4x over AVX2.
|
||||
|
||||
## Introduction
|
||||
|
||||
@ -8,7 +8,19 @@ This package is designed as a replacement for `crypto/sha256`. For Intel CPUs it
|
||||
|
||||
This package uses Golang assembly. The AVX512 version is based on Intel's "multi-buffer crypto library for IPSec", whereas the other Intel implementations are described in "Fast SHA-256 Implementations on Intel Architecture Processors" by J. Guilford et al.
|
||||
|
||||
## New: Support for AVX512
|
||||
## New: Support for Intel SHA Extensions
|
||||
|
||||
Support for the Intel SHA Extensions has been added by Kristofer Peterson (@svenski123), originally developed for spacemeshos [here](https://github.com/spacemeshos/POET/issues/23). On CPUs that support it (known so far: Intel Celeron J3455 and AMD Ryzen) it gives a significant boost in performance (with thanks to @AudriusButkevicius for reporting the results; full results [here](https://github.com/minio/sha256-simd/pull/37#issuecomment-451607827)).
|
||||
|
||||
```
|
||||
$ benchcmp avx2.txt sha-ext.txt
|
||||
benchmark AVX2 MB/s SHA Ext MB/s speedup
|
||||
BenchmarkHash5M 514.40 1975.17 3.84x
|
||||
```
|
||||
|
||||
Thanks to Kristofer Peterson, we also added further performance improvements, such as optimized padding and endian conversions, which sped up all implementations: the Intel SHA Extensions alone doubled performance for small sizes, while the other changes increased everything by roughly 50%.
|
||||
|
||||
## Support for AVX512
|
||||
|
||||
We have added support for AVX512 which results in an up to 8x performance improvement over AVX2 (3.0 GHz Xeon Platinum 8124M CPU):
|
||||
|
||||
@ -66,6 +78,7 @@ Below is the speed in MB/s for a single core (ranked fast to slow) for blocks la
|
||||
| Processor | SIMD | Speed (MB/s) |
|
||||
| --------------------------------- | ------- | ------------:|
|
||||
| 3.0 GHz Intel Xeon Platinum 8124M | AVX512 | 3498 |
|
||||
| 3.7 GHz AMD Ryzen 7 2700X | SHA Ext | 1979 |
|
||||
| 1.2 GHz ARM Cortex-A53 | ARM64 | 638 |
|
||||
| 3.0 GHz Intel Xeon Platinum 8124M | AVX2 | 449 |
|
||||
| 3.1 GHz Intel Core i7 | AVX | 362 |
|
||||
@ -84,7 +97,7 @@ Other applications that can benefit from enhanced SHA256 performance are dedupli
|
||||
## ARM SHA Extensions
|
||||
|
||||
The 64-bit ARMv8 core has introduced new instructions for SHA1 and SHA2 acceleration as part of the [Cryptography Extensions](http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.ddi0501f/CHDFJBCJ.html). Below you can see a small excerpt highlighting one of the rounds as is done for the SHA256 calculation process (for full code see [sha256block_arm64.s](https://github.com/minio/sha256-simd/blob/master/sha256block_arm64.s)).
|
||||
|
||||
|
||||
```
|
||||
sha256h q2, q3, v9.4s
|
||||
sha256h2 q3, q4, v9.4s
|
||||
@ -100,7 +113,7 @@ The 64-bit ARMv8 core has introduced new instructions for SHA1 and SHA2 accelera
|
||||
|
||||
### Detailed benchmarks
|
||||
|
||||
Benchmarks generated on a 1.2 GHz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/).
|
||||
Benchmarks generated on a 1.2 GHz Quad-Core ARM Cortex A53 equipped [Pine64](https://www.pine64.com/).
|
||||
|
||||
```
|
||||
minio@minio-arm:$ benchcmp golang.txt arm64.txt
|
||||
|
27
vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s
generated
vendored
27
vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s
generated
vendored
@ -32,8 +32,6 @@
|
||||
// equivalents
|
||||
//
|
||||
|
||||
#include "textflag.h"
|
||||
|
||||
DATA K256<>+0x000(SB)/8, $0x71374491428a2f98
|
||||
DATA K256<>+0x008(SB)/8, $0xe9b5dba5b5c0fbcf
|
||||
DATA K256<>+0x010(SB)/8, $0x71374491428a2f98
|
||||
@ -114,16 +112,25 @@ DATA K256<>+0x258(SB)/8, $0x0b0a090803020100
|
||||
|
||||
GLOBL K256<>(SB), 8, $608
|
||||
|
||||
// func blockAvx2(h []uint32, message []uint8)
|
||||
TEXT ·blockAvx2(SB), 7, $0
|
||||
// We need 0x220 stack space aligned on a 512 boundary, so for the
|
||||
// worstcase-aligned SP we need twice this amount, being 1088 (=0x440)
|
||||
//
|
||||
// SP aligned end-aligned stacksize
|
||||
// 100013d0 10001400 10001620 592
|
||||
// 100013d8 10001400 10001620 584
|
||||
// 100013e0 10001600 10001820 1088
|
||||
// 100013e8 10001600 10001820 1080
|
||||
|
||||
MOVQ ctx+0(FP), DI // DI: &h
|
||||
MOVQ inp+24(FP), SI // SI: &message
|
||||
MOVQ inplength+32(FP), DX // len(message)
|
||||
// func blockAvx2(h []uint32, message []uint8)
|
||||
TEXT ·blockAvx2(SB),$1088-48
|
||||
|
||||
MOVQ h+0(FP), DI // DI: &h
|
||||
MOVQ message_base+24(FP), SI // SI: &message
|
||||
MOVQ message_len+32(FP), DX // len(message)
|
||||
ADDQ SI, DX // end pointer of input
|
||||
MOVQ SP, R11 // copy stack pointer
|
||||
SUBQ $0x220, SP // sp -= 0x220
|
||||
ANDQ $0xfffffffffffffc00, SP // align stack frame
|
||||
ADDQ $0x220, SP // sp += 0x220
|
||||
ANDQ $0xfffffffffffffe00, SP // align stack frame
|
||||
ADDQ $0x1c0, SP
|
||||
MOVQ DI, 0x40(SP) // save ctx
|
||||
MOVQ SI, 0x48(SP) // save input
|
||||
@ -1435,7 +1442,7 @@ loop2:
|
||||
|
||||
done:
|
||||
MOVQ BP, SP
|
||||
MOVQ 0x58(SP), SP
|
||||
MOVQ 0x58(SP), SP // restore saved stack pointer
|
||||
WORD $0xf8c5; BYTE $0x77 // vzeroupper
|
||||
|
||||
RET
|
||||
|
2
vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
generated
vendored
2
vendor/github.com/minio/sha256-simd/sha256blockAvx512_amd64.s
generated
vendored
@ -2,7 +2,7 @@ TEXT ·sha256X16Avx512(SB), 7, $0
|
||||
MOVQ digests+0(FP), DI
|
||||
MOVQ scratch+8(FP), R12
|
||||
MOVQ mask_len+32(FP), SI
|
||||
MOVQ r14+24(FP), R13
|
||||
MOVQ mask_base+24(FP), R13
|
||||
MOVQ (R13), R14
|
||||
LONG $0x92fbc1c4; BYTE $0xce
|
||||
LEAQ inputs+48(FP), AX
|
||||
|
30
vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s
generated
vendored
30
vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s
generated
vendored
@ -232,15 +232,15 @@
|
||||
ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
// func blockAvx(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64)
|
||||
TEXT ·blockAvx(SB), 7, $0
|
||||
TEXT ·blockAvx(SB), 7, $0-80
|
||||
|
||||
MOVQ h+0(FP), SI // SI: &h
|
||||
MOVQ message+24(FP), R8 // &message
|
||||
MOVQ lenmessage+32(FP), R9 // length of message
|
||||
MOVQ h+0(FP), SI // SI: &h
|
||||
MOVQ message_base+24(FP), R8 // &message
|
||||
MOVQ message_len+32(FP), R9 // length of message
|
||||
CMPQ R9, $0
|
||||
JEQ done_hash
|
||||
ADDQ R8, R9
|
||||
MOVQ R9, _inp_end+64(FP) // store end of message
|
||||
MOVQ R9, reserved2+64(FP) // store end of message
|
||||
|
||||
// Register definition
|
||||
// a --> eax
|
||||
@ -269,7 +269,7 @@ TEXT ·blockAvx(SB), 7, $0
|
||||
MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA
|
||||
MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00
|
||||
|
||||
MOVQ message+24(FP), SI // SI: &message
|
||||
MOVQ message_base+24(FP), SI // SI: &message
|
||||
|
||||
loop0:
|
||||
LEAQ constants<>(SB), BP
|
||||
@ -284,25 +284,25 @@ loop0:
|
||||
MOVOU 3*16(SI), X7
|
||||
LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13
|
||||
|
||||
MOVQ SI, _inp+72(FP)
|
||||
MOVQ SI, reserved3+72(FP)
|
||||
MOVD $0x3, DI
|
||||
|
||||
// schedule 48 input dwords, by doing 3 rounds of 16 each
|
||||
loop1:
|
||||
LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
|
||||
|
||||
LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
|
||||
|
||||
LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
|
||||
|
||||
LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
ADDQ $64, BP
|
||||
FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
|
||||
|
||||
@ -313,14 +313,14 @@ loop1:
|
||||
|
||||
loop2:
|
||||
LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48)
|
||||
DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52)
|
||||
DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56)
|
||||
DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60)
|
||||
|
||||
LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
ADDQ $32, BP
|
||||
DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48)
|
||||
DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52)
|
||||
@ -351,9 +351,9 @@ loop2:
|
||||
ADDL (7*4)(SI), R11 // H7 = h + H7
|
||||
MOVL R11, (7*4)(SI)
|
||||
|
||||
MOVQ _inp+72(FP), SI
|
||||
MOVQ reserved3+72(FP), SI
|
||||
ADDQ $64, SI
|
||||
CMPQ _inp_end+64(FP), SI
|
||||
CMPQ reserved2+64(FP), SI
|
||||
JNE loop0
|
||||
|
||||
done_hash:
|
||||
|
2
vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s
generated
vendored
2
vendor/github.com/minio/sha256-simd/sha256blockSha_amd64.s
generated
vendored
@ -2,7 +2,7 @@
|
||||
|
||||
// SHA intrinsic version of SHA256
|
||||
|
||||
// Minio Cloud Storage, (C) 2018 Minio, Inc.
|
||||
// Kristofer Peterson, (C) 2018.
|
||||
//
|
||||
// Licensed under the Apache License, Version 2.0 (the "License");
|
||||
// you may not use this file except in compliance with the License.
|
||||
|
30
vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s
generated
vendored
30
vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s
generated
vendored
@ -244,15 +244,15 @@
|
||||
ADDL R13, h // h = h + S1 + CH + k + w + S0 + MAJ
|
||||
|
||||
// func blockSsse(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64)
|
||||
TEXT ·blockSsse(SB), 7, $0
|
||||
TEXT ·blockSsse(SB), 7, $0-80
|
||||
|
||||
MOVQ h+0(FP), SI // SI: &h
|
||||
MOVQ message+24(FP), R8 // &message
|
||||
MOVQ lenmessage+32(FP), R9 // length of message
|
||||
MOVQ h+0(FP), SI // SI: &h
|
||||
MOVQ message_base+24(FP), R8 // &message
|
||||
MOVQ message_len+32(FP), R9 // length of message
|
||||
CMPQ R9, $0
|
||||
JEQ done_hash
|
||||
ADDQ R8, R9
|
||||
MOVQ R9, _inp_end+64(FP) // store end of message
|
||||
MOVQ R9, reserved2+64(FP) // store end of message
|
||||
|
||||
// Register definition
|
||||
// a --> eax
|
||||
@ -281,7 +281,7 @@ TEXT ·blockSsse(SB), 7, $0
|
||||
MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA
|
||||
MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00
|
||||
|
||||
MOVQ message+24(FP), SI // SI: &message
|
||||
MOVQ message_base+24(FP), SI // SI: &message
|
||||
|
||||
loop0:
|
||||
LEAQ constants<>(SB), BP
|
||||
@ -296,7 +296,7 @@ loop0:
|
||||
MOVOU 3*16(SI), X7
|
||||
LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13
|
||||
|
||||
MOVQ SI, _inp+72(FP)
|
||||
MOVQ SI, reserved3+72(FP)
|
||||
MOVD $0x3, DI
|
||||
|
||||
// Align
|
||||
@ -306,22 +306,22 @@ loop0:
|
||||
loop1:
|
||||
MOVOU X4, X9
|
||||
LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
|
||||
|
||||
MOVOU X4, X9
|
||||
LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
|
||||
|
||||
MOVOU X4, X9
|
||||
LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11)
|
||||
|
||||
MOVOU X4, X9
|
||||
LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
ADDQ $64, BP
|
||||
FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8)
|
||||
|
||||
@ -333,7 +333,7 @@ loop1:
|
||||
loop2:
|
||||
MOVOU X4, X9
|
||||
LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48)
|
||||
DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52)
|
||||
DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56)
|
||||
@ -341,7 +341,7 @@ loop2:
|
||||
|
||||
MOVOU X5, X9
|
||||
LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */
|
||||
MOVOU X9, _xfer+48(FP)
|
||||
MOVOU X9, reserved0+48(FP)
|
||||
ADDQ $32, BP
|
||||
DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48)
|
||||
DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52)
|
||||
@ -372,9 +372,9 @@ loop2:
|
||||
ADDL (7*4)(SI), R11 // H7 = h + H7
|
||||
MOVL R11, (7*4)(SI)
|
||||
|
||||
MOVQ _inp+72(FP), SI
|
||||
MOVQ reserved3+72(FP), SI
|
||||
ADDQ $64, SI
|
||||
CMPQ _inp_end+64(FP), SI
|
||||
CMPQ reserved2+64(FP), SI
|
||||
JNE loop0
|
||||
|
||||
done_hash:
|
||||
|
2
vendor/modules.txt
vendored
2
vendor/modules.txt
vendored
@ -95,7 +95,7 @@ github.com/lib/pq
|
||||
github.com/lib/pq/oid
|
||||
# github.com/matttproud/golang_protobuf_extensions v1.0.1
|
||||
github.com/matttproud/golang_protobuf_extensions/pbutil
|
||||
# github.com/minio/sha256-simd v0.0.0-20190104231041-e529fa194128
|
||||
# github.com/minio/sha256-simd v0.0.0-20190117184323-cc1980cb0338
|
||||
github.com/minio/sha256-simd
|
||||
# github.com/oschwald/geoip2-golang v1.1.0
|
||||
github.com/oschwald/geoip2-golang
|
||||
|
Loading…
Reference in New Issue
Block a user