From 5e99d38412ccd655f64f8f9ba75af2e0c47ad3b7 Mon Sep 17 00:00:00 2001 From: Jakob Borg Date: Fri, 9 Sep 2016 09:57:51 +0000 Subject: [PATCH] all: Use github.com/minio/sha256-simd GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/3581 --- cmd/syncthing/usage_report.go | 3 +- lib/protocol/bep_extensions.go | 3 +- lib/protocol/deviceid.go | 3 +- lib/scanner/blocks.go | 3 +- lib/signature/signature.go | 3 +- vendor/github.com/minio/sha256-simd/LICENSE | 202 +++ vendor/github.com/minio/sha256-simd/cpuid.go | 55 + .../github.com/minio/sha256-simd/cpuid_386.go | 24 + .../github.com/minio/sha256-simd/cpuid_386.s | 33 + .../minio/sha256-simd/cpuid_amd64.go | 24 + .../minio/sha256-simd/cpuid_amd64.s | 34 + .../github.com/minio/sha256-simd/cpuid_arm.go | 33 + .../minio/sha256-simd/cpuid_arm64.go | 33 + .../minio/sha256-simd/cpuid_ppc64.go | 32 + .../minio/sha256-simd/cpuid_ppc64le.go | 32 + vendor/github.com/minio/sha256-simd/sha256.go | 176 ++ .../sha256-simd/sha256blockAvx2_amd64.go | 22 + .../minio/sha256-simd/sha256blockAvx2_amd64.s | 1442 +++++++++++++++++ .../minio/sha256-simd/sha256blockAvx_amd64.go | 22 + .../minio/sha256-simd/sha256blockAvx_amd64.s | 409 +++++ .../sha256-simd/sha256blockSsse_amd64.go | 22 + .../minio/sha256-simd/sha256blockSsse_amd64.s | 430 +++++ .../minio/sha256-simd/sha256block_386.go | 24 + .../minio/sha256-simd/sha256block_amd64.go | 48 + .../minio/sha256-simd/sha256block_arm.go | 24 + .../minio/sha256-simd/sha256block_arm64.go | 36 + .../minio/sha256-simd/sha256block_arm64.s | 193 +++ .../minio/sha256-simd/sha256block_noasm.go | 136 ++ .../minio/sha256-simd/sha256block_ppc64.go | 22 + .../minio/sha256-simd/sha256block_ppc64le.go | 22 + vendor/manifest | 22 +- 31 files changed, 3547 insertions(+), 20 deletions(-) create mode 100644 vendor/github.com/minio/sha256-simd/LICENSE create mode 100644 vendor/github.com/minio/sha256-simd/cpuid.go create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_386.go create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_386.s create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_amd64.go create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_amd64.s create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_arm.go create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_arm64.go create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_ppc64.go create mode 100644 vendor/github.com/minio/sha256-simd/cpuid_ppc64le.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s create mode 100644 vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s create mode 100644 vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_386.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_amd64.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_arm.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_arm64.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_arm64.s create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_noasm.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_ppc64.go create mode 100644 vendor/github.com/minio/sha256-simd/sha256block_ppc64le.go diff --git a/cmd/syncthing/usage_report.go b/cmd/syncthing/usage_report.go index fc64ddbda..26a2b6842 100644 --- a/cmd/syncthing/usage_report.go +++ b/cmd/syncthing/usage_report.go @@ -9,7 +9,6 @@ package main import ( "bytes" "crypto/rand" - "crypto/sha256" "crypto/tls" "encoding/json" "fmt" @@ -19,6 +18,8 @@ import ( "strings" "time" + "github.com/minio/sha256-simd" + "github.com/syncthing/syncthing/lib/config" "github.com/syncthing/syncthing/lib/dialer" "github.com/syncthing/syncthing/lib/model" diff --git a/lib/protocol/bep_extensions.go b/lib/protocol/bep_extensions.go index 3d65d6899..61f791b02 100644 --- a/lib/protocol/bep_extensions.go +++ b/lib/protocol/bep_extensions.go @@ -7,12 +7,13 @@ package protocol import ( "bytes" - "crypto/sha256" "encoding/binary" "errors" "fmt" "time" + "github.com/minio/sha256-simd" + "github.com/syncthing/syncthing/lib/rand" ) diff --git a/lib/protocol/deviceid.go b/lib/protocol/deviceid.go index 065bb30ae..961689b3b 100644 --- a/lib/protocol/deviceid.go +++ b/lib/protocol/deviceid.go @@ -4,7 +4,6 @@ package protocol import ( "bytes" - "crypto/sha256" "encoding/base32" "encoding/binary" "errors" @@ -12,6 +11,8 @@ import ( "regexp" "strings" + "github.com/minio/sha256-simd" + "github.com/calmh/luhn" ) diff --git a/lib/scanner/blocks.go b/lib/scanner/blocks.go index fc847f143..aac00900a 100644 --- a/lib/scanner/blocks.go +++ b/lib/scanner/blocks.go @@ -8,10 +8,11 @@ package scanner import ( "bytes" - "crypto/sha256" "fmt" "io" + "github.com/minio/sha256-simd" + "github.com/syncthing/syncthing/lib/protocol" ) diff --git a/lib/signature/signature.go b/lib/signature/signature.go index 5e33f5e01..a0afb641a 100644 --- a/lib/signature/signature.go +++ b/lib/signature/signature.go @@ -12,7 +12,6 @@ import ( "crypto/ecdsa" "crypto/elliptic" "crypto/rand" - "crypto/sha256" "crypto/x509" "encoding/asn1" "encoding/pem" @@ -20,6 +19,8 @@ import ( "fmt" "io" "math/big" + + "github.com/minio/sha256-simd" ) // GenerateKeys returns a new key pair, with the private and public key diff --git a/vendor/github.com/minio/sha256-simd/LICENSE b/vendor/github.com/minio/sha256-simd/LICENSE new file mode 100644 index 000000000..d64569567 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/LICENSE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/vendor/github.com/minio/sha256-simd/cpuid.go b/vendor/github.com/minio/sha256-simd/cpuid.go new file mode 100644 index 000000000..531bbcc9f --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid.go @@ -0,0 +1,55 @@ +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package sha256 + +// True when SIMD instructions are available. +var avx2 = haveAVX2() +var avx = haveAVX() +var ssse3 = haveSSSE3() +var armSha = haveArmSha() + +// haveAVX returns true when there is AVX support +func haveAVX() bool { + _, _, c, _ := cpuid(1) + + // Check XGETBV, OXSAVE and AVX bits + if c&(1<<26) != 0 && c&(1<<27) != 0 && c&(1<<28) != 0 { + // Check for OS support + eax, _ := xgetbv(0) + return (eax & 0x6) == 0x6 + } + return false +} + +// haveAVX2 returns true when there is AVX2 support +func haveAVX2() bool { + mfi, _, _, _ := cpuid(0) + + // Check AVX2, AVX2 requires OS support, but BMI1/2 don't. + if mfi >= 7 && haveAVX() { + _, ebx, _, _ := cpuidex(7, 0) + return (ebx & 0x00000020) != 0 + } + return false +} + +// haveSSSE3 returns true when there is SSSE3 support +func haveSSSE3() bool { + + _, _, c, _ := cpuid(1) + + return (c & 0x00000200) != 0 +} diff --git a/vendor/github.com/minio/sha256-simd/cpuid_386.go b/vendor/github.com/minio/sha256-simd/cpuid_386.go new file mode 100644 index 000000000..c9890be47 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_386.go @@ -0,0 +1,24 @@ +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package sha256 + +func cpuid(op uint32) (eax, ebx, ecx, edx uint32) +func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func xgetbv(index uint32) (eax, edx uint32) + +func haveArmSha() bool { + return false +} diff --git a/vendor/github.com/minio/sha256-simd/cpuid_386.s b/vendor/github.com/minio/sha256-simd/cpuid_386.s new file mode 100644 index 000000000..fa38814ec --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_386.s @@ -0,0 +1,33 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// +build 386,!gccgo + +// func cpuid(op uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuid(SB), 7, $0 + XORL CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+4(FP) + MOVL BX, ebx+8(FP) + MOVL CX, ecx+12(FP) + MOVL DX, edx+16(FP) + RET + +// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuidex(SB), 7, $0 + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func xgetbv(index uint32) (eax, edx uint32) +TEXT ·xgetbv(SB), 7, $0 + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+4(FP) + MOVL DX, edx+8(FP) + RET diff --git a/vendor/github.com/minio/sha256-simd/cpuid_amd64.go b/vendor/github.com/minio/sha256-simd/cpuid_amd64.go new file mode 100644 index 000000000..c9890be47 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_amd64.go @@ -0,0 +1,24 @@ +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package sha256 + +func cpuid(op uint32) (eax, ebx, ecx, edx uint32) +func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +func xgetbv(index uint32) (eax, edx uint32) + +func haveArmSha() bool { + return false +} diff --git a/vendor/github.com/minio/sha256-simd/cpuid_amd64.s b/vendor/github.com/minio/sha256-simd/cpuid_amd64.s new file mode 100644 index 000000000..fb45a6560 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_amd64.s @@ -0,0 +1,34 @@ +// Copyright (c) 2015 Klaus Post, released under MIT License. See LICENSE file. + +// +build amd64,!gccgo + +// func cpuid(op uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuid(SB), 7, $0 + XORQ CX, CX + MOVL op+0(FP), AX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + + +// func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) +TEXT ·cpuidex(SB), 7, $0 + MOVL op+0(FP), AX + MOVL op2+4(FP), CX + CPUID + MOVL AX, eax+8(FP) + MOVL BX, ebx+12(FP) + MOVL CX, ecx+16(FP) + MOVL DX, edx+20(FP) + RET + +// func xgetbv(index uint32) (eax, edx uint32) +TEXT ·xgetbv(SB), 7, $0 + MOVL index+0(FP), CX + BYTE $0x0f; BYTE $0x01; BYTE $0xd0 // XGETBV + MOVL AX, eax+8(FP) + MOVL DX, edx+12(FP) + RET diff --git a/vendor/github.com/minio/sha256-simd/cpuid_arm.go b/vendor/github.com/minio/sha256-simd/cpuid_arm.go new file mode 100644 index 000000000..28637d391 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_arm.go @@ -0,0 +1,33 @@ +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package sha256 + +func cpuid(op uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func xgetbv(index uint32) (eax, edx uint32) { + return 0, 0 +} + +func haveArmSha() bool { + // TODO: Implement feature detection for ARM + return true +} diff --git a/vendor/github.com/minio/sha256-simd/cpuid_arm64.go b/vendor/github.com/minio/sha256-simd/cpuid_arm64.go new file mode 100644 index 000000000..28637d391 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_arm64.go @@ -0,0 +1,33 @@ +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package sha256 + +func cpuid(op uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func xgetbv(index uint32) (eax, edx uint32) { + return 0, 0 +} + +func haveArmSha() bool { + // TODO: Implement feature detection for ARM + return true +} diff --git a/vendor/github.com/minio/sha256-simd/cpuid_ppc64.go b/vendor/github.com/minio/sha256-simd/cpuid_ppc64.go new file mode 100644 index 000000000..351dff4b6 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_ppc64.go @@ -0,0 +1,32 @@ +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package sha256 + +func cpuid(op uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func xgetbv(index uint32) (eax, edx uint32) { + return 0, 0 +} + +func haveArmSha() bool { + return false +} diff --git a/vendor/github.com/minio/sha256-simd/cpuid_ppc64le.go b/vendor/github.com/minio/sha256-simd/cpuid_ppc64le.go new file mode 100644 index 000000000..351dff4b6 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/cpuid_ppc64le.go @@ -0,0 +1,32 @@ +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +package sha256 + +func cpuid(op uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func cpuidex(op, op2 uint32) (eax, ebx, ecx, edx uint32) { + return 0, 0, 0, 0 +} + +func xgetbv(index uint32) (eax, edx uint32) { + return 0, 0 +} + +func haveArmSha() bool { + return false +} diff --git a/vendor/github.com/minio/sha256-simd/sha256.go b/vendor/github.com/minio/sha256-simd/sha256.go new file mode 100644 index 000000000..a753e2886 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256.go @@ -0,0 +1,176 @@ +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +import ( + "crypto/sha256" + "hash" + "runtime" +) + +// Size - The size of a SHA256 checksum in bytes. +const Size = 32 + +// BlockSize - The blocksize of SHA256 in bytes. +const BlockSize = 64 + +const ( + chunk = 64 + init0 = 0x6A09E667 + init1 = 0xBB67AE85 + init2 = 0x3C6EF372 + init3 = 0xA54FF53A + init4 = 0x510E527F + init5 = 0x9B05688C + init6 = 0x1F83D9AB + init7 = 0x5BE0CD19 +) + +// digest represents the partial evaluation of a checksum. +type digest struct { + h [8]uint32 + x [chunk]byte + nx int + len uint64 +} + +// Reset digest back to default +func (d *digest) Reset() { + d.h[0] = init0 + d.h[1] = init1 + d.h[2] = init2 + d.h[3] = init3 + d.h[4] = init4 + d.h[5] = init5 + d.h[6] = init6 + d.h[7] = init7 + d.nx = 0 + d.len = 0 +} + +func block(dig *digest, p []byte) { + is386bit := runtime.GOARCH == "386" + isARM := runtime.GOARCH == "arm" + if is386bit || isARM { + blockGeneric(dig, p) + } + switch !is386bit && !isARM { + case avx2: + blockAvx2Go(dig, p) + case avx: + blockAvxGo(dig, p) + case ssse3: + blockSsseGo(dig, p) + case armSha: + blockArmGo(dig, p) + default: + blockGeneric(dig, p) + } +} + +// New returns a new hash.Hash computing the SHA256 checksum. +func New() hash.Hash { + if avx2 || avx || ssse3 || armSha { + d := new(digest) + d.Reset() + return d + } + // default back to the standard golang implementation + return sha256.New() +} + +// Sum256 - single caller sha256 helper +func Sum256(data []byte) [Size]byte { + var d digest + d.Reset() + d.Write(data) + return d.checkSum() +} + +// Return size of checksum +func (d *digest) Size() int { return Size } + +// Return blocksize of checksum +func (d *digest) BlockSize() int { return BlockSize } + +// Write to digest +func (d *digest) Write(p []byte) (nn int, err error) { + nn = len(p) + d.len += uint64(nn) + if d.nx > 0 { + n := copy(d.x[d.nx:], p) + d.nx += n + if d.nx == chunk { + block(d, d.x[:]) + d.nx = 0 + } + p = p[n:] + } + if len(p) >= chunk { + n := len(p) &^ (chunk - 1) + block(d, p[:n]) + p = p[n:] + } + if len(p) > 0 { + d.nx = copy(d.x[:], p) + } + return +} + +// Return sha256 sum in bytes +func (d *digest) Sum(in []byte) []byte { + // Make a copy of d0 so that caller can keep writing and summing. + d0 := *d + hash := d0.checkSum() + return append(in, hash[:]...) +} + +// Intermediate checksum function +func (d *digest) checkSum() [Size]byte { + len := d.len + // Padding. Add a 1 bit and 0 bits until 56 bytes mod 64. + var tmp [64]byte + tmp[0] = 0x80 + if len%64 < 56 { + d.Write(tmp[0 : 56-len%64]) + } else { + d.Write(tmp[0 : 64+56-len%64]) + } + + // Length in bits. + len <<= 3 + for i := uint(0); i < 8; i++ { + tmp[i] = byte(len >> (56 - 8*i)) + } + d.Write(tmp[0:8]) + + if d.nx != 0 { + panic("d.nx != 0") + } + + h := d.h[:] + + var digest [Size]byte + for i, s := range h { + digest[i*4] = byte(s >> 24) + digest[i*4+1] = byte(s >> 16) + digest[i*4+2] = byte(s >> 8) + digest[i*4+3] = byte(s) + } + + return digest +} diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.go b/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.go new file mode 100644 index 000000000..43ee7a948 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.go @@ -0,0 +1,22 @@ +//+build !noasm + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +//go:noescape +func blockAvx2(h []uint32, message []uint8) diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s b/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s new file mode 100644 index 000000000..ff5d2a65d --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx2_amd64.s @@ -0,0 +1,1442 @@ +//+build !noasm !appengine + +// SHA256 implementation for AVX2 + +// +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// This code is based on an Intel White-Paper: +// "Fast SHA-256 Implementations on Intel Architecture Processors" +// +// together with the reference implementation from the following authors: +// James Guilford +// Kirk Yap +// Tim Chen +// +// For Golang it has been converted to Plan 9 assembly with the help of +// github.com/minio/asm2plan9s to assemble Intel instructions to their Plan9 +// equivalents +// + +#include "textflag.h" + +DATA K256<>+0x000(SB)/8, $0x71374491428a2f98 +DATA K256<>+0x008(SB)/8, $0xe9b5dba5b5c0fbcf +DATA K256<>+0x010(SB)/8, $0x71374491428a2f98 +DATA K256<>+0x018(SB)/8, $0xe9b5dba5b5c0fbcf +DATA K256<>+0x020(SB)/8, $0x59f111f13956c25b +DATA K256<>+0x028(SB)/8, $0xab1c5ed5923f82a4 +DATA K256<>+0x030(SB)/8, $0x59f111f13956c25b +DATA K256<>+0x038(SB)/8, $0xab1c5ed5923f82a4 +DATA K256<>+0x040(SB)/8, $0x12835b01d807aa98 +DATA K256<>+0x048(SB)/8, $0x550c7dc3243185be +DATA K256<>+0x050(SB)/8, $0x12835b01d807aa98 +DATA K256<>+0x058(SB)/8, $0x550c7dc3243185be +DATA K256<>+0x060(SB)/8, $0x80deb1fe72be5d74 +DATA K256<>+0x068(SB)/8, $0xc19bf1749bdc06a7 +DATA K256<>+0x070(SB)/8, $0x80deb1fe72be5d74 +DATA K256<>+0x078(SB)/8, $0xc19bf1749bdc06a7 +DATA K256<>+0x080(SB)/8, $0xefbe4786e49b69c1 +DATA K256<>+0x088(SB)/8, $0x240ca1cc0fc19dc6 +DATA K256<>+0x090(SB)/8, $0xefbe4786e49b69c1 +DATA K256<>+0x098(SB)/8, $0x240ca1cc0fc19dc6 +DATA K256<>+0x0a0(SB)/8, $0x4a7484aa2de92c6f +DATA K256<>+0x0a8(SB)/8, $0x76f988da5cb0a9dc +DATA K256<>+0x0b0(SB)/8, $0x4a7484aa2de92c6f +DATA K256<>+0x0b8(SB)/8, $0x76f988da5cb0a9dc +DATA K256<>+0x0c0(SB)/8, $0xa831c66d983e5152 +DATA K256<>+0x0c8(SB)/8, $0xbf597fc7b00327c8 +DATA K256<>+0x0d0(SB)/8, $0xa831c66d983e5152 +DATA K256<>+0x0d8(SB)/8, $0xbf597fc7b00327c8 +DATA K256<>+0x0e0(SB)/8, $0xd5a79147c6e00bf3 +DATA K256<>+0x0e8(SB)/8, $0x1429296706ca6351 +DATA K256<>+0x0f0(SB)/8, $0xd5a79147c6e00bf3 +DATA K256<>+0x0f8(SB)/8, $0x1429296706ca6351 +DATA K256<>+0x100(SB)/8, $0x2e1b213827b70a85 +DATA K256<>+0x108(SB)/8, $0x53380d134d2c6dfc +DATA K256<>+0x110(SB)/8, $0x2e1b213827b70a85 +DATA K256<>+0x118(SB)/8, $0x53380d134d2c6dfc +DATA K256<>+0x120(SB)/8, $0x766a0abb650a7354 +DATA K256<>+0x128(SB)/8, $0x92722c8581c2c92e +DATA K256<>+0x130(SB)/8, $0x766a0abb650a7354 +DATA K256<>+0x138(SB)/8, $0x92722c8581c2c92e +DATA K256<>+0x140(SB)/8, $0xa81a664ba2bfe8a1 +DATA K256<>+0x148(SB)/8, $0xc76c51a3c24b8b70 +DATA K256<>+0x150(SB)/8, $0xa81a664ba2bfe8a1 +DATA K256<>+0x158(SB)/8, $0xc76c51a3c24b8b70 +DATA K256<>+0x160(SB)/8, $0xd6990624d192e819 +DATA K256<>+0x168(SB)/8, $0x106aa070f40e3585 +DATA K256<>+0x170(SB)/8, $0xd6990624d192e819 +DATA K256<>+0x178(SB)/8, $0x106aa070f40e3585 +DATA K256<>+0x180(SB)/8, $0x1e376c0819a4c116 +DATA K256<>+0x188(SB)/8, $0x34b0bcb52748774c +DATA K256<>+0x190(SB)/8, $0x1e376c0819a4c116 +DATA K256<>+0x198(SB)/8, $0x34b0bcb52748774c +DATA K256<>+0x1a0(SB)/8, $0x4ed8aa4a391c0cb3 +DATA K256<>+0x1a8(SB)/8, $0x682e6ff35b9cca4f +DATA K256<>+0x1b0(SB)/8, $0x4ed8aa4a391c0cb3 +DATA K256<>+0x1b8(SB)/8, $0x682e6ff35b9cca4f +DATA K256<>+0x1c0(SB)/8, $0x78a5636f748f82ee +DATA K256<>+0x1c8(SB)/8, $0x8cc7020884c87814 +DATA K256<>+0x1d0(SB)/8, $0x78a5636f748f82ee +DATA K256<>+0x1d8(SB)/8, $0x8cc7020884c87814 +DATA K256<>+0x1e0(SB)/8, $0xa4506ceb90befffa +DATA K256<>+0x1e8(SB)/8, $0xc67178f2bef9a3f7 +DATA K256<>+0x1f0(SB)/8, $0xa4506ceb90befffa +DATA K256<>+0x1f8(SB)/8, $0xc67178f2bef9a3f7 + +DATA K256<>+0x200(SB)/8, $0x0405060700010203 +DATA K256<>+0x208(SB)/8, $0x0c0d0e0f08090a0b +DATA K256<>+0x210(SB)/8, $0x0405060700010203 +DATA K256<>+0x218(SB)/8, $0x0c0d0e0f08090a0b +DATA K256<>+0x220(SB)/8, $0x0b0a090803020100 +DATA K256<>+0x228(SB)/8, $0xffffffffffffffff +DATA K256<>+0x230(SB)/8, $0x0b0a090803020100 +DATA K256<>+0x238(SB)/8, $0xffffffffffffffff +DATA K256<>+0x240(SB)/8, $0xffffffffffffffff +DATA K256<>+0x248(SB)/8, $0x0b0a090803020100 +DATA K256<>+0x250(SB)/8, $0xffffffffffffffff +DATA K256<>+0x258(SB)/8, $0x0b0a090803020100 + +GLOBL K256<>(SB), 8, $608 + +// func blockAvx2(h []uint32, message []uint8) +TEXT ·blockAvx2(SB), 7, $0 + + MOVQ ctx+0(FP), DI // DI: &h + MOVQ inp+24(FP), SI // SI: &message + MOVQ inplength+32(FP), DX // len(message) + ADDQ SI, DX // end pointer of input + MOVQ SP, R11 // copy stack pointer + SUBQ $0x220, SP // sp -= 0x220 + ANDQ $0xfffffffffffffc00, SP // align stack frame + ADDQ $0x1c0, SP + MOVQ DI, 0x40(SP) // save ctx + MOVQ SI, 0x48(SP) // save input + MOVQ DX, 0x50(SP) // save end pointer + MOVQ R11, 0x58(SP) // save copy of stack pointer + + WORD $0xf8c5; BYTE $0x77 // vzeroupper + ADDQ $0x40, SI // input++ + MOVL (DI), AX + MOVQ SI, R12 // borrow $T1 + MOVL 4(DI), BX + CMPQ SI, DX // $_end + MOVL 8(DI), CX + LONG $0xe4440f4c // cmove r12,rsp /* next block or random data */ + MOVL 12(DI), DX + MOVL 16(DI), R8 + MOVL 20(DI), R9 + MOVL 24(DI), R10 + MOVL 28(DI), R11 + + LEAQ K256<>(SB), BP + LONG $0x856f7dc5; LONG $0x00000220 // VMOVDQA YMM8, 0x220[rbp] /* vmovdqa ymm8,YMMWORD PTR [rip+0x220] */ + LONG $0x8d6f7dc5; LONG $0x00000240 // VMOVDQA YMM9, 0x240[rbp] /* vmovdqa ymm9,YMMWORD PTR [rip+0x240] */ + LONG $0x956f7dc5; LONG $0x00000200 // VMOVDQA YMM10, 0x200[rbp] /* vmovdqa ymm7,YMMWORD PTR [rip+0x200] */ + +loop0: + LONG $0x6f7dc1c4; BYTE $0xfa // VMOVDQA YMM7, YMM10 + + // Load first 16 dwords from two blocks + MOVOU -64(SI), X0 // vmovdqu xmm0,XMMWORD PTR [rsi-0x40] + MOVOU -48(SI), X1 // vmovdqu xmm1,XMMWORD PTR [rsi-0x30] + MOVOU -32(SI), X2 // vmovdqu xmm2,XMMWORD PTR [rsi-0x20] + MOVOU -16(SI), X3 // vmovdqu xmm3,XMMWORD PTR [rsi-0x10] + + // Byte swap data and transpose data into high/low + LONG $0x387dc3c4; WORD $0x2404; BYTE $0x01 // vinserti128 ymm0,ymm0,[r12],0x1 + LONG $0x3875c3c4; LONG $0x0110244c // vinserti128 ymm1,ymm1,0x10[r12],0x1 + LONG $0x007de2c4; BYTE $0xc7 // vpshufb ymm0,ymm0,ymm7 + LONG $0x386dc3c4; LONG $0x01202454 // vinserti128 ymm2,ymm2,0x20[r12],0x1 + LONG $0x0075e2c4; BYTE $0xcf // vpshufb ymm1,ymm1,ymm7 + LONG $0x3865c3c4; LONG $0x0130245c // vinserti128 ymm3,ymm3,0x30[r12],0x1 + + LEAQ K256<>(SB), BP + LONG $0x006de2c4; BYTE $0xd7 // vpshufb ymm2,ymm2,ymm7 + LONG $0x65fefdc5; BYTE $0x00 // vpaddd ymm4,ymm0,[rbp] + LONG $0x0065e2c4; BYTE $0xdf // vpshufb ymm3,ymm3,ymm7 + LONG $0x6dfef5c5; BYTE $0x20 // vpaddd ymm5,ymm1,0x20[rbp] + LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,0x40[rbp] + LONG $0x7dfee5c5; BYTE $0x60 // vpaddd ymm7,ymm3,0x60[rbp] + + LONG $0x247ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm4 + XORQ R14, R14 + LONG $0x6c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm5 + + ADDQ $-0x40, SP + MOVQ BX, DI + LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 + XORQ CX, DI // magic + LONG $0x7c7ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm7 + MOVQ R9, R12 + ADDQ $0x80,BP + +loop1: + // Schedule 48 input dwords, by doing 3 rounds of 12 each + // Note: SIMD instructions are interleaved with the SHA calculations + ADDQ $-0x40, SP + LONG $0x0f75e3c4; WORD $0x04e0 // vpalignr ymm4,ymm1,ymm0,0x4 + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) + LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0x0f65e3c4; WORD $0x04fa // vpalignr ymm7,ymm3,ymm2,0x4 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0xc7fefdc5 // vpaddd ymm0,ymm0,ymm7 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + LONG $0xfb70fdc5; BYTE $0xfa // vpshufd ymm7,ymm3,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) + LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + LONG $0xc4fefdc5 // vpaddd ymm0,ymm0,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) + LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf870fdc5; BYTE $0x50 // vpshufd ymm7,ymm0,0x50 + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) + LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xc6fefdc5 // vpaddd ymm0,ymm0,ymm6 + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0x75fefdc5; BYTE $0x00 // vpaddd ymm6,ymm0,[rbp+0x0] + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 + LONG $0x0f6de3c4; WORD $0x04e1 // vpalignr ymm4,ymm2,ymm1,0x4 + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) + LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0x0f7de3c4; WORD $0x04fb // vpalignr ymm7,ymm0,ymm3,0x4 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0xcffef5c5 // vpaddd ymm1,ymm1,ymm7 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + LONG $0xf870fdc5; BYTE $0xfa // vpshufd ymm7,ymm0,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) + LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + LONG $0xccfef5c5 // vpaddd ymm1,ymm1,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) + LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf970fdc5; BYTE $0x50 // vpshufd ymm7,ymm1,0x50 + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) + LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xcefef5c5 // vpaddd ymm1,ymm1,ymm6 + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0x75fef5c5; BYTE $0x20 // vpaddd ymm6,ymm1,[rbp+0x20] + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 + + LONG $0x24648d48; BYTE $0xc0 // lea rsp,[rsp-0x40] + LONG $0x0f65e3c4; WORD $0x04e2 // vpalignr ymm4,ymm3,ymm2,0x4 + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x80) + LONG $0x249c0344; LONG $0x00000080 // add r11d,[rsp+0x80] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0x0f75e3c4; WORD $0x04f8 // vpalignr ymm7,ymm1,ymm0,0x4 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0xd7feedc5 // vpaddd ymm2,ymm2,ymm7 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + LONG $0xf970fdc5; BYTE $0xfa // vpshufd ymm7,ymm1,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x84) + LONG $0x24940344; LONG $0x00000084 // add r10d,[rsp+0x84] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + LONG $0xd4feedc5 // vpaddd ymm2,ymm2,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x88) + LONG $0x248c0344; LONG $0x00000088 // add r9d,[rsp+0x88] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xfa70fdc5; BYTE $0x50 // vpshufd ymm7,ymm2,0x50 + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x8c) + LONG $0x24840344; LONG $0x0000008c // add r8d,[rsp+0x8c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xd6feedc5 // vpaddd ymm2,ymm2,ymm6 + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0x75feedc5; BYTE $0x40 // vpaddd ymm6,ymm2,[rbp+0x40] + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + LONG $0x347ffdc5; BYTE $0x24 // vmovdqa [rsp],ymm6 + LONG $0x0f7de3c4; WORD $0x04e3 // vpalignr ymm4,ymm0,ymm3,0x4 + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0xa0) + LONG $0xa0249403; WORD $0x0000; BYTE $0x00 // add edx,[rsp+0xa0] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0x0f6de3c4; WORD $0x04f9 // vpalignr ymm7,ymm2,ymm1,0x4 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xd472cdc5; BYTE $0x07 // vpsrld ymm6,ymm4,0x7 + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0xdffee5c5 // vpaddd ymm3,ymm3,ymm7 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xd472c5c5; BYTE $0x03 // vpsrld ymm7,ymm4,0x3 + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf472d5c5; BYTE $0x0e // vpslld ymm5,ymm4,0xe + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + LONG $0xe6efc5c5 // vpxor ymm4,ymm7,ymm6 + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + LONG $0xfa70fdc5; BYTE $0xfa // vpshufd ymm7,ymm2,0xfa + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + LONG $0xd672cdc5; BYTE $0x0b // vpsrld ymm6,ymm6,0xb + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0xa4) + LONG $0xa4248c03; WORD $0x0000; BYTE $0x00 // add ecx,[rsp+0xa4] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf572d5c5; BYTE $0x0b // vpslld ymm5,ymm5,0xb + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0xe6efddc5 // vpxor ymm4,ymm4,ymm6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xe5efddc5 // vpxor ymm4,ymm4,ymm5 + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + LONG $0xdcfee5c5 // vpaddd ymm3,ymm3,ymm4 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0xa8) + LONG $0xa8249c03; WORD $0x0000; BYTE $0x00 // add ebx,[rsp+0xa8] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x004dc2c4; BYTE $0xf0 // vpshufb ymm6,ymm6,ymm8 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xfb70fdc5; BYTE $0x50 // vpshufd ymm7,ymm3,0x50 + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + LONG $0xd772cdc5; BYTE $0x0a // vpsrld ymm6,ymm7,0xa + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + LONG $0xd773c5c5; BYTE $0x11 // vpsrlq ymm7,ymm7,0x11 + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0xac) + LONG $0xac248403; WORD $0x0000; BYTE $0x00 // add eax,[rsp+0xac] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xd773c5c5; BYTE $0x02 // vpsrlq ymm7,ymm7,0x2 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf7efcdc5 // vpxor ymm6,ymm6,ymm7 + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x004dc2c4; BYTE $0xf1 // vpshufb ymm6,ymm6,ymm9 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xdefee5c5 // vpaddd ymm3,ymm3,ymm6 + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0x75fee5c5; BYTE $0x60 // vpaddd ymm6,ymm3,[rbp+0x60] + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + LONG $0x747ffdc5; WORD $0x2024 // vmovdqa [rsp+0x20],ymm6 + ADDQ $0x80, BP + + CMPB 0x3(BP),$0x0 + JNE loop1 + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x40) + LONG $0x245c0344; BYTE $0x40 // add r11d,[rsp+0x40] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x44) + LONG $0x24540344; BYTE $0x44 // add r10d,[rsp+0x44] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x48) + LONG $0x244c0344; BYTE $0x48 // add r9d,[rsp+0x48] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x4c) + LONG $0x24440344; BYTE $0x4c // add r8d,[rsp+0x4c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x60) + LONG $0x60245403 // add edx,[rsp+0x60] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x64) + LONG $0x64244c03 // add ecx,[rsp+0x64] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x68) + LONG $0x68245c03 // add ebx,[rsp+0x68] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x6c) + LONG $0x6c244403 // add eax,[rsp+0x6c] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, SP, 0x00) + LONG $0x241c0344 // add r11d,[rsp] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, SP, 0x04) + LONG $0x24540344; BYTE $0x04 // add r10d,[rsp+0x4] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, SP, 0x08) + LONG $0x244c0344; BYTE $0x08 // add r9d,[rsp+0x8] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, SP, 0x0c) + LONG $0x24440344; BYTE $0x0c // add r8d,[rsp+0xc] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, SP, 0x20) + LONG $0x20245403 // add edx,[rsp+0x20] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, SP, 0x24) + LONG $0x24244c03 // add ecx,[rsp+0x24] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, SP, 0x28) + LONG $0x28245c03 // add ebx,[rsp+0x28] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, SP, 0x2c) + LONG $0x2c244403 // add eax,[rsp+0x2c] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + MOVQ 0x200(SP), DI // $_ctx + ADDQ R14, AX + + LEAQ 0x1c0(SP), BP + + ADDL (DI), AX + ADDL 4(DI), BX + ADDL 8(DI), CX + ADDL 12(DI), DX + ADDL 16(DI), R8 + ADDL 20(DI), R9 + ADDL 24(DI), R10 + ADDL 28(DI), R11 + + MOVL AX, (DI) + MOVL BX, 4(DI) + MOVL CX, 8(DI) + MOVL DX, 12(DI) + MOVL R8, 16(DI) + MOVL R9, 20(DI) + MOVL R10, 24(DI) + MOVL R11, 28(DI) + + CMPQ SI, 0x50(BP) // $_end + JE done + + XORQ R14, R14 + MOVQ BX, DI + XORQ CX, DI // magic + MOVQ R9, R12 + +loop2: + // ROUND(AX, BX, CX, DX, R8, R9, R10, R11, R12, R13, R14, R15, DI, BP, 0x10) + LONG $0x105d0344 // add r11d,[rbp+0x10] + WORD $0x2145; BYTE $0xc4 // and r12d,r8d + LONG $0xf07b43c4; WORD $0x19e8 // rorx r13d,r8d,0x19 + LONG $0xf07b43c4; WORD $0x0bf8 // rorx r15d,r8d,0xb + LONG $0x30048d42 // lea eax,[rax+r14*1] + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + LONG $0xf23842c4; BYTE $0xe2 // andn r12d,r8d,r10d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f0 // rorx r14d,r8d,0x6 + LONG $0x231c8d47 // lea r11d,[r11+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xc7 // mov r15d,eax + LONG $0xf07b63c4; WORD $0x16e0 // rorx r12d,eax,0x16 + LONG $0x2b1c8d47 // lea r11d,[r11+r13*1] + WORD $0x3141; BYTE $0xdf // xor r15d,ebx + LONG $0xf07b63c4; WORD $0x0df0 // rorx r14d,eax,0xd + LONG $0xf07b63c4; WORD $0x02e8 // rorx r13d,eax,0x2 + LONG $0x1a148d42 // lea edx,[rdx+r11*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xdf31 // xor edi,ebx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3b1c8d45 // lea r11d,[r11+rdi*1] + WORD $0x8945; BYTE $0xc4 // mov r12d,r8d + + // ROUND(R11, AX, BX, CX, DX, R8, R9, R10, R12, R13, R14, DI, R15, BP, 0x14) + LONG $0x14550344 // add r10d,[rbp+0x14] + WORD $0x2141; BYTE $0xd4 // and r12d,edx + LONG $0xf07b63c4; WORD $0x19ea // rorx r13d,edx,0x19 + LONG $0xf07be3c4; WORD $0x0bfa // rorx edi,edx,0xb + LONG $0x331c8d47 // lea r11d,[r11+r14*1] + LONG $0x22148d47 // lea r10d,[r10+r12*1] + LONG $0xf26842c4; BYTE $0xe1 // andn r12d,edx,r9d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f2 // rorx r14d,edx,0x6 + LONG $0x22148d47 // lea r10d,[r10+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xdf // mov edi,r11d + LONG $0xf07b43c4; WORD $0x16e3 // rorx r12d,r11d,0x16 + LONG $0x2a148d47 // lea r10d,[r10+r13*1] + WORD $0xc731 // xor edi,eax + LONG $0xf07b43c4; WORD $0x0df3 // rorx r14d,r11d,0xd + LONG $0xf07b43c4; WORD $0x02eb // rorx r13d,r11d,0x2 + LONG $0x110c8d42 // lea ecx,[rcx+r10*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xc7 // xor r15d,eax + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x3a148d47 // lea r10d,[r10+r15*1] + WORD $0x8941; BYTE $0xd4 // mov r12d,edx + + // ROUND(R10, R11, AX, BX, CX, DX, R8, R9, R12, R13, R14, R15, DI, BP, 0x18) + LONG $0x184d0344 // add r9d,[rbp+0x18] + WORD $0x2141; BYTE $0xcc // and r12d,ecx + LONG $0xf07b63c4; WORD $0x19e9 // rorx r13d,ecx,0x19 + LONG $0xf07b63c4; WORD $0x0bf9 // rorx r15d,ecx,0xb + LONG $0x32148d47 // lea r10d,[r10+r14*1] + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + LONG $0xf27042c4; BYTE $0xe0 // andn r12d,ecx,r8d + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f1 // rorx r14d,ecx,0x6 + LONG $0x210c8d47 // lea r9d,[r9+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xd7 // mov r15d,r10d + LONG $0xf07b43c4; WORD $0x16e2 // rorx r12d,r10d,0x16 + LONG $0x290c8d47 // lea r9d,[r9+r13*1] + WORD $0x3145; BYTE $0xdf // xor r15d,r11d + LONG $0xf07b43c4; WORD $0x0df2 // rorx r14d,r10d,0xd + LONG $0xf07b43c4; WORD $0x02ea // rorx r13d,r10d,0x2 + LONG $0x0b1c8d42 // lea ebx,[rbx+r9*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xdf // xor edi,r11d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d45 // lea r9d,[r9+rdi*1] + WORD $0x8941; BYTE $0xcc // mov r12d,ecx + + // ROUND(R9, R10, R11, AX, BX, CX, DX, R8, R12, R13, R14, DI, R15, BP, 0x1c) + LONG $0x1c450344 // add r8d,[rbp+0x1c] + WORD $0x2141; BYTE $0xdc // and r12d,ebx + LONG $0xf07b63c4; WORD $0x19eb // rorx r13d,ebx,0x19 + LONG $0xf07be3c4; WORD $0x0bfb // rorx edi,ebx,0xb + LONG $0x310c8d47 // lea r9d,[r9+r14*1] + LONG $0x20048d47 // lea r8d,[r8+r12*1] + LONG $0xf26062c4; BYTE $0xe2 // andn r12d,ebx,edx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b63c4; WORD $0x06f3 // rorx r14d,ebx,0x6 + LONG $0x20048d47 // lea r8d,[r8+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8944; BYTE $0xcf // mov edi,r9d + LONG $0xf07b43c4; WORD $0x16e1 // rorx r12d,r9d,0x16 + LONG $0x28048d47 // lea r8d,[r8+r13*1] + WORD $0x3144; BYTE $0xd7 // xor edi,r10d + LONG $0xf07b43c4; WORD $0x0df1 // rorx r14d,r9d,0xd + LONG $0xf07b43c4; WORD $0x02e9 // rorx r13d,r9d,0x2 + LONG $0x00048d42 // lea eax,[rax+r8*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xd7 // xor r15d,r10d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d47 // lea r8d,[r8+r15*1] + WORD $0x8941; BYTE $0xdc // mov r12d,ebx + + // ROUND(R8, R9, R10, R11, AX, BX, CX, DX, R12, R13, R14, R15, DI, BP, 0x30) + WORD $0x5503; BYTE $0x30 // add edx,[rbp+0x30] + WORD $0x2141; BYTE $0xc4 // and r12d,eax + LONG $0xf07b63c4; WORD $0x19e8 // rorx r13d,eax,0x19 + LONG $0xf07b63c4; WORD $0x0bf8 // rorx r15d,eax,0xb + LONG $0x30048d47 // lea r8d,[r8+r14*1] + LONG $0x22148d42 // lea edx,[rdx+r12*1] + LONG $0xf27862c4; BYTE $0xe1 // andn r12d,eax,ecx + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b63c4; WORD $0x06f0 // rorx r14d,eax,0x6 + LONG $0x22148d42 // lea edx,[rdx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8945; BYTE $0xc7 // mov r15d,r8d + LONG $0xf07b43c4; WORD $0x16e0 // rorx r12d,r8d,0x16 + LONG $0x2a148d42 // lea edx,[rdx+r13*1] + WORD $0x3145; BYTE $0xcf // xor r15d,r9d + LONG $0xf07b43c4; WORD $0x0df0 // rorx r14d,r8d,0xd + LONG $0xf07b43c4; WORD $0x02e8 // rorx r13d,r8d,0x2 + LONG $0x131c8d45 // lea r11d,[r11+rdx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3144; BYTE $0xcf // xor edi,r9d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x148d; BYTE $0x3a // lea edx,[rdx+rdi*1] + WORD $0x8941; BYTE $0xc4 // mov r12d,eax + + // ROUND(DX, R8, R9, R10, R11, AX, BX, CX, R12, R13, R14, DI, R15, BP, 0x34) + WORD $0x4d03; BYTE $0x34 // add ecx,[rbp+0x34] + WORD $0x2145; BYTE $0xdc // and r12d,r11d + LONG $0xf07b43c4; WORD $0x19eb // rorx r13d,r11d,0x19 + LONG $0xf07bc3c4; WORD $0x0bfb // rorx edi,r11d,0xb + LONG $0x32148d42 // lea edx,[rdx+r14*1] + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + LONG $0xf22062c4; BYTE $0xe3 // andn r12d,r11d,ebx + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f3 // rorx r14d,r11d,0x6 + LONG $0x210c8d42 // lea ecx,[rcx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xd789 // mov edi,edx + LONG $0xf07b63c4; WORD $0x16e2 // rorx r12d,edx,0x16 + LONG $0x290c8d42 // lea ecx,[rcx+r13*1] + WORD $0x3144; BYTE $0xc7 // xor edi,r8d + LONG $0xf07b63c4; WORD $0x0df2 // rorx r14d,edx,0xd + LONG $0xf07b63c4; WORD $0x02ea // rorx r13d,edx,0x2 + LONG $0x0a148d45 // lea r10d,[r10+rcx*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3145; BYTE $0xc7 // xor r15d,r8d + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x390c8d42 // lea ecx,[rcx+r15*1] + WORD $0x8945; BYTE $0xdc // mov r12d,r11d + + // ROUND(CX, DX, R8, R9, R10, R11, AX, BX, R12, R13, R14, R15, DI, BP, 0x38) + WORD $0x5d03; BYTE $0x38 // add ebx,[rbp+0x38] + WORD $0x2145; BYTE $0xd4 // and r12d,r10d + LONG $0xf07b43c4; WORD $0x19ea // rorx r13d,r10d,0x19 + LONG $0xf07b43c4; WORD $0x0bfa // rorx r15d,r10d,0xb + LONG $0x310c8d42 // lea ecx,[rcx+r14*1] + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + LONG $0xf22862c4; BYTE $0xe0 // andn r12d,r10d,eax + WORD $0x3145; BYTE $0xfd // xor r13d,r15d + LONG $0xf07b43c4; WORD $0x06f2 // rorx r14d,r10d,0x6 + LONG $0x231c8d42 // lea ebx,[rbx+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0x8941; BYTE $0xcf // mov r15d,ecx + LONG $0xf07b63c4; WORD $0x16e1 // rorx r12d,ecx,0x16 + LONG $0x2b1c8d42 // lea ebx,[rbx+r13*1] + WORD $0x3141; BYTE $0xd7 // xor r15d,edx + LONG $0xf07b63c4; WORD $0x0df1 // rorx r14d,ecx,0xd + LONG $0xf07b63c4; WORD $0x02e9 // rorx r13d,ecx,0x2 + LONG $0x190c8d45 // lea r9d,[r9+rbx*1] + WORD $0x2144; BYTE $0xff // and edi,r15d + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0xd731 // xor edi,edx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + WORD $0x1c8d; BYTE $0x3b // lea ebx,[rbx+rdi*1] + WORD $0x8945; BYTE $0xd4 // mov r12d,r10d + + // ROUND(BX, CX, DX, R8, R9, R10, R11, AX, R12, R13, R14, DI, R15, BP, 0x3c) + WORD $0x4503; BYTE $0x3c // add eax,[rbp+0x3c] + WORD $0x2145; BYTE $0xcc // and r12d,r9d + LONG $0xf07b43c4; WORD $0x19e9 // rorx r13d,r9d,0x19 + LONG $0xf07bc3c4; WORD $0x0bf9 // rorx edi,r9d,0xb + LONG $0x331c8d42 // lea ebx,[rbx+r14*1] + LONG $0x20048d42 // lea eax,[rax+r12*1] + LONG $0xf23042c4; BYTE $0xe3 // andn r12d,r9d,r11d + WORD $0x3141; BYTE $0xfd // xor r13d,edi + LONG $0xf07b43c4; WORD $0x06f1 // rorx r14d,r9d,0x6 + LONG $0x20048d42 // lea eax,[rax+r12*1] + WORD $0x3145; BYTE $0xf5 // xor r13d,r14d + WORD $0xdf89 // mov edi,ebx + LONG $0xf07b63c4; WORD $0x16e3 // rorx r12d,ebx,0x16 + LONG $0x28048d42 // lea eax,[rax+r13*1] + WORD $0xcf31 // xor edi,ecx + LONG $0xf07b63c4; WORD $0x0df3 // rorx r14d,ebx,0xd + LONG $0xf07b63c4; WORD $0x02eb // rorx r13d,ebx,0x2 + LONG $0x00048d45 // lea r8d,[r8+rax*1] + WORD $0x2141; BYTE $0xff // and r15d,edi + WORD $0x3145; BYTE $0xe6 // xor r14d,r12d + WORD $0x3141; BYTE $0xcf // xor r15d,ecx + WORD $0x3145; BYTE $0xee // xor r14d,r13d + LONG $0x38048d42 // lea eax,[rax+r15*1] + WORD $0x8945; BYTE $0xcc // mov r12d,r9d + + ADDQ $-0x40, BP + CMPQ BP, SP + JAE loop2 + + MOVQ 0x200(SP), DI // $_ctx + ADDQ R14, AX + + ADDQ $0x1c0, SP + + ADDL (DI), AX + ADDL 4(DI), BX + ADDL 8(DI), CX + ADDL 12(DI), DX + ADDL 16(DI), R8 + ADDL 20(DI), R9 + + ADDQ $0x80, SI // input += 2 + ADDL 24(DI), R10 + MOVQ SI, R12 + ADDL 28(DI), R11 + CMPQ SI, 0x50(SP) // input == _end + + MOVL AX, (DI) + LONG $0xe4440f4c // cmove r12,rsp /* next block or stale data */ + MOVL AX, (DI) + MOVL BX, 4(DI) + MOVL CX, 8(DI) + MOVL DX, 12(DI) + MOVL R8, 16(DI) + MOVL R9, 20(DI) + MOVL R10, 24(DI) + MOVL R11, 28(DI) + + JBE loop0 + LEAQ (SP), BP + +done: + MOVQ BP, SP + MOVQ 0x58(SP), SP + WORD $0xf8c5; BYTE $0x77 // vzeroupper + + RET + diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.go b/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.go new file mode 100644 index 000000000..eb8a0ff0c --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.go @@ -0,0 +1,22 @@ +//+build !noasm + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +//go:noescape +func blockAvx(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) diff --git a/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s b/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s new file mode 100644 index 000000000..512e3cf4c --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockAvx_amd64.s @@ -0,0 +1,409 @@ +//+build !noasm !appengine + +// SHA256 implementation for AVX + +// +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// This code is based on an Intel White-Paper: +// "Fast SHA-256 Implementations on Intel Architecture Processors" +// +// together with the reference implementation from the following authors: +// James Guilford +// Kirk Yap +// Tim Chen +// +// For Golang it has been converted to Plan 9 assembly with the help of +// github.com/minio/asm2plan9s to assemble Intel instructions to their Plan9 +// equivalents +// + +#include "textflag.h" + +#define ROTATE_XS \ + MOVOU X4, X15 \ + MOVOU X5, X4 \ + MOVOU X6, X5 \ + MOVOU X7, X6 \ + MOVOU X15, X7 + +// compute s0 four at a time and s1 two at a time +// compute W[-16] + W[-7] 4 at a time +#define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \ + MOVL e, R13 \ /* y0 = e */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + MOVL a, R14 \ /* y1 = a */ + LONG $0x0f41e3c4; WORD $0x04c6 \ // VPALIGNR XMM0,XMM7,XMM6,0x4 /* XTMP0 = W[-7] */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ + MOVL f, R15 \ /* y2 = f */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ + XORL g, R15 \ /* y2 = f^g */ + LONG $0xc4fef9c5 \ // VPADDD XMM0,XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ + XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) */ + ANDL e, R15 \ /* y2 = (f^g)&e */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + \ /* */ + \ /* compute s0 */ + \ /* */ + LONG $0x0f51e3c4; WORD $0x04cc \ // VPALIGNR XMM1,XMM5,XMM4,0x4 /* XTMP1 = W[-15] */ + XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */ + MOVL a, R13 \ /* y0 = a */ + ADDL R15, h \ /* h = h + S1 + CH + k + w */ + \ /* ROTATE_ARGS */ + MOVL a, R15 \ /* y2 = a */ + LONG $0xd172e9c5; BYTE $0x07 \ // VPSRLD XMM2,XMM1,0x7 /* */ + ORL c, R13 \ /* y0 = a|c */ + ADDL h, d \ /* d = d + h + S1 + CH + k + w */ + ANDL c, R15 \ /* y2 = a&c */ + LONG $0xf172e1c5; BYTE $0x19 \ // VPSLLD XMM3,XMM1,0x19 /* */ + ANDL b, R13 \ /* y0 = (a|c)&b */ + ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ + LONG $0xdaebe1c5 \ // VPOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + MOVL d, R13 \ /* y0 = e */ + MOVL h, R14 \ /* y1 = a */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */ + MOVL e, R15 \ /* y2 = f */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + LONG $0xd172e9c5; BYTE $0x12 \ // VPSRLD XMM2,XMM1,0x12 /* */ + XORL h, R14 \ /* y1 = a ^ (a >> (22-13) */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + XORL f, R15 \ /* y2 = f^g */ + LONG $0xd172b9c5; BYTE $0x03 \ // VPSRLD XMM8,XMM1,0x3 /* XTMP4 = W[-15] >> 3 */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ANDL d, R15 \ /* y2 = (f^g)&e */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + LONG $0xf172f1c5; BYTE $0x0e \ // VPSLLD XMM1,XMM1,0xe /* */ + XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */ + LONG $0xd9efe1c5 \ // VPXOR XMM3,XMM3,XMM1 /* */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + LONG $0xdaefe1c5 \ // VPXOR XMM3,XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ + MOVL h, R13 \ /* y0 = a */ + ADDL R15, g \ /* h = h + S1 + CH + k + w */ + MOVL h, R15 \ /* y2 = a */ + LONG $0xef61c1c4; BYTE $0xc8 \ // VPXOR XMM1,XMM3,XMM8 /* XTMP1 = s0 */ + ORL b, R13 \ /* y0 = a|c */ + ADDL g, c \ /* d = d + h + S1 + CH + k + w */ + ANDL b, R15 \ /* y2 = a&c */ + \ /* */ + \ /* compute low s1 */ + \ /* */ + LONG $0xd770f9c5; BYTE $0xfa \ // VPSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ + ANDL a, R13 \ /* y0 = (a|c)&b */ + ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */ + LONG $0xc1fef9c5 \ // VPADDD XMM0,XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + MOVL c, R13 \ /* y0 = e */ + MOVL g, R14 \ /* y1 = a */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + MOVL d, R15 \ /* y2 = f */ + XORL g, R14 \ /* y1 = a ^ (a >> (22-13) */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + LONG $0xd272b9c5; BYTE $0x0a \ // VPSRLD XMM8,XMM2,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ + XORL e, R15 \ /* y2 = f^g */ + LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ + XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ANDL c, R15 \ /* y2 = (f^g)&e */ + LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */ + LONG $0xc2ef39c5 \ // VPXOR XMM8,XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ + MOVL g, R13 \ /* y0 = a */ + ADDL R15, f \ /* h = h + S1 + CH + k + w */ + MOVL g, R15 \ /* y2 = a */ + LONG $0x003942c4; BYTE $0xc2 \ // VPSHUFB XMM8,XMM8,XMM10 /* XTMP4 = s1 {00BA} */ + ORL a, R13 \ /* y0 = a|c */ + ADDL f, b \ /* d = d + h + S1 + CH + k + w */ + ANDL a, R15 \ /* y2 = a&c */ + LONG $0xfe79c1c4; BYTE $0xc0 \ // VPADDD XMM0,XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ + ANDL h, R13 \ /* y0 = (a|c)&b */ + ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */ + \ /* */ + \ /* compute high s1 */ + \ /* */ + LONG $0xd070f9c5; BYTE $0x50 \ // VPSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + MOVL b, R13 \ /* y0 = e */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + MOVL f, R14 \ /* y1 = a */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */ + MOVL c, R15 \ /* y2 = f */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + LONG $0xd272a1c5; BYTE $0x0a \ // VPSRLD XMM11,XMM2,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ + XORL f, R14 \ /* y1 = a ^ (a >> (22-13) */ + XORL d, R15 \ /* y2 = f^g */ + LONG $0xd273e1c5; BYTE $0x13 \ // VPSRLQ XMM3,XMM2,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ + XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ANDL b, R15 \ /* y2 = (f^g)&e */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + LONG $0xd273e9c5; BYTE $0x11 \ // VPSRLQ XMM2,XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ + XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */ + LONG $0xd3efe9c5 \ // VPXOR XMM2,XMM2,XMM3 /* */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */ + LONG $0xdaef21c5 \ // VPXOR XMM11,XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ + MOVL f, R13 \ /* y0 = a */ + ADDL R15, e \ /* h = h + S1 + CH + k + w */ + MOVL f, R15 \ /* y2 = a */ + LONG $0x002142c4; BYTE $0xdc \ // VPSHUFB XMM11,XMM11,XMM12 /* XTMP5 = s1 {DC00} */ + ORL h, R13 \ /* y0 = a|c */ + ADDL e, a \ /* d = d + h + S1 + CH + k + w */ + ANDL h, R15 \ /* y2 = a&c */ + LONG $0xe0fea1c5 \ // VPADDD XMM4,XMM11,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ + ANDL g, R13 \ /* y0 = (a|c)&b */ + ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + ROTATE_XS + + +#define DO_ROUND(a, b, c, d, e, f, g, h, offset) \ + MOVL e, R13 \ /* y0 = e */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + MOVL a, R14 \ /* y1 = a */ + XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + MOVL f, R15 \ /* y2 = f */ + XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + XORL g, R15 \ /* y2 = f^g */ + XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + ANDL e, R15 \ /* y2 = (f^g)&e */ + XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */ + MOVL a, R13 \ /* y0 = a */ + ADDL R15, h \ /* h = h + S1 + CH + k + w */ + MOVL a, R15 \ /* y2 = a */ + ORL c, R13 \ /* y0 = a|c */ + ADDL h, d \ /* d = d + h + S1 + CH + k + w */ + ANDL c, R15 \ /* y2 = a&c */ + ANDL b, R13 \ /* y0 = (a|c)&b */ + ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */ + + +// func blockAvx(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) +TEXT ·blockAvx(SB), 7, $0 + + MOVQ h+0(FP), SI // SI: &h + MOVQ message+24(FP), R8 // &message + MOVQ lenmessage+32(FP), R9 // length of message + CMPQ R9, $0 + JEQ done_hash + ADDQ R8, R9 + MOVQ R9, _inp_end+64(FP) // store end of message + + // Register definition + // a --> eax + // b --> ebx + // c --> ecx + // d --> r8d + // e --> edx + // f --> r9d + // g --> r10d + // h --> r11d + // + // y0 --> r13d + // y1 --> r14d + // y2 --> r15d + + MOVL (0*4)(SI), AX // a = H0 + MOVL (1*4)(SI), BX // b = H1 + MOVL (2*4)(SI), CX // c = H2 + MOVL (3*4)(SI), R8 // d = H3 + MOVL (4*4)(SI), DX // e = H4 + MOVL (5*4)(SI), R9 // f = H5 + MOVL (6*4)(SI), R10 // g = H6 + MOVL (7*4)(SI), R11 // h = H7 + + MOVOU bflipMask<>(SB), X13 + MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA + MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 + + MOVQ message+24(FP), SI // SI: &message + +loop0: + LEAQ constants<>(SB), BP + + // byte swap first 16 dwords + MOVOU 0*16(SI), X4 + LONG $0x0059c2c4; BYTE $0xe5 // VPSHUFB XMM4, XMM4, XMM13 + MOVOU 1*16(SI), X5 + LONG $0x0051c2c4; BYTE $0xed // VPSHUFB XMM5, XMM5, XMM13 + MOVOU 2*16(SI), X6 + LONG $0x0049c2c4; BYTE $0xf5 // VPSHUFB XMM6, XMM6, XMM13 + MOVOU 3*16(SI), X7 + LONG $0x0041c2c4; BYTE $0xfd // VPSHUFB XMM7, XMM7, XMM13 + + MOVQ SI, _inp+72(FP) + MOVD $0x3, DI + + // schedule 48 input dwords, by doing 3 rounds of 16 each +loop1: + LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, _xfer+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + LONG $0x4dfe59c5; BYTE $0x10 // VPADDD XMM9, XMM4, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, _xfer+48(FP) + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + LONG $0x4dfe59c5; BYTE $0x20 // VPADDD XMM9, XMM4, 32[RBP] /* Add 3rd constant to message */ + MOVOU X9, _xfer+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + LONG $0x4dfe59c5; BYTE $0x30 // VPADDD XMM9, XMM4, 48[RBP] /* Add 4th constant to message */ + MOVOU X9, _xfer+48(FP) + ADDQ $64, BP + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + SUBQ $1, DI + JNE loop1 + + MOVD $0x2, DI +loop2: + LONG $0x4dfe59c5; BYTE $0x00 // VPADDD XMM9, XMM4, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, _xfer+48(FP) + DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) + DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) + DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) + DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) + + LONG $0x4dfe51c5; BYTE $0x10 // VPADDD XMM9, XMM5, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, _xfer+48(FP) + ADDQ $32, BP + DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) + DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) + DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) + DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) + + MOVOU X6, X4 + MOVOU X7, X5 + + SUBQ $1, DI + JNE loop2 + + MOVQ h+0(FP), SI // SI: &h + ADDL (0*4)(SI), AX // H0 = a + H0 + MOVL AX, (0*4)(SI) + ADDL (1*4)(SI), BX // H1 = b + H1 + MOVL BX, (1*4)(SI) + ADDL (2*4)(SI), CX // H2 = c + H2 + MOVL CX, (2*4)(SI) + ADDL (3*4)(SI), R8 // H3 = d + H3 + MOVL R8, (3*4)(SI) + ADDL (4*4)(SI), DX // H4 = e + H4 + MOVL DX, (4*4)(SI) + ADDL (5*4)(SI), R9 // H5 = f + H5 + MOVL R9, (5*4)(SI) + ADDL (6*4)(SI), R10 // H6 = g + H6 + MOVL R10, (6*4)(SI) + ADDL (7*4)(SI), R11 // H7 = h + H7 + MOVL R11, (7*4)(SI) + + MOVQ _inp+72(FP), SI + ADDQ $64, SI + CMPQ _inp_end+64(FP), SI + JNE loop0 + +done_hash: + RET + +// Constants table +DATA constants<>+0x0(SB)/8, $0x71374491428a2f98 +DATA constants<>+0x8(SB)/8, $0xe9b5dba5b5c0fbcf +DATA constants<>+0x10(SB)/8, $0x59f111f13956c25b +DATA constants<>+0x18(SB)/8, $0xab1c5ed5923f82a4 +DATA constants<>+0x20(SB)/8, $0x12835b01d807aa98 +DATA constants<>+0x28(SB)/8, $0x550c7dc3243185be +DATA constants<>+0x30(SB)/8, $0x80deb1fe72be5d74 +DATA constants<>+0x38(SB)/8, $0xc19bf1749bdc06a7 +DATA constants<>+0x40(SB)/8, $0xefbe4786e49b69c1 +DATA constants<>+0x48(SB)/8, $0x240ca1cc0fc19dc6 +DATA constants<>+0x50(SB)/8, $0x4a7484aa2de92c6f +DATA constants<>+0x58(SB)/8, $0x76f988da5cb0a9dc +DATA constants<>+0x60(SB)/8, $0xa831c66d983e5152 +DATA constants<>+0x68(SB)/8, $0xbf597fc7b00327c8 +DATA constants<>+0x70(SB)/8, $0xd5a79147c6e00bf3 +DATA constants<>+0x78(SB)/8, $0x1429296706ca6351 +DATA constants<>+0x80(SB)/8, $0x2e1b213827b70a85 +DATA constants<>+0x88(SB)/8, $0x53380d134d2c6dfc +DATA constants<>+0x90(SB)/8, $0x766a0abb650a7354 +DATA constants<>+0x98(SB)/8, $0x92722c8581c2c92e +DATA constants<>+0xa0(SB)/8, $0xa81a664ba2bfe8a1 +DATA constants<>+0xa8(SB)/8, $0xc76c51a3c24b8b70 +DATA constants<>+0xb0(SB)/8, $0xd6990624d192e819 +DATA constants<>+0xb8(SB)/8, $0x106aa070f40e3585 +DATA constants<>+0xc0(SB)/8, $0x1e376c0819a4c116 +DATA constants<>+0xc8(SB)/8, $0x34b0bcb52748774c +DATA constants<>+0xd0(SB)/8, $0x4ed8aa4a391c0cb3 +DATA constants<>+0xd8(SB)/8, $0x682e6ff35b9cca4f +DATA constants<>+0xe0(SB)/8, $0x78a5636f748f82ee +DATA constants<>+0xe8(SB)/8, $0x8cc7020884c87814 +DATA constants<>+0xf0(SB)/8, $0xa4506ceb90befffa +DATA constants<>+0xf8(SB)/8, $0xc67178f2bef9a3f7 + +DATA bflipMask<>+0x00(SB)/8, $0x0405060700010203 +DATA bflipMask<>+0x08(SB)/8, $0x0c0d0e0f08090a0b + +DATA shuf00BA<>+0x00(SB)/8, $0x0b0a090803020100 +DATA shuf00BA<>+0x08(SB)/8, $0xFFFFFFFFFFFFFFFF + +DATA shufDC00<>+0x00(SB)/8, $0xFFFFFFFFFFFFFFFF +DATA shufDC00<>+0x08(SB)/8, $0x0b0a090803020100 + +GLOBL constants<>(SB), 8, $256 +GLOBL bflipMask<>(SB), (NOPTR+RODATA), $16 +GLOBL shuf00BA<>(SB), (NOPTR+RODATA), $16 +GLOBL shufDC00<>(SB), (NOPTR+RODATA), $16 diff --git a/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.go b/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.go new file mode 100644 index 000000000..54abbb0f0 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.go @@ -0,0 +1,22 @@ +//+build !noasm + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +//go:noescape +func blockSsse(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) diff --git a/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s b/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s new file mode 100644 index 000000000..cf1248442 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256blockSsse_amd64.s @@ -0,0 +1,430 @@ +//+build !noasm !appengine + +// SHA256 implementation for SSSE3 + +// +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// This code is based on an Intel White-Paper: +// "Fast SHA-256 Implementations on Intel Architecture Processors" +// +// together with the reference implementation from the following authors: +// James Guilford +// Kirk Yap +// Tim Chen +// +// For Golang it has been converted to Plan 9 assembly with the help of +// github.com/minio/asm2plan9s to assemble Intel instructions to their Plan9 +// equivalents +// + +#include "textflag.h" + +#define ROTATE_XS \ + MOVOU X4, X15 \ + MOVOU X5, X4 \ + MOVOU X6, X5 \ + MOVOU X7, X6 \ + MOVOU X15, X7 + +// compute s0 four at a time and s1 two at a time +// compute W[-16] + W[-7] 4 at a time +#define FOUR_ROUNDS_AND_SCHED(a, b, c, d, e, f, g, h) \ + MOVL e, R13 \ /* y0 = e */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + MOVL a, R14 \ /* y1 = a */ + MOVOU X7, X0 \ + LONG $0x0f3a0f66; WORD $0x04c6 \ // PALIGNR XMM0,XMM6,0x4 /* XTMP0 = W[-7] */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ + MOVL f, R15 \ /* y2 = f */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ + XORL g, R15 \ /* y2 = f^g */ + LONG $0xc4fe0f66 \ // PADDD XMM0,XMM4 /* XTMP0 = W[-7] + W[-16] */ + XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6) ) */ + ANDL e, R15 \ /* y2 = (f^g)&e */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + \ /* */ + \ /* compute s0 */ + \ /* */ + MOVOU X5, X1 \ + LONG $0x0f3a0f66; WORD $0x04cc \ // PALIGNR XMM1,XMM4,0x4 /* XTMP1 = W[-15] */ + XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ADDL _xfer+48(FP), R15 \ /* y2 = k + w + S1 + CH */ + MOVL a, R13 \ /* y0 = a */ + ADDL R15, h \ /* h = h + S1 + CH + k + w */ + \ /* ROTATE_ARGS */ + MOVL a, R15 \ /* y2 = a */ + MOVOU X1, X2 \ + LONG $0xd2720f66; BYTE $0x07 \ // PSRLD XMM2,0x7 /* */ + ORL c, R13 \ /* y0 = a|c */ + ADDL h, d \ /* d = d + h + S1 + CH + k + w */ + ANDL c, R15 \ /* y2 = a&c */ + MOVOU X1, X3 \ + LONG $0xf3720f66; BYTE $0x19 \ // PSLLD XMM3,0x19 /* */ + ANDL b, R13 \ /* y0 = (a|c)&b */ + ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ + LONG $0xdaeb0f66 \ // POR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, h \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + MOVL d, R13 \ /* y0 = e */ + MOVL h, R14 \ /* y1 = a */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + XORL d, R13 \ /* y0 = e ^ (e >> (25-11)) */ + MOVL e, R15 \ /* y2 = f */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + MOVOU X1, X2 \ + LONG $0xd2720f66; BYTE $0x12 \ // PSRLD XMM2,0x12 /* */ + XORL h, R14 \ /* y1 = a ^ (a >> (22-13) */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + XORL f, R15 \ /* y2 = f^g */ + MOVOU X1, X8 \ + LONG $0x720f4166; WORD $0x03d0 \ // PSRLD XMM8,0x3 /* XTMP4 = W[-15] >> 3 */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + XORL d, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ANDL d, R15 \ /* y2 = (f^g)&e */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + LONG $0xf1720f66; BYTE $0x0e \ // PSLLD XMM1,0xe /* */ + XORL h, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + XORL f, R15 \ /* y2 = CH = ((f^g)&e)^g */ + LONG $0xd9ef0f66 \ // PXOR XMM3,XMM1 /* */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ADDL _xfer+52(FP), R15 \ /* y2 = k + w + S1 + CH */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + LONG $0xdaef0f66 \ // PXOR XMM3,XMM2 /* XTMP1 = W[-15] MY_ROR 7 ^ W[-15] MY_ROR */ + MOVL h, R13 \ /* y0 = a */ + ADDL R15, g \ /* h = h + S1 + CH + k + w */ + MOVL h, R15 \ /* y2 = a */ + MOVOU X3, X1 \ + LONG $0xef0f4166; BYTE $0xc8 \ // PXOR XMM1,XMM8 /* XTMP1 = s0 */ + ORL b, R13 \ /* y0 = a|c */ + ADDL g, c \ /* d = d + h + S1 + CH + k + w */ + ANDL b, R15 \ /* y2 = a&c */ + \ /* */ + \ /* compute low s1 */ + \ /* */ + LONG $0xd7700f66; BYTE $0xfa \ // PSHUFD XMM2,XMM7,0xfa /* XTMP2 = W[-2] {BBAA} */ + ANDL a, R13 \ /* y0 = (a|c)&b */ + ADDL R14, g \ /* h = h + S1 + CH + k + w + S0 */ + LONG $0xc1fe0f66 \ // PADDD XMM0,XMM1 /* XTMP0 = W[-16] + W[-7] + s0 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, g \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + MOVL c, R13 \ /* y0 = e */ + MOVL g, R14 \ /* y1 = a */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + XORL c, R13 \ /* y0 = e ^ (e >> (25-11)) */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + MOVL d, R15 \ /* y2 = f */ + XORL g, R14 \ /* y1 = a ^ (a >> (22-13) */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + MOVOU X2, X8 \ + LONG $0x720f4166; WORD $0x0ad0 \ // PSRLD XMM8,0xa /* XTMP4 = W[-2] >> 10 {BBAA} */ + XORL e, R15 \ /* y2 = f^g */ + MOVOU X2, X3 \ + LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xBxA} */ + XORL c, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ANDL c, R15 \ /* y2 = (f^g)&e */ + LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xBxA} */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + XORL g, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + XORL e, R15 \ /* y2 = CH = ((f^g)&e)^g */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL _xfer+56(FP), R15 \ /* y2 = k + w + S1 + CH */ + LONG $0xef0f4466; BYTE $0xc2 \ // PXOR XMM8,XMM2 /* XTMP4 = s1 {xBxA} */ + MOVL g, R13 \ /* y0 = a */ + ADDL R15, f \ /* h = h + S1 + CH + k + w */ + MOVL g, R15 \ /* y2 = a */ + LONG $0x380f4566; WORD $0xc200 \ // PSHUFB XMM8,XMM10 /* XTMP4 = s1 {00BA} */ + ORL a, R13 \ /* y0 = a|c */ + ADDL f, b \ /* d = d + h + S1 + CH + k + w */ + ANDL a, R15 \ /* y2 = a&c */ + LONG $0xfe0f4166; BYTE $0xc0 \ // PADDD XMM0,XMM8 /* XTMP0 = {..., ..., W[1], W[0]} */ + ANDL h, R13 \ /* y0 = (a|c)&b */ + ADDL R14, f \ /* h = h + S1 + CH + k + w + S0 */ + \ /* */ + \ /* compute high s1 */ + \ /* */ + LONG $0xd0700f66; BYTE $0x50 \ // PSHUFD XMM2,XMM0,0x50 /* XTMP2 = W[-2] {DDCC} */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, f \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + MOVL b, R13 \ /* y0 = e */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + MOVL f, R14 \ /* y1 = a */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + XORL b, R13 \ /* y0 = e ^ (e >> (25-11)) */ + MOVL c, R15 \ /* y2 = f */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + MOVOU X2, X11 \ + LONG $0x720f4166; WORD $0x0ad3 \ // PSRLD XMM11,0xa /* XTMP5 = W[-2] >> 10 {DDCC} */ + XORL f, R14 \ /* y1 = a ^ (a >> (22-13) */ + XORL d, R15 \ /* y2 = f^g */ + MOVOU X2, X3 \ + LONG $0xd3730f66; BYTE $0x13 \ // PSRLQ XMM3,0x13 /* XTMP3 = W[-2] MY_ROR 19 {xDxC} */ + XORL b, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ANDL b, R15 \ /* y2 = (f^g)&e */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + LONG $0xd2730f66; BYTE $0x11 \ // PSRLQ XMM2,0x11 /* XTMP2 = W[-2] MY_ROR 17 {xDxC} */ + XORL f, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + XORL d, R15 \ /* y2 = CH = ((f^g)&e)^g */ + LONG $0xd3ef0f66 \ // PXOR XMM2,XMM3 /* */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ADDL _xfer+60(FP), R15 \ /* y2 = k + w + S1 + CH */ + LONG $0xef0f4466; BYTE $0xda \ // PXOR XMM11,XMM2 /* XTMP5 = s1 {xDxC} */ + MOVL f, R13 \ /* y0 = a */ + ADDL R15, e \ /* h = h + S1 + CH + k + w */ + MOVL f, R15 \ /* y2 = a */ + LONG $0x380f4566; WORD $0xdc00 \ // PSHUFB XMM11,XMM12 /* XTMP5 = s1 {DC00} */ + ORL h, R13 \ /* y0 = a|c */ + ADDL e, a \ /* d = d + h + S1 + CH + k + w */ + ANDL h, R15 \ /* y2 = a&c */ + MOVOU X11, X4 \ + LONG $0xe0fe0f66 \ // PADDD XMM4,XMM0 /* X0 = {W[3], W[2], W[1], W[0]} */ + ANDL g, R13 \ /* y0 = (a|c)&b */ + ADDL R14, e \ /* h = h + S1 + CH + k + w + S0 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, e \ /* h = h + S1 + CH + k + w + S0 + MAJ */ + \ /* ROTATE_ARGS */ + ROTATE_XS + + +#define DO_ROUND(a, b, c, d, e, f, g, h, offset) \ + MOVL e, R13 \ /* y0 = e */ + ROLL $18, R13 \ /* y0 = e >> (25-11) */ + MOVL a, R14 \ /* y1 = a */ + XORL e, R13 \ /* y0 = e ^ (e >> (25-11)) */ + ROLL $23, R14 \ /* y1 = a >> (22-13) */ + MOVL f, R15 \ /* y2 = f */ + XORL a, R14 \ /* y1 = a ^ (a >> (22-13) */ + ROLL $27, R13 \ /* y0 = (e >> (11-6)) ^ (e >> (25-6)) */ + XORL g, R15 \ /* y2 = f^g */ + XORL e, R13 \ /* y0 = e ^ (e >> (11-6)) ^ (e >> (25-6)) */ + ROLL $21, R14 \ /* y1 = (a >> (13-2)) ^ (a >> (22-2)) */ + ANDL e, R15 \ /* y2 = (f^g)&e */ + XORL a, R14 \ /* y1 = a ^ (a >> (13-2)) ^ (a >> (22-2)) */ + ROLL $26, R13 \ /* y0 = S1 = (e>>6) & (e>>11) ^ (e>>25) */ + XORL g, R15 \ /* y2 = CH = ((f^g)&e)^g */ + ADDL R13, R15 \ /* y2 = S1 + CH */ + ROLL $30, R14 \ /* y1 = S0 = (a>>2) ^ (a>>13) ^ (a>>22) */ + ADDL _xfer+offset(FP), R15 \ /* y2 = k + w + S1 + CH */ + MOVL a, R13 \ /* y0 = a */ + ADDL R15, h \ /* h = h + S1 + CH + k + w */ + MOVL a, R15 \ /* y2 = a */ + ORL c, R13 \ /* y0 = a|c */ + ADDL h, d \ /* d = d + h + S1 + CH + k + w */ + ANDL c, R15 \ /* y2 = a&c */ + ANDL b, R13 \ /* y0 = (a|c)&b */ + ADDL R14, h \ /* h = h + S1 + CH + k + w + S0 */ + ORL R15, R13 \ /* y0 = MAJ = (a|c)&b)|(a&c) */ + ADDL R13, h /* h = h + S1 + CH + k + w + S0 + MAJ */ + + +// func blockSsse(h []uint32, message []uint8, reserved0, reserved1, reserved2, reserved3 uint64) +TEXT ·blockSsse(SB), 7, $0 + + MOVQ h+0(FP), SI // SI: &h + MOVQ message+24(FP), R8 // &message + MOVQ lenmessage+32(FP), R9 // length of message + CMPQ R9, $0 + JEQ done_hash + ADDQ R8, R9 + MOVQ R9, _inp_end+64(FP) // store end of message + + // Register definition + // a --> eax + // b --> ebx + // c --> ecx + // d --> r8d + // e --> edx + // f --> r9d + // g --> r10d + // h --> r11d + // + // y0 --> r13d + // y1 --> r14d + // y2 --> r15d + + MOVL (0*4)(SI), AX // a = H0 + MOVL (1*4)(SI), BX // b = H1 + MOVL (2*4)(SI), CX // c = H2 + MOVL (3*4)(SI), R8 // d = H3 + MOVL (4*4)(SI), DX // e = H4 + MOVL (5*4)(SI), R9 // f = H5 + MOVL (6*4)(SI), R10 // g = H6 + MOVL (7*4)(SI), R11 // h = H7 + + MOVOU bflipMask<>(SB), X13 + MOVOU shuf00BA<>(SB), X10 // shuffle xBxA -> 00BA + MOVOU shufDC00<>(SB), X12 // shuffle xDxC -> DC00 + + MOVQ message+24(FP), SI // SI: &message + +loop0: + LEAQ constants<>(SB), BP + + // byte swap first 16 dwords + MOVOU 0*16(SI), X4 + LONG $0x380f4166; WORD $0xe500 // PSHUFB XMM4, XMM13 + MOVOU 1*16(SI), X5 + LONG $0x380f4166; WORD $0xed00 // PSHUFB XMM5, XMM13 + MOVOU 2*16(SI), X6 + LONG $0x380f4166; WORD $0xf500 // PSHUFB XMM6, XMM13 + MOVOU 3*16(SI), X7 + LONG $0x380f4166; WORD $0xfd00 // PSHUFB XMM7, XMM13 + + MOVQ SI, _inp+72(FP) + MOVD $0x3, DI + + // Align + // nop WORD PTR [rax+rax*1+0x0] + + // schedule 48 input dwords, by doing 3 rounds of 16 each +loop1: + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, _xfer+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, _xfer+48(FP) + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x204d // PADDD XMM9, 32[RBP] /* Add 3rd constant to message */ + MOVOU X9, _xfer+48(FP) + FOUR_ROUNDS_AND_SCHED(AX, BX, CX, R8, DX, R9, R10, R11) + + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x304d // PADDD XMM9, 48[RBP] /* Add 4th constant to message */ + MOVOU X9, _xfer+48(FP) + ADDQ $64, BP + FOUR_ROUNDS_AND_SCHED(DX, R9, R10, R11, AX, BX, CX, R8) + + SUBQ $1, DI + JNE loop1 + + MOVD $0x2, DI +loop2: + MOVOU X4, X9 + LONG $0xfe0f4466; WORD $0x004d // PADDD XMM9, 0[RBP] /* Add 1st constant to first part of message */ + MOVOU X9, _xfer+48(FP) + DO_ROUND( AX, BX, CX, R8, DX, R9, R10, R11, 48) + DO_ROUND(R11, AX, BX, CX, R8, DX, R9, R10, 52) + DO_ROUND(R10, R11, AX, BX, CX, R8, DX, R9, 56) + DO_ROUND( R9, R10, R11, AX, BX, CX, R8, DX, 60) + + MOVOU X5, X9 + LONG $0xfe0f4466; WORD $0x104d // PADDD XMM9, 16[RBP] /* Add 2nd constant to message */ + MOVOU X9, _xfer+48(FP) + ADDQ $32, BP + DO_ROUND( DX, R9, R10, R11, AX, BX, CX, R8, 48) + DO_ROUND( R8, DX, R9, R10, R11, AX, BX, CX, 52) + DO_ROUND( CX, R8, DX, R9, R10, R11, AX, BX, 56) + DO_ROUND( BX, CX, R8, DX, R9, R10, R11, AX, 60) + + MOVOU X6, X4 + MOVOU X7, X5 + + SUBQ $1, DI + JNE loop2 + + MOVQ h+0(FP), SI // SI: &h + ADDL (0*4)(SI), AX // H0 = a + H0 + MOVL AX, (0*4)(SI) + ADDL (1*4)(SI), BX // H1 = b + H1 + MOVL BX, (1*4)(SI) + ADDL (2*4)(SI), CX // H2 = c + H2 + MOVL CX, (2*4)(SI) + ADDL (3*4)(SI), R8 // H3 = d + H3 + MOVL R8, (3*4)(SI) + ADDL (4*4)(SI), DX // H4 = e + H4 + MOVL DX, (4*4)(SI) + ADDL (5*4)(SI), R9 // H5 = f + H5 + MOVL R9, (5*4)(SI) + ADDL (6*4)(SI), R10 // H6 = g + H6 + MOVL R10, (6*4)(SI) + ADDL (7*4)(SI), R11 // H7 = h + H7 + MOVL R11, (7*4)(SI) + + MOVQ _inp+72(FP), SI + ADDQ $64, SI + CMPQ _inp_end+64(FP), SI + JNE loop0 + +done_hash: + RET + +// Constants table +DATA constants<>+0x0(SB)/8, $0x71374491428a2f98 +DATA constants<>+0x8(SB)/8, $0xe9b5dba5b5c0fbcf +DATA constants<>+0x10(SB)/8, $0x59f111f13956c25b +DATA constants<>+0x18(SB)/8, $0xab1c5ed5923f82a4 +DATA constants<>+0x20(SB)/8, $0x12835b01d807aa98 +DATA constants<>+0x28(SB)/8, $0x550c7dc3243185be +DATA constants<>+0x30(SB)/8, $0x80deb1fe72be5d74 +DATA constants<>+0x38(SB)/8, $0xc19bf1749bdc06a7 +DATA constants<>+0x40(SB)/8, $0xefbe4786e49b69c1 +DATA constants<>+0x48(SB)/8, $0x240ca1cc0fc19dc6 +DATA constants<>+0x50(SB)/8, $0x4a7484aa2de92c6f +DATA constants<>+0x58(SB)/8, $0x76f988da5cb0a9dc +DATA constants<>+0x60(SB)/8, $0xa831c66d983e5152 +DATA constants<>+0x68(SB)/8, $0xbf597fc7b00327c8 +DATA constants<>+0x70(SB)/8, $0xd5a79147c6e00bf3 +DATA constants<>+0x78(SB)/8, $0x1429296706ca6351 +DATA constants<>+0x80(SB)/8, $0x2e1b213827b70a85 +DATA constants<>+0x88(SB)/8, $0x53380d134d2c6dfc +DATA constants<>+0x90(SB)/8, $0x766a0abb650a7354 +DATA constants<>+0x98(SB)/8, $0x92722c8581c2c92e +DATA constants<>+0xa0(SB)/8, $0xa81a664ba2bfe8a1 +DATA constants<>+0xa8(SB)/8, $0xc76c51a3c24b8b70 +DATA constants<>+0xb0(SB)/8, $0xd6990624d192e819 +DATA constants<>+0xb8(SB)/8, $0x106aa070f40e3585 +DATA constants<>+0xc0(SB)/8, $0x1e376c0819a4c116 +DATA constants<>+0xc8(SB)/8, $0x34b0bcb52748774c +DATA constants<>+0xd0(SB)/8, $0x4ed8aa4a391c0cb3 +DATA constants<>+0xd8(SB)/8, $0x682e6ff35b9cca4f +DATA constants<>+0xe0(SB)/8, $0x78a5636f748f82ee +DATA constants<>+0xe8(SB)/8, $0x8cc7020884c87814 +DATA constants<>+0xf0(SB)/8, $0xa4506ceb90befffa +DATA constants<>+0xf8(SB)/8, $0xc67178f2bef9a3f7 + +DATA bflipMask<>+0x00(SB)/8, $0x0405060700010203 +DATA bflipMask<>+0x08(SB)/8, $0x0c0d0e0f08090a0b + +DATA shuf00BA<>+0x00(SB)/8, $0x0b0a090803020100 +DATA shuf00BA<>+0x08(SB)/8, $0xFFFFFFFFFFFFFFFF + +DATA shufDC00<>+0x00(SB)/8, $0xFFFFFFFFFFFFFFFF +DATA shufDC00<>+0x08(SB)/8, $0x0b0a090803020100 + +GLOBL constants<>(SB), 8, $256 +GLOBL bflipMask<>(SB), (NOPTR+RODATA), $16 +GLOBL shuf00BA<>(SB), (NOPTR+RODATA), $16 +GLOBL shufDC00<>(SB), (NOPTR+RODATA), $16 diff --git a/vendor/github.com/minio/sha256-simd/sha256block_386.go b/vendor/github.com/minio/sha256-simd/sha256block_386.go new file mode 100644 index 000000000..84b54ae85 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_386.go @@ -0,0 +1,24 @@ +//+build !noasm + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +func blockArmGo(dig *digest, p []byte) {} +func blockAvx2Go(dig *digest, p []byte) {} +func blockAvxGo(dig *digest, p []byte) {} +func blockSsseGo(dig *digest, p []byte) {} diff --git a/vendor/github.com/minio/sha256-simd/sha256block_amd64.go b/vendor/github.com/minio/sha256-simd/sha256block_amd64.go new file mode 100644 index 000000000..b6db61e3f --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_amd64.go @@ -0,0 +1,48 @@ +//+build !noasm + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +func blockArmGo(dig *digest, p []byte) {} + +func blockAvxGo(dig *digest, p []byte) { + + h := []uint32{dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]} + + blockAvx(h[:], p[:], 0, 0, 0, 0) + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] +} + +func blockAvx2Go(dig *digest, p []byte) { + + h := []uint32{dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]} + + blockAvx2(h[:], p[:]) + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] +} + +func blockSsseGo(dig *digest, p []byte) { + + h := []uint32{dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]} + + blockSsse(h[:], p[:], 0, 0, 0, 0) + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4], h[5], h[6], h[7] +} diff --git a/vendor/github.com/minio/sha256-simd/sha256block_arm.go b/vendor/github.com/minio/sha256-simd/sha256block_arm.go new file mode 100644 index 000000000..d892504aa --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_arm.go @@ -0,0 +1,24 @@ +//+build !noasm + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +func blockAvx2Go(dig *digest, p []byte) {} +func blockAvxGo(dig *digest, p []byte) {} +func blockSsseGo(dig *digest, p []byte) {} +func blockArmGo(dig *digest, p []byte) {} diff --git a/vendor/github.com/minio/sha256-simd/sha256block_arm64.go b/vendor/github.com/minio/sha256-simd/sha256block_arm64.go new file mode 100644 index 000000000..299cf3366 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_arm64.go @@ -0,0 +1,36 @@ +//+build !noasm + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +func blockAvx2Go(dig *digest, p []byte) {} +func blockAvxGo(dig *digest, p []byte) {} +func blockSsseGo(dig *digest, p []byte) {} + +//go:noescape +func blockArm(h []uint32, message []uint8) + +func blockArmGo(dig *digest, p []byte) { + + h := []uint32{dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7]} + + blockArm(h[:], p[:]) + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h[0], h[1], h[2], h[3], h[4], + h[5], h[6], h[7] +} diff --git a/vendor/github.com/minio/sha256-simd/sha256block_arm64.s b/vendor/github.com/minio/sha256-simd/sha256block_arm64.s new file mode 100644 index 000000000..e1e30c939 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_arm64.s @@ -0,0 +1,193 @@ +//+build !noasm !appengine + +// ARM64 version of SHA256 + +// +// Minio Cloud Storage, (C) 2016 Minio, Inc. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// + +// +// Based on implementaion as found in https://github.com/jocover/sha256-armv8 +// +// Use github.com/minio/asm2plan9s on this file to assemble ARM instructions to +// their Plan9 equivalents +// + +TEXT ·blockArm(SB), 7, $0 + MOVD h+0(FP), R0 + MOVD message+24(FP), R1 + MOVD lenmessage+32(FP), R2 // length of message + SUBS $64, R2 + BMI complete + + // Load constants table pointer + MOVD $·constants(SB), R3 + + // Cache constants table in registers v16 - v31 + WORD $0x4cdf2870 // ld1 {v16.4s-v19.4s}, [x3], #64 + WORD $0x4cdf7800 // ld1 {v0.4s}, [x0], #16 + WORD $0x4cdf2874 // ld1 {v20.4s-v23.4s}, [x3], #64 + + WORD $0x4c407801 // ld1 {v1.4s}, [x0] + WORD $0x4cdf2878 // ld1 {v24.4s-v27.4s}, [x3], #64 + WORD $0xd1004000 // sub x0, x0, #0x10 + WORD $0x4cdf287c // ld1 {v28.4s-v31.4s}, [x3], #64 + +loop: + // Main loop + WORD $0x4cdf2025 // ld1 {v5.16b-v8.16b}, [x1], #64 + WORD $0x4ea01c02 // mov v2.16b, v0.16b + WORD $0x4ea11c23 // mov v3.16b, v1.16b + WORD $0x6e2008a5 // rev32 v5.16b, v5.16b + WORD $0x6e2008c6 // rev32 v6.16b, v6.16b + WORD $0x4eb084a9 // add v9.4s, v5.4s, v16.4s + WORD $0x6e2008e7 // rev32 v7.16b, v7.16b + WORD $0x4eb184ca // add v10.4s, v6.4s, v17.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s + WORD $0x6e200908 // rev32 v8.16b, v8.16b + WORD $0x4eb284e9 // add v9.4s, v7.4s, v18.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s + WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s + WORD $0x4eb3850a // add v10.4s, v8.4s, v19.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x5e282907 // sha256su0 v7.4s, v8.4s + WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s + WORD $0x4eb484a9 // add v9.4s, v5.4s, v20.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s + WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s + WORD $0x4eb584ca // add v10.4s, v6.4s, v21.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s + WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s + WORD $0x4eb684e9 // add v9.4s, v7.4s, v22.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s + WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s + WORD $0x4eb7850a // add v10.4s, v8.4s, v23.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x5e282907 // sha256su0 v7.4s, v8.4s + WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s + WORD $0x4eb884a9 // add v9.4s, v5.4s, v24.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s + WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s + WORD $0x4eb984ca // add v10.4s, v6.4s, v25.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x5e2828c5 // sha256su0 v5.4s, v6.4s + WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s + WORD $0x4eba84e9 // add v9.4s, v7.4s, v26.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x5e2828e6 // sha256su0 v6.4s, v7.4s + WORD $0x5e0860e5 // sha256su1 v5.4s, v7.4s, v8.4s + WORD $0x4ebb850a // add v10.4s, v8.4s, v27.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x5e282907 // sha256su0 v7.4s, v8.4s + WORD $0x5e056106 // sha256su1 v6.4s, v8.4s, v5.4s + WORD $0x4ebc84a9 // add v9.4s, v5.4s, v28.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x5e2828a8 // sha256su0 v8.4s, v5.4s + WORD $0x5e0660a7 // sha256su1 v7.4s, v5.4s, v6.4s + WORD $0x4ebd84ca // add v10.4s, v6.4s, v29.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x5e0760c8 // sha256su1 v8.4s, v6.4s, v7.4s + WORD $0x4ebe84e9 // add v9.4s, v7.4s, v30.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x4ebf850a // add v10.4s, v8.4s, v31.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e094062 // sha256h q2, q3, v9.4s + WORD $0x5e095083 // sha256h2 q3, q4, v9.4s + WORD $0x4ea21c44 // mov v4.16b, v2.16b + WORD $0x5e0a4062 // sha256h q2, q3, v10.4s + WORD $0x5e0a5083 // sha256h2 q3, q4, v10.4s + WORD $0x4ea38421 // add v1.4s, v1.4s, v3.4s + WORD $0x4ea28400 // add v0.4s, v0.4s, v2.4s + + SUBS $64, R2 + BPL loop + + // Store result + WORD $0x4c00a800 // st1 {v0.4s, v1.4s}, [x0] + +complete: + RET + + +// Constants table +DATA ·constants+0x0(SB)/8, $0x71374491428a2f98 +DATA ·constants+0x8(SB)/8, $0xe9b5dba5b5c0fbcf +DATA ·constants+0x10(SB)/8, $0x59f111f13956c25b +DATA ·constants+0x18(SB)/8, $0xab1c5ed5923f82a4 +DATA ·constants+0x20(SB)/8, $0x12835b01d807aa98 +DATA ·constants+0x28(SB)/8, $0x550c7dc3243185be +DATA ·constants+0x30(SB)/8, $0x80deb1fe72be5d74 +DATA ·constants+0x38(SB)/8, $0xc19bf1749bdc06a7 +DATA ·constants+0x40(SB)/8, $0xefbe4786e49b69c1 +DATA ·constants+0x48(SB)/8, $0x240ca1cc0fc19dc6 +DATA ·constants+0x50(SB)/8, $0x4a7484aa2de92c6f +DATA ·constants+0x58(SB)/8, $0x76f988da5cb0a9dc +DATA ·constants+0x60(SB)/8, $0xa831c66d983e5152 +DATA ·constants+0x68(SB)/8, $0xbf597fc7b00327c8 +DATA ·constants+0x70(SB)/8, $0xd5a79147c6e00bf3 +DATA ·constants+0x78(SB)/8, $0x1429296706ca6351 +DATA ·constants+0x80(SB)/8, $0x2e1b213827b70a85 +DATA ·constants+0x88(SB)/8, $0x53380d134d2c6dfc +DATA ·constants+0x90(SB)/8, $0x766a0abb650a7354 +DATA ·constants+0x98(SB)/8, $0x92722c8581c2c92e +DATA ·constants+0xa0(SB)/8, $0xa81a664ba2bfe8a1 +DATA ·constants+0xa8(SB)/8, $0xc76c51a3c24b8b70 +DATA ·constants+0xb0(SB)/8, $0xd6990624d192e819 +DATA ·constants+0xb8(SB)/8, $0x106aa070f40e3585 +DATA ·constants+0xc0(SB)/8, $0x1e376c0819a4c116 +DATA ·constants+0xc8(SB)/8, $0x34b0bcb52748774c +DATA ·constants+0xd0(SB)/8, $0x4ed8aa4a391c0cb3 +DATA ·constants+0xd8(SB)/8, $0x682e6ff35b9cca4f +DATA ·constants+0xe0(SB)/8, $0x78a5636f748f82ee +DATA ·constants+0xe8(SB)/8, $0x8cc7020884c87814 +DATA ·constants+0xf0(SB)/8, $0xa4506ceb90befffa +DATA ·constants+0xf8(SB)/8, $0xc67178f2bef9a3f7 + +GLOBL ·constants(SB), 8, $256 + diff --git a/vendor/github.com/minio/sha256-simd/sha256block_noasm.go b/vendor/github.com/minio/sha256-simd/sha256block_noasm.go new file mode 100644 index 000000000..356ca5367 --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_noasm.go @@ -0,0 +1,136 @@ +//+build !arm64 !amd64 noasm appengine + +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +func blockGeneric(dig *digest, p []byte) { + var w [64]uint32 + h0, h1, h2, h3, h4, h5, h6, h7 := dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] + for len(p) >= chunk { + // Can interlace the computation of w with the + // rounds below if needed for speed. + for i := 0; i < 16; i++ { + j := i * 4 + w[i] = uint32(p[j])<<24 | uint32(p[j+1])<<16 | uint32(p[j+2])<<8 | uint32(p[j+3]) + } + for i := 16; i < 64; i++ { + v1 := w[i-2] + t1 := (v1>>17 | v1<<(32-17)) ^ (v1>>19 | v1<<(32-19)) ^ (v1 >> 10) + v2 := w[i-15] + t2 := (v2>>7 | v2<<(32-7)) ^ (v2>>18 | v2<<(32-18)) ^ (v2 >> 3) + w[i] = t1 + w[i-7] + t2 + w[i-16] + } + + a, b, c, d, e, f, g, h := h0, h1, h2, h3, h4, h5, h6, h7 + + for i := 0; i < 64; i++ { + t1 := h + ((e>>6 | e<<(32-6)) ^ (e>>11 | e<<(32-11)) ^ (e>>25 | e<<(32-25))) + ((e & f) ^ (^e & g)) + _K[i] + w[i] + + t2 := ((a>>2 | a<<(32-2)) ^ (a>>13 | a<<(32-13)) ^ (a>>22 | a<<(32-22))) + ((a & b) ^ (a & c) ^ (b & c)) + + h = g + g = f + f = e + e = d + t1 + d = c + c = b + b = a + a = t1 + t2 + } + + h0 += a + h1 += b + h2 += c + h3 += d + h4 += e + h5 += f + h6 += g + h7 += h + + p = p[chunk:] + } + + dig.h[0], dig.h[1], dig.h[2], dig.h[3], dig.h[4], dig.h[5], dig.h[6], dig.h[7] = h0, h1, h2, h3, h4, h5, h6, h7 +} + +var _K = []uint32{ + 0x428a2f98, + 0x71374491, + 0xb5c0fbcf, + 0xe9b5dba5, + 0x3956c25b, + 0x59f111f1, + 0x923f82a4, + 0xab1c5ed5, + 0xd807aa98, + 0x12835b01, + 0x243185be, + 0x550c7dc3, + 0x72be5d74, + 0x80deb1fe, + 0x9bdc06a7, + 0xc19bf174, + 0xe49b69c1, + 0xefbe4786, + 0x0fc19dc6, + 0x240ca1cc, + 0x2de92c6f, + 0x4a7484aa, + 0x5cb0a9dc, + 0x76f988da, + 0x983e5152, + 0xa831c66d, + 0xb00327c8, + 0xbf597fc7, + 0xc6e00bf3, + 0xd5a79147, + 0x06ca6351, + 0x14292967, + 0x27b70a85, + 0x2e1b2138, + 0x4d2c6dfc, + 0x53380d13, + 0x650a7354, + 0x766a0abb, + 0x81c2c92e, + 0x92722c85, + 0xa2bfe8a1, + 0xa81a664b, + 0xc24b8b70, + 0xc76c51a3, + 0xd192e819, + 0xd6990624, + 0xf40e3585, + 0x106aa070, + 0x19a4c116, + 0x1e376c08, + 0x2748774c, + 0x34b0bcb5, + 0x391c0cb3, + 0x4ed8aa4a, + 0x5b9cca4f, + 0x682e6ff3, + 0x748f82ee, + 0x78a5636f, + 0x84c87814, + 0x8cc70208, + 0x90befffa, + 0xa4506ceb, + 0xbef9a3f7, + 0xc67178f2, +} diff --git a/vendor/github.com/minio/sha256-simd/sha256block_ppc64.go b/vendor/github.com/minio/sha256-simd/sha256block_ppc64.go new file mode 100644 index 000000000..b81017e8d --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_ppc64.go @@ -0,0 +1,22 @@ +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +func blockAvx2Go(dig *digest, p []byte) {} +func blockAvxGo(dig *digest, p []byte) {} +func blockSsseGo(dig *digest, p []byte) {} +func blockArmGo(dig *digest, p []byte) {} diff --git a/vendor/github.com/minio/sha256-simd/sha256block_ppc64le.go b/vendor/github.com/minio/sha256-simd/sha256block_ppc64le.go new file mode 100644 index 000000000..b81017e8d --- /dev/null +++ b/vendor/github.com/minio/sha256-simd/sha256block_ppc64le.go @@ -0,0 +1,22 @@ +/* + * Minio Cloud Storage, (C) 2016 Minio, Inc. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package sha256 + +func blockAvx2Go(dig *digest, p []byte) {} +func blockAvxGo(dig *digest, p []byte) {} +func blockSsseGo(dig *digest, p []byte) {} +func blockArmGo(dig *digest, p []byte) {} diff --git a/vendor/manifest b/vendor/manifest index 2de298784..3a6518835 100644 --- a/vendor/manifest +++ b/vendor/manifest @@ -107,14 +107,6 @@ "branch": "master", "notests": true }, - { - "importpath": "github.com/gogo/protobuf/proto", - "repository": "https://github.com/gogo/protobuf", - "revision": "7883e1468d48d969e1c3ce4bcde89b6a7dd4adc4", - "branch": "master", - "path": "/proto", - "notests": true - }, { "importpath": "github.com/golang/groupcache/lru", "repository": "https://github.com/golang/groupcache", @@ -155,6 +147,13 @@ "branch": "master", "notests": true }, + { + "importpath": "github.com/minio/sha256-simd", + "repository": "https://github.com/minio/sha256-simd", + "revision": "672e7bc9f3482375df73741cf57a157fe187ec26", + "branch": "master", + "notests": true + }, { "importpath": "github.com/onsi/ginkgo", "repository": "https://github.com/onsi/ginkgo", @@ -199,13 +198,6 @@ "revision": "ab8b5dcf1042e818ab68e770d465112a899b668e", "branch": "master" }, - { - "importpath": "github.com/syndtr/goleveldb/leveldb", - "repository": "https://github.com/syndtr/goleveldb", - "revision": "ad0d8b2ab58a55ed5c58073aa46451d5e1ca1280", - "branch": "master", - "path": "/leveldb" - }, { "importpath": "github.com/thejerf/suture", "repository": "https://github.com/thejerf/suture",