mirror of
https://github.com/octoleo/syncthing.git
synced 2025-01-22 22:58:25 +00:00
lib/connections: Use our own fork of kcp (fixes #4063)
This updates kcp and uses our own fork which: 1. Keys sessions not just by remote address, but by remote address + conversation id 2. Allows not to close connections that were passed directly to the library. 3. Resets cache key if the session gets terminated. GitHub-Pull-Request: https://github.com/syncthing/syncthing/pull/4339 LGTM: calmh
This commit is contained in:
parent
ab132ff6fe
commit
cbcc3ea132
@ -11,9 +11,9 @@ import (
|
||||
"net/url"
|
||||
"time"
|
||||
|
||||
"github.com/AudriusButkevicius/kcp-go"
|
||||
"github.com/syncthing/syncthing/lib/config"
|
||||
"github.com/syncthing/syncthing/lib/protocol"
|
||||
"github.com/xtaci/kcp-go"
|
||||
"github.com/xtaci/smux"
|
||||
)
|
||||
|
||||
@ -38,7 +38,7 @@ func (d *kcpDialer) Dial(id protocol.DeviceID, uri *url.URL) (internalConn, erro
|
||||
// Try to dial via an existing listening connection
|
||||
// giving better changes punching through NAT.
|
||||
if f := getDialingFilter(); f != nil {
|
||||
conn, err = kcp.NewConn(uri.Host, nil, 0, 0, f.NewConn(kcpConversationFilterPriority, &kcpConversationFilter{}))
|
||||
conn, err = kcp.NewConn(uri.Host, nil, 0, 0, f.NewConn(kcpConversationFilterPriority, &kcpConversationFilter{}), false)
|
||||
l.Debugf("dial %s using existing conn on %s", uri.String(), conn.LocalAddr())
|
||||
} else {
|
||||
conn, err = kcp.DialWithOptions(uri.Host, nil, 0, 0)
|
||||
|
@ -14,11 +14,11 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/AudriusButkevicius/kcp-go"
|
||||
"github.com/AudriusButkevicius/pfilter"
|
||||
"github.com/ccding/go-stun/stun"
|
||||
"github.com/syncthing/syncthing/lib/config"
|
||||
"github.com/syncthing/syncthing/lib/nat"
|
||||
"github.com/xtaci/kcp-go"
|
||||
"github.com/xtaci/smux"
|
||||
)
|
||||
|
||||
|
@ -8,8 +8,8 @@ import (
|
||||
"net"
|
||||
"testing"
|
||||
|
||||
"github.com/AudriusButkevicius/kcp-go"
|
||||
"github.com/syncthing/syncthing/lib/dialer"
|
||||
"github.com/xtaci/kcp-go"
|
||||
)
|
||||
|
||||
func BenchmarkRequestsRawTCP(b *testing.B) {
|
||||
|
@ -6,6 +6,8 @@ import (
|
||||
"crypto/des"
|
||||
"crypto/sha1"
|
||||
|
||||
"github.com/templexxx/xor"
|
||||
|
||||
"golang.org/x/crypto/blowfish"
|
||||
"golang.org/x/crypto/cast5"
|
||||
"golang.org/x/crypto/pbkdf2"
|
||||
@ -218,8 +220,8 @@ func NewSimpleXORBlockCrypt(key []byte) (BlockCrypt, error) {
|
||||
return c, nil
|
||||
}
|
||||
|
||||
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
|
||||
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xorBytes(dst, src, c.xortbl) }
|
||||
func (c *simpleXORBlockCrypt) Encrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
|
||||
func (c *simpleXORBlockCrypt) Decrypt(dst, src []byte) { xor.Bytes(dst, src, c.xortbl) }
|
||||
|
||||
type noneBlockCrypt struct{}
|
||||
|
||||
@ -239,11 +241,11 @@ func encrypt(block cipher.Block, dst, src, buf []byte) {
|
||||
n := len(src) / blocksize
|
||||
base := 0
|
||||
for i := 0; i < n; i++ {
|
||||
xorWords(dst[base:], src[base:], tbl)
|
||||
xor.BytesSrc1(dst[base:], src[base:], tbl)
|
||||
block.Encrypt(tbl, dst[base:])
|
||||
base += blocksize
|
||||
}
|
||||
xorBytes(dst[base:], src[base:], tbl)
|
||||
xor.BytesSrc0(dst[base:], src[base:], tbl)
|
||||
}
|
||||
|
||||
func decrypt(block cipher.Block, dst, src, buf []byte) {
|
||||
@ -255,9 +257,9 @@ func decrypt(block cipher.Block, dst, src, buf []byte) {
|
||||
base := 0
|
||||
for i := 0; i < n; i++ {
|
||||
block.Encrypt(next, src[base:])
|
||||
xorWords(dst[base:], src[base:], tbl)
|
||||
xor.BytesSrc1(dst[base:], src[base:], tbl)
|
||||
tbl, next = next, tbl
|
||||
base += blocksize
|
||||
}
|
||||
xorBytes(dst[base:], src[base:], tbl)
|
||||
xor.BytesSrc0(dst[base:], src[base:], tbl)
|
||||
}
|
@ -22,8 +22,8 @@ type (
|
||||
data []byte
|
||||
}
|
||||
|
||||
// FECDecoder for decoding incoming packets
|
||||
FECDecoder struct {
|
||||
// fecDecoder for decoding incoming packets
|
||||
fecDecoder struct {
|
||||
rxlimit int // queue size limit
|
||||
dataShards int
|
||||
parityShards int
|
||||
@ -39,7 +39,7 @@ type (
|
||||
}
|
||||
)
|
||||
|
||||
func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
|
||||
func newFECDecoder(rxlimit, dataShards, parityShards int) *fecDecoder {
|
||||
if dataShards <= 0 || parityShards <= 0 {
|
||||
return nil
|
||||
}
|
||||
@ -47,7 +47,7 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
|
||||
return nil
|
||||
}
|
||||
|
||||
fec := new(FECDecoder)
|
||||
fec := new(fecDecoder)
|
||||
fec.rxlimit = rxlimit
|
||||
fec.dataShards = dataShards
|
||||
fec.parityShards = parityShards
|
||||
@ -63,7 +63,7 @@ func newFECDecoder(rxlimit, dataShards, parityShards int) *FECDecoder {
|
||||
}
|
||||
|
||||
// decodeBytes a fec packet
|
||||
func (dec *FECDecoder) decodeBytes(data []byte) fecPacket {
|
||||
func (dec *fecDecoder) decodeBytes(data []byte) fecPacket {
|
||||
var pkt fecPacket
|
||||
pkt.seqid = binary.LittleEndian.Uint32(data)
|
||||
pkt.flag = binary.LittleEndian.Uint16(data[4:])
|
||||
@ -74,8 +74,8 @@ func (dec *FECDecoder) decodeBytes(data []byte) fecPacket {
|
||||
return pkt
|
||||
}
|
||||
|
||||
// Decode a fec packet
|
||||
func (dec *FECDecoder) Decode(pkt fecPacket) (recovered [][]byte) {
|
||||
// decode a fec packet
|
||||
func (dec *fecDecoder) decode(pkt fecPacket) (recovered [][]byte) {
|
||||
// insertion
|
||||
n := len(dec.rx) - 1
|
||||
insertIdx := 0
|
||||
@ -179,7 +179,7 @@ func (dec *FECDecoder) Decode(pkt fecPacket) (recovered [][]byte) {
|
||||
}
|
||||
|
||||
// free a range of fecPacket, and zero for GC recycling
|
||||
func (dec *FECDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
|
||||
func (dec *fecDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
|
||||
for i := first; i < first+n; i++ { // free
|
||||
xmitBuf.Put(q[i].data)
|
||||
}
|
||||
@ -191,8 +191,8 @@ func (dec *FECDecoder) freeRange(first, n int, q []fecPacket) []fecPacket {
|
||||
}
|
||||
|
||||
type (
|
||||
// FECEncoder for encoding outgoing packets
|
||||
FECEncoder struct {
|
||||
// fecEncoder for encoding outgoing packets
|
||||
fecEncoder struct {
|
||||
dataShards int
|
||||
parityShards int
|
||||
shardSize int
|
||||
@ -214,11 +214,11 @@ type (
|
||||
}
|
||||
)
|
||||
|
||||
func newFECEncoder(dataShards, parityShards, offset int) *FECEncoder {
|
||||
func newFECEncoder(dataShards, parityShards, offset int) *fecEncoder {
|
||||
if dataShards <= 0 || parityShards <= 0 {
|
||||
return nil
|
||||
}
|
||||
fec := new(FECEncoder)
|
||||
fec := new(fecEncoder)
|
||||
fec.dataShards = dataShards
|
||||
fec.parityShards = parityShards
|
||||
fec.shardSize = dataShards + parityShards
|
||||
@ -241,9 +241,9 @@ func newFECEncoder(dataShards, parityShards, offset int) *FECEncoder {
|
||||
return fec
|
||||
}
|
||||
|
||||
// Encode the packet, output parity shards if we have enough datashards
|
||||
// the content of returned parityshards will change in next Encode
|
||||
func (enc *FECEncoder) Encode(b []byte) (ps [][]byte) {
|
||||
// encode the packet, output parity shards if we have enough datashards
|
||||
// the content of returned parityshards will change in next encode
|
||||
func (enc *fecEncoder) encode(b []byte) (ps [][]byte) {
|
||||
enc.markData(b[enc.headerOffset:])
|
||||
binary.LittleEndian.PutUint16(b[enc.payloadOffset:], uint16(len(b[enc.payloadOffset:])))
|
||||
|
||||
@ -290,13 +290,13 @@ func (enc *FECEncoder) Encode(b []byte) (ps [][]byte) {
|
||||
return
|
||||
}
|
||||
|
||||
func (enc *FECEncoder) markData(data []byte) {
|
||||
func (enc *fecEncoder) markData(data []byte) {
|
||||
binary.LittleEndian.PutUint32(data, enc.next)
|
||||
binary.LittleEndian.PutUint16(data[4:], typeData)
|
||||
enc.next++
|
||||
}
|
||||
|
||||
func (enc *FECEncoder) markFEC(data []byte) {
|
||||
func (enc *fecEncoder) markFEC(data []byte) {
|
||||
binary.LittleEndian.PutUint32(data, enc.next)
|
||||
binary.LittleEndian.PutUint16(data[4:], typeFEC)
|
||||
enc.next = (enc.next + 1) % enc.paws
|
@ -30,8 +30,8 @@ const (
|
||||
IKCP_PROBE_LIMIT = 120000 // up to 120 secs to probe window
|
||||
)
|
||||
|
||||
// Output is a closure which captures conn and calls conn.Write
|
||||
type Output func(buf []byte, size int)
|
||||
// output_callback is a prototype which ought capture conn and call conn.Write
|
||||
type output_callback func(buf []byte, size int)
|
||||
|
||||
/* encode 8 bits unsigned int */
|
||||
func ikcp_encode8u(p []byte, c byte) []byte {
|
||||
@ -91,8 +91,8 @@ func _itimediff(later, earlier uint32) int32 {
|
||||
return (int32)(later - earlier)
|
||||
}
|
||||
|
||||
// Segment defines a KCP segment
|
||||
type Segment struct {
|
||||
// segment defines a KCP segment
|
||||
type segment struct {
|
||||
conv uint32
|
||||
cmd uint8
|
||||
frg uint8
|
||||
@ -108,11 +108,11 @@ type Segment struct {
|
||||
}
|
||||
|
||||
// encode a segment into buffer
|
||||
func (seg *Segment) encode(ptr []byte) []byte {
|
||||
func (seg *segment) encode(ptr []byte) []byte {
|
||||
ptr = ikcp_encode32u(ptr, seg.conv)
|
||||
ptr = ikcp_encode8u(ptr, uint8(seg.cmd))
|
||||
ptr = ikcp_encode8u(ptr, uint8(seg.frg))
|
||||
ptr = ikcp_encode16u(ptr, uint16(seg.wnd))
|
||||
ptr = ikcp_encode8u(ptr, seg.cmd)
|
||||
ptr = ikcp_encode8u(ptr, seg.frg)
|
||||
ptr = ikcp_encode16u(ptr, seg.wnd)
|
||||
ptr = ikcp_encode32u(ptr, seg.ts)
|
||||
ptr = ikcp_encode32u(ptr, seg.sn)
|
||||
ptr = ikcp_encode32u(ptr, seg.una)
|
||||
@ -137,15 +137,15 @@ type KCP struct {
|
||||
fastresend int32
|
||||
nocwnd, stream int32
|
||||
|
||||
snd_queue []Segment
|
||||
rcv_queue []Segment
|
||||
snd_buf []Segment
|
||||
rcv_buf []Segment
|
||||
snd_queue []segment
|
||||
rcv_queue []segment
|
||||
snd_buf []segment
|
||||
rcv_buf []segment
|
||||
|
||||
acklist []ackItem
|
||||
|
||||
buffer []byte
|
||||
output Output
|
||||
output output_callback
|
||||
}
|
||||
|
||||
type ackItem struct {
|
||||
@ -155,7 +155,7 @@ type ackItem struct {
|
||||
|
||||
// NewKCP create a new kcp control object, 'conv' must equal in two endpoint
|
||||
// from the same connection.
|
||||
func NewKCP(conv uint32, output Output) *KCP {
|
||||
func NewKCP(conv uint32, output output_callback) *KCP {
|
||||
kcp := new(KCP)
|
||||
kcp.conv = conv
|
||||
kcp.snd_wnd = IKCP_WND_SND
|
||||
@ -175,13 +175,13 @@ func NewKCP(conv uint32, output Output) *KCP {
|
||||
}
|
||||
|
||||
// newSegment creates a KCP segment
|
||||
func (kcp *KCP) newSegment(size int) (seg Segment) {
|
||||
func (kcp *KCP) newSegment(size int) (seg segment) {
|
||||
seg.data = xmitBuf.Get().([]byte)[:size]
|
||||
return
|
||||
}
|
||||
|
||||
// delSegment recycles a KCP segment
|
||||
func (kcp *KCP) delSegment(seg Segment) {
|
||||
func (kcp *KCP) delSegment(seg segment) {
|
||||
xmitBuf.Put(seg.data)
|
||||
}
|
||||
|
||||
@ -384,7 +384,7 @@ func (kcp *KCP) parse_ack(sn uint32) {
|
||||
if sn == seg.sn {
|
||||
kcp.delSegment(*seg)
|
||||
copy(kcp.snd_buf[k:], kcp.snd_buf[k+1:])
|
||||
kcp.snd_buf[len(kcp.snd_buf)-1] = Segment{}
|
||||
kcp.snd_buf[len(kcp.snd_buf)-1] = segment{}
|
||||
kcp.snd_buf = kcp.snd_buf[:len(kcp.snd_buf)-1]
|
||||
break
|
||||
}
|
||||
@ -430,7 +430,7 @@ func (kcp *KCP) ack_push(sn, ts uint32) {
|
||||
kcp.acklist = append(kcp.acklist, ackItem{sn, ts})
|
||||
}
|
||||
|
||||
func (kcp *KCP) parse_data(newseg Segment) {
|
||||
func (kcp *KCP) parse_data(newseg segment) {
|
||||
sn := newseg.sn
|
||||
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) >= 0 ||
|
||||
_itimediff(sn, kcp.rcv_nxt) < 0 {
|
||||
@ -458,7 +458,7 @@ func (kcp *KCP) parse_data(newseg Segment) {
|
||||
if insert_idx == n+1 {
|
||||
kcp.rcv_buf = append(kcp.rcv_buf, newseg)
|
||||
} else {
|
||||
kcp.rcv_buf = append(kcp.rcv_buf, Segment{})
|
||||
kcp.rcv_buf = append(kcp.rcv_buf, segment{})
|
||||
copy(kcp.rcv_buf[insert_idx+1:], kcp.rcv_buf[insert_idx:])
|
||||
kcp.rcv_buf[insert_idx] = newseg
|
||||
}
|
||||
@ -625,7 +625,7 @@ func (kcp *KCP) wnd_unused() uint16 {
|
||||
|
||||
// flush pending data
|
||||
func (kcp *KCP) flush(ackOnly bool) {
|
||||
var seg Segment
|
||||
var seg segment
|
||||
seg.conv = kcp.conv
|
||||
seg.cmd = IKCP_CMD_ACK
|
||||
seg.wnd = kcp.wnd_unused()
|
||||
@ -989,10 +989,10 @@ func (kcp *KCP) WaitSnd() int {
|
||||
}
|
||||
|
||||
// remove front n elements from queue
|
||||
func (kcp *KCP) remove_front(q []Segment, n int) []Segment {
|
||||
func (kcp *KCP) remove_front(q []segment, n int) []segment {
|
||||
newn := copy(q, q[n:])
|
||||
for i := newn; i < len(q); i++ {
|
||||
q[i] = Segment{} // manual set nil for GC
|
||||
q[i] = segment{} // manual set nil for GC
|
||||
}
|
||||
return q[:newn]
|
||||
}
|
252
vendor/github.com/xtaci/kcp-go/sess.go → vendor/github.com/AudriusButkevicius/kcp-go/sess.go
generated
vendored
252
vendor/github.com/xtaci/kcp-go/sess.go → vendor/github.com/AudriusButkevicius/kcp-go/sess.go
generated
vendored
@ -3,6 +3,7 @@ package kcp
|
||||
import (
|
||||
"crypto/rand"
|
||||
"encoding/binary"
|
||||
"fmt"
|
||||
"hash/crc32"
|
||||
"io"
|
||||
"net"
|
||||
@ -54,9 +55,6 @@ var (
|
||||
// global packet buffer
|
||||
// shared among sending/receiving/FEC
|
||||
xmitBuf sync.Pool
|
||||
|
||||
// monotonic session id
|
||||
sid uint32
|
||||
)
|
||||
|
||||
func init() {
|
||||
@ -68,11 +66,12 @@ func init() {
|
||||
type (
|
||||
// UDPSession defines a KCP session implemented by UDP
|
||||
UDPSession struct {
|
||||
sid uint32 // session id(monotonic)
|
||||
conn net.PacketConn // the underlying packet connection
|
||||
kcp *KCP // KCP ARQ protocol
|
||||
l *Listener // point to the Listener if it's accepted by Listener
|
||||
block BlockCrypt // block encryption
|
||||
updaterIdx int // record slice index in updater
|
||||
conn net.PacketConn // the underlying packet connection
|
||||
closeConn bool // Should we close the underlying conn once UDPSession is closed.
|
||||
kcp *KCP // KCP ARQ protocol
|
||||
l *Listener // point to the Listener if it's accepted by Listener
|
||||
block BlockCrypt // block encryption
|
||||
|
||||
// kcp receiving is based on packets
|
||||
// recvbuf turns packets into stream
|
||||
@ -82,22 +81,23 @@ type (
|
||||
ext []byte
|
||||
|
||||
// FEC
|
||||
fecDecoder *FECDecoder
|
||||
fecEncoder *FECEncoder
|
||||
fecDecoder *fecDecoder
|
||||
fecEncoder *fecEncoder
|
||||
|
||||
// settings
|
||||
remote net.Addr // remote peer address
|
||||
rd time.Time // read deadline
|
||||
wd time.Time // write deadline
|
||||
headerSize int // the overall header size added before KCP frame
|
||||
updateInterval time.Duration // interval in seconds to call kcp.flush()
|
||||
ackNoDelay bool // send ack immediately for each incoming packet
|
||||
writeDelay bool // delay kcp.flush() for Write() for bulk transfer
|
||||
remote net.Addr // remote peer address
|
||||
rd time.Time // read deadline
|
||||
wd time.Time // write deadline
|
||||
headerSize int // the overall header size added before KCP frame
|
||||
ackNoDelay bool // send ack immediately for each incoming packet
|
||||
writeDelay bool // delay kcp.flush() for Write() for bulk transfer
|
||||
dup int // duplicate udp packets
|
||||
|
||||
// notifications
|
||||
die chan struct{} // notify session has Closed
|
||||
chReadEvent chan struct{} // notify Read() can be called without blocking
|
||||
chWriteEvent chan struct{} // notify Write() can be called without blocking
|
||||
chErrorEvent chan error // notify Read() have an error
|
||||
|
||||
isClosed bool // flag the session has Closed
|
||||
mu sync.Mutex
|
||||
@ -113,14 +113,15 @@ type (
|
||||
)
|
||||
|
||||
// newUDPSession create a new udp session for client or server
|
||||
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
|
||||
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt, closeConn bool) *UDPSession {
|
||||
sess := new(UDPSession)
|
||||
sess.sid = atomic.AddUint32(&sid, 1)
|
||||
sess.die = make(chan struct{})
|
||||
sess.chReadEvent = make(chan struct{}, 1)
|
||||
sess.chWriteEvent = make(chan struct{}, 1)
|
||||
sess.chErrorEvent = make(chan error, 1)
|
||||
sess.remote = remote
|
||||
sess.conn = conn
|
||||
sess.closeConn = closeConn
|
||||
sess.l = l
|
||||
sess.block = block
|
||||
sess.recvbuf = make([]byte, mtuLimit)
|
||||
@ -232,6 +233,11 @@ func (s *UDPSession) Read(b []byte) (n int, err error) {
|
||||
case <-s.chReadEvent:
|
||||
case <-c:
|
||||
case <-s.die:
|
||||
case err = <-s.chErrorEvent:
|
||||
if timeout != nil {
|
||||
timeout.Stop()
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
if timeout != nil {
|
||||
@ -299,9 +305,11 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
|
||||
|
||||
// Close closes the connection.
|
||||
func (s *UDPSession) Close() error {
|
||||
// remove this session from updater & listener(if necessary)
|
||||
updater.removeSession(s)
|
||||
if s.l != nil { // notify listener
|
||||
s.l.closeSession(s.remote)
|
||||
key := fmt.Sprintf("%s/%d", s.remote.String(), s.kcp.conv)
|
||||
s.l.closeSession(key)
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
@ -312,7 +320,7 @@ func (s *UDPSession) Close() error {
|
||||
close(s.die)
|
||||
s.isClosed = true
|
||||
atomic.AddUint64(&DefaultSnmp.CurrEstab, ^uint64(0))
|
||||
if s.l == nil { // client socket close
|
||||
if s.l == nil && s.closeConn { // client socket close
|
||||
return s.conn.Close()
|
||||
}
|
||||
return nil
|
||||
@ -393,12 +401,19 @@ func (s *UDPSession) SetACKNoDelay(nodelay bool) {
|
||||
s.ackNoDelay = nodelay
|
||||
}
|
||||
|
||||
// SetDUP duplicates udp packets for kcp output, for testing purpose only
|
||||
func (s *UDPSession) SetDUP(dup int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.dup = dup
|
||||
}
|
||||
|
||||
// SetNoDelay calls nodelay() of kcp
|
||||
// https://github.com/skywind3000/kcp/blob/master/README.en.md#protocol-configuration
|
||||
func (s *UDPSession) SetNoDelay(nodelay, interval, resend, nc int) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
s.kcp.NoDelay(nodelay, interval, resend, nc)
|
||||
s.updateInterval = time.Duration(interval) * time.Millisecond
|
||||
}
|
||||
|
||||
// SetDSCP sets the 6bit DSCP field of IP header, no effect if it's accepted from Listener
|
||||
@ -406,8 +421,8 @@ func (s *UDPSession) SetDSCP(dscp int) error {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
if s.l == nil {
|
||||
if nc, ok := s.conn.(*ConnectedUDPConn); ok {
|
||||
return ipv4.NewConn(nc.Conn).SetTOS(dscp << 2)
|
||||
if nc, ok := s.conn.(*connectedUDPConn); ok {
|
||||
return ipv4.NewConn(nc.UDPConn).SetTOS(dscp << 2)
|
||||
} else if nc, ok := s.conn.(net.Conn); ok {
|
||||
return ipv4.NewConn(nc).SetTOS(dscp << 2)
|
||||
}
|
||||
@ -449,51 +464,47 @@ func (s *UDPSession) SetWriteBuffer(bytes int) error {
|
||||
func (s *UDPSession) output(buf []byte) {
|
||||
var ecc [][]byte
|
||||
|
||||
// extend buf's header space
|
||||
// 0. extend buf's header space(if necessary)
|
||||
ext := buf
|
||||
if s.headerSize > 0 {
|
||||
ext = s.ext[:s.headerSize+len(buf)]
|
||||
copy(ext[s.headerSize:], buf)
|
||||
}
|
||||
|
||||
// FEC stage
|
||||
// 1. FEC encoding
|
||||
if s.fecEncoder != nil {
|
||||
ecc = s.fecEncoder.Encode(ext)
|
||||
ecc = s.fecEncoder.encode(ext)
|
||||
}
|
||||
|
||||
// encryption stage
|
||||
// 2&3. crc32 & encryption
|
||||
if s.block != nil {
|
||||
io.ReadFull(rand.Reader, ext[:nonceSize])
|
||||
checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
|
||||
binary.LittleEndian.PutUint32(ext[nonceSize:], checksum)
|
||||
s.block.Encrypt(ext, ext)
|
||||
|
||||
if ecc != nil {
|
||||
for k := range ecc {
|
||||
io.ReadFull(rand.Reader, ecc[k][:nonceSize])
|
||||
checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
|
||||
binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
|
||||
s.block.Encrypt(ecc[k], ecc[k])
|
||||
}
|
||||
for k := range ecc {
|
||||
io.ReadFull(rand.Reader, ecc[k][:nonceSize])
|
||||
checksum := crc32.ChecksumIEEE(ecc[k][cryptHeaderSize:])
|
||||
binary.LittleEndian.PutUint32(ecc[k][nonceSize:], checksum)
|
||||
s.block.Encrypt(ecc[k], ecc[k])
|
||||
}
|
||||
}
|
||||
|
||||
// WriteTo kernel
|
||||
// 4. WriteTo kernel
|
||||
nbytes := 0
|
||||
npkts := 0
|
||||
// if mrand.Intn(100) < 50 {
|
||||
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
|
||||
nbytes += n
|
||||
npkts++
|
||||
for i := 0; i < s.dup+1; i++ {
|
||||
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
|
||||
nbytes += n
|
||||
npkts++
|
||||
}
|
||||
}
|
||||
// }
|
||||
|
||||
if ecc != nil {
|
||||
for k := range ecc {
|
||||
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
|
||||
nbytes += n
|
||||
npkts++
|
||||
}
|
||||
for k := range ecc {
|
||||
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
|
||||
nbytes += n
|
||||
npkts++
|
||||
}
|
||||
}
|
||||
atomic.AddUint64(&DefaultSnmp.OutPkts, uint64(npkts))
|
||||
@ -507,15 +518,13 @@ func (s *UDPSession) update() (interval time.Duration) {
|
||||
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
|
||||
s.notifyWriteEvent()
|
||||
}
|
||||
interval = s.updateInterval
|
||||
interval = time.Duration(s.kcp.interval) * time.Millisecond
|
||||
s.mu.Unlock()
|
||||
return
|
||||
}
|
||||
|
||||
// GetConv gets conversation id of a session
|
||||
func (s *UDPSession) GetConv() uint32 {
|
||||
return s.kcp.conv
|
||||
}
|
||||
func (s *UDPSession) GetConv() uint32 { return s.kcp.conv }
|
||||
|
||||
func (s *UDPSession) notifyReadEvent() {
|
||||
select {
|
||||
@ -548,22 +557,21 @@ func (s *UDPSession) kcpInput(data []byte) {
|
||||
fecParityShards++
|
||||
}
|
||||
|
||||
if recovers := s.fecDecoder.Decode(f); recovers != nil {
|
||||
for _, r := range recovers {
|
||||
if len(r) >= 2 { // must be larger than 2bytes
|
||||
sz := binary.LittleEndian.Uint16(r)
|
||||
if int(sz) <= len(r) && sz >= 2 {
|
||||
if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
|
||||
fecRecovered++
|
||||
} else {
|
||||
kcpInErrors++
|
||||
}
|
||||
recovers := s.fecDecoder.decode(f)
|
||||
for _, r := range recovers {
|
||||
if len(r) >= 2 { // must be larger than 2bytes
|
||||
sz := binary.LittleEndian.Uint16(r)
|
||||
if int(sz) <= len(r) && sz >= 2 {
|
||||
if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
|
||||
fecRecovered++
|
||||
} else {
|
||||
fecErrs++
|
||||
kcpInErrors++
|
||||
}
|
||||
} else {
|
||||
fecErrs++
|
||||
}
|
||||
} else {
|
||||
fecErrs++
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -601,7 +609,7 @@ func (s *UDPSession) kcpInput(data []byte) {
|
||||
}
|
||||
}
|
||||
|
||||
func (s *UDPSession) receiver(ch chan []byte) {
|
||||
func (s *UDPSession) receiver(ch chan<- []byte) {
|
||||
for {
|
||||
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
||||
if n, _, err := s.conn.ReadFrom(data); err == nil && n >= s.headerSize+IKCP_OVERHEAD {
|
||||
@ -611,6 +619,7 @@ func (s *UDPSession) receiver(ch chan []byte) {
|
||||
return
|
||||
}
|
||||
} else if err != nil {
|
||||
s.chErrorEvent <- err
|
||||
return
|
||||
} else {
|
||||
atomic.AddUint64(&DefaultSnmp.InErrs, 1)
|
||||
@ -658,12 +667,12 @@ type (
|
||||
block BlockCrypt // block encryption
|
||||
dataShards int // FEC data shard
|
||||
parityShards int // FEC parity shard
|
||||
fecDecoder *FECDecoder // FEC mock initialization
|
||||
fecDecoder *fecDecoder // FEC mock initialization
|
||||
conn net.PacketConn // the underlying packet connection
|
||||
|
||||
sessions map[string]*UDPSession // all sessions accepted by this Listener
|
||||
chAccepts chan *UDPSession // Listen() backlog
|
||||
chSessionClosed chan net.Addr // session close queue
|
||||
chSessionClosed chan string // session close queue
|
||||
headerSize int // the overall header size added before KCP frame
|
||||
die chan struct{} // notify the listener has closed
|
||||
rd atomic.Value // read deadline for Accept()
|
||||
@ -679,6 +688,10 @@ type (
|
||||
|
||||
// monitor incoming data for all connections of server
|
||||
func (l *Listener) monitor() {
|
||||
// cache last session
|
||||
var lastKey string
|
||||
var lastSession *UDPSession
|
||||
|
||||
chPacket := make(chan inPacket, qlen)
|
||||
go l.receiver(chPacket)
|
||||
for {
|
||||
@ -703,45 +716,60 @@ func (l *Listener) monitor() {
|
||||
}
|
||||
|
||||
if dataValid {
|
||||
addr := from.String()
|
||||
s, ok := l.sessions[addr]
|
||||
if !ok { // new session
|
||||
if len(l.chAccepts) < cap(l.chAccepts) { // do not let new session overwhelm accept queue
|
||||
var conv uint32
|
||||
convValid := false
|
||||
if l.fecDecoder != nil {
|
||||
isfec := binary.LittleEndian.Uint16(data[4:])
|
||||
if isfec == typeData {
|
||||
conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
|
||||
convValid = true
|
||||
}
|
||||
} else {
|
||||
conv = binary.LittleEndian.Uint32(data)
|
||||
convValid = true
|
||||
}
|
||||
|
||||
if convValid {
|
||||
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block)
|
||||
s.kcpInput(data)
|
||||
l.sessions[addr] = s
|
||||
l.chAccepts <- s
|
||||
}
|
||||
var conv uint32
|
||||
convValid := false
|
||||
if l.fecDecoder != nil {
|
||||
isfec := binary.LittleEndian.Uint16(data[4:])
|
||||
if isfec == typeData {
|
||||
conv = binary.LittleEndian.Uint32(data[fecHeaderSizePlus2:])
|
||||
convValid = true
|
||||
}
|
||||
} else {
|
||||
s.kcpInput(data)
|
||||
conv = binary.LittleEndian.Uint32(data)
|
||||
convValid = true
|
||||
}
|
||||
|
||||
if convValid {
|
||||
addr := from.String()
|
||||
key := fmt.Sprintf("%s/%d", addr, conv)
|
||||
var s *UDPSession
|
||||
var ok bool
|
||||
|
||||
// packets received from an address always come in batch.
|
||||
// cache the session for next packet, without querying map.
|
||||
if key == lastKey {
|
||||
s, ok = lastSession, true
|
||||
} else if s, ok = l.sessions[key]; ok {
|
||||
lastSession = s
|
||||
lastKey = addr
|
||||
}
|
||||
|
||||
if !ok { // new session
|
||||
if len(l.chAccepts) < cap(l.chAccepts) && len(l.sessions) < 4096 { // do not let new session overwhelm accept queue and connection count
|
||||
s := newUDPSession(conv, l.dataShards, l.parityShards, l, l.conn, from, l.block, false)
|
||||
s.kcpInput(data)
|
||||
l.sessions[key] = s
|
||||
l.chAccepts <- s
|
||||
}
|
||||
} else {
|
||||
s.kcpInput(data)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
xmitBuf.Put(raw)
|
||||
case deadlink := <-l.chSessionClosed:
|
||||
delete(l.sessions, deadlink.String())
|
||||
case key := <-l.chSessionClosed:
|
||||
if key == lastKey {
|
||||
lastKey = ""
|
||||
}
|
||||
delete(l.sessions, key)
|
||||
case <-l.die:
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (l *Listener) receiver(ch chan inPacket) {
|
||||
func (l *Listener) receiver(ch chan<- inPacket) {
|
||||
for {
|
||||
data := xmitBuf.Get().([]byte)[:mtuLimit]
|
||||
if n, from, err := l.conn.ReadFrom(data); err == nil && n >= l.headerSize+IKCP_OVERHEAD {
|
||||
@ -830,9 +858,9 @@ func (l *Listener) Close() error {
|
||||
}
|
||||
|
||||
// closeSession notify the listener that a session has closed
|
||||
func (l *Listener) closeSession(remote net.Addr) bool {
|
||||
func (l *Listener) closeSession(key string) bool {
|
||||
select {
|
||||
case l.chSessionClosed <- remote:
|
||||
case l.chSessionClosed <- key:
|
||||
return true
|
||||
case <-l.die:
|
||||
return false
|
||||
@ -840,14 +868,10 @@ func (l *Listener) closeSession(remote net.Addr) bool {
|
||||
}
|
||||
|
||||
// Addr returns the listener's network address, The Addr returned is shared by all invocations of Addr, so do not modify it.
|
||||
func (l *Listener) Addr() net.Addr {
|
||||
return l.conn.LocalAddr()
|
||||
}
|
||||
func (l *Listener) Addr() net.Addr { return l.conn.LocalAddr() }
|
||||
|
||||
// Listen listens for incoming KCP packets addressed to the local address laddr on the network "udp",
|
||||
func Listen(laddr string) (net.Listener, error) {
|
||||
return ListenWithOptions(laddr, nil, 0, 0)
|
||||
}
|
||||
func Listen(laddr string) (net.Listener, error) { return ListenWithOptions(laddr, nil, 0, 0) }
|
||||
|
||||
// ListenWithOptions listens for incoming KCP packets addressed to the local address laddr on the network "udp" with packet encryption,
|
||||
// dataShards, parityShards defines Reed-Solomon Erasure Coding parameters
|
||||
@ -870,7 +894,7 @@ func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketCo
|
||||
l.conn = conn
|
||||
l.sessions = make(map[string]*UDPSession)
|
||||
l.chAccepts = make(chan *UDPSession, acceptBacklog)
|
||||
l.chSessionClosed = make(chan net.Addr)
|
||||
l.chSessionClosed = make(chan string)
|
||||
l.die = make(chan struct{})
|
||||
l.dataShards = dataShards
|
||||
l.parityShards = parityShards
|
||||
@ -890,9 +914,7 @@ func ServeConn(block BlockCrypt, dataShards, parityShards int, conn net.PacketCo
|
||||
}
|
||||
|
||||
// Dial connects to the remote address "raddr" on the network "udp"
|
||||
func Dial(raddr string) (net.Conn, error) {
|
||||
return DialWithOptions(raddr, nil, 0, 0)
|
||||
}
|
||||
func Dial(raddr string) (net.Conn, error) { return DialWithOptions(raddr, nil, 0, 0) }
|
||||
|
||||
// DialWithOptions connects to the remote address "raddr" on the network "udp" with packet encryption
|
||||
func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards int) (*UDPSession, error) {
|
||||
@ -906,11 +928,11 @@ func DialWithOptions(raddr string, block BlockCrypt, dataShards, parityShards in
|
||||
return nil, errors.Wrap(err, "net.DialUDP")
|
||||
}
|
||||
|
||||
return NewConn(raddr, block, dataShards, parityShards, &ConnectedUDPConn{udpconn, udpconn})
|
||||
return NewConn(raddr, block, dataShards, parityShards, &connectedUDPConn{udpconn}, true)
|
||||
}
|
||||
|
||||
// NewConn establishes a session and talks KCP protocol over a packet connection.
|
||||
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn) (*UDPSession, error) {
|
||||
func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn net.PacketConn, closeConn bool) (*UDPSession, error) {
|
||||
udpaddr, err := net.ResolveUDPAddr("udp", raddr)
|
||||
if err != nil {
|
||||
return nil, errors.Wrap(err, "net.ResolveUDPAddr")
|
||||
@ -918,22 +940,16 @@ func NewConn(raddr string, block BlockCrypt, dataShards, parityShards int, conn
|
||||
|
||||
var convid uint32
|
||||
binary.Read(rand.Reader, binary.LittleEndian, &convid)
|
||||
return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block), nil
|
||||
return newUDPSession(convid, dataShards, parityShards, nil, conn, udpaddr, block, closeConn), nil
|
||||
}
|
||||
|
||||
func currentMs() uint32 {
|
||||
return uint32(time.Now().UnixNano() / int64(time.Millisecond))
|
||||
}
|
||||
// returns current time in milliseconds
|
||||
func currentMs() uint32 { return uint32(time.Now().UnixNano() / int64(time.Millisecond)) }
|
||||
|
||||
// ConnectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
|
||||
// connectedUDPConn is a wrapper for net.UDPConn which converts WriteTo syscalls
|
||||
// to Write syscalls that are 4 times faster on some OS'es. This should only be
|
||||
// used for connections that were produced by a net.Dial* call.
|
||||
type ConnectedUDPConn struct {
|
||||
*net.UDPConn
|
||||
Conn net.Conn // underlying connection if any
|
||||
}
|
||||
type connectedUDPConn struct{ *net.UDPConn }
|
||||
|
||||
// WriteTo redirects all writes to the Write syscall, which is 4 times faster.
|
||||
func (c *ConnectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) {
|
||||
return c.Write(b)
|
||||
}
|
||||
func (c *connectedUDPConn) WriteTo(b []byte, addr net.Addr) (int, error) { return c.Write(b) }
|
@ -15,15 +15,13 @@ func init() {
|
||||
|
||||
// entry contains a session update info
|
||||
type entry struct {
|
||||
sid uint32
|
||||
ts time.Time
|
||||
s *UDPSession
|
||||
ts time.Time
|
||||
s *UDPSession
|
||||
}
|
||||
|
||||
// a global heap managed kcp.flush() caller
|
||||
type updateHeap struct {
|
||||
entries []entry
|
||||
indices map[uint32]int
|
||||
mu sync.Mutex
|
||||
chWakeUp chan struct{}
|
||||
}
|
||||
@ -32,41 +30,40 @@ func (h *updateHeap) Len() int { return len(h.entries) }
|
||||
func (h *updateHeap) Less(i, j int) bool { return h.entries[i].ts.Before(h.entries[j].ts) }
|
||||
func (h *updateHeap) Swap(i, j int) {
|
||||
h.entries[i], h.entries[j] = h.entries[j], h.entries[i]
|
||||
h.indices[h.entries[i].sid] = i
|
||||
h.indices[h.entries[j].sid] = j
|
||||
h.entries[i].s.updaterIdx = i
|
||||
h.entries[j].s.updaterIdx = j
|
||||
}
|
||||
|
||||
func (h *updateHeap) Push(x interface{}) {
|
||||
h.entries = append(h.entries, x.(entry))
|
||||
n := len(h.entries)
|
||||
h.indices[h.entries[n-1].sid] = n - 1
|
||||
h.entries[n-1].s.updaterIdx = n - 1
|
||||
}
|
||||
|
||||
func (h *updateHeap) Pop() interface{} {
|
||||
n := len(h.entries)
|
||||
x := h.entries[n-1]
|
||||
h.entries[n-1].s.updaterIdx = -1
|
||||
h.entries[n-1] = entry{} // manual set nil for GC
|
||||
h.entries = h.entries[0 : n-1]
|
||||
delete(h.indices, x.sid)
|
||||
return x
|
||||
}
|
||||
|
||||
func (h *updateHeap) init() {
|
||||
h.indices = make(map[uint32]int)
|
||||
h.chWakeUp = make(chan struct{}, 1)
|
||||
}
|
||||
|
||||
func (h *updateHeap) addSession(s *UDPSession) {
|
||||
h.mu.Lock()
|
||||
heap.Push(h, entry{s.sid, time.Now(), s})
|
||||
heap.Push(h, entry{time.Now(), s})
|
||||
h.mu.Unlock()
|
||||
h.wakeup()
|
||||
}
|
||||
|
||||
func (h *updateHeap) removeSession(s *UDPSession) {
|
||||
h.mu.Lock()
|
||||
if idx, ok := h.indices[s.sid]; ok {
|
||||
heap.Remove(h, idx)
|
||||
if s.updaterIdx != -1 {
|
||||
heap.Remove(h, s.updaterIdx)
|
||||
}
|
||||
h.mu.Unlock()
|
||||
}
|
||||
@ -99,7 +96,8 @@ func (h *updateHeap) updateTask() {
|
||||
break
|
||||
}
|
||||
}
|
||||
if h.Len() > 0 {
|
||||
|
||||
if hlen > 0 {
|
||||
timer = time.After(h.entries[0].ts.Sub(now))
|
||||
}
|
||||
h.mu.Unlock()
|
21
vendor/github.com/templexxx/xor/LICENSE
generated
vendored
Normal file
21
vendor/github.com/templexxx/xor/LICENSE
generated
vendored
Normal file
@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2017 Temple3x
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
442
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
Normal file
442
vendor/github.com/templexxx/xor/avx2_amd64.s
generated
vendored
Normal file
@ -0,0 +1,442 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// addr of mem
|
||||
#define DST BX
|
||||
#define SRC SI
|
||||
#define SRC0 TMP4
|
||||
#define SRC1 TMP5
|
||||
|
||||
// loop args
|
||||
// num of vect
|
||||
#define VECT CX
|
||||
#define LEN DX
|
||||
// pos of matrix
|
||||
#define POS R8
|
||||
|
||||
// tmp store
|
||||
// num of vect or ...
|
||||
#define TMP1 R9
|
||||
// pos of matrix or ...
|
||||
#define TMP2 R10
|
||||
// store addr of data/parity or ...
|
||||
#define TMP3 R11
|
||||
#define TMP4 R12
|
||||
#define TMP5 R13
|
||||
#define TMP6 R14
|
||||
|
||||
// func bytesAVX2mini(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesAVX2mini(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $31, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop32b:
|
||||
VMOVDQU (SRC0)(POS*1), Y0
|
||||
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||
VMOVDQU Y0, (DST)(POS*1)
|
||||
ADDQ $32, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop32b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $31, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $31, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $32
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesAVX2small(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesAVX2small(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $127, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop128b:
|
||||
VMOVDQU (SRC0)(POS*1), Y0
|
||||
VMOVDQU 32(SRC0)(POS*1), Y1
|
||||
VMOVDQU 64(SRC0)(POS*1), Y2
|
||||
VMOVDQU 96(SRC0)(POS*1), Y3
|
||||
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||
VPXOR 32(SRC1)(POS*1), Y1, Y1
|
||||
VPXOR 64(SRC1)(POS*1), Y2, Y2
|
||||
VPXOR 96(SRC1)(POS*1), Y3, Y3
|
||||
VMOVDQU Y0, (DST)(POS*1)
|
||||
VMOVDQU Y1, 32(DST)(POS*1)
|
||||
VMOVDQU Y2, 64(DST)(POS*1)
|
||||
VMOVDQU Y3, 96(DST)(POS*1)
|
||||
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $127, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $127, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $128
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesAVX2big(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesAVX2big(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $127, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop128b:
|
||||
VMOVDQU (SRC0)(POS*1), Y0
|
||||
VMOVDQU 32(SRC0)(POS*1), Y1
|
||||
VMOVDQU 64(SRC0)(POS*1), Y2
|
||||
VMOVDQU 96(SRC0)(POS*1), Y3
|
||||
VPXOR (SRC1)(POS*1), Y0, Y0
|
||||
VPXOR 32(SRC1)(POS*1), Y1, Y1
|
||||
VPXOR 64(SRC1)(POS*1), Y2, Y2
|
||||
VPXOR 96(SRC1)(POS*1), Y3, Y3
|
||||
LONG $0xe77da1c4; WORD $0x0304
|
||||
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
|
||||
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
|
||||
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
|
||||
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
SFENCE
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $127, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $127, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $128
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func matrixAVX2small(dst []byte, src [][]byte)
|
||||
TEXT ·matrixAVX2small(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src+24(FP), SRC
|
||||
MOVQ vec+32(FP), VECT
|
||||
MOVQ len+8(FP), LEN
|
||||
TESTQ $127, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop128b:
|
||||
MOVQ VECT, TMP1
|
||||
SUBQ $2, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
VMOVDQU (TMP3)(POS*1), Y0
|
||||
VMOVDQU 32(TMP4)(POS*1), Y1
|
||||
VMOVDQU 64(TMP3)(POS*1), Y2
|
||||
VMOVDQU 96(TMP4)(POS*1), Y3
|
||||
|
||||
next_vect:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
VMOVDQU (TMP3)(POS*1), Y4
|
||||
VMOVDQU 32(TMP4)(POS*1), Y5
|
||||
VMOVDQU 64(TMP3)(POS*1), Y6
|
||||
VMOVDQU 96(TMP4)(POS*1), Y7
|
||||
VPXOR Y4, Y0, Y0
|
||||
VPXOR Y5, Y1, Y1
|
||||
VPXOR Y6, Y2, Y2
|
||||
VPXOR Y7, Y3, Y3
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect
|
||||
|
||||
VMOVDQU Y0, (DST)(POS*1)
|
||||
VMOVDQU Y1, 32(DST)(POS*1)
|
||||
VMOVDQU Y2, 64(DST)(POS*1)
|
||||
VMOVDQU Y3, 96(DST)(POS*1)
|
||||
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVB -1(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_1b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVB -1(TMP3)(LEN*1), TMP6
|
||||
XORB TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_1b
|
||||
|
||||
MOVB TMP5, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $127, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP4
|
||||
ANDQ $127, TMP4
|
||||
|
||||
loop_8b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVQ -8(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_8b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ -8(TMP3)(LEN*1), TMP6
|
||||
XORQ TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_8b
|
||||
|
||||
MOVQ TMP5, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP4
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $128
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func matrixAVX2big(dst []byte, src [][]byte)
|
||||
TEXT ·matrixAVX2big(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src+24(FP), SRC
|
||||
MOVQ vec+32(FP), VECT
|
||||
MOVQ len+8(FP), LEN
|
||||
TESTQ $127, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop128b:
|
||||
MOVQ VECT, TMP1
|
||||
SUBQ $2, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
VMOVDQU (TMP3)(POS*1), Y0
|
||||
VMOVDQU 32(TMP4)(POS*1), Y1
|
||||
VMOVDQU 64(TMP3)(POS*1), Y2
|
||||
VMOVDQU 96(TMP4)(POS*1), Y3
|
||||
|
||||
next_vect:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
VMOVDQU (TMP3)(POS*1), Y4
|
||||
VMOVDQU 32(TMP4)(POS*1), Y5
|
||||
VMOVDQU 64(TMP3)(POS*1), Y6
|
||||
VMOVDQU 96(TMP4)(POS*1), Y7
|
||||
VPXOR Y4, Y0, Y0
|
||||
VPXOR Y5, Y1, Y1
|
||||
VPXOR Y6, Y2, Y2
|
||||
VPXOR Y7, Y3, Y3
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect
|
||||
|
||||
LONG $0xe77da1c4; WORD $0x0304 // VMOVNTDQ go1.8 has
|
||||
LONG $0xe77da1c4; WORD $0x034c; BYTE $0x20
|
||||
LONG $0xe77da1c4; WORD $0x0354; BYTE $0x40
|
||||
LONG $0xe77da1c4; WORD $0x035c; BYTE $0x60
|
||||
|
||||
ADDQ $128, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop128b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVB -1(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_1b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVB -1(TMP3)(LEN*1), TMP6
|
||||
XORB TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_1b
|
||||
|
||||
MOVB TMP5, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $127, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP4
|
||||
ANDQ $127, TMP4
|
||||
|
||||
loop_8b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVQ -8(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_8b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ -8(TMP3)(LEN*1), TMP6
|
||||
XORQ TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_8b
|
||||
|
||||
MOVQ TMP5, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP4
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $128
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
TEXT ·hasAVX2(SB), NOSPLIT, $0
|
||||
XORQ AX, AX
|
||||
XORQ CX, CX
|
||||
ADDL $7, AX
|
||||
CPUID
|
||||
SHRQ $5, BX
|
||||
ANDQ $1, BX
|
||||
MOVB BX, ret+0(FP)
|
||||
RET
|
116
vendor/github.com/templexxx/xor/nosimd.go
generated
vendored
Normal file
116
vendor/github.com/templexxx/xor/nosimd.go
generated
vendored
Normal file
@ -0,0 +1,116 @@
|
||||
// Copyright 2013 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package xor
|
||||
|
||||
import (
|
||||
"runtime"
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
const wordSize = int(unsafe.Sizeof(uintptr(0)))
|
||||
const supportsUnaligned = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "ppc64" || runtime.GOARCH == "ppc64le" || runtime.GOARCH == "s390x"
|
||||
|
||||
// xor the bytes in a and b. The destination is assumed to have enough space.
|
||||
func bytesNoSIMD(dst, a, b []byte, size int) {
|
||||
if supportsUnaligned {
|
||||
fastXORBytes(dst, a, b, size)
|
||||
} else {
|
||||
// TODO(hanwen): if (dst, a, b) have common alignment
|
||||
// we could still try fastXORBytes. It is not clear
|
||||
// how often this happens, and it's only worth it if
|
||||
// the block encryption itself is hardware
|
||||
// accelerated.
|
||||
safeXORBytes(dst, a, b, size)
|
||||
}
|
||||
}
|
||||
|
||||
// split slice for cache-friendly
|
||||
const unitSize = 16 * 1024
|
||||
|
||||
func matrixNoSIMD(dst []byte, src [][]byte) {
|
||||
size := len(src[0])
|
||||
start := 0
|
||||
do := unitSize
|
||||
for start < size {
|
||||
end := start + do
|
||||
if end <= size {
|
||||
partNoSIMD(start, end, dst, src)
|
||||
start = start + do
|
||||
} else {
|
||||
partNoSIMD(start, size, dst, src)
|
||||
start = size
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// split vect will improve performance with big data by reducing cache pollution
|
||||
func partNoSIMD(start, end int, dst []byte, src [][]byte) {
|
||||
bytesNoSIMD(dst[start:end], src[0][start:end], src[1][start:end], end-start)
|
||||
for i := 2; i < len(src); i++ {
|
||||
bytesNoSIMD(dst[start:end], dst[start:end], src[i][start:end], end-start)
|
||||
}
|
||||
}
|
||||
|
||||
// fastXORBytes xor in bulk. It only works on architectures that
|
||||
// support unaligned read/writes.
|
||||
func fastXORBytes(dst, a, b []byte, n int) {
|
||||
w := n / wordSize
|
||||
if w > 0 {
|
||||
wordBytes := w * wordSize
|
||||
fastXORWords(dst[:wordBytes], a[:wordBytes], b[:wordBytes])
|
||||
}
|
||||
for i := n - n%wordSize; i < n; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
}
|
||||
|
||||
func safeXORBytes(dst, a, b []byte, n int) {
|
||||
ex := n % 8
|
||||
for i := 0; i < ex; i++ {
|
||||
dst[i] = a[i] ^ b[i]
|
||||
}
|
||||
|
||||
for i := ex; i < n; i += 8 {
|
||||
_dst := dst[i : i+8]
|
||||
_a := a[i : i+8]
|
||||
_b := b[i : i+8]
|
||||
_dst[0] = _a[0] ^ _b[0]
|
||||
_dst[1] = _a[1] ^ _b[1]
|
||||
_dst[2] = _a[2] ^ _b[2]
|
||||
_dst[3] = _a[3] ^ _b[3]
|
||||
|
||||
_dst[4] = _a[4] ^ _b[4]
|
||||
_dst[5] = _a[5] ^ _b[5]
|
||||
_dst[6] = _a[6] ^ _b[6]
|
||||
_dst[7] = _a[7] ^ _b[7]
|
||||
}
|
||||
}
|
||||
|
||||
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
|
||||
// The arguments are assumed to be of equal length.
|
||||
func fastXORWords(dst, a, b []byte) {
|
||||
dw := *(*[]uintptr)(unsafe.Pointer(&dst))
|
||||
aw := *(*[]uintptr)(unsafe.Pointer(&a))
|
||||
bw := *(*[]uintptr)(unsafe.Pointer(&b))
|
||||
n := len(b) / wordSize
|
||||
ex := n % 8
|
||||
for i := 0; i < ex; i++ {
|
||||
dw[i] = aw[i] ^ bw[i]
|
||||
}
|
||||
|
||||
for i := ex; i < n; i += 8 {
|
||||
_dw := dw[i : i+8]
|
||||
_aw := aw[i : i+8]
|
||||
_bw := bw[i : i+8]
|
||||
_dw[0] = _aw[0] ^ _bw[0]
|
||||
_dw[1] = _aw[1] ^ _bw[1]
|
||||
_dw[2] = _aw[2] ^ _bw[2]
|
||||
_dw[3] = _aw[3] ^ _bw[3]
|
||||
_dw[4] = _aw[4] ^ _bw[4]
|
||||
_dw[5] = _aw[5] ^ _bw[5]
|
||||
_dw[6] = _aw[6] ^ _bw[6]
|
||||
_dw[7] = _aw[7] ^ _bw[7]
|
||||
}
|
||||
}
|
574
vendor/github.com/templexxx/xor/sse2_amd64.s
generated
vendored
Normal file
574
vendor/github.com/templexxx/xor/sse2_amd64.s
generated
vendored
Normal file
@ -0,0 +1,574 @@
|
||||
#include "textflag.h"
|
||||
|
||||
// addr of mem
|
||||
#define DST BX
|
||||
#define SRC SI
|
||||
#define SRC0 TMP4
|
||||
#define SRC1 TMP5
|
||||
|
||||
// loop args
|
||||
// num of vect
|
||||
#define VECT CX
|
||||
#define LEN DX
|
||||
// pos of matrix
|
||||
#define POS R8
|
||||
|
||||
// tmp store
|
||||
// num of vect or ...
|
||||
#define TMP1 R9
|
||||
// pos of matrix or ...
|
||||
#define TMP2 R10
|
||||
// store addr of data/parity or ...
|
||||
#define TMP3 R11
|
||||
#define TMP4 R12
|
||||
#define TMP5 R13
|
||||
#define TMP6 R14
|
||||
|
||||
// func bytesSrc0(dst, src0, src1 []byte)
|
||||
TEXT ·xorSrc0(SB), NOSPLIT, $0
|
||||
MOVQ len+32(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $15, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop16b:
|
||||
MOVOU (SRC0)(POS*1), X0
|
||||
XORPD (SRC1)(POS*1), X0
|
||||
MOVOU X0, (DST)(POS*1)
|
||||
ADDQ $16, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop16b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $15, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $15, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $16
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesSrc1(dst, src0, src1 []byte)
|
||||
TEXT ·xorSrc1(SB), NOSPLIT, $0
|
||||
MOVQ len+56(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $15, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop16b:
|
||||
MOVOU (SRC0)(POS*1), X0
|
||||
XORPD (SRC1)(POS*1), X0
|
||||
MOVOU X0, (DST)(POS*1)
|
||||
ADDQ $16, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop16b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $15, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $15, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $16
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesSSE2mini(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesSSE2mini(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $15, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop16b:
|
||||
MOVOU (SRC0)(POS*1), X0
|
||||
XORPD (SRC1)(POS*1), X0
|
||||
|
||||
// MOVOU (SRC1)(POS*1), X4
|
||||
// PXOR X4, X0
|
||||
MOVOU X0, (DST)(POS*1)
|
||||
ADDQ $16, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop16b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $15, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $15, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $16
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesSSE2small(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesSSE2small(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $63, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop64b:
|
||||
MOVOU (SRC0)(POS*1), X0
|
||||
MOVOU 16(SRC0)(POS*1), X1
|
||||
MOVOU 32(SRC0)(POS*1), X2
|
||||
MOVOU 48(SRC0)(POS*1), X3
|
||||
|
||||
MOVOU (SRC1)(POS*1), X4
|
||||
MOVOU 16(SRC1)(POS*1), X5
|
||||
MOVOU 32(SRC1)(POS*1), X6
|
||||
MOVOU 48(SRC1)(POS*1), X7
|
||||
|
||||
PXOR X4, X0
|
||||
PXOR X5, X1
|
||||
PXOR X6, X2
|
||||
PXOR X7, X3
|
||||
|
||||
MOVOU X0, (DST)(POS*1)
|
||||
MOVOU X1, 16(DST)(POS*1)
|
||||
MOVOU X2, 32(DST)(POS*1)
|
||||
MOVOU X3, 48(DST)(POS*1)
|
||||
|
||||
ADDQ $64, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop64b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $63, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $63, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $64
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func bytesSSE2big(dst, src0, src1 []byte, size int)
|
||||
TEXT ·bytesSSE2big(SB), NOSPLIT, $0
|
||||
MOVQ len+72(FP), LEN
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src0+24(FP), SRC0
|
||||
MOVQ src1+48(FP), SRC1
|
||||
TESTQ $63, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop64b:
|
||||
MOVOU (SRC0)(POS*1), X0
|
||||
MOVOU 16(SRC0)(POS*1), X1
|
||||
MOVOU 32(SRC0)(POS*1), X2
|
||||
MOVOU 48(SRC0)(POS*1), X3
|
||||
|
||||
MOVOU (SRC1)(POS*1), X4
|
||||
MOVOU 16(SRC1)(POS*1), X5
|
||||
MOVOU 32(SRC1)(POS*1), X6
|
||||
MOVOU 48(SRC1)(POS*1), X7
|
||||
|
||||
PXOR X4, X0
|
||||
PXOR X5, X1
|
||||
PXOR X6, X2
|
||||
PXOR X7, X3
|
||||
|
||||
LONG $0xe70f4266; WORD $0x0304 // MOVNTDQ
|
||||
LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
|
||||
LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
|
||||
LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
|
||||
|
||||
ADDQ $64, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop64b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVB -1(SRC0)(LEN*1), TMP1
|
||||
MOVB -1(SRC1)(LEN*1), TMP2
|
||||
XORB TMP1, TMP2
|
||||
MOVB TMP2, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $63, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP1
|
||||
ANDQ $63, TMP1
|
||||
|
||||
loop_8b:
|
||||
MOVQ -8(SRC0)(LEN*1), TMP2
|
||||
MOVQ -8(SRC1)(LEN*1), TMP3
|
||||
XORQ TMP2, TMP3
|
||||
MOVQ TMP3, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP1
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $64
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func matrixSSE2small(dst []byte, src [][]byte)
|
||||
TEXT ·matrixSSE2small(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src+24(FP), SRC
|
||||
MOVQ vec+32(FP), VECT
|
||||
MOVQ len+8(FP), LEN
|
||||
TESTQ $63, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop64b:
|
||||
MOVQ VECT, TMP1
|
||||
SUBQ $2, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
MOVOU (TMP3)(POS*1), X0
|
||||
MOVOU 16(TMP4)(POS*1), X1
|
||||
MOVOU 32(TMP3)(POS*1), X2
|
||||
MOVOU 48(TMP4)(POS*1), X3
|
||||
|
||||
next_vect:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
MOVOU (TMP3)(POS*1), X4
|
||||
MOVOU 16(TMP4)(POS*1), X5
|
||||
MOVOU 32(TMP3)(POS*1), X6
|
||||
MOVOU 48(TMP4)(POS*1), X7
|
||||
PXOR X4, X0
|
||||
PXOR X5, X1
|
||||
PXOR X6, X2
|
||||
PXOR X7, X3
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect
|
||||
|
||||
MOVOU X0, (DST)(POS*1)
|
||||
MOVOU X1, 16(DST)(POS*1)
|
||||
MOVOU X2, 32(DST)(POS*1)
|
||||
MOVOU X3, 48(DST)(POS*1)
|
||||
|
||||
ADDQ $64, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop64b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVB -1(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_1b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVB -1(TMP3)(LEN*1), TMP6
|
||||
XORB TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_1b
|
||||
|
||||
MOVB TMP5, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $63, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP4
|
||||
ANDQ $63, TMP4
|
||||
|
||||
loop_8b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVQ -8(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_8b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ -8(TMP3)(LEN*1), TMP6
|
||||
XORQ TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_8b
|
||||
|
||||
MOVQ TMP5, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP4
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $64
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
// func matrixSSE2big(dst []byte, src [][]byte)
|
||||
TEXT ·matrixSSE2big(SB), NOSPLIT, $0
|
||||
MOVQ dst+0(FP), DST
|
||||
MOVQ src+24(FP), SRC
|
||||
MOVQ vec+32(FP), VECT
|
||||
MOVQ len+8(FP), LEN
|
||||
TESTQ $63, LEN
|
||||
JNZ not_aligned
|
||||
|
||||
aligned:
|
||||
MOVQ $0, POS
|
||||
|
||||
loop64b:
|
||||
MOVQ VECT, TMP1
|
||||
SUBQ $2, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
MOVOU (TMP3)(POS*1), X0
|
||||
MOVOU 16(TMP4)(POS*1), X1
|
||||
MOVOU 32(TMP3)(POS*1), X2
|
||||
MOVOU 48(TMP4)(POS*1), X3
|
||||
|
||||
next_vect:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ TMP3, TMP4
|
||||
MOVOU (TMP3)(POS*1), X4
|
||||
MOVOU 16(TMP4)(POS*1), X5
|
||||
MOVOU 32(TMP3)(POS*1), X6
|
||||
MOVOU 48(TMP4)(POS*1), X7
|
||||
PXOR X4, X0
|
||||
PXOR X5, X1
|
||||
PXOR X6, X2
|
||||
PXOR X7, X3
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect
|
||||
|
||||
LONG $0xe70f4266; WORD $0x0304
|
||||
LONG $0xe70f4266; WORD $0x034c; BYTE $0x10
|
||||
LONG $0xe70f4266; WORD $0x0354; BYTE $0x20
|
||||
LONG $0xe70f4266; WORD $0x035c; BYTE $0x30
|
||||
|
||||
ADDQ $64, POS
|
||||
CMPQ LEN, POS
|
||||
JNE loop64b
|
||||
RET
|
||||
|
||||
loop_1b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVB -1(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_1b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVB -1(TMP3)(LEN*1), TMP6
|
||||
XORB TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_1b
|
||||
|
||||
MOVB TMP5, -1(DST)(LEN*1)
|
||||
SUBQ $1, LEN
|
||||
TESTQ $7, LEN
|
||||
JNZ loop_1b
|
||||
|
||||
CMPQ LEN, $0
|
||||
JE ret
|
||||
TESTQ $63, LEN
|
||||
JZ aligned
|
||||
|
||||
not_aligned:
|
||||
TESTQ $7, LEN
|
||||
JNE loop_1b
|
||||
MOVQ LEN, TMP4
|
||||
ANDQ $63, TMP4
|
||||
|
||||
loop_8b:
|
||||
MOVQ VECT, TMP1
|
||||
MOVQ $0, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
SUBQ $2, TMP1
|
||||
MOVQ -8(TMP3)(LEN*1), TMP5
|
||||
|
||||
next_vect_8b:
|
||||
ADDQ $24, TMP2
|
||||
MOVQ (SRC)(TMP2*1), TMP3
|
||||
MOVQ -8(TMP3)(LEN*1), TMP6
|
||||
XORQ TMP6, TMP5
|
||||
SUBQ $1, TMP1
|
||||
JGE next_vect_8b
|
||||
|
||||
MOVQ TMP5, -8(DST)(LEN*1)
|
||||
SUBQ $8, LEN
|
||||
SUBQ $8, TMP4
|
||||
JG loop_8b
|
||||
|
||||
CMPQ LEN, $64
|
||||
JGE aligned
|
||||
RET
|
||||
|
||||
ret:
|
||||
RET
|
||||
|
||||
TEXT ·hasSSE2(SB), NOSPLIT, $0
|
||||
XORQ AX, AX
|
||||
INCL AX
|
||||
CPUID
|
||||
SHRQ $26, DX
|
||||
ANDQ $1, DX
|
||||
MOVB DX, ret+0(FP)
|
||||
RET
|
||||
|
49
vendor/github.com/templexxx/xor/xor.go
generated
vendored
Normal file
49
vendor/github.com/templexxx/xor/xor.go
generated
vendored
Normal file
@ -0,0 +1,49 @@
|
||||
package xor
|
||||
|
||||
// SIMD Extensions
|
||||
const (
|
||||
none = iota
|
||||
avx2
|
||||
// first introduced by Intel with the initial version of the Pentium 4 in 2001
|
||||
// so I think we can assume all amd64 has sse2
|
||||
sse2
|
||||
)
|
||||
|
||||
var extension = none
|
||||
|
||||
// Bytes : chose the shortest one as xor size
|
||||
// it's better to use it for big data ( > 64bytes )
|
||||
func Bytes(dst, src0, src1 []byte) {
|
||||
size := len(dst)
|
||||
if size > len(src0) {
|
||||
size = len(src0)
|
||||
}
|
||||
if size > len(src1) {
|
||||
size = len(src1)
|
||||
}
|
||||
xorBytes(dst, src0, src1, size)
|
||||
}
|
||||
|
||||
// BytesSameLen : all slice's length must be equal
|
||||
// cut size branch, save time for small data
|
||||
func BytesSameLen(dst, src0, src1 []byte) {
|
||||
xorSrc1(dst, src0, src1)
|
||||
}
|
||||
|
||||
// BytesSrc0 : src1 >= src0, dst >= src0
|
||||
// xor src0's len bytes
|
||||
func BytesSrc0(dst, src0, src1 []byte) {
|
||||
xorSrc0(dst, src0, src1)
|
||||
}
|
||||
|
||||
// BytesSrc1 : src0 >= src1, dst >= src1
|
||||
// xor src1's len bytes
|
||||
func BytesSrc1(dst, src0, src1 []byte) {
|
||||
xorSrc1(dst, src0, src1)
|
||||
}
|
||||
|
||||
// Matrix : all slice's length must be equal && != 0
|
||||
// len(src) must >= 2
|
||||
func Matrix(dst []byte, src [][]byte) {
|
||||
xorMatrix(dst, src)
|
||||
}
|
118
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
Normal file
118
vendor/github.com/templexxx/xor/xor_amd64.go
generated
vendored
Normal file
@ -0,0 +1,118 @@
|
||||
package xor
|
||||
|
||||
func init() {
|
||||
getEXT()
|
||||
}
|
||||
|
||||
func getEXT() {
|
||||
if hasAVX2() {
|
||||
extension = avx2
|
||||
} else {
|
||||
extension = sse2
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func xorBytes(dst, src0, src1 []byte, size int) {
|
||||
switch extension {
|
||||
case avx2:
|
||||
bytesAVX2(dst, src0, src1, size)
|
||||
default:
|
||||
bytesSSE2(dst, src0, src1, size)
|
||||
}
|
||||
}
|
||||
|
||||
// non-temporal hint store
|
||||
const nontmp = 8 * 1024
|
||||
const avx2loopsize = 128
|
||||
|
||||
func bytesAVX2(dst, src0, src1 []byte, size int) {
|
||||
if size < avx2loopsize {
|
||||
bytesAVX2mini(dst, src0, src1, size)
|
||||
} else if size >= avx2loopsize && size <= nontmp {
|
||||
bytesAVX2small(dst, src0, src1, size)
|
||||
} else {
|
||||
bytesAVX2big(dst, src0, src1, size)
|
||||
}
|
||||
}
|
||||
|
||||
const sse2loopsize = 64
|
||||
|
||||
func bytesSSE2(dst, src0, src1 []byte, size int) {
|
||||
if size < sse2loopsize {
|
||||
bytesSSE2mini(dst, src0, src1, size)
|
||||
} else if size >= sse2loopsize && size <= nontmp {
|
||||
bytesSSE2small(dst, src0, src1, size)
|
||||
} else {
|
||||
bytesSSE2big(dst, src0, src1, size)
|
||||
}
|
||||
}
|
||||
|
||||
func xorMatrix(dst []byte, src [][]byte) {
|
||||
switch extension {
|
||||
case avx2:
|
||||
matrixAVX2(dst, src)
|
||||
default:
|
||||
matrixSSE2(dst, src)
|
||||
}
|
||||
}
|
||||
|
||||
func matrixAVX2(dst []byte, src [][]byte) {
|
||||
size := len(dst)
|
||||
if size > nontmp {
|
||||
matrixAVX2big(dst, src)
|
||||
} else {
|
||||
matrixAVX2small(dst, src)
|
||||
}
|
||||
}
|
||||
|
||||
func matrixSSE2(dst []byte, src [][]byte) {
|
||||
size := len(dst)
|
||||
if size > nontmp {
|
||||
matrixSSE2big(dst, src)
|
||||
} else {
|
||||
matrixSSE2small(dst, src)
|
||||
}
|
||||
}
|
||||
|
||||
//go:noescape
|
||||
func xorSrc0(dst, src0, src1 []byte)
|
||||
|
||||
//go:noescape
|
||||
func xorSrc1(dst, src0, src1 []byte)
|
||||
|
||||
//go:noescape
|
||||
func bytesAVX2mini(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesAVX2big(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesAVX2small(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesSSE2mini(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesSSE2small(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func bytesSSE2big(dst, src0, src1 []byte, size int)
|
||||
|
||||
//go:noescape
|
||||
func matrixAVX2small(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func matrixAVX2big(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func matrixSSE2small(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func matrixSSE2big(dst []byte, src [][]byte)
|
||||
|
||||
//go:noescape
|
||||
func hasAVX2() bool
|
||||
|
||||
//go:noescape
|
||||
func hasSSE2() bool
|
19
vendor/github.com/templexxx/xor/xor_other.go
generated
vendored
Normal file
19
vendor/github.com/templexxx/xor/xor_other.go
generated
vendored
Normal file
@ -0,0 +1,19 @@
|
||||
// +build !amd64 noasm
|
||||
|
||||
package xor
|
||||
|
||||
func xorBytes(dst, src0, src1 []byte, size int) {
|
||||
bytesNoSIMD(dst, src0, src1, size)
|
||||
}
|
||||
|
||||
func xorMatrix(dst []byte, src [][]byte) {
|
||||
matrixNoSIMD(dst, src)
|
||||
}
|
||||
|
||||
func xorSrc0(dst, src0, src1 []byte) {
|
||||
bytesNoSIMD(dst, src0, src1, len(src0))
|
||||
}
|
||||
|
||||
func xorSrc1(dst, src0, src1 []byte) {
|
||||
bytesNoSIMD(dst, src0, src1, len(src1))
|
||||
}
|
24
vendor/manifest
vendored
24
vendor/manifest
vendored
@ -17,6 +17,14 @@
|
||||
"branch": "master",
|
||||
"notests": true
|
||||
},
|
||||
{
|
||||
"importpath": "github.com/AudriusButkevicius/kcp-go",
|
||||
"repository": "https://github.com/AudriusButkevicius/kcp-go",
|
||||
"vcs": "git",
|
||||
"revision": "0ccc04f3b8a7bdf53e2d4d6d0769adbc7cb3851a",
|
||||
"branch": "master",
|
||||
"notests": true
|
||||
},
|
||||
{
|
||||
"importpath": "github.com/AudriusButkevicius/pfilter",
|
||||
"repository": "https://github.com/AudriusButkevicius/pfilter",
|
||||
@ -378,6 +386,14 @@
|
||||
"path": "/leveldb",
|
||||
"notests": true
|
||||
},
|
||||
{
|
||||
"importpath": "github.com/templexxx/xor",
|
||||
"repository": "https://github.com/templexxx/xor",
|
||||
"vcs": "git",
|
||||
"revision": "42f9c041c330b560afb991153bf183c25444bcdc",
|
||||
"branch": "master",
|
||||
"notests": true
|
||||
},
|
||||
{
|
||||
"importpath": "github.com/thejerf/suture",
|
||||
"repository": "https://github.com/thejerf/suture",
|
||||
@ -413,14 +429,6 @@
|
||||
"path": "/qr",
|
||||
"notests": true
|
||||
},
|
||||
{
|
||||
"importpath": "github.com/xtaci/kcp-go",
|
||||
"repository": "https://github.com/xtaci/kcp-go",
|
||||
"vcs": "git",
|
||||
"revision": "0b0731ef3f184a8985edcb4ca26a4b0598c6dc1a",
|
||||
"branch": "master",
|
||||
"notests": true
|
||||
},
|
||||
{
|
||||
"importpath": "github.com/xtaci/smux",
|
||||
"repository": "https://github.com/xtaci/smux",
|
||||
|
Loading…
x
Reference in New Issue
Block a user