vendor: Update github.com/xtaci/kcp

This commit is contained in:
Jakob Borg 2017-03-07 14:28:09 +01:00
parent 81af29e3e2
commit b3e2665a79
34 changed files with 686 additions and 3335 deletions

View File

@ -51,7 +51,6 @@ func (d *kcpDialer) Dial(id protocol.DeviceID, uri *url.URL) (internalConn, erro
opts := d.cfg.Options()
conn.SetKeepAlive(0) // yamux and the stun service do keep-alives.
conn.SetStreamMode(true)
conn.SetACKNoDelay(false)
conn.SetWindowSize(opts.KCPSendWindowSize, opts.KCPReceiveWindowSize)

View File

@ -109,7 +109,6 @@ func (t *kcpListener) Serve() {
opts := t.cfg.Options()
conn.SetKeepAlive(0) // yamux and the stun service do keep-alives.
conn.SetStreamMode(true)
conn.SetACKNoDelay(false)
conn.SetWindowSize(opts.KCPSendWindowSize, opts.KCPReceiveWindowSize)

View File

@ -1,204 +0,0 @@
# Reed-Solomon
[![GoDoc][1]][2] [![Build Status][3]][4]
[1]: https://godoc.org/github.com/klauspost/reedsolomon?status.svg
[2]: https://godoc.org/github.com/klauspost/reedsolomon
[3]: https://travis-ci.org/klauspost/reedsolomon.svg?branch=master
[4]: https://travis-ci.org/klauspost/reedsolomon
Reed-Solomon Erasure Coding in Go, with speeds exceeding 1GB/s/cpu core implemented in pure Go.
This is a golang port of the [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon) library released by [Backblaze](http://backblaze.com), with some additional optimizations.
For an introduction on erasure coding, see the post on the [Backblaze blog](https://www.backblaze.com/blog/reed-solomon/).
Package home: https://github.com/klauspost/reedsolomon
Godoc: https://godoc.org/github.com/klauspost/reedsolomon
# Installation
To get the package use the standard:
```bash
go get github.com/klauspost/reedsolomon
```
# Usage
This section assumes you know the basics of Reed-Solomon encoding. A good start is this [Backblaze blog post](https://www.backblaze.com/blog/reed-solomon/).
This package performs the calculation of the parity sets. The usage is therefore relatively simple.
First of all, you need to choose your distribution of data and parity shards. A 'good' distribution is very subjective, and will depend a lot on your usage scenario. A good starting point is above 5 and below 257 data shards (the maximum supported number), and the number of parity shards to be 2 or above, and below the number of data shards.
To create an encoder with 10 data shards (where your data goes) and 3 parity shards (calculated):
```Go
enc, err := reedsolomon.New(10, 3)
```
This encoder will work for all parity sets with this distribution of data and parity shards. The error will only be set if you specify 0 or negative values in any of the parameters, or if you specify more than 256 data shards.
The data you send and receive is a simple slice of byte slices; `[][]byte`. In the example above, the top slice must have a length of 13.
```Go
data := make([][]byte, 13)
```
You should then fill the first 10 slices with *equally sized* data, and create parity shards that will be populated with parity data. In this case we create the data in memory, but you could for instance also use [mmap](https://github.com/edsrzf/mmap-go) to map files.
```Go
// Create all shards, size them at 50000 each
for i := range data {
data[i] = make([]byte, 50000)
}
// Fill some data into the data shards
for i, in := range data[:10] {
for j := range in {
in[j] = byte((i+j)&0xff)
}
}
```
To populate the parity shards, you simply call `Encode()` with your data.
```Go
err = enc.Encode(data)
```
The only case where you should get an error is if the data shards aren't of equal size. The last 3 shards now contain parity data. You can verify this by calling `Verify()`:
```Go
ok, err = enc.Verify(data)
```
The final (and important) part is to be able to reconstruct missing shards. For this to work, you need to know which parts of your data are missing. The encoder *does not know which parts are invalid*, so if data corruption is a likely scenario, you need to implement a hash check for each shard. If a byte has changed in your set, and you don't know which it is, there is no way to reconstruct the data set.
To indicate missing data, you set the shard to nil before calling `Reconstruct()`:
```Go
// Delete two data shards
data[3] = nil
data[7] = nil
// Reconstruct the missing shards
err := enc.Reconstruct(data)
```
The missing data and parity shards will be recreated. If more than 3 shards are missing, the reconstruction will fail.
So to sum up reconstruction:
* The number of data/parity shards must match the numbers used for encoding.
* The order of shards must be the same as used when encoding.
* You may only supply data you know is valid.
* Invalid shards should be set to nil.
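To act on the last two points, you can layer a per-shard integrity check on top of the encoder. Below is a minimal sketch, assuming you stored a SHA-256 digest per shard when the set was written (the digest storage itself is outside this package):
```Go
import (
	"bytes"
	"crypto/sha256"
)

// markCorrupt nils out every shard whose current SHA-256 digest no longer
// matches the digest recorded when the shard was written, so that
// Reconstruct() can rebuild it.
func markCorrupt(shards [][]byte, digests [][]byte) {
	for i, shard := range shards {
		if shard == nil {
			continue
		}
		sum := sha256.Sum256(shard)
		if !bytes.Equal(sum[:], digests[i]) {
			shards[i] = nil
		}
	}
}
```
After marking, call `enc.Reconstruct(data)` exactly as shown above.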
For complete examples of an encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
# Splitting/Joining Data
You might have a large slice of data. To help you split this, there are some helper functions that can split and join a single byte slice.
```Go
bigfile, _ := ioutil.ReadFile("myfile.data")
// Split the file
split, err := enc.Split(bigfile)
```
This will split the file into the number of data shards set when creating the encoder and create empty parity shards.
An important thing to note is that you have to *keep track of the exact input size*. If the size of the input isn't divisible by the number of data shards, extra zeros will be inserted in the last shard.
To join a data set, use the `Join()` function, which will join the shards and write it to the `io.Writer` you supply:
```Go
// Join a data set and write it to io.Discard.
err = enc.Join(io.Discard, data, len(bigfile))
```
# Streaming/Merging
It might seem like a limitation that all data should be in memory, but an important property is that *as long as the number of data/parity shards is the same, you can merge/split data sets*, and they will remain valid as a separate set.
```Go
// Split the data set of 50000 elements into two of 25000
splitA := make([][]byte, 13)
splitB := make([][]byte, 13)
// Merge into a 100000 element set
merged := make([][]byte, 13)
for i := range data {
splitA[i] = data[i][:25000]
splitB[i] = data[i][25000:]
// Concatenate it to itself
merged[i] = append(make([]byte, 0, len(data[i])*2), data[i]...)
merged[i] = append(merged[i], data[i]...)
}
// Each part should still verify as ok.
ok, err := enc.Verify(splitA)
if ok && err == nil {
log.Println("splitA ok")
}
ok, err = enc.Verify(splitB)
if ok && err == nil {
log.Println("splitB ok")
}
ok, err = enc.Verify(merged)
if ok && err == nil {
log.Println("merged ok")
}
```
This means that if you have a data set that may not fit into memory, you can split processing into smaller blocks. For the best throughput, don't use too small blocks.
This also means that you can divide big input up into smaller blocks, and do reconstruction on parts of your data. This doesn't give the same flexibility of a higher number of data shards, but it will be much more performant.
# Streaming API
A fully streaming API has been added, which lets you perform the same operations, but on streams. To use the stream API, use the [`NewStream`](https://godoc.org/github.com/klauspost/reedsolomon#NewStream) function to create the encoding/decoding interfaces. You can use [`NewStreamC`](https://godoc.org/github.com/klauspost/reedsolomon#NewStreamC) to get an interface that reads/writes concurrently from the streams.
Input is delivered as `[]io.Reader`, output as `[]io.Writer`, and functionality corresponds to the in-memory API. Each stream must supply the same amount of data, similar to how each slice must be the same size with the in-memory API.
If an error occurs in relation to a stream, a [`StreamReadError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamReadError) or [`StreamWriteError`](https://godoc.org/github.com/klauspost/reedsolomon#StreamWriteError) will help you determine which stream was the offender.
No buffering or timeouts/retries are specified. If you want to add those, you need to add them to the Reader/Writer.
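A minimal sketch of the streaming flow, using files as the underlying readers/writers (error handling abbreviated; the method signatures follow the `StreamEncoder` interface as documented in the godoc links above, so treat this as a sketch rather than a drop-in example):
```Go
// Stream-encode "input.bin" into 4 data and 2 parity shard files.
enc, err := reedsolomon.NewStream(4, 2)
if err != nil {
	log.Fatal(err)
}
in, _ := os.Open("input.bin")
stat, _ := in.Stat()

// Split the input stream into 4 data shard files.
dataFiles := make([]*os.File, 4)
dataW := make([]io.Writer, 4)
for i := range dataW {
	f, _ := os.Create(fmt.Sprintf("input.bin.%d", i))
	dataFiles[i], dataW[i] = f, f
}
err = enc.Split(in, dataW, stat.Size())
for _, f := range dataFiles {
	f.Close()
}

// Re-open the data shards as readers and stream out the 2 parity shards.
dataR := make([]io.Reader, 4)
for i := range dataR {
	dataR[i], _ = os.Open(fmt.Sprintf("input.bin.%d", i))
}
parityW := make([]io.Writer, 2)
for i := range parityW {
	f, _ := os.Create(fmt.Sprintf("input.bin.%d", 4+i))
	parityW[i] = f
}
err = enc.Encode(dataR, parityW)
```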
For complete examples of a streaming encoder and decoder see the [examples folder](https://github.com/klauspost/reedsolomon/tree/master/examples).
# Performance
Performance depends mainly on the number of parity shards. In rough terms, doubling the number of parity shards will double the encoding time.
Here are the throughput numbers with some different selections of data and parity shards. For reference each shard is 1MB random data, and 2 CPU cores are used for encoding.
| Data | Parity | Parity % | MB/s | SSSE3 MB/s | SSSE3 Speed | Rel. Speed |
|------|--------|--------|--------|-------------|-------------|------------|
| 5 | 2 | 40% | 576,11 | 2599,2 | 451% | 100,00% |
| 10 | 2 | 20% | 587,73 | 3100,28 | 528% | 102,02% |
| 10 | 4 | 40% | 298,38 | 2470,97 | 828% | 51,79% |
| 50 | 20 | 40% | 59,81 | 713,28 | 1193% | 10,38% |
If `runtime.GOMAXPROCS()` is set to a value higher than 1, the encoder will use multiple goroutines to perform the calculations in `Verify`, `Encode` and `Reconstruct`.
Example of performance scaling on Intel(R) Core(TM) i7-2600 CPU @ 3.40GHz - 4 physical cores, 8 logical cores. The example uses 10 blocks with 16MB data each and 4 parity blocks.
| Threads | MB/s | Speed |
|---------|---------|-------|
| 1 | 1355,11 | 100% |
| 2 | 2339,78 | 172% |
| 4 | 3179,33 | 235% |
| 8 | 4346,18 | 321% |
# asm2plan9s
[asm2plan9s](https://github.com/fwessels/asm2plan9s) is used for assembling the AVX2 instructions into their BYTE/WORD/LONG equivalents.
# Links
* [Backblaze Open Sources Reed-Solomon Erasure Coding Source Code](https://www.backblaze.com/blog/reed-solomon/).
* [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon). Compatible java library by Backblaze.
* [reedsolomon-c](https://github.com/jannson/reedsolomon-c). C version, compatible with output from this package.
* [Reed-Solomon Erasure Coding in Haskell](https://github.com/NicolasT/reedsolomon). Haskell port of the package with similar performance.
* [go-erasure](https://github.com/somethingnew2-0/go-erasure). A similar library using cgo, slower in my tests.
* [rsraid](https://github.com/goayame/rsraid). A similar library written in Go. Slower, but supports more shards.
* [Screaming Fast Galois Field Arithmetic](http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf). Basis for SSE3 optimizations.
# License
This code, like the original [JavaReedSolomon](https://github.com/Backblaze/JavaReedSolomon), is published under an MIT license. See LICENSE file for more information.

View File

@ -1,20 +0,0 @@
os: Visual Studio 2015
platform: x64
clone_folder: c:\gopath\src\github.com\klauspost\reedsolomon
# environment variables
environment:
GOPATH: c:\gopath
install:
- echo %PATH%
- echo %GOPATH%
- go version
- go env
- go get -d ./...
build_script:
- go test -v -cpu=2 ./...
- go test -cpu=1,2,4 -short -race ./...

View File

@ -1,45 +0,0 @@
# Examples
This folder contains usage examples of the Reed-Solomon encoder.
# Simple Encoder/Decoder
Shows basic use of the encoder, and will encode a single file into a number of
data and parity shards. This is meant as an example and is not meant for production use,
since there are a number of shortcomings noted below.
To build an executable use:
```bash
go build simple-decoder.go
go build simple-encoder.go
```
# Streaming API examples
There are streaming examples of the same functionality, which streams data instead of keeping it in memory.
To build the executables use:
```bash
go build stream-decoder.go
go build stream-encoder.go
```
## Shortcomings
* If the file size of the input isn't divisible by the number of data shards
the output will contain extra zeroes
* If the shard numbers aren't the same for the decoder as in the
encoder, invalid output will be generated.
* If values have changed in a shard, it cannot be reconstructed.
* If two shards have been swapped, reconstruction will always fail.
You need to supply the shards in the same order as they were given to you.
The solution for this is to save a metadata file containing:
* File size.
* The number of data/parity shards.
* HASH of each shard.
* Order of the shards.
If you save these properties, you should be able to detect file corruption in a shard and be able to reconstruct your data if you have the needed number of shards left.
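A minimal sketch of such a metadata record (the type and field names here are illustrative, not part of the examples):
```Go
// ShardMeta is an illustrative record to store alongside the shards.
type ShardMeta struct {
	FileSize     int64    // exact size of the original file
	DataShards   int      // number of data shards used when encoding
	ParityShards int      // number of parity shards used when encoding
	ShardHashes  [][]byte // e.g. a SHA-256 digest per shard, in order
}
```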

View File

@ -5,10 +5,6 @@
package reedsolomon
import (
"github.com/klauspost/cpuid"
)
//go:noescape
func galMulSSSE3(low, high, in, out []byte)
@ -40,12 +36,12 @@ func galMulSSSE3Xor(low, high, in, out []byte) {
}
*/
func galMulSlice(c byte, in, out []byte) {
func galMulSlice(c byte, in, out []byte, ssse3, avx2 bool) {
var done int
if cpuid.CPU.AVX2() {
if avx2 {
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if cpuid.CPU.SSSE3() {
} else if ssse3 {
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}
@ -58,12 +54,12 @@ func galMulSlice(c byte, in, out []byte) {
}
}
func galMulSliceXor(c byte, in, out []byte) {
func galMulSliceXor(c byte, in, out []byte, ssse3, avx2 bool) {
var done int
if cpuid.CPU.AVX2() {
if avx2 {
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if cpuid.CPU.SSSE3() {
} else if ssse3 {
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}

View File

@ -4,14 +4,14 @@
package reedsolomon
func galMulSlice(c byte, in, out []byte) {
func galMulSlice(c byte, in, out []byte, ssse3, avx2 bool) {
mt := mulTable[c]
for n, input := range in {
out[n] = mt[input]
}
}
func galMulSliceXor(c byte, in, out []byte) {
func galMulSliceXor(c byte, in, out []byte, ssse3, avx2 bool) {
mt := mulTable[c]
for n, input := range in {
out[n] ^= mt[input]

View File

@ -131,13 +131,13 @@ func TestGalois(t *testing.T) {
// Test slices (>16 entries to test assembler)
in := []byte{0, 1, 2, 3, 4, 5, 6, 10, 50, 100, 150, 174, 201, 255, 99, 32, 67, 85}
out := make([]byte, len(in))
galMulSlice(25, in, out)
galMulSlice(25, in, out, false, false)
expect := []byte{0x0, 0x19, 0x32, 0x2b, 0x64, 0x7d, 0x56, 0xfa, 0xb8, 0x6d, 0xc7, 0x85, 0xc3, 0x1f, 0x22, 0x7, 0x25, 0xfe}
if 0 != bytes.Compare(out, expect) {
t.Errorf("got %#v, expected %#v", out, expect)
}
galMulSlice(177, in, out)
galMulSlice(177, in, out, false, false)
expect = []byte{0x0, 0xb1, 0x7f, 0xce, 0xfe, 0x4f, 0x81, 0x9e, 0x3, 0x6, 0xe8, 0x75, 0xbd, 0x40, 0x36, 0xa3, 0x95, 0xcb}
if 0 != bytes.Compare(out, expect) {
t.Errorf("got %#v, expected %#v", out, expect)

vendor/github.com/klauspost/reedsolomon/options.go generated vendored Normal file
View File

@ -0,0 +1,67 @@
package reedsolomon
import (
"runtime"
"github.com/klauspost/cpuid"
)
// Option allows overriding processing parameters.
type Option func(*options)
type options struct {
maxGoroutines int
minSplitSize int
useAVX2, useSSSE3 bool
}
var defaultOptions = options{
maxGoroutines: 50,
minSplitSize: 512,
}
func init() {
if runtime.GOMAXPROCS(0) <= 1 {
defaultOptions.maxGoroutines = 1
}
// Detect CPU capabilities.
defaultOptions.useSSSE3 = cpuid.CPU.SSSE3()
defaultOptions.useAVX2 = cpuid.CPU.AVX2()
}
// WithMaxGoroutines sets the maximum number of goroutines to use for encoding & decoding.
// Jobs will be split into this many parts, unless each goroutine would have to process
// less than minSplitSize bytes (set with WithMinSplitSize).
// For the best speed, keep this well above the GOMAXPROCS number for more fine grained
// scheduling.
// If n <= 0, it is ignored.
func WithMaxGoroutines(n int) Option {
return func(o *options) {
if n > 0 {
o.maxGoroutines = n
}
}
}
// WithMinSplitSize sets the minimum encoding size in bytes per goroutine.
// See WithMaxGoroutines on how jobs are split.
// If n <= 0, it is ignored.
func WithMinSplitSize(n int) Option {
return func(o *options) {
if n > 0 {
o.minSplitSize = n
}
}
}
func withSSE3(enabled bool) Option {
return func(o *options) {
o.useSSSE3 = enabled
}
}
func withAVX2(enabled bool) Option {
return func(o *options) {
o.useAVX2 = enabled
}
}
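These options are picked up through the new variadic parameter on `New` (see the `reedsolomon.go` hunk below). A brief sketch of how a caller might tune the encoder, with illustrative values:
```Go
// Sketch: cap parallelism and raise the per-goroutine split size.
enc, err := reedsolomon.New(10, 3,
	reedsolomon.WithMaxGoroutines(4),   // at most 4 goroutines per operation
	reedsolomon.WithMinSplitSize(4096), // don't split work below 4 KiB per goroutine
)
if err != nil {
	log.Fatal(err)
}
```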

View File

@ -15,7 +15,6 @@ import (
"bytes"
"errors"
"io"
"runtime"
"sync"
)
@ -83,6 +82,7 @@ type reedSolomon struct {
m matrix
tree inversionTree
parity [][]byte
o options
}
// ErrInvShardNum will be returned by New, if you attempt to create
@ -98,13 +98,18 @@ var ErrMaxShardNum = errors.New("cannot create Encoder with 255 or more data+par
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the maximum number of data shards is 256.
func New(dataShards, parityShards int) (Encoder, error) {
// If no options are supplied, default options are used.
func New(dataShards, parityShards int, opts ...Option) (Encoder, error) {
r := reedSolomon{
DataShards: dataShards,
ParityShards: parityShards,
Shards: dataShards + parityShards,
o: defaultOptions,
}
for _, opt := range opts {
opt(&r.o)
}
if dataShards <= 0 || parityShards <= 0 {
return nil, ErrInvShardNum
}
@ -201,7 +206,7 @@ func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
// number of matrix rows used, is determined by
// outputCount, which is the number of outputs to compute.
func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
if runtime.GOMAXPROCS(0) > 1 && len(inputs[0]) > minSplitSize {
if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
return
}
@ -209,26 +214,21 @@ func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, output
in := inputs[c]
for iRow := 0; iRow < outputCount; iRow++ {
if c == 0 {
galMulSlice(matrixRows[iRow][c], in, outputs[iRow])
galMulSlice(matrixRows[iRow][c], in, outputs[iRow], r.o.useSSSE3, r.o.useAVX2)
} else {
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow])
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], r.o.useSSSE3, r.o.useAVX2)
}
}
}
}
const (
minSplitSize = 512 // min split size per goroutine
maxGoroutines = 50 // max goroutines number for encoding & decoding
)
// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
var wg sync.WaitGroup
do := byteCount / maxGoroutines
if do < minSplitSize {
do = minSplitSize
do := byteCount / r.o.maxGoroutines
if do < r.o.minSplitSize {
do = r.o.minSplitSize
}
start := 0
for start < byteCount {
@ -241,9 +241,9 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
in := inputs[c]
for iRow := 0; iRow < outputCount; iRow++ {
if c == 0 {
galMulSlice(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop])
galMulSlice(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
} else {
galMulSliceXor(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop])
galMulSliceXor(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop], r.o.useSSSE3, r.o.useAVX2)
}
}
}
@ -258,13 +258,36 @@ func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outpu
// except this will check values and return
// as soon as a difference is found.
func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
if r.o.maxGoroutines > 1 && byteCount > r.o.minSplitSize {
return r.checkSomeShardsP(matrixRows, inputs, toCheck, outputCount, byteCount)
}
outputs := make([][]byte, len(toCheck))
for i := range outputs {
outputs[i] = make([]byte, byteCount)
}
for c := 0; c < r.DataShards; c++ {
in := inputs[c]
for iRow := 0; iRow < outputCount; iRow++ {
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], r.o.useSSSE3, r.o.useAVX2)
}
}
for i, calc := range outputs {
if !bytes.Equal(calc, toCheck[i]) {
return false
}
}
return true
}
func (r reedSolomon) checkSomeShardsP(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
same := true
var mu sync.RWMutex // For above
var wg sync.WaitGroup
do := byteCount / maxGoroutines
if do < minSplitSize {
do = minSplitSize
do := byteCount / r.o.maxGoroutines
if do < r.o.minSplitSize {
do = r.o.minSplitSize
}
start := 0
for start < byteCount {
@ -287,7 +310,7 @@ func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outpu
mu.RUnlock()
in := inputs[c][start : start+do]
for iRow := 0; iRow < outputCount; iRow++ {
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow])
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow], r.o.useSSSE3, r.o.useAVX2)
}
}

View File

@ -14,9 +14,43 @@ import (
"testing"
)
func testOpts() [][]Option {
if !testing.Short() {
return [][]Option{}
}
opts := [][]Option{
{WithMaxGoroutines(1), WithMinSplitSize(500), withSSE3(false), withAVX2(false)},
{WithMaxGoroutines(5000), WithMinSplitSize(50), withSSE3(false), withAVX2(false)},
{WithMaxGoroutines(5000), WithMinSplitSize(500000), withSSE3(false), withAVX2(false)},
{WithMaxGoroutines(1), WithMinSplitSize(500000), withSSE3(false), withAVX2(false)},
}
for _, o := range opts[:] {
if defaultOptions.useSSSE3 {
n := make([]Option, len(o), len(o)+1)
copy(n, o)
n = append(n, withSSE3(true))
opts = append(opts, n)
}
if defaultOptions.useAVX2 {
n := make([]Option, len(o), len(o)+1)
copy(n, o)
n = append(n, withAVX2(true))
opts = append(opts, n)
}
}
return opts
}
func TestEncoding(t *testing.T) {
testEncoding(t)
for _, o := range testOpts() {
testEncoding(t, o...)
}
}
func testEncoding(t *testing.T, o ...Option) {
perShard := 50000
r, err := New(10, 3)
r, err := New(10, 3, o...)
if err != nil {
t.Fatal(err)
}
@ -56,8 +90,15 @@ func TestEncoding(t *testing.T) {
}
func TestReconstruct(t *testing.T) {
testReconstruct(t)
for _, o := range testOpts() {
testReconstruct(t, o...)
}
}
func testReconstruct(t *testing.T, o ...Option) {
perShard := 50000
r, err := New(10, 3)
r, err := New(10, 3, o...)
if err != nil {
t.Fatal(err)
}
@ -122,8 +163,15 @@ func TestReconstruct(t *testing.T) {
}
func TestVerify(t *testing.T) {
testVerify(t)
for _, o := range testOpts() {
testVerify(t, o...)
}
}
func testVerify(t *testing.T, o ...Option) {
perShard := 33333
r, err := New(10, 4)
r, err := New(10, 4, o...)
if err != nil {
t.Fatal(err)
}
@ -536,14 +584,27 @@ func BenchmarkReconstructP10x4x16M(b *testing.B) {
}
func TestEncoderReconstruct(t *testing.T) {
testEncoderReconstruct(t)
for _, o := range testOpts() {
testEncoderReconstruct(t, o...)
}
}
func testEncoderReconstruct(t *testing.T, o ...Option) {
// Create some sample data
var data = make([]byte, 250000)
fillRandom(data)
// Create 5 data slices of 50000 elements each
enc, _ := New(5, 3)
shards, _ := enc.Split(data)
err := enc.Encode(shards)
enc, err := New(5, 3, o...)
if err != nil {
t.Fatal(err)
}
shards, err := enc.Split(data)
if err != nil {
t.Fatal(err)
}
err = enc.Encode(shards)
if err != nil {
t.Fatal(err)
}

View File

@ -145,8 +145,8 @@ type rsStream struct {
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the maximum number of data shards is 256.
func NewStream(dataShards, parityShards int) (StreamEncoder, error) {
enc, err := New(dataShards, parityShards)
func NewStream(dataShards, parityShards int, o ...Option) (StreamEncoder, error) {
enc, err := New(dataShards, parityShards, o...)
if err != nil {
return nil, err
}
@ -161,8 +161,8 @@ func NewStream(dataShards, parityShards int) (StreamEncoder, error) {
// the number of data shards and parity shards given.
//
// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
func NewStreamC(dataShards, parityShards int, conReads, conWrites bool) (StreamEncoder, error) {
enc, err := New(dataShards, parityShards)
func NewStreamC(dataShards, parityShards int, conReads, conWrites bool, o ...Option) (StreamEncoder, error) {
enc, err := New(dataShards, parityShards, o...)
if err != nil {
return nil, err
}
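Since both stream constructors now forward options to `New`, the same tuning applies to streaming use; a brief sketch:
```Go
// Sketch: concurrent reads and writes, single-goroutine encoding.
enc, err := reedsolomon.NewStreamC(10, 3, true, true,
	reedsolomon.WithMaxGoroutines(1))
if err != nil {
	log.Fatal(err)
}
```
The FEC code in kcp-go further down uses the same option with the in-memory constructor: `reedsolomon.New(dataShards, parityShards, reedsolomon.WithMaxGoroutines(1))`.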

vendor/github.com/xtaci/kcp-go/emitter.go generated vendored Normal file
View File

@ -0,0 +1,50 @@
package kcp
import (
"net"
"sync/atomic"
)
var defaultEmitter Emitter
const emitQueue = 8192
func init() {
defaultEmitter.init()
}
type (
emitPacket struct {
conn net.PacketConn
to net.Addr
data []byte
recycle bool
}
// Emitter is the global packet sender
Emitter struct {
ch chan emitPacket
}
)
func (e *Emitter) init() {
e.ch = make(chan emitPacket, emitQueue)
go e.emitTask()
}
// keep on writing packets to the kernel
func (e *Emitter) emitTask() {
for p := range e.ch {
if n, err := p.conn.WriteTo(p.data, p.to); err == nil {
atomic.AddUint64(&DefaultSnmp.OutSegs, 1)
atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(n))
}
if p.recycle {
xmitBuf.Put(p.data)
}
}
}
func (e *Emitter) emit(p emitPacket) {
e.ch <- p
}
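For orientation, this is how a packet is handed to the emitter later in this change (the `output` path in sess.go), shown here as a field-by-field sketch:
```Go
// Package-internal sketch: instead of calling conn.WriteTo directly, a
// sender queues the finished datagram on the shared emitter, which writes
// it out, bumps OutSegs/OutBytes, and optionally recycles the buffer.
defaultEmitter.emit(emitPacket{
	conn:    s.conn,   // the underlying net.PacketConn
	to:      s.remote, // destination address
	data:    ext,      // encoded datagram
	recycle: true,     // return the buffer to xmitBuf after the write
})
```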

View File

@ -4,7 +4,7 @@ import (
"encoding/binary"
"sync/atomic"
"github.com/xtaci/reedsolomon"
"github.com/klauspost/reedsolomon"
)
const (
@ -12,26 +12,29 @@ const (
fecHeaderSizePlus2 = fecHeaderSize + 2 // plus 2B data size
typeData = 0xf1
typeFEC = 0xf2
fecExpire = 30000 // 30s
)
type (
// FEC defines forward error correction for packets
FEC struct {
rx []fecPacket // ordered receive queue
rxlimit int // queue size limit
dataShards int
parityShards int
shardSize int
next uint32 // next seqid
enc reedsolomon.Encoder
shards [][]byte
shards2 [][]byte // for calcECC
shardsflag []bool
paws uint32 // Protect Against Wrapped Sequence numbers
lastCheck uint32
rx []fecPacket // ordered receive queue
// caches
decodeCache [][]byte
encodeCache [][]byte
shardsflag []bool
// RS encoder
enc reedsolomon.Encoder
}
// fecPacket is a decoded FEC packet
fecPacket struct {
seqid uint32
flag uint16
@ -54,19 +57,19 @@ func newFEC(rxlimit, dataShards, parityShards int) *FEC {
fec.parityShards = parityShards
fec.shardSize = dataShards + parityShards
fec.paws = (0xffffffff/uint32(fec.shardSize) - 1) * uint32(fec.shardSize)
enc, err := reedsolomon.New(dataShards, parityShards)
enc, err := reedsolomon.New(dataShards, parityShards, reedsolomon.WithMaxGoroutines(1))
if err != nil {
return nil
}
fec.enc = enc
fec.shards = make([][]byte, fec.shardSize)
fec.shards2 = make([][]byte, fec.shardSize)
fec.decodeCache = make([][]byte, fec.shardSize)
fec.encodeCache = make([][]byte, fec.shardSize)
fec.shardsflag = make([]bool, fec.shardSize)
return fec
}
// decode a fec packet
func (fec *FEC) decode(data []byte) fecPacket {
// decodeBytes a fec packet
func (fec *FEC) decodeBytes(data []byte) fecPacket {
var pkt fecPacket
pkt.seqid = binary.LittleEndian.Uint32(data)
pkt.flag = binary.LittleEndian.Uint16(data[4:])
@ -88,28 +91,11 @@ func (fec *FEC) markFEC(data []byte) {
binary.LittleEndian.PutUint32(data, fec.next)
binary.LittleEndian.PutUint16(data[4:], typeFEC)
fec.next++
if fec.next >= fec.paws { // paws would only occur in markFEC
fec.next = 0
}
}
// input a fec packet
func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
// expiration
now := currentMs()
if now-fec.lastCheck >= fecExpire {
var rx []fecPacket
for k := range fec.rx {
if now-fec.rx[k].ts < fecExpire {
rx = append(rx, fec.rx[k])
} else {
xmitBuf.Put(fec.rx[k].data)
}
}
fec.rx = rx
fec.lastCheck = now
fec.next %= fec.paws
}
// Decode a fec packet
func (fec *FEC) Decode(pkt fecPacket) (recovered [][]byte) {
// insertion
n := len(fec.rx) - 1
insertIdx := 0
@ -117,7 +103,7 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
if pkt.seqid == fec.rx[i].seqid { // de-duplicate
xmitBuf.Put(pkt.data)
return nil
} else if pkt.seqid > fec.rx[i].seqid { // insertion
} else if _itimediff(pkt.seqid, fec.rx[i].seqid) > 0 { // insertion
insertIdx = i + 1
break
}
@ -146,23 +132,24 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
searchEnd = len(fec.rx) - 1
}
// re-construct datashards
if searchEnd > searchBegin && searchEnd-searchBegin+1 >= fec.dataShards {
numshard := 0
numDataShard := 0
first := -1
maxlen := 0
shards := fec.shards
shards := fec.decodeCache
shardsflag := fec.shardsflag
for k := range fec.shards {
for k := range fec.decodeCache {
shards[k] = nil
shardsflag[k] = false
}
for i := searchBegin; i <= searchEnd; i++ {
seqid := fec.rx[i].seqid
if seqid > shardEnd {
if _itimediff(seqid, shardEnd) > 0 {
break
} else if seqid >= shardBegin {
} else if _itimediff(seqid, shardBegin) >= 0 {
shards[seqid%uint32(fec.shardSize)] = fec.rx[i].data
shardsflag[seqid%uint32(fec.shardSize)] = true
numshard++
@ -226,11 +213,12 @@ func (fec *FEC) input(pkt fecPacket) (recovered [][]byte) {
return
}
func (fec *FEC) calcECC(data [][]byte, offset, maxlen int) (ecc [][]byte) {
// Encode a group of datashards
func (fec *FEC) Encode(data [][]byte, offset, maxlen int) (ecc [][]byte) {
if len(data) != fec.shardSize {
return nil
}
shards := fec.shards2
shards := fec.encodeCache
for k := range shards {
shards[k] = data[k][offset:maxlen]
}

vendor/github.com/xtaci/kcp-go/kcp.go generated vendored
View File

@ -72,17 +72,15 @@ func ikcp_decode32u(p []byte, l *uint32) []byte {
func _imin_(a, b uint32) uint32 {
if a <= b {
return a
} else {
return b
}
return b
}
func _imax_(a, b uint32) uint32 {
if a >= b {
return a
} else {
return b
}
return b
}
func _ibound_(lower, middle, upper uint32) uint32 {
@ -102,11 +100,11 @@ type Segment struct {
ts uint32
sn uint32
una uint32
data []byte
resendts uint32
rto uint32
fastack uint32
xmit uint32
data []byte
}
// encode a segment into buffer
@ -127,7 +125,8 @@ type KCP struct {
conv, mtu, mss, state uint32
snd_una, snd_nxt, rcv_nxt uint32
ssthresh uint32
rx_rttval, rx_srtt, rx_rto, rx_minrto uint32
rx_rttvar, rx_srtt int32
rx_rto, rx_minrto uint32
snd_wnd, rcv_wnd, rmt_wnd, cwnd, probe uint32
interval, ts_flush, xmit uint32
nodelay, updated uint32
@ -146,6 +145,7 @@ type KCP struct {
buffer []byte
output Output
datashard, parityshard int
}
type ackItem struct {
@ -340,20 +340,24 @@ func (kcp *KCP) update_ack(rtt int32) {
// https://tools.ietf.org/html/rfc6298
var rto uint32
if kcp.rx_srtt == 0 {
kcp.rx_srtt = uint32(rtt)
kcp.rx_rttval = uint32(rtt) / 2
kcp.rx_srtt = rtt
kcp.rx_rttvar = rtt >> 1
} else {
delta := rtt - int32(kcp.rx_srtt)
delta := rtt - kcp.rx_srtt
kcp.rx_srtt += delta >> 3
if delta < 0 {
delta = -delta
}
kcp.rx_rttval = (3*kcp.rx_rttval + uint32(delta)) / 4
kcp.rx_srtt = (7*kcp.rx_srtt + uint32(rtt)) / 8
if kcp.rx_srtt < 1 {
kcp.rx_srtt = 1
if rtt < kcp.rx_srtt-kcp.rx_rttvar {
// if the new RTT sample is below the bottom of the range
// of what an RTT measurement is expected to be,
// give it an 8x reduced weight versus its normal weighting
kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 5
} else {
kcp.rx_rttvar += (delta - kcp.rx_rttvar) >> 2
}
}
rto = kcp.rx_srtt + _imax_(kcp.interval, 4*kcp.rx_rttval)
rto = uint32(kcp.rx_srtt) + _imax_(kcp.interval, uint32(kcp.rx_rttvar)<<2)
kcp.rx_rto = _ibound_(kcp.rx_minrto, rto, IKCP_RTO_MAX)
}
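A quick worked example of the smoothing above (values chosen for illustration, not taken from the source):
```Go
// previous rx_srtt = 100 ms, rx_rttvar = 20 ms, new sample rtt = 60 ms
//   delta     = 60 - 100                  = -40
//   rx_srtt   = 100 + (-40 >> 3)          = 95
//   |delta|   = 40; rtt (60) < rx_srtt (95) - rx_rttvar (20) = 75,
//               so the reduced 1/32 weight applies:
//   rx_rttvar = 20 + ((40 - 20) >> 5)     = 20
//   rto       = 95 + max(interval, 20<<2) = 95 + max(interval, 80)
//   rx_rto    = rto clamped to [rx_minrto, IKCP_RTO_MAX]
```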
@ -395,7 +399,7 @@ func (kcp *KCP) parse_fastack(sn uint32) {
seg := &kcp.snd_buf[k]
if _itimediff(sn, seg.sn) < 0 {
break
} else if sn != seg.sn { // && kcp.current >= seg.ts+kcp.rx_srtt {
} else if sn != seg.sn {
seg.fastack++
}
}
@ -472,16 +476,17 @@ func (kcp *KCP) parse_data(newseg *Segment) {
}
// Input is called when you receive a low level packet (eg. UDP packet)
func (kcp *KCP) Input(data []byte, update_ack bool) int {
// regular indicates that a regular packet has been received (not from FEC)
func (kcp *KCP) Input(data []byte, regular, ackNoDelay bool) int {
una := kcp.snd_una
if len(data) < IKCP_OVERHEAD {
return -1
}
var maxack uint32
var recentack uint32
var flag int
current := currentMs()
for {
var ts, sn, length, una, conv uint32
var wnd uint16
@ -512,11 +517,18 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
return -3
}
// only trust window updates from regular packets. i.e: latest update
if regular {
kcp.rmt_wnd = uint32(wnd)
}
kcp.parse_una(una)
kcp.shrink_buf()
if cmd == IKCP_CMD_ACK {
if _itimediff(current, ts) >= 0 {
kcp.update_ack(_itimediff(current, ts))
}
kcp.parse_ack(sn)
kcp.shrink_buf()
if flag == 0 {
@ -525,7 +537,6 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
} else if _itimediff(sn, maxack) > 0 {
maxack = sn
}
recentack = ts
} else if cmd == IKCP_CMD_PUSH {
if _itimediff(sn, kcp.rcv_nxt+kcp.rcv_wnd) < 0 {
kcp.ack_push(sn, ts)
@ -559,12 +570,8 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
data = data[length:]
}
current := currentMs()
if flag != 0 && update_ack {
if flag != 0 && regular {
kcp.parse_fastack(maxack)
if _itimediff(current, recentack) >= 0 {
kcp.update_ack(_itimediff(current, recentack))
}
}
if _itimediff(kcp.snd_una, una) > 0 {
@ -589,6 +596,11 @@ func (kcp *KCP) Input(data []byte, update_ack bool) int {
}
}
if ackNoDelay && len(kcp.acklist) > 0 { // ack immediately
kcp.flush(true)
} else if kcp.rmt_wnd == 0 && len(kcp.acklist) > 0 { // window zero
kcp.flush(true)
}
return 0
}
@ -600,7 +612,7 @@ func (kcp *KCP) wnd_unused() int32 {
}
// flush pending data
func (kcp *KCP) flush() {
func (kcp *KCP) flush(ackOnly bool) {
buffer := kcp.buffer
change := 0
lost := false
@ -612,21 +624,42 @@ func (kcp *KCP) flush() {
seg.una = kcp.rcv_nxt
// flush acknowledges
ptr := buffer
var required []ackItem
for i, ack := range kcp.acklist {
size := len(buffer) - len(ptr)
if size+IKCP_OVERHEAD > int(kcp.mtu) {
kcp.output(buffer, size)
ptr = buffer
}
// filter jitters caused by bufferbloat
// filter necessary acks only
if ack.sn >= kcp.rcv_nxt || len(kcp.acklist)-1 == i {
seg.sn, seg.ts = ack.sn, ack.ts
ptr = seg.encode(ptr)
required = append(required, kcp.acklist[i])
}
}
kcp.acklist = nil
ptr := buffer
maxBatchSize := kcp.mtu / IKCP_OVERHEAD
for len(required) > 0 {
var batchSize int
if kcp.datashard > 0 && kcp.parityshard > 0 { // try triggering FEC
batchSize = int(_ibound_(1, uint32(len(required)/kcp.datashard), maxBatchSize))
} else {
batchSize = int(_ibound_(1, uint32(len(required)), maxBatchSize))
}
for len(required) >= batchSize {
for i := 0; i < batchSize; i++ {
ack := required[i]
seg.sn, seg.ts = ack.sn, ack.ts
ptr = seg.encode(ptr)
}
size := len(buffer) - len(ptr)
kcp.output(buffer, size)
ptr = buffer
required = required[batchSize:]
}
}
if ackOnly { // flush acks only
return
}
current := currentMs()
// probe window size (if remote window size equals zero)
if kcp.rmt_wnd == 0 {
@ -682,7 +715,7 @@ func (kcp *KCP) flush() {
}
// sliding window, controlled by snd_nxt && snd_una+cwnd
count := 0
newSegsCount := 0
for k := range kcp.snd_queue {
if _itimediff(kcp.snd_nxt, kcp.snd_una+cwnd) >= 0 {
break
@ -690,24 +723,13 @@ func (kcp *KCP) flush() {
newseg := kcp.snd_queue[k]
newseg.conv = kcp.conv
newseg.cmd = IKCP_CMD_PUSH
newseg.wnd = seg.wnd
newseg.ts = current
newseg.sn = kcp.snd_nxt
newseg.una = kcp.rcv_nxt
newseg.resendts = newseg.ts
newseg.rto = kcp.rx_rto
kcp.snd_buf = append(kcp.snd_buf, newseg)
kcp.snd_nxt++
count++
newSegsCount++
kcp.snd_queue[k].data = nil
}
kcp.snd_queue = kcp.snd_queue[count:]
// flag pending data
hasPending := false
if count > 0 {
hasPending = true
}
kcp.snd_queue = kcp.snd_queue[newSegsCount:]
// calculate resent
resent := uint32(kcp.fastresend)
@ -715,18 +737,37 @@ func (kcp *KCP) flush() {
resent = 0xffffffff
}
// flush data segments
// counters
var lostSegs, fastRetransSegs, earlyRetransSegs uint64
for k := range kcp.snd_buf {
current := currentMs()
// send new segments
for k := len(kcp.snd_buf) - newSegsCount; k < len(kcp.snd_buf); k++ {
segment := &kcp.snd_buf[k]
needsend := false
if segment.xmit == 0 {
needsend = true
segment.xmit++
segment.rto = kcp.rx_rto
segment.resendts = current + segment.rto
} else if _itimediff(current, segment.resendts) >= 0 {
segment.ts = current
segment.wnd = seg.wnd
segment.una = kcp.rcv_nxt
size := len(buffer) - len(ptr)
need := IKCP_OVERHEAD + len(segment.data)
if size+need > int(kcp.mtu) {
kcp.output(buffer, size)
ptr = buffer
}
ptr = segment.encode(ptr)
copy(ptr, segment.data)
ptr = ptr[len(segment.data):]
}
// check for retransmissions
for k := 0; k < len(kcp.snd_buf)-newSegsCount; k++ {
segment := &kcp.snd_buf[k]
needsend := false
if _itimediff(current, segment.resendts) >= 0 { // RTO
needsend = true
segment.xmit++
kcp.xmit++
@ -739,26 +780,22 @@ func (kcp *KCP) flush() {
lost = true
lostSegs++
} else if segment.fastack >= resent { // fast retransmit
lastsend := segment.resendts - segment.rto
if _itimediff(current, lastsend) >= int32(kcp.rx_rto/4) {
needsend = true
segment.xmit++
segment.fastack = 0
segment.rto = kcp.rx_rto
segment.resendts = current + segment.rto
change++
fastRetransSegs++
}
} else if segment.fastack > 0 && !hasPending { // early retransmit
lastsend := segment.resendts - segment.rto
if _itimediff(current, lastsend) >= int32(kcp.rx_rto/4) {
} else if segment.fastack > 0 && newSegsCount == 0 { // early retransmit
needsend = true
segment.xmit++
segment.fastack = 0
segment.rto = kcp.rx_rto
segment.resendts = current + segment.rto
change++
earlyRetransSegs++
}
}
if needsend {
segment.ts = current
@ -783,17 +820,29 @@ func (kcp *KCP) flush() {
}
}
atomic.AddUint64(&DefaultSnmp.RetransSegs, lostSegs+fastRetransSegs+earlyRetransSegs)
atomic.AddUint64(&DefaultSnmp.LostSegs, lostSegs)
atomic.AddUint64(&DefaultSnmp.EarlyRetransSegs, earlyRetransSegs)
atomic.AddUint64(&DefaultSnmp.FastRetransSegs, fastRetransSegs)
// flush remaining segments
size := len(buffer) - len(ptr)
if size > 0 {
kcp.output(buffer, size)
}
// counter updates
sum := lostSegs
if lostSegs > 0 {
atomic.AddUint64(&DefaultSnmp.LostSegs, lostSegs)
}
if fastRetransSegs > 0 {
atomic.AddUint64(&DefaultSnmp.FastRetransSegs, fastRetransSegs)
sum += fastRetransSegs
}
if earlyRetransSegs > 0 {
atomic.AddUint64(&DefaultSnmp.EarlyRetransSegs, earlyRetransSegs)
sum += earlyRetransSegs
}
if sum > 0 {
atomic.AddUint64(&DefaultSnmp.RetransSegs, sum)
}
// update ssthresh
// rate halving, https://tools.ietf.org/html/rfc6937
if change != 0 {
@ -846,7 +895,7 @@ func (kcp *KCP) Update() {
if _itimediff(current, kcp.ts_flush) >= 0 {
kcp.ts_flush = current + kcp.interval
}
kcp.flush()
kcp.flush(false)
}
}
@ -900,6 +949,12 @@ func (kcp *KCP) Check() uint32 {
return current + minimal
}
// set datashard,parityshard info for some optimizations
func (kcp *KCP) setFEC(datashard, parityshard int) {
kcp.datashard = datashard
kcp.parityshard = parityshard
}
// SetMtu changes MTU size, default is 1400
func (kcp *KCP) SetMtu(mtu int) int {
if mtu < 50 || mtu < IKCP_OVERHEAD {
@ -962,3 +1017,12 @@ func (kcp *KCP) WndSize(sndwnd, rcvwnd int) int {
func (kcp *KCP) WaitSnd() int {
return len(kcp.snd_buf) + len(kcp.snd_queue)
}
// Cwnd returns current congestion window size
func (kcp *KCP) Cwnd() uint32 {
cwnd := _imin_(kcp.snd_wnd, kcp.rmt_wnd)
if kcp.nocwnd == 0 {
cwnd = _imin_(kcp.cwnd, cwnd)
}
return cwnd
}

View File

@ -30,7 +30,6 @@ const (
mtuLimit = 2048
rxQueueLimit = 8192
rxFECMulti = 3 // FEC keeps rxFECMulti* (dataShard+parityShard) ordered packets in memory
defaultKeepAliveInterval = 10
)
const (
@ -40,6 +39,7 @@ const (
var (
xmitBuf sync.Pool
sid uint32
)
func init() {
@ -51,25 +51,36 @@ func init() {
type (
// UDPSession defines a KCP session implemented by UDP
UDPSession struct {
// core
sid uint32
conn net.PacketConn // the underlying packet socket
kcp *KCP // the core ARQ
l *Listener // point to server listener if it's a server socket
fec *FEC // forward error correction
conn net.PacketConn // the underlying packet socket
block BlockCrypt
block BlockCrypt // encryption
sockbuff []byte // kcp receiving is based on packet, I turn it into stream
// forward error correction
fec *FEC
fecDataShards [][]byte
fecHeaderOffset int
fecPayloadOffset int
fecCnt int // count datashard
fecMaxSize int // record maximum data length in datashard
// settings
remote net.Addr
rd time.Time // read deadline
wd time.Time // write deadline
sockbuff []byte // kcp receiving is based on packet, I turn it into stream
headerSize int
updateInterval int32
ackNoDelay bool
// notifications
die chan struct{}
chReadEvent chan struct{}
chWriteEvent chan struct{}
chUDPOutput chan []byte
headerSize int
ackNoDelay bool
isClosed bool
keepAliveInterval int32
mu sync.Mutex
updateInterval int32
}
setReadBuffer interface {
@ -84,16 +95,30 @@ type (
// newUDPSession creates a new udp session for client or server
func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn net.PacketConn, remote net.Addr, block BlockCrypt) *UDPSession {
sess := new(UDPSession)
sess.chUDPOutput = make(chan []byte)
sess.sid = atomic.AddUint32(&sid, 1)
sess.die = make(chan struct{})
sess.chReadEvent = make(chan struct{}, 1)
sess.chWriteEvent = make(chan struct{}, 1)
sess.remote = remote
sess.conn = conn
sess.keepAliveInterval = defaultKeepAliveInterval
sess.l = l
sess.block = block
// FEC initialization
sess.fec = newFEC(rxFECMulti*(dataShards+parityShards), dataShards, parityShards)
if sess.fec != nil {
if sess.block != nil {
sess.fecHeaderOffset = cryptHeaderSize
}
sess.fecPayloadOffset = sess.fecHeaderOffset + fecHeaderSize
// fec data shards
sess.fecDataShards = make([][]byte, sess.fec.shardSize)
for k := range sess.fecDataShards {
sess.fecDataShards[k] = make([]byte, mtuLimit)
}
}
// calculate header size
if sess.block != nil {
sess.headerSize += cryptHeaderSize
@ -104,19 +129,14 @@ func newUDPSession(conv uint32, dataShards, parityShards int, l *Listener, conn
sess.kcp = NewKCP(conv, func(buf []byte, size int) {
if size >= IKCP_OVERHEAD {
ext := xmitBuf.Get().([]byte)[:sess.headerSize+size]
copy(ext[sess.headerSize:], buf)
select {
case sess.chUDPOutput <- ext:
case <-sess.die:
}
sess.output(buf[:size])
}
})
sess.kcp.WndSize(defaultWndSize, defaultWndSize)
sess.kcp.SetMtu(IKCP_MTU_DEF - sess.headerSize)
sess.kcp.setFEC(dataShards, parityShards)
go sess.updateTask()
go sess.outputTask()
updater.addSession(sess)
if sess.l == nil { // it's a client connection
go sess.readLoop()
atomic.AddUint64(&DefaultSnmp.ActiveOpens, 1)
@ -207,19 +227,19 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
}
}
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
if s.kcp.WaitSnd() < int(s.kcp.Cwnd()) {
n = len(b)
max := s.kcp.mss << 8
for {
if len(b) <= int(max) { // in most cases
if len(b) <= int(s.kcp.mss) {
s.kcp.Send(b)
break
} else {
s.kcp.Send(b[:max])
b = b[max:]
s.kcp.Send(b[:s.kcp.mss])
b = b[s.kcp.mss:]
}
}
s.kcp.flush()
s.kcp.flush(false)
s.mu.Unlock()
atomic.AddUint64(&DefaultSnmp.BytesSent, uint64(n))
return n, nil
@ -249,6 +269,8 @@ func (s *UDPSession) Write(b []byte) (n int, err error) {
// Close closes the connection.
func (s *UDPSession) Close() error {
updater.removeSession(s)
s.mu.Lock()
defer s.mu.Unlock()
if s.isClosed {
@ -373,74 +395,59 @@ func (s *UDPSession) SetWriteBuffer(bytes int) error {
return errors.New(errInvalidOperation)
}
// SetKeepAlive changes per-connection NAT keepalive interval; 0 to disable, default to 10s
func (s *UDPSession) SetKeepAlive(interval int) {
atomic.StoreInt32(&s.keepAliveInterval, int32(interval))
}
func (s *UDPSession) outputTask() {
// offset pre-compute
fecOffset := 0
if s.block != nil {
fecOffset = cryptHeaderSize
}
szOffset := fecOffset + fecHeaderSize
// fec data group
var cacheLine []byte
var fecGroup [][]byte
var fecCnt int
var fecMaxSize int
if s.fec != nil {
cacheLine = make([]byte, s.fec.shardSize*mtuLimit)
fecGroup = make([][]byte, s.fec.shardSize)
for k := range fecGroup {
fecGroup[k] = cacheLine[k*mtuLimit : (k+1)*mtuLimit]
}
}
// keepalive
var lastPing time.Time
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for {
select {
// receive from a synchronous channel
// buffered channel must be avoided, because of "bufferbloat"
case ext := <-s.chUDPOutput:
// output pipeline entry
// steps for output data processing:
// 1. FEC
// 2. CRC32
// 3. Encryption
// 4. emit to emitTask
// 5. emitTask WriteTo kernel
func (s *UDPSession) output(buf []byte) {
var ecc [][]byte
if s.fec != nil {
s.fec.markData(ext[fecOffset:])
// explicit size, including 2bytes size itself.
binary.LittleEndian.PutUint16(ext[szOffset:], uint16(len(ext[szOffset:])))
// copy data to fec group
// extend buf's header space
ext := xmitBuf.Get().([]byte)[:s.headerSize+len(buf)]
copy(ext[s.headerSize:], buf)
// FEC stage
if s.fec != nil {
s.fec.markData(ext[s.fecHeaderOffset:])
binary.LittleEndian.PutUint16(ext[s.fecPayloadOffset:], uint16(len(ext[s.fecPayloadOffset:])))
// copy data to fec datashards
sz := len(ext)
fecGroup[fecCnt] = fecGroup[fecCnt][:sz]
copy(fecGroup[fecCnt], ext)
fecCnt++
if sz > fecMaxSize {
fecMaxSize = sz
s.fecDataShards[s.fecCnt] = s.fecDataShards[s.fecCnt][:sz]
copy(s.fecDataShards[s.fecCnt], ext)
s.fecCnt++
// record max datashard length
if sz > s.fecMaxSize {
s.fecMaxSize = sz
}
// calculate Reed-Solomon Erasure Code
if fecCnt == s.fec.dataShards {
if s.fecCnt == s.fec.dataShards {
// bzero each datashard's tail
for i := 0; i < s.fec.dataShards; i++ {
shard := fecGroup[i]
shard := s.fecDataShards[i]
slen := len(shard)
xorBytes(shard[slen:fecMaxSize], shard[slen:fecMaxSize], shard[slen:fecMaxSize])
xorBytes(shard[slen:s.fecMaxSize], shard[slen:s.fecMaxSize], shard[slen:s.fecMaxSize])
}
ecc = s.fec.calcECC(fecGroup, szOffset, fecMaxSize)
// calculation of RS
ecc = s.fec.Encode(s.fecDataShards, s.fecPayloadOffset, s.fecMaxSize)
for k := range ecc {
s.fec.markFEC(ecc[k][fecOffset:])
ecc[k] = ecc[k][:fecMaxSize]
s.fec.markFEC(ecc[k][s.fecHeaderOffset:])
ecc[k] = ecc[k][:s.fecMaxSize]
}
fecCnt = 0
fecMaxSize = 0
// reset counters to zero
s.fecCnt = 0
s.fecMaxSize = 0
}
}
// encryption stage
if s.block != nil {
io.ReadFull(rand.Reader, ext[:nonceSize])
checksum := crc32.ChecksumIEEE(ext[cryptHeaderSize:])
@ -457,67 +464,24 @@ func (s *UDPSession) outputTask() {
}
}
nbytes := 0
nsegs := 0
// if mrand.Intn(100) < 50 {
if n, err := s.conn.WriteTo(ext, s.remote); err == nil {
nbytes += n
nsegs++
}
// }
// emit stage
defaultEmitter.emit(emitPacket{s.conn, s.remote, ext, true})
if ecc != nil {
for k := range ecc {
if n, err := s.conn.WriteTo(ecc[k], s.remote); err == nil {
nbytes += n
nsegs++
}
}
}
atomic.AddUint64(&DefaultSnmp.OutSegs, uint64(nsegs))
atomic.AddUint64(&DefaultSnmp.OutBytes, uint64(nbytes))
xmitBuf.Put(ext)
case <-ticker.C: // NAT keep-alive
interval := time.Duration(atomic.LoadInt32(&s.keepAliveInterval)) * time.Second
if interval > 0 && time.Now().After(lastPing.Add(interval)) {
var rnd uint16
binary.Read(rand.Reader, binary.LittleEndian, &rnd)
sz := int(rnd)%(IKCP_MTU_DEF-s.headerSize-IKCP_OVERHEAD) + s.headerSize + IKCP_OVERHEAD
ping := make([]byte, sz) // randomized ping packet
io.ReadFull(rand.Reader, ping)
s.conn.WriteTo(ping, s.remote)
lastPing = time.Now()
}
case <-s.die:
return
defaultEmitter.emit(emitPacket{s.conn, s.remote, ecc[k], false})
}
}
}
// kcp update, input loop
func (s *UDPSession) updateTask() {
tc := time.After(time.Duration(atomic.LoadInt32(&s.updateInterval)) * time.Millisecond)
for {
select {
case <-tc:
// kcp update, returns the interval before the next call
func (s *UDPSession) update() time.Duration {
s.mu.Lock()
s.kcp.flush()
if s.kcp.WaitSnd() < int(s.kcp.snd_wnd) {
s.kcp.flush(false)
if s.kcp.WaitSnd() < int(s.kcp.Cwnd()) {
s.notifyWriteEvent()
}
s.mu.Unlock()
tc = time.After(time.Duration(atomic.LoadInt32(&s.updateInterval)) * time.Millisecond)
case <-s.die:
if s.l != nil { // has listener
select {
case s.l.chDeadlinks <- s.remote:
case <-s.l.die:
}
}
return
}
}
return time.Duration(atomic.LoadInt32(&s.updateInterval)) * time.Millisecond
}
// GetConv gets conversation id of a session
@ -540,28 +504,28 @@ func (s *UDPSession) notifyWriteEvent() {
}
func (s *UDPSession) kcpInput(data []byte) {
var kcpInErrors, fecErrs, fecRecovered, fecSegs uint64
var kcpInErrors, fecErrs, fecRecovered, fecParityShards uint64
if s.fec != nil {
f := s.fec.decode(data)
f := s.fec.decodeBytes(data)
s.mu.Lock()
if f.flag == typeData {
if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true); ret != 0 {
if ret := s.kcp.Input(data[fecHeaderSizePlus2:], true, s.ackNoDelay); ret != 0 {
kcpInErrors++
}
}
if f.flag == typeData || f.flag == typeFEC {
if f.flag == typeFEC {
fecSegs++
fecParityShards++
}
if recovers := s.fec.input(f); recovers != nil {
if recovers := s.fec.Decode(f); recovers != nil {
for _, r := range recovers {
if len(r) >= 2 { // must be larger than 2bytes
sz := binary.LittleEndian.Uint16(r)
if int(sz) <= len(r) && sz >= 2 {
if ret := s.kcp.Input(r[2:sz], false); ret == 0 {
if ret := s.kcp.Input(r[2:sz], false, s.ackNoDelay); ret == 0 {
fecRecovered++
} else {
kcpInErrors++
@ -580,29 +544,23 @@ func (s *UDPSession) kcpInput(data []byte) {
if n := s.kcp.PeekSize(); n > 0 {
s.notifyReadEvent()
}
if s.ackNoDelay {
s.kcp.flush()
}
s.mu.Unlock()
} else {
s.mu.Lock()
if ret := s.kcp.Input(data, true); ret != 0 {
if ret := s.kcp.Input(data, true, s.ackNoDelay); ret != 0 {
kcpInErrors++
}
// notify reader
if n := s.kcp.PeekSize(); n > 0 {
s.notifyReadEvent()
}
if s.ackNoDelay {
s.kcp.flush()
}
s.mu.Unlock()
}
atomic.AddUint64(&DefaultSnmp.InSegs, 1)
atomic.AddUint64(&DefaultSnmp.InBytes, uint64(len(data)))
if fecSegs > 0 {
atomic.AddUint64(&DefaultSnmp.FECSegs, fecSegs)
if fecParityShards > 0 {
atomic.AddUint64(&DefaultSnmp.FECParityShards, fecParityShards)
}
if kcpInErrors > 0 {
atomic.AddUint64(&DefaultSnmp.KCPInErrors, kcpInErrors)

View File

@ -27,7 +27,7 @@ type Snmp struct {
RepeatSegs uint64 // number of segs duplicated
FECRecovered uint64 // correct packets recovered from FEC
FECErrs uint64 // incorrect packets recovered from FEC
FECSegs uint64 // FEC segments received
FECParityShards uint64 // FEC parity shards received
FECShortShards uint64 // number of data shards that were not enough for recovery
}
@ -35,6 +35,7 @@ func newSnmp() *Snmp {
return new(Snmp)
}
// Header returns all field names
func (s *Snmp) Header() []string {
return []string{
"BytesSent",
@ -55,13 +56,14 @@ func (s *Snmp) Header() []string {
"EarlyRetransSegs",
"LostSegs",
"RepeatSegs",
"FECSegs",
"FECParityShards",
"FECErrs",
"FECRecovered",
"FECShortShards",
}
}
// ToSlice returns current snmp info as slice
func (s *Snmp) ToSlice() []string {
snmp := s.Copy()
return []string{
@ -83,7 +85,7 @@ func (s *Snmp) ToSlice() []string {
fmt.Sprint(snmp.EarlyRetransSegs),
fmt.Sprint(snmp.LostSegs),
fmt.Sprint(snmp.RepeatSegs),
fmt.Sprint(snmp.FECSegs),
fmt.Sprint(snmp.FECParityShards),
fmt.Sprint(snmp.FECErrs),
fmt.Sprint(snmp.FECRecovered),
fmt.Sprint(snmp.FECShortShards),
@ -111,7 +113,7 @@ func (s *Snmp) Copy() *Snmp {
d.EarlyRetransSegs = atomic.LoadUint64(&s.EarlyRetransSegs)
d.LostSegs = atomic.LoadUint64(&s.LostSegs)
d.RepeatSegs = atomic.LoadUint64(&s.RepeatSegs)
d.FECSegs = atomic.LoadUint64(&s.FECSegs)
d.FECParityShards = atomic.LoadUint64(&s.FECParityShards)
d.FECErrs = atomic.LoadUint64(&s.FECErrs)
d.FECRecovered = atomic.LoadUint64(&s.FECRecovered)
d.FECShortShards = atomic.LoadUint64(&s.FECShortShards)
@ -138,7 +140,7 @@ func (s *Snmp) Reset() {
atomic.StoreUint64(&s.EarlyRetransSegs, 0)
atomic.StoreUint64(&s.LostSegs, 0)
atomic.StoreUint64(&s.RepeatSegs, 0)
atomic.StoreUint64(&s.FECSegs, 0)
atomic.StoreUint64(&s.FECParityShards, 0)
atomic.StoreUint64(&s.FECErrs, 0)
atomic.StoreUint64(&s.FECRecovered, 0)
atomic.StoreUint64(&s.FECShortShards, 0)

vendor/github.com/xtaci/kcp-go/updater.go generated vendored Normal file
View File

@ -0,0 +1,104 @@
package kcp
import (
"container/heap"
"sync"
"time"
)
var updater updateHeap
func init() {
updater.init()
go updater.updateTask()
}
type entry struct {
sid uint32
ts time.Time
s *UDPSession
}
type updateHeap struct {
entries []entry
indices map[uint32]int
mu sync.Mutex
chWakeUp chan struct{}
}
func (h *updateHeap) Len() int { return len(h.entries) }
func (h *updateHeap) Less(i, j int) bool { return h.entries[i].ts.Before(h.entries[j].ts) }
func (h *updateHeap) Swap(i, j int) {
h.entries[i], h.entries[j] = h.entries[j], h.entries[i]
h.indices[h.entries[i].sid] = i
h.indices[h.entries[j].sid] = j
}
func (h *updateHeap) Push(x interface{}) {
h.entries = append(h.entries, x.(entry))
n := len(h.entries)
h.indices[h.entries[n-1].sid] = n - 1
}
func (h *updateHeap) Pop() interface{} {
n := len(h.entries)
x := h.entries[n-1]
h.entries = h.entries[0 : n-1]
delete(h.indices, x.sid)
return x
}
func (h *updateHeap) init() {
h.indices = make(map[uint32]int)
h.chWakeUp = make(chan struct{}, 1)
}
func (h *updateHeap) addSession(s *UDPSession) {
h.mu.Lock()
heap.Push(h, entry{s.sid, time.Now(), s})
h.mu.Unlock()
h.wakeup()
}
func (h *updateHeap) removeSession(s *UDPSession) {
h.mu.Lock()
if idx, ok := h.indices[s.sid]; ok {
heap.Remove(h, idx)
}
h.mu.Unlock()
}
func (h *updateHeap) wakeup() {
select {
case h.chWakeUp <- struct{}{}:
default:
}
}
func (h *updateHeap) updateTask() {
var timer <-chan time.Time
for {
select {
case <-timer:
case <-h.chWakeUp:
}
h.mu.Lock()
hlen := h.Len()
now := time.Now()
for i := 0; i < hlen; i++ {
entry := heap.Pop(h).(entry)
if now.After(entry.ts) {
entry.ts = now.Add(entry.s.update())
heap.Push(h, entry)
} else {
heap.Push(h, entry)
break
}
}
if h.Len() > 0 {
timer = time.After(h.entries[0].ts.Sub(now))
}
h.mu.Unlock()
}
}

View File

@ -65,7 +65,7 @@ func safeXORBytes(dst, a, b []byte) int {
func xorBytes(dst, a, b []byte) int {
if supportsUnaligned {
return fastXORBytes(dst, a, b)
} else {
}
// TODO(hanwen): if (dst, a, b) have common alignment
// we could still try fastXORBytes. It is not clear
// how often this happens, and it's only worth it if
@ -73,7 +73,6 @@ func xorBytes(dst, a, b []byte) int {
// accelerated.
return safeXORBytes(dst, a, b)
}
}
// fastXORWords XORs multiples of 4 or 8 bytes (depending on architecture.)
// The arguments are assumed to be of equal length.

View File

@ -1,23 +0,0 @@
The MIT License (MIT)
Copyright (c) 2015 Klaus Post
Copyright (c) 2015 Backblaze
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@ -1,125 +0,0 @@
//+build ignore
// Copyright 2015, Klaus Post, see LICENSE for details.
//
// Simple decoder example.
//
// The decoder reverses the process of "simple-encoder.go"
//
// To build an executable use:
//
// go build simple-decoder.go
//
// Simple Encoder/Decoder Shortcomings:
// * If the file size of the input isn't divisible by the number of data shards
// the output will contain extra zeroes
//
// * If the shard numbers aren't the same for the decoder as in the
// encoder, invalid output will be generated.
//
// * If values have changed in a shard, it cannot be reconstructed.
//
// * If two shards have been swapped, reconstruction will always fail.
// You need to supply the shards in the same order as they were given to you.
//
// The solution for this is to save a metadata file containing:
//
// * File size.
// * The number of data/parity shards.
// * HASH of each shard.
// * Order of the shards.
//
// If you save these properties, you should be able to detect file corruption
// in a shard and be able to reconstruct your data if you have the needed number of shards left.
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"github.com/klauspost/reedsolomon"
)
var dataShards = flag.Int("data", 4, "Number of shards to split the data into")
var parShards = flag.Int("par", 2, "Number of parity shards")
var outFile = flag.String("out", "", "Alternative output path/file")
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, " simple-decoder [-flags] basefile.ext\nDo not add the number to the filename.\n")
fmt.Fprintf(os.Stderr, "Valid flags:\n")
flag.PrintDefaults()
}
}
func main() {
// Parse flags
flag.Parse()
args := flag.Args()
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "Error: No filenames given\n")
flag.Usage()
os.Exit(1)
}
fname := args[0]
// Create matrix
enc, err := reedsolomon.New(*dataShards, *parShards)
checkErr(err)
// Create shards and load the data.
shards := make([][]byte, *dataShards+*parShards)
for i := range shards {
infn := fmt.Sprintf("%s.%d", fname, i)
fmt.Println("Opening", infn)
shards[i], err = ioutil.ReadFile(infn)
if err != nil {
fmt.Println("Error reading file", err)
shards[i] = nil
}
}
// Verify the shards
ok, err := enc.Verify(shards)
if ok {
fmt.Println("No reconstruction needed")
} else {
fmt.Println("Verification failed. Reconstructing data")
err = enc.Reconstruct(shards)
if err != nil {
fmt.Println("Reconstruct failed -", err)
os.Exit(1)
}
ok, err = enc.Verify(shards)
if !ok {
fmt.Println("Verification failed after reconstruction, data likely corrupted.")
os.Exit(1)
}
checkErr(err)
}
// Join the shards and write them
outfn := *outFile
if outfn == "" {
outfn = fname
}
fmt.Println("Writing data to", outfn)
f, err := os.Create(outfn)
checkErr(err)
// We don't know the exact filesize.
err = enc.Join(f, shards, len(shards[0])**dataShards)
checkErr(err)
}
func checkErr(err error) {
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
os.Exit(2)
}
}
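The example headers above suggest storing a small metadata file (file size, shard counts, per-shard hashes, shard order) alongside the shards. Below is a minimal sketch of such a record; the `shardMeta` type, its JSON layout, and the toy values are illustrative assumptions, not part of the library.

```Go
package main

import (
	"crypto/sha256"
	"encoding/json"
	"fmt"
)

// shardMeta is a hypothetical metadata record along the lines suggested
// in the example headers above; the field layout is not part of the library.
type shardMeta struct {
	FileSize     int64    `json:"file_size"`
	DataShards   int      `json:"data_shards"`
	ParityShards int      `json:"parity_shards"`
	ShardHashes  []string `json:"shard_hashes"` // hex SHA-256, in shard order
}

func main() {
	// Two toy shards stand in for real shard files.
	shards := [][]byte{[]byte("shard-0"), []byte("shard-1")}
	meta := shardMeta{FileSize: 14, DataShards: 1, ParityShards: 1}
	for _, s := range shards {
		sum := sha256.Sum256(s)
		meta.ShardHashes = append(meta.ShardHashes, fmt.Sprintf("%x", sum))
	}
	out, _ := json.MarshalIndent(meta, "", "  ")
	fmt.Println(string(out)) // persist this next to the shard files
}
```

With a record like this the decoder can check each shard's hash before reconstruction and restore the original shard order even if files were renamed.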

View File

@ -1,112 +0,0 @@
//+build ignore
// Copyright 2015, Klaus Post, see LICENSE for details.
//
// Simple encoder example
//
// The encoder encodes a single file into a number of shards
// To reverse the process see "simple-decoder.go"
//
// To build an executable use:
//
// go build simple-encoder.go
//
// Simple Encoder/Decoder Shortcomings:
// * If the file size of the input isn't divisible by the number of data shards
// the output will contain extra zeroes
//
// * If the shard numbers aren't the same for the decoder as for the
// encoder, invalid output will be generated.
//
// * If values have changed in a shard, it cannot be reconstructed.
//
// * If two shards have been swapped, reconstruction will always fail.
// You need to supply the shards in the same order as they were given to you.
//
// The solution for this is to save a metadata file containing:
//
// * File size.
// * The number of data/parity shards.
// * HASH of each shard.
// * Order of the shards.
//
// If you save these properties, you should be able to detect file corruption
// in a shard and be able to reconstruct your data if you have the needed number of shards left.
package main
import (
"flag"
"fmt"
"io/ioutil"
"os"
"path/filepath"
"github.com/klauspost/reedsolomon"
)
var dataShards = flag.Int("data", 4, "Number of shards to split the data into, must be below 257.")
var parShards = flag.Int("par", 2, "Number of parity shards")
var outDir = flag.String("out", "", "Alternative output directory")
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, " simple-encoder [-flags] filename.ext\n\n")
fmt.Fprintf(os.Stderr, "Valid flags:\n")
flag.PrintDefaults()
}
}
func main() {
// Parse command line parameters.
flag.Parse()
args := flag.Args()
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "Error: No input filename given\n")
flag.Usage()
os.Exit(1)
}
if *dataShards > 257 {
fmt.Fprintf(os.Stderr, "Error: Too many data shards\n")
os.Exit(1)
}
fname := args[0]
// Create encoding matrix.
enc, err := reedsolomon.New(*dataShards, *parShards)
checkErr(err)
fmt.Println("Opening", fname)
b, err := ioutil.ReadFile(fname)
checkErr(err)
// Split the file into equally sized shards.
shards, err := enc.Split(b)
checkErr(err)
fmt.Printf("File split into %d data+parity shards with %d bytes/shard.\n", len(shards), len(shards[0]))
// Encode parity
err = enc.Encode(shards)
checkErr(err)
// Write out the resulting files.
dir, file := filepath.Split(fname)
if *outDir != "" {
dir = *outDir
}
for i, shard := range shards {
outfn := fmt.Sprintf("%s.%d", file, i)
fmt.Println("Writing to", outfn)
err = ioutil.WriteFile(filepath.Join(dir, outfn), shard, os.ModePerm)
checkErr(err)
}
}
func checkErr(err error) {
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
os.Exit(2)
}
}

View File

@ -1,167 +0,0 @@
//+build ignore
// Copyright 2015, Klaus Post, see LICENSE for details.
//
// Stream decoder example.
//
// The decoder reverses the process of "stream-encoder.go"
//
// To build an executable use:
//
// go build stream-decoder.go
//
// Simple Encoder/Decoder Shortcomings:
// * If the file size of the input isn't divisible by the number of data shards
// the output will contain extra zeroes
//
// * If the shard numbers aren't the same for the decoder as for the
// encoder, invalid output will be generated.
//
// * If values have changed in a shard, it cannot be reconstructed.
//
// * If two shards have been swapped, reconstruction will always fail.
// You need to supply the shards in the same order as they were given to you.
//
// The solution for this is to save a metadata file containing:
//
// * File size.
// * The number of data/parity shards.
// * HASH of each shard.
// * Order of the shards.
//
// If you save these properties, you should be able to detect file corruption
// in a shard and be able to reconstruct your data if you have the needed number of shards left.
package main
import (
"flag"
"fmt"
"io"
"os"
"path/filepath"
"github.com/klauspost/reedsolomon"
)
var dataShards = flag.Int("data", 4, "Number of shards to split the data into")
var parShards = flag.Int("par", 2, "Number of parity shards")
var outFile = flag.String("out", "", "Alternative output path/file")
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s [-flags] basefile.ext\nDo not add the number to the filename.\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Valid flags:\n")
flag.PrintDefaults()
}
}
func main() {
// Parse flags
flag.Parse()
args := flag.Args()
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "Error: No filenames given\n")
flag.Usage()
os.Exit(1)
}
fname := args[0]
// Create matrix
enc, err := reedsolomon.NewStream(*dataShards, *parShards)
checkErr(err)
// Open the inputs
shards, size, err := openInput(*dataShards, *parShards, fname)
checkErr(err)
// Verify the shards
ok, err := enc.Verify(shards)
if ok {
fmt.Println("No reconstruction needed")
} else {
fmt.Println("Verification failed. Reconstructing data")
shards, size, err = openInput(*dataShards, *parShards, fname)
checkErr(err)
// Create out destination writers
out := make([]io.Writer, len(shards))
for i := range out {
if shards[i] == nil {
dir, _ := filepath.Split(fname)
outfn := fmt.Sprintf("%s.%d", fname, i)
fmt.Println("Creating", outfn)
out[i], err = os.Create(filepath.Join(dir, outfn))
checkErr(err)
}
}
err = enc.Reconstruct(shards, out)
if err != nil {
fmt.Println("Reconstruct failed -", err)
os.Exit(1)
}
// Close output.
for i := range out {
if out[i] != nil {
err := out[i].(*os.File).Close()
checkErr(err)
}
}
shards, size, err = openInput(*dataShards, *parShards, fname)
ok, err = enc.Verify(shards)
if !ok {
fmt.Println("Verification failed after reconstruction, data likely corrupted:", err)
os.Exit(1)
}
checkErr(err)
}
// Join the shards and write them
outfn := *outFile
if outfn == "" {
outfn = fname
}
fmt.Println("Writing data to", outfn)
f, err := os.Create(outfn)
checkErr(err)
shards, size, err = openInput(*dataShards, *parShards, fname)
checkErr(err)
// We don't know the exact filesize.
err = enc.Join(f, shards, int64(*dataShards)*size)
checkErr(err)
}
func openInput(dataShards, parShards int, fname string) (r []io.Reader, size int64, err error) {
// Create shards and load the data.
shards := make([]io.Reader, dataShards+parShards)
for i := range shards {
infn := fmt.Sprintf("%s.%d", fname, i)
fmt.Println("Opening", infn)
f, err := os.Open(infn)
if err != nil {
fmt.Println("Error reading file", err)
shards[i] = nil
continue
} else {
shards[i] = f
}
stat, err := f.Stat()
checkErr(err)
if stat.Size() > 0 {
size = stat.Size()
} else {
shards[i] = nil
}
}
return shards, size, nil
}
func checkErr(err error) {
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
os.Exit(2)
}
}

View File

@ -1,142 +0,0 @@
//+build ignore
// Copyright 2015, Klaus Post, see LICENSE for details.
//
// Simple stream encoder example
//
// The encoder encodes a single file into a number of shards
// To reverse the process see "stream-decoder.go"
//
// To build an executable use:
//
// go build stream-encoder.go
//
// Simple Encoder/Decoder Shortcomings:
// * If the file size of the input isn't divisible by the number of data shards
// the output will contain extra zeroes
//
// * If the shard numbers aren't the same for the decoder as for the
// encoder, invalid output will be generated.
//
// * If values have changed in a shard, it cannot be reconstructed.
//
// * If two shards have been swapped, reconstruction will always fail.
// You need to supply the shards in the same order as they were given to you.
//
// The solution for this is to save a metadata file containing:
//
// * File size.
// * The number of data/parity shards.
// * HASH of each shard.
// * Order of the shards.
//
// If you save these properties, you should be able to detect file corruption
// in a shard and be able to reconstruct your data if you have the needed number of shards left.
package main
import (
"flag"
"fmt"
"os"
"path/filepath"
"io"
"github.com/klauspost/reedsolomon"
)
var dataShards = flag.Int("data", 4, "Number of shards to split the data into, must be below 257.")
var parShards = flag.Int("par", 2, "Number of parity shards")
var outDir = flag.String("out", "", "Alternative output directory")
func init() {
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage of %s:\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s [-flags] filename.ext\n\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Valid flags:\n")
flag.PrintDefaults()
}
}
func main() {
// Parse command line parameters.
flag.Parse()
args := flag.Args()
if len(args) != 1 {
fmt.Fprintf(os.Stderr, "Error: No input filename given\n")
flag.Usage()
os.Exit(1)
}
if *dataShards > 257 {
fmt.Fprintf(os.Stderr, "Error: Too many data shards\n")
os.Exit(1)
}
fname := args[0]
// Create encoding matrix.
enc, err := reedsolomon.NewStream(*dataShards, *parShards)
checkErr(err)
fmt.Println("Opening", fname)
f, err := os.Open(fname)
checkErr(err)
instat, err := f.Stat()
checkErr(err)
shards := *dataShards + *parShards
out := make([]*os.File, shards)
// Create the resulting files.
dir, file := filepath.Split(fname)
if *outDir != "" {
dir = *outDir
}
for i := range out {
outfn := fmt.Sprintf("%s.%d", file, i)
fmt.Println("Creating", outfn)
out[i], err = os.Create(filepath.Join(dir, outfn))
checkErr(err)
}
// Split into files.
data := make([]io.Writer, *dataShards)
for i := range data {
data[i] = out[i]
}
// Do the split
err = enc.Split(f, data, instat.Size())
checkErr(err)
// Close and re-open the files.
input := make([]io.Reader, *dataShards)
for i := range data {
out[i].Close()
f, err := os.Open(out[i].Name())
checkErr(err)
input[i] = f
defer f.Close()
}
// Create parity output writers
parity := make([]io.Writer, *parShards)
for i := range parity {
parity[i] = out[*dataShards+i]
defer out[*dataShards+i].Close()
}
// Encode parity
err = enc.Encode(input, parity)
checkErr(err)
fmt.Printf("File split into %d data + %d parity shards.\n", *dataShards, *parShards)
}
func checkErr(err error) {
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %s", err.Error())
os.Exit(2)
}
}

File diff suppressed because one or more lines are too long

View File

@ -1,77 +0,0 @@
//+build !noasm
//+build !appengine
// Copyright 2015, Klaus Post, see LICENSE for details.
package reedsolomon
import (
"github.com/klauspost/cpuid"
)
//go:noescape
func galMulSSSE3(low, high, in, out []byte)
//go:noescape
func galMulSSSE3Xor(low, high, in, out []byte)
//go:noescape
func galMulAVX2Xor(low, high, in, out []byte)
//go:noescape
func galMulAVX2(low, high, in, out []byte)
// This is what the assembler routines do in blocks of 16 bytes:
/*
func galMulSSSE3(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] = low[l] ^ high[h]
}
}
func galMulSSSE3Xor(low, high, in, out []byte) {
for n, input := range in {
l := input & 0xf
h := input >> 4
out[n] ^= low[l] ^ high[h]
}
}
*/
func galMulSlice(c byte, in, out []byte) {
var done int
if cpuid.CPU.AVX2() {
galMulAVX2(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if cpuid.CPU.SSSE3() {
galMulSSSE3(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}
remain := len(in) - done
if remain > 0 {
mt := mulTable[c]
for i := done; i < len(in); i++ {
out[i] = mt[in[i]]
}
}
}
func galMulSliceXor(c byte, in, out []byte) {
var done int
if cpuid.CPU.AVX2() {
galMulAVX2Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 5) << 5
} else if cpuid.CPU.SSSE3() {
galMulSSSE3Xor(mulTableLow[c][:], mulTableHigh[c][:], in, out)
done = (len(in) >> 4) << 4
}
remain := len(in) - done
if remain > 0 {
mt := mulTable[c]
for i := done; i < len(in); i++ {
out[i] ^= mt[in[i]]
}
}
}

View File

@ -1,164 +0,0 @@
//+build !noasm !appengine
// Copyright 2015, Klaus Post, see LICENSE for details.
// Based on http://www.snia.org/sites/default/files2/SDC2013/presentations/NewThinking/EthanMiller_Screaming_Fast_Galois_Field%20Arithmetic_SIMD%20Instructions.pdf
// and http://jerasure.org/jerasure/gf-complete/tree/master
// func galMulSSSE3Xor(low, high, in, out []byte)
TEXT ·galMulSSSE3Xor(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVOU (SI), X6 // X6 low
MOVOU (DX), X7 // X7: high
MOVQ $15, BX // BX: low mask
MOVQ BX, X8
PXOR X5, X5
MOVQ in+48(FP), SI // R11: &in
MOVQ in_len+56(FP), R9 // R9: len(in)
MOVQ out+72(FP), DX // DX: &out
PSHUFB X5, X8 // X8: lomask (unpacked)
SHRQ $4, R9 // len(in) / 16
CMPQ R9, $0
JEQ done_xor
loopback_xor:
MOVOU (SI), X0 // in[x]
MOVOU (DX), X4 // out[x]
MOVOU X0, X1 // in[x]
MOVOU X6, X2 // low copy
MOVOU X7, X3 // high copy
PSRLQ $4, X1 // X1: high input
PAND X8, X0 // X0: low input
PAND X8, X1 // X1: high input
PSHUFB X0, X2 // X2: mul low part
PSHUFB X1, X3 // X3: mul high part
PXOR X2, X3 // X3: Result
PXOR X4, X3 // X3: Result xor existing out
MOVOU X3, (DX) // Store
ADDQ $16, SI // in+=16
ADDQ $16, DX // out+=16
SUBQ $1, R9
JNZ loopback_xor
done_xor:
RET
// func galMulSSSE3(low, high, in, out []byte)
TEXT ·galMulSSSE3(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVOU (SI), X6 // X6 low
MOVOU (DX), X7 // X7: high
MOVQ $15, BX // BX: low mask
MOVQ BX, X8
PXOR X5, X5
MOVQ in+48(FP), SI // R11: &in
MOVQ in_len+56(FP), R9 // R9: len(in)
MOVQ out+72(FP), DX // DX: &out
PSHUFB X5, X8 // X8: lomask (unpacked)
SHRQ $4, R9 // len(in) / 16
CMPQ R9, $0
JEQ done
loopback:
MOVOU (SI), X0 // in[x]
MOVOU X0, X1 // in[x]
MOVOU X6, X2 // low copy
MOVOU X7, X3 // high copy
PSRLQ $4, X1 // X1: high input
PAND X8, X0 // X0: low input
PAND X8, X1 // X1: high input
PSHUFB X0, X2 // X2: mul low part
PSHUFB X1, X3 // X3: mul high part
PXOR X2, X3 // X3: Result
MOVOU X3, (DX) // Store
ADDQ $16, SI // in+=16
ADDQ $16, DX // out+=16
SUBQ $1, R9
JNZ loopback
done:
RET
// func galMulAVX2Xor(low, high, in, out []byte)
TEXT ·galMulAVX2Xor(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVQ $15, BX // BX: low mask
MOVQ BX, X5
MOVOU (SI), X6 // X6 low
MOVOU (DX), X7 // X7: high
MOVQ in_len+56(FP), R9 // R9: len(in)
LONG $0x384de3c4; WORD $0x01f6 // VINSERTI128 YMM6, YMM6, XMM6, 1 ; low
LONG $0x3845e3c4; WORD $0x01ff // VINSERTI128 YMM7, YMM7, XMM7, 1 ; high
LONG $0x787d62c4; BYTE $0xc5 // VPBROADCASTB YMM8, XMM5 ; X8: lomask (unpacked)
SHRQ $5, R9 // len(in) /32
MOVQ out+72(FP), DX // DX: &out
MOVQ in+48(FP), SI // R11: &in
TESTQ R9, R9
JZ done_xor_avx2
loopback_xor_avx2:
LONG $0x066ffec5 // VMOVDQU YMM0, [rsi]
LONG $0x226ffec5 // VMOVDQU YMM4, [rdx]
LONG $0xd073f5c5; BYTE $0x04 // VPSRLQ YMM1, YMM0, 4 ; X1: high input
LONG $0xdb7dc1c4; BYTE $0xc0 // VPAND YMM0, YMM0, YMM8 ; X0: low input
LONG $0xdb75c1c4; BYTE $0xc8 // VPAND YMM1, YMM1, YMM8 ; X1: high input
LONG $0x004de2c4; BYTE $0xd0 // VPSHUFB YMM2, YMM6, YMM0 ; X2: mul low part
LONG $0x0045e2c4; BYTE $0xd9 // VPSHUFB YMM3, YMM7, YMM1 ; X3: mul high part
LONG $0xdbefedc5 // VPXOR YMM3, YMM2, YMM3 ; X3: Result
LONG $0xe4efe5c5 // VPXOR YMM4, YMM3, YMM4 ; X4: Result
LONG $0x227ffec5 // VMOVDQU [rdx], YMM4
ADDQ $32, SI // in+=32
ADDQ $32, DX // out+=32
SUBQ $1, R9
JNZ loopback_xor_avx2
done_xor_avx2:
// VZEROUPPER
BYTE $0xc5; BYTE $0xf8; BYTE $0x77
RET
// func galMulAVX2(low, high, in, out []byte)
TEXT ·galMulAVX2(SB), 7, $0
MOVQ low+0(FP), SI // SI: &low
MOVQ high+24(FP), DX // DX: &high
MOVQ $15, BX // BX: low mask
MOVQ BX, X5
MOVOU (SI), X6 // X6 low
MOVOU (DX), X7 // X7: high
MOVQ in_len+56(FP), R9 // R9: len(in)
LONG $0x384de3c4; WORD $0x01f6 // VINSERTI128 YMM6, YMM6, XMM6, 1 ; low
LONG $0x3845e3c4; WORD $0x01ff // VINSERTI128 YMM7, YMM7, XMM7, 1 ; high
LONG $0x787d62c4; BYTE $0xc5 // VPBROADCASTB YMM8, XMM5 ; X8: lomask (unpacked)
SHRQ $5, R9 // len(in) /32
MOVQ out+72(FP), DX // DX: &out
MOVQ in+48(FP), SI // R11: &in
TESTQ R9, R9
JZ done_avx2
loopback_avx2:
LONG $0x066ffec5 // VMOVDQU YMM0, [rsi]
LONG $0xd073f5c5; BYTE $0x04 // VPSRLQ YMM1, YMM0, 4 ; X1: high input
LONG $0xdb7dc1c4; BYTE $0xc0 // VPAND YMM0, YMM0, YMM8 ; X0: low input
LONG $0xdb75c1c4; BYTE $0xc8 // VPAND YMM1, YMM1, YMM8 ; X1: high input
LONG $0x004de2c4; BYTE $0xd0 // VPSHUFB YMM2, YMM6, YMM0 ; X2: mul low part
LONG $0x0045e2c4; BYTE $0xd9 // VPSHUFB YMM3, YMM7, YMM1 ; X3: mul high part
LONG $0xe3efedc5 // VPXOR YMM4, YMM2, YMM3 ; X4: Result
LONG $0x227ffec5 // VMOVDQU [rdx], YMM4
ADDQ $32, SI // in+=32
ADDQ $32, DX // out+=32
SUBQ $1, R9
JNZ loopback_avx2
done_avx2:
BYTE $0xc5; BYTE $0xf8; BYTE $0x77 // VZEROUPPER
RET

View File

@ -1,19 +0,0 @@
//+build !amd64 noasm appengine
// Copyright 2015, Klaus Post, see LICENSE for details.
package reedsolomon
func galMulSlice(c byte, in, out []byte) {
mt := mulTable[c]
for n, input := range in {
out[n] = mt[input]
}
}
func galMulSliceXor(c byte, in, out []byte) {
mt := mulTable[c]
for n, input := range in {
out[n] ^= mt[input]
}
}

View File

@ -1,132 +0,0 @@
//+build ignore
package main
import (
"fmt"
)
var logTable = [fieldSize]int16{
-1, 0, 1, 25, 2, 50, 26, 198,
3, 223, 51, 238, 27, 104, 199, 75,
4, 100, 224, 14, 52, 141, 239, 129,
28, 193, 105, 248, 200, 8, 76, 113,
5, 138, 101, 47, 225, 36, 15, 33,
53, 147, 142, 218, 240, 18, 130, 69,
29, 181, 194, 125, 106, 39, 249, 185,
201, 154, 9, 120, 77, 228, 114, 166,
6, 191, 139, 98, 102, 221, 48, 253,
226, 152, 37, 179, 16, 145, 34, 136,
54, 208, 148, 206, 143, 150, 219, 189,
241, 210, 19, 92, 131, 56, 70, 64,
30, 66, 182, 163, 195, 72, 126, 110,
107, 58, 40, 84, 250, 133, 186, 61,
202, 94, 155, 159, 10, 21, 121, 43,
78, 212, 229, 172, 115, 243, 167, 87,
7, 112, 192, 247, 140, 128, 99, 13,
103, 74, 222, 237, 49, 197, 254, 24,
227, 165, 153, 119, 38, 184, 180, 124,
17, 68, 146, 217, 35, 32, 137, 46,
55, 63, 209, 91, 149, 188, 207, 205,
144, 135, 151, 178, 220, 252, 190, 97,
242, 86, 211, 171, 20, 42, 93, 158,
132, 60, 57, 83, 71, 109, 65, 162,
31, 45, 67, 216, 183, 123, 164, 118,
196, 23, 73, 236, 127, 12, 111, 246,
108, 161, 59, 82, 41, 157, 85, 170,
251, 96, 134, 177, 187, 204, 62, 90,
203, 89, 95, 176, 156, 169, 160, 81,
11, 245, 22, 235, 122, 117, 44, 215,
79, 174, 213, 233, 230, 231, 173, 232,
116, 214, 244, 234, 168, 80, 88, 175,
}
const (
// The number of elements in the field.
fieldSize = 256
// The polynomial used to generate the logarithm table.
//
// There are a number of polynomials that work to generate
// a Galois field of 256 elements. The choice is arbitrary,
// and we just use the first one.
//
// The possibilities are: 29, 43, 45, 77, 95, 99, 101, 105,
// 113, 135, 141, 169, 195, 207, 231, and 245.
generatingPolynomial = 29
)
func main() {
t := generateExpTable()
fmt.Printf("var expTable = %#v\n", t)
//t2 := generateMulTableSplit(t)
//fmt.Printf("var mulTable = %#v\n", t2)
low, high := generateMulTableHalf(t)
fmt.Printf("var mulTableLow = %#v\n", low)
fmt.Printf("var mulTableHigh = %#v\n", high)
}
/**
* Generates the inverse log table.
*/
func generateExpTable() []byte {
result := make([]byte, fieldSize*2-2)
for i := 1; i < fieldSize; i++ {
log := logTable[i]
result[log] = byte(i)
result[log+fieldSize-1] = byte(i)
}
return result
}
func generateMulTable(expTable []byte) []byte {
result := make([]byte, 256*256)
for v := range result {
a := byte(v & 0xff)
b := byte(v >> 8)
if a == 0 || b == 0 {
result[v] = 0
continue
}
logA := int(logTable[a])
logB := int(logTable[b])
result[v] = expTable[logA+logB]
}
return result
}
func generateMulTableSplit(expTable []byte) [256][256]byte {
var result [256][256]byte
for a := range result {
for b := range result[a] {
if a == 0 || b == 0 {
result[a][b] = 0
continue
}
logA := int(logTable[a])
logB := int(logTable[b])
result[a][b] = expTable[logA+logB]
}
}
return result
}
func generateMulTableHalf(expTable []byte) (low [256][16]byte, high [256][16]byte) {
for a := range low {
for b := range low {
result := 0
if !(a == 0 || b == 0) {
logA := int(logTable[a])
logB := int(logTable[b])
result = int(expTable[logA+logB])
}
if (b & 0xf) == b {
low[a][b] = byte(result)
}
if (b & 0xf0) == b {
high[a][b>>4] = byte(result)
}
}
}
return
}
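The tables generated above encode discrete logarithms for GF(2^8) with generating polynomial 29 (full reduction polynomial 0x11d), so multiplication becomes an addition of logs followed by an exponentiation lookup. A self-contained sketch of that idea follows; `buildTables` and `gfMul` are illustrative names, not functions from this package.

```Go
package main

import "fmt"

// buildTables constructs log/exp tables for GF(2^8) using the same
// generating polynomial (29, i.e. 0x11d including the x^8 term) as above.
func buildTables() (logT [256]int, expT [512]byte) {
	x := 1
	for i := 0; i < 255; i++ {
		expT[i] = byte(x)
		expT[i+255] = byte(x) // duplicated so logT[a]+logT[b] never needs a modulo
		logT[x] = i
		x <<= 1
		if x&0x100 != 0 {
			x ^= 0x11d
		}
	}
	return
}

func main() {
	logT, expT := buildTables()
	gfMul := func(a, b byte) byte {
		if a == 0 || b == 0 {
			return 0
		}
		return expT[logT[a]+logT[b]]
	}
	fmt.Println(gfMul(3, 7)) // 9: (x+1)(x^2+x+1) = x^3+1 in GF(2^8)
}
```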

View File

@ -1,160 +0,0 @@
/**
* A thread-safe tree which caches inverted matrices.
*
* Copyright 2016, Peter Collins
*/
package reedsolomon
import (
"errors"
"sync"
)
// The tree uses a Reader-Writer mutex to make it thread-safe
// when accessing cached matrices and inserting new ones.
type inversionTree struct {
mutex *sync.RWMutex
root inversionNode
}
type inversionNode struct {
matrix matrix
children []*inversionNode
}
// newInversionTree initializes a tree for storing inverted matrices.
// Note that the root node is the identity matrix as it implies
// there were no errors with the original data.
func newInversionTree(dataShards, parityShards int) inversionTree {
identity, _ := identityMatrix(dataShards)
root := inversionNode{
matrix: identity,
children: make([]*inversionNode, dataShards+parityShards),
}
return inversionTree{
mutex: &sync.RWMutex{},
root: root,
}
}
// GetInvertedMatrix returns the cached inverted matrix or nil if it
// is not found in the tree keyed on the indices of invalid rows.
func (t inversionTree) GetInvertedMatrix(invalidIndices []int) matrix {
// Lock the tree for reading before accessing the tree.
t.mutex.RLock()
defer t.mutex.RUnlock()
// If no invalid indices were given we should return the root
// identity matrix.
if len(invalidIndices) == 0 {
return t.root.matrix
}
// Recursively search for the inverted matrix in the tree, passing in
// 0 as the parent index as we start at the root of the tree.
return t.root.getInvertedMatrix(invalidIndices, 0)
}
// errAlreadySet is returned if the root node matrix is overwritten
var errAlreadySet = errors.New("the root node identity matrix is already set")
// InsertInvertedMatrix inserts a new inverted matrix into the tree
// keyed by the indices of invalid rows. The total number of shards
// is required for creating the proper length lists of child nodes for
// each node.
func (t inversionTree) InsertInvertedMatrix(invalidIndices []int, matrix matrix, shards int) error {
// If no invalid indices were given then we are done because the
// root node is already set with the identity matrix.
if len(invalidIndices) == 0 {
return errAlreadySet
}
if !matrix.IsSquare() {
return errNotSquare
}
// Lock the tree for writing and reading before accessing the tree.
t.mutex.Lock()
defer t.mutex.Unlock()
// Recursively create nodes for the inverted matrix in the tree until
// we reach the node to insert the matrix to. We start by passing in
// 0 as the parent index as we start at the root of the tree.
t.root.insertInvertedMatrix(invalidIndices, matrix, shards, 0)
return nil
}
func (n inversionNode) getInvertedMatrix(invalidIndices []int, parent int) matrix {
// Get the child node to search next from the list of children. The
// list of children starts relative to the parent index passed in
// because the indices of invalid rows are sorted (by default). As we
// search recursively, the first invalid index gets popped off the list,
// so when searching through the list of children, use that first invalid
// index to find the child node.
firstIndex := invalidIndices[0]
node := n.children[firstIndex-parent]
// If the child node doesn't exist in the list yet, fail fast by
// returning, so we can construct and insert the proper inverted matrix.
if node == nil {
return nil
}
// If there's more than one invalid index left in the list we should
// keep searching recursively.
if len(invalidIndices) > 1 {
// Search recursively on the child node by passing in the invalid indices
// with the first index popped off the front. Also the parent index to
// pass down is the first index plus one.
return node.getInvertedMatrix(invalidIndices[1:], firstIndex+1)
}
// If there aren't any more invalid indices to search, we've found our
// node. Return it, however keep in mind that the matrix could still be
// nil because intermediary nodes in the tree are created sometimes with
// their inversion matrices uninitialized.
return node.matrix
}
func (n inversionNode) insertInvertedMatrix(invalidIndices []int, matrix matrix, shards, parent int) {
// As above, get the child node to search next from the list of children.
// The list of children starts relative to the parent index passed in
// because the indices of invalid rows are sorted (by default). As we
// search recursively, the first invalid index gets popped off the list,
// so when searching through the list of children, use that first invalid
// index to find the child node.
firstIndex := invalidIndices[0]
node := n.children[firstIndex-parent]
// If the child node doesn't exist in the list yet, create a new
// node because we have the writer lock and add it to the list
// of children.
if node == nil {
// Make the length of the list of children equal to the number
// of shards minus the first invalid index because the list of
// invalid indices is sorted, so only this length of errors
// are possible in the tree.
node = &inversionNode{
children: make([]*inversionNode, shards-firstIndex),
}
// Insert the new node into the tree at the first index relative
// to the parent index that was given in this recursive call.
n.children[firstIndex-parent] = node
}
// If there's more than one invalid index left in the list we should
// keep searching recursively in order to find the node to add our
// matrix.
if len(invalidIndices) > 1 {
// As above, search recursively on the child node by passing in
// the invalid indices with the first index popped off the front.
// Also the total number of shards and parent index are passed down
// which is equal to the first index plus one.
node.insertInvertedMatrix(invalidIndices[1:], matrix, shards, firstIndex+1)
} else {
// If there aren't any more invalid indices to search, we've found our
// node. Cache the inverted matrix in this node.
node.matrix = matrix
}
}
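To make the keying scheme concrete, here is a sketch of how the tree behaves. It assumes placement inside the reedsolomon package (for example as an extra _test.go file), since the types involved are unexported; the test name and values are mine, not the library's.

```Go
package reedsolomon

import "testing"

// TestInversionTreeSketch caches an inverted matrix keyed by the invalid
// row indices {1, 3} and reads it back. Illustrative only.
func TestInversionTreeSketch(t *testing.T) {
	tree := newInversionTree(4, 2) // 4 data + 2 parity shards
	if tree.GetInvertedMatrix([]int{1, 3}) != nil {
		t.Fatal("expected a cache miss before inserting")
	}
	m, _ := identityMatrix(4) // any square matrix will do for the sketch
	if err := tree.InsertInvertedMatrix([]int{1, 3}, m, 6); err != nil {
		t.Fatal(err)
	}
	if tree.GetInvertedMatrix([]int{1, 3}) == nil {
		t.Fatal("expected the cached matrix after inserting")
	}
}
```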

View File

@ -1,279 +0,0 @@
/**
* Matrix Algebra over an 8-bit Galois Field
*
* Copyright 2015, Klaus Post
* Copyright 2015, Backblaze, Inc.
*/
package reedsolomon
import (
"errors"
"fmt"
"strconv"
"strings"
)
// byte[row][col]
type matrix [][]byte
// newMatrix returns a matrix of zeros.
func newMatrix(rows, cols int) (matrix, error) {
if rows <= 0 {
return nil, errInvalidRowSize
}
if cols <= 0 {
return nil, errInvalidColSize
}
m := matrix(make([][]byte, rows))
for i := range m {
m[i] = make([]byte, cols)
}
return m, nil
}
// newMatrixData initializes a matrix with the given row-major data.
// Note that data is not copied from input.
func newMatrixData(data [][]byte) (matrix, error) {
m := matrix(data)
err := m.Check()
if err != nil {
return nil, err
}
return m, nil
}
// identityMatrix returns an identity matrix of the given size.
func identityMatrix(size int) (matrix, error) {
m, err := newMatrix(size, size)
if err != nil {
return nil, err
}
for i := range m {
m[i][i] = 1
}
return m, nil
}
// errInvalidRowSize will be returned if attempting to create a matrix with negative or zero row number.
var errInvalidRowSize = errors.New("invalid row size")
// errInvalidColSize will be returned if attempting to create a matrix with negative or zero column number.
var errInvalidColSize = errors.New("invalid column size")
// errColSizeMismatch is returned if the size of matrix columns mismatch.
var errColSizeMismatch = errors.New("column size is not the same for all rows")
func (m matrix) Check() error {
rows := len(m)
if rows <= 0 {
return errInvalidRowSize
}
cols := len(m[0])
if cols <= 0 {
return errInvalidColSize
}
for _, col := range m {
if len(col) != cols {
return errColSizeMismatch
}
}
return nil
}
// String returns a human-readable string of the matrix contents.
//
// Example: [[1, 2], [3, 4]]
func (m matrix) String() string {
rowOut := make([]string, 0, len(m))
for _, row := range m {
colOut := make([]string, 0, len(row))
for _, col := range row {
colOut = append(colOut, strconv.Itoa(int(col)))
}
rowOut = append(rowOut, "["+strings.Join(colOut, ", ")+"]")
}
return "[" + strings.Join(rowOut, ", ") + "]"
}
// Multiply multiplies this matrix (the one on the left) by another
// matrix (the one on the right) and returns a new matrix with the result.
func (m matrix) Multiply(right matrix) (matrix, error) {
if len(m[0]) != len(right) {
return nil, fmt.Errorf("columns on left (%d) is different than rows on right (%d)", len(m[0]), len(right))
}
result, _ := newMatrix(len(m), len(right[0]))
for r, row := range result {
for c := range row {
var value byte
for i := range m[0] {
value ^= galMultiply(m[r][i], right[i][c])
}
result[r][c] = value
}
}
return result, nil
}
// Augment returns the concatenation of this matrix and the matrix on the right.
func (m matrix) Augment(right matrix) (matrix, error) {
if len(m) != len(right) {
return nil, errMatrixSize
}
result, _ := newMatrix(len(m), len(m[0])+len(right[0]))
for r, row := range m {
for c := range row {
result[r][c] = m[r][c]
}
cols := len(m[0])
for c := range right[0] {
result[r][cols+c] = right[r][c]
}
}
return result, nil
}
// errMatrixSize is returned if matrix dimensions don't match.
var errMatrixSize = errors.New("matrix sizes does not match")
func (m matrix) SameSize(n matrix) error {
if len(m) != len(n) {
return errMatrixSize
}
for i := range m {
if len(m[i]) != len(n[i]) {
return errMatrixSize
}
}
return nil
}
// Returns a part of this matrix. Data is copied.
func (m matrix) SubMatrix(rmin, cmin, rmax, cmax int) (matrix, error) {
result, err := newMatrix(rmax-rmin, cmax-cmin)
if err != nil {
return nil, err
}
// OPTME: If used heavily, use copy function to copy slice
for r := rmin; r < rmax; r++ {
for c := cmin; c < cmax; c++ {
result[r-rmin][c-cmin] = m[r][c]
}
}
return result, nil
}
// SwapRows exchanges two rows in the matrix.
func (m matrix) SwapRows(r1, r2 int) error {
if r1 < 0 || len(m) <= r1 || r2 < 0 || len(m) <= r2 {
return errInvalidRowSize
}
m[r2], m[r1] = m[r1], m[r2]
return nil
}
// IsSquare will return true if the matrix is square
// and false otherwise.
func (m matrix) IsSquare() bool {
return len(m) == len(m[0])
}
// errSingular is returned if the matrix is singular and cannot be inverted
var errSingular = errors.New("matrix is singular")
// errNotSquare is returned if attempting to inverse a non-square matrix.
var errNotSquare = errors.New("only square matrices can be inverted")
// Invert returns the inverse of this matrix.
// Returns errSingular when the matrix is singular and doesn't have an inverse.
// The matrix must be square, otherwise errNotSquare is returned.
func (m matrix) Invert() (matrix, error) {
if !m.IsSquare() {
return nil, errNotSquare
}
size := len(m)
work, _ := identityMatrix(size)
work, _ = m.Augment(work)
err := work.gaussianElimination()
if err != nil {
return nil, err
}
return work.SubMatrix(0, size, size, size*2)
}
func (m matrix) gaussianElimination() error {
rows := len(m)
columns := len(m[0])
// Clear out the part below the main diagonal and scale the main
// diagonal to be 1.
for r := 0; r < rows; r++ {
// If the element on the diagonal is 0, find a row below
// that has a non-zero and swap them.
if m[r][r] == 0 {
for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
if m[rowBelow][r] != 0 {
m.SwapRows(r, rowBelow)
break
}
}
}
// If we couldn't find one, the matrix is singular.
if m[r][r] == 0 {
return errSingular
}
// Scale to 1.
if m[r][r] != 1 {
scale := galDivide(1, m[r][r])
for c := 0; c < columns; c++ {
m[r][c] = galMultiply(m[r][c], scale)
}
}
// Make everything below the 1 be a 0 by subtracting
// a multiple of it. (Subtraction and addition are
// both exclusive or in the Galois field.)
for rowBelow := r + 1; rowBelow < rows; rowBelow++ {
if m[rowBelow][r] != 0 {
scale := m[rowBelow][r]
for c := 0; c < columns; c++ {
m[rowBelow][c] ^= galMultiply(scale, m[r][c])
}
}
}
}
// Now clear the part above the main diagonal.
for d := 0; d < rows; d++ {
for rowAbove := 0; rowAbove < d; rowAbove++ {
if m[rowAbove][d] != 0 {
scale := m[rowAbove][d]
for c := 0; c < columns; c++ {
m[rowAbove][c] ^= galMultiply(scale, m[d][c])
}
}
}
}
return nil
}
// Create a Vandermonde matrix, which is guaranteed to have the
// property that any subset of rows that forms a square matrix
// is invertible.
func vandermonde(rows, cols int) (matrix, error) {
result, err := newMatrix(rows, cols)
if err != nil {
return nil, err
}
for r, row := range result {
for c := range row {
result[r][c] = galExp(byte(r), c)
}
}
return result, nil
}
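Since Invert is plain Gaussian elimination over GF(2^8), a round trip m · m⁻¹ should give the identity matrix. The sketch below assumes it is dropped into the package as a _test.go file (the matrix type is unexported); the matrix values and test name are arbitrary choices of mine.

```Go
package reedsolomon

import "testing"

// TestInvertRoundTripSketch inverts an upper-triangular matrix (always
// invertible) and multiplies back, expecting the identity. Illustrative only.
func TestInvertRoundTripSketch(t *testing.T) {
	m, err := newMatrixData([][]byte{
		{1, 1, 0},
		{0, 1, 1},
		{0, 0, 1},
	})
	if err != nil {
		t.Fatal(err)
	}
	inv, err := m.Invert()
	if err != nil {
		t.Fatal(err)
	}
	prod, err := m.Multiply(inv)
	if err != nil {
		t.Fatal(err)
	}
	ident, _ := identityMatrix(3)
	if prod.String() != ident.String() {
		t.Fatalf("expected %s, got %s", ident, prod)
	}
}
```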

View File

@ -1,573 +0,0 @@
/**
* Reed-Solomon Coding over 8-bit values.
*
* Copyright 2015, Klaus Post
* Copyright 2015, Backblaze, Inc.
*/
// Package reedsolomon enables Erasure Coding in Go
//
// For usage and examples, see https://github.com/klauspost/reedsolomon
//
package reedsolomon
import (
"bytes"
"errors"
"io"
"runtime"
"sync"
)
// Encoder is an interface to encode Reed-Solomon parity sets for your data.
type Encoder interface {
// Encodes parity for a set of data shards.
// Input is 'shards' containing data shards followed by parity shards.
// The number of shards must match the number given to New().
// Each shard is a byte array, and they must all be the same size.
// The parity shards will always be overwritten and the data shards
// will remain the same, so it is safe for you to read from the
// data shards while this is running.
Encode(shards [][]byte) error
// Verify returns true if the parity shards contain correct data.
// The data is the same format as Encode. No data is modified, so
// you are allowed to read from data while this is running.
Verify(shards [][]byte) (bool, error)
// Reconstruct will recreate the missing shards if possible.
//
// Given a list of shards, some of which contain data, fills in the
// ones that don't have data.
//
// The length of the array must be equal to the total number of shards.
// You indicate that a shard is missing by setting it to nil.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if data set is ok.
Reconstruct(shards [][]byte) error
// Split a data slice into the number of shards given to the encoder,
// and create empty parity shards.
//
// The data will be split into equally sized shards.
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
// There must be at least 1 byte otherwise ErrShortData will be
// returned.
//
// The data will not be copied, except for the last shard, so you
// should not modify the data of the input slice afterwards.
Split(data []byte) ([][]byte, error)
// Join the shards and write the data segment to dst.
//
// Only the data shards are considered.
// You must supply the exact output size you want.
// If there are too few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
Join(dst io.Writer, shards [][]byte, outSize int) error
}
// reedSolomon contains a matrix for a specific
// distribution of datashards and parity shards.
// Construct it using New().
type reedSolomon struct {
DataShards int // Number of data shards, should not be modified.
ParityShards int // Number of parity shards, should not be modified.
Shards int // Total number of shards. Calculated, and should not be modified.
m matrix
tree inversionTree
parity [][]byte
}
// ErrInvShardNum will be returned by New, if you attempt to create
// an Encoder where either data or parity shards is zero or less.
var ErrInvShardNum = errors.New("cannot create Encoder with zero or less data/parity shards")
// ErrMaxShardNum will be returned by New, if you attempt to create
// an Encoder where data and parity shards cannot be bigger than
// Galois field GF(2^8) - 1.
var ErrMaxShardNum = errors.New("cannot create Encoder with 255 or more data+parity shards")
// New creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the total number of data and parity shards must be at most 255.
func New(dataShards, parityShards int) (Encoder, error) {
r := reedSolomon{
DataShards: dataShards,
ParityShards: parityShards,
Shards: dataShards + parityShards,
}
if dataShards <= 0 || parityShards <= 0 {
return nil, ErrInvShardNum
}
if dataShards+parityShards > 255 {
return nil, ErrMaxShardNum
}
// Start with a Vandermonde matrix. This matrix would work,
// in theory, but doesn't have the property that the data
// shards are unchanged after encoding.
vm, err := vandermonde(r.Shards, dataShards)
if err != nil {
return nil, err
}
// Multiply by the inverse of the top square of the matrix.
// This will make the top square be the identity matrix, but
// preserve the property that any square subset of rows is
// invertible.
top, _ := vm.SubMatrix(0, 0, dataShards, dataShards)
top, _ = top.Invert()
r.m, _ = vm.Multiply(top)
// Inverted matrices are cached in a tree keyed by the indices
// of the invalid rows of the data to reconstruct.
// The inversion root node will have the identity matrix as
// its inversion matrix because it implies there are no errors
// with the original data.
r.tree = newInversionTree(dataShards, parityShards)
r.parity = make([][]byte, parityShards)
for i := range r.parity {
r.parity[i] = r.m[dataShards+i]
}
return &r, err
}
// ErrTooFewShards is returned if too few shards were given to
// Encode/Verify/Reconstruct. It will also be returned from Reconstruct
// if there were too few shards to reconstruct the missing data.
var ErrTooFewShards = errors.New("too few shards given")
// Encodes parity for a set of data shards.
// An array 'shards' containing data shards followed by parity shards.
// The number of shards must match the number given to New.
// Each shard is a byte array, and they must all be the same size.
// The parity shards will always be overwritten and the data shards
// will remain the same.
func (r reedSolomon) Encode(shards [][]byte) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
err := checkShards(shards, false)
if err != nil {
return err
}
// Get the slice of output buffers.
output := shards[r.DataShards:]
// Do the coding.
r.codeSomeShards(r.parity, shards[0:r.DataShards], output, r.ParityShards, len(shards[0]))
return nil
}
// Verify returns true if the parity shards contain the right data.
// The data is the same format as Encode. No data is modified.
func (r reedSolomon) Verify(shards [][]byte) (bool, error) {
if len(shards) != r.Shards {
return false, ErrTooFewShards
}
err := checkShards(shards, false)
if err != nil {
return false, err
}
// Slice of buffers being checked.
toCheck := shards[r.DataShards:]
// Do the checking.
return r.checkSomeShards(r.parity, shards[0:r.DataShards], toCheck, r.ParityShards, len(shards[0])), nil
}
// Multiplies a subset of rows from a coding matrix by a full set of
// input shards to produce some output shards.
// 'matrixRows' are the rows from the matrix to use.
// 'inputs' is an array of byte arrays, each of which is one input shard.
// The number of inputs used is determined by the length of each matrix row.
// 'outputs' is an array of byte arrays where the computed shards are stored.
// The number of outputs computed, and the
// number of matrix rows used, is determined by
// outputCount, which is the number of outputs to compute.
func (r reedSolomon) codeSomeShards(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
if runtime.GOMAXPROCS(0) > 1 && len(inputs[0]) > minSplitSize {
r.codeSomeShardsP(matrixRows, inputs, outputs, outputCount, byteCount)
return
}
for c := 0; c < r.DataShards; c++ {
in := inputs[c]
for iRow := 0; iRow < outputCount; iRow++ {
if c == 0 {
galMulSlice(matrixRows[iRow][c], in, outputs[iRow])
} else {
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow])
}
}
}
}
const (
minSplitSize = 65536 // min split size per goroutine
maxGoroutines = 50 // max goroutines number for encoding & decoding
)
// Perform the same as codeSomeShards, but split the workload into
// several goroutines.
func (r reedSolomon) codeSomeShardsP(matrixRows, inputs, outputs [][]byte, outputCount, byteCount int) {
var wg sync.WaitGroup
do := byteCount / maxGoroutines
if do < minSplitSize {
do = minSplitSize
}
start := 0
for start < byteCount {
if start+do > byteCount {
do = byteCount - start
}
wg.Add(1)
go func(start, stop int) {
for c := 0; c < r.DataShards; c++ {
in := inputs[c]
for iRow := 0; iRow < outputCount; iRow++ {
if c == 0 {
galMulSlice(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop])
} else {
galMulSliceXor(matrixRows[iRow][c], in[start:stop], outputs[iRow][start:stop])
}
}
}
wg.Done()
}(start, start+do)
start += do
}
wg.Wait()
}
// checkSomeShards is mostly the same as codeSomeShards,
// except this will check values and return
// as soon as a difference is found.
func (r reedSolomon) checkSomeShards(matrixRows, inputs, toCheck [][]byte, outputCount, byteCount int) bool {
same := true
var mu sync.RWMutex // For above
var wg sync.WaitGroup
do := byteCount / maxGoroutines
if do < minSplitSize {
do = minSplitSize
}
start := 0
for start < byteCount {
if start+do > byteCount {
do = byteCount - start
}
wg.Add(1)
go func(start, do int) {
defer wg.Done()
outputs := make([][]byte, len(toCheck))
for i := range outputs {
outputs[i] = make([]byte, do)
}
for c := 0; c < r.DataShards; c++ {
mu.RLock()
if !same {
mu.RUnlock()
return
}
mu.RUnlock()
in := inputs[c][start : start+do]
for iRow := 0; iRow < outputCount; iRow++ {
galMulSliceXor(matrixRows[iRow][c], in, outputs[iRow])
}
}
for i, calc := range outputs {
if !bytes.Equal(calc, toCheck[i][start:start+do]) {
mu.Lock()
same = false
mu.Unlock()
return
}
}
}(start, do)
start += do
}
wg.Wait()
return same
}
// ErrShardNoData will be returned if there are no shards,
// or if the length of all shards is zero.
var ErrShardNoData = errors.New("no shard data")
// ErrShardSize is returned if shard length isn't the same for all
// shards.
var ErrShardSize = errors.New("shard sizes does not match")
// checkShards will check if shards are the same size
// or 0, if allowed. An error is returned if this fails.
// An error is also returned if all shards are size 0.
func checkShards(shards [][]byte, nilok bool) error {
size := shardSize(shards)
if size == 0 {
return ErrShardNoData
}
for _, shard := range shards {
if len(shard) != size {
if len(shard) != 0 || !nilok {
return ErrShardSize
}
}
}
return nil
}
// shardSize returns the size of a single shard.
// The first non-zero size is returned,
// or 0 if all shards are size 0.
func shardSize(shards [][]byte) int {
for _, shard := range shards {
if len(shard) != 0 {
return len(shard)
}
}
return 0
}
// Reconstruct will recreate the missing shards, if possible.
//
// Given a list of shards, some of which contain data, fills in the
// ones that don't have data.
//
// The length of the array must be equal to Shards.
// You indicate that a shard is missing by setting it to nil.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if data set is ok.
func (r reedSolomon) Reconstruct(shards [][]byte) error {
if len(shards) != r.Shards {
return ErrTooFewShards
}
// Check arguments.
err := checkShards(shards, true)
if err != nil {
return err
}
shardSize := shardSize(shards)
// Quick check: are all of the shards present? If so, there's
// nothing to do.
numberPresent := 0
for i := 0; i < r.Shards; i++ {
if len(shards[i]) != 0 {
numberPresent++
}
}
if numberPresent == r.Shards {
// Cool. All of the shards have data. We don't
// need to do anything.
return nil
}
// More complete sanity check
if numberPresent < r.DataShards {
return ErrTooFewShards
}
// Pull out an array holding just the shards that
// correspond to the rows of the submatrix. These shards
// will be the input to the decoding process that re-creates
// the missing data shards.
//
// Also, create an array of indices of the valid rows we do have
// and the invalid rows we don't have up until we have enough valid rows.
subShards := make([][]byte, r.DataShards)
validIndices := make([]int, r.DataShards)
invalidIndices := make([]int, 0)
subMatrixRow := 0
for matrixRow := 0; matrixRow < r.Shards && subMatrixRow < r.DataShards; matrixRow++ {
if len(shards[matrixRow]) != 0 {
subShards[subMatrixRow] = shards[matrixRow]
validIndices[subMatrixRow] = matrixRow
subMatrixRow++
} else {
invalidIndices = append(invalidIndices, matrixRow)
}
}
// Attempt to get the cached inverted matrix out of the tree
// based on the indices of the invalid rows.
dataDecodeMatrix := r.tree.GetInvertedMatrix(invalidIndices)
// If the inverted matrix isn't cached in the tree yet we must
// construct it ourselves and insert it into the tree for the
// future. In this way the inversion tree is lazily loaded.
if dataDecodeMatrix == nil {
// Pull out the rows of the matrix that correspond to the
// shards that we have and build a square matrix. This
// matrix could be used to generate the shards that we have
// from the original data.
subMatrix, _ := newMatrix(r.DataShards, r.DataShards)
for subMatrixRow, validIndex := range validIndices {
for c := 0; c < r.DataShards; c++ {
subMatrix[subMatrixRow][c] = r.m[validIndex][c]
}
}
// Invert the matrix, so we can go from the encoded shards
// back to the original data. Then pull out the row that
// generates the shard that we want to decode. Note that
// since this matrix maps back to the original data, it can
// be used to create a data shard, but not a parity shard.
dataDecodeMatrix, err = subMatrix.Invert()
if err != nil {
return err
}
// Cache the inverted matrix in the tree for future use keyed on the
// indices of the invalid rows.
err = r.tree.InsertInvertedMatrix(invalidIndices, dataDecodeMatrix, r.Shards)
if err != nil {
return err
}
}
// Re-create any data shards that were missing.
//
// The input to the coding is all of the shards we actually
// have, and the output is the missing data shards. The computation
// is done using the special decode matrix we just built.
outputs := make([][]byte, r.ParityShards)
matrixRows := make([][]byte, r.ParityShards)
outputCount := 0
for iShard := 0; iShard < r.DataShards; iShard++ {
if len(shards[iShard]) == 0 {
shards[iShard] = make([]byte, shardSize)
outputs[outputCount] = shards[iShard]
matrixRows[outputCount] = dataDecodeMatrix[iShard]
outputCount++
}
}
r.codeSomeShards(matrixRows, subShards, outputs[:outputCount], outputCount, shardSize)
// Now that we have all of the data shards intact, we can
// compute any of the parity that is missing.
//
// The input to the coding is ALL of the data shards, including
// any that we just calculated. The output is whichever of the
// parity shards were missing.
outputCount = 0
for iShard := r.DataShards; iShard < r.Shards; iShard++ {
if len(shards[iShard]) == 0 {
shards[iShard] = make([]byte, shardSize)
outputs[outputCount] = shards[iShard]
matrixRows[outputCount] = r.parity[iShard-r.DataShards]
outputCount++
}
}
r.codeSomeShards(matrixRows, shards[:r.DataShards], outputs[:outputCount], outputCount, shardSize)
return nil
}
// ErrShortData will be returned by Split(), if there isn't enough data
// to fill the number of shards.
var ErrShortData = errors.New("not enough data to fill the number of requested shards")
// Split a data slice into the number of shards given to the encoder,
// and create empty parity shards.
//
// The data will be split into equally sized shards.
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
// There must be at least 1 byte otherwise ErrShortData will be
// returned.
//
// The data will not be copied, except for the last shard, so you
// should not modify the data of the input slice afterwards.
func (r reedSolomon) Split(data []byte) ([][]byte, error) {
if len(data) == 0 {
return nil, ErrShortData
}
// Calculate number of bytes per shard.
perShard := (len(data) + r.DataShards - 1) / r.DataShards
// Pad data to r.Shards*perShard.
padding := make([]byte, (r.Shards*perShard)-len(data))
data = append(data, padding...)
// Split into equal-length shards.
dst := make([][]byte, r.Shards)
for i := range dst {
dst[i] = data[:perShard]
data = data[perShard:]
}
return dst, nil
}
// ErrReconstructRequired is returned if too few data shards are intact and a
// reconstruction is required before you can successfully join the shards.
var ErrReconstructRequired = errors.New("reconstruction required as one or more required data shards are nil")
// Join the shards and write the data segment to dst.
//
// Only the data shards are considered.
// You must supply the exact output size you want.
//
// If there are too few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
// If one or more required data shards are nil, ErrReconstructRequired will be returned.
func (r reedSolomon) Join(dst io.Writer, shards [][]byte, outSize int) error {
// Do we have enough shards?
if len(shards) < r.DataShards {
return ErrTooFewShards
}
shards = shards[:r.DataShards]
// Do we have enough data?
size := 0
for _, shard := range shards {
if shard == nil {
return ErrReconstructRequired
}
size += len(shard)
// Do we have enough data already?
if size >= outSize {
break
}
}
if size < outSize {
return ErrShortData
}
// Copy data to dst
write := outSize
for _, shard := range shards {
if write < len(shard) {
_, err := dst.Write(shard[:write])
return err
}
n, err := dst.Write(shard)
if err != nil {
return err
}
write -= n
}
return nil
}
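Putting the pieces of the Encoder interface together, here is a compact end-to-end sketch using only the exported API (the shard counts and payload are arbitrary): split, encode, lose two shards, reconstruct, verify, and join.

```Go
package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	enc, err := reedsolomon.New(4, 2) // 4 data shards, 2 parity shards
	if err != nil {
		log.Fatal(err)
	}
	original := []byte("some payload worth protecting against shard loss")
	shards, err := enc.Split(original)
	if err != nil {
		log.Fatal(err)
	}
	if err := enc.Encode(shards); err != nil {
		log.Fatal(err)
	}
	// Lose one data shard and one parity shard, then reconstruct them.
	shards[0], shards[5] = nil, nil
	if err := enc.Reconstruct(shards); err != nil {
		log.Fatal(err)
	}
	if ok, _ := enc.Verify(shards); !ok {
		log.Fatal("verification failed after reconstruction")
	}
	// Join the data shards back into the original byte stream.
	var buf bytes.Buffer
	if err := enc.Join(&buf, shards, len(original)); err != nil {
		log.Fatal(err)
	}
	fmt.Println(bytes.Equal(buf.Bytes(), original)) // true
}
```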

View File

@ -1,575 +0,0 @@
/**
* Reed-Solomon Coding over 8-bit values.
*
* Copyright 2015, Klaus Post
* Copyright 2015, Backblaze, Inc.
*/
package reedsolomon
import (
"bytes"
"errors"
"fmt"
"io"
"sync"
)
// StreamEncoder is an interface to encode Reed-Solomon parity sets for your data.
// It provides a fully streaming interface, and processes data in blocks of up to 4MB.
//
// For small shard sizes, 10MB and below, it is recommended to use the in-memory interface,
// since the streaming interface has a start up overhead.
//
// For all operations, readers and writers should not assume any order/size of
// individual reads/writes.
//
// For usage examples, see "stream-encoder.go" and "streamdecoder.go" in the examples
// folder.
type StreamEncoder interface {
// Encodes parity shards for a set of data shards.
//
// Input is 'shards' containing readers for data shards followed by parity shards
// io.Writer.
//
// The number of shards must match the number given to NewStream().
//
// Each reader must supply the same number of bytes.
//
// The parity shards will be written to the writer.
// The number of bytes written will match the input size.
//
// If a data stream returns an error, a StreamReadError type error
// will be returned. If a parity writer returns an error, a
// StreamWriteError will be returned.
Encode(data []io.Reader, parity []io.Writer) error
// Verify returns true if the parity shards contain correct data.
//
// The number of shards must match the total number of data+parity shards
// given to NewStream().
//
// Each reader must supply the same number of bytes.
// If a shard stream returns an error, a StreamReadError type error
// will be returned.
Verify(shards []io.Reader) (bool, error)
// Reconstruct will recreate the missing shards if possible.
//
// Given a list of valid shards (to read) and invalid shards (to write)
//
// You indicate that a shard is missing by setting it to nil in the 'valid'
// slice and at the same time setting a non-nil writer in "fill".
// An index cannot contain both non-nil 'valid' and 'fill' entry.
// If both are provided 'ErrReconstructMismatch' is returned.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if data set is ok.
Reconstruct(valid []io.Reader, fill []io.Writer) error
// Split an input stream into the number of shards given to the encoder.
//
// The data will be split into equally sized shards.
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
// You must supply the total size of your input.
// 'ErrShortData' will be returned if it is unable to retrieve the
// number of bytes indicated.
Split(data io.Reader, dst []io.Writer, size int64) (err error)
// Join the shards and write the data segment to dst.
//
// Only the data shards are considered.
//
// You must supply the exact output size you want.
// If there are too few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
Join(dst io.Writer, shards []io.Reader, outSize int64) error
}
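A minimal sketch of driving this interface with in-memory readers and writers follows (the payload size and shard counts are arbitrary assumptions; real use would normally stream from and to files, as in the examples named above).

```Go
package main

import (
	"bytes"
	"fmt"
	"io"
	"log"

	"github.com/klauspost/reedsolomon"
)

func main() {
	enc, err := reedsolomon.NewStream(4, 2)
	if err != nil {
		log.Fatal(err)
	}
	// Four equally sized data shards presented as readers.
	payload := bytes.Repeat([]byte("streaming"), 100) // 900 bytes, 225 per shard
	shardSize := len(payload) / 4
	data := make([]io.Reader, 4)
	for i := range data {
		data[i] = bytes.NewReader(payload[i*shardSize : (i+1)*shardSize])
	}
	// Two parity shards collected in buffers.
	parityBuf := []*bytes.Buffer{new(bytes.Buffer), new(bytes.Buffer)}
	parity := []io.Writer{parityBuf[0], parityBuf[1]}
	if err := enc.Encode(data, parity); err != nil {
		log.Fatal(err)
	}
	// Verify needs fresh readers over all data and parity shards.
	all := make([]io.Reader, 6)
	for i := 0; i < 4; i++ {
		all[i] = bytes.NewReader(payload[i*shardSize : (i+1)*shardSize])
	}
	all[4] = bytes.NewReader(parityBuf[0].Bytes())
	all[5] = bytes.NewReader(parityBuf[1].Bytes())
	ok, err := enc.Verify(all)
	fmt.Println(ok, err) // true <nil>
}
```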
// StreamReadError is returned when a read error is encountered
// that relates to a supplied stream.
// This will allow you to find out which reader has failed.
type StreamReadError struct {
Err error // The error
Stream int // The stream number on which the error occurred
}
// Error returns the error as a string
func (s StreamReadError) Error() string {
return fmt.Sprintf("error reading stream %d: %s", s.Stream, s.Err)
}
// String returns the error as a string
func (s StreamReadError) String() string {
return s.Error()
}
// StreamWriteError is returned when a write error is encountered
// that relates to a supplied stream. This will allow you to
// find out which writer has failed.
type StreamWriteError struct {
Err error // The error
Stream int // The stream number on which the error occurred
}
// Error returns the error as a string
func (s StreamWriteError) Error() string {
return fmt.Sprintf("error writing stream %d: %s", s.Stream, s.Err)
}
// String returns the error as a string
func (s StreamWriteError) String() string {
return s.Error()
}
// rsStream contains a matrix for a specific
// distribution of datashards and parity shards.
// Construct it using NewStream().
type rsStream struct {
r *reedSolomon
bs int // Block size
// Shard reader
readShards func(dst [][]byte, in []io.Reader) error
// Shard writer
writeShards func(out []io.Writer, in [][]byte) error
creads bool
cwrites bool
}
// NewStream creates a new encoder and initializes it to
// the number of data shards and parity shards that
// you want to use. You can reuse this encoder.
// Note that the total number of data and parity shards must be at most 255.
func NewStream(dataShards, parityShards int) (StreamEncoder, error) {
enc, err := New(dataShards, parityShards)
if err != nil {
return nil, err
}
rs := enc.(*reedSolomon)
r := rsStream{r: rs, bs: 4 << 20}
r.readShards = readShards
r.writeShards = writeShards
return &r, err
}
// NewStreamC creates a new encoder and initializes it to
// the number of data shards and parity shards given.
//
// This functions as 'NewStream', but allows you to enable CONCURRENT reads and writes.
func NewStreamC(dataShards, parityShards int, conReads, conWrites bool) (StreamEncoder, error) {
enc, err := New(dataShards, parityShards)
if err != nil {
return nil, err
}
rs := enc.(*reedSolomon)
r := rsStream{r: rs, bs: 4 << 20}
r.readShards = readShards
r.writeShards = writeShards
if conReads {
r.readShards = cReadShards
}
if conWrites {
r.writeShards = cWriteShards
}
return &r, err
}
func createSlice(n, length int) [][]byte {
out := make([][]byte, n)
for i := range out {
out[i] = make([]byte, length)
}
return out
}
// Encodes parity shards for a set of data shards.
//
// Input is 'shards' containing readers for data shards followed by parity shards
// io.Writer.
//
// The number of shards must match the number given to NewStream().
//
// Each reader must supply the same number of bytes.
//
// The parity shards will be written to the writer.
// The number of bytes written will match the input size.
//
// If a data stream returns an error, a StreamReadError type error
// will be returned. If a parity writer returns an error, a
// StreamWriteError will be returned.
func (r rsStream) Encode(data []io.Reader, parity []io.Writer) error {
if len(data) != r.r.DataShards {
return ErrTooFewShards
}
if len(parity) != r.r.ParityShards {
return ErrTooFewShards
}
all := createSlice(r.r.Shards, r.bs)
in := all[:r.r.DataShards]
out := all[r.r.DataShards:]
read := 0
for {
err := r.readShards(in, data)
switch err {
case nil:
case io.EOF:
if read == 0 {
return ErrShardNoData
}
return nil
default:
return err
}
out = trimShards(out, shardSize(in))
read += shardSize(in)
err = r.r.Encode(all)
if err != nil {
return err
}
err = r.writeShards(parity, out)
if err != nil {
return err
}
}
}
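// Caller-side sketch (illustration only): encoding from in-memory shards with
// the assumed 10+3 layout. 'shardBytes' is a hypothetical [][]byte holding
// ten equally sized data shards.
//
//	data := make([]io.Reader, 10)
//	for i := range data {
//		data[i] = bytes.NewReader(shardBytes[i])
//	}
//	parityBufs := make([]*bytes.Buffer, 3)
//	parity := make([]io.Writer, 3)
//	for i := range parity {
//		parityBufs[i] = new(bytes.Buffer)
//		parity[i] = parityBufs[i]
//	}
//	if err := enc.Encode(data, parity); err != nil {
//		log.Fatal(err)
//	}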
// Trim the shards so they are all the same size
func trimShards(in [][]byte, size int) [][]byte {
for i := range in {
if in[i] != nil {
in[i] = in[i][0:size]
}
if len(in[i]) < size {
in[i] = nil
}
}
return in
}
func readShards(dst [][]byte, in []io.Reader) error {
if len(in) != len(dst) {
panic("internal error: in and dst size does not match")
}
size := -1
for i := range in {
if in[i] == nil {
dst[i] = nil
continue
}
n, err := io.ReadFull(in[i], dst[i])
// The error is EOF only if no bytes were read.
// If an EOF happens after reading some but not all the bytes,
// ReadFull returns ErrUnexpectedEOF.
switch err {
case io.ErrUnexpectedEOF, io.EOF:
if size < 0 {
size = n
} else if n != size {
// Shard sizes must match.
return ErrShardSize
}
dst[i] = dst[i][0:n]
case nil:
continue
default:
return StreamReadError{Err: err, Stream: i}
}
}
if size == 0 {
return io.EOF
}
return nil
}
func writeShards(out []io.Writer, in [][]byte) error {
if len(out) != len(in) {
panic("internal error: in and out size does not match")
}
for i := range in {
if out[i] == nil {
continue
}
n, err := out[i].Write(in[i])
if err != nil {
return StreamWriteError{Err: err, Stream: i}
}
// Report a short write without an error as a failure as well.
if n != len(in[i]) {
return StreamWriteError{Err: io.ErrShortWrite, Stream: i}
}
}
return nil
}
type readResult struct {
n int
size int
err error
}
// cReadShards reads shards concurrently
func cReadShards(dst [][]byte, in []io.Reader) error {
if len(in) != len(dst) {
panic("internal error: in and dst size does not match")
}
var wg sync.WaitGroup
wg.Add(len(in))
res := make(chan readResult, len(in))
for i := range in {
if in[i] == nil {
dst[i] = nil
wg.Done()
continue
}
go func(i int) {
defer wg.Done()
n, err := io.ReadFull(in[i], dst[i])
// The error is EOF only if no bytes were read.
// If an EOF happens after reading some but not all the bytes,
// ReadFull returns ErrUnexpectedEOF.
res <- readResult{size: n, err: err, n: i}
}(i)
}
wg.Wait()
close(res)
size := -1
for r := range res {
switch r.err {
case io.ErrUnexpectedEOF, io.EOF:
if size < 0 {
size = r.size
} else if r.size != size {
// Shard sizes must match.
return ErrShardSize
}
dst[r.n] = dst[r.n][0:r.size]
case nil:
default:
return StreamReadError{Err: r.err, Stream: r.n}
}
}
if size == 0 {
return io.EOF
}
return nil
}
// cWriteShards writes shards concurrently
func cWriteShards(out []io.Writer, in [][]byte) error {
if len(out) != len(in) {
panic("internal error: in and out size does not match")
}
var errs = make(chan error, len(out))
var wg sync.WaitGroup
wg.Add(len(out))
for i := range in {
go func(i int) {
defer wg.Done()
if out[i] == nil {
errs <- nil
return
}
n, err := out[i].Write(in[i])
if err != nil {
errs <- StreamWriteError{Err: err, Stream: i}
return
}
if n != len(in[i]) {
errs <- StreamWriteError{Err: io.ErrShortWrite, Stream: i}
}
}(i)
}
wg.Wait()
close(errs)
for err := range errs {
if err != nil {
return err
}
}
return nil
}
// Verify returns true if the parity shards contain correct data.
//
// The number of shards must match the total number of data+parity
// shards given to NewStream().
//
// Each reader must supply the same number of bytes.
// If a shard stream returns an error, a StreamReadError type error
// will be returned.
func (r rsStream) Verify(shards []io.Reader) (bool, error) {
if len(shards) != r.r.Shards {
return false, ErrTooFewShards
}
read := 0
all := createSlice(r.r.Shards, r.bs)
for {
err := r.readShards(all, shards)
if err == io.EOF {
if read == 0 {
return false, ErrShardNoData
}
return true, nil
}
if err != nil {
return false, err
}
read += shardSize(all)
ok, err := r.r.Verify(all)
if !ok || err != nil {
return ok, err
}
}
}
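// Caller-side sketch (illustration only): Verify needs readers for all shards,
// data followed by parity (13 in the assumed 10+3 layout). 'shardBytes' is the
// same hypothetical shard data as above.
//
//	shards := make([]io.Reader, 13)
//	for i := range shards {
//		shards[i] = bytes.NewReader(shardBytes[i])
//	}
//	ok, err := enc.Verify(shards)
//	if err != nil || !ok {
//		log.Printf("verification failed: ok=%v err=%v", ok, err)
//	}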
// ErrReconstructMismatch is returned by the StreamEncoder, if you supply
// both a "valid" and a "fill" stream on the same index, since it is then
// impossible to tell whether you consider the shard valid
// or would like to have it reconstructed.
var ErrReconstructMismatch = errors.New("valid shards and fill shards are mutually exclusive")
// Reconstruct will recreate the missing shards if possible.
//
// Given a list of valid shards (to read) and invalid shards (to write),
// you indicate that a shard is missing by setting it to nil in the 'valid'
// slice and at the same time setting a non-nil writer in 'fill'.
// An index cannot contain both a non-nil 'valid' entry and a non-nil
// 'fill' entry.
//
// If there are too few shards to reconstruct the missing
// ones, ErrTooFewShards will be returned.
//
// The reconstructed shard set is complete, but integrity is not verified.
// Use the Verify function to check if the data set is ok.
func (r rsStream) Reconstruct(valid []io.Reader, fill []io.Writer) error {
if len(valid) != r.r.Shards {
return ErrTooFewShards
}
if len(fill) != r.r.Shards {
return ErrTooFewShards
}
all := createSlice(r.r.Shards, r.bs)
for i := range valid {
if valid[i] != nil && fill[i] != nil {
return ErrReconstructMismatch
}
}
read := 0
for {
err := r.readShards(all, valid)
if err == io.EOF {
if read == 0 {
return ErrShardNoData
}
return nil
}
if err != nil {
return err
}
read += shardSize(all)
all = trimShards(all, shardSize(all))
err = r.r.Reconstruct(all)
if err != nil {
return err
}
err = r.writeShards(fill, all)
if err != nil {
return err
}
}
}
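// Caller-side sketch (illustration only): shard 2 is assumed missing, so its
// 'valid' entry stays nil and its 'fill' entry gets a writer; every other
// 'fill' entry stays nil.
//
//	valid := make([]io.Reader, 13)
//	fill := make([]io.Writer, 13)
//	for i := range valid {
//		if i == 2 {
//			fill[i] = new(bytes.Buffer) // the reconstructed shard ends up here
//			continue
//		}
//		valid[i] = bytes.NewReader(shardBytes[i]) // hypothetical shard data
//	}
//	if err := enc.Reconstruct(valid, fill); err != nil {
//		log.Fatal(err)
//	}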
// Join the shards and write the data segment to dst.
//
// Only the data shards are considered.
//
// You must supply the exact output size you want.
// If there are too few shards given, ErrTooFewShards will be returned.
// If the total data size is less than outSize, ErrShortData will be returned.
func (r rsStream) Join(dst io.Writer, shards []io.Reader, outSize int64) error {
// Do we have enough shards?
if len(shards) < r.r.DataShards {
return ErrTooFewShards
}
// Trim off parity shards if any
shards = shards[:r.r.DataShards]
for i := range shards {
if shards[i] == nil {
return StreamReadError{Err: ErrShardNoData, Stream: i}
}
}
// Join all shards
src := io.MultiReader(shards...)
// Copy data to dst
n, err := io.CopyN(dst, src, outSize)
if err == io.EOF {
return ErrShortData
}
if err != nil {
return err
}
if n != outSize {
return ErrShortData
}
return nil
}
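// Caller-side sketch (illustration only): joining the data shards back into the
// original byte stream. 'origSize' is a hypothetical int64 holding the size of
// the original data, which the caller has to track separately.
//
//	dataShards := make([]io.Reader, 10)
//	for i := range dataShards {
//		dataShards[i] = bytes.NewReader(shardBytes[i]) // hypothetical shard data
//	}
//	var out bytes.Buffer
//	if err := enc.Join(&out, dataShards, origSize); err != nil {
//		log.Fatal(err)
//	}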
// Split an input stream into the number of shards given to the encoder.
//
// The data will be split into equally sized shards.
// If the data size isn't divisible by the number of shards,
// the last shard will contain extra zeros.
//
// You must supply the total size of your input.
// 'ErrShortData' will be returned if it is unable to retrieve the
// number of bytes indicated.
func (r rsStream) Split(data io.Reader, dst []io.Writer, size int64) error {
if size == 0 {
return ErrShortData
}
if len(dst) != r.r.DataShards {
return ErrInvShardNum
}
for i := range dst {
if dst[i] == nil {
return StreamWriteError{Err: ErrShardNoData, Stream: i}
}
}
// Calculate number of bytes per shard.
perShard := (size + int64(r.r.DataShards) - 1) / int64(r.r.DataShards)
// Pad data to r.Shards*perShard.
padding := make([]byte, (int64(r.r.Shards)*perShard)-size)
data = io.MultiReader(data, bytes.NewBuffer(padding))
// Split into equal-length shards and copy.
for i := range dst {
n, err := io.CopyN(dst[i], data, perShard)
if err != io.EOF && err != nil {
return err
}
if n != perShard {
return ErrShortData
}
}
return nil
}
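// Caller-side sketch (illustration only): splitting a 50000-byte payload into
// the 10 data shards of the assumed layout. 'inputBytes' is hypothetical; the
// destination writers could just as well be files.
//
//	input := bytes.NewReader(inputBytes)
//	bufs := make([]*bytes.Buffer, 10)
//	dst := make([]io.Writer, 10)
//	for i := range dst {
//		bufs[i] = new(bytes.Buffer)
//		dst[i] = bufs[i]
//	}
//	if err := enc.Split(input, dst, 50000); err != nil {
//		log.Fatal(err)
//	}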

14
vendor/manifest vendored
View File

@ -238,8 +238,8 @@
{
"importpath": "github.com/klauspost/reedsolomon",
"repository": "https://github.com/klauspost/reedsolomon",
"vcs": "",
"revision": "d0a56f72c0d40a6cdde43a1575ad9686a0098b70",
"vcs": "git",
"revision": "5abf0ee302ccf4834e84f63ff74eca3e8b88e4e2",
"branch": "master"
},
{
@ -354,15 +354,7 @@
"importpath": "github.com/xtaci/kcp-go",
"repository": "https://github.com/xtaci/kcp-go",
"vcs": "git",
"revision": "d719435bc7494d9d2b2cc4b57c416f9e6976eeb6",
"branch": "master",
"notests": true
},
{
"importpath": "github.com/xtaci/reedsolomon",
"repository": "https://github.com/xtaci/reedsolomon",
"vcs": "git",
"revision": "7bbd3662bdabfaafbe1552513e42a976fe7e7f55",
"revision": "0ca962cb10f29ee0735ff7dec69ec7283af47f65",
"branch": "master",
"notests": true
},