mirror of
https://github.com/octoleo/restic.git
synced 2024-12-29 13:22:43 +00:00
b9f0f031b6
Closes #2129
533 lines
14 KiB
Go
533 lines
14 KiB
Go
/*
|
|
* Minio Go Library for Amazon S3 Compatible Cloud Storage
|
|
* (C) 2018 Minio, Inc.
|
|
*
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
* you may not use this file except in compliance with the License.
|
|
* You may obtain a copy of the License at
|
|
*
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
*
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
* See the License for the specific language governing permissions and
|
|
* limitations under the License.
|
|
*/
|
|
|
|
package minio
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"encoding/binary"
|
|
"encoding/xml"
|
|
"errors"
|
|
"fmt"
|
|
"hash"
|
|
"hash/crc32"
|
|
"io"
|
|
"net/http"
|
|
"net/url"
|
|
"strings"
|
|
|
|
"github.com/minio/minio-go/pkg/encrypt"
|
|
"github.com/minio/minio-go/pkg/s3utils"
|
|
)
|
|
|
|
// CSVFileHeaderInfo - is the parameter for whether to utilize headers.
type CSVFileHeaderInfo string

// Constants for file header info.
//
// Each constant spells out its type: a constant declared with an explicit
// value does not inherit the type of the line above it, so omitting the type
// would leave it an untyped string constant.
const (
	CSVFileHeaderInfoNone   CSVFileHeaderInfo = "NONE"
	CSVFileHeaderInfoIgnore CSVFileHeaderInfo = "IGNORE"
	CSVFileHeaderInfoUse    CSVFileHeaderInfo = "USE"
)
|
|
|
|
// SelectCompressionType - is the parameter for what type of compression is
// present
type SelectCompressionType string

// Constants for compression types under select API.
//
// The type is repeated on every line so that each constant is a
// SelectCompressionType rather than an untyped string constant.
const (
	SelectCompressionNONE SelectCompressionType = "NONE"
	SelectCompressionGZIP SelectCompressionType = "GZIP"
	SelectCompressionBZIP SelectCompressionType = "BZIP2"
)
|
|
|
|
// CSVQuoteFields - is the parameter for how CSV fields are quoted.
type CSVQuoteFields string

// Constants for csv quote styles.
//
// Both constants are declared with the CSVQuoteFields type; without the
// explicit type the second one would be an untyped string constant.
const (
	CSVQuoteFieldsAlways   CSVQuoteFields = "Always"
	CSVQuoteFieldsAsNeeded CSVQuoteFields = "AsNeeded"
)
|
|
|
|
// QueryExpressionType names the syntax an expression is written in; SQL is
// the only value declared here.
type QueryExpressionType string

// QueryExpressionTypeSQL is the expression type for SQL expressions.
const QueryExpressionTypeSQL QueryExpressionType = "SQL"
|
|
|
|
// JSONType determines json input serialization type.
type JSONType string

// Constants for JSONTypes.
//
// Both constants carry the JSONType type explicitly; a constant with its own
// value and no type would be an untyped string constant.
const (
	JSONDocumentType JSONType = "DOCUMENT"
	JSONLinesType    JSONType = "LINES"
)
|
|
|
|
// ParquetInputOptions parquet input specific options
|
|
type ParquetInputOptions struct{}
|
|
|
|
// CSVInputOptions csv input specific options
|
|
type CSVInputOptions struct {
|
|
FileHeaderInfo CSVFileHeaderInfo
|
|
RecordDelimiter string
|
|
FieldDelimiter string
|
|
QuoteCharacter string
|
|
QuoteEscapeCharacter string
|
|
Comments string
|
|
}
|
|
|
|
// CSVOutputOptions csv output specific options
|
|
type CSVOutputOptions struct {
|
|
QuoteFields CSVQuoteFields
|
|
RecordDelimiter string
|
|
FieldDelimiter string
|
|
QuoteCharacter string
|
|
QuoteEscapeCharacter string
|
|
}
|
|
|
|
// JSONInputOptions json input specific options
|
|
type JSONInputOptions struct {
|
|
Type JSONType
|
|
}
|
|
|
|
// JSONOutputOptions - json output specific options
|
|
type JSONOutputOptions struct {
|
|
RecordDelimiter string
|
|
}
|
|
|
|
// SelectObjectInputSerialization - input serialization parameters
|
|
type SelectObjectInputSerialization struct {
|
|
CompressionType SelectCompressionType
|
|
Parquet *ParquetInputOptions `xml:"Parquet,omitempty"`
|
|
CSV *CSVInputOptions `xml:"CSV,omitempty"`
|
|
JSON *JSONInputOptions `xml:"JSON,omitempty"`
|
|
}
|
|
|
|
// SelectObjectOutputSerialization - output serialization parameters.
|
|
type SelectObjectOutputSerialization struct {
|
|
CSV *CSVOutputOptions `xml:"CSV,omitempty"`
|
|
JSON *JSONOutputOptions `xml:"JSON,omitempty"`
|
|
}
|
|
|
|
// SelectObjectOptions - represents the input select body
//
// SelectObjectContent marshals this value with encoding/xml and sends the
// result as the request body.
type SelectObjectOptions struct {
	// XMLName sets the XML root element name to SelectObjectContentRequest.
	XMLName xml.Name `xml:"SelectObjectContentRequest" json:"-"`
	// ServerSideEncryption is excluded from the XML body (`xml:"-"`); the
	// Header method turns it into request headers when its type is
	// encrypt.SSEC.
	ServerSideEncryption encrypt.ServerSide `xml:"-"`
	// Expression is the query text; ExpressionType names its syntax.
	Expression     string
	ExpressionType QueryExpressionType
	// InputSerialization and OutputSerialization hold the serialization
	// parameters for the input object and for the returned records.
	InputSerialization  SelectObjectInputSerialization
	OutputSerialization SelectObjectOutputSerialization
	// RequestProgress.Enabled presumably asks the server to send Progress
	// events — NOTE(review): confirm against the S3 Select API reference.
	RequestProgress struct {
		Enabled bool
	}
}
|
|
|
|
// Header returns the http.Header representation of the SelectObject options.
|
|
func (o SelectObjectOptions) Header() http.Header {
|
|
headers := make(http.Header)
|
|
if o.ServerSideEncryption != nil && o.ServerSideEncryption.Type() == encrypt.SSEC {
|
|
o.ServerSideEncryption.Marshal(headers)
|
|
}
|
|
return headers
|
|
}
|
|
|
|
// SelectObjectType - is the parameter which defines what type of object the
// operation is being performed on.
type SelectObjectType string

// Constants for input data types.
//
// The type is written on every line so that each constant is a
// SelectObjectType rather than an untyped string constant.
const (
	SelectObjectTypeCSV     SelectObjectType = "CSV"
	SelectObjectTypeJSON    SelectObjectType = "JSON"
	SelectObjectTypeParquet SelectObjectType = "Parquet"
)
|
|
|
|
// preludeInfo holds the two lengths read from a message prelude:
// totalLen comes from the first four bytes and headerLen from the next four.
type preludeInfo struct {
	totalLen, headerLen uint32
}
|
|
|
|
// Types backing the streamed select response.
type (
	// SelectResults is the handle for a streaming select response. Record
	// data is read through pipeReader, while stats and progress receive
	// the decoded Stats and Progress events.
	SelectResults struct {
		pipeReader *io.PipeReader
		resp       *http.Response
		stats      *StatsMessage
		progress   *ProgressMessage
	}

	// ProgressMessage is the decoded form of a Progress xml message. It
	// has the same counters as StatsMessage under a Progress root element.
	ProgressMessage struct {
		XMLName xml.Name `xml:"Progress" json:"-"`
		StatsMessage
	}

	// StatsMessage is the decoded form of a Stats xml message.
	StatsMessage struct {
		XMLName xml.Name `xml:"Stats" json:"-"`

		BytesScanned, BytesProcessed, BytesReturned int64
	}
)
|
|
|
|
// messageType represents the type of message.
type messageType string

// Message types read from the "message-type" header. Both constants are
// typed explicitly so that neither is an untyped string constant.
const (
	errorMsg  messageType = "error"
	commonMsg messageType = "event"
)
|
|
|
|
// eventType represents the type of event.
type eventType string

// list of event-types returned by Select API.
//
// Every constant is typed explicitly so that none of them is an untyped
// string constant.
const (
	endEvent      eventType = "End"
	recordsEvent  eventType = "Records"
	progressEvent eventType = "Progress"
	statsEvent    eventType = "Stats"
)
|
|
|
|
// contentType is the content type announced for an event payload.
type contentType string

// xmlContent is the content type value for xml payloads.
const xmlContent contentType = "text/xml"
|
|
|
|
// SelectObjectContent is a implementation of http://docs.aws.amazon.com/AmazonS3/latest/API/RESTObjectSELECTContent.html AWS S3 API.
|
|
func (c Client) SelectObjectContent(ctx context.Context, bucketName, objectName string, opts SelectObjectOptions) (*SelectResults, error) {
|
|
// Input validation.
|
|
if err := s3utils.CheckValidBucketName(bucketName); err != nil {
|
|
return nil, err
|
|
}
|
|
if err := s3utils.CheckValidObjectName(objectName); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
selectReqBytes, err := xml.Marshal(opts)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
urlValues := make(url.Values)
|
|
urlValues.Set("select", "")
|
|
urlValues.Set("select-type", "2")
|
|
|
|
// Execute POST on bucket/object.
|
|
resp, err := c.executeMethod(ctx, "POST", requestMetadata{
|
|
bucketName: bucketName,
|
|
objectName: objectName,
|
|
queryValues: urlValues,
|
|
customHeader: opts.Header(),
|
|
contentMD5Base64: sumMD5Base64(selectReqBytes),
|
|
contentSHA256Hex: sum256Hex(selectReqBytes),
|
|
contentBody: bytes.NewReader(selectReqBytes),
|
|
contentLength: int64(len(selectReqBytes)),
|
|
})
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if resp.StatusCode != http.StatusOK {
|
|
return nil, httpRespToErrorResponse(resp, bucketName, "")
|
|
}
|
|
|
|
pipeReader, pipeWriter := io.Pipe()
|
|
streamer := &SelectResults{
|
|
resp: resp,
|
|
stats: &StatsMessage{},
|
|
progress: &ProgressMessage{},
|
|
pipeReader: pipeReader,
|
|
}
|
|
streamer.start(pipeWriter)
|
|
return streamer, nil
|
|
}
|
|
|
|
// Close - closes the underlying response body and the stream reader.
|
|
func (s *SelectResults) Close() error {
|
|
defer closeResponse(s.resp)
|
|
return s.pipeReader.Close()
|
|
}
|
|
|
|
// Read - is a reader compatible implementation for SelectObjectContent records.
|
|
func (s *SelectResults) Read(b []byte) (n int, err error) {
|
|
return s.pipeReader.Read(b)
|
|
}
|
|
|
|
// Stats - information about a request's stats when processing is complete.
|
|
func (s *SelectResults) Stats() *StatsMessage {
|
|
return s.stats
|
|
}
|
|
|
|
// Progress - information about the progress of a request.
|
|
func (s *SelectResults) Progress() *ProgressMessage {
|
|
return s.progress
|
|
}
|
|
|
|
// start is the main function that decodes the large byte array into
|
|
// several events that are sent through the eventstream.
|
|
func (s *SelectResults) start(pipeWriter *io.PipeWriter) {
|
|
go func() {
|
|
for {
|
|
var prelude preludeInfo
|
|
var headers = make(http.Header)
|
|
var err error
|
|
|
|
// Create CRC code
|
|
crc := crc32.New(crc32.IEEETable)
|
|
crcReader := io.TeeReader(s.resp.Body, crc)
|
|
|
|
// Extract the prelude(12 bytes) into a struct to extract relevant information.
|
|
prelude, err = processPrelude(crcReader, crc)
|
|
if err != nil {
|
|
pipeWriter.CloseWithError(err)
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
|
|
// Extract the headers(variable bytes) into a struct to extract relevant information
|
|
if prelude.headerLen > 0 {
|
|
if err = extractHeader(io.LimitReader(crcReader, int64(prelude.headerLen)), headers); err != nil {
|
|
pipeWriter.CloseWithError(err)
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
}
|
|
|
|
// Get the actual payload length so that the appropriate amount of
|
|
// bytes can be read or parsed.
|
|
payloadLen := prelude.PayloadLen()
|
|
|
|
m := messageType(headers.Get("message-type"))
|
|
|
|
switch m {
|
|
case errorMsg:
|
|
pipeWriter.CloseWithError(errors.New("Error Type of " + headers.Get("error-type") + " " + headers.Get("error-message")))
|
|
closeResponse(s.resp)
|
|
return
|
|
case commonMsg:
|
|
// Get content-type of the payload.
|
|
c := contentType(headers.Get("content-type"))
|
|
|
|
// Get event type of the payload.
|
|
e := eventType(headers.Get("event-type"))
|
|
|
|
// Handle all supported events.
|
|
switch e {
|
|
case endEvent:
|
|
pipeWriter.Close()
|
|
closeResponse(s.resp)
|
|
return
|
|
case recordsEvent:
|
|
if _, err = io.Copy(pipeWriter, io.LimitReader(crcReader, payloadLen)); err != nil {
|
|
pipeWriter.CloseWithError(err)
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
case progressEvent:
|
|
switch c {
|
|
case xmlContent:
|
|
if err = xmlDecoder(io.LimitReader(crcReader, payloadLen), s.progress); err != nil {
|
|
pipeWriter.CloseWithError(err)
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
default:
|
|
pipeWriter.CloseWithError(fmt.Errorf("Unexpected content-type %s sent for event-type %s", c, progressEvent))
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
case statsEvent:
|
|
switch c {
|
|
case xmlContent:
|
|
if err = xmlDecoder(io.LimitReader(crcReader, payloadLen), s.stats); err != nil {
|
|
pipeWriter.CloseWithError(err)
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
default:
|
|
pipeWriter.CloseWithError(fmt.Errorf("Unexpected content-type %s sent for event-type %s", c, statsEvent))
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// Ensures that the full message's CRC is correct and
|
|
// that the message is not corrupted
|
|
if err := checkCRC(s.resp.Body, crc.Sum32()); err != nil {
|
|
pipeWriter.CloseWithError(err)
|
|
closeResponse(s.resp)
|
|
return
|
|
}
|
|
|
|
}
|
|
}()
|
|
}
|
|
|
|
// PayloadLen is a function that calculates the length of the payload.
|
|
func (p preludeInfo) PayloadLen() int64 {
|
|
return int64(p.totalLen - p.headerLen - 16)
|
|
}
|
|
|
|
// processPrelude is the function that reads the 12 bytes of the prelude and
|
|
// ensures the CRC is correct while also extracting relevant information into
|
|
// the struct,
|
|
func processPrelude(prelude io.Reader, crc hash.Hash32) (preludeInfo, error) {
|
|
var err error
|
|
var pInfo = preludeInfo{}
|
|
|
|
// reads total length of the message (first 4 bytes)
|
|
pInfo.totalLen, err = extractUint32(prelude)
|
|
if err != nil {
|
|
return pInfo, err
|
|
}
|
|
|
|
// reads total header length of the message (2nd 4 bytes)
|
|
pInfo.headerLen, err = extractUint32(prelude)
|
|
if err != nil {
|
|
return pInfo, err
|
|
}
|
|
|
|
// checks that the CRC is correct (3rd 4 bytes)
|
|
preCRC := crc.Sum32()
|
|
if err := checkCRC(prelude, preCRC); err != nil {
|
|
return pInfo, err
|
|
}
|
|
|
|
return pInfo, nil
|
|
}
|
|
|
|
// extracts the relevant information from the Headers.
|
|
func extractHeader(body io.Reader, myHeaders http.Header) error {
|
|
for {
|
|
// extracts the first part of the header,
|
|
headerTypeName, err := extractHeaderType(body)
|
|
if err != nil {
|
|
// Since end of file, we have read all of our headers
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
return err
|
|
}
|
|
|
|
// reads the 7 present in the header and ignores it.
|
|
extractUint8(body)
|
|
|
|
headerValueName, err := extractHeaderValue(body)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
myHeaders.Set(headerTypeName, headerValueName)
|
|
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// extractHeaderType extracts the first half of the header message, the header type.
|
|
func extractHeaderType(body io.Reader) (string, error) {
|
|
// extracts 2 bit integer
|
|
headerNameLen, err := extractUint8(body)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
// extracts the string with the appropriate number of bytes
|
|
headerName, err := extractString(body, int(headerNameLen))
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return strings.TrimPrefix(headerName, ":"), nil
|
|
}
|
|
|
|
// extractsHeaderValue extracts the second half of the header message, the
|
|
// header value
|
|
func extractHeaderValue(body io.Reader) (string, error) {
|
|
bodyLen, err := extractUint16(body)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
bodyName, err := extractString(body, int(bodyLen))
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return bodyName, nil
|
|
}
|
|
|
|
// extractString reads exactly lenBytes bytes from source and returns them as
// a string.
//
// io.ReadFull is used instead of a single Read call: a Reader may return
// fewer bytes than requested without an error (network bodies routinely do),
// which would otherwise yield a string padded with zero bytes and leave the
// rest of the value in the stream. If the source ends early the error is
// io.EOF when nothing was read and io.ErrUnexpectedEOF when only part of the
// string was read; the returned string is empty in both cases.
func extractString(source io.Reader, lenBytes int) (string, error) {
	myVal := make([]byte, lenBytes)
	if _, err := io.ReadFull(source, myVal); err != nil {
		return "", err
	}
	return string(myVal), nil
}
|
|
|
|
// extractUint32 reads four bytes from r and decodes them as a big-endian
// unsigned integer. If r cannot supply four bytes the error from
// io.ReadFull is returned together with 0.
func extractUint32(r io.Reader) (uint32, error) {
	var raw [4]byte
	if _, err := io.ReadFull(r, raw[:]); err != nil {
		return 0, err
	}
	return binary.BigEndian.Uint32(raw[:]), nil
}
|
|
|
|
// extractUint16 reads two bytes from r and decodes them as a big-endian
// unsigned integer. If r cannot supply two bytes the error from io.ReadFull
// is returned together with 0.
func extractUint16(r io.Reader) (uint16, error) {
	var raw [2]byte
	if _, err := io.ReadFull(r, raw[:]); err != nil {
		return 0, err
	}
	return binary.BigEndian.Uint16(raw[:]), nil
}
|
|
|
|
// extractUint8 reads a single byte from r and returns it. If r cannot
// supply a byte the error from io.ReadFull is returned together with 0.
func extractUint8(r io.Reader) (uint8, error) {
	var raw [1]byte
	if _, err := io.ReadFull(r, raw[:]); err != nil {
		return 0, err
	}
	return raw[0], nil
}
|
|
|
|
// checkCRC ensures that the CRC matches with the one from the reader.
|
|
func checkCRC(r io.Reader, expect uint32) error {
|
|
msgCRC, err := extractUint32(r)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if msgCRC != expect {
|
|
return fmt.Errorf("Checksum Mismatch, MessageCRC of 0x%X does not equal expected CRC of 0x%X", msgCRC, expect)
|
|
|
|
}
|
|
return nil
|
|
}
|