2
2
mirror of https://github.com/octoleo/restic.git synced 2024-12-21 02:19:04 +00:00
restic/internal/backend/gs/gs.go

358 lines
9.5 KiB
Go
Raw Normal View History

// Package gs provides a restic backend for Google Cloud Storage.
package gs
import (
"context"
"crypto/md5"
"hash"
"io"
"net/http"
"os"
"path"
"strings"
"cloud.google.com/go/storage"
"github.com/pkg/errors"
"github.com/restic/restic/internal/backend"
"github.com/restic/restic/internal/backend/layout"
"github.com/restic/restic/internal/backend/location"
"github.com/restic/restic/internal/backend/util"
"github.com/restic/restic/internal/debug"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
"google.golang.org/api/googleapi"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
)
// Backend stores data in a GCS bucket.
//
// The service account used to access the bucket must have these permissions:
// - storage.objects.create
// - storage.objects.delete
// - storage.objects.get
// - storage.objects.list
type Backend struct {
gcsClient *storage.Client
projectID string
2021-08-07 20:20:49 +00:00
connections uint
bucketName string
2023-02-27 06:53:25 +00:00
region string
bucket *storage.BucketHandle
prefix string
listMaxItems int
layout.Layout
}
// Compile-time check that *Backend satisfies backend.Backend.
var _ backend.Backend = (*Backend)(nil)
// NewFactory returns the location.Factory for this backend. It is built by
// location.NewHTTPBackendFactory from the name "gs", ParseConfig,
// location.NoPassword, Create and Open.
func NewFactory() location.Factory {
	return location.NewHTTPBackendFactory(
		"gs",
		ParseConfig,
		location.NoPassword,
		Create,
		Open,
	)
}
// getStorageClient creates a GCS client that uses rt as its HTTP transport.
//
// When the GOOGLE_ACCESS_TOKEN environment variable is non-empty, its value is
// used as a static bearer token. Otherwise the token source is obtained from
// google.DefaultTokenSource for the storage read/write scope.
func getStorageClient(rt http.RoundTripper) (*storage.Client, error) {
	// Store an HTTP client built on rt in the context under the
	// oauth2.HTTPClient key.
	baseClient := &http.Client{Transport: rt}
	ctx := context.WithValue(context.Background(), oauth2.HTTPClient, baseClient)

	var (
		ts  oauth2.TokenSource
		err error
	)
	switch token := os.Getenv("GOOGLE_ACCESS_TOKEN"); token {
	case "":
		ts, err = google.DefaultTokenSource(ctx, storage.ScopeReadWrite)
		if err != nil {
			return nil, err
		}
	default:
		ts = oauth2.StaticTokenSource(&oauth2.Token{
			AccessToken: token,
			TokenType:   "Bearer",
		})
	}

	authedClient := oauth2.NewClient(ctx, ts)
	client, err := storage.NewClient(ctx, option.WithHTTPClient(authedClient))
	if err != nil {
		return nil, err
	}
	return client, nil
}
// bucketExists reports whether bucket exists by requesting its attributes.
//
// It returns (true, nil) when the lookup succeeds, (false, nil) when the
// lookup fails with storage.ErrBucketNotExist, and (false, err) for any other
// error.
func (be *Backend) bucketExists(ctx context.Context, bucket *storage.BucketHandle) (bool, error) {
	_, err := bucket.Attrs(ctx)
	switch {
	case err == nil:
		return true, nil
	case errors.Is(err, storage.ErrBucketNotExist):
		// errors.Is also matches the sentinel when it has been wrapped, which
		// a plain == comparison would miss.
		return false, nil
	default:
		return false, err
	}
}
// defaultListMaxItems is the value open assigns to Backend.listMaxItems.
const defaultListMaxItems = 1000
func open(cfg Config, rt http.RoundTripper) (*Backend, error) {
debug.Log("open, config %#v", cfg)
gcsClient, err := getStorageClient(rt)
if err != nil {
return nil, errors.Wrap(err, "getStorageClient")
}
be := &Backend{
2021-08-07 20:20:49 +00:00
gcsClient: gcsClient,
projectID: cfg.ProjectID,
connections: cfg.Connections,
bucketName: cfg.Bucket,
2023-02-27 06:53:25 +00:00
region: cfg.Region,
2021-08-07 20:20:49 +00:00
bucket: gcsClient.Bucket(cfg.Bucket),
prefix: cfg.Prefix,
Layout: &layout.DefaultLayout{
Path: cfg.Prefix,
Join: path.Join,
},
listMaxItems: defaultListMaxItems,
}
return be, nil
}
// Open opens the gs backend at the specified bucket.
//
// The context argument is ignored. On failure the returned backend is an
// untyped nil.
func Open(_ context.Context, cfg Config, rt http.RoundTripper) (backend.Backend, error) {
	be, err := open(cfg, rt)
	if err != nil {
		// Returning open's results directly would convert the nil *Backend
		// into a non-nil backend.Backend interface value.
		return nil, err
	}
	return be, nil
}
gs: allow backend creation without storage.buckets.get If the service account used with restic does not have the storage.buckets.get permission (in the "Storage Admin" role), Create cannot use Get to determine if the bucket is accessible. Rather than always trying to create the bucket on Get error, gracefully fall back to assuming the bucket is accessible. If it is, restic init will complete successfully. If it is not, it will fail on a later call. Here is what init looks like now in different cases. Service account without "Storage Admin": Bucket exists and is accessible (this is the case that didn't work before): $ ./restic init -r gs:this-bucket-does-exist:/ enter password for new backend: enter password again: created restic backend c02e2edb67 at gs:this-bucket-does-exist:/ Please note that knowledge of your password is required to access the repository. Losing your password means that your data is irrecoverably lost. Bucket exists but is not accessible: $ ./restic init -r gs:this-bucket-does-exist:/ enter password for new backend: enter password again: create key in backend at gs:this-bucket-does-exist:/ failed: service.Objects.Insert: googleapi: Error 403: my-service-account@myproject.iam.gserviceaccount.com does not have storage.objects.create access to object this-bucket-exists/keys/0fa714e695c8ecd58cb467cdeb04d36f3b710f883496a90f23cae0315daf0b93., forbidden Bucket does not exist: $ ./restic init -r gs:this-bucket-does-not-exist:/ create backend at gs:this-bucket-does-not-exist:/ failed: service.Buckets.Insert: googleapi: Error 403: my-service-account@myproject.iam.gserviceaccount.com does not have storage.buckets.create access to bucket this-bucket-does-not-exist., forbidden Service account with "Storage Admin": Bucket exists and is accessible: Same Bucket exists but is not accessible: Same. Previously this would fail when Create tried to create the bucket. Now it fails when trying to create the keys. 
Bucket does not exist: $ ./restic init -r gs:this-bucket-does-not-exist:/ enter password for new backend: enter password again: created restic backend c3c48b481d at gs:this-bucket-does-not-exist:/ Please note that knowledge of your password is required to access the repository. Losing your password means that your data is irrecoverably lost.
2017-09-26 04:53:21 +00:00
// Create opens the gs backend at the specified bucket and attempts to creates
// the bucket if it does not exist yet.
//
gs: allow backend creation without storage.buckets.get If the service account used with restic does not have the storage.buckets.get permission (in the "Storage Admin" role), Create cannot use Get to determine if the bucket is accessible. Rather than always trying to create the bucket on Get error, gracefully fall back to assuming the bucket is accessible. If it is, restic init will complete successfully. If it is not, it will fail on a later call. Here is what init looks like now in different cases. Service account without "Storage Admin": Bucket exists and is accessible (this is the case that didn't work before): $ ./restic init -r gs:this-bucket-does-exist:/ enter password for new backend: enter password again: created restic backend c02e2edb67 at gs:this-bucket-does-exist:/ Please note that knowledge of your password is required to access the repository. Losing your password means that your data is irrecoverably lost. Bucket exists but is not accessible: $ ./restic init -r gs:this-bucket-does-exist:/ enter password for new backend: enter password again: create key in backend at gs:this-bucket-does-exist:/ failed: service.Objects.Insert: googleapi: Error 403: my-service-account@myproject.iam.gserviceaccount.com does not have storage.objects.create access to object this-bucket-exists/keys/0fa714e695c8ecd58cb467cdeb04d36f3b710f883496a90f23cae0315daf0b93., forbidden Bucket does not exist: $ ./restic init -r gs:this-bucket-does-not-exist:/ create backend at gs:this-bucket-does-not-exist:/ failed: service.Buckets.Insert: googleapi: Error 403: my-service-account@myproject.iam.gserviceaccount.com does not have storage.buckets.create access to bucket this-bucket-does-not-exist., forbidden Service account with "Storage Admin": Bucket exists and is accessible: Same Bucket exists but is not accessible: Same. Previously this would fail when Create tried to create the bucket. Now it fails when trying to create the keys. 
Bucket does not exist: $ ./restic init -r gs:this-bucket-does-not-exist:/ enter password for new backend: enter password again: created restic backend c3c48b481d at gs:this-bucket-does-not-exist:/ Please note that knowledge of your password is required to access the repository. Losing your password means that your data is irrecoverably lost.
2017-09-26 04:53:21 +00:00
// The service account must have the "storage.buckets.create" permission to
// create a bucket the does not yet exist.
func Create(ctx context.Context, cfg Config, rt http.RoundTripper) (backend.Backend, error) {
be, err := open(cfg, rt)
if err != nil {
return nil, errors.Wrap(err, "open")
}
gs: allow backend creation without storage.buckets.get If the service account used with restic does not have the storage.buckets.get permission (in the "Storage Admin" role), Create cannot use Get to determine if the bucket is accessible. Rather than always trying to create the bucket on Get error, gracefully fall back to assuming the bucket is accessible. If it is, restic init will complete successfully. If it is not, it will fail on a later call. Here is what init looks like now in different cases. Service account without "Storage Admin": Bucket exists and is accessible (this is the case that didn't work before): $ ./restic init -r gs:this-bucket-does-exist:/ enter password for new backend: enter password again: created restic backend c02e2edb67 at gs:this-bucket-does-exist:/ Please note that knowledge of your password is required to access the repository. Losing your password means that your data is irrecoverably lost. Bucket exists but is not accessible: $ ./restic init -r gs:this-bucket-does-exist:/ enter password for new backend: enter password again: create key in backend at gs:this-bucket-does-exist:/ failed: service.Objects.Insert: googleapi: Error 403: my-service-account@myproject.iam.gserviceaccount.com does not have storage.objects.create access to object this-bucket-exists/keys/0fa714e695c8ecd58cb467cdeb04d36f3b710f883496a90f23cae0315daf0b93., forbidden Bucket does not exist: $ ./restic init -r gs:this-bucket-does-not-exist:/ create backend at gs:this-bucket-does-not-exist:/ failed: service.Buckets.Insert: googleapi: Error 403: my-service-account@myproject.iam.gserviceaccount.com does not have storage.buckets.create access to bucket this-bucket-does-not-exist., forbidden Service account with "Storage Admin": Bucket exists and is accessible: Same Bucket exists but is not accessible: Same. Previously this would fail when Create tried to create the bucket. Now it fails when trying to create the keys. 
Bucket does not exist: $ ./restic init -r gs:this-bucket-does-not-exist:/ enter password for new backend: enter password again: created restic backend c3c48b481d at gs:this-bucket-does-not-exist:/ Please note that knowledge of your password is required to access the repository. Losing your password means that your data is irrecoverably lost.
2017-09-26 04:53:21 +00:00
// Try to determine if the bucket exists. If it does not, try to create it.
exists, err := be.bucketExists(ctx, be.bucket)
if err != nil {
if e, ok := err.(*googleapi.Error); ok && e.Code == http.StatusForbidden {
// the bucket might exist!
// however, the client doesn't have storage.bucket.get permission
return be, nil
}
return nil, errors.Wrap(err, "service.Buckets.Get")
}
if !exists {
2023-02-27 06:53:25 +00:00
bucketAttrs := &storage.BucketAttrs{
Location: cfg.Region,
}
// Bucket doesn't exist, try to create it.
2023-02-27 06:53:25 +00:00
if err := be.bucket.Create(ctx, be.projectID, bucketAttrs); err != nil {
// Always an error, as the bucket definitely doesn't exist.
return nil, errors.Wrap(err, "service.Buckets.Insert")
}
}
return be, nil
}
// SetListMaxItems stores i as the number of list items to load per request,
// replacing the value assigned by open.
func (be *Backend) SetListMaxItems(i int) {
	be.listMaxItems = i
}
// IsNotExist reports whether err is, or wraps, storage.ErrObjectNotExist,
// i.e. whether it was caused by a file that does not exist.
func (be *Backend) IsNotExist(err error) bool {
	missing := errors.Is(err, storage.ErrObjectNotExist)
	return missing
}
// Join combines the given path components with slashes using path.Join.
func (be *Backend) Join(p ...string) string {
	joined := path.Join(p...)
	return joined
}
2021-08-07 20:20:49 +00:00
// Connections returns the connection count this backend was configured with
// (cfg.Connections at open time).
func (be *Backend) Connections() uint {
	n := be.connections
	return n
}
// Location returns this backend's location: the bucket name joined with the
// prefix by slashes.
func (be *Backend) Location() string {
	return path.Join(be.bucketName, be.prefix)
}
// Hasher returns a new MD5 hash for calculating the content hash of data
// stored in this backend.
func (be *Backend) Hasher() hash.Hash {
	h := md5.New()
	return h
}
// HasAtomicReplace reports whether Save() can atomically replace files; for
// this backend it always returns true.
func (be *Backend) HasAtomicReplace() bool {
	const atomicReplace = true
	return atomicReplace
}
// Path returns the prefix inside the bucket under which this backend stores
// its data.
func (be *Backend) Path() string {
	p := be.prefix
	return p
}
// Save stores data in the backend at the handle.
func (be *Backend) Save(ctx context.Context, h backend.Handle, rd backend.RewindReader) error {
objName := be.Filename(h)
gs: disable resumable uploads By default, the GCS Go packages have an internal "chunk size" of 8MB, used for blob uploads. Media().Do() will buffer a full 8MB from the io.Reader (or less if EOF is reached) then write that full 8MB to the network all at once. This behavior does not play nicely with --limit-upload, which only limits the Reader passed to Media. While the long-term average upload rate will be correctly limited, the actual network bandwidth will be very spikey. e.g., if an 8MB/s connection is limited to 1MB/s, Media().Do() will spend 8s reading from the rate-limited reader (performing no network requests), then 1s writing to the network at 8MB/s. This is bad for network connections hurt by full-speed uploads, particularly when writing 8MB will take several seconds. Disable resumable uploads entirely by setting the chunk size to zero. This causes the io.Reader to be passed further down the request stack, where there is less (but still some) buffering. My connection is around 1.5MB/s up, with nominal ~15ms ping times to 8.8.8.8. Without this change, --limit-upload 1024 results in several seconds of ~200ms ping times (uploading), followed by several seconds of ~15ms ping times (reading from rate-limited reader). A bandwidth monitor reports this as several seconds of ~1.5MB/s followed by several seconds of 0.0MB/s. With this change, --limit-upload 1024 results in ~20ms ping times and the bandwidth monitor reports a constant ~1MB/s. I've elected to make this change unconditional of --limit-upload because the resumable uploads shouldn't be providing much benefit anyways, as restic already uploads mostly small blobs and already has a retry mechanism. --limit-download is not affected by this problem, as Get().Download() returns the real http.Response.Body without any internal buffering. Updates #1216
2017-10-18 04:04:35 +00:00
// Set chunk size to zero to disable resumable uploads.
//
// With a non-zero chunk size (the default is
// googleapi.DefaultUploadChunkSize, 8MB), Insert will buffer data from
// rd in chunks of this size so it can upload these chunks in
// individual requests.
//
// This chunking allows the library to automatically handle network
// interruptions and re-upload only the last chunk rather than the full
// file.
//
// Unfortunately, this buffering doesn't play nicely with
// --limit-upload, which applies a rate limit to rd. This rate limit
// ends up only limiting the read from rd into the buffer rather than
// the network traffic itself. This results in poor network rate limit
// behavior, where individual chunks are written to the network at full
// bandwidth for several seconds, followed by several seconds of no
// network traffic as the next chunk is read through the rate limiter.
//
// By disabling chunking, rd is passed further down the request stack,
// where there is less (but some) buffering, which ultimately results
// in better rate limiting behavior.
//
// restic typically writes small blobs (4MB-30MB), so the resumable
// uploads are not providing significant benefit anyways.
w := be.bucket.Object(objName).NewWriter(ctx)
w.ChunkSize = 0
w.MD5 = rd.Hash()
wbytes, err := io.Copy(w, rd)
cerr := w.Close()
if err == nil {
err = cerr
}
gs: fix nil dereference info can be nil if err != nil, resulting in a nil dereference while logging: $ # GCS config $ ./restic init debug enabled panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x935947] goroutine 1 [running]: github.com/restic/restic/internal/backend/gs.(*Backend).Save(0xc420012690, 0xe84e80, 0xc420010448, 0xb57149, 0x3, 0xc4203fc140, 0x40, 0xe7be40, 0xc4201d8f90, 0xa0, ...) src/github.com/restic/restic/internal/backend/gs/gs.go:226 +0x6d7 github.com/restic/restic/internal/repository.AddKey(0xe84e80, 0xc420010448, 0xc4202f0360, 0xc42000a1b0, 0x4, 0x0, 0xa55b60, 0xc4203043e0, 0xa55420) src/github.com/restic/restic/internal/repository/key.go:235 +0x4a1 github.com/restic/restic/internal/repository.createMasterKey(0xc4202f0360, 0xc42000a1b0, 0x4, 0xa55420, 0xc420304370, 0x6a6070) src/github.com/restic/restic/internal/repository/key.go:62 +0x60 github.com/restic/restic/internal/repository.(*Repository).init(0xc4202f0360, 0xe84e80, 0xc420010448, 0xc42000a1b0, 0x4, 0x1, 0xc42030a440, 0x40, 0x32a4573d3d9eb5, 0x0, ...) src/github.com/restic/restic/internal/repository/repository.go:403 +0x5d github.com/restic/restic/internal/repository.(*Repository).Init(0xc4202f0360, 0xe84e80, 0xc420010448, 0xc42000a1b0, 0x4, 0xe84e40, 0xc42004ad80) src/github.com/restic/restic/internal/repository/repository.go:397 +0x12c main.runInit(0xc420018072, 0x16, 0x0, 0x0, 0x0, 0xe84e40, 0xc42004ad80, 0xc42000a1b0, 0x4, 0xe7dac0, ...) 
src/github.com/restic/restic/cmd/restic/cmd_init.go:47 +0x2a4 main.glob..func9(0xeb5000, 0xedad70, 0x0, 0x0, 0x0, 0x0) src/github.com/restic/restic/cmd/restic/cmd_init.go:20 +0x8e github.com/restic/restic/vendor/github.com/spf13/cobra.(*Command).execute(0xeb5000, 0xedad70, 0x0, 0x0, 0xeb5000, 0xedad70) src/github.com/restic/restic/vendor/github.com/spf13/cobra/command.go:649 +0x457 github.com/restic/restic/vendor/github.com/spf13/cobra.(*Command).ExecuteC(0xeb3e00, 0xc420011650, 0xa55b60, 0xc420011660) src/github.com/restic/restic/vendor/github.com/spf13/cobra/command.go:728 +0x339 github.com/restic/restic/vendor/github.com/spf13/cobra.(*Command).Execute(0xeb3e00, 0x25, 0xc4201a7eb8) src/github.com/restic/restic/vendor/github.com/spf13/cobra/command.go:687 +0x2b main.main() src/github.com/restic/restic/cmd/restic/main.go:72 +0x268 (The error was likely because I had just enabled the GCS API. Subsequent runs were fine.)
2017-08-28 04:28:39 +00:00
if err != nil {
return errors.Wrap(err, "service.Objects.Insert")
}
// sanity check
if wbytes != rd.Length() {
return errors.Errorf("wrote %d bytes instead of the expected %d bytes", wbytes, rd.Length())
}
gs: fix nil dereference info can be nil if err != nil, resulting in a nil dereference while logging: $ # GCS config $ ./restic init debug enabled panic: runtime error: invalid memory address or nil pointer dereference [signal SIGSEGV: segmentation violation code=0x1 addr=0x0 pc=0x935947] goroutine 1 [running]: github.com/restic/restic/internal/backend/gs.(*Backend).Save(0xc420012690, 0xe84e80, 0xc420010448, 0xb57149, 0x3, 0xc4203fc140, 0x40, 0xe7be40, 0xc4201d8f90, 0xa0, ...) src/github.com/restic/restic/internal/backend/gs/gs.go:226 +0x6d7 github.com/restic/restic/internal/repository.AddKey(0xe84e80, 0xc420010448, 0xc4202f0360, 0xc42000a1b0, 0x4, 0x0, 0xa55b60, 0xc4203043e0, 0xa55420) src/github.com/restic/restic/internal/repository/key.go:235 +0x4a1 github.com/restic/restic/internal/repository.createMasterKey(0xc4202f0360, 0xc42000a1b0, 0x4, 0xa55420, 0xc420304370, 0x6a6070) src/github.com/restic/restic/internal/repository/key.go:62 +0x60 github.com/restic/restic/internal/repository.(*Repository).init(0xc4202f0360, 0xe84e80, 0xc420010448, 0xc42000a1b0, 0x4, 0x1, 0xc42030a440, 0x40, 0x32a4573d3d9eb5, 0x0, ...) src/github.com/restic/restic/internal/repository/repository.go:403 +0x5d github.com/restic/restic/internal/repository.(*Repository).Init(0xc4202f0360, 0xe84e80, 0xc420010448, 0xc42000a1b0, 0x4, 0xe84e40, 0xc42004ad80) src/github.com/restic/restic/internal/repository/repository.go:397 +0x12c main.runInit(0xc420018072, 0x16, 0x0, 0x0, 0x0, 0xe84e40, 0xc42004ad80, 0xc42000a1b0, 0x4, 0xe7dac0, ...) 
src/github.com/restic/restic/cmd/restic/cmd_init.go:47 +0x2a4 main.glob..func9(0xeb5000, 0xedad70, 0x0, 0x0, 0x0, 0x0) src/github.com/restic/restic/cmd/restic/cmd_init.go:20 +0x8e github.com/restic/restic/vendor/github.com/spf13/cobra.(*Command).execute(0xeb5000, 0xedad70, 0x0, 0x0, 0xeb5000, 0xedad70) src/github.com/restic/restic/vendor/github.com/spf13/cobra/command.go:649 +0x457 github.com/restic/restic/vendor/github.com/spf13/cobra.(*Command).ExecuteC(0xeb3e00, 0xc420011650, 0xa55b60, 0xc420011660) src/github.com/restic/restic/vendor/github.com/spf13/cobra/command.go:728 +0x339 github.com/restic/restic/vendor/github.com/spf13/cobra.(*Command).Execute(0xeb3e00, 0x25, 0xc4201a7eb8) src/github.com/restic/restic/vendor/github.com/spf13/cobra/command.go:687 +0x2b main.main() src/github.com/restic/restic/cmd/restic/main.go:72 +0x268 (The error was likely because I had just enabled the GCS API. Subsequent runs were fine.)
2017-08-28 04:28:39 +00:00
return nil
}
// Load runs fn with a reader that yields the contents of the file at h at the
// given offset. The work is delegated to util.DefaultLoad, using openReader to
// open the object, under a derived context that is cancelled when Load
// returns.
func (be *Backend) Load(ctx context.Context, h backend.Handle, length int, offset int64, fn func(rd io.Reader) error) error {
	loadCtx, stop := context.WithCancel(ctx)
	defer stop()

	return util.DefaultLoad(loadCtx, h, length, offset, be.openReader, fn)
}
// openReader opens a range reader on the object for h, starting at offset.
// A length of 0 is passed to the GCS library as -1, which it interprets as
// "read until the end of the object".
func (be *Backend) openReader(ctx context.Context, h backend.Handle, length int, offset int64) (io.ReadCloser, error) {
	readLen := int64(length)
	if length == 0 {
		// negative length indicates read till end to GCS lib
		readLen = -1
	}

	rd, err := be.bucket.Object(be.Filename(h)).NewRangeReader(ctx, offset, readLen)
	if err != nil {
		return nil, err
	}
	return rd, nil
}
// Stat returns the size and name of the blob stored at h. A failure to fetch
// the object's attributes is returned wrapped with "service.Objects.Get".
func (be *Backend) Stat(ctx context.Context, h backend.Handle) (bi backend.FileInfo, err error) {
	attrs, err := be.bucket.Object(be.Filename(h)).Attrs(ctx)
	if err != nil {
		return backend.FileInfo{}, errors.Wrap(err, "service.Objects.Get")
	}

	bi = backend.FileInfo{
		Name: h.Name,
		Size: attrs.Size,
	}
	return bi, nil
}
// Remove removes the blob with the given name and type.
func (be *Backend) Remove(ctx context.Context, h backend.Handle) error {
objName := be.Filename(h)
err := be.bucket.Object(objName).Delete(ctx)
2023-04-07 21:16:08 +00:00
if be.IsNotExist(err) {
err = nil
}
return errors.Wrap(err, "client.RemoveObject")
}
// List runs fn for each file in the backend which has the type t. When an
// error occurs (or fn returns an error), List stops and returns it.
//
// Objects are requested in pages of at most listMaxItems entries when that
// value is positive (see SetListMaxItems). An object whose name equals the
// listing prefix itself is skipped.
func (be *Backend) List(ctx context.Context, t backend.FileType, fn func(backend.FileInfo) error) error {
	prefix, _ := be.Basedir(t)

	// make sure prefix ends with a slash
	if !strings.HasSuffix(prefix, "/") {
		prefix += "/"
	}

	ctx, cancel := context.WithCancel(ctx)
	defer cancel()

	itr := be.bucket.Objects(ctx, &storage.Query{Prefix: prefix})

	// Apply the configured page size. Without this, listMaxItems would be
	// stored but never used. Zero or negative values leave the page size to
	// the service.
	if be.listMaxItems > 0 {
		itr.PageInfo().MaxSize = be.listMaxItems
	}

	for {
		attrs, err := itr.Next()
		if err == iterator.Done {
			break
		}
		if err != nil {
			return err
		}

		m := strings.TrimPrefix(attrs.Name, prefix)
		if m == "" {
			continue
		}

		fi := backend.FileInfo{
			Name: path.Base(m),
			Size: attrs.Size,
		}
		if err := fn(fi); err != nil {
			return err
		}

		// Stop promptly once the caller's context has been cancelled.
		if err := ctx.Err(); err != nil {
			return err
		}
	}

	return ctx.Err()
}
// Delete removes all restic keys in the bucket by delegating to
// util.DefaultDelete. It will not remove the bucket itself.
func (be *Backend) Delete(ctx context.Context) error {
	err := util.DefaultDelete(ctx, be)
	return err
}
// Close is a no-op for this backend and always returns nil.
func (be *Backend) Close() error {
	return nil
}