hcloud-upload-image/hcloudimages/client.go

484 lines
15 KiB
Go
Raw Normal View History

2024-05-02 21:42:36 +02:00
package hcloudimages
2024-04-29 21:00:04 +02:00
import (
"context"
"errors"
2024-04-29 21:00:04 +02:00
"fmt"
"io"
"log/slog"
"net/url"
2024-04-29 21:00:04 +02:00
"time"
"github.com/hetznercloud/hcloud-go/v2/hcloud"
"golang.org/x/crypto/ssh"
2024-05-02 21:42:36 +02:00
"github.com/apricote/hcloud-upload-image/hcloudimages/contextlogger"
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/actionutil"
2024-05-02 21:42:36 +02:00
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/control"
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/labelutil"
2024-05-02 21:42:36 +02:00
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/randomid"
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/sshkey"
"github.com/apricote/hcloud-upload-image/hcloudimages/internal/sshsession"
2024-04-29 21:00:04 +02:00
)
// Label and naming constants shared by all resources this package creates.
const (
// CreatedByLabel is the label key used to mark resources that were created by this package.
CreatedByLabel = "apricote.de/created-by"
// CreatedByValue is the label value stored under [CreatedByLabel].
CreatedByValue = "hcloud-upload-image"
// resourcePrefix is prepended to the random run ID to build the name of the temporary SSH key and server.
resourcePrefix = "hcloud-upload-image-"
)
var (
// DefaultLabels is merged into the labels of every resource created by [Client.Upload]. The label selector
// used by [Client.CleanupTempResources] is built from it.
DefaultLabels = map[string]string{
CreatedByLabel: CreatedByValue,
}
// serverTypePerArchitecture maps the image architecture to the server type of the temporary server. It is
// only consulted when [UploadOptions.ServerType] is not set.
serverTypePerArchitecture = map[hcloud.Architecture]*hcloud.ServerType{
hcloud.ArchitectureX86: {Name: "cx11"},
hcloud.ArchitectureARM: {Name: "cax11"},
}
// defaultImage is the image the temporary server is created with. It is never booted, the server only
// boots into the rescue system.
defaultImage = &hcloud.Image{Name: "ubuntu-22.04"}
// defaultLocation is the location the temporary server is created in.
defaultLocation = &hcloud.Location{Name: "fsn1"}
// defaultRescueType is the rescue system type that is enabled on the temporary server.
defaultRescueType = hcloud.ServerRescueTypeLinux64
// defaultSSHDialTimeout is the timeout set on the SSH client config used to connect to the temporary server.
defaultSSHDialTimeout = 1 * time.Minute
)
// UploadOptions configures a single call to [Client.Upload]: where the image comes from, how it is
// compressed, what kind of temporary server is used and how the resulting snapshot is described.
type UploadOptions struct {
// ImageURL must be publicly available. The instance will download the image from this endpoint.
ImageURL *url.URL
// ImageReader provides the image data directly instead of a download URL. It is handed to the SSH
// session that runs the write command on the temporary server; when ImageURL is nil, no download step
// is added to that command.
// NOTE(review): presumably the reader is streamed to the remote command's stdin, confirm in sshsession.Run.
ImageReader io.Reader
// ImageCompression describes the compression of the referenced image file. It defaults to [CompressionNone]. If
// set to anything else, the file will be decompressed before being written to the disk.
ImageCompression Compression
// Possible future additions:
// ImageSignatureVerification
// ImageLocalPath
// ImageType (RawDiskImage, ISO, qcow2, ...)
// Architecture should match the architecture of the Image. This decides if the Snapshot can later be
// used with [hcloud.ArchitectureX86] or [hcloud.ArchitectureARM] servers.
//
// Internally this decides what server type is used for the temporary server.
//
// Optional if [UploadOptions.ServerType] is set.
Architecture hcloud.Architecture
// ServerType can be optionally set to override the default server type for the architecture.
// Situations where this makes sense:
//
// - Your image is larger than the root disk of the default server types.
// - The default server type is no longer available, or temporarily out of stock.
ServerType *hcloud.ServerType
// Description is an optional description that the resulting image (snapshot) will have. There is no way to
// select images by their description, you should use Labels if you need to identify your image later.
Description *string
// Labels will be added to the resulting image (snapshot). Use these to filter the image list if you
// need to identify the image later on.
//
// We also always add a label `apricote.de/created-by=hcloud-upload-image` ([CreatedByLabel], [CreatedByValue]).
Labels map[string]string
// DebugSkipResourceCleanup will skip the cleanup of the temporary SSH Key and Server.
DebugSkipResourceCleanup bool
}
// Compression names the compression format of the image passed to [Client.Upload], see
// [UploadOptions.ImageCompression].
type Compression string
const (
// CompressionNone means the image is written to the disk without a decompression step.
CompressionNone Compression = ""
// CompressionBZ2 means the image is piped through `bzip2 -cd` before it is written to the disk.
CompressionBZ2 Compression = "bz2"
// CompressionXZ means the image is piped through `xz -cd` before it is written to the disk.
CompressionXZ Compression = "xz"
// Possible future additions:
// zip,zstd
)
// NewClient instantiates a new client. It requires a working [*hcloud.Client] to interact with the Hetzner Cloud API.
func NewClient(c *hcloud.Client) *Client {
return &Client{
2024-05-02 21:42:36 +02:00
c: c,
2024-04-29 21:00:04 +02:00
}
}
type Client struct {
2024-05-02 21:42:36 +02:00
c *hcloud.Client
2024-04-29 21:00:04 +02:00
}
// Upload the specified image into a snapshot on Hetzner Cloud.
//
// As the Hetzner Cloud API has no direct way to upload images, we create a temporary server,
// overwrite the root disk and take a snapshot of that disk instead.
//
// The temporary server costs money. If the upload fails, we might be unable to delete the server. Check out
// CleanupTempResources for a helper in this case.
func (s *Client) Upload(ctx context.Context, options UploadOptions) (*hcloud.Image, error) {
2024-04-30 23:48:59 +02:00
logger := contextlogger.From(ctx).With(
"library", "hcloudimages",
2024-04-30 23:48:59 +02:00
"method", "upload",
)
2024-04-29 21:00:04 +02:00
id, err := randomid.Generate()
if err != nil {
return nil, err
}
2024-04-30 23:48:59 +02:00
logger = logger.With("run-id", id)
2024-04-29 21:00:04 +02:00
// For simplicity, we use the name random name for SSH Key + Server
resourceName := resourcePrefix + id
labels := labelutil.Merge(DefaultLabels, options.Labels)
2024-04-29 21:00:04 +02:00
// 1. Create SSH Key
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 1: Generating SSH Key")
2024-04-29 21:00:04 +02:00
publicKey, privateKey, err := sshkey.GenerateKeyPair()
if err != nil {
return nil, fmt.Errorf("failed to generate temporary ssh key pair: %w", err)
}
2024-05-02 21:42:36 +02:00
key, _, err := s.c.SSHKey.Create(ctx, hcloud.SSHKeyCreateOpts{
2024-04-29 21:00:04 +02:00
Name: resourceName,
PublicKey: string(publicKey),
Labels: labels,
2024-04-29 21:00:04 +02:00
})
if err != nil {
return nil, fmt.Errorf("failed to submit temporary ssh key to API: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "Uploaded ssh key", "ssh-key-id", key.ID)
2024-04-29 21:00:04 +02:00
defer func() {
// Cleanup SSH Key
if options.DebugSkipResourceCleanup {
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "Cleanup: Skipping cleanup of temporary ssh key")
2024-04-29 21:00:04 +02:00
return
}
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "Cleanup: Deleting temporary ssh key")
2024-05-02 21:42:36 +02:00
_, err := s.c.SSHKey.Delete(ctx, key)
2024-04-29 21:00:04 +02:00
if err != nil {
2024-04-30 23:48:59 +02:00
logger.WarnContext(ctx, "Cleanup: ssh key could not be deleted", "error", err)
2024-04-29 21:00:04 +02:00
// TODO
}
}()
// 2. Create Server
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 2: Creating Server")
var serverType *hcloud.ServerType
if options.ServerType != nil {
serverType = options.ServerType
} else {
var ok bool
serverType, ok = serverTypePerArchitecture[options.Architecture]
if !ok {
return nil, fmt.Errorf("unknown architecture %q, valid options: %q, %q", options.Architecture, hcloud.ArchitectureX86, hcloud.ArchitectureARM)
}
2024-04-29 21:00:04 +02:00
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "creating server with config",
"image", defaultImage.Name,
"location", defaultLocation.Name,
"serverType", serverType.Name,
)
2024-05-02 21:42:36 +02:00
serverCreateResult, _, err := s.c.Server.Create(ctx, hcloud.ServerCreateOpts{
2024-04-29 21:00:04 +02:00
Name: resourceName,
ServerType: serverType,
// Not used, but without this the user receives an email with a password for every created server
SSHKeys: []*hcloud.SSHKey{key},
// We need to enable rescue system first
StartAfterCreate: hcloud.Ptr(false),
// Image will never be booted, we only boot into rescue system
Image: defaultImage,
Location: defaultLocation,
Labels: labels,
2024-04-29 21:00:04 +02:00
})
if err != nil {
return nil, fmt.Errorf("creating the temporary server failed: %w", err)
}
2024-04-30 23:48:59 +02:00
logger = logger.With("server", serverCreateResult.Server.ID)
logger.DebugContext(ctx, "Created Server")
2024-04-29 21:00:04 +02:00
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "waiting on actions")
2024-05-02 21:42:36 +02:00
err = s.c.Action.WaitFor(ctx, append(serverCreateResult.NextActions, serverCreateResult.Action)...)
2024-04-29 21:00:04 +02:00
if err != nil {
return nil, fmt.Errorf("creating the temporary server failed: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "actions finished")
2024-04-29 21:00:04 +02:00
server := serverCreateResult.Server
defer func() {
// Cleanup Server
if options.DebugSkipResourceCleanup {
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "Cleanup: Skipping cleanup of temporary server")
2024-04-29 21:00:04 +02:00
return
}
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "Cleanup: Deleting temporary server")
2024-05-02 21:42:36 +02:00
_, _, err := s.c.Server.DeleteWithResult(ctx, server)
2024-04-29 21:00:04 +02:00
if err != nil {
2024-04-30 23:48:59 +02:00
logger.WarnContext(ctx, "Cleanup: server could not be deleted", "error", err)
2024-04-29 21:00:04 +02:00
}
}()
// 3. Activate Rescue System
logger.InfoContext(ctx, "# Step 3: Activating Rescue System")
2024-05-02 21:42:36 +02:00
enableRescueResult, _, err := s.c.Server.EnableRescue(ctx, server, hcloud.ServerEnableRescueOpts{
2024-04-29 21:00:04 +02:00
Type: defaultRescueType,
SSHKeys: []*hcloud.SSHKey{key},
})
if err != nil {
return nil, fmt.Errorf("enabling the rescue system on the temporary server failed: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "rescue system requested, waiting on action")
2024-05-02 21:42:36 +02:00
err = s.c.Action.WaitFor(ctx, enableRescueResult.Action)
2024-04-29 21:00:04 +02:00
if err != nil {
return nil, fmt.Errorf("enabling the rescue system on the temporary server failed: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "action finished, rescue system enabled")
2024-04-29 21:00:04 +02:00
// 4. Boot Server
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 4: Booting Server")
2024-05-02 21:42:36 +02:00
powerOnAction, _, err := s.c.Server.Poweron(ctx, server)
2024-04-29 21:00:04 +02:00
if err != nil {
return nil, fmt.Errorf("starting the temporary server failed: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "boot requested, waiting on action")
2024-05-02 21:42:36 +02:00
err = s.c.Action.WaitFor(ctx, powerOnAction)
2024-04-29 21:00:04 +02:00
if err != nil {
return nil, fmt.Errorf("starting the temporary server failed: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "action finished, server is booting")
2024-04-29 21:00:04 +02:00
// 5. Open SSH Session
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 5: Opening SSH Connection")
2024-04-29 21:00:04 +02:00
signer, err := ssh.ParsePrivateKey(privateKey)
if err != nil {
return nil, fmt.Errorf("parsing the automatically generated temporary private key failed: %w", err)
}
sshClientConfig := &ssh.ClientConfig{
User: "root",
Auth: []ssh.AuthMethod{
ssh.PublicKeys(signer),
},
// There is no way to get the host key of the rescue system beforehand
HostKeyCallback: ssh.InsecureIgnoreHostKey(),
Timeout: defaultSSHDialTimeout,
}
// the server needs some time until its properly started and ssh is available
var sshClient *ssh.Client
2024-04-30 23:48:59 +02:00
err = control.Retry(
contextlogger.New(ctx, logger.With("operation", "ssh")),
10,
func() error {
var err error
logger.DebugContext(ctx, "trying to connect to server", "ip", server.PublicNet.IPv4.IP)
sshClient, err = ssh.Dial("tcp", server.PublicNet.IPv4.IP.String()+":ssh", sshClientConfig)
return err
},
)
2024-04-29 21:00:04 +02:00
if err != nil {
return nil, fmt.Errorf("failed to ssh into temporary server: %w", err)
}
defer sshClient.Close()
// 6. SSH On Server: Download Image, Decompress, Write to Root Disk
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 6: Downloading image and writing to disk")
cmd := ""
if options.ImageURL != nil {
cmd += fmt.Sprintf("wget --no-verbose -O - %q | ", options.ImageURL.String())
}
2024-04-29 21:00:04 +02:00
if options.ImageCompression != CompressionNone {
switch options.ImageCompression {
case CompressionBZ2:
cmd += "bzip2 -cd | "
case CompressionXZ:
cmd += "xz -cd | "
2024-04-29 21:00:04 +02:00
default:
return nil, fmt.Errorf("unknown compression: %q", options.ImageCompression)
}
}
cmd += "dd of=/dev/sda bs=4M && sync"
// Make sure that we fail early, ie. if the image url does not work.
// the pipefail does not work correctly without wrapping in bash.
cmd = fmt.Sprintf("bash -c 'set -euo pipefail && %s'", cmd)
logger.DebugContext(ctx, "running download, decompress and write to disk command", "cmd", cmd)
2024-04-30 23:48:59 +02:00
output, err := sshsession.Run(sshClient, cmd, options.ImageReader)
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 6: Finished writing image to disk")
logger.DebugContext(ctx, string(output))
2024-04-29 21:00:04 +02:00
if err != nil {
return nil, fmt.Errorf("failed to download and write the image: %w", err)
}
// 7. SSH On Server: Shutdown
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 7: Shutting down server")
_, err = sshsession.Run(sshClient, "shutdown now", nil)
2024-04-29 21:00:04 +02:00
if err != nil {
// TODO Verify if shutdown error, otherwise return
2024-04-30 23:48:59 +02:00
logger.WarnContext(ctx, "shutdown returned error", "err", err)
2024-04-29 21:00:04 +02:00
}
// 8. Create Image from Server
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Step 8: Creating Image")
2024-05-02 21:42:36 +02:00
createImageResult, _, err := s.c.Server.CreateImage(ctx, server, &hcloud.ServerCreateImageOpts{
2024-04-29 21:00:04 +02:00
Type: hcloud.ImageTypeSnapshot,
Description: options.Description,
Labels: labels,
2024-04-29 21:00:04 +02:00
})
if err != nil {
return nil, fmt.Errorf("failed to create snapshot: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "image creation requested, waiting on action")
2024-05-02 21:42:36 +02:00
err = s.c.Action.WaitFor(ctx, createImageResult.Action)
2024-04-29 21:00:04 +02:00
if err != nil {
return nil, fmt.Errorf("failed to create snapshot: %w", err)
}
2024-04-30 23:48:59 +02:00
logger.DebugContext(ctx, "action finished, image was created")
2024-04-29 21:00:04 +02:00
image := createImageResult.Image
2024-04-30 23:48:59 +02:00
logger.InfoContext(ctx, "# Image was created", "image", image.ID)
2024-04-29 21:00:04 +02:00
// Resource cleanup is happening in `defer`
return image, nil
}
// CleanupTempResources tries to delete any resources that were left over from previous calls to [Client.Upload].
// Upload tries to clean up any temporary resources it created at runtime, but might fail at any point.
// You can then use this command to make sure that all temporary resources are removed from your project.
//
// This method tries to delete any server or ssh keys that match the [DefaultLabels]
func (s *Client) CleanupTempResources(ctx context.Context) error {
logger := contextlogger.From(ctx).With(
"library", "hcloudimages",
"method", "cleanup",
)
selector := labelutil.Selector(DefaultLabels)
logger = logger.With("selector", selector)
logger.InfoContext(ctx, "# Cleaning up Servers")
err := s.cleanupTempServers(ctx, logger, selector)
if err != nil {
return fmt.Errorf("failed to clean up all servers: %w", err)
2024-04-29 21:00:04 +02:00
}
logger.DebugContext(ctx, "cleaned up all servers")
logger.InfoContext(ctx, "# Cleaning up SSH Keys")
err = s.cleanupTempSSHKeys(ctx, logger, selector)
if err != nil {
return fmt.Errorf("failed to clean up all ssh keys: %w", err)
}
logger.DebugContext(ctx, "cleaned up all ssh keys")
return nil
}
// cleanupTempServers deletes every server that matches the label selector and waits for the delete
// actions to settle. Servers that could not be deleted are logged; their errors are joined into the
// returned error.
func (s *Client) cleanupTempServers(ctx context.Context, logger *slog.Logger, selector string) error {
	listOpts := hcloud.ServerListOpts{ListOpts: hcloud.ListOpts{LabelSelector: selector}}
	servers, err := s.c.Server.AllWithOpts(ctx, listOpts)
	if err != nil {
		return fmt.Errorf("failed to list servers: %w", err)
	}

	if len(servers) == 0 {
		logger.InfoContext(ctx, "No servers found")
		return nil
	}
	logger.InfoContext(ctx, "removing servers", "count", len(servers))

	// Request the deletion of all servers first, the resulting actions are awaited together below.
	var failures []error
	pending := make([]*hcloud.Action, 0, len(servers))
	for _, srv := range servers {
		result, _, err := s.c.Server.DeleteWithResult(ctx, srv)
		if err == nil {
			pending = append(pending, result.Action)
			continue
		}

		failures = append(failures, err)
		logger.WarnContext(ctx, "failed to delete server", "server", srv.ID, "error", err)
	}

	succeeded, failed, err := actionutil.Settle(ctx, &s.c.Action, pending...)
	if err != nil {
		return fmt.Errorf("failed to wait for server delete: %w", err)
	}

	if len(succeeded) > 0 {
		// Collect the IDs of the deleted servers from the resources attached to the actions.
		deleted := make([]int64, 0, len(succeeded))
		for _, action := range succeeded {
			for _, resource := range action.Resources {
				if resource.Type != hcloud.ActionResourceTypeServer {
					continue
				}
				deleted = append(deleted, resource.ID)
			}
		}

		logger.InfoContext(ctx, "successfully deleted servers", "servers", deleted)
	}

	for _, action := range failed {
		failures = append(failures, action.Error())
	}

	if len(failures) == 0 {
		return nil
	}

	// The returned message contains no info about the server IDs which failed
	return fmt.Errorf("failed to delete some of the servers: %w", errors.Join(failures...))
}
func (s *Client) cleanupTempSSHKeys(ctx context.Context, logger *slog.Logger, selector string) error {
keys, _, err := s.c.SSHKey.List(ctx, hcloud.SSHKeyListOpts{ListOpts: hcloud.ListOpts{
LabelSelector: selector,
}})
if err != nil {
return fmt.Errorf("failed to list keys: %w", err)
}
if len(keys) == 0 {
logger.InfoContext(ctx, "No ssh keys found")
return nil
}
errs := []error{}
for _, key := range keys {
_, err := s.c.SSHKey.Delete(ctx, key)
if err != nil {
errs = append(errs, err)
logger.WarnContext(ctx, "failed to delete ssh key", "ssh-key", key.ID, "error", err)
continue
}
}
if len(errs) > 0 {
// The returned message contains no info about the server IDs which failed
return fmt.Errorf("failed to delete some of the ssh keys: %w", errors.Join(errs...))
2024-04-29 21:00:04 +02:00
}
return nil
2024-04-29 21:00:04 +02:00
}