From 06f968d662e3f49daeb9182cf1b6e4abc247df4e Mon Sep 17 00:00:00 2001 From: zeripath Date: Tue, 6 Sep 2022 07:54:47 +0100 Subject: [PATCH] Fix hard-coded timeout and error panic in API archive download endpoint (#20925) (#21051) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Backport #20925 This commit updates the `GET /api/v1/repos/{owner}/{repo}/archive/{archive}` endpoint, which prior to this PR had a couple of issues. 1. The endpoint had a hard-coded 20s timeout for the archiver to complete, after which a 500 (Internal Server Error) was returned to the client. For a scripted API client there was no clear way of telling that the operation timed out and that it should retry. 2. Whenever the timeout _did occur_, the code used to panic. This was caused by the API endpoint "delegating" to the same call path as the web, which uses a slightly different way of reporting errors (HTML rather than JSON for example). More specifically, `api/v1/repo/file.go#GetArchive` just called through to `web/repo/repo.go#Download`, which expects the `Context` to have a `Render` field set, but which is `nil` for API calls. This resulted in a `nil` pointer error. The code addresses (1) by dropping the hard-coded timeout. Instead, any timeout/cancellation on the incoming `Context` is used. The code addresses (2) by updating the API endpoint to use a separate call path for the API-triggered archive download. This avoids producing HTML error responses on failure (the endpoint now produces JSON errors). 
Signed-off-by: Peter Gardfjäll Signed-off-by: Peter Gardfjäll Signed-off-by: Andrew Thornton Co-authored-by: Peter Gardfjäll Co-authored-by: Lunny Xiao --- cmd/migrate_storage.go | 7 +-- models/repo.go | 3 +- models/repo/archiver.go | 6 +- modules/timeutil/timestamp.go | 5 ++ routers/api/v1/repo/file.go | 51 ++++++++++++++++- routers/web/repo/repo.go | 55 ++---------------- services/repository/archiver/archiver.go | 71 ++++++++++++++++++++---- 7 files changed, 127 insertions(+), 71 deletions(-) diff --git a/cmd/migrate_storage.go b/cmd/migrate_storage.go index f11cf9b11f3..a283f914018 100644 --- a/cmd/migrate_storage.go +++ b/cmd/migrate_storage.go @@ -112,11 +112,8 @@ func migrateRepoAvatars(ctx context.Context, dstStorage storage.ObjectStorage) e func migrateRepoArchivers(ctx context.Context, dstStorage storage.ObjectStorage) error { return db.IterateObjects(ctx, func(archiver *repo_model.RepoArchiver) error { - p, err := archiver.RelativePath() - if err != nil { - return err - } - _, err = storage.Copy(dstStorage, p, storage.RepoArchives, p) + p := archiver.RelativePath() + _, err := storage.Copy(dstStorage, p, storage.RepoArchives, p) return err }) } diff --git a/models/repo.go b/models/repo.go index e9d83f5f327..d2f3a45940f 100644 --- a/models/repo.go +++ b/models/repo.go @@ -385,8 +385,7 @@ func DeleteRepository(doer *user_model.User, uid, repoID int64) error { archivePaths := make([]string, 0, len(archives)) for _, v := range archives { - p, _ := v.RelativePath() - archivePaths = append(archivePaths, p) + archivePaths = append(archivePaths, v.RelativePath()) } if _, err := db.DeleteByBean(ctx, &repo_model.RepoArchiver{RepoID: repoID}); err != nil { diff --git a/models/repo/archiver.go b/models/repo/archiver.go index dc64cce49ba..6a68650fe89 100644 --- a/models/repo/archiver.go +++ b/models/repo/archiver.go @@ -39,9 +39,9 @@ func init() { db.RegisterModel(new(RepoArchiver)) } -// RelativePath returns relative path -func (archiver *RepoArchiver) RelativePath() 
(string, error) { - return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()), nil +// RelativePath returns the archive path relative to the archive storage root. +func (archiver *RepoArchiver) RelativePath() string { + return fmt.Sprintf("%d/%s/%s.%s", archiver.RepoID, archiver.CommitID[:2], archiver.CommitID, archiver.Type.String()) } var delRepoArchiver = new(RepoArchiver) diff --git a/modules/timeutil/timestamp.go b/modules/timeutil/timestamp.go index 1fe8d4fcb18..9c421914cb5 100644 --- a/modules/timeutil/timestamp.go +++ b/modules/timeutil/timestamp.go @@ -54,6 +54,11 @@ func (ts TimeStamp) AsTime() (tm time.Time) { return ts.AsTimeInLocation(setting.DefaultUILocation) } +// AsLocalTime convert timestamp as time.Time in local location +func (ts TimeStamp) AsLocalTime() time.Time { + return time.Unix(int64(ts), 0) +} + // AsTimeInLocation convert timestamp as time.Time in Local locale func (ts TimeStamp) AsTimeInLocation(loc *time.Location) (tm time.Time) { tm = time.Unix(int64(ts), 0).In(loc) diff --git a/routers/api/v1/repo/file.go b/routers/api/v1/repo/file.go index 2190094bac5..57c783d3eed 100644 --- a/routers/api/v1/repo/file.go +++ b/routers/api/v1/repo/file.go @@ -8,6 +8,7 @@ package repo import ( "bytes" "encoding/base64" + "errors" "fmt" "io" "net/http" @@ -29,7 +30,7 @@ import ( api "code.gitea.io/gitea/modules/structs" "code.gitea.io/gitea/modules/web" "code.gitea.io/gitea/routers/common" - "code.gitea.io/gitea/routers/web/repo" + archiver_service "code.gitea.io/gitea/services/repository/archiver" files_service "code.gitea.io/gitea/services/repository/files" ) @@ -294,7 +295,53 @@ func GetArchive(ctx *context.APIContext) { defer gitRepo.Close() } - repo.Download(ctx.Context) + archiveDownload(ctx) +} + +func archiveDownload(ctx *context.APIContext) { + uri := ctx.Params("*") + aReq, err := archiver_service.NewRequest(ctx.Repo.Repository.ID, ctx.Repo.GitRepo, uri) + if err != nil { + if 
errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) { + ctx.Error(http.StatusBadRequest, "unknown archive format", err) + } else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) { + ctx.Error(http.StatusNotFound, "unrecognized reference", err) + } else { + ctx.ServerError("archiver_service.NewRequest", err) + } + return + } + + archiver, err := aReq.Await(ctx) + if err != nil { + ctx.ServerError("archiver.Await", err) + return + } + + download(ctx, aReq.GetArchiveName(), archiver) +} + +func download(ctx *context.APIContext, archiveName string, archiver *repo_model.RepoArchiver) { + downloadName := ctx.Repo.Repository.Name + "-" + archiveName + + rPath := archiver.RelativePath() + if setting.RepoArchive.ServeDirect { + // If we have a signed url (S3, object storage), redirect to this directly. + u, err := storage.RepoArchives.URL(rPath, downloadName) + if u != nil && err == nil { + ctx.Redirect(u.String()) + return + } + } + + // If we have matched and access to release or issue + fr, err := storage.RepoArchives.Open(rPath) + if err != nil { + ctx.ServerError("Open", err) + return + } + defer fr.Close() + ctx.ServeContent(downloadName, fr, archiver.CreatedUnix.AsLocalTime()) } // GetEditorconfig get editor config of a repository diff --git a/routers/web/repo/repo.go b/routers/web/repo/repo.go index c2c79e4a0df..873884356b5 100644 --- a/routers/web/repo/repo.go +++ b/routers/web/repo/repo.go @@ -10,7 +10,6 @@ import ( "fmt" "net/http" "strings" - "time" "code.gitea.io/gitea/models" "code.gitea.io/gitea/models/db" @@ -21,7 +20,6 @@ import ( "code.gitea.io/gitea/modules/base" "code.gitea.io/gitea/modules/context" "code.gitea.io/gitea/modules/convert" - "code.gitea.io/gitea/modules/graceful" "code.gitea.io/gitea/modules/log" repo_module "code.gitea.io/gitea/modules/repository" "code.gitea.io/gitea/modules/setting" @@ -389,68 +387,27 @@ func Download(ctx *context.Context) { if err != nil { if errors.Is(err, archiver_service.ErrUnknownArchiveFormat{}) { 
ctx.Error(http.StatusBadRequest, err.Error()) + } else if errors.Is(err, archiver_service.RepoRefNotFoundError{}) { + ctx.Error(http.StatusNotFound, err.Error()) } else { ctx.ServerError("archiver_service.NewRequest", err) } return } - if aReq == nil { - ctx.Error(http.StatusNotFound) - return - } - archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) + archiver, err := aReq.Await(ctx) if err != nil { - ctx.ServerError("models.GetRepoArchiver", err) + ctx.ServerError("archiver.Await", err) return } - if archiver != nil && archiver.Status == repo_model.ArchiverReady { - download(ctx, aReq.GetArchiveName(), archiver) - return - } - - if err := archiver_service.StartArchive(aReq); err != nil { - ctx.ServerError("archiver_service.StartArchive", err) - return - } - - var times int - t := time.NewTicker(time.Second * 1) - defer t.Stop() - for { - select { - case <-graceful.GetManager().HammerContext().Done(): - log.Warn("exit archive download because system stop") - return - case <-t.C: - if times > 20 { - ctx.ServerError("wait download timeout", nil) - return - } - times++ - archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) - if err != nil { - ctx.ServerError("archiver_service.StartArchive", err) - return - } - if archiver != nil && archiver.Status == repo_model.ArchiverReady { - download(ctx, aReq.GetArchiveName(), archiver) - return - } - } - } + download(ctx, aReq.GetArchiveName(), archiver) } func download(ctx *context.Context, archiveName string, archiver *repo_model.RepoArchiver) { downloadName := ctx.Repo.Repository.Name + "-" + archiveName - rPath, err := archiver.RelativePath() - if err != nil { - ctx.ServerError("archiver.RelativePath", err) - return - } - + rPath := archiver.RelativePath() if setting.RepoArchive.ServeDirect { // If we have a signed url (S3, object storage), redirect to this directly. 
u, err := storage.RepoArchives.URL(rPath, downloadName) diff --git a/services/repository/archiver/archiver.go b/services/repository/archiver/archiver.go index ebd3eaf236a..ae43503bae1 100644 --- a/services/repository/archiver/archiver.go +++ b/services/repository/archiver/archiver.go @@ -57,6 +57,21 @@ func (ErrUnknownArchiveFormat) Is(err error) bool { return ok } +// RepoRefNotFoundError is returned when a requested reference (commit, tag) was not found. +type RepoRefNotFoundError struct { + RefName string +} + +// Error implements error. +func (e RepoRefNotFoundError) Error() string { + return fmt.Sprintf("unrecognized repository reference: %s", e.RefName) +} + +func (e RepoRefNotFoundError) Is(err error) bool { + _, ok := err.(RepoRefNotFoundError) + return ok +} + // NewRequest creates an archival request, based on the URI. The // resulting ArchiveRequest is suitable for being passed to ArchiveRepository() // if it's determined that the request still needs to be satisfied. @@ -103,7 +118,7 @@ func NewRequest(repoID int64, repo *git.Repository, uri string) (*ArchiveRequest } } } else { - return nil, fmt.Errorf("Unknow ref %s type", r.refName) + return nil, RepoRefNotFoundError{RefName: r.refName} } return r, nil @@ -115,6 +130,49 @@ func (aReq *ArchiveRequest) GetArchiveName() string { return strings.ReplaceAll(aReq.refName, "/", "-") + "." + aReq.Type.String() } +// Await awaits the completion of an ArchiveRequest. If the archive has +// already been prepared the method returns immediately. Otherwise an archiver +// process will be started and its completion awaited. On success the returned +// RepoArchiver may be used to download the archive. Note that even if the +// context is cancelled/times out a started archiver will still continue to run +// in the background. 
+func (aReq *ArchiveRequest) Await(ctx context.Context) (*repo_model.RepoArchiver, error) { + archiver, err := repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) + if err != nil { + return nil, fmt.Errorf("models.GetRepoArchiver: %v", err) + } + + if archiver != nil && archiver.Status == repo_model.ArchiverReady { + // Archive already generated, we're done. + return archiver, nil + } + + if err := StartArchive(aReq); err != nil { + return nil, fmt.Errorf("archiver.StartArchive: %v", err) + } + + poll := time.NewTicker(time.Second * 1) + defer poll.Stop() + + for { + select { + case <-graceful.GetManager().HammerContext().Done(): + // System stopped. + return nil, graceful.GetManager().HammerContext().Err() + case <-ctx.Done(): + return nil, ctx.Err() + case <-poll.C: + archiver, err = repo_model.GetRepoArchiver(ctx, aReq.RepoID, aReq.Type, aReq.CommitID) + if err != nil { + return nil, fmt.Errorf("repo_model.GetRepoArchiver: %v", err) + } + if archiver != nil && archiver.Status == repo_model.ArchiverReady { + return archiver, nil + } + } + } +} + func doArchive(r *ArchiveRequest) (*repo_model.RepoArchiver, error) { txCtx, committer, err := db.TxContext() if err != nil { @@ -147,11 +205,7 @@ func doArchive(r *ArchiveRequest) (*repo_model.RepoArchiver, error) { } } - rPath, err := archiver.RelativePath() - if err != nil { - return nil, err - } - + rPath := archiver.RelativePath() _, err = storage.RepoArchives.Stat(rPath) if err == nil { if archiver.Status == repo_model.ArchiverGenerating { @@ -284,13 +338,10 @@ func StartArchive(request *ArchiveRequest) error { } func deleteOldRepoArchiver(ctx context.Context, archiver *repo_model.RepoArchiver) error { - p, err := archiver.RelativePath() - if err != nil { - return err - } if err := repo_model.DeleteRepoArchiver(ctx, archiver); err != nil { return err } + p := archiver.RelativePath() if err := storage.RepoArchives.Delete(p); err != nil { log.Error("delete repo archive file failed: %v", err) }