Artifacts retention and auto clean up (#26131)

Currently, Artifact does not have an expiration and automatic cleanup
mechanism, and this feature needs to be added. It contains the following
key points:

- [x] add global artifact retention days option in config file. Default
value is 90 days.
- [x] add cron task to clean up expired artifacts. It should run once a
day.
- [x] support custom retention period from `retention-days: 5` in
`upload-artifact@v3`.
- [x] artifacts link in actions view should be non-clickable text when
expired.
pull/26842/head
FuXiaoHei 1 year ago committed by GitHub
parent 113eb5fc24
commit 460a2b0edf
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      custom/conf/app.example.ini
  2. 7
      docs/content/administration/config-cheat-sheet.en-us.md
  3. 38
      models/actions/artifact.go
  4. 2
      models/migrations/migrations.go
  5. 36
      models/migrations/v1_21/v274.go
  6. 14
      modules/setting/actions.go
  7. 1
      options/locale/locale_en-US.ini
  8. 28
      routers/api/actions/artifacts.go
  9. 2
      routers/api/actions/artifacts_chunks.go
  10. 14
      routers/web/repo/actions/view.go
  11. 42
      services/actions/cleanup.go
  12. 18
      services/cron/tasks_basic.go
  13. 42
      tests/integration/api_actions_artifact_test.go

@ -2564,6 +2564,8 @@ LEVEL = Info
;;
;; Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance.
;DEFAULT_ACTIONS_URL = github
;; Default artifact retention time in days, default is 90 days
;ARTIFACT_RETENTION_DAYS = 90
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

@ -955,6 +955,12 @@ Default templates for project boards:
- `SCHEDULE`: **@midnight** : Interval as a duration between each synchronization, it will always attempt synchronization when the instance starts.
- `UPDATE_EXISTING`: **true**: Create new users, update existing user data and disable users that are not in external source anymore (default) or only create new users if UPDATE_EXISTING is set to false.
## Cron - Cleanup Expired Actions Assets (`cron.cleanup_actions`)
- `ENABLED`: **true**: Enable cleanup expired actions assets job.
- `RUN_AT_START`: **true**: Run job at start time (if ENABLED).
- `SCHEDULE`: **@midnight** : Cron syntax for the job.
### Extended cron tasks (not enabled by default)
#### Cron - Garbage collect all repositories (`cron.git_gc_repos`)
@ -1381,6 +1387,7 @@ PROXY_HOSTS = *.github.com
- `DEFAULT_ACTIONS_URL`: **github**: Default platform to get action plugins, `github` for `https://github.com`, `self` for the current Gitea instance.
- `STORAGE_TYPE`: **local**: Storage type for actions logs, `local` for local disk or `minio` for s3 compatible object storage service, default is `local` or other name defined with `[storage.xxx]`
- `MINIO_BASE_PATH`: **actions_log/**: Minio base path on the bucket only available when STORAGE_TYPE is `minio`
- `ARTIFACT_RETENTION_DAYS`: **90**: Number of days to keep artifacts. Set to 0 to disable artifact retention. Default is 90 days if not set.
`DEFAULT_ACTIONS_URL` indicates where the Gitea Actions runners should find the actions with relative path.
For example, `uses: actions/checkout@v3` means `https://github.com/actions/checkout@v3` since the value of `DEFAULT_ACTIONS_URL` is `github`.

@ -9,19 +9,21 @@ package actions
import (
"context"
"errors"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
)
// ArtifactStatus is the status of an artifact, uploading, expired or need-delete
type ArtifactStatus int64
const (
// ArtifactStatusUploadPending is the status of an artifact upload that is pending
ArtifactStatusUploadPending = 1
// ArtifactStatusUploadConfirmed is the status of an artifact upload that is confirmed
ArtifactStatusUploadConfirmed = 2
// ArtifactStatusUploadError is the status of an artifact upload that is errored
ArtifactStatusUploadError = 3
ArtifactStatusUploadPending ArtifactStatus = iota + 1 // 1, ArtifactStatusUploadPending is the status of an artifact upload that is pending
ArtifactStatusUploadConfirmed // 2, ArtifactStatusUploadConfirmed is the status of an artifact upload that is confirmed
ArtifactStatusUploadError // 3, ArtifactStatusUploadError is the status of an artifact upload that is errored
ArtifactStatusExpired // 4, ArtifactStatusExpired is the status of an artifact that is expired
)
func init() {
@ -45,9 +47,10 @@ type ActionArtifact struct {
Status int64 `xorm:"index"` // The status of the artifact, uploading, expired or need-delete
CreatedUnix timeutil.TimeStamp `xorm:"created"`
UpdatedUnix timeutil.TimeStamp `xorm:"updated index"`
ExpiredUnix timeutil.TimeStamp `xorm:"index"` // The time when the artifact will be expired
}
func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPath string) (*ActionArtifact, error) {
func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPath string, expiredDays int64) (*ActionArtifact, error) {
if err := t.LoadJob(ctx); err != nil {
return nil, err
}
@ -61,7 +64,8 @@ func CreateArtifact(ctx context.Context, t *ActionTask, artifactName, artifactPa
RepoID: t.RepoID,
OwnerID: t.OwnerID,
CommitSHA: t.CommitSHA,
Status: ArtifactStatusUploadPending,
Status: int64(ArtifactStatusUploadPending),
ExpiredUnix: timeutil.TimeStamp(time.Now().Unix() + 3600*24*expiredDays),
}
if _, err := db.GetEngine(ctx).Insert(artifact); err != nil {
return nil, err
@ -126,15 +130,16 @@ func ListUploadedArtifactsByRunID(ctx context.Context, runID int64) ([]*ActionAr
type ActionArtifactMeta struct {
ArtifactName string
FileSize int64
Status int64
}
// ListUploadedArtifactsMeta returns all uploaded artifacts meta of a run
func ListUploadedArtifactsMeta(ctx context.Context, runID int64) ([]*ActionArtifactMeta, error) {
arts := make([]*ActionArtifactMeta, 0, 10)
return arts, db.GetEngine(ctx).Table("action_artifact").
Where("run_id=? AND status=?", runID, ArtifactStatusUploadConfirmed).
Where("run_id=? AND (status=? OR status=?)", runID, ArtifactStatusUploadConfirmed, ArtifactStatusExpired).
GroupBy("artifact_name").
Select("artifact_name, sum(file_size) as file_size").
Select("artifact_name, sum(file_size) as file_size, max(status) as status").
Find(&arts)
}
@ -149,3 +154,16 @@ func ListArtifactsByRunIDAndName(ctx context.Context, runID int64, name string)
arts := make([]*ActionArtifact, 0, 10)
return arts, db.GetEngine(ctx).Where("run_id=? AND artifact_name=?", runID, name).Find(&arts)
}
// ListNeedExpiredArtifacts returns all need expired artifacts but not deleted
func ListNeedExpiredArtifacts(ctx context.Context) ([]*ActionArtifact, error) {
arts := make([]*ActionArtifact, 0, 10)
return arts, db.GetEngine(ctx).
Where("expired_unix < ? AND status = ?", timeutil.TimeStamp(time.Now().Unix()), ArtifactStatusUploadConfirmed).Find(&arts)
}
// SetArtifactExpired sets an artifact to expired
func SetArtifactExpired(ctx context.Context, artifactID int64) error {
_, err := db.GetEngine(ctx).Where("id=? AND status = ?", artifactID, ArtifactStatusUploadConfirmed).Cols("status").Update(&ActionArtifact{Status: int64(ArtifactStatusExpired)})
return err
}

@ -528,6 +528,8 @@ var migrations = []Migration{
NewMigration("Add Version to ActionRun table", v1_21.AddVersionToActionRunTable),
// v273 -> v274
NewMigration("Add Action Schedule Table", v1_21.AddActionScheduleTable),
// v274 -> v275
NewMigration("Add Actions artifacts expiration date", v1_21.AddExpiredUnixColumnInActionArtifactTable),
}
// GetCurrentDBVersion returns the current db version

@ -0,0 +1,36 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package v1_21 //nolint
import (
"time"
"code.gitea.io/gitea/modules/timeutil"
"xorm.io/xorm"
)
func AddExpiredUnixColumnInActionArtifactTable(x *xorm.Engine) error {
type ActionArtifact struct {
ExpiredUnix timeutil.TimeStamp `xorm:"index"` // time when the artifact will be expired
}
if err := x.Sync(new(ActionArtifact)); err != nil {
return err
}
return updateArtifactsExpiredUnixTo90Days(x)
}
func updateArtifactsExpiredUnixTo90Days(x *xorm.Engine) error {
sess := x.NewSession()
defer sess.Close()
if err := sess.Begin(); err != nil {
return err
}
expiredTime := time.Now().AddDate(0, 0, 90).Unix()
if _, err := sess.Exec(`UPDATE action_artifact SET expired_unix=? WHERE status='2' AND expired_unix is NULL`, expiredTime); err != nil {
return err
}
return sess.Commit()
}

@ -13,10 +13,11 @@ import (
// Actions settings
var (
Actions = struct {
LogStorage *Storage // how the created logs should be stored
ArtifactStorage *Storage // how the created artifacts should be stored
Enabled bool
DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"`
LogStorage *Storage // how the created logs should be stored
ArtifactStorage *Storage // how the created artifacts should be stored
ArtifactRetentionDays int64 `ini:"ARTIFACT_RETENTION_DAYS"`
Enabled bool
DefaultActionsURL defaultActionsURL `ini:"DEFAULT_ACTIONS_URL"`
}{
Enabled: false,
DefaultActionsURL: defaultActionsURLGitHub,
@ -76,5 +77,10 @@ func loadActionsFrom(rootCfg ConfigProvider) error {
Actions.ArtifactStorage, err = getStorage(rootCfg, "actions_artifacts", "", actionsSec)
// default to 90 days in Github Actions
if Actions.ArtifactRetentionDays <= 0 {
Actions.ArtifactRetentionDays = 90
}
return err
}

@ -2731,6 +2731,7 @@ dashboard.reinit_missing_repos = Reinitialize all missing Git repositories for w
dashboard.sync_external_users = Synchronize external user data
dashboard.cleanup_hook_task_table = Cleanup hook_task table
dashboard.cleanup_packages = Cleanup expired packages
dashboard.cleanup_actions = Cleanup actions expired logs and artifacts
dashboard.server_uptime = Server Uptime
dashboard.current_goroutine = Current Goroutines
dashboard.current_memory_usage = Current Memory Usage

@ -170,8 +170,9 @@ func (ar artifactRoutes) buildArtifactURL(runID int64, artifactHash, suffix stri
}
type getUploadArtifactRequest struct {
Type string
Name string
Type string
Name string
RetentionDays int64
}
type getUploadArtifactResponse struct {
@ -192,10 +193,16 @@ func (ar artifactRoutes) getUploadArtifactURL(ctx *ArtifactContext) {
return
}
// set retention days
retentionQuery := ""
if req.RetentionDays > 0 {
retentionQuery = fmt.Sprintf("?retentionDays=%d", req.RetentionDays)
}
// use md5(artifact_name) to create upload url
artifactHash := fmt.Sprintf("%x", md5.Sum([]byte(req.Name)))
resp := getUploadArtifactResponse{
FileContainerResourceURL: ar.buildArtifactURL(runID, artifactHash, "upload"),
FileContainerResourceURL: ar.buildArtifactURL(runID, artifactHash, "upload"+retentionQuery),
}
log.Debug("[artifact] get upload url: %s", resp.FileContainerResourceURL)
ctx.JSON(http.StatusOK, resp)
@ -219,8 +226,21 @@ func (ar artifactRoutes) uploadArtifact(ctx *ArtifactContext) {
return
}
// get artifact retention days
expiredDays := setting.Actions.ArtifactRetentionDays
if queryRetentionDays := ctx.Req.URL.Query().Get("retentionDays"); queryRetentionDays != "" {
expiredDays, err = strconv.ParseInt(queryRetentionDays, 10, 64)
if err != nil {
log.Error("Error parse retention days: %v", err)
ctx.Error(http.StatusBadRequest, "Error parse retention days")
return
}
}
log.Debug("[artifact] upload chunk, name: %s, path: %s, size: %d, retention days: %d",
artifactName, artifactPath, fileRealTotalSize, expiredDays)
// create or get artifact with name and path
artifact, err := actions.CreateArtifact(ctx, task, artifactName, artifactPath)
artifact, err := actions.CreateArtifact(ctx, task, artifactName, artifactPath, expiredDays)
if err != nil {
log.Error("Error create or get artifact: %v", err)
ctx.Error(http.StatusInternalServerError, "Error create or get artifact")

@ -179,7 +179,7 @@ func mergeChunksForArtifact(ctx *ArtifactContext, chunks []*chunkFileItem, st st
// save storage path to artifact
log.Debug("[artifact] merge chunks to artifact: %d, %s", artifact.ID, storagePath)
artifact.StoragePath = storagePath
artifact.Status = actions.ArtifactStatusUploadConfirmed
artifact.Status = int64(actions.ArtifactStatusUploadConfirmed)
if err := actions.UpdateArtifactByID(ctx, artifact.ID, artifact); err != nil {
return fmt.Errorf("update artifact error: %v", err)
}

@ -486,8 +486,9 @@ type ArtifactsViewResponse struct {
}
type ArtifactsViewItem struct {
Name string `json:"name"`
Size int64 `json:"size"`
Name string `json:"name"`
Size int64 `json:"size"`
Status string `json:"status"`
}
func ArtifactsView(ctx *context_module.Context) {
@ -510,9 +511,14 @@ func ArtifactsView(ctx *context_module.Context) {
Artifacts: make([]*ArtifactsViewItem, 0, len(artifacts)),
}
for _, art := range artifacts {
status := "completed"
if art.Status == int64(actions_model.ArtifactStatusExpired) {
status = "expired"
}
artifactsResponse.Artifacts = append(artifactsResponse.Artifacts, &ArtifactsViewItem{
Name: art.ArtifactName,
Size: art.FileSize,
Name: art.ArtifactName,
Size: art.FileSize,
Status: status,
})
}
ctx.JSON(http.StatusOK, artifactsResponse)

@ -0,0 +1,42 @@
// Copyright 2023 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package actions
import (
"context"
"time"
"code.gitea.io/gitea/models/actions"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/storage"
)
// Cleanup removes expired actions logs, data and artifacts
func Cleanup(taskCtx context.Context, olderThan time.Duration) error {
// TODO: clean up expired actions logs
// clean up expired artifacts
return CleanupArtifacts(taskCtx)
}
// CleanupArtifacts removes expired artifacts and set records expired status
func CleanupArtifacts(taskCtx context.Context) error {
artifacts, err := actions.ListNeedExpiredArtifacts(taskCtx)
if err != nil {
return err
}
log.Info("Found %d expired artifacts", len(artifacts))
for _, artifact := range artifacts {
if err := storage.ActionsArtifacts.Delete(artifact.StoragePath); err != nil {
log.Error("Cannot delete artifact %d: %v", artifact.ID, err)
continue
}
if err := actions.SetArtifactExpired(taskCtx, artifact.ID); err != nil {
log.Error("Cannot set artifact %d expired: %v", artifact.ID, err)
continue
}
log.Info("Artifact %d set expired", artifact.ID)
}
return nil
}

@ -13,6 +13,7 @@ import (
"code.gitea.io/gitea/models/webhook"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/services/actions"
"code.gitea.io/gitea/services/auth"
"code.gitea.io/gitea/services/migrations"
mirror_service "code.gitea.io/gitea/services/mirror"
@ -156,6 +157,20 @@ func registerCleanupPackages() {
})
}
func registerActionsCleanup() {
RegisterTaskFatal("cleanup_actions", &OlderThanConfig{
BaseConfig: BaseConfig{
Enabled: true,
RunAtStart: true,
Schedule: "@midnight",
},
OlderThan: 24 * time.Hour,
}, func(ctx context.Context, _ *user_model.User, config Config) error {
realConfig := config.(*OlderThanConfig)
return actions.Cleanup(ctx, realConfig.OlderThan)
})
}
func initBasicTasks() {
if setting.Mirror.Enabled {
registerUpdateMirrorTask()
@ -172,4 +187,7 @@ func initBasicTasks() {
if setting.Packages.Enabled {
registerCleanupPackages()
}
if setting.Actions.Enabled {
registerActionsCleanup()
}
}

@ -18,8 +18,9 @@ type uploadArtifactResponse struct {
}
type getUploadArtifactRequest struct {
Type string
Name string
Type string
Name string
RetentionDays int64
}
func TestActionsArtifactUploadSingleFile(t *testing.T) {
@ -252,3 +253,40 @@ func TestActionsArtifactDownloadMultiFiles(t *testing.T) {
assert.Equal(t, resp.Body.String(), body)
}
}
func TestActionsArtifactUploadWithRetentionDays(t *testing.T) {
defer tests.PrepareTestEnv(t)()
// acquire artifact upload url
req := NewRequestWithJSON(t, "POST", "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts", getUploadArtifactRequest{
Type: "actions_storage",
Name: "artifact-retention-days",
RetentionDays: 9,
})
req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a")
resp := MakeRequest(t, req, http.StatusOK)
var uploadResp uploadArtifactResponse
DecodeJSON(t, resp, &uploadResp)
assert.Contains(t, uploadResp.FileContainerResourceURL, "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts")
assert.Contains(t, uploadResp.FileContainerResourceURL, "?retentionDays=9")
// get upload url
idx := strings.Index(uploadResp.FileContainerResourceURL, "/api/actions_pipeline/_apis/pipelines/")
url := uploadResp.FileContainerResourceURL[idx:] + "&itemPath=artifact-retention-days/abc.txt"
// upload artifact chunk
body := strings.Repeat("A", 1024)
req = NewRequestWithBody(t, "PUT", url, strings.NewReader(body))
req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a")
req.Header.Add("Content-Range", "bytes 0-1023/1024")
req.Header.Add("x-tfs-filelength", "1024")
req.Header.Add("x-actions-results-md5", "1HsSe8LeLWh93ILaw1TEFQ==") // base64(md5(body))
MakeRequest(t, req, http.StatusOK)
t.Logf("Create artifact confirm")
// confirm artifact upload
req = NewRequest(t, "PATCH", "/api/actions_pipeline/_apis/pipelines/workflows/791/artifacts?artifactName=artifact-retention-days")
req = addTokenAuthHeader(req, "Bearer 8061e833a55f6fc0157c98b883e91fcfeeb1a71a")
MakeRequest(t, req, http.StatusOK)
}

Loading…
Cancel
Save