Added cleanup method for files in Maven snapshot versions

pull/33420/head
diana.strebkova@t-systems.com 3 days ago
parent 182e3896bf
commit ae6ee1df19
  1. 3
      custom/conf/app.example.ini
  2. 59
      models/packages/package_file.go
  3. 11
      models/packages/package_version.go
  4. 39
      modules/packages/maven/metadata.go
  5. 10
      modules/setting/packages.go
  6. 7
      services/packages/cleanup/cleanup.go
  7. 112
      services/packages/maven/cleanup.go

@ -2612,6 +2612,9 @@ LEVEL = Info
;LIMIT_SIZE_HELM = -1
;; Maximum size of a Maven upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
;LIMIT_SIZE_MAVEN = -1
;; Specifies the number of most recent Maven snapshot builds to retain. `-1` retains all builds, while `1` retains only the latest build. Value should be -1 or positive.
;; When set to a positive value, the cleanup of expired packages/data also removes files of older builds from all Maven snapshot versions.
;RETAIN_MAVEN_SNAPSHOT_BUILDS = -1
;; Maximum size of a npm upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)
;LIMIT_SIZE_NPM = -1
;; Maximum size of a NuGet upload (`-1` means no limits, format `1000`, `1 MB`, `1 GiB`)

@ -5,11 +5,14 @@ package packages
import (
"context"
"errors"
"fmt"
"strconv"
"strings"
"time"
"code.gitea.io/gitea/models/db"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/timeutil"
"code.gitea.io/gitea/modules/util"
@ -226,6 +229,62 @@ func HasFiles(ctx context.Context, opts *PackageFileSearchOptions) (bool, error)
return db.Exist[PackageFile](ctx, opts.toConds())
}
// GetFilesByBuildNumber returns the files of the package version versionID whose
// build number, as parsed from the file name, is less than or equal to maxBuildNumber.
//
// Files whose name contains "maven-metadata.xml" are never returned. Files whose
// name does not yield a build number are reported with a warning and left out.
// An error is returned when maxBuildNumber is negative or when the file list of
// the version cannot be loaded.
func GetFilesByBuildNumber(ctx context.Context, versionID int64, maxBuildNumber int) ([]*PackageFile, error) {
	if maxBuildNumber < 0 {
		return nil, errors.New("maxBuildNumber must be a non-negative integer")
	}

	files, err := GetFilesByVersionID(ctx, versionID)
	if err != nil {
		return nil, fmt.Errorf("failed to retrieve files: %w", err)
	}

	var filteredFiles []*PackageFile
	for _, file := range files {
		// Metadata files carry no build number. Recognise them by name here rather
		// than by comparing the text of the error produced by the name parser, so
		// that rewording that error cannot silently change which files are skipped.
		if strings.Contains(file.Name, "maven-metadata.xml") {
			continue
		}

		buildNumber, err := extractBuildNumberFromFileName(file.Name)
		if err != nil {
			log.Warn("Failed to extract build number from file name '%s': %v", file.Name, err)
			continue
		}

		if buildNumber <= maxBuildNumber {
			filteredFiles = append(filteredFiles, file)
		}
	}

	log.Info("Filtered %d files out of %d total files for version ID %d with maxBuildNumber %d", len(filteredFiles), len(files), versionID, maxBuildNumber)
	return filteredFiles, nil
}
// extractBuildNumberFromFileName extracts the snapshot build number from a Maven
// snapshot file name.
//
// Expected layout: <artifact>-<version>-<yyyyMMdd.HHmmss>-<build>[-<classifier>].<ext>
//
// The build number is the numeric token that directly follows the timestamp token,
// which also covers names carrying a classifier such as "-sources". When the name
// has no timestamp token, the last hyphen-separated token up to its first dot is
// used instead.
//
// Errors:
//   - "metadata file" when the name contains "maven-metadata.xml"
//   - an "invalid file name format" error when the name has fewer than three
//     hyphen-separated tokens
//   - a conversion error when the selected token is not an integer
func extractBuildNumberFromFileName(filename string) (int, error) {
	// Metadata files have no build number. The error text is kept stable because
	// callers may match on it.
	if strings.Contains(filename, "maven-metadata.xml") {
		return 0, errors.New("metadata file")
	}

	parts := strings.Split(filename, "-")
	if len(parts) < 3 {
		return 0, fmt.Errorf("invalid file name format: '%s'", filename)
	}

	// Preferred: the token right after the snapshot timestamp. A classifier moves
	// the build number away from the end of the name, so the last token alone is
	// not reliable.
	for i := 1; i < len(parts)-1; i++ {
		if !isMavenSnapshotTimestamp(parts[i]) {
			continue
		}
		candidate, _, _ := strings.Cut(parts[i+1], ".")
		if buildNumber, err := strconv.Atoi(candidate); err == nil {
			return buildNumber, nil
		}
	}

	// Fallback: the last token in front of the extension.
	buildNumberStr, _, _ := strings.Cut(parts[len(parts)-1], ".")
	buildNumber, err := strconv.Atoi(buildNumberStr)
	if err != nil {
		return 0, fmt.Errorf("failed to convert build number to integer: '%s'", buildNumberStr)
	}
	return buildNumber, nil
}

// isMavenSnapshotTimestamp reports whether s has the form "yyyyMMdd.HHmmss",
// i.e. eight digits, a dot, and six digits.
func isMavenSnapshotTimestamp(s string) bool {
	if len(s) != 15 || s[8] != '.' {
		return false
	}
	for i := 0; i < len(s); i++ {
		if i == 8 {
			continue
		}
		if s[i] < '0' || s[i] > '9' {
			return false
		}
	}
	return true
}
// CalculateFileSize sums up all blob sizes matching the search options.
// It does NOT respect the deduplication of blobs.
func CalculateFileSize(ctx context.Context, opts *PackageFileSearchOptions) (int64, error) {

@ -120,11 +120,16 @@ func getVersionByNameAndVersion(ctx context.Context, ownerID int64, packageType
// GetVersionsByPackageType gets all versions of a specific type
func GetVersionsByPackageType(ctx context.Context, ownerID int64, packageType Type) ([]*PackageVersion, error) {
pvs, _, err := SearchVersions(ctx, &PackageSearchOptions{
OwnerID: ownerID,
opts := &PackageSearchOptions{
Type: packageType,
IsInternal: optional.Some(false),
})
}
if ownerID != 0 {
opts.OwnerID = ownerID
}
pvs, _, err := SearchVersions(ctx, opts)
return pvs, err
}

@ -5,6 +5,7 @@ package maven
import (
"encoding/xml"
"errors"
"io"
"code.gitea.io/gitea/modules/util"
@ -61,6 +62,27 @@ type pomStruct struct {
} `xml:"dependencies>dependency"`
}
// MavenMetadata is the decode target for a maven-metadata.xml document
// (root element <metadata>).
//
// All values are kept as the strings found in the XML; no conversion is done.
type MavenMetadata struct {
	XMLName    xml.Name `xml:"metadata"`
	GroupID    string   `xml:"groupId"`
	ArtifactID string   `xml:"artifactId"`
	Version    string   `xml:"version"`
	Versioning struct {
		LastUpdated string `xml:"lastUpdated"`
		// Snapshot holds the <snapshot> element with the timestamp and build number.
		Snapshot struct {
			Timestamp   string `xml:"timestamp"`
			BuildNumber string `xml:"buildNumber"`
		} `xml:"snapshot"`
		// SnapshotVersions has one entry per <snapshotVersion> child of
		// <snapshotVersions>. The path tag already descends to <snapshotVersion>,
		// so the fields of that element are declared directly on the entry;
		// wrapping them in another "snapshotVersion" level would make the decoder
		// look for a nested element that does not exist and leave every entry empty.
		SnapshotVersions []struct {
			Classifier string `xml:"classifier"`
			Extension  string `xml:"extension"`
			Value      string `xml:"value"`
			Updated    string `xml:"updated"`
		} `xml:"snapshotVersions>snapshotVersion"`
	} `xml:"versioning"`
}
// ParsePackageMetaData parses the metadata of a pom file
func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
var pom pomStruct
@ -109,3 +131,20 @@ func ParsePackageMetaData(r io.Reader) (*Metadata, error) {
Dependencies: dependencies,
}, nil
}
// ParseMavenMetadata parses the Maven metadata XML to extract the build number.
func ParseMavenMetaData(r io.Reader) (string, error) {
var metadata MavenMetadata
dec := xml.NewDecoder(r)
dec.CharsetReader = charset.NewReaderLabel // Assuming charset.NewReaderLabel is a function you've set up to handle character encoding.
if err := dec.Decode(&metadata); err != nil {
return "", err
}
if metadata.Versioning.Snapshot.BuildNumber == "" {
return "", errors.New("no build number in snapshot metadata found")
}
return metadata.Versioning.Snapshot.BuildNumber, nil
}

@ -44,10 +44,12 @@ var (
LimitSizeSwift int64
LimitSizeVagrant int64
DefaultRPMSignEnabled bool
DefaultRPMSignEnabled bool
RetainMavenSnapshotBuilds int
}{
Enabled: true,
LimitTotalOwnerCount: -1,
Enabled: true,
LimitTotalOwnerCount: -1,
RetainMavenSnapshotBuilds: -1,
}
)
@ -101,7 +103,7 @@ func loadPackagesFrom(rootCfg ConfigProvider) (err error) {
Packages.LimitSizeRubyGems = mustBytes(sec, "LIMIT_SIZE_RUBYGEMS")
Packages.LimitSizeSwift = mustBytes(sec, "LIMIT_SIZE_SWIFT")
Packages.LimitSizeVagrant = mustBytes(sec, "LIMIT_SIZE_VAGRANT")
Packages.DefaultRPMSignEnabled = sec.Key("DEFAULT_RPM_SIGN_ENABLED").MustBool(false)
Packages.RetainMavenSnapshotBuilds = sec.Key("RETAIN_MAVEN_SNAPSHOT_BUILDS").MustInt(Packages.RetainMavenSnapshotBuilds)
return nil
}

@ -1,7 +1,7 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
package container
package cleanup
import (
"context"
@ -20,6 +20,7 @@ import (
cargo_service "code.gitea.io/gitea/services/packages/cargo"
container_service "code.gitea.io/gitea/services/packages/container"
debian_service "code.gitea.io/gitea/services/packages/debian"
maven_service "code.gitea.io/gitea/services/packages/maven"
rpm_service "code.gitea.io/gitea/services/packages/rpm"
)
@ -166,6 +167,10 @@ func CleanupExpiredData(outerCtx context.Context, olderThan time.Duration) error
return err
}
if err := maven_service.CleanupSnapshotVersions(ctx); err != nil {
return err
}
ps, err := packages_model.FindUnreferencedPackages(ctx)
if err != nil {
return err

@ -0,0 +1,112 @@
package maven
import (
"context"
"fmt"
"strconv"
"strings"
"code.gitea.io/gitea/models/packages"
"code.gitea.io/gitea/modules/log"
"code.gitea.io/gitea/modules/packages/maven"
"code.gitea.io/gitea/modules/setting"
packages_service "code.gitea.io/gitea/services/packages"
)
// CleanupSnapshotVersions removes outdated files from the SNAPSHOT versions of all
// Maven packages.
//
// The number of builds to keep is read from setting.Packages.RetainMavenSnapshotBuilds:
//   - -1 disables the cleanup; the function returns nil without touching anything
//   - any other value below 1 is rejected with an error
//
// A failure for one version does not stop the remaining versions from being
// processed. Each failure is logged, and a single error wrapping the first failure
// is returned once all versions have been visited. The function returns early with
// the context's error when ctx is cancelled.
func CleanupSnapshotVersions(ctx context.Context) error {
	retainBuilds := setting.Packages.RetainMavenSnapshotBuilds
	log.Info("Starting CleanupSnapshotVersion with retainBuilds: %d", retainBuilds)

	if retainBuilds == -1 {
		log.Info("CleanupSnapshotVersion skipped because retainBuilds is set to -1")
		return nil
	}
	if retainBuilds < 1 {
		return fmt.Errorf("forbidden value for retainBuilds: %d. Minimum 1 build should be retained", retainBuilds)
	}

	// Owner 0: do not name a specific owner.
	versions, err := packages.GetVersionsByPackageType(ctx, 0, packages.TypeMaven)
	if err != nil {
		return fmt.Errorf("failed to retrieve Maven package versions: %w", err)
	}

	var (
		firstErr error
		failed   int
	)
	for _, version := range versions {
		// Stop promptly when the surrounding task has been cancelled.
		if err := ctx.Err(); err != nil {
			return err
		}

		// Per-version progress is logged at debug level so that a large number of
		// Maven versions does not flood the info log on every run.
		log.Debug("Processing version: %s (ID: %d)", version.Version, version.ID)

		if !isSnapshotVersion(version.Version) {
			log.Debug("Skipping non-SNAPSHOT version: %s (ID: %d)", version.Version, version.ID)
			continue
		}

		if err := cleanSnapshotFiles(ctx, version.ID, retainBuilds); err != nil {
			// One broken version (for example one without readable metadata) must
			// not block the cleanup of all the others: record it and move on.
			log.Error("Failed to clean up snapshot files for version '%s' (ID: %d): %v", version.Version, version.ID, err)
			failed++
			if firstErr == nil {
				firstErr = err
			}
		}
	}

	if firstErr != nil {
		return fmt.Errorf("cleanup failed for %d Maven snapshot version(s), first error: %w", failed, firstErr)
	}

	log.Info("Completed CleanupSnapshotVersion")
	return nil
}
// isSnapshotVersion reports whether version is a Maven snapshot version, i.e.
// whether it ends in "-SNAPSHOT". A version that merely contains that text
// somewhere in the middle (for example "1.0-SNAPSHOT-hotfix") is not a snapshot
// and must not have its files removed by the snapshot cleanup.
func isSnapshotVersion(version string) bool {
	return strings.HasSuffix(version, "-SNAPSHOT")
}
// cleanSnapshotFiles deletes the files of the snapshot version versionID that belong
// to builds older than the newest retainBuilds builds.
//
// The newest build number is read from the version's "maven-metadata.xml" file.
// Every file whose build number is at most (newest - retainBuilds) is deleted; when
// that cutoff is zero or negative nothing is deleted. The first failure (metadata
// lookup, metadata parsing, file listing, or file deletion) is returned wrapped.
func cleanSnapshotFiles(ctx context.Context, versionID int64, retainBuilds int) error {
	log.Info("Starting cleanSnapshotFiles for versionID: %d with retainBuilds: %d", versionID, retainBuilds)

	metadata, err := packages.GetFileForVersionByName(ctx, versionID, "maven-metadata.xml", packages.EmptyFileKey)
	if err != nil {
		return fmt.Errorf("failed to retrieve Maven metadata file for version ID %d: %w", versionID, err)
	}

	latestBuild, err := extractMaxBuildNumberFromMetadata(ctx, metadata)
	if err != nil {
		return fmt.Errorf("failed to extract max build number from maven-metadata.xml for version ID %d: %w", versionID, err)
	}
	log.Info("Max build number for versionID %d: %d", versionID, latestBuild)

	// Builds numbered up to and including the cutoff are outdated.
	cutoff := latestBuild - retainBuilds
	if cutoff < 1 {
		log.Info("No files to clean up, as the threshold build number is less than or equal to zero for versionID %d", versionID)
		return nil
	}

	outdated, err := packages.GetFilesByBuildNumber(ctx, versionID, cutoff)
	if err != nil {
		return fmt.Errorf("failed to retrieve files for version ID %d: %w", versionID, err)
	}

	for i := range outdated {
		stale := outdated[i]
		log.Debug("Removing file '%s' below threshold %d", stale.Name, cutoff)
		if err := packages_service.DeletePackageFile(ctx, stale); err != nil {
			return fmt.Errorf("failed to delete file '%s': %w", stale.Name, err)
		}
	}

	log.Info("Completed cleanSnapshotFiles for versionID: %d", versionID)
	return nil
}
// extractMaxBuildNumberFromMetadata opens the given maven-metadata.xml package file,
// parses it, and returns the snapshot build number it declares as an integer.
//
// An error is returned when the file content cannot be opened, when the metadata
// cannot be parsed or has no build number, or when the build number is not an integer.
func extractMaxBuildNumberFromMetadata(ctx context.Context, metadataFile *packages.PackageFile) (int, error) {
	content, _, _, err := packages_service.GetPackageFileStream(ctx, metadataFile)
	if err != nil {
		return 0, fmt.Errorf("failed to get package file stream: %w", err)
	}
	// NOTE(review): the other results of GetPackageFileStream are discarded here.
	// If the service can answer without a content stream (e.g. by handing back only
	// a location to fetch from — confirm), content would be nil and both Close and
	// the parser would panic. Fail with an error instead.
	if content == nil {
		return 0, fmt.Errorf("failed to get package file stream: no content stream for file '%s'", metadataFile.Name)
	}
	defer content.Close()

	buildNumberStr, err := maven.ParseMavenMetaData(content)
	if err != nil {
		return 0, fmt.Errorf("failed to parse maven-metadata.xml: %w", err)
	}

	// Element text is returned verbatim by the XML decoder, so surrounding
	// whitespace or line breaks inside <buildNumber> must be removed before parsing.
	buildNumber, err := strconv.Atoi(strings.TrimSpace(buildNumberStr))
	if err != nil {
		return 0, fmt.Errorf("invalid build number format: %w", err)
	}

	return buildNumber, nil
}
Loading…
Cancel
Save