Validate migration files (#18203)

JSON Schema validation for data used by Gitea during migrations

Discussion at https://forum.forgefriends.org/t/common-json-schema-for-repository-information/563

Co-authored-by: Loïc Dachary <loic@dachary.org>
pull/18414/head
Aravinth Manivannan 3 years ago committed by GitHub
parent 49dd906753
commit 3bb028cc46
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
  1. 2
      .gitignore
  2. 5
      cmd/restore_repo.go
  3. 1
      go.mod
  4. 2
      go.sum
  5. 2
      integrations/dump_restore_test.go
  6. 112
      modules/migration/file_format.go
  7. 39
      modules/migration/file_format_test.go
  8. 14
      modules/migration/file_format_testdata/issue_a.json
  9. 10
      modules/migration/file_format_testdata/issue_a.yml
  10. 5
      modules/migration/file_format_testdata/issue_b.json
  11. 20
      modules/migration/file_format_testdata/milestones.json
  12. 32
      modules/migration/issue.go
  13. 6
      modules/migration/label.go
  14. 14
      modules/migration/milestone.go
  15. 6
      modules/migration/reaction.go
  16. 114
      modules/migration/schemas/issue.json
  17. 28
      modules/migration/schemas/label.json
  18. 67
      modules/migration/schemas/milestone.json
  19. 29
      modules/migration/schemas/reaction.json
  20. 10
      modules/migration/schemas_bindata.go
  21. 40
      modules/migration/schemas_dynamic.go
  22. 17
      modules/migration/schemas_static.go
  23. 20
      modules/private/restore_repo.go
  24. 10
      routers/private/restore_repo.go
  25. 4
      services/migrations/dump.go
  26. 42
      services/migrations/restore.go

2
.gitignore vendored

@ -36,6 +36,8 @@ _testmain.go
coverage.all
cpu.out
/modules/migration/bindata.go
/modules/migration/bindata.go.hash
/modules/options/bindata.go
/modules/options/bindata.go.hash
/modules/public/bindata.go

@ -43,6 +43,10 @@ var CmdRestoreRepository = cli.Command{
Usage: `Which items will be restored, one or more units should be separated as comma.
wiki, issues, labels, releases, release_assets, milestones, pull_requests, comments are allowed. Empty means all units.`,
},
cli.BoolFlag{
Name: "validation",
Usage: "Sanity check the content of the files before trying to load them",
},
},
}
@ -58,6 +62,7 @@ func runRestoreRepository(c *cli.Context) error {
c.String("owner_name"),
c.String("repo_name"),
c.StringSlice("units"),
c.Bool("validation"),
)
if statusCode == http.StatusOK {
return nil

@ -97,6 +97,7 @@ require (
github.com/quasoft/websspi v1.0.0
github.com/rs/xid v1.3.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 // indirect
github.com/sergi/go-diff v1.2.0
github.com/shurcooL/httpfs v0.0.0-20190707220628-8d4bc4ba7749 // indirect
github.com/shurcooL/vfsgen v0.0.0-20200824052919-0d455de96546

@ -1039,6 +1039,8 @@ github.com/russross/blackfriday/v2 v2.1.0 h1:JIOH55/0cWyOuilr9/qlrm0BSXldqnqwMsf
github.com/russross/blackfriday/v2 v2.1.0/go.mod h1:+Rmxgy9KzJVeS9/2gXHxylqXiyQDYRxCVz55jmeOWTM=
github.com/ryanuber/columnize v0.0.0-20160712163229-9b3edd62028f/go.mod h1:sm1tb6uqfes/u+d4ooFouqFdy9/2g9QGwK3SQygK0Ts=
github.com/samuel/go-zookeeper v0.0.0-20190923202752-2cc03de413da/go.mod h1:gi+0XIa01GRL2eRQVjQkKGqKF3SF9vZR/HnPullcV2E=
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0 h1:TToq11gyfNlrMFZiYujSekIsPd9AmsA2Bj/iv+s4JHE=
github.com/santhosh-tekuri/jsonschema/v5 v5.0.0/go.mod h1:FKdcjfQW6rpZSnxxUvEA5H/cDPdvJ/SZJQLWWXWGrZ0=
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
github.com/sergi/go-diff v1.1.0/go.mod h1:STckp+ISIX8hZLjrqAeVduY0gWCT9IjLuqbuNXdaHfM=

@ -81,7 +81,7 @@ func TestDumpRestore(t *testing.T) {
//
newreponame := "restoredrepo"
err = migrations.RestoreRepository(ctx, d, repo.OwnerName, newreponame, []string{"labels", "milestones", "issues", "comments"})
err = migrations.RestoreRepository(ctx, d, repo.OwnerName, newreponame, []string{"labels", "milestones", "issues", "comments"}, false)
assert.NoError(t, err)
newrepo := unittest.AssertExistsAndLoadBean(t, &repo_model.Repository{Name: newreponame}).(*repo_model.Repository)

@ -0,0 +1,112 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package migration
import (
"fmt"
"os"
"strings"
"code.gitea.io/gitea/modules/json"
"code.gitea.io/gitea/modules/log"
"github.com/santhosh-tekuri/jsonschema/v5"
"gopkg.in/yaml.v2"
)
// Load reads filename and decodes its content into data. The content is
// decoded as JSON when the file name ends in ".json" and as YAML otherwise.
// When validation is true the raw content is first checked by validate and
// any error it reports is returned before data is touched.
func Load(filename string, data interface{}, validation bool) error {
	content, err := os.ReadFile(filename)
	if err != nil {
		// Returned unwrapped so the caller receives exactly what os.ReadFile reported.
		return err
	}

	asJSON := strings.HasSuffix(filename, ".json")
	if validation {
		if verr := validate(content, data, asJSON); verr != nil {
			return verr
		}
	}
	return unmarshal(content, data, asJSON)
}
func unmarshal(bs []byte, data interface{}, isJSON bool) error {
if isJSON {
return json.Unmarshal(bs, data)
}
return yaml.Unmarshal(bs, data)
}
// getSchema compiles the JSON schema identified by filename. The compiler
// resolves the schema, and any schema it references, through openSchema.
func getSchema(filename string) (*jsonschema.Schema, error) {
	compiler := jsonschema.NewCompiler()
	compiler.LoadURL = openSchema
	schema, err := compiler.Compile(filename)
	return schema, err
}
// validate decodes bs into a generic value and checks it against the JSON
// schema selected by the dynamic type of datatype: "issue.json" for
// *[]*Issue and "milestone.json" for *[]*Milestone. Any other type is
// rejected with an error. For non-JSON input the decoded value is passed
// through toStringKeys before it is validated. A failed validation is logged
// together with the raw content, and the validation error is returned
// unchanged.
func validate(bs []byte, datatype interface{}, isJSON bool) error {
	var decoded interface{}
	if err := unmarshal(bs, &decoded, isJSON); err != nil {
		return err
	}

	if !isJSON {
		converted, err := toStringKeys(decoded)
		if err != nil {
			return err
		}
		decoded = converted
	}

	var schemaFilename string
	switch datatype.(type) {
	case *[]*Issue:
		schemaFilename = "issue.json"
	case *[]*Milestone:
		schemaFilename = "milestone.json"
	default:
		return fmt.Errorf("file_format:validate: %T has not a validation implemented", datatype)
	}

	schema, err := getSchema(schemaFilename)
	if err != nil {
		return err
	}

	if err := schema.Validate(decoded); err != nil {
		log.Error("migration validation with %s failed for\n%s", schemaFilename, string(bs))
		return err
	}
	return nil
}
// toStringKeys returns a copy of val in which every
// map[interface{}]interface{}, at any depth inside maps and slices, is
// replaced by an equivalent map[string]interface{}. Values of any other type
// are returned as they are.
//
// It returns an error that names the type and value of the offending key when
// a map key is not a string.
func toStringKeys(val interface{}) (interface{}, error) {
	switch val := val.(type) {
	case map[interface{}]interface{}:
		m := make(map[string]interface{}, len(val))
		for k, v := range val {
			name, ok := k.(string)
			if !ok {
				// Report the original key: a failed assertion leaves name as
				// the empty string, which would hide what was actually found.
				return nil, fmt.Errorf("found non-string key %T %v", k, k)
			}
			converted, err := toStringKeys(v)
			if err != nil {
				return nil, err
			}
			m[name] = converted
		}
		return m, nil
	case []interface{}:
		l := make([]interface{}, len(val))
		for i, v := range val {
			converted, err := toStringKeys(v)
			if err != nil {
				return nil, err
			}
			l[i] = converted
		}
		return l, nil
	default:
		return val, nil
	}
}

@ -0,0 +1,39 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
package migration
import (
"strings"
"testing"
"github.com/santhosh-tekuri/jsonschema/v5"
"github.com/stretchr/testify/assert"
)
// TestMigrationJSON_IssueOK checks that the issue fixtures, in both JSON and
// YAML form, load without error when validation is enabled.
func TestMigrationJSON_IssueOK(t *testing.T) {
	issues := make([]*Issue, 0, 10)
	fixtures := []string{
		"file_format_testdata/issue_a.json",
		"file_format_testdata/issue_a.yml",
	}
	for _, fixture := range fixtures {
		assert.NoError(t, Load(fixture, &issues, true))
	}
}
// TestMigrationJSON_IssueFail checks that loading issue_b.json with
// validation enabled fails with a *jsonschema.ValidationError whose second
// GoString line reports the missing poster_id property.
func TestMigrationJSON_IssueFail(t *testing.T) {
	issues := make([]*Issue, 0, 10)
	err := Load("file_format_testdata/issue_b.json", &issues, true)

	validationErr, ok := err.(*jsonschema.ValidationError)
	if !ok {
		t.Fatalf("got: type %T with value %s, want: *jsonschema.ValidationError", err, err)
	}

	lines := strings.Split(validationErr.GoString(), "\n")
	assert.Contains(t, lines[1], "missing properties")
	assert.Contains(t, lines[1], "poster_id")
}
// TestMigrationJSON_MilestoneOK checks that the milestones fixture loads
// without error when validation is enabled.
func TestMigrationJSON_MilestoneOK(t *testing.T) {
	loaded := make([]*Milestone, 0, 10)
	assert.NoError(t, Load("file_format_testdata/milestones.json", &loaded, true))
}

@ -0,0 +1,14 @@
[
{
"number": 1,
"poster_id": 1,
"poster_name": "name_a",
"title": "title_a",
"content": "content_a",
"state": "closed",
"is_locked": false,
"created": "1985-04-12T23:20:50.52Z",
"updated": "1986-04-12T23:20:50.52Z",
"closed": "1987-04-12T23:20:50.52Z"
}
]

@ -0,0 +1,10 @@
- number: 1
poster_id: 1
poster_name: name_a
title: title_a
content: content_a
state: closed
is_locked: false
created: 2021-05-27T15:24:13+02:00
updated: 2021-11-11T10:52:45+01:00
closed: 2021-11-11T10:52:45+01:00

@ -0,0 +1,5 @@
[
{
"number": 1
}
]

@ -0,0 +1,20 @@
[
{
"title": "title_a",
"description": "description_a",
"deadline": "1988-04-12T23:20:50.52Z",
"created": "1985-04-12T23:20:50.52Z",
"updated": "1986-04-12T23:20:50.52Z",
"closed": "1987-04-12T23:20:50.52Z",
"state": "closed"
},
{
"title": "title_b",
"description": "description_b",
"deadline": "1998-04-12T23:20:50.52Z",
"created": "1995-04-12T23:20:50.52Z",
"updated": "1996-04-12T23:20:50.52Z",
"closed": null,
"state": "open"
}
]

@ -28,21 +28,21 @@ func (c BasicIssueContext) ForeignID() int64 {
// Issue is a standard issue information
type Issue struct {
Number int64
PosterID int64 `yaml:"poster_id"`
PosterName string `yaml:"poster_name"`
PosterEmail string `yaml:"poster_email"`
Title string
Content string
Ref string
Milestone string
State string // closed, open
IsLocked bool `yaml:"is_locked"`
Created time.Time
Updated time.Time
Closed *time.Time
Labels []*Label
Reactions []*Reaction
Assignees []string
Number int64 `json:"number"`
PosterID int64 `yaml:"poster_id" json:"poster_id"`
PosterName string `yaml:"poster_name" json:"poster_name"`
PosterEmail string `yaml:"poster_email" json:"poster_email"`
Title string `json:"title"`
Content string `json:"content"`
Ref string `json:"ref"`
Milestone string `json:"milestone"`
State string `json:"state"` // closed, open
IsLocked bool `yaml:"is_locked" json:"is_locked"`
Created time.Time `json:"created"`
Updated time.Time `json:"updated"`
Closed *time.Time `json:"closed"`
Labels []*Label `json:"labels"`
Reactions []*Reaction `json:"reactions"`
Assignees []string `json:"assignees"`
Context IssueContext `yaml:"-"`
}

@ -7,7 +7,7 @@ package migration
// Label defines a standard label information
type Label struct {
Name string
Color string
Description string
Name string `json:"name"`
Color string `json:"color"`
Description string `json:"description"`
}

@ -9,11 +9,11 @@ import "time"
// Milestone defines a standard milestone
type Milestone struct {
Title string
Description string
Deadline *time.Time
Created time.Time
Updated *time.Time
Closed *time.Time
State string // open, closed
Title string `json:"title"`
Description string `json:"description"`
Deadline *time.Time `json:"deadline"`
Created time.Time `json:"created"`
Updated *time.Time `json:"updated"`
Closed *time.Time `json:"closed"`
State string `json:"state"` // open, closed
}

@ -6,7 +6,7 @@ package migration
// Reaction represents a reaction to an issue/pr/comment.
type Reaction struct {
UserID int64 `yaml:"user_id"`
UserName string `yaml:"user_name"`
Content string
UserID int64 `yaml:"user_id" json:"user_id"`
UserName string `yaml:"user_name" json:"user_name"`
Content string `json:"content"`
}

@ -0,0 +1,114 @@
{
"title": "Issue",
"description": "Issues associated to a repository within a forge (Gitea, GitLab, etc.).",
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"number": {
"description": "Unique identifier, relative to the repository.",
"type": "number"
},
"poster_id": {
"description": "Unique identifier of the user who authored the issue.",
"type": "number"
},
"poster_name": {
"description": "Name of the user who authored the issue.",
"type": "string"
},
"poster_email": {
"description": "Email of the user who authored the issue.",
"type": "string"
},
"title": {
"description": "Short description displayed as the title.",
"type": "string"
},
"content": {
"description": "Long, multiline, description.",
"type": "string"
},
"ref": {
"description": "Target branch in the repository.",
"type": "string"
},
"milestone": {
"description": "Name of the milestone.",
"type": "string"
},
"state": {
"description": "A 'closed' issue will not see any activity in the future, otherwise it is 'open'.",
"enum": [
"closed",
"open"
]
},
"is_locked": {
"description": "A locked issue can only be modified by privileged users.",
"type": "boolean"
},
"created": {
"description": "Creation time.",
"type": "string",
"format": "date-time"
},
"updated": {
"description": "Last update time.",
"type": "string",
"format": "date-time"
},
"closed": {
"description": "The last time 'state' changed to 'closed'.",
"anyOf": [
{
"type": "string",
"format": "date-time"
},
{
"type": "null"
}
]
},
"labels": {
"description": "List of labels.",
"type": "array",
"items": {
"$ref": "label.json"
}
},
"reactions": {
"description": "List of reactions.",
"type": "array",
"items": {
"$ref": "reaction.json"
}
},
"assignees": {
"description": "List of assignees.",
"type": "array",
"items": {
"description": "Name of a user assigned to the issue.",
"type": "string"
}
}
},
"required": [
"number",
"poster_id",
"poster_name",
"title",
"content",
"state",
"is_locked",
"created",
"updated"
]
},
"$schema": "http://json-schema.org/draft-04/schema#",
"$id": "http://example.com/issue.json",
"$$target": "issue.json"
}

@ -0,0 +1,28 @@
{
"title": "Label",
"description": "Label associated to an issue.",
"type": "object",
"additionalProperties": false,
"properties": {
"name": {
"description": "Name of the label, unique within the repository.",
"type": "string"
},
"color": {
"description": "Color code of the label.",
"type": "string"
},
"description": {
"description": "Long, multiline, description.",
"type": "string"
}
},
"required": [
"name"
],
"$schema": "http://json-schema.org/draft-04/schema#",
"$id": "label.json",
"$$target": "label.json"
}

@ -0,0 +1,67 @@
{
"title": "Milestone",
"description": "Milestone associated to a repository within a forge.",
"type": "array",
"items": {
"type": "object",
"additionalProperties": false,
"properties": {
"title": {
"description": "Short description.",
"type": "string"
},
"description": {
"description": "Long, multiline, description.",
"type": "string"
},
"deadline": {
"description": "Deadline after which the milestone is overdue.",
"type": "string",
"format": "date-time"
},
"created": {
"description": "Creation time.",
"type": "string",
"format": "date-time"
},
"updated": {
"description": "Last update time.",
"type": "string",
"format": "date-time"
},
"closed": {
"description": "The last time 'state' changed to 'closed'.",
"anyOf": [
{
"type": "string",
"format": "date-time"
},
{
"type": "null"
}
]
},
"state": {
"description": "A 'closed' issue will not see any activity in the future, otherwise it is 'open'.",
"enum": [
"closed",
"open"
]
}
},
"required": [
"title",
"description",
"deadline",
"created",
"updated",
"closed",
"state"
]
},
"$schema": "http://json-schema.org/draft-04/schema#",
"$id": "http://example.com/milestone.json",
"$$target": "milestone.json"
}

@ -0,0 +1,29 @@
{
"title": "Reaction",
"description": "Reaction associated to an issue or a comment.",
"type": "object",
"additionalProperties": false,
"properties": {
"user_id": {
"description": "Unique identifier of the user who authored the reaction.",
"type": "number"
},
"user_name": {
"description": "Name of the user who authored the reaction.",
"type": "string"
},
"content": {
"description": "Representation of the reaction",
"type": "string"
}
},
"required": [
"user_id",
"content"
],
"$schema": "http://json-schema.org/draft-04/schema#",
"$id": "http://example.com/reaction.json",
"$$target": "reaction.json"
}

@ -0,0 +1,10 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build bindata
// +build bindata
package migration
//go:generate go run ../../build/generate-bindata.go ../../modules/migration/schemas migration bindata.go

@ -0,0 +1,40 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build !bindata
// +build !bindata
package migration
import (
"io"
"net/url"
"os"
"path"
"path/filepath"
)
// openSchema opens the schema file named by the last path element of the URL
// s, looking on the local file system.
//
// The file is looked up, in order, as:
//
//   - <basename> in the current directory, because schemas reference each
//     other within the schemas directory;
//   - schemas/<basename>, because the tests run in the parent directory of
//     the schemas directory;
//   - modules/migration/schemas/<basename>, because integration tests run
//     from the git root directory.
//
// The first of the first two locations whose os.Stat does not report "not
// exist" is opened; otherwise the last location is opened. On failure the
// returned ReadCloser is nil and the error is the one from url.Parse or
// os.Open.
func openSchema(s string) (io.ReadCloser, error) {
	u, err := url.Parse(s)
	if err != nil {
		return nil, err
	}
	basename := path.Base(u.Path)

	// Fall back to the git-root-relative location unless an earlier
	// candidate is found.
	filename := filepath.Join("modules/migration/schemas", basename)
	for _, candidate := range []string{basename, filepath.Join("schemas", basename)} {
		if _, statErr := os.Stat(candidate); !os.IsNotExist(statErr) {
			filename = candidate
			break
		}
	}

	f, err := os.Open(filename)
	if err != nil {
		// Return an untyped nil: handing back the nil *os.File directly
		// would yield a non-nil io.ReadCloser wrapping a nil pointer.
		return nil, err
	}
	return f, nil
}

@ -0,0 +1,17 @@
// Copyright 2022 The Gitea Authors. All rights reserved.
// Use of this source code is governed by a MIT-style
// license that can be found in the LICENSE file.
//go:build bindata
// +build bindata
package migration
import (
"io"
"path"
)
// openSchema opens the schema from Assets, looking it up by the last path
// element of filename.
func openSchema(filename string) (io.ReadCloser, error) {
	basename := path.Base(filename)
	return Assets.Open(basename)
}

@ -17,24 +17,26 @@ import (
// RestoreParams structure holds the data for restoring a repository
type RestoreParams struct {
RepoDir string
OwnerName string
RepoName string
Units []string
RepoDir string
OwnerName string
RepoName string
Units []string
Validation bool
}
// RestoreRepo calls the internal RestoreRepo function
func RestoreRepo(ctx context.Context, repoDir, ownerName, repoName string, units []string) (int, string) {
func RestoreRepo(ctx context.Context, repoDir, ownerName, repoName string, units []string, validation bool) (int, string) {
reqURL := setting.LocalURL + "api/internal/restore_repo"
req := newInternalRequest(ctx, reqURL, "POST")
req.SetTimeout(3*time.Second, 0) // since the request will spend much time, don't timeout
req = req.Header("Content-Type", "application/json")
jsonBytes, _ := json.Marshal(RestoreParams{
RepoDir: repoDir,
OwnerName: ownerName,
RepoName: repoName,
Units: units,
RepoDir: repoDir,
OwnerName: ownerName,
RepoName: repoName,
Units: units,
Validation: validation,
})
req.Body(jsonBytes)
resp, err := req.Response()

@ -24,10 +24,11 @@ func RestoreRepo(ctx *myCtx.PrivateContext) {
return
}
params := struct {
RepoDir string
OwnerName string
RepoName string
Units []string
RepoDir string
OwnerName string
RepoName string
Units []string
Validation bool
}{}
if err = json.Unmarshal(bs, &params); err != nil {
ctx.JSON(http.StatusInternalServerError, private.Response{
@ -42,6 +43,7 @@ func RestoreRepo(ctx *myCtx.PrivateContext) {
params.OwnerName,
params.RepoName,
params.Units,
params.Validation,
); err != nil {
ctx.JSON(http.StatusInternalServerError, private.Response{
Err: err.Error(),

@ -604,13 +604,13 @@ func updateOptionsUnits(opts *base.MigrateOptions, units []string) {
}
// RestoreRepository restore a repository from the disk directory
func RestoreRepository(ctx context.Context, baseDir, ownerName, repoName string, units []string) error {
func RestoreRepository(ctx context.Context, baseDir, ownerName, repoName string, units []string, validation bool) error {
doer, err := user_model.GetAdminUser()
if err != nil {
return err
}
uploader := NewGiteaLocalUploader(ctx, doer, ownerName, repoName)
downloader, err := NewRepositoryRestorer(ctx, baseDir, ownerName, repoName)
downloader, err := NewRepositoryRestorer(ctx, baseDir, ownerName, repoName, validation)
if err != nil {
return err
}

@ -19,23 +19,25 @@ import (
// RepositoryRestorer implements a Downloader from the local directory
type RepositoryRestorer struct {
base.NullDownloader
ctx context.Context
baseDir string
repoOwner string
repoName string
ctx context.Context
baseDir string
repoOwner string
repoName string
validation bool
}
// NewRepositoryRestorer creates a repository restorer which could restore repository from a dumped folder
func NewRepositoryRestorer(ctx context.Context, baseDir, owner, repoName string) (*RepositoryRestorer, error) {
func NewRepositoryRestorer(ctx context.Context, baseDir, owner, repoName string, validation bool) (*RepositoryRestorer, error) {
baseDir, err := filepath.Abs(baseDir)
if err != nil {
return nil, err
}
return &RepositoryRestorer{
ctx: ctx,
baseDir: baseDir,
repoOwner: owner,
repoName: repoName,
ctx: ctx,
baseDir: baseDir,
repoOwner: owner,
repoName: repoName,
validation: validation,
}, nil
}
@ -114,7 +116,7 @@ func (r *RepositoryRestorer) GetTopics() ([]string, error) {
func (r *RepositoryRestorer) GetMilestones() ([]*base.Milestone, error) {
milestones := make([]*base.Milestone, 0, 10)
p := filepath.Join(r.baseDir, "milestone.yml")
_, err := os.Stat(p)
err := base.Load(p, &milestones, r.validation)
if err != nil {
if os.IsNotExist(err) {
return nil, nil
@ -122,15 +124,6 @@ func (r *RepositoryRestorer) GetMilestones() ([]*base.Milestone, error) {
return nil, err
}
bs, err := os.ReadFile(p)
if err != nil {
return nil, err
}
err = yaml.Unmarshal(bs, &milestones)
if err != nil {
return nil, err
}
return milestones, nil
}
@ -193,7 +186,7 @@ func (r *RepositoryRestorer) GetLabels() ([]*base.Label, error) {
func (r *RepositoryRestorer) GetIssues(page, perPage int) ([]*base.Issue, bool, error) {
issues := make([]*base.Issue, 0, 10)
p := filepath.Join(r.baseDir, "issue.yml")
_, err := os.Stat(p)
err := base.Load(p, &issues, r.validation)
if err != nil {
if os.IsNotExist(err) {
return nil, true, nil
@ -201,15 +194,6 @@ func (r *RepositoryRestorer) GetIssues(page, perPage int) ([]*base.Issue, bool,
return nil, false, err
}
bs, err := os.ReadFile(p)
if err != nil {
return nil, false, err
}
err = yaml.Unmarshal(bs, &issues)
if err != nil {
return nil, false, err
}
for _, issue := range issues {
issue.Context = base.BasicIssueContext(issue.Number)
}

Loading…
Cancel
Save