Fix markdown URL parsing for commit ID (#30812)

7 months ago · c7bb3aa034
parent 0f3e717a1a
commit c7bb3aa034
4 changed files with 115 additions and 74 deletions
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -10,6 +10,7 @@ import (
 	"path"
 	"path/filepath"
 	"regexp"
 	"slices"
 	"strings"
 	"sync"
@@ -54,7 +55,7 @@ var (
 	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
 	// anyHashPattern splits url containing SHA into parts
-	anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
+	anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`)
 	// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
 	comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
@@ -591,7 +592,8 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
 func mentionProcessor(ctx *RenderContext, node *html.Node) {
 	start := 0
-	for node != nil {
+	nodeStop := node.NextSibling
 	for node != nodeStop {
 		found, loc := references.FindFirstMentionBytes(util.UnsafeStringToBytes(node.Data[start:]))
 		if !found {
 			node = node.NextSibling
@@ -962,57 +964,68 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
 	}
 }
-// fullHashPatternProcessor renders SHA containing URLs
+type anyHashPatternResult struct {
-func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
+	PosStart  int
-	if ctx.Metas == nil {
+	PosEnd    int
-		return
+	FullURL   string
-	}
+	CommitID  string
 	SubPath   string
 	QueryHash string
 }
-	next := node.NextSibling
+func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) {
-	for node != nil && node != next {
+	m := anyHashPattern.FindStringSubmatchIndex(s)
 		m := anyHashPattern.FindStringSubmatchIndex(node.Data)
 	if m == nil {
-			return
+		return ret, false
 	}
-		urlFull := node.Data[m[0]:m[1]]
+	ret.PosStart, ret.PosEnd = m[0], m[1]
-		text := base.ShortSha(node.Data[m[2]:m[3]])
+	ret.FullURL = s[ret.PosStart:ret.PosEnd]
 	if strings.HasSuffix(ret.FullURL, ".") {
 		// if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence.
 		ret.PosEnd--
 		ret.FullURL = ret.FullURL[:len(ret.FullURL)-1]
 		for i := 0; i < len(m); i++ {
 			m[i] = min(m[i], ret.PosEnd)
 		}
 	}
-		// 3rd capture group matches a optional path
+	ret.CommitID = s[m[2]:m[3]]
 		subpath := ""
 	if m[5] > 0 {
-			subpath = node.Data[m[4]:m[5]]
+		ret.SubPath = s[m[4]:m[5]]
 	}
-		// 4th capture group matches a optional url hash
+	lastStart, lastEnd := m[len(m)-2], m[len(m)-1]
-		hash := ""
+	if lastEnd > 0 {
-		if m[7] > 0 {
+		ret.QueryHash = s[lastStart:lastEnd][1:]
 			hash = node.Data[m[6]:m[7]][1:]
 	}
 	return ret, true
 }
-		start := m[0]
+// fullHashPatternProcessor renders SHA containing URLs
-		end := m[1]
+func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
-
+	if ctx.Metas == nil {
-		// If url ends in '.', it's very likely that it is not part of the
+		return
 		// actual url but used to finish a sentence.
 		if strings.HasSuffix(urlFull, ".") {
 			end--
 			urlFull = urlFull[:len(urlFull)-1]
 			if hash != "" {
 				hash = hash[:len(hash)-1]
 			} else if subpath != "" {
 				subpath = subpath[:len(subpath)-1]
 	}
 	nodeStop := node.NextSibling
 	for node != nodeStop {
 		if node.Type != html.TextNode {
 			node = node.NextSibling
 			continue
 		}
-
+		ret, ok := anyHashPatternExtract(node.Data)
-		if subpath != "" {
+		if !ok {
-			text += subpath
+			node = node.NextSibling
 			continue
 		}
-
+		text := base.ShortSha(ret.CommitID)
-		if hash != "" {
+		if ret.SubPath != "" {
-			text += " (" + hash + ")"
+			text += ret.SubPath
 		}
-		replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
+		if ret.QueryHash != "" {
 			text += " (" + ret.QueryHash + ")"
 		}
 		replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit"))
 		node = node.NextSibling.NextSibling
 	}
 }
@@ -1021,19 +1034,16 @@ func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
 	if ctx.Metas == nil {
 		return
 	}
-
+	nodeStop := node.NextSibling
-	next := node.NextSibling
+	for node != nodeStop {
-	for node != nil && node != next {
+		if node.Type != html.TextNode {
-		m := comparePattern.FindStringSubmatchIndex(node.Data)
+			node = node.NextSibling
-		if m == nil {
+			continue
 			return
 		}
 		// Ensure that every group (m[0]...m[7]) has a match
 		for i := 0; i < 8; i++ {
 			if m[i] == -1 {
 				return
 		}
 		m := comparePattern.FindStringSubmatchIndex(node.Data)
 		if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match
 			node = node.NextSibling
 			continue
 		}
 		urlFull := node.Data[m[0]:m[1]]
--- a/modules/markup/html_codepreview.go
+++ b/modules/markup/html_codepreview.go
@@ -60,7 +60,8 @@ func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosSt
 }
 func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
-	for node != nil {
+	nodeStop := node.NextSibling
 	for node != nodeStop {
 		if node.Type != html.TextNode {
 			node = node.NextSibling
 			continue
--- a/modules/markup/html_internal_test.go
+++ b/modules/markup/html_internal_test.go
@@ -399,36 +399,61 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
 }
 func TestRegExp_anySHA1Pattern(t *testing.T) {
-	testCases := map[string][]string{
+	testCases := map[string]anyHashPatternResult{
 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
-			"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
+			CommitID:  "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
-			"/test/unit/event.js",
+			SubPath:   "/test/unit/event.js",
-			"#L2703",
+			QueryHash: "L2703",
 		},
 		"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
-			"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
+			CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
-			"/test/unit/event.js",
+			SubPath:  "/test/unit/event.js",
 			"",
 		},
 		"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
-			"0705be475092aede1eddae01319ec931fb9c65fc",
+			CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
 			"",
 			"",
 		},
 		"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
-			"0705be475092aede1eddae01319ec931fb9c65fc",
+			CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
-			"/src",
+			SubPath:  "/src",
 			"",
 		},
 		"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
-			"d8a994ef243349f321568f9e36d5c3f444b99cae",
+			CommitID:  "d8a994ef243349f321568f9e36d5c3f444b99cae",
-			"",
+			QueryHash: "diff-2",
-			"#diff-2",
+		},
 		"non-url": {},
 		"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": {
 			CommitID:  "1234567812345678123456781234567812345678123456781234567812345678",
 			QueryHash: "L1-L2",
 		},
 		"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": {
 			CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
 		},
 		"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": {
 			CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
 			SubPath:  "/sub",
 		},
 		"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": {
 			CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
 		},
 		"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": {
 			CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
 		},
 		"http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": {
 			CommitID:  "1234567812345678123456781234567812345678123456781234567812345678",
 			QueryHash: "hash",
 		},
 	}
 	for k, v := range testCases {
-		assert.Equal(t, anyHashPattern.FindStringSubmatch(k)[1:], v)
+		ret, ok := anyHashPatternExtract(k)
 		if v.CommitID == "" {
 			assert.False(t, ok)
 		} else {
 			assert.EqualValues(t, strings.TrimSuffix(k, "."), ret.FullURL)
 			assert.EqualValues(t, v.CommitID, ret.CommitID)
 			assert.EqualValues(t, v.SubPath, ret.SubPath)
 			assert.EqualValues(t, v.QueryHash, ret.QueryHash)
 		}
 	}
 }
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -124,6 +124,11 @@ func TestRender_CrossReferences(t *testing.T) {
 	test(
 		util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345"),
 		`<p><a href="`+util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345")+`" class="ref-issue" rel="nofollow">gogitea/some-repo-name#12345</a></p>`)
 	inputURL := "https://host/a/b/commit/0123456789012345678901234567890123456789/foo.txt?a=b#L2-L3"
 	test(
 		inputURL,
 		`<p><a href="`+inputURL+`" rel="nofollow"><code>0123456789/foo.txt (L2-L3)</code></a></p>`)
 }
 func TestMisc_IsSameDomain(t *testing.T) {
@@ -695,7 +700,7 @@ func TestIssue18471(t *testing.T) {
 	}, strings.NewReader(data), &res)
 	assert.NoError(t, err)
-	assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
+	assert.Equal(t, `<a href="http://domain/org/repo/compare/783b039...da951ce" class="compare"><code class="nohighlight">783b039...da951ce</code></a>`, res.String())
 }
 func TestIsFullURL(t *testing.T) {