From 61be51e56baf037aa7902e7cd066b895a10da244 Mon Sep 17 00:00:00 2001 From: wxiaoguang Date: Mon, 4 Nov 2024 18:59:50 +0800 Subject: [PATCH] Refactor markup package (#32399) To make the markup package easier to maintain: 1. Split some go files into small files 2. Use a shared util.NopCloser, remove duplicate code 3. Remove unused functions --- modules/gitrepo/gitrepo.go | 9 +- modules/log/event_writer_console.go | 13 +- modules/log/event_writer_file.go | 3 +- modules/markup/html.go | 757 ------------------- modules/markup/html_commit.go | 225 ++++++ modules/markup/html_email.go | 21 + modules/markup/html_emoji.go | 115 +++ modules/markup/html_issue.go | 180 +++++ modules/markup/html_link.go | 227 ++++++ modules/markup/html_mention.go | 54 ++ modules/markup/markdown/goldmark.go | 2 +- modules/markup/markdown/toc.go | 10 +- modules/markup/markdown/transform_heading.go | 4 +- modules/markup/render.go | 226 ++++++ modules/markup/render_helper.go | 21 + modules/markup/render_links.go | 56 ++ modules/markup/renderer.go | 301 +------- modules/packages/debian/metadata_test.go | 11 +- modules/util/io.go | 6 + 19 files changed, 1154 insertions(+), 1087 deletions(-) create mode 100644 modules/markup/html_commit.go create mode 100644 modules/markup/html_email.go create mode 100644 modules/markup/html_emoji.go create mode 100644 modules/markup/html_issue.go create mode 100644 modules/markup/html_mention.go create mode 100644 modules/markup/render.go create mode 100644 modules/markup/render_helper.go create mode 100644 modules/markup/render_links.go diff --git a/modules/gitrepo/gitrepo.go b/modules/gitrepo/gitrepo.go index d89f8f9c0c8..14d809aedbe 100644 --- a/modules/gitrepo/gitrepo.go +++ b/modules/gitrepo/gitrepo.go @@ -11,6 +11,7 @@ import ( "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" ) type Repository interface { @@ -59,15 +60,11 @@ func repositoryFromContext(ctx context.Context, repo Repository) *git.Repository return nil } -type nopCloser func() - -func (nopCloser) Close() error { return nil } - // RepositoryFromContextOrOpen attempts to get the repository from the context or just opens it func RepositoryFromContextOrOpen(ctx context.Context, repo Repository) (*git.Repository, io.Closer, error) { gitRepo := repositoryFromContext(ctx, repo) if gitRepo != nil { - return gitRepo, nopCloser(nil), nil + return gitRepo, util.NopCloser{}, nil } gitRepo, err := OpenRepository(ctx, repo) @@ -95,7 +92,7 @@ func repositoryFromContextPath(ctx context.Context, path string) *git.Repository func RepositoryFromContextOrOpenPath(ctx context.Context, path string) (*git.Repository, io.Closer, error) { gitRepo := repositoryFromContextPath(ctx, path) if gitRepo != nil { - return gitRepo, nopCloser(nil), nil + return gitRepo, util.NopCloser{}, nil } gitRepo, err := git.OpenRepository(ctx, path) diff --git a/modules/log/event_writer_console.go b/modules/log/event_writer_console.go index 78183de644b..e4c409d83e7 100644 --- a/modules/log/event_writer_console.go +++ b/modules/log/event_writer_console.go @@ -4,8 +4,9 @@ package log import ( - "io" "os" + + "code.gitea.io/gitea/modules/util" ) type WriterConsoleOption struct { @@ -18,19 +19,13 @@ type eventWriterConsole struct { var _ EventWriter = (*eventWriterConsole)(nil) -type nopCloser struct { - io.Writer -} - -func (nopCloser) Close() error { return nil } - func NewEventWriterConsole(name string, mode WriterMode) EventWriter { w := &eventWriterConsole{EventWriterBaseImpl: NewEventWriterBase(name, "console", mode)} opt := mode.WriterOption.(WriterConsoleOption) if opt.Stderr { - w.OutputWriteCloser = nopCloser{os.Stderr} + w.OutputWriteCloser = util.NopCloser{Writer: os.Stderr} } else { - w.OutputWriteCloser = nopCloser{os.Stdout} + w.OutputWriteCloser = util.NopCloser{Writer: os.Stdout} } return w } diff --git a/modules/log/event_writer_file.go b/modules/log/event_writer_file.go index fd73d7d30a0..f26286498a6 100644 --- a/modules/log/event_writer_file.go +++ b/modules/log/event_writer_file.go @@ -6,6 +6,7 @@ package log import ( "io" + "code.gitea.io/gitea/modules/util" "code.gitea.io/gitea/modules/util/rotatingfilewriter" ) @@ -42,7 +43,7 @@ func NewEventWriterFile(name string, mode WriterMode) EventWriter { // if the log file can't be opened, what should it do? panic/exit? ignore logs? fallback to stderr? // it seems that "fallback to stderr" is slightly better than others .... FallbackErrorf("unable to open log file %q: %v", opt.FileName, err) - w.fileWriter = nopCloser{Writer: LoggerToWriter(FallbackErrorf)} + w.fileWriter = util.NopCloser{Writer: LoggerToWriter(FallbackErrorf)} } w.OutputWriteCloser = w.fileWriter return w diff --git a/modules/markup/html.go b/modules/markup/html.go index 8d3327c49eb..a9c3dc9ba28 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -6,25 +6,12 @@ package markup import ( "bytes" "io" - "net/url" - "path" - "path/filepath" "regexp" - "slices" "strings" "sync" - "code.gitea.io/gitea/modules/base" - "code.gitea.io/gitea/modules/emoji" - "code.gitea.io/gitea/modules/gitrepo" - "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup/common" - "code.gitea.io/gitea/modules/references" - "code.gitea.io/gitea/modules/regexplru" "code.gitea.io/gitea/modules/setting" - "code.gitea.io/gitea/modules/templates/vars" - "code.gitea.io/gitea/modules/translation" - "code.gitea.io/gitea/modules/util" "golang.org/x/net/html" "golang.org/x/net/html/atom" @@ -451,50 +438,6 @@ func createKeyword(content string) *html.Node { return span } -func createEmoji(content, class, name string) *html.Node { - span := &html.Node{ - Type: html.ElementNode, - Data: atom.Span.String(), - Attr: []html.Attribute{}, - } - if class != "" { - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) - } - if name != "" { - span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) - } - - text := &html.Node{ - Type: html.TextNode, - Data: content, - } - - span.AppendChild(text) - return span -} - -func createCustomEmoji(alias string) *html.Node { - span := &html.Node{ - Type: html.ElementNode, - Data: atom.Span.String(), - Attr: []html.Attribute{}, - } - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) - span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) - - img := &html.Node{ - Type: html.ElementNode, - DataAtom: atom.Img, - Data: "img", - Attr: []html.Attribute{}, - } - img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"}) - img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"}) - - span.AppendChild(img) - return span -} - func createLink(href, content, class string) *html.Node { a := &html.Node{ Type: html.ElementNode, @@ -515,33 +458,6 @@ func createLink(href, content, class string) *html.Node { return a } -func createCodeLink(href, content, class string) *html.Node { - a := &html.Node{ - Type: html.ElementNode, - Data: atom.A.String(), - Attr: []html.Attribute{{Key: "href", Val: href}}, - } - - if class != "" { - a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) - } - - text := &html.Node{ - Type: html.TextNode, - Data: content, - } - - code := &html.Node{ - Type: html.ElementNode, - Data: atom.Code.String(), - Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}}, - } - - code.AppendChild(text) - a.AppendChild(code) - return a -} - // replaceContent takes text node, and in its content it replaces a section of // it with the specified newNode. func replaceContent(node *html.Node, i, j int, newNode *html.Node) { @@ -573,676 +489,3 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) { }, nextSibling) } } - -func mentionProcessor(ctx *RenderContext, node *html.Node) { - start := 0 - nodeStop := node.NextSibling - for node != nodeStop { - found, loc := references.FindFirstMentionBytes(util.UnsafeStringToBytes(node.Data[start:])) - if !found { - node = node.NextSibling - start = 0 - continue - } - loc.Start += start - loc.End += start - mention := node.Data[loc.Start:loc.End] - teams, ok := ctx.Metas["teams"] - // FIXME: util.URLJoin may not be necessary here: - // - setting.AppURL is defined to have a terminal '/' so unless mention[1:] - // is an AppSubURL link we can probably fallback to concatenation. - // team mention should follow @orgName/teamName style - if ok && strings.Contains(mention, "/") { - mentionOrgAndTeam := strings.Split(mention, "/") - if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) - node = node.NextSibling.NextSibling - start = 0 - continue - } - start = loc.End - continue - } - mentionedUsername := mention[1:] - - if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention")) - node = node.NextSibling.NextSibling - start = 0 - } else { - start = loc.End - } - } -} - -func shortLinkProcessor(ctx *RenderContext, node *html.Node) { - next := node.NextSibling - for node != nil && node != next { - m := shortLinkPattern.FindStringSubmatchIndex(node.Data) - if m == nil { - return - } - - content := node.Data[m[2]:m[3]] - tail := node.Data[m[4]:m[5]] - props := make(map[string]string) - - // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] - // It makes page handling terrible, but we prefer GitHub syntax - // And fall back to MediaWiki only when it is obvious from the look - // Of text and link contents - sl := strings.Split(content, "|") - for _, v := range sl { - if equalPos := strings.IndexByte(v, '='); equalPos == -1 { - // There is no equal in this argument; this is a mandatory arg - if props["name"] == "" { - if IsFullURLString(v) { - // If we clearly see it is a link, we save it so - - // But first we need to ensure, that if both mandatory args provided - // look like links, we stick to GitHub syntax - if props["link"] != "" { - props["name"] = props["link"] - } - - props["link"] = strings.TrimSpace(v) - } else { - props["name"] = v - } - } else { - props["link"] = strings.TrimSpace(v) - } - } else { - // There is an equal; optional argument. - - sep := strings.IndexByte(v, '=') - key, val := v[:sep], html.UnescapeString(v[sep+1:]) - - // When parsing HTML, x/net/html will change all quotes which are - // not used for syntax into UTF-8 quotes. So checking val[0] won't - // be enough, since that only checks a single byte. - if len(val) > 1 { - if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) || - (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { - const lenQuote = len("‘") - val = val[lenQuote : len(val)-lenQuote] - } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || - (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { - val = val[1 : len(val)-1] - } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { - const lenQuote = len("‘") - val = val[1 : len(val)-lenQuote] - } - } - props[key] = val - } - } - - var name, link string - if props["link"] != "" { - link = props["link"] - } else if props["name"] != "" { - link = props["name"] - } - if props["title"] != "" { - name = props["title"] - } else if props["name"] != "" { - name = props["name"] - } else { - name = link - } - - name += tail - image := false - ext := filepath.Ext(link) - switch ext { - // fast path: empty string, ignore - case "": - // leave image as false - case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": - image = true - } - - childNode := &html.Node{} - linkNode := &html.Node{ - FirstChild: childNode, - LastChild: childNode, - Type: html.ElementNode, - Data: "a", - DataAtom: atom.A, - } - childNode.Parent = linkNode - absoluteLink := IsFullURLString(link) - if !absoluteLink { - if image { - link = strings.ReplaceAll(link, " ", "+") - } else { - link = strings.ReplaceAll(link, " ", "-") // FIXME: it should support dashes in the link, eg: "the-dash-support.-" - } - if !strings.Contains(link, "/") { - link = url.PathEscape(link) // FIXME: it doesn't seem right and it might cause double-escaping - } - } - if image { - if !absoluteLink { - link = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), link) - } - title := props["title"] - if title == "" { - title = props["alt"] - } - if title == "" { - title = path.Base(name) - } - alt := props["alt"] - if alt == "" { - alt = name - } - - // make the childNode an image - if we can, we also place the alt - childNode.Type = html.ElementNode - childNode.Data = "img" - childNode.DataAtom = atom.Img - childNode.Attr = []html.Attribute{ - {Key: "src", Val: link}, - {Key: "title", Val: title}, - {Key: "alt", Val: alt}, - } - if alt == "" { - childNode.Attr = childNode.Attr[:2] - } - } else { - link, _ = ResolveLink(ctx, link, "") - childNode.Type = html.TextNode - childNode.Data = name - } - linkNode.Attr = []html.Attribute{{Key: "href", Val: link}} - replaceContent(node, m[0], m[1], linkNode) - node = node.NextSibling.NextSibling - } -} - -func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { - if ctx.Metas == nil { - return - } - next := node.NextSibling - for node != nil && node != next { - m := getIssueFullPattern().FindStringSubmatchIndex(node.Data) - if m == nil { - return - } - - mDiffView := getFilesChangedFullPattern().FindStringSubmatchIndex(node.Data) - // leave it as it is if the link is from "Files Changed" tab in PR Diff View https://domain/org/repo/pulls/27/files - if mDiffView != nil { - return - } - - link := node.Data[m[0]:m[1]] - text := "#" + node.Data[m[2]:m[3]] - // if m[4] and m[5] is not -1, then link is to a comment - // indicate that in the text by appending (comment) - if m[4] != -1 && m[5] != -1 { - if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok { - text += " " + locale.TrString("repo.from_comment") - } else { - text += " (comment)" - } - } - - // extract repo and org name from matched link like - // http://localhost:3000/gituser/myrepo/issues/1 - linkParts := strings.Split(link, "/") - matchOrg := linkParts[len(linkParts)-4] - matchRepo := linkParts[len(linkParts)-3] - - if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) - } else { - text = matchOrg + "/" + matchRepo + text - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) - } - node = node.NextSibling.NextSibling - } -} - -func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { - if ctx.Metas == nil { - return - } - - // FIXME: the use of "mode" is quite dirty and hacky, for example: what is a "document"? how should it be rendered? - // The "mode" approach should be refactored to some other more clear&reliable way. - crossLinkOnly := ctx.Metas["mode"] == "document" && !ctx.IsWiki - - var ( - found bool - ref *references.RenderizableReference - ) - - next := node.NextSibling - - for node != nil && node != next { - _, hasExtTrackFormat := ctx.Metas["format"] - - // Repos with external issue trackers might still need to reference local PRs - // We need to concern with the first one that shows up in the text, whichever it is - isNumericStyle := ctx.Metas["style"] == "" || ctx.Metas["style"] == IssueNameStyleNumeric - foundNumeric, refNumeric := references.FindRenderizableReferenceNumeric(node.Data, hasExtTrackFormat && !isNumericStyle, crossLinkOnly) - - switch ctx.Metas["style"] { - case "", IssueNameStyleNumeric: - found, ref = foundNumeric, refNumeric - case IssueNameStyleAlphanumeric: - found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data) - case IssueNameStyleRegexp: - pattern, err := regexplru.GetCompiled(ctx.Metas["regexp"]) - if err != nil { - return - } - found, ref = references.FindRenderizableReferenceRegexp(node.Data, pattern) - } - - // Repos with external issue trackers might still need to reference local PRs - // We need to concern with the first one that shows up in the text, whichever it is - if hasExtTrackFormat && !isNumericStyle && refNumeric != nil { - // If numeric (PR) was found, and it was BEFORE the non-numeric pattern, use that - // Allow a free-pass when non-numeric pattern wasn't found. - if found && (ref == nil || refNumeric.RefLocation.Start < ref.RefLocation.Start) { - found = foundNumeric - ref = refNumeric - } - } - if !found { - return - } - - var link *html.Node - reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End] - if hasExtTrackFormat && !ref.IsPull { - ctx.Metas["index"] = ref.Issue - - res, err := vars.Expand(ctx.Metas["format"], ctx.Metas) - if err != nil { - // here we could just log the error and continue the rendering - log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) - } - - link = createLink(res, reftext, "ref-issue ref-external-issue") - } else { - // Path determines the type of link that will be rendered. It's unknown at this point whether - // the linked item is actually a PR or an issue. Luckily it's of no real consequence because - // Gitea will redirect on click as appropriate. - issuePath := util.Iif(ref.IsPull, "pulls", "issues") - if ref.Owner == "" { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") - } else { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") - } - } - - if ref.Action == references.XRefActionNone { - replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) - node = node.NextSibling.NextSibling - continue - } - - // Decorate action keywords if actionable - var keyword *html.Node - if references.IsXrefActionable(ref, hasExtTrackFormat) { - keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) - } else { - keyword = &html.Node{ - Type: html.TextNode, - Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End], - } - } - spaces := &html.Node{ - Type: html.TextNode, - Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start], - } - replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link}) - node = node.NextSibling.NextSibling.NextSibling.NextSibling - } -} - -func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { - next := node.NextSibling - - for node != nil && node != next { - found, ref := references.FindRenderizableCommitCrossReference(node.Data) - if !found { - return - } - - reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) - link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") - - replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) - node = node.NextSibling.NextSibling - } -} - -type anyHashPatternResult struct { - PosStart int - PosEnd int - FullURL string - CommitID string - SubPath string - QueryHash string -} - -func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) { - m := anyHashPattern.FindStringSubmatchIndex(s) - if m == nil { - return ret, false - } - - ret.PosStart, ret.PosEnd = m[0], m[1] - ret.FullURL = s[ret.PosStart:ret.PosEnd] - if strings.HasSuffix(ret.FullURL, ".") { - // if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence. - ret.PosEnd-- - ret.FullURL = ret.FullURL[:len(ret.FullURL)-1] - for i := 0; i < len(m); i++ { - m[i] = min(m[i], ret.PosEnd) - } - } - - ret.CommitID = s[m[2]:m[3]] - if m[5] > 0 { - ret.SubPath = s[m[4]:m[5]] - } - - lastStart, lastEnd := m[len(m)-2], m[len(m)-1] - if lastEnd > 0 { - ret.QueryHash = s[lastStart:lastEnd][1:] - } - return ret, true -} - -// fullHashPatternProcessor renders SHA containing URLs -func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) { - if ctx.Metas == nil { - return - } - nodeStop := node.NextSibling - for node != nodeStop { - if node.Type != html.TextNode { - node = node.NextSibling - continue - } - ret, ok := anyHashPatternExtract(node.Data) - if !ok { - node = node.NextSibling - continue - } - text := base.ShortSha(ret.CommitID) - if ret.SubPath != "" { - text += ret.SubPath - } - if ret.QueryHash != "" { - text += " (" + ret.QueryHash + ")" - } - replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit")) - node = node.NextSibling.NextSibling - } -} - -func comparePatternProcessor(ctx *RenderContext, node *html.Node) { - if ctx.Metas == nil { - return - } - nodeStop := node.NextSibling - for node != nodeStop { - if node.Type != html.TextNode { - node = node.NextSibling - continue - } - m := comparePattern.FindStringSubmatchIndex(node.Data) - if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match - node = node.NextSibling - continue - } - - urlFull := node.Data[m[0]:m[1]] - text1 := base.ShortSha(node.Data[m[2]:m[3]]) - textDots := base.ShortSha(node.Data[m[4]:m[5]]) - text2 := base.ShortSha(node.Data[m[6]:m[7]]) - - hash := "" - if m[9] > 0 { - hash = node.Data[m[8]:m[9]][1:] - } - - start := m[0] - end := m[1] - - // If url ends in '.', it's very likely that it is not part of the - // actual url but used to finish a sentence. - if strings.HasSuffix(urlFull, ".") { - end-- - urlFull = urlFull[:len(urlFull)-1] - if hash != "" { - hash = hash[:len(hash)-1] - } else if text2 != "" { - text2 = text2[:len(text2)-1] - } - } - - text := text1 + textDots + text2 - if hash != "" { - text += " (" + hash + ")" - } - replaceContent(node, start, end, createCodeLink(urlFull, text, "compare")) - node = node.NextSibling.NextSibling - } -} - -// emojiShortCodeProcessor for rendering text like :smile: into emoji -func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { - start := 0 - next := node.NextSibling - for node != nil && node != next && start < len(node.Data) { - m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) - if m == nil { - return - } - m[0] += start - m[1] += start - - start = m[1] - - alias := node.Data[m[0]:m[1]] - alias = strings.ReplaceAll(alias, ":", "") - converted := emoji.FromAlias(alias) - if converted == nil { - // check if this is a custom reaction - if _, exist := setting.UI.CustomEmojisMap[alias]; exist { - replaceContent(node, m[0], m[1], createCustomEmoji(alias)) - node = node.NextSibling.NextSibling - start = 0 - continue - } - continue - } - - replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) - node = node.NextSibling.NextSibling - start = 0 - } -} - -// emoji processor to match emoji and add emoji class -func emojiProcessor(ctx *RenderContext, node *html.Node) { - start := 0 - next := node.NextSibling - for node != nil && node != next && start < len(node.Data) { - m := emoji.FindEmojiSubmatchIndex(node.Data[start:]) - if m == nil { - return - } - m[0] += start - m[1] += start - - codepoint := node.Data[m[0]:m[1]] - start = m[1] - val := emoji.FromCode(codepoint) - if val != nil { - replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) - node = node.NextSibling.NextSibling - start = 0 - } - } -} - -// hashCurrentPatternProcessor renders SHA1 strings to corresponding links that -// are assumed to be in the same repository. -func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) { - if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || (ctx.Repo == nil && ctx.GitRepo == nil) { - return - } - - start := 0 - next := node.NextSibling - if ctx.ShaExistCache == nil { - ctx.ShaExistCache = make(map[string]bool) - } - for node != nil && node != next && start < len(node.Data) { - m := hashCurrentPattern.FindStringSubmatchIndex(node.Data[start:]) - if m == nil { - return - } - m[2] += start - m[3] += start - - hash := node.Data[m[2]:m[3]] - // The regex does not lie, it matches the hash pattern. - // However, a regex cannot know if a hash actually exists or not. - // We could assume that a SHA1 hash should probably contain alphas AND numerics - // but that is not always the case. - // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash - // as used by git and github for linking and thus we have to do similar. - // Because of this, we check to make sure that a matched hash is actually - // a commit in the repository before making it a link. - - // check cache first - exist, inCache := ctx.ShaExistCache[hash] - if !inCache { - if ctx.GitRepo == nil { - var err error - var closer io.Closer - ctx.GitRepo, closer, err = gitrepo.RepositoryFromContextOrOpen(ctx.Ctx, ctx.Repo) - if err != nil { - log.Error("unable to open repository: %s Error: %v", gitrepo.RepoGitURL(ctx.Repo), err) - return - } - ctx.AddCancel(func() { - _ = closer.Close() - ctx.GitRepo = nil - }) - } - - // Don't use IsObjectExist since it doesn't support short hashs with gogit edition. - exist = ctx.GitRepo.IsReferenceExist(hash) - ctx.ShaExistCache[hash] = exist - } - - if !exist { - start = m[3] - continue - } - - link := util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], "commit", hash) - replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit")) - start = 0 - node = node.NextSibling.NextSibling - } -} - -// emailAddressProcessor replaces raw email addresses with a mailto: link. -func emailAddressProcessor(ctx *RenderContext, node *html.Node) { - next := node.NextSibling - for node != nil && node != next { - m := emailRegex.FindStringSubmatchIndex(node.Data) - if m == nil { - return - } - - mail := node.Data[m[2]:m[3]] - replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) - node = node.NextSibling.NextSibling - } -} - -// linkProcessor creates links for any HTTP or HTTPS URL not captured by -// markdown. -func linkProcessor(ctx *RenderContext, node *html.Node) { - next := node.NextSibling - for node != nil && node != next { - m := common.LinkRegex.FindStringIndex(node.Data) - if m == nil { - return - } - - uri := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) - node = node.NextSibling.NextSibling - } -} - -func genDefaultLinkProcessor(defaultLink string) processor { - return func(ctx *RenderContext, node *html.Node) { - ch := &html.Node{ - Parent: node, - Type: html.TextNode, - Data: node.Data, - } - - node.Type = html.ElementNode - node.Data = "a" - node.DataAtom = atom.A - node.Attr = []html.Attribute{ - {Key: "href", Val: defaultLink}, - {Key: "class", Val: "default-link muted"}, - } - node.FirstChild, node.LastChild = ch, ch - } -} - -// descriptionLinkProcessor creates links for DescriptionHTML -func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { - next := node.NextSibling - for node != nil && node != next { - m := common.LinkRegex.FindStringIndex(node.Data) - if m == nil { - return - } - - uri := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri)) - node = node.NextSibling.NextSibling - } -} - -func createDescriptionLink(href, content string) *html.Node { - textNode := &html.Node{ - Type: html.TextNode, - Data: content, - } - linkNode := &html.Node{ - FirstChild: textNode, - LastChild: textNode, - Type: html.ElementNode, - Data: "a", - DataAtom: atom.A, - Attr: []html.Attribute{ - {Key: "href", Val: href}, - {Key: "target", Val: "_blank"}, - {Key: "rel", Val: "noopener noreferrer"}, - }, - } - textNode.Parent = linkNode - return linkNode -} diff --git a/modules/markup/html_commit.go b/modules/markup/html_commit.go new file mode 100644 index 00000000000..86d70746d47 --- /dev/null +++ b/modules/markup/html_commit.go @@ -0,0 +1,225 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markup + +import ( + "io" + "slices" + "strings" + + "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/util" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +type anyHashPatternResult struct { + PosStart int + PosEnd int + FullURL string + CommitID string + SubPath string + QueryHash string +} + +func createCodeLink(href, content, class string) *html.Node { + a := &html.Node{ + Type: html.ElementNode, + Data: atom.A.String(), + Attr: []html.Attribute{{Key: "href", Val: href}}, + } + + if class != "" { + a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) + } + + text := &html.Node{ + Type: html.TextNode, + Data: content, + } + + code := &html.Node{ + Type: html.ElementNode, + Data: atom.Code.String(), + Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}}, + } + + code.AppendChild(text) + a.AppendChild(code) + return a +} + +func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) { + m := anyHashPattern.FindStringSubmatchIndex(s) + if m == nil { + return ret, false + } + + ret.PosStart, ret.PosEnd = m[0], m[1] + ret.FullURL = s[ret.PosStart:ret.PosEnd] + if strings.HasSuffix(ret.FullURL, ".") { + // if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence. + ret.PosEnd-- + ret.FullURL = ret.FullURL[:len(ret.FullURL)-1] + for i := 0; i < len(m); i++ { + m[i] = min(m[i], ret.PosEnd) + } + } + + ret.CommitID = s[m[2]:m[3]] + if m[5] > 0 { + ret.SubPath = s[m[4]:m[5]] + } + + lastStart, lastEnd := m[len(m)-2], m[len(m)-1] + if lastEnd > 0 { + ret.QueryHash = s[lastStart:lastEnd][1:] + } + return ret, true +} + +// fullHashPatternProcessor renders SHA containing URLs +func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) { + if ctx.Metas == nil { + return + } + nodeStop := node.NextSibling + for node != nodeStop { + if node.Type != html.TextNode { + node = node.NextSibling + continue + } + ret, ok := anyHashPatternExtract(node.Data) + if !ok { + node = node.NextSibling + continue + } + text := base.ShortSha(ret.CommitID) + if ret.SubPath != "" { + text += ret.SubPath + } + if ret.QueryHash != "" { + text += " (" + ret.QueryHash + ")" + } + replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit")) + node = node.NextSibling.NextSibling + } +} + +func comparePatternProcessor(ctx *RenderContext, node *html.Node) { + if ctx.Metas == nil { + return + } + nodeStop := node.NextSibling + for node != nodeStop { + if node.Type != html.TextNode { + node = node.NextSibling + continue + } + m := comparePattern.FindStringSubmatchIndex(node.Data) + if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match + node = node.NextSibling + continue + } + + urlFull := node.Data[m[0]:m[1]] + text1 := base.ShortSha(node.Data[m[2]:m[3]]) + textDots := base.ShortSha(node.Data[m[4]:m[5]]) + text2 := base.ShortSha(node.Data[m[6]:m[7]]) + + hash := "" + if m[9] > 0 { + hash = node.Data[m[8]:m[9]][1:] + } + + start := m[0] + end := m[1] + + // If url ends in '.', it's very likely that it is not part of the + // actual url but used to finish a sentence. + if strings.HasSuffix(urlFull, ".") { + end-- + urlFull = urlFull[:len(urlFull)-1] + if hash != "" { + hash = hash[:len(hash)-1] + } else if text2 != "" { + text2 = text2[:len(text2)-1] + } + } + + text := text1 + textDots + text2 + if hash != "" { + text += " (" + hash + ")" + } + replaceContent(node, start, end, createCodeLink(urlFull, text, "compare")) + node = node.NextSibling.NextSibling + } +} + +// hashCurrentPatternProcessor renders SHA1 strings to corresponding links that +// are assumed to be in the same repository. +func hashCurrentPatternProcessor(ctx *RenderContext, node *html.Node) { + if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || (ctx.Repo == nil && ctx.GitRepo == nil) { + return + } + + start := 0 + next := node.NextSibling + if ctx.ShaExistCache == nil { + ctx.ShaExistCache = make(map[string]bool) + } + for node != nil && node != next && start < len(node.Data) { + m := hashCurrentPattern.FindStringSubmatchIndex(node.Data[start:]) + if m == nil { + return + } + m[2] += start + m[3] += start + + hash := node.Data[m[2]:m[3]] + // The regex does not lie, it matches the hash pattern. + // However, a regex cannot know if a hash actually exists or not. + // We could assume that a SHA1 hash should probably contain alphas AND numerics + // but that is not always the case. + // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash + // as used by git and github for linking and thus we have to do similar. + // Because of this, we check to make sure that a matched hash is actually + // a commit in the repository before making it a link. + + // check cache first + exist, inCache := ctx.ShaExistCache[hash] + if !inCache { + if ctx.GitRepo == nil { + var err error + var closer io.Closer + ctx.GitRepo, closer, err = gitrepo.RepositoryFromContextOrOpen(ctx.Ctx, ctx.Repo) + if err != nil { + log.Error("unable to open repository: %s Error: %v", gitrepo.RepoGitURL(ctx.Repo), err) + return + } + ctx.AddCancel(func() { + _ = closer.Close() + ctx.GitRepo = nil + }) + } + + // Don't use IsObjectExist since it doesn't support short hashs with gogit edition. + exist = ctx.GitRepo.IsReferenceExist(hash) + ctx.ShaExistCache[hash] = exist + } + + if !exist { + start = m[3] + continue + } + + link := util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], "commit", hash) + replaceContent(node, m[2], m[3], createCodeLink(link, base.ShortSha(hash), "commit")) + start = 0 + node = node.NextSibling.NextSibling + } +} diff --git a/modules/markup/html_email.go b/modules/markup/html_email.go new file mode 100644 index 00000000000..a062789b358 --- /dev/null +++ b/modules/markup/html_email.go @@ -0,0 +1,21 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markup + +import "golang.org/x/net/html" + +// emailAddressProcessor replaces raw email addresses with a mailto: link. +func emailAddressProcessor(ctx *RenderContext, node *html.Node) { + next := node.NextSibling + for node != nil && node != next { + m := emailRegex.FindStringSubmatchIndex(node.Data) + if m == nil { + return + } + + mail := node.Data[m[2]:m[3]] + replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) + node = node.NextSibling.NextSibling + } +} diff --git a/modules/markup/html_emoji.go b/modules/markup/html_emoji.go new file mode 100644 index 00000000000..c60d06b8232 --- /dev/null +++ b/modules/markup/html_emoji.go @@ -0,0 +1,115 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markup + +import ( + "strings" + + "code.gitea.io/gitea/modules/emoji" + "code.gitea.io/gitea/modules/setting" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" +) + +func createEmoji(content, class, name string) *html.Node { + span := &html.Node{ + Type: html.ElementNode, + Data: atom.Span.String(), + Attr: []html.Attribute{}, + } + if class != "" { + span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) + } + if name != "" { + span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) + } + + text := &html.Node{ + Type: html.TextNode, + Data: content, + } + + span.AppendChild(text) + return span +} + +func createCustomEmoji(alias string) *html.Node { + span := &html.Node{ + Type: html.ElementNode, + Data: atom.Span.String(), + Attr: []html.Attribute{}, + } + span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) + span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) + + img := &html.Node{ + Type: html.ElementNode, + DataAtom: atom.Img, + Data: "img", + Attr: []html.Attribute{}, + } + img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"}) + img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"}) + + span.AppendChild(img) + return span +} + +// emojiShortCodeProcessor for rendering text like :smile: into emoji +func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { + start := 0 + next := node.NextSibling + for node != nil && node != next && start < len(node.Data) { + m := emojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:]) + if m == nil { + return + } + m[0] += start + m[1] += start + + start = m[1] + + alias := node.Data[m[0]:m[1]] + alias = strings.ReplaceAll(alias, ":", "") + converted := emoji.FromAlias(alias) + if converted == nil { + // check if this is a custom reaction + if _, exist := setting.UI.CustomEmojisMap[alias]; exist { + replaceContent(node, m[0], m[1], createCustomEmoji(alias)) + node = node.NextSibling.NextSibling + start = 0 + continue + } + continue + } + + replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) + node = node.NextSibling.NextSibling + start = 0 + } +} + +// emoji processor to match emoji and add emoji class +func emojiProcessor(ctx *RenderContext, node *html.Node) { + start := 0 + next := node.NextSibling + for node != nil && node != next && start < len(node.Data) { + m := emoji.FindEmojiSubmatchIndex(node.Data[start:]) + if m == nil { + return + } + m[0] += start + m[1] += start + + codepoint := node.Data[m[0]:m[1]] + start = m[1] + val := emoji.FromCode(codepoint) + if val != nil { + replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) + node = node.NextSibling.NextSibling + start = 0 + } + } +} diff --git a/modules/markup/html_issue.go b/modules/markup/html_issue.go new file mode 100644 index 00000000000..b6d4ed6a8e2 --- /dev/null +++ b/modules/markup/html_issue.go @@ -0,0 +1,180 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markup + +import ( + "strings" + + "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/log" + "code.gitea.io/gitea/modules/references" + "code.gitea.io/gitea/modules/regexplru" + "code.gitea.io/gitea/modules/templates/vars" + "code.gitea.io/gitea/modules/translation" + "code.gitea.io/gitea/modules/util" + + "golang.org/x/net/html" +) + +func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { + if ctx.Metas == nil { + return + } + next := node.NextSibling + for node != nil && node != next { + m := getIssueFullPattern().FindStringSubmatchIndex(node.Data) + if m == nil { + return + } + + mDiffView := getFilesChangedFullPattern().FindStringSubmatchIndex(node.Data) + // leave it as it is if the link is from "Files Changed" tab in PR Diff View https://domain/org/repo/pulls/27/files + if mDiffView != nil { + return + } + + link := node.Data[m[0]:m[1]] + text := "#" + node.Data[m[2]:m[3]] + // if m[4] and m[5] is not -1, then link is to a comment + // indicate that in the text by appending (comment) + if m[4] != -1 && m[5] != -1 { + if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok { + text += " " + locale.TrString("repo.from_comment") + } else { + text += " (comment)" + } + } + + // extract repo and org name from matched link like + // http://localhost:3000/gituser/myrepo/issues/1 + linkParts := strings.Split(link, "/") + matchOrg := linkParts[len(linkParts)-4] + matchRepo := linkParts[len(linkParts)-3] + + if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { + replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + } else { + text = matchOrg + "/" + matchRepo + text + replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + } + node = node.NextSibling.NextSibling + } +} + +func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { + if ctx.Metas == nil { + return + } + + // FIXME: the use of "mode" is quite dirty and hacky, for example: what is a "document"? how should it be rendered? + // The "mode" approach should be refactored to some other more clear&reliable way. + crossLinkOnly := ctx.Metas["mode"] == "document" && !ctx.IsWiki + + var ( + found bool + ref *references.RenderizableReference + ) + + next := node.NextSibling + + for node != nil && node != next { + _, hasExtTrackFormat := ctx.Metas["format"] + + // Repos with external issue trackers might still need to reference local PRs + // We need to concern with the first one that shows up in the text, whichever it is + isNumericStyle := ctx.Metas["style"] == "" || ctx.Metas["style"] == IssueNameStyleNumeric + foundNumeric, refNumeric := references.FindRenderizableReferenceNumeric(node.Data, hasExtTrackFormat && !isNumericStyle, crossLinkOnly) + + switch ctx.Metas["style"] { + case "", IssueNameStyleNumeric: + found, ref = foundNumeric, refNumeric + case IssueNameStyleAlphanumeric: + found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data) + case IssueNameStyleRegexp: + pattern, err := regexplru.GetCompiled(ctx.Metas["regexp"]) + if err != nil { + return + } + found, ref = references.FindRenderizableReferenceRegexp(node.Data, pattern) + } + + // Repos with external issue trackers might still need to reference local PRs + // We need to concern with the first one that shows up in the text, whichever it is + if hasExtTrackFormat && !isNumericStyle && refNumeric != nil { + // If numeric (PR) was found, and it was BEFORE the non-numeric pattern, use that + // Allow a free-pass when non-numeric pattern wasn't found. + if found && (ref == nil || refNumeric.RefLocation.Start < ref.RefLocation.Start) { + found = foundNumeric + ref = refNumeric + } + } + if !found { + return + } + + var link *html.Node + reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End] + if hasExtTrackFormat && !ref.IsPull { + ctx.Metas["index"] = ref.Issue + + res, err := vars.Expand(ctx.Metas["format"], ctx.Metas) + if err != nil { + // here we could just log the error and continue the rendering + log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) + } + + link = createLink(res, reftext, "ref-issue ref-external-issue") + } else { + // Path determines the type of link that will be rendered. It's unknown at this point whether + // the linked item is actually a PR or an issue. Luckily it's of no real consequence because + // Gitea will redirect on click as appropriate. + issuePath := util.Iif(ref.IsPull, "pulls", "issues") + if ref.Owner == "" { + link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") + } else { + link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") + } + } + + if ref.Action == references.XRefActionNone { + replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) + node = node.NextSibling.NextSibling + continue + } + + // Decorate action keywords if actionable + var keyword *html.Node + if references.IsXrefActionable(ref, hasExtTrackFormat) { + keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) + } else { + keyword = &html.Node{ + Type: html.TextNode, + Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End], + } + } + spaces := &html.Node{ + Type: html.TextNode, + Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start], + } + replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link}) + node = node.NextSibling.NextSibling.NextSibling.NextSibling + } +} + +func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { + next := node.NextSibling + + for node != nil && node != next { + found, ref := references.FindRenderizableCommitCrossReference(node.Data) + if !found { + return + } + + reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) + link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") + + replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) + node = node.NextSibling.NextSibling + } +} diff --git a/modules/markup/html_link.go b/modules/markup/html_link.go index b0861353485..93506345683 100644 --- a/modules/markup/html_link.go +++ b/modules/markup/html_link.go @@ -4,7 +4,16 @@ package markup import ( + "net/url" + "path" + "path/filepath" + "strings" + + "code.gitea.io/gitea/modules/markup/common" "code.gitea.io/gitea/modules/util" + + "golang.org/x/net/html" + "golang.org/x/net/html/atom" ) func ResolveLink(ctx *RenderContext, link, userContentAnchorPrefix string) (result string, resolved bool) { @@ -27,3 +36,221 @@ func ResolveLink(ctx *RenderContext, link, userContentAnchorPrefix string) (resu } return link, resolved } + +func shortLinkProcessor(ctx *RenderContext, node *html.Node) { + next := node.NextSibling + for node != nil && node != next { + m := shortLinkPattern.FindStringSubmatchIndex(node.Data) + if m == nil { + return + } + + content := node.Data[m[2]:m[3]] + tail := node.Data[m[4]:m[5]] + props := make(map[string]string) + + // MediaWiki uses [[link|text]], while GitHub uses [[text|link]] + // It makes page handling terrible, but we prefer GitHub syntax + // And fall back to MediaWiki only when it is obvious from the look + // Of text and link contents + sl := strings.Split(content, "|") + for _, v := range sl { + if equalPos := strings.IndexByte(v, '='); equalPos == -1 { + // There is no equal in this argument; this is a mandatory arg + if props["name"] == "" { + if IsFullURLString(v) { + // If we clearly see it is a link, we save it so + + // But first we need to ensure, that if both mandatory args provided + // look like links, we stick to GitHub syntax + if props["link"] != "" { + props["name"] = props["link"] + } + + props["link"] = strings.TrimSpace(v) + } else { + props["name"] = v + } + } else { + props["link"] = strings.TrimSpace(v) + } + } else { + // There is an equal; optional argument. + + sep := strings.IndexByte(v, '=') + key, val := v[:sep], html.UnescapeString(v[sep+1:]) + + // When parsing HTML, x/net/html will change all quotes which are + // not used for syntax into UTF-8 quotes. So checking val[0] won't + // be enough, since that only checks a single byte. + if len(val) > 1 { + if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) || + (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) { + const lenQuote = len("‘") + val = val[lenQuote : len(val)-lenQuote] + } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) || + (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) { + val = val[1 : len(val)-1] + } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") { + const lenQuote = len("‘") + val = val[1 : len(val)-lenQuote] + } + } + props[key] = val + } + } + + var name, link string + if props["link"] != "" { + link = props["link"] + } else if props["name"] != "" { + link = props["name"] + } + if props["title"] != "" { + name = props["title"] + } else if props["name"] != "" { + name = props["name"] + } else { + name = link + } + + name += tail + image := false + ext := filepath.Ext(link) + switch ext { + // fast path: empty string, ignore + case "": + // leave image as false + case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg": + image = true + } + + childNode := &html.Node{} + linkNode := &html.Node{ + FirstChild: childNode, + LastChild: childNode, + Type: html.ElementNode, + Data: "a", + DataAtom: atom.A, + } + childNode.Parent = linkNode + absoluteLink := IsFullURLString(link) + if !absoluteLink { + if image { + link = strings.ReplaceAll(link, " ", "+") + } else { + link = strings.ReplaceAll(link, " ", "-") // FIXME: it should support dashes in the link, eg: "the-dash-support.-" + } + if !strings.Contains(link, "/") { + link = url.PathEscape(link) // FIXME: it doesn't seem right and it might cause double-escaping + } + } + if image { + if !absoluteLink { + link = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), link) + } + title := props["title"] + if title == "" { + title = props["alt"] + } + if title == "" { + title = path.Base(name) + } + alt := props["alt"] + if alt == "" { + alt = name + } + + // make the childNode an image - if we can, we also place the alt + childNode.Type = html.ElementNode + childNode.Data = "img" + childNode.DataAtom = atom.Img + childNode.Attr = []html.Attribute{ + {Key: "src", Val: link}, + {Key: "title", Val: title}, + {Key: "alt", Val: alt}, + } + if alt == "" { + childNode.Attr = childNode.Attr[:2] + } + } else { + link, _ = ResolveLink(ctx, link, "") + childNode.Type = html.TextNode + childNode.Data = name + } + linkNode.Attr = []html.Attribute{{Key: "href", Val: link}} + replaceContent(node, m[0], m[1], linkNode) + node = node.NextSibling.NextSibling + } +} + +// linkProcessor creates links for any HTTP or HTTPS URL not captured by +// markdown. +func linkProcessor(ctx *RenderContext, node *html.Node) { + next := node.NextSibling + for node != nil && node != next { + m := common.LinkRegex.FindStringIndex(node.Data) + if m == nil { + return + } + + uri := node.Data[m[0]:m[1]] + replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) + node = node.NextSibling.NextSibling + } +} + +func genDefaultLinkProcessor(defaultLink string) processor { + return func(ctx *RenderContext, node *html.Node) { + ch := &html.Node{ + Parent: node, + Type: html.TextNode, + Data: node.Data, + } + + node.Type = html.ElementNode + node.Data = "a" + node.DataAtom = atom.A + node.Attr = []html.Attribute{ + {Key: "href", Val: defaultLink}, + {Key: "class", Val: "default-link muted"}, + } + node.FirstChild, node.LastChild = ch, ch + } +} + +// descriptionLinkProcessor creates links for DescriptionHTML +func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { + next := node.NextSibling + for node != nil && node != next { + m := common.LinkRegex.FindStringIndex(node.Data) + if m == nil { + return + } + + uri := node.Data[m[0]:m[1]] + replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri)) + node = node.NextSibling.NextSibling + } +} + +func createDescriptionLink(href, content string) *html.Node { + textNode := &html.Node{ + Type: html.TextNode, + Data: content, + } + linkNode := &html.Node{ + FirstChild: textNode, + LastChild: textNode, + Type: html.ElementNode, + Data: "a", + DataAtom: atom.A, + Attr: []html.Attribute{ + {Key: "href", Val: href}, + {Key: "target", Val: "_blank"}, + {Key: "rel", Val: "noopener noreferrer"}, + }, + } + textNode.Parent = linkNode + return linkNode +} diff --git a/modules/markup/html_mention.go b/modules/markup/html_mention.go new file mode 100644 index 00000000000..3f0692e05f5 --- /dev/null +++ b/modules/markup/html_mention.go @@ -0,0 +1,54 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markup + +import ( + "strings" + + "code.gitea.io/gitea/modules/references" + "code.gitea.io/gitea/modules/util" + + "golang.org/x/net/html" +) + +func mentionProcessor(ctx *RenderContext, node *html.Node) { + start := 0 + nodeStop := node.NextSibling + for node != nodeStop { + found, loc := references.FindFirstMentionBytes(util.UnsafeStringToBytes(node.Data[start:])) + if !found { + node = node.NextSibling + start = 0 + continue + } + loc.Start += start + loc.End += start + mention := node.Data[loc.Start:loc.End] + teams, ok := ctx.Metas["teams"] + // FIXME: util.URLJoin may not be necessary here: + // - setting.AppURL is defined to have a terminal '/' so unless mention[1:] + // is an AppSubURL link we can probably fallback to concatenation. + // team mention should follow @orgName/teamName style + if ok && strings.Contains(mention, "/") { + mentionOrgAndTeam := strings.Split(mention, "/") + if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { + replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) + node = node.NextSibling.NextSibling + start = 0 + continue + } + start = loc.End + continue + } + mentionedUsername := mention[1:] + + if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { + replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention")) + node = node.NextSibling.NextSibling + start = 0 + } else { + start = loc.End + } + } +} diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go index 515a79578de..0cd9dc5f30c 100644 --- a/modules/markup/markdown/goldmark.go +++ b/modules/markup/markdown/goldmark.go @@ -45,7 +45,7 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa ctx := pc.Get(renderContextKey).(*markup.RenderContext) rc := pc.Get(renderConfigKey).(*RenderConfig) - tocList := make([]markup.Header, 0, 20) + tocList := make([]Header, 0, 20) if rc.yamlNode != nil { metaNode := rc.toMetaNode() if metaNode != nil { diff --git a/modules/markup/markdown/toc.go b/modules/markup/markdown/toc.go index 38f744a25ff..ea1af83a3ed 100644 --- a/modules/markup/markdown/toc.go +++ b/modules/markup/markdown/toc.go @@ -7,13 +7,19 @@ import ( "fmt" "net/url" - "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/translation" "github.com/yuin/goldmark/ast" ) -func createTOCNode(toc []markup.Header, lang string, detailsAttrs map[string]string) ast.Node { +// Header holds the data about a header. +type Header struct { + Level int + Text string + ID string +} + +func createTOCNode(toc []Header, lang string, detailsAttrs map[string]string) ast.Node { details := NewDetails() summary := NewSummary() diff --git a/modules/markup/markdown/transform_heading.go b/modules/markup/markdown/transform_heading.go index b78720e16dc..5f8a12794da 100644 --- a/modules/markup/markdown/transform_heading.go +++ b/modules/markup/markdown/transform_heading.go @@ -13,14 +13,14 @@ import ( "github.com/yuin/goldmark/text" ) -func (g *ASTTransformer) transformHeading(_ *markup.RenderContext, v *ast.Heading, reader text.Reader, tocList *[]markup.Header) { +func (g *ASTTransformer) transformHeading(_ *markup.RenderContext, v *ast.Heading, reader text.Reader, tocList *[]Header) { for _, attr := range v.Attributes() { if _, ok := attr.Value.([]byte); !ok { v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value))) } } txt := v.Text(reader.Source()) //nolint:staticcheck - header := markup.Header{ + header := Header{ Text: util.UnsafeBytesToString(txt), Level: v.Level, } diff --git a/modules/markup/render.go b/modules/markup/render.go new file mode 100644 index 00000000000..f2ce9229af6 --- /dev/null +++ b/modules/markup/render.go @@ -0,0 +1,226 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package markup + +import ( + "context" + "errors" + "fmt" + "io" + "net/url" + "path/filepath" + "strings" + "sync" + + "code.gitea.io/gitea/modules/git" + "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/util" + + "github.com/yuin/goldmark/ast" +) + +type RenderMetaMode string + +const ( + RenderMetaAsDetails RenderMetaMode = "details" // default + RenderMetaAsNone RenderMetaMode = "none" + RenderMetaAsTable RenderMetaMode = "table" +) + +// RenderContext represents a render context +type RenderContext struct { + Ctx context.Context + RelativePath string // relative path from tree root of the branch + Type string + IsWiki bool + Links Links + Metas map[string]string // user, repo, mode(comment/document) + DefaultLink string + GitRepo *git.Repository + Repo gitrepo.Repository + ShaExistCache map[string]bool + cancelFn func() + SidebarTocNode ast.Node + RenderMetaAs RenderMetaMode + InStandalonePage bool // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page +} + +// Cancel runs any cleanup functions that have been registered for this Ctx +func (ctx *RenderContext) Cancel() { + if ctx == nil { + return + } + ctx.ShaExistCache = map[string]bool{} + if ctx.cancelFn == nil { + return + } + ctx.cancelFn() +} + +// AddCancel adds the provided fn as a Cleanup for this Ctx +func (ctx *RenderContext) AddCancel(fn func()) { + if ctx == nil { + return + } + oldCancelFn := ctx.cancelFn + if oldCancelFn == nil { + ctx.cancelFn = fn + return + } + ctx.cancelFn = func() { + defer oldCancelFn() + fn() + } +} + +// Render renders markup file to HTML with all specific handling stuff. +func Render(ctx *RenderContext, input io.Reader, output io.Writer) error { + if ctx.Type != "" { + return renderByType(ctx, input, output) + } else if ctx.RelativePath != "" { + return renderFile(ctx, input, output) + } + return errors.New("render options both filename and type missing") +} + +// RenderString renders Markup string to HTML with all specific handling stuff and return string +func RenderString(ctx *RenderContext, content string) (string, error) { + var buf strings.Builder + if err := Render(ctx, strings.NewReader(content), &buf); err != nil { + return "", err + } + return buf.String(), nil +} + +func renderIFrame(ctx *RenderContext, output io.Writer) error { + // set height="0" ahead, otherwise the scrollHeight would be max(150, realHeight) + // at the moment, only "allow-scripts" is allowed for sandbox mode. + // "allow-same-origin" should never be used, it leads to XSS attack, and it makes the JS in iframe can access parent window's config and CSRF token + // TODO: when using dark theme, if the rendered content doesn't have proper style, the default text color is black, which is not easy to read + _, err := io.WriteString(output, fmt.Sprintf(` +`, + setting.AppSubURL, + url.PathEscape(ctx.Metas["user"]), + url.PathEscape(ctx.Metas["repo"]), + ctx.Metas["BranchNameSubURL"], + url.PathEscape(ctx.RelativePath), + )) + return err +} + +func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { + var wg sync.WaitGroup + var err error + pr, pw := io.Pipe() + defer func() { + _ = pr.Close() + _ = pw.Close() + }() + + var pr2 io.ReadCloser + var pw2 io.WriteCloser + + var sanitizerDisabled bool + if r, ok := renderer.(ExternalRenderer); ok { + sanitizerDisabled = r.SanitizerDisabled() + } + + if !sanitizerDisabled { + pr2, pw2 = io.Pipe() + defer func() { + _ = pr2.Close() + _ = pw2.Close() + }() + + wg.Add(1) + go func() { + err = SanitizeReader(pr2, renderer.Name(), output) + _ = pr2.Close() + wg.Done() + }() + } else { + pw2 = util.NopCloser{Writer: output} + } + + wg.Add(1) + go func() { + if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() { + err = PostProcess(ctx, pr, pw2) + } else { + _, err = io.Copy(pw2, pr) + } + _ = pr.Close() + _ = pw2.Close() + wg.Done() + }() + + if err1 := renderer.Render(ctx, input, pw); err1 != nil { + return err1 + } + _ = pw.Close() + + wg.Wait() + return err +} + +func renderByType(ctx *RenderContext, input io.Reader, output io.Writer) error { + if renderer, ok := renderers[ctx.Type]; ok { + return render(ctx, renderer, input, output) + } + return fmt.Errorf("unsupported render type: %s", ctx.Type) +} + +// ErrUnsupportedRenderExtension represents the error when extension doesn't supported to render +type ErrUnsupportedRenderExtension struct { + Extension string +} + +func IsErrUnsupportedRenderExtension(err error) bool { + _, ok := err.(ErrUnsupportedRenderExtension) + return ok +} + +func (err ErrUnsupportedRenderExtension) Error() string { + return fmt.Sprintf("Unsupported render extension: %s", err.Extension) +} + +func renderFile(ctx *RenderContext, input io.Reader, output io.Writer) error { + extension := strings.ToLower(filepath.Ext(ctx.RelativePath)) + if renderer, ok := extRenderers[extension]; ok { + if r, ok := renderer.(ExternalRenderer); ok && r.DisplayInIFrame() { + if !ctx.InStandalonePage { + // for an external render, it could only output its content in a standalone page + // otherwise, a `, - setting.AppSubURL, - url.PathEscape(ctx.Metas["user"]), - url.PathEscape(ctx.Metas["repo"]), - ctx.Metas["BranchNameSubURL"], - url.PathEscape(ctx.RelativePath), - )) - return err -} - -func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { - var wg sync.WaitGroup - var err error - pr, pw := io.Pipe() - defer func() { - _ = pr.Close() - _ = pw.Close() - }() - - var pr2 io.ReadCloser - var pw2 io.WriteCloser - - var sanitizerDisabled bool - if r, ok := renderer.(ExternalRenderer); ok { - sanitizerDisabled = r.SanitizerDisabled() - } - - if !sanitizerDisabled { - pr2, pw2 = io.Pipe() - defer func() { - _ = pr2.Close() - _ = pw2.Close() - }() - - wg.Add(1) - go func() { - err = SanitizeReader(pr2, renderer.Name(), output) - _ = pr2.Close() - wg.Done() - }() - } else { - pw2 = nopCloser{output} - } - - wg.Add(1) - go func() { - if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() { - err = PostProcess(ctx, pr, pw2) - } else { - _, err = io.Copy(pw2, pr) - } - _ = pr.Close() - _ = pw2.Close() - wg.Done() - }() - - if err1 := renderer.Render(ctx, input, pw); err1 != nil { - return err1 - } - _ = pw.Close() - - wg.Wait() - return err -} - -// ErrUnsupportedRenderType represents -type ErrUnsupportedRenderType struct { - Type string -} - -func (err ErrUnsupportedRenderType) Error() string { - return fmt.Sprintf("Unsupported render type: %s", err.Type) -} - -func renderByType(ctx *RenderContext, input io.Reader, output io.Writer) error { - if renderer, ok := renderers[ctx.Type]; ok { - return render(ctx, renderer, input, output) - } - return ErrUnsupportedRenderType{ctx.Type} -} - -// ErrUnsupportedRenderExtension represents the error when extension doesn't supported to render -type ErrUnsupportedRenderExtension struct { - Extension string -} - -func IsErrUnsupportedRenderExtension(err error) bool { - _, ok := err.(ErrUnsupportedRenderExtension) - return ok -} - -func (err ErrUnsupportedRenderExtension) Error() string { - return fmt.Sprintf("Unsupported render extension: %s", err.Extension) -} - -func renderFile(ctx *RenderContext, input io.Reader, output io.Writer) error { - extension := strings.ToLower(filepath.Ext(ctx.RelativePath)) - if renderer, ok := extRenderers[extension]; ok { - if r, ok := renderer.(ExternalRenderer); ok && r.DisplayInIFrame() { - if !ctx.InStandalonePage { - // for an external render, it could only output its content in a standalone page - // otherwise, a