go-gitea · lunny · Jul 12, 2022 · Jul 12, 2022 · Jul 12, 2022 · Jul 12, 2022
diff --git a/integrations/benchmarks_test.go b/integrations/benchmarks_test.go
@@ -12,6 +12,7 @@ import (
 
 	repo_model "code.gitea.io/gitea/models/repo"
 	"code.gitea.io/gitea/models/unittest"
+	"code.gitea.io/gitea/modules/markup"
 	api "code.gitea.io/gitea/modules/structs"
 )
 
@@ -70,3 +71,11 @@ func BenchmarkRepoBranchCommit(b *testing.B) {
 		}
 	})
 }
+
+// BenchmarkPostProcess measures markup.PostProcess on a 1024-byte input made
+// of the letter "a", discarding the rendered output and reporting allocations.
+func BenchmarkPostProcess(b *testing.B) {
+	input := strings.Repeat("a", 1024)
+	b.ReportAllocs()
+	for i := 0; i < b.N; i++ {
+		// A fresh RenderContext and reader are built per iteration: the reader
+		// is consumed by each call, and the context is presumably cancelled
+		// when post-processing returns (verify against postProcess).
+		err := markup.PostProcess(&markup.RenderContext{}, strings.NewReader(input), io.Discard)
+		if err != nil {
+			// Fail loudly rather than timing an error path.
+			b.Fatalf("PostProcess: %v", err)
+		}
+	}
+}
diff --git a/modules/markup/html.go b/modules/markup/html.go
@@ -5,7 +5,6 @@
 package markup
 
 import (
-	"bytes"
 	"io"
 	"net/url"
 	"path"
@@ -298,26 +297,36 @@ var (
 	nulCleaner = strings.NewReplacer("\000", "")
 )
 
-func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
-	defer ctx.Cancel()
-	// FIXME: don't read all content to memory
-	rawHTML, err := io.ReadAll(input)
+// cleanReader wraps an io.Reader so that data read through it is passed
+// through nulCleaner and tagCleaner before reaching the caller.
+type cleanReader struct {
+	io.Reader
+}
+
+// Read reads up to len(bs) bytes from the wrapped reader, strips NUL bytes
+// with nulCleaner, rewrites every tagCleaner match using the "&lt;$1"
+// template, and copies the cleaned bytes into bs.
+//
+// It returns the number of cleaned bytes placed in bs together with the
+// wrapped reader's error, if any. Bytes that arrive alongside an error
+// (for example a final chunk returned with io.EOF) are still cleaned and
+// delivered, as the io.Reader contract requires.
+//
+// NOTE(review): this reader is stateless, which leaves two limitations:
+//   - the replacement can make a chunk longer than len(bs); copy then
+//     silently drops the excess bytes;
+//   - a pattern split across two Read calls is never seen whole by
+//     tagCleaner and passes through uncleaned.
+//
+// Fixing either needs a carry-over buffer, i.e. a stateful pointer receiver.
+func (c cleanReader) Read(bs []byte) (int, error) {
+	original := make([]byte, len(bs))
+	n, err := c.Reader.Read(original)
+	if n == 0 {
+		// Nothing to clean; propagate the error (or lack of progress) as is.
+		return 0, err
+	}
+	cleaned := tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(original[:n]))), []byte("&lt;$1"))
+	// Hand over the data first and report err with it, so bytes read together
+	// with an error are not lost.
+	return copy(bs, cleaned), err
+}
+
+// Compile-time assertion that cleanReader implements io.Reader.
+var _ io.Reader = cleanReader{}
+
+func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
+	defer ctx.Cancel()
 
-	res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
 	// prepend "<html><body>"
-	_, _ = res.WriteString("<html><body>")
+	htmlTagPrefix := strings.NewReader("<html><body>")
 
 	// Strip out nuls - they're always invalid
-	_, _ = res.Write(tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(rawHTML))), []byte("&lt;$1")))
+	body := cleanReader{input}
 
 	// close the tags
-	_, _ = res.WriteString("</body></html>")
+	htmlTagSuffix := strings.NewReader("</body></html>")
 
 	// parse the HTML
-	node, err := html.Parse(res)
+	node, err := html.Parse(io.MultiReader(htmlTagPrefix, body, htmlTagSuffix))
 	if err != nil {
 		return &postProcessError{"invalid HTML", err}
 	}