From 203e230905a73524b3f9b251bcb7de12c15208a5 Mon Sep 17 00:00:00 2001 From: Moritz Marquardt Date: Fri, 19 Mar 2021 20:58:53 +0100 Subject: [PATCH] Implement caching & limit concurrency --- go.mod | 1 + go.sum | 2 + handler.go | 106 ++++++++++++++++++++++++++++------------------------- main.go | 2 + 4 files changed, 62 insertions(+), 49 deletions(-) diff --git a/go.mod b/go.mod index 8f7b835..b467059 100644 --- a/go.mod +++ b/go.mod @@ -3,6 +3,7 @@ module codeberg.org/codeberg/pages go 1.16 require ( + github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a github.com/valyala/fasthttp v1.22.0 github.com/valyala/fastjson v1.6.3 ) diff --git a/go.sum b/go.sum index 63178eb..fd150c5 100644 --- a/go.sum +++ b/go.sum @@ -1,3 +1,5 @@ +github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a h1:Cf4CrDeyrIcuIiJZEZJAH5dapqQ6J3OmP/vHPbDjaFA= +github.com/OrlovEvgeny/go-mcache v0.0.0-20200121124330-1a8195b34f3a/go.mod h1:ig6eVXkYn/9dz0Vm8UdLf+E0u1bE6kBSn3n2hqk6jas= github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc= github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= github.com/klauspost/compress v1.11.8 h1:difgzQsp5mdAz9v8lm3P/I+EpDKMU/6uTMw1y1FObuo= diff --git a/handler.go b/handler.go index adf5c25..f837e7f 100644 --- a/handler.go +++ b/handler.go @@ -3,10 +3,10 @@ package main import ( "bytes" "fmt" + "github.com/OrlovEvgeny/go-mcache" "github.com/valyala/fasthttp" "github.com/valyala/fastjson" "mime" - "net/url" "path" "strconv" "strings" @@ -69,34 +69,30 @@ func handler(ctx *fasthttp.RequestCtx) { if repo == "" { return false } - fmt.Printf("Trying branch: %s/%s/%s with path %v\n", targetOwner, repo, branch, path) - escapedBranch, _ := url.PathUnescape(branch) - if escapedBranch == "" { - escapedBranch = branch - } // Check if the branch exists, otherwise treat it as a file path - targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, repo, branch) - 
fmt.Printf("Branch %s has timestamp %v\n", targetBranch, targetOptions.BranchTimestamp) - if targetOptions.BranchTimestamp != (time.Time{}) { - // Branch exists, use it - targetRepo = repo - targetPath = strings.Trim(strings.Join(path, "/"), "/") - - if canonicalLink != "" { - // Hide from search machines & add canonical link - ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex") - ctx.Response.Header.Set("Link", - strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+ - "; rel=\"canonical\"", - ) - } - - return true - } else { + branchTimestampResult := getBranchTimestamp(targetOwner, repo, branch) + if branchTimestampResult == nil { // branch doesn't exist return false } + + // Branch exists, use it + targetRepo = repo + targetPath = strings.Trim(strings.Join(path, "/"), "/") + targetBranch = branchTimestampResult.branch + targetOptions.BranchTimestamp = branchTimestampResult.timestamp + + if canonicalLink != "" { + // Hide from search machines & add canonical link + ctx.Response.Header.Set("X-Robots-Tag", "noarchive, noindex") + ctx.Response.Header.Set("Link", + strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+ + "; rel=\"canonical\"", + ) + } + + return true } // tryUpstream forwards the target request to the Gitea API, and shows an error page on failure. 
@@ -209,36 +205,49 @@ func returnErrorPage(ctx *fasthttp.RequestCtx, code int) {
 	ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+fasthttp.StatusMessage(code))))
 }
 
+type branchTimestamp struct {
+	branch string
+	timestamp time.Time
+}
+var branchTimestampCache = mcache.New()
+
 // getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
 // (or an empty time.Time if the branch doesn't exist)
-// TODO: cache responses for ~15 minutes if a branch exists
-func getBranchTimestamp(owner, repo, branch string) (branchWithFallback string, t time.Time) {
-	branchWithFallback = branch
+func getBranchTimestamp(owner, repo, branch string) *branchTimestamp { // returns nil if the repo or branch lookup fails
+	key := owner + "/" + repo + "/" + branch // built from the *requested* branch ("" = default) so Get and Set use the same key
+	if result, ok := branchTimestampCache.Get(key); ok {
+		return result.(*branchTimestamp)
+	}
+	result := &branchTimestamp{branch: branch}
 	if branch == "" {
 		var body = make([]byte, 0)
-		status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo), 10*time.Second)
+		status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo, 10*time.Second)
 		if err != nil || status != 200 {
-			fmt.Printf("Default branch request to Gitea API failed with status code %d and error %s\n", status, err)
-			branchWithFallback = ""
-			return
+			return nil
 		}
 		branch = fastjson.GetString(body, "default_branch")
-		branchWithFallback = branch
+		result.branch = branch
 	}
 
 	var body = make([]byte, 0)
-	status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+url.PathEscape(owner)+"/"+url.PathEscape(repo)+"/branches/"+url.PathEscape(branch), 10*time.Second)
+	status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo+"/branches/"+branch, 10*time.Second)
 	if err != nil || status != 200 {
-		fmt.Printf("Branch info request to Gitea API failed with status code %d and error %s\n", status, err)
-		branchWithFallback = ""
-		return
+		return nil
 	}
 
-	t, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp"))
-	return
+	result.timestamp, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp"))
+	_ = branchTimestampCache.Set(key, result, 15 * time.Second) // best-effort: if Set fails, the next call simply re-fetches
+	return result
 }
 
-// upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
+var upstreamClient = fasthttp.Client{
+	ReadTimeout: 10 * time.Second,
+	MaxConnDuration: 60 * time.Second,
+	MaxConnWaitTimeout: 1000 * time.Millisecond,
+	MaxConnsPerHost: 1024 * 16, // TODO: adjust bottlenecks for best performance with Gitea!
+}
+
+ // upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
 func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, targetBranch string, targetPath string, options *upstreamOptions) (success bool) {
 	if options.ForbiddenMimeTypes == nil {
 		options.ForbiddenMimeTypes = map[string]struct{}{}
@@ -246,15 +255,14 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
 
 	// Check if the branch exists and when it was modified
 	if options.BranchTimestamp == (time.Time{}) {
-		targetBranch, options.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, targetBranch)
-	}
+		branch := getBranchTimestamp(targetOwner, targetRepo, targetBranch)
 
-	// Handle repositories with no/broken pages setup
-	if options.BranchTimestamp == (time.Time{}) || targetBranch == "" {
-		ctx.Response.SetStatusCode(fasthttp.StatusFailedDependency)
-		ctx.Response.Header.SetContentType("text/html; charset=utf-8")
-		ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte("pages not set up for this repo")))
-		return true
+		if branch == nil {
+			returnErrorPage(ctx, fasthttp.StatusFailedDependency)
+			return true
+		}
+		targetBranch = branch.branch
+		options.BranchTimestamp = branch.timestamp
 	}
 
 	if targetOwner == "" || targetRepo == "" || targetBranch == "" {
@@ -272,9 +280,9 @@ func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, t
 
 	// Make a GET request to the upstream URL
 	req := fasthttp.AcquireRequest()
-	req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + url.PathEscape(targetOwner) + "/" + url.PathEscape(targetRepo) + "/raw/" + url.PathEscape(targetBranch) + "/" + url.PathEscape(targetPath))
+	req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/" + targetPath)
 	res := fasthttp.AcquireResponse()
-	err := fasthttp.DoTimeout(req, res, 10*time.Second)
+	err := upstreamClient.Do(req, res)
 
 	// Handle errors
 	if res.StatusCode() == fasthttp.StatusNotFound {
diff --git a/main.go b/main.go
index a7081a4..ad1c40e 100644
--- a/main.go
+++ b/main.go
@@ -99,6 +99,8 @@ func main() {
 		NoDefaultServerHeader: true,
 		NoDefaultDate: true,
 		ReadTimeout: 10 * time.Second,
+		Concurrency: 1024 * 32, // TODO: adjust bottlenecks for best performance with Gitea!
+		MaxConnsPerIP: 100,
 	}).Serve(listener)
 	if err != nil {
 		fmt.Printf("Couldn't start server: %s\n", err)