From 8519bba527fb111cd08824ac7e04db72c65ab5b0 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Sat, 12 Nov 2022 17:50:08 +0100 Subject: [PATCH] tryUpstream always use targetOpt generated by tryBranch --- server/handler.go | 127 ++++++++++++++++++------------------ server/try.go | 38 +++++------ server/upstream/helper.go | 33 ++++++---- server/upstream/upstream.go | 26 +++++--- 4 files changed, 116 insertions(+), 108 deletions(-) diff --git a/server/handler.go b/server/handler.go index a1a3c3c..25c8af4 100644 --- a/server/handler.go +++ b/server/handler.go @@ -89,18 +89,12 @@ func Handler(mainDomainSuffix, rawDomain string, // Prepare request information to Gitea pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/") - targetOptions := &upstream.Options{ - TryIndexPages: true, - } log.Debug().Msg("preparations") if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) { // Serve raw content from RawDomain log.Debug().Msg("raw domain") - targetOptions.TryIndexPages = false - targetOptions.ServeRaw = true - if len(pathElements) < 2 { // https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect) @@ -110,15 +104,16 @@ func Handler(mainDomainSuffix, rawDomain string, // raw.codeberg.org/example/myrepo/@main/index.html if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") { log.Debug().Msg("raw domain preparations, now trying with specified branch") - newPath := path.Join(pathElements[3:]...) - branch := pathElements[2][1:] - repoOwner := pathElements[0] - repo := pathElements[1] - if timestampBranch, works := tryBranch(log, ctx, giteaClient, repoOwner, repo, branch, newPath, true); works { - targetOptions.BranchTimestamp = timestampBranch.Timestamp - log.Debug().Msg("tryBranch, now trying upstream 1") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, repoOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) + if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{ + TryIndexPages: false, + ServeRaw: true, + TargetOwner: pathElements[0], + TargetRepo: pathElements[1], + TargetBranch: pathElements[2][1:], + TargetPath: path.Join(pathElements[3:]...), + }, true); works { + log.Trace().Msg("tryUpstream: serve raw domain with specified branch") + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) return } log.Debug().Msg("missing branch info") @@ -127,18 +122,20 @@ func Handler(mainDomainSuffix, rawDomain string, } log.Debug().Msg("raw domain preparations, now trying with default branch") - repoOwner := pathElements[0] - repo := pathElements[1] - newPath := path.Join(pathElements[2:]...) - if timestampBranch, works := tryBranch(log, ctx, giteaClient, repoOwner, repo, "", newPath, true); works { - targetOptions.BranchTimestamp = timestampBranch.Timestamp - log.Debug().Msg("tryBranch, now trying upstream 2") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, repoOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) - return + if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{ + TryIndexPages: false, + ServeRaw: true, + TargetOwner: pathElements[0], + TargetRepo: pathElements[1], + TargetPath: path.Join(pathElements[2:]...), + }, true); works { + log.Trace().Msg("tryUpstream: serve raw domain with default branch") + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) + } else { + html.ReturnErrorPage(ctx, + fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", targetOpt.TargetOwner, targetOpt.TargetRepo), + http.StatusNotFound) } - - html.ReturnErrorPage(ctx, fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", repoOwner, repo), http.StatusNotFound) return } else if strings.HasSuffix(trimmedHost, mainDomainSuffix) { @@ -164,17 +161,17 @@ func Handler(mainDomainSuffix, rawDomain string, } log.Debug().Msg("main domain preparations, now trying with specified repo & branch") - branch := pathElements[1][1:] - newPath := path.Join(pathElements[2:]...) - repo := pathElements[0] - if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, true); works { - targetOptions.BranchTimestamp = timestampBranch.Timestamp - log.Debug().Msg("tryBranch, now trying upstream 3") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) + if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{ + TargetOwner: targetOwner, + TargetRepo: pathElements[0], + TargetBranch: pathElements[1][1:], + TargetPath: path.Join(pathElements[2:]...), + }, true); works { + log.Trace().Msg("tryUpstream: serve with specified repo and branch") + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) } else { html.ReturnErrorPage(ctx, - fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo), + fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo), http.StatusFailedDependency) } return @@ -184,17 +181,17 @@ func Handler(mainDomainSuffix, rawDomain string, // example.codeberg.page/@main/index.html if strings.HasPrefix(pathElements[0], "@") { log.Debug().Msg("main domain preparations, now trying with specified branch") - branch := pathElements[0][1:] - repo := "pages" - newPath := path.Join(pathElements[1:]...) - if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works { - targetOptions.BranchTimestamp = timestampBranch.Timestamp - log.Debug().Msg("tryBranch, now trying upstream 4") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) + if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{ + TargetOwner: targetOwner, + TargetRepo: "pages", + TargetBranch: pathElements[0][1:], + TargetPath: path.Join(pathElements[1:]...), + }, true); works { + log.Trace().Msg("tryUpstream: serve default pages repo with specified branch") + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) } else { html.ReturnErrorPage(ctx, - fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"), + fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", targetOpt.TargetBranch, targetOpt.TargetOwner, targetOpt.TargetRepo), http.StatusFailedDependency) } return @@ -205,14 +202,14 @@ func Handler(mainDomainSuffix, rawDomain string, // example.codeberg.page/pages/... is not allowed here. log.Debug().Msg("main domain preparations, now trying with specified repo") if pathElements[0] != "pages" { - repo := pathElements[0] - branch := "pages" - newPath := path.Join(pathElements[1:]...) - if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, false); works { - targetOptions.BranchTimestamp = timestampBranch.Timestamp + if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{ + TargetOwner: targetOwner, + TargetRepo: pathElements[0], + TargetBranch: "pages", + TargetPath: path.Join(pathElements[1:]...), + }, false); works { log.Debug().Msg("tryBranch, now trying upstream 5") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) return } } @@ -220,13 +217,13 @@ func Handler(mainDomainSuffix, rawDomain string, // Try to use the "pages" repo on its default branch // example.codeberg.page/index.html log.Debug().Msg("main domain preparations, now trying with default repo/branch") - newPath := path.Join(pathElements...) - repo := "pages" - if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, "", newPath, false); works { - targetOptions.BranchTimestamp = timestampBranch.Timestamp + if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{ + TargetOwner: targetOwner, + TargetRepo: "pages", + TargetPath: path.Join(pathElements...), + }, false); works { log.Debug().Msg("tryBranch, now trying upstream 6") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) return } @@ -256,11 +253,14 @@ func Handler(mainDomainSuffix, rawDomain string, } // Try to use the given repo on the given branch or the default branch - newPath := path.Join(pathParts...) log.Debug().Msg("custom domain preparations, now trying with details from DNS") - if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, targetBranch, newPath, canonicalLink); works { - targetOptions.BranchTimestamp = timestampBranch.Timestamp - canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, timestampBranch.Branch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache) + if targetOpt, works := tryBranch(log, ctx, giteaClient, &upstream.Options{ + TargetOwner: targetOwner, + TargetRepo: targetRepo, + TargetBranch: targetBranch, + TargetPath: path.Join(pathParts...), + }, canonicalLink); works { + canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOpt.TargetOwner, targetOpt.TargetRepo, targetOpt.TargetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache) if !valid { html.ReturnErrorPage(ctx, "domain not specified in .domains file", http.StatusMisdirectedRequest) return @@ -268,7 +268,7 @@ func Handler(mainDomainSuffix, rawDomain string, // only redirect if the target is also a codeberg page! targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache) if targetOwner != "" { - ctx.Redirect("https://"+canonicalDomain+string(newPath), http.StatusTemporaryRedirect) + ctx.Redirect("https://"+canonicalDomain+string(targetOpt.TargetPath), http.StatusTemporaryRedirect) return } @@ -277,8 +277,7 @@ func Handler(mainDomainSuffix, rawDomain string, } log.Debug().Msg("tryBranch, now trying upstream 7") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache) + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, targetOpt, canonicalDomainCache) return } diff --git a/server/try.go b/server/try.go index 7de19b6..96b937c 100644 --- a/server/try.go +++ b/server/try.go @@ -16,18 +16,15 @@ import ( // tryUpstream forwards the target request to the Gitea API, and shows an error page on failure. func tryUpstream(ctx *context.Context, giteaClient *gitea.Client, mainDomainSuffix, trimmedHost string, - - targetOptions *upstream.Options, - targetOwner, targetRepo, targetBranch, targetPath string, - + options *upstream.Options, canonicalDomainCache cache.SetGetKey, ) { // check if a canonical domain exists on a request on MainDomain if strings.HasSuffix(trimmedHost, mainDomainSuffix) { - canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, "", string(mainDomainSuffix), canonicalDomainCache) + canonicalDomain, _ := upstream.CheckCanonicalDomain(giteaClient, options.TargetOwner, options.TargetRepo, options.TargetBranch, "", string(mainDomainSuffix), canonicalDomainCache) if !strings.HasSuffix(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix)) { canonicalPath := ctx.Req.RequestURI - if targetRepo != "pages" { + if options.TargetRepo != "pages" { path := strings.SplitN(canonicalPath, "/", 3) if len(path) >= 3 { canonicalPath = "/" + path[2] @@ -38,14 +35,11 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client, } } - targetOptions.TargetOwner = targetOwner - targetOptions.TargetRepo = targetRepo - targetOptions.TargetBranch = targetBranch - targetOptions.TargetPath = targetPath - targetOptions.Host = string(trimmedHost) + // add host for debugging + options.Host = string(trimmedHost) // Try to request the file from the Gitea API - if !targetOptions.Upstream(ctx, giteaClient) { + if !options.Upstream(ctx, giteaClient) { html.ReturnErrorPage(ctx, "", ctx.StatusCode) } } @@ -53,20 +47,20 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client, // tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, // it will also disallow search indexing and add a Link header to the canonical URL. func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client, - repoOwner, repoName, branch, path string, canonicalLink bool, -) (*gitea.BranchTimestamp, bool) { - if repoName == "" { - log.Debug().Msg("tryBranch: repo is empty") + targetOptions *upstream.Options, canonicalLink bool, +) (*upstream.Options, bool) { + if targetOptions.TargetOwner == "" || targetOptions.TargetRepo == "" { + log.Debug().Msg("tryBranch: owner or repo is empty") return nil, false } // Replace "~" to "/" so we can access branch that contains slash character // Branch name cannot contain "~" so doing this is okay - branch = strings.ReplaceAll(branch, "~", "/") + targetOptions.TargetBranch = strings.ReplaceAll(targetOptions.TargetBranch, "~", "/") // Check if the branch exists, otherwise treat it as a file path - branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, repoOwner, repoName, branch) - if branchTimestampResult == nil { + branchExist, _ := targetOptions.GetBranchTimestamp(giteaClient) + if !branchExist { log.Debug().Msg("tryBranch: branch doesn't exist") return nil, false } @@ -74,11 +68,9 @@ func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Clie if canonicalLink { // Hide from search machines & add canonical link ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex") - ctx.RespWriter.Header().Set("Link", - giteaClient.ContentWebLink(repoOwner, repoName, branchTimestampResult.Branch, path)+ - "; rel=\"canonical\"") + ctx.RespWriter.Header().Set("Link", targetOptions.ContentWebLink(giteaClient)+"; rel=\"canonical\"") } log.Debug().Msg("tryBranch: true") - return branchTimestampResult, true + return targetOptions, true } diff --git a/server/upstream/helper.go b/server/upstream/helper.go index 6bc23c8..428976b 100644 --- a/server/upstream/helper.go +++ b/server/upstream/helper.go @@ -2,35 +2,46 @@ package upstream import ( "errors" + "fmt" "github.com/rs/zerolog/log" "codeberg.org/codeberg/pages/server/gitea" ) -// GetBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch -// (or nil if the branch doesn't exist) -func GetBranchTimestamp(giteaClient *gitea.Client, owner, repo, branch string) *gitea.BranchTimestamp { - log := log.With().Strs("BranchInfo", []string{owner, repo, branch}).Logger() +// GetBranchTimestamp finds the default branch (if branch is "") and save branch and it's last modification time to Options +func (o *Options) GetBranchTimestamp(giteaClient *gitea.Client) (bool, error) { + log := log.With().Strs("BranchInfo", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch}).Logger() - if len(branch) == 0 { + if len(o.TargetBranch) == 0 { // Get default branch - defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(owner, repo) + defaultBranch, err := giteaClient.GiteaGetRepoDefaultBranch(o.TargetOwner, o.TargetRepo) if err != nil { log.Err(err).Msg("Could't fetch default branch from repository") - return nil + return false, err } log.Debug().Msgf("Succesfully fetched default branch %q from Gitea", defaultBranch) - branch = defaultBranch + o.TargetBranch = defaultBranch } - timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(owner, repo, branch) + timestamp, err := giteaClient.GiteaGetRepoBranchTimestamp(o.TargetOwner, o.TargetRepo, o.TargetBranch) if err != nil { if !errors.Is(err, gitea.ErrorNotFound) { log.Error().Err(err).Msg("Could not get latest commit's timestamp from branch") } - return nil + return false, err } + + if timestamp == nil || timestamp.Branch == "" { + return false, fmt.Errorf("empty response") + } + log.Debug().Msgf("Succesfully fetched latest commit's timestamp from branch: %#v", timestamp) - return timestamp + o.BranchTimestamp = timestamp.Timestamp + o.TargetBranch = timestamp.Branch + return true, nil +} + +func (o *Options) ContentWebLink(giteaClient *gitea.Client) string { + return giteaClient.ContentWebLink(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath) + "; rel=\"canonical\"" } diff --git a/server/upstream/upstream.go b/server/upstream/upstream.go index 0fbb530..f480a76 100644 --- a/server/upstream/upstream.go +++ b/server/upstream/upstream.go @@ -34,10 +34,10 @@ var upstreamNotFoundPages = []string{ // Options provides various options for the upstream request. type Options struct { - TargetOwner, - TargetRepo, - TargetBranch, - TargetPath, + TargetOwner string + TargetRepo string + TargetBranch string + TargetPath string // Used for debugging purposes. Host string @@ -62,16 +62,22 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client) (fin // Check if the branch exists and when it was modified if o.BranchTimestamp.IsZero() { - branch := GetBranchTimestamp(giteaClient, o.TargetOwner, o.TargetRepo, o.TargetBranch) - - if branch == nil || branch.Branch == "" { + branchExist, err := o.GetBranchTimestamp(giteaClient) + // handle 404 + if err != nil && errors.Is(err, gitea.ErrorNotFound) || !branchExist { html.ReturnErrorPage(ctx, - fmt.Sprintf("could not get timestamp of branch %q", o.TargetBranch), + fmt.Sprintf("branch %q for '%s/%s' not found", o.TargetBranch, o.TargetOwner, o.TargetRepo), + http.StatusNotFound) + return true + } + + // handle unexpected errors + if err != nil { + html.ReturnErrorPage(ctx, + fmt.Sprintf("could not get timestamp of branch %q: %v", o.TargetBranch, err), http.StatusFailedDependency) return true } - o.TargetBranch = branch.Branch - o.BranchTimestamp = branch.Timestamp } // Check if the browser has a cached version