diff --git a/cmd/main.go b/cmd/main.go index a3a61e1..53cc417 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -90,7 +90,7 @@ func Serve(ctx *cli.Context) error { // Create handler based on settings httpsHandler := server.Handler(mainDomainSuffix, rawDomain, giteaClient, - giteaRoot, rawInfoPage, + rawInfoPage, BlacklistedPaths, allowedCorsDomains, dnsLookupCache, canonicalDomainCache) diff --git a/server/gitea/client.go b/server/gitea/client.go index c63ee21..51647ba 100644 --- a/server/gitea/client.go +++ b/server/gitea/client.go @@ -45,6 +45,8 @@ type Client struct { sdkClient *gitea.Client responseCache cache.SetGetKey + giteaRoot string + followSymlinks bool supportLFS bool @@ -79,6 +81,8 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo sdkClient: sdk, responseCache: respCache, + giteaRoot: giteaRoot, + followSymlinks: followSymlinks, supportLFS: supportLFS, @@ -87,6 +91,12 @@ func NewClient(giteaRoot, giteaAPIToken string, respCache cache.SetGetKey, follo }, err } +// ContentWebLink returns the Gitea web UI URL of resource on the given branch of targetOwner/targetRepo. +// The root is prepended by concatenation because path.Join would collapse the "//" after the URL scheme. +func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string { + return client.giteaRoot + "/" + path.Join(targetOwner, targetRepo, "src/branch", branch, resource) +} + func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) { reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource) if err != nil { diff --git a/server/handler.go b/server/handler.go index 49c8012..caa483a 100644 --- a/server/handler.go +++ b/server/handler.go @@ -6,7 +6,6 @@ import ( "path" "strings" - "github.com/rs/zerolog" "github.com/rs/zerolog/log" "codeberg.org/codeberg/pages/html" @@ -27,7 +26,7 @@ const ( // Handler handles a single HTTP request to the web server. 
func Handler(mainDomainSuffix, rawDomain string, giteaClient *gitea.Client, - giteaRoot, rawInfoPage string, + rawInfoPage string, blacklistedPaths, allowedCorsDomains []string, dnsLookupCache, canonicalDomainCache cache.SetGetKey, ) http.HandlerFunc { @@ -85,51 +84,10 @@ func Handler(mainDomainSuffix, rawDomain string, } // Prepare request information to Gitea - var targetOwner, targetRepo, targetBranch, targetPath string targetOptions := &upstream.Options{ TryIndexPages: true, } - // tryBranch checks if a branch exists and populates the target variables. If canonicalLink is non-empty, it will - // also disallow search indexing and add a Link header to the canonical URL. - // TODO: move into external func to not alert vars indirectly - tryBranch1 := func(log zerolog.Logger, repo, branch string, _path []string, canonicalLink string) bool { - if repo == "" { - log.Debug().Msg("tryBranch: repo is empty") - return false - } - - // Replace "~" to "/" so we can access branch that contains slash character - // Branch name cannot contain "~" so doing this is okay - branch = strings.ReplaceAll(branch, "~", "/") - - // Check if the branch exists, otherwise treat it as a file path - branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, targetOwner, repo, branch) - if branchTimestampResult == nil { - log.Debug().Msg("tryBranch: branch doesn't exist") - return false - } - - // Branch exists, use it - targetRepo = repo - targetPath = path.Join(_path...) 
- targetBranch = branchTimestampResult.Branch - - targetOptions.BranchTimestamp = branchTimestampResult.Timestamp - - if canonicalLink != "" { - // Hide from search machines & add canonical link - ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex") - ctx.RespWriter.Header().Set("Link", - strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+ - "; rel=\"canonical\"", - ) - } - - log.Debug().Msg("tryBranch: true") - return true - } - log.Debug().Msg("preparations") if rawDomain != "" && strings.EqualFold(trimmedHost, rawDomain) { // Serve raw content from RawDomain @@ -144,21 +102,19 @@ func Handler(mainDomainSuffix, rawDomain string, ctx.Redirect(rawInfoPage, http.StatusTemporaryRedirect) return } - targetOwner = pathElements[0] - targetRepo = pathElements[1] + targetOwner := pathElements[0] + targetRepo := pathElements[1] // raw.codeberg.org/example/myrepo/@main/index.html if len(pathElements) > 2 && strings.HasPrefix(pathElements[2], "@") { log.Debug().Msg("raw domain preparations, now trying with specified branch") - if newRepo, newPath, newBranch, newTimestamp, works := tryBranch2(log, ctx, giteaClient, - targetOwner, targetRepo, pathElements[2][1:], pathElements[3:], - giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p", - ); works { - targetOptions.BranchTimestamp = *newTimestamp + newPath := path.Join(pathElements[3:]...) 
+ branch := pathElements[2][1:] + if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, branch, newPath, true); works { + targetOptions.BranchTimestamp = timestampBranch.Timestamp log.Debug().Msg("tryBranch, now trying upstream 1") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, newRepo, newBranch, newPath, - canonicalDomainCache) + targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache) return } log.Debug().Msg("missing branch info") @@ -167,14 +123,15 @@ func Handler(mainDomainSuffix, rawDomain string, } log.Debug().Msg("raw domain preparations, now trying with default branch") - tryBranch(log, - targetRepo, "", pathElements[2:], - giteaRoot+"/"+targetOwner+"/"+targetRepo+"/src/branch/%b/%p", - ) - log.Debug().Msg("tryBranch, now trying upstream 2") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache) + newPath := path.Join(pathElements[2:]...) 
+ if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, "", newPath, true); works { + targetOptions.BranchTimestamp = timestampBranch.Timestamp + log.Debug().Msg("tryBranch, now trying upstream 2") + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, + targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache) + } else { + html.ReturnErrorPage(ctx, fmt.Sprintf("raw domain could not find repo '%s/%s' or repo is empty", targetOwner, targetRepo), http.StatusNotFound) + } return } else if strings.HasSuffix(trimmedHost, mainDomainSuffix) { @@ -182,9 +139,8 @@ func Handler(mainDomainSuffix, rawDomain string, log.Debug().Msg("main domain suffix") pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/") - targetOwner = strings.TrimSuffix(trimmedHost, mainDomainSuffix) - targetRepo = pathElements[0] - targetPath = strings.Trim(strings.Join(pathElements[1:], "/"), "/") + targetOwner := strings.TrimSuffix(trimmedHost, mainDomainSuffix) + targetRepo := pathElements[0] if targetOwner == "www" { // www.codeberg.page redirects to codeberg.page // TODO: rm hardcoded - use cname? @@ -203,14 +159,13 @@ func Handler(mainDomainSuffix, rawDomain string, log.Debug().Msg("main domain preparations, now trying with specified repo & branch") branch := pathElements[1][1:] - if tryBranch(log, - pathElements[0], branch, pathElements[2:], - "/"+pathElements[0]+"/%p", - ) { + newPath := path.Join(pathElements[2:]...) 
+ repo := pathElements[0] + if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, true); works { + targetOptions.BranchTimestamp = timestampBranch.Timestamp log.Debug().Msg("tryBranch, now trying upstream 3") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache) + targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) } else { html.ReturnErrorPage(ctx, fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, targetRepo), @@ -224,12 +179,13 @@ func Handler(mainDomainSuffix, rawDomain string, if strings.HasPrefix(pathElements[0], "@") { log.Debug().Msg("main domain preparations, now trying with specified branch") branch := pathElements[0][1:] - if tryBranch(log, - "pages", branch, pathElements[1:], "/%p") { + repo := "pages" + newPath := path.Join(pathElements[1:]...) + if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, true); works { + targetOptions.BranchTimestamp = timestampBranch.Timestamp log.Debug().Msg("tryBranch, now trying upstream 4") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, "pages", targetBranch, targetPath, - canonicalDomainCache) + targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) } else { html.ReturnErrorPage(ctx, fmt.Sprintf("explizite set branch %q do not exist at '%s/%s'", branch, targetOwner, "pages"), @@ -242,37 +198,42 @@ func Handler(mainDomainSuffix, rawDomain string, // example.codeberg.page/myrepo/index.html // example.codeberg.page/pages/... is not allowed here. 
log.Debug().Msg("main domain preparations, now trying with specified repo") - if pathElements[0] != "pages" && tryBranch(log, - pathElements[0], "pages", pathElements[1:], "") { - log.Debug().Msg("tryBranch, now trying upstream 5") - tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache) - return + if pathElements[0] != "pages" { + repo := pathElements[0] + branch := "pages" + newPath := path.Join(pathElements[1:]...) + if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, branch, newPath, false); works { + targetOptions.BranchTimestamp = timestampBranch.Timestamp + log.Debug().Msg("tryBranch, now trying upstream 5") + tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, + targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) + return + } } // Try to use the "pages" repo on its default branch // example.codeberg.page/index.html log.Debug().Msg("main domain preparations, now trying with default repo/branch") - if tryBranch(log, - "pages", "", pathElements, "") { + newPath := path.Join(pathElements...) 
+ repo := "pages" + if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, repo, "", newPath, false); works { + targetOptions.BranchTimestamp = timestampBranch.Timestamp log.Debug().Msg("tryBranch, now trying upstream 6") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache) + targetOptions, targetOwner, repo, timestampBranch.Branch, newPath, canonicalDomainCache) return } // Couldn't find a valid repo/branch html.ReturnErrorPage(ctx, - fmt.Sprintf("couldn't find a valid repo[%s]/branch[%s]", targetRepo, targetBranch), + fmt.Sprintf("couldn't find a valid repo[%s]", targetRepo), http.StatusFailedDependency) return } else { trimmedHostStr := string(trimmedHost) // Serve pages from custom domains - targetOwner, targetRepo, targetBranch = dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache) + targetOwner, targetRepo, targetBranch := dns.GetTargetFromDNS(trimmedHostStr, string(mainDomainSuffix), dnsLookupCache) if targetOwner == "" { html.ReturnErrorPage(ctx, "could not obtain repo owner from custom domain", @@ -281,18 +242,19 @@ func Handler(mainDomainSuffix, rawDomain string, } pathElements := strings.Split(strings.Trim(ctx.Path(), "/"), "/") - canonicalLink := "" + canonicalLink := false if strings.HasPrefix(pathElements[0], "@") { targetBranch = pathElements[0][1:] pathElements = pathElements[1:] - canonicalLink = "/%p" + canonicalLink = true } // Try to use the given repo on the given branch or the default branch + newPath := path.Join(pathElements...) 
log.Debug().Msg("custom domain preparations, now trying with details from DNS") - if tryBranch(log, - targetRepo, targetBranch, pathElements, canonicalLink) { - canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, targetBranch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache) + if timestampBranch, works := tryBranch(log, ctx, giteaClient, targetOwner, targetRepo, targetBranch, newPath, canonicalLink); works { + targetOptions.BranchTimestamp = timestampBranch.Timestamp + canonicalDomain, valid := upstream.CheckCanonicalDomain(giteaClient, targetOwner, targetRepo, timestampBranch.Branch, trimmedHostStr, string(mainDomainSuffix), canonicalDomainCache) if !valid { html.ReturnErrorPage(ctx, "domain not specified in .domains file", http.StatusMisdirectedRequest) return @@ -300,7 +262,7 @@ func Handler(mainDomainSuffix, rawDomain string, // only redirect if the target is also a codeberg page! targetOwner, _, _ = dns.GetTargetFromDNS(strings.SplitN(canonicalDomain, "/", 2)[0], string(mainDomainSuffix), dnsLookupCache) if targetOwner != "" { - ctx.Redirect("https://"+canonicalDomain+string(ctx.Path()), http.StatusTemporaryRedirect) + ctx.Redirect("https://"+canonicalDomain+"/"+newPath, http.StatusTemporaryRedirect) return } @@ -310,8 +272,7 @@ func Handler(mainDomainSuffix, rawDomain string, log.Debug().Msg("tryBranch, now trying upstream 7") tryUpstream(ctx, giteaClient, mainDomainSuffix, trimmedHost, - targetOptions, targetOwner, targetRepo, targetBranch, targetPath, - canonicalDomainCache) + targetOptions, targetOwner, targetRepo, timestampBranch.Branch, newPath, canonicalDomainCache) return } diff --git a/server/handler_test.go b/server/handler_test.go index c0aca14..b81a2b4 100644 --- a/server/handler_test.go +++ b/server/handler_test.go @@ -11,12 +11,11 @@ import ( ) func TestHandlerPerformance(t *testing.T) { - giteaRoot := "https://codeberg.org" - giteaClient, _ := gitea.NewClient(giteaRoot, "", 
cache.NewKeyValueCache(), false, false) + giteaClient, _ := gitea.NewClient("https://codeberg.org", "", cache.NewKeyValueCache(), false, false) testHandler := Handler( "codeberg.page", "raw.codeberg.org", giteaClient, - giteaRoot, "https://docs.codeberg.org/pages/raw-content/", + "https://docs.codeberg.org/pages/raw-content/", []string{"/.well-known/acme-challenge/"}, []string{"raw.codeberg.org", "fonts.codeberg.org", "design.codeberg.org"}, cache.NewKeyValueCache(), diff --git a/server/try.go b/server/try.go index ccad226..70ee3f7 100644 --- a/server/try.go +++ b/server/try.go @@ -2,9 +2,7 @@ package server import ( "net/http" - "path" "strings" - "time" "codeberg.org/codeberg/pages/html" "codeberg.org/codeberg/pages/server/cache" @@ -51,13 +49,14 @@ func tryUpstream(ctx *context.Context, giteaClient *gitea.Client, } } -func tryBranch2(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client, - repoOwner, repoName, branch string, _path []string, canonicalLink string) ( - targetRepo, targetPath, targetBranch string, branchTimestamp *time.Time, - works bool) { +// tryBranch checks if a branch exists and returns its branch timestamp result and true if so. If canonicalLink is true, +// it will also disallow search indexing and add a Link header to the canonical URL. 
+func tryBranch(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Client, + repoOwner, repoName, branch, path string, canonicalLink bool, +) (*gitea.BranchTimestamp, bool) { if repoName == "" { - log.Debug().Msg("tryBranch: repo == ''") - return "", "", "", nil, false + log.Debug().Msg("tryBranch: repo is empty") + return nil, false } // Replace "~" to "/" so we can access branch that contains slash character @@ -68,25 +67,17 @@ func tryBranch2(log zerolog.Logger, ctx *context.Context, giteaClient *gitea.Cli branchTimestampResult := upstream.GetBranchTimestamp(giteaClient, repoOwner, repoName, branch) if branchTimestampResult == nil { log.Debug().Msg("tryBranch: branch doesn't exist") - return "", "", "", nil, false + return nil, false } - // Branch exists, use it - targetRepo = repoName - targetPath = path.Join(_path...) - targetBranch = branchTimestampResult.Branch - - branchTimestamp = &branchTimestampResult.Timestamp - - if canonicalLink != "" { + if canonicalLink { // Hide from search machines & add canonical link ctx.RespWriter.Header().Set("X-Robots-Tag", "noarchive, noindex") ctx.RespWriter.Header().Set("Link", - strings.NewReplacer("%b", targetBranch, "%p", targetPath).Replace(canonicalLink)+ - "; rel=\"canonical\"", - ) + "<"+giteaClient.ContentWebLink(repoOwner, repoName, branchTimestampResult.Branch, path)+ + ">; rel=\"canonical\"") } log.Debug().Msg("tryBranch: true") - return + return branchTimestampResult, true }