diff --git a/certificates.go b/certificates.go
new file mode 100644
index 0000000..78e1110
--- /dev/null
+++ b/certificates.go
@@ -0,0 +1,16 @@
+package main
+
+import (
+	"crypto/tls"
+	"fmt"
+)
+
+// tlsConfig is the TLS configuration for the Let's Encrypt listener; GetCertificate is a stub that fails with "NYI".
+var tlsConfig = &tls.Config{
+	GetCertificate: func(info *tls.ClientHelloInfo) (*tls.Certificate, error) {
+		// TODO: check DNS name & get certificate from Let's Encrypt
+		return nil, fmt.Errorf("NYI")
+	},
+	PreferServerCipherSuites: true, // NOTE(review): legacy field, newer crypto/tls releases may ignore it - confirm
+	// TODO: optimize cipher suites, minimum TLS version, etc.
+}
diff --git a/domains.go b/domains.go
new file mode 100644
index 0000000..d0b25b1
--- /dev/null
+++ b/domains.go
@@ -0,0 +1,11 @@
+package main
+
+import "github.com/valyala/fasthttp"
+
+// getTargetFromDNS is meant to derive the target from the CNAME entries of the request domain (optionally with a "www."
+// prefix) after checking the repository's "domains.txt" file. For now it is a stub that returns four empty strings.
+func getTargetFromDNS(ctx *fasthttp.RequestCtx) (targetOwner, targetRepo, targetBranch, targetPath string) {
+	// TODO: read CNAME record for host and "www.{host}" to get those values
+	// TODO: check domains.txt
+	return
+}
diff --git a/handler.go b/handler.go
new file mode 100644
index 0000000..02a1a73
--- /dev/null
+++ b/handler.go
@@ -0,0 +1,269 @@
+package main
+
+import (
+	"bytes"
+	"fmt"
+	"github.com/valyala/fasthttp"
+	"github.com/valyala/fastjson"
+	"mime"
+	"net/url"
+	"path"
+	"strconv"
+	"strings"
+	"time"
+)
+
+// handler handles a single HTTP request to the web server.
+//
+// It sets the common response headers, rejects all methods except GET, HEAD and OPTIONS, blocks BlacklistedPaths and
+// answers OPTIONS requests directly. For everything else it derives owner, repository, branch and path from the host
+// (RawDomain, a subdomain of MainDomainSuffix or an external domain resolved by getTargetFromDNS) and hands them to
+// upstream; if upstream reports a failure, the error page for the current response status is written.
+func handler(ctx *fasthttp.RequestCtx) {
+	ctx.Response.Header.Set("Server", "Codeberg Pages")
+
+	// Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin
+	ctx.Response.Header.Set("Referrer-Policy", "strict-origin-when-cross-origin")
+
+	// Enable caching, but require revalidation to reduce confusion
+	ctx.Response.Header.Set("Cache-Control", "must-revalidate")
+
+	// Block all methods not required for static pages
+	if !ctx.IsGet() && !ctx.IsHead() && !ctx.IsOptions() {
+		ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS")
+		ctx.Error("Method not allowed", fasthttp.StatusMethodNotAllowed)
+		return
+	}
+
+	// Block blacklisted paths (like ACME challenges)
+	for _, blacklistedPath := range BlacklistedPaths {
+		if bytes.HasPrefix(ctx.Path(), blacklistedPath) {
+			returnErrorPage(ctx, fasthttp.StatusForbidden)
+			return
+		}
+	}
+
+	// Answer OPTIONS requests right away; the CORS headers are only added if the host is listed in AllowedCorsDomains
+	if ctx.IsOptions() {
+		allowCors := false
+		for _, allowedCorsDomain := range AllowedCorsDomains {
+			if bytes.Equal(ctx.Request.Host(), allowedCorsDomain) {
+				allowCors = true
+				break
+			}
+		}
+		if allowCors {
+			ctx.Response.Header.Set("Access-Control-Allow-Origin", "*")
+			ctx.Response.Header.Set("Access-Control-Allow-Methods", "GET, HEAD")
+		}
+		ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS")
+		ctx.Response.Header.SetStatusCode(fasthttp.StatusNoContent)
+		return
+	}
+
+	// Prepare request information to Gitea
+	var targetOwner, targetRepo, targetBranch, targetPath string
+	var targetOptions = &upstreamOptions{
+		ForbiddenMimeTypes: map[string]struct{}{},
+		TryIndexPages:      true,
+	}
+
+	if RawDomain != nil && bytes.Equal(ctx.Request.Host(), RawDomain) {
+		// Serve raw content from RawDomain: no index pages, and HTML is delivered as plain text
+
+		targetOptions.TryIndexPages = false
+		targetOptions.ForbiddenMimeTypes["text/html"] = struct{}{}
+		targetOptions.DefaultMimeType = "text/plain; charset=utf-8"
+
+		// Up to four elements: owner, repo, an optional "@branch" and the remaining file path
+		pathElements := strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 4)
+		if len(pathElements) < 2 {
+			// https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required
+			ctx.Redirect(RawInfoPage, fasthttp.StatusTemporaryRedirect)
+			return
+		}
+		targetOwner = pathElements[0]
+		targetRepo = pathElements[1]
+		if len(pathElements) > 3 {
+			targetPath = strings.Trim(pathElements[2]+"/"+pathElements[3], "/")
+		} else if len(pathElements) > 2 {
+			targetPath = pathElements[2]
+		}
+
+		// raw.codeberg.page/example/myrepo/@main/index.html
+		if len(pathElements) > 3 && strings.HasPrefix(pathElements[2], "@") {
+			// Fall back to the raw element if unescaping fails or yields an empty name
+			branch, _ := url.PathUnescape(pathElements[2][1:])
+			if branch == "" {
+				branch = pathElements[2][1:]
+			}
+			// Check if the branch exists, otherwise treat it as a file path
+			targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, branch)
+			if targetOptions.BranchTimestamp != (time.Time{}) {
+				targetPath = strings.Trim(pathElements[3], "/") // branch exists, use it
+			} else {
+				targetBranch = "" // branch doesn't exist, use default branch
+			}
+		}
+
+	} else if bytes.HasSuffix(ctx.Request.Host(), MainDomainSuffix) {
+		// Serve pages from subdomains of MainDomainSuffix: the subdomain is the owner, the first path element is
+		// tried as the repository name
+
+		pathElements := strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 2)
+		targetOwner = string(bytes.TrimSuffix(ctx.Request.Host(), MainDomainSuffix))
+		targetRepo = pathElements[0]
+		if len(pathElements) > 1 {
+			targetPath = strings.Trim(pathElements[1], "/")
+		}
+
+		// Check if the first directory is a repo with a "pages" branch; if not, serve from the default branch of the
+		// owner's "pages" repository and treat the first directory as part of the file path
+		targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, "pages")
+		if targetOptions.BranchTimestamp == (time.Time{}) {
+			targetRepo = "pages"
+			targetBranch = ""
+			targetPath = strings.Trim(pathElements[0]+"/"+targetPath, "/")
+		}
+	} else {
+		// Serve pages from external domains
+
+		targetOwner, targetRepo, targetBranch, targetPath = getTargetFromDNS(ctx)
+		if targetOwner == "" {
+			ctx.Redirect(BrokenDNSPage, fasthttp.StatusTemporaryRedirect)
+			return
+		}
+	}
+
+	// Reject reserved usernames: no user can have such a name, and requesting it would risk hitting a Gitea route
+	if _, ok := ReservedUsernames[targetOwner]; ok {
+		returnErrorPage(ctx, fasthttp.StatusForbidden)
+		return
+	}
+
+	// Check for blob path
+	if strings.HasPrefix(targetPath, "blob/") {
+		returnErrorPage(ctx, fasthttp.StatusForbidden)
+		return
+	}
+
+	// Try to request the file from the Gitea API; on failure upstream has only set the status code
+	if !upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath, targetOptions) {
+		returnErrorPage(ctx, ctx.Response.StatusCode())
+	}
+}
+
+// returnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced
+// with the provided status code.
+func returnErrorPage(ctx *fasthttp.RequestCtx, code int) {
+	ctx.Response.SetStatusCode(code)
+	ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code))))
+}
+
+// getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch
+// (or an empty time.Time if the branch doesn't exist). branchWithFallback is the branch that was looked up, or "" if
+// the repository or the branch couldn't be fetched from the Gitea API (each request is limited to 10 seconds).
+func getBranchTimestamp(owner, repo, branch string) (branchWithFallback string, t time.Time) {
+	// owner, repo and branch are taken from the decoded request URL, so they are escaped before they become part of
+	// the API URL; otherwise a "?", "#" or "%" in one of them would change which URL is requested.
+	repoURL := string(GiteaRoot) + "/api/v1/repos/" + url.PathEscape(owner) + "/" + url.PathEscape(repo)
+
+	if branch == "" {
+		status, body, err := fasthttp.GetTimeout(nil, repoURL, 10*time.Second)
+		if err != nil || status != fasthttp.StatusOK {
+			return "", time.Time{}
+		}
+		branch = fastjson.GetString(body, "default_branch")
+	}
+
+	// Branch names may contain slashes; PathEscape turns them into "%2F", which is reverted to keep them as separators
+	escapedBranch := strings.ReplaceAll(url.PathEscape(branch), "%2F", "/")
+	status, body, err := fasthttp.GetTimeout(nil, repoURL+"/branches/"+escapedBranch, 10*time.Second)
+	if err != nil || status != fasthttp.StatusOK {
+		return "", time.Time{}
+	}
+
+	// A missing or malformed timestamp leaves t at its zero value
+	t, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp"))
+	return branch, t
+}
+
+// upstream requests a file from the Gitea
API at GiteaRoot and writes it to the request context.
+//
+// If options.BranchTimestamp is zero, the branch is resolved with getBranchTimestamp first (an empty targetBranch
+// selects the default branch). The result is true whenever a response has been written to ctx (the file, a 304, the
+// redirect to the trailing-slash URL or a 500 error page). It is false if the branch or the file couldn't be found;
+// in that case only the 404 status code is set and writing an error page is left to the caller.
+func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, targetBranch string, targetPath string, options *upstreamOptions) (success bool) {
+	if options.ForbiddenMimeTypes == nil {
+		options.ForbiddenMimeTypes = map[string]struct{}{}
+	}
+
+	// Check if the branch exists and when it was modified
+	if options.BranchTimestamp == (time.Time{}) {
+		targetBranch, options.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, targetBranch)
+		if options.BranchTimestamp == (time.Time{}) {
+			ctx.Response.SetStatusCode(fasthttp.StatusNotFound)
+			return false
+		}
+	}
+
+	// Check if the browser has a cached version
+	if ifModifiedSince, err := time.Parse(time.RFC1123, string(ctx.Request.Header.Peek("If-Modified-Since"))); err == nil {
+		if !ifModifiedSince.Before(options.BranchTimestamp) {
+			ctx.Response.SetStatusCode(fasthttp.StatusNotModified)
+			return true
+		}
+	}
+
+	// The request and response objects come from fasthttp's pools and are handed back when the function returns
+	req := fasthttp.AcquireRequest()
+	defer fasthttp.ReleaseRequest(req)
+	res := fasthttp.AcquireResponse()
+	defer fasthttp.ReleaseResponse(res)
+
+	// Owner, repo, branch and path stem from the decoded request URL. Every path segment is escaped on its own, so
+	// that "/" keeps separating directories while "?", "#" or "%" in a name can't change which URL is requested.
+	pathSegments := strings.Split(targetBranch+"/"+targetPath, "/")
+	for i, segment := range pathSegments {
+		pathSegments[i] = url.PathEscape(segment)
+	}
+
+	// Make a GET request to the upstream URL
+	req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + url.PathEscape(targetOwner) + "/" + url.PathEscape(targetRepo) + "/raw/" + strings.Join(pathSegments, "/"))
+	err := fasthttp.DoTimeout(req, res, 10*time.Second)
+
+	// Handle errors
+	if res.StatusCode() == fasthttp.StatusNotFound {
+		if options.TryIndexPages {
+			// copy the options struct & try if an index page exists
+			optionsForIndexPages := *options
+			optionsForIndexPages.TryIndexPages = false
+			optionsForIndexPages.AppendTrailingSlash = true
+			for _, indexPage := range IndexPages {
+				if upstream(ctx, targetOwner, targetRepo, targetBranch, strings.TrimSuffix(targetPath, "/")+"/"+indexPage, &optionsForIndexPages) {
+					return true
+				}
+			}
+		}
+		ctx.Response.SetStatusCode(fasthttp.StatusNotFound)
+		return false
+	}
+	if err != nil || res.StatusCode() != fasthttp.StatusOK {
+		// err may be nil here (unexpected status code), hence %v instead of %s
+		fmt.Printf("Couldn't fetch contents from \"%s\": %v (status code %d)\n", req.RequestURI(), err, res.StatusCode())
+		returnErrorPage(ctx, fasthttp.StatusInternalServerError)
+		return true
+	}
+
+	// Append trailing slash if missing (for index files)
+	if options.AppendTrailingSlash && !bytes.HasSuffix(ctx.Request.URI().Path(), []byte{'/'}) {
+		ctx.Redirect(string(ctx.Request.URI().Path())+"/", fasthttp.StatusTemporaryRedirect)
+		return true
+	}
+
+	// Set the MIME type; unknown and forbidden types are replaced by the default type
+	mimeType := mime.TypeByExtension(path.Ext(targetPath))
+	mimeTypeSplit := strings.SplitN(mimeType, ";", 2)
+	if _, ok := options.ForbiddenMimeTypes[mimeTypeSplit[0]]; ok || mimeType == "" {
+		if options.DefaultMimeType != "" {
+			mimeType = options.DefaultMimeType
+		} else {
+			mimeType = "application/octet-stream"
+		}
+	}
+	ctx.Response.Header.SetContentType(mimeType)
+
+	// Write the response to the original request
+	ctx.Response.SetStatusCode(fasthttp.StatusOK)
+	ctx.Response.Header.SetLastModified(options.BranchTimestamp)
+	err = res.BodyWriteTo(ctx.Response.BodyWriter())
+	if err != nil {
+		fmt.Printf("Couldn't write body for \"%s\": %s\n", req.RequestURI(), err)
+		returnErrorPage(ctx, fasthttp.StatusInternalServerError)
+		return true
+	}
+
+	return true
+}
+
+// upstreamOptions provides various options for the upstream request.
+type upstreamOptions struct {
+	// DefaultMimeType replaces unknown or forbidden MIME types; if empty, "application/octet-stream" is used.
+	DefaultMimeType string
+	// ForbiddenMimeTypes is the set of MIME types (without parameters) that must not be sent to the client.
+	ForbiddenMimeTypes map[string]struct{}
+	// TryIndexPages makes upstream retry a missing path with each of the IndexPages appended.
+	TryIndexPages bool
+	// AppendTrailingSlash redirects to the request path with a "/" appended if it doesn't end with one.
+	AppendTrailingSlash bool
+	// BranchTimestamp is the last modification time of the branch; the zero value makes upstream look it up.
+	BranchTimestamp time.Time
+}
diff --git a/main.go b/main.go
index b1b5517..ae4ac7b 100644
--- a/main.go
+++ b/main.go
@@ -2,7 +2,7 @@
 //
 // Mapping custom domains is not static anymore, but can be done with DNS:
 //
-// 1) add a "codeberg-pages-domains.txt" text file to your repository, containing the allowed domains
+// 1) add a "domains.txt" text file to your repository, containing the allowed domains, separated by new lines.
// // 2) add a CNAME entry to your domain, pointing to "[[{branch}.]{repo}.]{owner}.codeberg.page" (repo defaults to // "pages", "branch" defaults to the default branch if "repo" is "pages", or to "pages" if "repo" is something else): @@ -18,19 +18,14 @@ import ( "bytes" "crypto/tls" "fmt" - "mime" "net" - "net/url" "os" - "path" - "strconv" "strings" "time" _ "embed" "github.com/valyala/fasthttp" - "github.com/valyala/fastjson" ) // MainDomainSuffix specifies the main domain (starting with a dot) for which subdomains shall be served as static @@ -102,14 +97,7 @@ func main() { os.Exit(1) } if envOr("LETS_ENCRYPT", "0") == "1" { - tls.NewListener(listener, &tls.Config{ - GetCertificate: func(info *tls.ClientHelloInfo) (*tls.Certificate, error) { - // TODO: check DNS name & get certificate from Let's Encrypt - return nil, fmt.Errorf("NYI") - }, - PreferServerCipherSuites: true, - // TODO: optimize cipher suites, minimum TLS version, etc. - }) + tls.NewListener(listener, tlsConfig) } // Start the web server @@ -127,263 +115,6 @@ func main() { } } -// handler handles a single HTTP request to the web server. 
-func handler(ctx *fasthttp.RequestCtx) { - ctx.Response.Header.Set("Server", "Codeberg Pages") - - // Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin - ctx.Response.Header.Set("Referrer-Policy", "strict-origin-when-cross-origin") - - // Enable caching, but require revalidation to reduce confusion - ctx.Response.Header.Set("Cache-Control", "must-revalidate") - - // Block all methods not required for static pages - if !ctx.IsGet() && !ctx.IsHead() && !ctx.IsOptions() { - ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS") - ctx.Error("Method not allowed", fasthttp.StatusMethodNotAllowed) - return - } - - // Block blacklisted paths (like ACME challenges) - for _, blacklistedPath := range BlacklistedPaths { - if bytes.HasPrefix(ctx.Path(), blacklistedPath) { - returnErrorPage(ctx, fasthttp.StatusForbidden) - return - } - } - - // Allow CORS for specified domains - if ctx.IsOptions() { - allowCors := false - for _, allowedCorsDomain := range AllowedCorsDomains { - if bytes.Equal(ctx.Request.Host(), allowedCorsDomain) { - allowCors = true - break - } - } - if allowCors { - ctx.Response.Header.Set("Access-Control-Allow-Origin", "*") - ctx.Response.Header.Set("Access-Control-Allow-Methods", "GET, HEAD") - } - ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS") - ctx.Response.Header.SetStatusCode(fasthttp.StatusNoContent) - return - } - - // Prepare request information to Gitea - var targetOwner, targetRepo, targetBranch, targetPath string - var targetOptions = &upstreamOptions{ - ForbiddenMimeTypes: map[string]struct{}{}, - TryIndexPages: true, - } - - if RawDomain != nil && bytes.Equal(ctx.Request.Host(), RawDomain) { - // Serve raw content from RawDomain - - targetOptions.TryIndexPages = false - targetOptions.ForbiddenMimeTypes["text/html"] = struct{}{} - targetOptions.DefaultMimeType = "text/plain; charset=utf-8" - - pathElements := 
strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 4) - if len(pathElements) < 2 { - // https://{RawDomain}/{owner}/{repo}[/@{branch}]/{path} is required - ctx.Redirect(RawInfoPage, fasthttp.StatusTemporaryRedirect) - return - } - targetOwner = pathElements[0] - targetRepo = pathElements[1] - if len(pathElements) > 3 { - targetPath = strings.Trim(pathElements[2]+"/"+pathElements[3], "/") - } else if len(pathElements) > 2 { - targetPath = pathElements[2] - } - - // raw.codeberg.page/example/myrepo/@main/index.html - if len(pathElements) > 3 && strings.HasPrefix(pathElements[2], "@") { - branch, _ := url.PathUnescape(pathElements[2][1:]) - if branch == "" { - branch = pathElements[2][1:] - } - // Check if the branch exists, otherwise treat it as a file path - targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, branch) - if targetOptions.BranchTimestamp != (time.Time{}) { - targetPath = strings.Trim(pathElements[3], "/") // branch exists, use it - } else { - targetBranch = "" // branch doesn't exist, use default branch - } - } - - } else if bytes.HasSuffix(ctx.Request.Host(), MainDomainSuffix) { - // Serve pages from subdomains of MainDomainSuffix - - pathElements := strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 2) - targetOwner = string(bytes.TrimSuffix(ctx.Request.Host(), MainDomainSuffix)) - targetRepo = pathElements[0] - if len(pathElements) > 1 { - targetPath = strings.Trim(pathElements[1], "/") - } - - // Check if the first directory is a repo with a "pages" branch - targetBranch, targetOptions.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, "pages") - if targetOptions.BranchTimestamp == (time.Time{}) { - targetRepo = "pages" - targetBranch = "" - targetPath = strings.Trim(pathElements[0]+"/"+targetPath, "/") - } - } else { - // Serve pages from external domains - - targetOwner, targetRepo, targetBranch, targetPath = getTargetFromDNS(ctx) - if targetOwner == "" { - 
ctx.Redirect(BrokenDNSPage, fasthttp.StatusTemporaryRedirect) - return - } - } - - // Check if a username can't exist because it's reserved (we'd risk to hit a Gitea route in that case) - if _, ok := ReservedUsernames[targetOwner]; ok { - returnErrorPage(ctx, fasthttp.StatusForbidden) - return - } - - // Check for blob path - if strings.HasPrefix(targetPath, "blob/") { - returnErrorPage(ctx, fasthttp.StatusForbidden) - return - } - - if upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath, targetOptions) { - return - } - - returnErrorPage(ctx, ctx.Response.StatusCode()) -} - -func getTargetFromDNS(ctx *fasthttp.RequestCtx) (targetOwner, targetRepo, targetBranch, targetPath string) { - // TODO: read CNAME record for host and "www.{host}" to get those values - // TODO: check codeberg-pages-domains.txt - return -} - -// returnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced -// with the provided status code. -func returnErrorPage(ctx *fasthttp.RequestCtx, code int) { - ctx.Response.SetStatusCode(code) - ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)))) -} - -func getBranchTimestamp(owner, repo, branch string) (branchWithFallback string, t time.Time) { - branchWithFallback = branch - if branch == "" { - var body = make([]byte, 0) - status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo, 10*time.Second) - if err != nil || status != 200 { - return - } - branch = fastjson.GetString(body, "default_branch") - branchWithFallback = branch - } - - var body = make([]byte, 0) - status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot)+"/api/v1/repos/"+owner+"/"+repo+"/branches/"+branch, 10*time.Second) - if err != nil || status != 200 { - return - } - - t, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp")) - return -} - -// upstream requests an URL from GiteaRoot and writes it to the 
request context; if "final" is set, it also returns a -// 404 error if the page couldn't be loaded. -func upstream(ctx *fasthttp.RequestCtx, targetOwner string, targetRepo string, targetBranch string, targetPath string, options *upstreamOptions) (success bool) { - fmt.Printf("Trying: %s/%s/%s/%s\n", targetOwner, targetRepo, targetBranch, targetPath) - if options.ForbiddenMimeTypes == nil { - options.ForbiddenMimeTypes = map[string]struct{}{} - } - - // Check if the branch exists and when it was modified - if options.BranchTimestamp == (time.Time{}) { - targetBranch, options.BranchTimestamp = getBranchTimestamp(targetOwner, targetRepo, targetBranch) - if options.BranchTimestamp == (time.Time{}) { - ctx.Response.SetStatusCode(fasthttp.StatusNotFound) - return false - } - } - - if ifModifiedSince, err := time.Parse(time.RFC1123, string(ctx.Request.Header.Peek("If-Modified-Since"))); err == nil { - if !ifModifiedSince.Before(options.BranchTimestamp) { - ctx.Response.SetStatusCode(fasthttp.StatusNotModified) - return true - } - } - - // Make a GET request to the upstream URL - req := fasthttp.AcquireRequest() - req.SetRequestURI(string(GiteaRoot) + "/api/v1/repos/" + targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/" + targetPath) - res := fasthttp.AcquireResponse() - err := fasthttp.DoTimeout(req, res, 10*time.Second) - if res.StatusCode() == fasthttp.StatusNotFound { - if options.TryIndexPages { - // copy the options struct & try if an index page exists - optionsForIndexPages := *options - optionsForIndexPages.TryIndexPages = false - optionsForIndexPages.AppendTrailingSlash = true - for _, indexPage := range IndexPages { - if upstream(ctx, targetOwner, targetRepo, targetBranch, strings.TrimSuffix(targetPath, "/")+"/"+indexPage, &optionsForIndexPages) { - return true - } - } - } - ctx.Response.SetStatusCode(fasthttp.StatusNotFound) - return false - } - if err != nil || res.StatusCode() != fasthttp.StatusOK { - fmt.Printf("Couldn't fetch contents from \"%s\": 
%s (status code %d)\n", req.RequestURI(), err, res.StatusCode()) - returnErrorPage(ctx, fasthttp.StatusInternalServerError) - return true - } - - // Append trailing slash if missing (for index files) - if options.AppendTrailingSlash && !bytes.HasSuffix(ctx.Request.URI().Path(), []byte{'/'}) { - ctx.Redirect(string(ctx.Request.URI().Path())+"/", fasthttp.StatusTemporaryRedirect) - return true - } - - // Set the MIME type - mimeType := mime.TypeByExtension(path.Ext(targetPath)) - mimeTypeSplit := strings.SplitN(mimeType, ";", 2) - if _, ok := options.ForbiddenMimeTypes[mimeTypeSplit[0]]; ok || mimeType == "" { - if options.DefaultMimeType != "" { - mimeType = options.DefaultMimeType - } else { - mimeType = "application/octet-stream" - } - } - ctx.Response.Header.SetContentType(mimeType) - - // Write the response to the original request - ctx.Response.SetStatusCode(fasthttp.StatusOK) - ctx.Response.Header.SetLastModified(options.BranchTimestamp) - err = res.BodyWriteTo(ctx.Response.BodyWriter()) - if err != nil { - fmt.Printf("Couldn't write body for \"%s\": %s\n", req.RequestURI(), err) - returnErrorPage(ctx, fasthttp.StatusInternalServerError) - return true - } - - return true -} - -// upstreamOptions provides various options for the upstream request. -type upstreamOptions struct { - DefaultMimeType string - ForbiddenMimeTypes map[string]struct{} - TryIndexPages bool - AppendTrailingSlash bool - BranchTimestamp time.Time -} - // envOr reads an environment variable and returns a default value if it's empty. func envOr(env string, or string) string { if v := os.Getenv(env); v != "" {