From 0756993f7eb86d4f771b7b5e95f9e974e80d422c Mon Sep 17 00:00:00 2001 From: Moritz Marquardt Date: Tue, 16 Mar 2021 20:22:06 +0100 Subject: [PATCH] Implement first version without custom domains, TLS and caching --- .gitignore | 2 + 404.html | 32 +++++ go.mod | 5 + go.sum | 23 ++++ main.go | 364 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 426 insertions(+) create mode 100644 .gitignore create mode 100644 404.html create mode 100644 go.mod create mode 100644 go.sum create mode 100644 main.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1062418 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.idea/ +*.iml diff --git a/404.html b/404.html new file mode 100644 index 0000000..854781d --- /dev/null +++ b/404.html @@ -0,0 +1,32 @@ + + + + + + Not found + + + + + + + + + +

You found a bug!

+

Sorry, this page doesn't exist or is otherwise inaccessible (code %status)

+ + + Website powered by Codeberg Pages + + + diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..fd8c9ee --- /dev/null +++ b/go.mod @@ -0,0 +1,5 @@ +module codeberg.org/codeberg/pages + +go 1.16 + +require github.com/valyala/fasthttp v1.22.0 diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..f00185c --- /dev/null +++ b/go.sum @@ -0,0 +1,23 @@ +github.com/andybalholm/brotli v1.0.1 h1:KqhlKozYbRtJvsPrrEeXcO+N2l6NYT5A2QAFmSULpEc= +github.com/andybalholm/brotli v1.0.1/go.mod h1:loMXtMfwqflxFJPmdbJO0a3KNoPuLBgiu3qAvBg8x/Y= +github.com/klauspost/compress v1.11.8 h1:difgzQsp5mdAz9v8lm3P/I+EpDKMU/6uTMw1y1FObuo= +github.com/klauspost/compress v1.11.8/go.mod h1:aoV0uJVorq1K+umq18yTdKaF57EivdYsUV+/s2qKfXs= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= +github.com/valyala/fasthttp v1.22.0 h1:OpwH5KDOJ9cS2bq8fD+KfT4IrksK0llvkHf4MZx42jQ= +github.com/valyala/fasthttp v1.22.0/go.mod h1:0mw2RjXGOzxf4NL2jni3gUQ7LfjjUSiG5sskOUUSEpU= +github.com/valyala/tcplisten v0.0.0-20161114210144-ceec8f93295a/go.mod h1:v3UYOV9WzVtRmSR+PDvWpU/qWl4Wa5LApYYX4ZtKbio= +golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20210220033148-5ea612d1eb83/go.mod h1:jdWPYTVW3xRLrWPugEBEK3UY2ZEsg3UU495nc5E+M+I= +golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20210226101413-39120d07d75e/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/term v0.0.0-20201117132131-f5c789dd3221/go.mod h1:Nr5EML6q2oocZ2LXRh80K7BxOlk5/8JxuGnuhpl+muw= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= +golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.5/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= diff --git a/main.go b/main.go new file mode 100644 index 0000000..3fd546b --- /dev/null +++ b/main.go @@ -0,0 +1,364 @@ +// Package main is the new Codeberg Pages server, a solution for serving static pages from Gitea repositories. +// +// Mapping custom domains is not static anymore, but can be done with DNS: +// +// 1) add a "codeberg-pages-domains.txt" text file to your repository, containing the allowed domains +// +// 2) add a CNAME entry to your domain, pointing to "[[{branch}.]{repo}.]{owner}.codeberg.page" (repo defaults to +// "pages", "branch" defaults to the default branch if "repo" is "pages", or to "pages" if "repo" is something else): +// www.example.org. IN CNAME main.pages.example.codeberg.page. +// 3) if a CNAME is set for "www.example.org", you can redirect there from the naked domain by adding an ALIAS record +// for "example.org" (if your provider allows ALIAS or similar records): +// example.org IN ALIAS codeberg.page. +// +// Certificates are generated, updated and cleaned up automatically via Let's Encrypt through a TLS challenge. +package main + +import ( + "bytes" + "crypto/tls" + "fmt" + "mime" + "net" + "os" + "path" + "strconv" + "strings" + "time" + + _ "embed" + + "github.com/valyala/fasthttp" +) + +// MainDomainSuffix specifies the main domain (starting with a dot) for which subdomains shall be served as static +// pages, or used for comparison in CNAME lookups. Static pages can be accessed through +// https://{owner}.{MainDomain}[/{repo}], with repo defaulting to "pages". +var MainDomainSuffix = []byte(".codeberg.page") + +// GiteaRoot specifies the root URL of the Gitea instance, without a trailing slash. +var GiteaRoot = []byte("https://codeberg.org") + +//go:embed 404.html +var NotFoundPage []byte + +// BrokenDNSPage will be shown (with a redirect) when trying to access a domain for which no DNS CNAME record exists. +var BrokenDNSPage = "https://docs.codeberg.org/codeberg-pages/custom-domains/" + +// RawDomain specifies the domain from which raw repository content shall be served in the following format: +// https://{RawDomain}/{owner}/{repo}[/{branch|tag|commit}/{version}]/{filepath...} +// (set to []byte(nil) to disable raw content hosting) +var RawDomain = []byte("raw.codeberg.page") + +// RawInfoPage will be shown (with a redirect) when trying to access RawDomain directly (or without owner/repo/path). +var RawInfoPage = "https://docs.codeberg.org/codeberg-pages/raw-content/" + +// AllowedCorsDomains lists the domains for which Cross-Origin Resource Sharing is allowed. +var AllowedCorsDomains = [][]byte{ + RawDomain, + []byte("fonts.codeberg.org"), + []byte("design.codeberg.org"), +} + +// BlacklistedPaths specifies forbidden path prefixes for all Codeberg Pages. +var BlacklistedPaths = [][]byte{ + []byte("/.well-known/acme-challenge/"), +} + +// IndexPages lists pages that may be considered as index pages for directories. +var IndexPages = []string{ + "index.html", +} + +// ReservedUsernames specifies the usernames that are reserved by Gitea and thus may not be used as owner names. +// The contents are taken from https://github.com/go-gitea/gitea/blob/master/models/user.go#L783; reserved names with +// dots are removed as they are forbidden for Codeberg Pages anyways. +var ReservedUsernames = map[string]struct{}{ + "admin": {}, + "api": {}, + "assets": {}, + "attachments": {}, + "avatars": {}, + "captcha": {}, + "commits": {}, + "debug": {}, + "error": {}, + "explore": {}, + "ghost": {}, + "help": {}, + "install": {}, + "issues": {}, + "less": {}, + "login": {}, + "metrics": {}, + "milestones": {}, + "new": {}, + "notifications": {}, + "org": {}, + "plugins": {}, + "pulls": {}, + "raw": {}, + "repo": {}, + "search": {}, + "stars": {}, + "template": {}, + "user": {}, +} + +// main sets up and starts the web server. +func main() { + // Make sure MainDomain has a trailing dot, and GiteaRoot has no trailing slash + if !bytes.HasPrefix(MainDomainSuffix, []byte{'.'}) { + MainDomainSuffix = append([]byte{'.'}, MainDomainSuffix...) + } + GiteaRoot = bytes.TrimSuffix(GiteaRoot, []byte{'/'}) + + // Use HOST and PORT environment variables to determine listening address + address := fmt.Sprintf("%s:%s", envOr("HOST", "[::]"), envOr("PORT", "80")) + fmt.Printf("Listening on http://%s\n", address) + + // Enable compression by wrapping the handler() method with the compression function provided by FastHTTP + compressedHandler := fasthttp.CompressHandlerBrotliLevel(handler, fasthttp.CompressBrotliBestSpeed, fasthttp.CompressBestSpeed) + + // Setup listener and TLS + listener, err := net.Listen("tcp", address) + if err != nil { + fmt.Printf("Couldn't create listener: %s\n", err) + os.Exit(1) + } + if envOr("LETS_ENCRYPT", "0") == "1" { + tls.NewListener(listener, &tls.Config{ + GetCertificate: func(info *tls.ClientHelloInfo) (*tls.Certificate, error) { + // TODO: check DNS name & get certificate from Let's Encrypt + return nil, fmt.Errorf("NYI") + }, + PreferServerCipherSuites: true, + // TODO: optimize cipher suites, minimum TLS version, etc. + }) + } + + // Start the web server + err = (&fasthttp.Server{ + Handler: compressedHandler, + DisablePreParseMultipartForm: false, + MaxRequestBodySize: 0, + NoDefaultServerHeader: true, + ReadTimeout: 10 * time.Second, + }).Serve(listener) + if err != nil { + fmt.Printf("Couldn't start server: %s\n", err) + os.Exit(1) + } +} + +// handler handles a single HTTP request to the web server. +func handler(ctx *fasthttp.RequestCtx) { + ctx.Response.Header.Set("Server", "Codeberg Pages") + + // Force new default from specification (since November 2020) - see https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referrer-Policy#strict-origin-when-cross-origin + ctx.Response.Header.Set("Referrer-Policy", "strict-origin-when-cross-origin") + + // Enable caching, but require revalidation to reduce confusion + ctx.Response.Header.Set("Cache-Control", "must-revalidate") + + // Block all methods not required for static pages + if !ctx.IsGet() && !ctx.IsHead() && !ctx.IsOptions() { + ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS") + ctx.Error("Method not allowed", fasthttp.StatusMethodNotAllowed) + return + } + + // Block blacklisted paths (like ACME challenges) + for _, blacklistedPath := range BlacklistedPaths { + if bytes.HasPrefix(ctx.Path(), blacklistedPath) { + returnErrorPage(ctx, fasthttp.StatusForbidden) + return + } + } + + // Allow CORS for specified domains + if ctx.IsOptions() { + allowCors := false + for _, allowedCorsDomain := range AllowedCorsDomains { + if bytes.Equal(ctx.Request.Host(), allowedCorsDomain) { + allowCors = true + break + } + } + if allowCors { + ctx.Response.Header.Set("Access-Control-Allow-Origin", "*") + ctx.Response.Header.Set("Access-Control-Allow-Methods", "GET, HEAD") + } + ctx.Response.Header.Set("Allow", "GET, HEAD, OPTIONS") + ctx.Response.Header.SetStatusCode(fasthttp.StatusNoContent) + return + } + + // Prepare request information to Gitea + var targetOwner, targetRepo, targetPath string + var targetOptions = upstreamOptions{ + ForbiddenMimeTypes: map[string]struct{}{}, + TryIndexPages: true, + } + var alsoTryPagesRepo = false // Also try to treat the repo as the first path element & fall back to the "pages" repo + + if RawDomain != nil && bytes.Equal(ctx.Request.Host(), RawDomain) { + // Serve raw content from RawDomain + + targetOptions.TryIndexPages = false + targetOptions.ForbiddenMimeTypes["text/html"] = struct{}{} + + pathElements := strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 3) + if len(pathElements) < 3 { + // https://{RawDomain}/{owner}/{repo}/{path} is required + ctx.Redirect(RawInfoPage, fasthttp.StatusTemporaryRedirect) + return + } + targetOwner = pathElements[0] + targetRepo = pathElements[1] + targetPath = pathElements[2] + + } else if bytes.HasSuffix(ctx.Request.Host(), MainDomainSuffix) { + // Serve pages from subdomains of MainDomainSuffix + + pathElements := strings.SplitN(string(bytes.Trim(ctx.Request.URI().Path(), "/")), "/", 2) + targetOwner = string(bytes.TrimSuffix(ctx.Request.Host(), MainDomainSuffix)) + targetRepo = pathElements[0] + targetPath = pathElements[1] + alsoTryPagesRepo = true + + } else { + // Serve pages from external domains + + targetOwner, targetRepo, targetPath = getTargetFromDNS(ctx.Request.Host()) + if targetOwner == "" { + ctx.Redirect(BrokenDNSPage, fasthttp.StatusTemporaryRedirect) + return + } + } + + // Check if a username can't exist because it's reserved (we'd risk to hit a Gitea route in that case) + if _, ok := ReservedUsernames[targetOwner]; ok { + returnErrorPage(ctx, fasthttp.StatusForbidden) + return + } + + // Pass request to Gitea + url := "/" + targetOwner + "/" + targetRepo + "/raw/" + targetPath + if strings.HasPrefix(targetPath, "blob/") { + returnErrorPage(ctx, fasthttp.StatusForbidden) + return + } + + // Try target + if upstream(ctx, url, targetOptions) { + return + } + + // Try target with pages repo + if alsoTryPagesRepo { + targetPath = targetRepo + "/" + targetPath + targetRepo = "pages" + url := "/" + targetOwner + "/" + targetRepo + "/raw/" + targetPath + if strings.HasPrefix(targetPath, "blob/") { + returnErrorPage(ctx, fasthttp.StatusForbidden) + return + } + + if upstream(ctx, url, targetOptions) { + return + } + } + + returnErrorPage(ctx, fasthttp.StatusNotFound) +} + +func getTargetFromDNS(host []byte) (targetOwner, targetRepo, targetPath string) { + // TODO: read CNAME record for host and "www.{host}" to get those values + // TODO: check codeberg-pages-domains.txt + return +} + +// returnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced +// with the provided status code. +func returnErrorPage(ctx *fasthttp.RequestCtx, code int) { + ctx.Response.SetStatusCode(code) + ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)))) +} + +// upstream requests an URL from GiteaRoot and writes it to the request context; if "final" is set, it also returns a +// 404 error if the page couldn't be loaded. +func upstream(ctx *fasthttp.RequestCtx, url string, options upstreamOptions) (success bool) { + // Prepare necessary (temporary) variables with default values + body := make([]byte, 0) + if options.ForbiddenMimeTypes == nil { + options.ForbiddenMimeTypes = map[string]struct{}{} + } + + // Make a request to the upstream URL + status, body, err := fasthttp.GetTimeout(body, string(GiteaRoot) + url, 10 * time.Second) + + // Handle errors + if err != nil { + // Connection error, probably Gitea or the internet connection is down? + fmt.Printf("Couldn't fetch URL \"%s\": %s", url, err) + ctx.Response.SetStatusCode(fasthttp.StatusBadGateway) + return false + } + if status != 200 { + if options.TryIndexPages { + // copy the options struct & try if an index page exists + optionsForIndexPages := options + optionsForIndexPages.TryIndexPages = false + optionsForIndexPages.AppendTrailingSlash = true + for _, indexPage := range IndexPages { + if upstream(ctx, url + "/" + indexPage, optionsForIndexPages) { + return true + } + } + } + ctx.Response.SetStatusCode(status) + return false + } + + // Append trailing slash if missing (for index files) + if options.AppendTrailingSlash && !bytes.HasSuffix(ctx.Request.URI().Path(), []byte{'/'}) { + ctx.Redirect(string(ctx.Request.URI().Path()) + "/", fasthttp.StatusTemporaryRedirect) + return true + } + + // Set the MIME type + mimeType := mime.TypeByExtension(path.Ext(url)) + mimeTypeSplit := strings.SplitN(mimeType, ";", 2) + if _, ok := options.ForbiddenMimeTypes[mimeTypeSplit[0]]; ok || mimeType == "" { + if options.DefaultMimeType != "" { + mimeType = options.DefaultMimeType + } else { + mimeType = "application/octet-stream" + } + } + ctx.Response.Header.SetContentType(mimeType) + + // TODO: enable Caching - set Date header and respect If-Modified-Since! + + // Set the response body + ctx.Response.SetStatusCode(fasthttp.StatusOK) + ctx.Response.SetBody(body) + return true +} + +// upstreamOptions provides various options for the upstream request. +type upstreamOptions struct { + DefaultMimeType string + ForbiddenMimeTypes map[string]struct{} + TryIndexPages bool + AppendTrailingSlash bool +} + +// envOr reads an environment variable and returns a default value if it's empty. +func envOr(env string, or string) string { + if v := os.Getenv(env); v != "" { + return v + } + return or +}