FIX blank internal pages (#164)

This commit is contained in:
Hoernschen 2024-02-25 19:47:12 +01:00
parent 7e80ade24b
commit f74f71cac1
No known key found for this signature in database
GPG Key ID: 37591FAF4E6D3462
4 changed files with 61 additions and 33 deletions

View File

@ -2,6 +2,7 @@ package gitea
import (
"bytes"
//"errors"
"fmt"
"io"
"net/http"
@ -39,7 +40,7 @@ type FileResponse struct {
}
func (f FileResponse) IsEmpty() bool {
return len(f.Body) != 0
return len(f.Body) == 0
}
func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, statusCode int) {
@ -72,13 +73,14 @@ type BranchTimestamp struct {
type writeCacheReader struct {
originalReader io.ReadCloser
buffer *bytes.Buffer
rileResponse *FileResponse
fileResponse *FileResponse
cacheKey string
cache cache.ICache
hasError bool
}
func (t *writeCacheReader) Read(p []byte) (n int, err error) {
log.Trace().Msgf("[cache] read %q", t.cacheKey)
n, err = t.originalReader.Read(p)
if err != nil && err != io.EOF {
log.Trace().Err(err).Msgf("[cache] original reader for %q has returned an error", t.cacheKey)
@ -90,12 +92,20 @@ func (t *writeCacheReader) Read(p []byte) (n int, err error) {
}
func (t *writeCacheReader) Close() error {
if !t.hasError {
fc := *t.rileResponse
fc.Body = t.buffer.Bytes()
_ = t.cache.Set(t.cacheKey, fc, fileCacheTimeout)
doWrite := !t.hasError
fc := *t.fileResponse
fc.Body = t.buffer.Bytes()
if fc.IsEmpty() {
log.Trace().Msg("[cache] file response is empty")
doWrite = false
}
log.Trace().Msgf("cacheReader for %q saved=%t closed", t.cacheKey, !t.hasError)
if doWrite {
err := t.cache.Set(t.cacheKey, fc, fileCacheTimeout)
if err != nil {
log.Trace().Err(err).Msgf("[cache] writer for %q has returned an error", t.cacheKey)
}
}
log.Trace().Msgf("cacheReader for %q saved=%t closed", t.cacheKey, doWrite)
return t.originalReader.Close()
}
@ -108,7 +118,7 @@ func (f FileResponse) CreateCacheReader(r io.ReadCloser, cache cache.ICache, cac
return &writeCacheReader{
originalReader: r,
buffer: bytes.NewBuffer(make([]byte, 0)),
rileResponse: &f,
fileResponse: &f,
cache: cache,
cacheKey: cacheKey,
}

View File

@ -112,26 +112,28 @@ func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource str
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
log := log.With().Str("cache_key", cacheKey).Logger()
log.Trace().Msg("try file in cache")
// handle if cache entry exist
if cache, ok := client.responseCache.Get(cacheKey); ok {
cache := cache.(FileResponse)
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
// TODO: check against some timestamp mismatch?!?
if cache.Exists {
log.Debug().Msg("[cache] exists")
if cache.IsSymlink {
linkDest := string(cache.Body)
log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
} else {
log.Debug().Msg("[cache] return bytes")
} else if !cache.IsEmpty() {
log.Debug().Msgf("[cache] return %d bytes", len(cache.Body))
return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
} else if cache.IsEmpty() {
log.Debug().Msg("[cache] is empty")
}
} else {
return nil, cachedHeader, cachedStatusCode, ErrorNotFound
//return nil, cachedHeader, cachedStatusCode, ErrorNotFound
}
}
log.Trace().Msg("file not in cache")
// not in cache, open reader via gitea api
reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
if resp != nil {
@ -155,12 +157,14 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str
linkDest = path.Join(path.Dir(resource), linkDest)
// we store symlink not content to reduce duplicates in cache
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: true,
fileResponse := FileResponse {
Exists: true,
IsSymlink: true,
Body: []byte(linkDest),
ETag: resp.Header.Get(ETagHeader),
}, fileCacheTimeout); err != nil {
Body: []byte(linkDest),
ETag: resp.Header.Get(ETagHeader),
}
log.Trace().Msgf("file response has %d bytes", len(fileResponse.Body))
if err := client.responseCache.Set(cacheKey, fileResponse, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("[cache] error on cache write")
}

View File

@ -26,6 +26,7 @@ func Handler(
dnsLookupCache, canonicalDomainCache, redirectsCache cache.ICache,
) http.HandlerFunc {
return func(w http.ResponseWriter, req *http.Request) {
log.Info().Msg("\n----------------------------------------------------------")
log := log.With().Strs("Handler", []string{req.Host, req.RequestURI}).Logger()
ctx := context.New(w, req)

View File

@ -46,7 +46,7 @@ type Options struct {
TryIndexPages bool
BranchTimestamp time.Time
// internal
appendTrailingSlash bool
dontAppendTrailingSlash bool
redirectIfExists string
ServeRaw bool
@ -56,6 +56,8 @@ type Options struct {
func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.ICache) bool {
log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger()
log.Debug().Msg("Start")
if o.TargetOwner == "" || o.TargetRepo == "" {
html.ReturnErrorPage(ctx, "forge client: either repo owner or name info is missing", http.StatusBadRequest)
return true
@ -93,6 +95,19 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
log.Trace().Msg("check response against last modified: outdated")
}
if strings.HasSuffix(ctx.Path(), "/index.html") && !o.ServeRaw {
log.Trace().Msg("remove index.html from path and redirect")
ctx.Redirect(strings.TrimSuffix(ctx.Path(), "index.html"), http.StatusTemporaryRedirect)
return true
}
// Append trailing slash if missing (for index files), and redirect to fix filenames in general
// o.dontAppendTrailingSlash is only true when looking for index pages
if !o.dontAppendTrailingSlash && !strings.HasSuffix(ctx.Path(), "/") {
log.Trace().Msg("append trailing slash and redirect")
ctx.Redirect(ctx.Path()+"/", http.StatusTemporaryRedirect)
return true
}
log.Debug().Msg("Preparing")
reader, header, statusCode, err := giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath)
@ -104,25 +119,29 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
// Handle not found error
if err != nil && errors.Is(err, gitea.ErrorNotFound) {
log.Debug().Msg("Handling not found error")
// Get and match redirects
redirects := o.getRedirects(giteaClient, redirectsCache)
if o.matchRedirects(ctx, giteaClient, redirects, redirectsCache) {
log.Trace().Msg("redirect")
return true
}
if o.TryIndexPages {
log.Trace().Msg("try index page")
// copy the o struct & try if an index page exists
optionsForIndexPages := *o
optionsForIndexPages.TryIndexPages = false
optionsForIndexPages.appendTrailingSlash = true
optionsForIndexPages.dontAppendTrailingSlash = false
for _, indexPage := range upstreamIndexPages {
optionsForIndexPages.TargetPath = strings.TrimSuffix(o.TargetPath, "/") + "/" + indexPage
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
log.Trace().Msg("try html file with path name")
// compatibility fix for GitHub Pages (/example → /example.html)
optionsForIndexPages.appendTrailingSlash = false
optionsForIndexPages.dontAppendTrailingSlash = true
optionsForIndexPages.redirectIfExists = strings.TrimSuffix(ctx.Path(), "/") + ".html"
optionsForIndexPages.TargetPath = o.TargetPath + ".html"
if optionsForIndexPages.Upstream(ctx, giteaClient, redirectsCache) {
@ -130,18 +149,22 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
}
}
log.Trace().Msg("not found")
ctx.StatusCode = http.StatusNotFound
if o.TryIndexPages {
log.Trace().Msg("try not found page")
// copy the o struct & try if a not found page exists
optionsForNotFoundPages := *o
optionsForNotFoundPages.TryIndexPages = false
optionsForNotFoundPages.appendTrailingSlash = false
optionsForNotFoundPages.dontAppendTrailingSlash = true
for _, notFoundPage := range upstreamNotFoundPages {
optionsForNotFoundPages.TargetPath = "/" + notFoundPage
if optionsForNotFoundPages.Upstream(ctx, giteaClient, redirectsCache) {
return true
}
}
log.Trace().Msg("not found page missing")
}
return false
@ -170,16 +193,6 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
return true
}
// Append trailing slash if missing (for index files), and redirect to fix filenames in general
// o.appendTrailingSlash is only true when looking for index pages
if o.appendTrailingSlash && !strings.HasSuffix(ctx.Path(), "/") {
ctx.Redirect(ctx.Path()+"/", http.StatusTemporaryRedirect)
return true
}
if strings.HasSuffix(ctx.Path(), "/index.html") && !o.ServeRaw {
ctx.Redirect(strings.TrimSuffix(ctx.Path(), "index.html"), http.StatusTemporaryRedirect)
return true
}
if o.redirectIfExists != "" {
ctx.Redirect(o.redirectIfExists, http.StatusTemporaryRedirect)
return true