wip: add fileCache back

This commit is contained in:
6543 2022-09-18 21:02:55 +02:00
parent 9626d3a8a0
commit 8dac935cd8
No known key found for this signature in database
GPG Key ID: B8BE6D610E61C862
2 changed files with 127 additions and 52 deletions

View File

@ -1,24 +1,48 @@
package gitea
import "time"
import (
"net/http"
"time"
)
// FileResponse is the value stored in the response cache for a single
// requested resource. It describes either a regular file, a symlink, or a
// resource that is known not to exist.
type FileResponse struct {
	// Exists is false for cached "not found" results.
	Exists bool
	// IsSymlink marks entries whose Body holds the symlink destination path
	// instead of file content.
	IsSymlink bool
	// ETag is the value of the upstream ETag response header.
	ETag string
	// MimeType is the value of the upstream Content-Type response header.
	MimeType string
	// Body is the cached file content, or the link destination when
	// IsSymlink is set.
	Body []byte
}
// IsEmpty reports whether the response carries no body bytes.
//
// The previous implementation returned len(f.Body) != 0, i.e. the exact
// opposite of what the name promises.
func (f FileResponse) IsEmpty() bool {
	return len(f.Body) == 0
}
// createHttpResponse rebuilds a minimal *http.Response from the cached entry.
//
// Only the status code and headers are populated: 200 when the entry exists,
// 404 otherwise; the Gitea object-type header is added for symlinks; the ETag
// and Content-Type headers are always set, even when their values are empty.
// The response body is left unset.
func (f FileResponse) createHttpResponse() *http.Response {
	status := http.StatusNotFound
	if f.Exists {
		status = http.StatusOK
	}

	header := http.Header{}
	if f.IsSymlink {
		header.Set(giteaObjectTypeHeader, objTypeSymlink)
	}
	header.Set(eTagHeader, f.ETag)
	header.Set(contentTypeHeader, f.MimeType)

	return &http.Response{
		StatusCode: status,
		Header:     header,
	}
}
// BranchTimestamp pairs a branch name with a point in time.
// NOTE(review): presumably the timestamp of the branch's latest commit, stored
// under the branch-timestamp cache key — confirm against the code that fills it.
type BranchTimestamp struct {
// Branch is the name of the branch.
Branch string
// Timestamp is the time associated with Branch.
Timestamp time.Time
}
var (
const (
// defaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long.
defaultBranchCacheTimeout = 15 * time.Minute
@ -30,8 +54,8 @@ var (
// fileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending
// on your available memory.
// TODO: move as option into cache interface
// fileCacheTimeout = 5 * time.Minute
fileCacheTimeout = 5 * time.Minute
// fileCacheSizeLimit limits the maximum file size that will be cached, and is set to 1 MB by default.
// fileCacheSizeLimit = 1024 * 1024
fileCacheSizeLimit = int64(1024 * 1024)
)

View File

@ -1,11 +1,13 @@
package gitea
import (
"bytes"
"errors"
"fmt"
"io"
"net/http"
"net/url"
"strconv"
"strings"
"time"
@ -18,9 +20,19 @@ import (
// ErrorNotFound is the sentinel error returned by ServeRawContent when the
// requested resource does not exist, either according to a cached entry or
// because upstream answered with 404.
var ErrorNotFound = errors.New("not found")
// Cache key prefixes and header names shared by the Gitea client.
const (
	// cache key prefixes
	branchTimestampCacheKeyPrefix = "branchTime"
	defaultBranchCacheKeyPrefix   = "defaultBranch"
	rawContentCacheKeyPrefix      = "rawContent"

	// Gitea-specific response header and the value it carries for symlinks
	giteaObjectTypeHeader = "X-Gitea-Object-Type"
	objTypeSymlink        = "symlink"

	// standard HTTP header names
	eTagHeader          = "ETag"
	contentTypeHeader   = "Content-Type"
	contentLengthHeader = "Content-Length"
)
type Client struct {
@ -59,61 +71,100 @@ func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource str
}
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, *http.Response, error) {
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
log := log.With().Str("cache_key", cacheKey).Logger()
// handle if cache entry exist
if cache, ok := client.responseCache.Get(cacheKey); ok == true {
cache := cache.(FileResponse)
// TODO: check against some timestamp mismatch?!?
if cache.Exists {
if cache.IsSymlink {
linkDest := string(cache.Body)
log.Debug().Msgf("[cache] follow symlink from'%s' to '%s'", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
} else {
log.Debug().Msg("[cache] return bytes")
return io.NopCloser(bytes.NewReader(cache.Body)), cache.createHttpResponse(), nil
}
} else {
return nil, cache.createHttpResponse(), ErrorNotFound
}
}
// if cachedValue, ok := fileResponseCache.Get(uri + "?timestamp=" + o.timestamp()); ok && !cachedValue.(gitea.FileResponse).IsEmpty() {
// cachedResponse = cachedValue.(gitea.FileResponse)
reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS)
if resp != nil {
switch resp.StatusCode {
case http.StatusOK:
// first handle symlinks
{
objType := resp.Header.Get(giteaObjectTypeHeader)
log.Trace().Msgf("server raw content object: %s", objType)
if client.followSymlinks && objType == objTypeSymlink {
// limit to 10000 bytes
defer reader.Close()
linkDestBytes, err := io.ReadAll(io.LimitReader(reader, 10000))
if err != nil {
return nil, nil, err
}
linkDest := strings.TrimSpace(string(linkDestBytes))
// add caching
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: true,
IsSymlink: true,
Body: []byte(linkDest),
ETag: resp.Header.Get(eTagHeader),
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("could not save symlink in cache")
}
// Write the response body to the original request
// var cacheBodyWriter bytes.Buffer
// if res != nil {
// if res.Header.ContentLength() > fileCacheSizeLimit {
// // fasthttp else will set "Content-Length: 0"
// ctx.Response().SetBodyStream(&strings.Reader{}, -1)
//
// err = res.BodyWriteTo(ctx.Response.BodyWriter())
// } else {
// // TODO: cache is half-empty if request is cancelled - does the ctx.Err() below do the trick?
// err = res.BodyWriteTo(io.MultiWriter(ctx.Response().BodyWriter(), &cacheBodyWriter))
// }
// } else {
// _, err = ctx.Write(cachedResponse.Body)
// }
// if res != nil && res.Header.ContentLength() <= fileCacheSizeLimit && ctx.Err() == nil {
// cachedResponse.Exists = true
// cachedResponse.MimeType = mimeType
// cachedResponse.Body = cacheBodyWriter.Bytes()
// _ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), cachedResponse, fileCacheTimeout)
// }
// store ETag in resp !!!!
objType := resp.Header.Get(giteaObjectTypeHeader)
log.Trace().Msgf("server raw content object: %s", objType)
if client.followSymlinks && objType == "symlink" {
// limit to 1000 chars
defer reader.Close()
linkDestBytes, err := io.ReadAll(io.LimitReader(reader, 10000))
if err != nil {
return nil, nil, err
log.Debug().Msgf("follow symlink from '%s' to '%s'", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
}
linkDest := strings.TrimSpace(string(linkDestBytes))
log.Debug().Msgf("follow symlink from '%s' to '%s'", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
}
return reader, resp.Response, err
case http.StatusNotFound:
// now we are sure it's content
{
contentLeng, err2 := strconv.ParseInt(resp.Header.Get(contentLengthHeader), 20, 64)
if err2 != nil {
log.Error().Err(err2).Msg("could not parse content length")
}
if contentLeng <= 0 && contentLeng > fileCacheSizeLimit {
// if content to big or could not be determined we return now
return reader, resp.Response, err
}
// add not exist caching
// _ = fileResponseCache.Set(uri+"?timestamp="+o.timestamp(), gitea.FileResponse{
// Exists: false,
// }, fileCacheTimeout)
// now we write to cache and respond at the same time
// TODO: at the same time !!!
/*
we need a "go"
// TODO: cache is half-empty if request is cancelled - does the ctx.Err() below do the trick?
// err = res.BodyWriteTo(io.MultiWriter(ctx.Response().BodyWriter(), &cacheBodyWriter))
*/
body, err := io.ReadAll(io.LimitReader(reader, contentLeng))
if err != nil {
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: true,
ETag: resp.Header.Get(eTagHeader),
MimeType: resp.Header.Get(contentTypeHeader),
Body: body,
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("could not save content in cache")
}
}
return io.NopCloser(bytes.NewReader(body)), resp.Response, nil
}
case http.StatusNotFound:
if err := client.responseCache.Set(cacheKey, FileResponse{
Exists: false,
ETag: resp.Header.Get(eTagHeader),
}, fileCacheTimeout); err != nil {
log.Error().Err(err).Msg("could not save 404 in cache")
}
return nil, resp.Response, ErrorNotFound
default: