package gitea import ( "bytes" "errors" "fmt" "io" "mime" "net/http" "net/url" "path" "strings" "time" "code.gitea.io/sdk/gitea" "github.com/rs/zerolog/log" "codeberg.org/codeberg/pages/config" "codeberg.org/codeberg/pages/server/cache" "codeberg.org/codeberg/pages/server/version" ) var ErrorNotFound = errors.New("not found") const ( // cache key prefixes branchTimestampCacheKeyPrefix = "branchTime" defaultBranchCacheKeyPrefix = "defaultBranch" rawContentCacheKeyPrefix = "rawContent" ownerExistenceKeyPrefix = "ownerExist" // pages server PagesCacheIndicatorHeader = "X-Pages-Cache" symlinkReadLimit = 10000 // gitea giteaObjectTypeHeader = "X-Gitea-Object-Type" objTypeSymlink = "symlink" // std ETagHeader = "ETag" ContentTypeHeader = "Content-Type" ContentLengthHeader = "Content-Length" ) type Client struct { sdkClient *gitea.Client responseCache cache.ICache giteaRoot string followSymlinks bool supportLFS bool forbiddenMimeTypes map[string]bool defaultMimeType string } func NewClient(cfg config.GiteaConfig, respCache cache.ICache) (*Client, error) { rootURL, err := url.Parse(cfg.Root) if err != nil { return nil, err } giteaRoot := strings.Trim(rootURL.String(), "/") stdClient := http.Client{Timeout: 10 * time.Second} forbiddenMimeTypes := make(map[string]bool, len(cfg.ForbiddenMimeTypes)) for _, mimeType := range cfg.ForbiddenMimeTypes { forbiddenMimeTypes[mimeType] = true } defaultMimeType := cfg.DefaultMimeType if defaultMimeType == "" { defaultMimeType = "application/octet-stream" } sdk, err := gitea.NewClient( giteaRoot, gitea.SetHTTPClient(&stdClient), gitea.SetToken(cfg.Token), gitea.SetUserAgent("pages-server/"+version.Version), ) return &Client{ sdkClient: sdk, responseCache: respCache, giteaRoot: giteaRoot, followSymlinks: cfg.FollowSymlinks, supportLFS: cfg.LFSEnabled, forbiddenMimeTypes: forbiddenMimeTypes, defaultMimeType: defaultMimeType, }, err } func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource string) string { return path.Join(client.giteaRoot, targetOwner, targetRepo, "src/branch", branch, resource) } func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) { reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource) if err != nil { return nil, err } defer reader.Close() return io.ReadAll(reader) } func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) { cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource) log := log.With().Str("cache_key", cacheKey).Logger() log.Trace().Msg("try file in cache") // handle if cache entry exist if cacheMetadata, ok := client.responseCache.Get(cacheKey + "|Metadata"); ok { cache := FileResponseFromMetadataString(string(cacheMetadata)) cache.Body, _ = client.responseCache.Get(cacheKey + "|Body") // TODO: don't grab the content from the cache if the ETag matches?! cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey) // TODO: check against some timestamp mismatch?!? if cache.Exists { if cache.IsSymlink { linkDest := string(cache.Body) log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest) return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) } else { log.Debug().Msgf("[cache] return %d bytes", len(cache.Body)) return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil } } else { return nil, nil, http.StatusNotFound, ErrorNotFound } } // TODO: metadata not written, is close ever called? log.Trace().Msg("file not in cache") // not in cache, open reader via gitea api reader, resp, err := client.sdkClient.GetFileReader(targetOwner, targetRepo, ref, resource, client.supportLFS) if resp != nil { switch resp.StatusCode { case http.StatusOK: // first handle symlinks { objType := resp.Header.Get(giteaObjectTypeHeader) log.Trace().Msgf("server raw content object %q", objType) if client.followSymlinks && objType == objTypeSymlink { defer reader.Close() // read limited chars for symlink linkDestBytes, err := io.ReadAll(io.LimitReader(reader, symlinkReadLimit)) if err != nil { return nil, nil, http.StatusInternalServerError, err } linkDest := strings.TrimSpace(string(linkDestBytes)) // handle relative links // we first remove the link from the path, and make a relative join (resolve parent paths like "/../" too) linkDest = path.Join(path.Dir(resource), linkDest) // we store symlink not content to reduce duplicates in cache fileResponse := FileResponse{ Exists: true, IsSymlink: true, Body: []byte(linkDest), ETag: resp.Header.Get(ETagHeader), } log.Trace().Msgf("file response has %d bytes", len(fileResponse.Body)) if err := client.responseCache.Set(cacheKey+"|Metadata", []byte(fileResponse.MetadataAsString()), fileCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } // TODO: Test with binary files, as we convert []byte to string! Using []byte values might makes more sense anyways. if err := client.responseCache.Set(cacheKey+"|Body", fileResponse.Body, fileCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest) return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) } } // now we are sure it's content so set the MIME type mimeType := client.getMimeTypeByExtension(resource) resp.Response.Header.Set(ContentTypeHeader, mimeType) // now we write to cache and respond at the same time fileResp := FileResponse{ Exists: true, ETag: resp.Header.Get(ETagHeader), MimeType: mimeType, } return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil case http.StatusNotFound: if err := client.responseCache.Set(cacheKey+"|Metadata", []byte(FileResponse{ETag: resp.Header.Get(ETagHeader)}.MetadataAsString()), fileCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return nil, resp.Response.Header, http.StatusNotFound, ErrorNotFound default: return nil, resp.Response.Header, resp.StatusCode, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } } return nil, nil, http.StatusInternalServerError, err } func (client *Client) GiteaGetRepoBranchTimestamp(repoOwner, repoName, branchName string) (*BranchTimestamp, error) { cacheKey := fmt.Sprintf("%s/%s/%s/%s", branchTimestampCacheKeyPrefix, repoOwner, repoName, branchName) if stamp, ok := client.responseCache.Get(cacheKey); ok { if len(stamp) == 0 { log.Trace().Msgf("[cache] use branch %q not found", branchName) return &BranchTimestamp{}, ErrorNotFound } log.Trace().Msgf("[cache] use branch %q exist", branchName) // This comes from the refactoring of the caching library. // The branch as reported by the API was stored in the cache, and I'm not sure if there are // situations where it differs from the name in the request, hence this is left here. stampParts := strings.SplitN(string(stamp), "|", 2) stampTime, _ := time.Parse(time.RFC3339, stampParts[0]) return &BranchTimestamp{ Branch: stampParts[1], Timestamp: stampTime, }, nil } branch, resp, err := client.sdkClient.GetRepoBranch(repoOwner, repoName, branchName) if err != nil { if resp != nil && resp.StatusCode == http.StatusNotFound { log.Trace().Msgf("[cache] set cache branch %q not found", branchName) if err := client.responseCache.Set(cacheKey, []byte{}, branchExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return &BranchTimestamp{}, ErrorNotFound } return &BranchTimestamp{}, err } if resp.StatusCode != http.StatusOK { return &BranchTimestamp{}, fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } stamp := &BranchTimestamp{ Branch: branch.Name, Timestamp: branch.Commit.Timestamp, } log.Trace().Msgf("set cache branch [%s] exist", branchName) if err := client.responseCache.Set(cacheKey, []byte(stamp.Timestamp.Format(time.RFC3339)+"|"+stamp.Branch), branchExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return stamp, nil } func (client *Client) GiteaGetRepoDefaultBranch(repoOwner, repoName string) (string, error) { cacheKey := fmt.Sprintf("%s/%s/%s", defaultBranchCacheKeyPrefix, repoOwner, repoName) if branch, ok := client.responseCache.Get(cacheKey); ok { return string(branch), nil } repo, resp, err := client.sdkClient.GetRepo(repoOwner, repoName) if err != nil { return "", err } if resp.StatusCode != http.StatusOK { return "", fmt.Errorf("unexpected status code '%d'", resp.StatusCode) } branch := repo.DefaultBranch if err := client.responseCache.Set(cacheKey, []byte(branch), defaultBranchCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return branch, nil } func (client *Client) GiteaCheckIfOwnerExists(owner string) (bool, error) { cacheKey := fmt.Sprintf("%s/%s", ownerExistenceKeyPrefix, owner) if exist, ok := client.responseCache.Get(cacheKey); ok && exist != nil { return string(exist) == "true", nil } _, resp, err := client.sdkClient.GetUserInfo(owner) if resp.StatusCode == http.StatusOK && err == nil { if err := client.responseCache.Set(cacheKey, []byte("true"), ownerExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return true, nil } else if resp.StatusCode != http.StatusNotFound { return false, err } _, resp, err = client.sdkClient.GetOrg(owner) if resp.StatusCode == http.StatusOK && err == nil { if err := client.responseCache.Set(cacheKey, []byte("true"), ownerExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return true, nil } else if resp.StatusCode != http.StatusNotFound { return false, err } if err := client.responseCache.Set(cacheKey, []byte("false"), ownerExistenceCacheTimeout); err != nil { log.Error().Err(err).Msg("[cache] error on cache write") } return false, nil } func (client *Client) getMimeTypeByExtension(resource string) string { mimeType := mime.TypeByExtension(path.Ext(resource)) mimeTypeSplit := strings.SplitN(mimeType, ";", 2) if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" { mimeType = client.defaultMimeType } log.Trace().Msgf("probe mime of %q is %q", resource, mimeType) return mimeType }