Implement static serving of compressed files

This commit is contained in:
ltdk 2024-09-08 23:31:23 -04:00
parent 557a295732
commit abbb29df55
4 changed files with 154 additions and 20 deletions

View File

@ -37,15 +37,20 @@ type FileResponse struct {
Exists bool
IsSymlink bool
ETag string
MimeType string
Body []byte
// uncompressed MIME type
MimeType string
// raw MIME type (if compressed, type of compression)
RawMime string
Body []byte
}
func (f FileResponse) IsEmpty() bool {
return len(f.Body) == 0
}
func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, statusCode int) {
func (f FileResponse) createHttpResponse(cacheKey string, decompress bool) (header http.Header, statusCode int) {
header = make(http.Header)
if f.Exists {
@ -58,7 +63,12 @@ func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, s
header.Set(giteaObjectTypeHeader, objTypeSymlink)
}
header.Set(ETagHeader, f.ETag)
header.Set(ContentTypeHeader, f.MimeType)
if decompress {
header.Set(ContentTypeHeader, f.MimeType)
} else {
header.Set(ContentTypeHeader, f.RawMime)
}
header.Set(ContentLengthHeader, fmt.Sprintf("%d", len(f.Body)))
header.Set(PagesCacheIndicatorHeader, "true")

View File

@ -39,9 +39,10 @@ const (
objTypeSymlink = "symlink"
// std
ETagHeader = "ETag"
ContentTypeHeader = "Content-Type"
ContentLengthHeader = "Content-Length"
ETagHeader = "ETag"
ContentTypeHeader = "Content-Type"
ContentLengthHeader = "Content-Length"
ContentEncodingHeader = "Content-Encoding"
)
type Client struct {
@ -103,7 +104,7 @@ func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource s
}
func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) {
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource)
reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource, false)
if err != nil {
return nil, err
}
@ -111,21 +112,21 @@ func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource str
return io.ReadAll(reader)
}
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) {
func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string, decompress bool) (io.ReadCloser, http.Header, int, error) {
cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource)
log := log.With().Str("cache_key", cacheKey).Logger()
log.Trace().Msg("try file in cache")
// handle if cache entry exist
if cache, ok := client.responseCache.Get(cacheKey); ok {
cache := cache.(FileResponse)
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey)
cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey, decompress)
// TODO: check against some timestamp mismatch?!?
if cache.Exists {
log.Debug().Msg("[cache] exists")
if cache.IsSymlink {
linkDest := string(cache.Body)
log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress)
} else if !cache.IsEmpty() {
log.Debug().Msgf("[cache] return %d bytes", len(cache.Body))
return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil
@ -170,13 +171,17 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str
}
log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest)
return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress)
}
}
// now we are sure it's content so set the MIME type
mimeType := client.getMimeTypeByExtension(resource)
resp.Response.Header.Set(ContentTypeHeader, mimeType)
mimeType, rawType := client.getMimeTypeByExtension(resource)
if decompress {
resp.Response.Header.Set(ContentTypeHeader, mimeType)
} else {
resp.Response.Header.Set(ContentTypeHeader, rawType)
}
if !shouldRespBeSavedToCache(resp.Response) {
return reader, resp.Response.Header, resp.StatusCode, err
@ -187,6 +192,7 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str
Exists: true,
ETag: resp.Header.Get(ETagHeader),
MimeType: mimeType,
RawMime: rawType,
}
return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil
@ -300,16 +306,35 @@ func (client *Client) GiteaCheckIfOwnerExists(owner string) (bool, error) {
return false, nil
}
func (client *Client) getMimeTypeByExtension(resource string) string {
mimeType := mime.TypeByExtension(path.Ext(resource))
func (client *Client) extToMime(ext string) string {
mimeType := mime.TypeByExtension(ext)
mimeTypeSplit := strings.SplitN(mimeType, ";", 2)
if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" {
mimeType = client.defaultMimeType
}
log.Trace().Msgf("probe mime of %q is %q", resource, mimeType)
return mimeType
}
func (client *Client) getMimeTypeByExtension(resource string) (string, string) {
rawExt := path.Ext(resource)
innerExt := rawExt
switch rawExt {
case ".gz":
fallthrough
case ".br":
fallthrough
case ".zst":
innerExt = path.Ext(resource[:len(resource)-len(rawExt)])
}
rawType := client.extToMime(rawExt)
mimeType := rawType
if innerExt != rawExt {
mimeType = client.extToMime(innerExt)
}
log.Trace().Msgf("probe mime of %q is (%q / raw %q)", resource, mimeType, rawType)
return mimeType, rawType
}
func shouldRespBeSavedToCache(resp *http.Response) bool {
if resp == nil {
return false

View File

@ -24,5 +24,8 @@ func (o *Options) setHeader(ctx *context.Context, header http.Header) {
} else {
ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, mime)
}
if encoding := header.Get(gitea.ContentEncodingHeader); encoding != "" && encoding != "identity" {
ctx.RespWriter.Header().Set(gitea.ContentEncodingHeader, encoding)
}
ctx.RespWriter.Header().Set(headerLastModified, o.BranchTimestamp.In(time.UTC).Format(http.TimeFormat))
}

View File

@ -1,10 +1,13 @@
package upstream
import (
"cmp"
"errors"
"fmt"
"io"
"net/http"
"slices"
"strconv"
"strings"
"time"
@ -19,6 +22,8 @@ import (
const (
headerLastModified = "Last-Modified"
headerIfModifiedSince = "If-Modified-Since"
headerAcceptEncoding = "Accept-Encoding"
headerContentEncoding = "Content-Encoding"
rawMime = "text/plain; charset=utf-8"
)
@ -52,6 +57,73 @@ type Options struct {
ServeRaw bool
}
// allowed encodings
var allowedEncodings = map[string]string{
"gzip": ".gz",
"br": ".br",
"zstd": ".zst",
"identity": "",
}
// parses Accept-Encoding header into a list of acceptable encodings
func AcceptEncodings(header string) []string {
log.Trace().Msgf("got accept-encoding: %s", header)
encodings := []string{}
globQuality := 0.0
qualities := make(map[string]float64)
for _, encoding := range strings.Split(header, ",") {
splits := strings.SplitN(encoding, ";q=", 2)
name := splits[0]
quality := 1.0
if len(splits) > 1 {
var err error
quality, err = strconv.ParseFloat(splits[1], 64)
if err != nil || quality < 0 {
continue
}
}
name = strings.TrimSpace(name)
if name == "*" {
globQuality = quality
} else {
_, allowed := allowedEncodings[name]
if allowed {
qualities[name] = quality
if quality > 0 {
encodings = append(encodings, name)
}
}
}
}
if globQuality > 0 {
for encoding := range allowedEncodings {
_, exists := qualities[encoding]
if !exists {
encodings = append(encodings, encoding)
qualities[encoding] = globQuality
}
}
} else {
_, exists := qualities["identity"]
if !exists {
encodings = append(encodings, "identity")
qualities["identity"] = -1
}
}
slices.SortStableFunc(encodings, func(x, y string) int {
// sort in reverse order; big quality comes first
return cmp.Compare(qualities[y], qualities[x])
})
log.Trace().Msgf("decided encoding order: %#v", encodings)
return encodings
}
// Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context.
func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.ICache) bool {
log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger()
@ -97,9 +169,33 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi
log.Debug().Msg("Preparing")
reader, header, statusCode, err := giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath)
if reader != nil {
defer reader.Close()
var reader io.ReadCloser
var header http.Header
var statusCode int
var err error
// pick first non-404 response for encoding, *only* if not root
if o.TargetPath == "" || strings.HasSuffix(o.TargetPath, "/") {
err = gitea.ErrorNotFound
} else {
for _, encoding := range AcceptEncodings(ctx.Req.Header.Get(headerAcceptEncoding)) {
log.Trace().Msgf("try %s encoding", encoding)
// add extension for encoding
path := o.TargetPath + allowedEncodings[encoding]
reader, header, statusCode, err = giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, path, true)
if statusCode == 404 {
continue
}
log.Debug().Msgf("using %s encoding", encoding)
if encoding != "identity" {
header.Set(headerContentEncoding, encoding)
}
break
}
if reader != nil {
defer reader.Close()
}
}
log.Debug().Msg("Aquisting")