From abbb29df554755f268cc3081b07c9ce5ae34ab3d Mon Sep 17 00:00:00 2001 From: ltdk Date: Sun, 8 Sep 2024 23:31:23 -0400 Subject: [PATCH] Implement static serving of compressed files --- server/gitea/cache.go | 18 +++++-- server/gitea/client.go | 51 +++++++++++++----- server/upstream/header.go | 3 ++ server/upstream/upstream.go | 102 ++++++++++++++++++++++++++++++++++-- 4 files changed, 154 insertions(+), 20 deletions(-) diff --git a/server/gitea/cache.go b/server/gitea/cache.go index 97024a1..cfb7c2a 100644 --- a/server/gitea/cache.go +++ b/server/gitea/cache.go @@ -37,15 +37,20 @@ type FileResponse struct { Exists bool IsSymlink bool ETag string - MimeType string - Body []byte + + // uncompressed MIME type + MimeType string + + // raw MIME type (if compressed, type of compression) + RawMime string + Body []byte } func (f FileResponse) IsEmpty() bool { return len(f.Body) == 0 } -func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, statusCode int) { +func (f FileResponse) createHttpResponse(cacheKey string, decompress bool) (header http.Header, statusCode int) { header = make(http.Header) if f.Exists { @@ -58,7 +63,12 @@ func (f FileResponse) createHttpResponse(cacheKey string) (header http.Header, s header.Set(giteaObjectTypeHeader, objTypeSymlink) } header.Set(ETagHeader, f.ETag) - header.Set(ContentTypeHeader, f.MimeType) + + if decompress { + header.Set(ContentTypeHeader, f.MimeType) + } else { + header.Set(ContentTypeHeader, f.RawMime) + } header.Set(ContentLengthHeader, fmt.Sprintf("%d", len(f.Body))) header.Set(PagesCacheIndicatorHeader, "true") diff --git a/server/gitea/client.go b/server/gitea/client.go index 3abb487..7037805 100644 --- a/server/gitea/client.go +++ b/server/gitea/client.go @@ -39,9 +39,10 @@ const ( objTypeSymlink = "symlink" // std - ETagHeader = "ETag" - ContentTypeHeader = "Content-Type" - ContentLengthHeader = "Content-Length" + ETagHeader = "ETag" + ContentTypeHeader = "Content-Type" + ContentLengthHeader = "Content-Length" + ContentEncodingHeader = "Content-Encoding" ) type Client struct { @@ -103,7 +104,7 @@ func (client *Client) ContentWebLink(targetOwner, targetRepo, branch, resource s } func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource string) ([]byte, error) { - reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource) + reader, _, _, err := client.ServeRawContent(targetOwner, targetRepo, ref, resource, false) if err != nil { return nil, err } @@ -111,21 +112,21 @@ func (client *Client) GiteaRawContent(targetOwner, targetRepo, ref, resource str return io.ReadAll(reader) } -func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string) (io.ReadCloser, http.Header, int, error) { +func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource string, decompress bool) (io.ReadCloser, http.Header, int, error) { cacheKey := fmt.Sprintf("%s/%s/%s|%s|%s", rawContentCacheKeyPrefix, targetOwner, targetRepo, ref, resource) log := log.With().Str("cache_key", cacheKey).Logger() log.Trace().Msg("try file in cache") // handle if cache entry exist if cache, ok := client.responseCache.Get(cacheKey); ok { cache := cache.(FileResponse) - cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey) + cachedHeader, cachedStatusCode := cache.createHttpResponse(cacheKey, decompress) // TODO: check against some timestamp mismatch?!? if cache.Exists { log.Debug().Msg("[cache] exists") if cache.IsSymlink { linkDest := string(cache.Body) log.Debug().Msgf("[cache] follow symlink from %q to %q", resource, linkDest) - return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) + return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress) } else if !cache.IsEmpty() { log.Debug().Msgf("[cache] return %d bytes", len(cache.Body)) return io.NopCloser(bytes.NewReader(cache.Body)), cachedHeader, cachedStatusCode, nil @@ -170,13 +171,17 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str } log.Debug().Msgf("follow symlink from %q to %q", resource, linkDest) - return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest) + return client.ServeRawContent(targetOwner, targetRepo, ref, linkDest, decompress) } } // now we are sure it's content so set the MIME type - mimeType := client.getMimeTypeByExtension(resource) - resp.Response.Header.Set(ContentTypeHeader, mimeType) + mimeType, rawType := client.getMimeTypeByExtension(resource) + if decompress { + resp.Response.Header.Set(ContentTypeHeader, mimeType) + } else { + resp.Response.Header.Set(ContentTypeHeader, rawType) + } if !shouldRespBeSavedToCache(resp.Response) { return reader, resp.Response.Header, resp.StatusCode, err @@ -187,6 +192,7 @@ func (client *Client) ServeRawContent(targetOwner, targetRepo, ref, resource str Exists: true, ETag: resp.Header.Get(ETagHeader), MimeType: mimeType, + RawMime: rawType, } return fileResp.CreateCacheReader(reader, client.responseCache, cacheKey), resp.Response.Header, resp.StatusCode, nil @@ -300,16 +306,35 @@ func (client *Client) GiteaCheckIfOwnerExists(owner string) (bool, error) { return false, nil } -func (client *Client) getMimeTypeByExtension(resource string) string { - mimeType := mime.TypeByExtension(path.Ext(resource)) +func (client *Client) extToMime(ext string) string { + mimeType := mime.TypeByExtension(ext) mimeTypeSplit := strings.SplitN(mimeType, ";", 2) if client.forbiddenMimeTypes[mimeTypeSplit[0]] || mimeType == "" { mimeType = client.defaultMimeType } - log.Trace().Msgf("probe mime of %q is %q", resource, mimeType) return mimeType } +func (client *Client) getMimeTypeByExtension(resource string) (string, string) { + rawExt := path.Ext(resource) + innerExt := rawExt + switch rawExt { + case ".gz": + fallthrough + case ".br": + fallthrough + case ".zst": + innerExt = path.Ext(resource[:len(resource)-len(rawExt)]) + } + rawType := client.extToMime(rawExt) + mimeType := rawType + if innerExt != rawExt { + mimeType = client.extToMime(innerExt) + } + log.Trace().Msgf("probe mime of %q is (%q / raw %q)", resource, mimeType, rawType) + return mimeType, rawType +} + func shouldRespBeSavedToCache(resp *http.Response) bool { if resp == nil { return false diff --git a/server/upstream/header.go b/server/upstream/header.go index 7b85df1..3a218a1 100644 --- a/server/upstream/header.go +++ b/server/upstream/header.go @@ -24,5 +24,8 @@ func (o *Options) setHeader(ctx *context.Context, header http.Header) { } else { ctx.RespWriter.Header().Set(gitea.ContentTypeHeader, mime) } + if encoding := header.Get(gitea.ContentEncodingHeader); encoding != "" && encoding != "identity" { + ctx.RespWriter.Header().Set(gitea.ContentEncodingHeader, encoding) + } ctx.RespWriter.Header().Set(headerLastModified, o.BranchTimestamp.In(time.UTC).Format(http.TimeFormat)) } diff --git a/server/upstream/upstream.go b/server/upstream/upstream.go index d9c131e..0ecef82 100644 --- a/server/upstream/upstream.go +++ b/server/upstream/upstream.go @@ -1,10 +1,13 @@ package upstream import ( + "cmp" "errors" "fmt" "io" "net/http" + "slices" + "strconv" "strings" "time" @@ -19,6 +22,8 @@ import ( const ( headerLastModified = "Last-Modified" headerIfModifiedSince = "If-Modified-Since" + headerAcceptEncoding = "Accept-Encoding" + headerContentEncoding = "Content-Encoding" rawMime = "text/plain; charset=utf-8" ) @@ -52,6 +57,73 @@ type Options struct { ServeRaw bool } +// allowed encodings +var allowedEncodings = map[string]string{ + "gzip": ".gz", + "br": ".br", + "zstd": ".zst", + "identity": "", +} + +// parses Accept-Encoding header into a list of acceptable encodings +func AcceptEncodings(header string) []string { + log.Trace().Msgf("got accept-encoding: %s", header) + encodings := []string{} + globQuality := 0.0 + qualities := make(map[string]float64) + + for _, encoding := range strings.Split(header, ",") { + splits := strings.SplitN(encoding, ";q=", 2) + name := splits[0] + quality := 1.0 + + if len(splits) > 1 { + var err error + quality, err = strconv.ParseFloat(splits[1], 64) + if err != nil || quality < 0 { + continue + } + } + + name = strings.TrimSpace(name) + + if name == "*" { + globQuality = quality + } else { + _, allowed := allowedEncodings[name] + if allowed { + qualities[name] = quality + if quality > 0 { + encodings = append(encodings, name) + } + } + } + } + + if globQuality > 0 { + for encoding := range allowedEncodings { + _, exists := qualities[encoding] + if !exists { + encodings = append(encodings, encoding) + qualities[encoding] = globQuality + } + } + } else { + _, exists := qualities["identity"] + if !exists { + encodings = append(encodings, "identity") + qualities["identity"] = -1 + } + } + + slices.SortStableFunc(encodings, func(x, y string) int { + // sort in reverse order; big quality comes first + return cmp.Compare(qualities[y], qualities[x]) + }) + log.Trace().Msgf("decided encoding order: %#v", encodings) + return encodings +} + // Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context. func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redirectsCache cache.ICache) bool { log := log.With().Strs("upstream", []string{o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath}).Logger() @@ -97,9 +169,33 @@ func (o *Options) Upstream(ctx *context.Context, giteaClient *gitea.Client, redi log.Debug().Msg("Preparing") - reader, header, statusCode, err := giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, o.TargetPath) - if reader != nil { - defer reader.Close() + var reader io.ReadCloser + var header http.Header + var statusCode int + var err error + + // pick first non-404 response for encoding, *only* if not root + if o.TargetPath == "" || strings.HasSuffix(o.TargetPath, "/") { + err = gitea.ErrorNotFound + } else { + for _, encoding := range AcceptEncodings(ctx.Req.Header.Get(headerAcceptEncoding)) { + log.Trace().Msgf("try %s encoding", encoding) + + // add extension for encoding + path := o.TargetPath + allowedEncodings[encoding] + reader, header, statusCode, err = giteaClient.ServeRawContent(o.TargetOwner, o.TargetRepo, o.TargetBranch, path, true) + if statusCode == 404 { + continue + } + log.Debug().Msgf("using %s encoding", encoding) + if encoding != "identity" { + header.Set(headerContentEncoding, encoding) + } + break + } + if reader != nil { + defer reader.Close() + } } log.Debug().Msg("Aquisting")