From 38426c26db6e461a678ff8e344f24a6e7bef38d6 Mon Sep 17 00:00:00 2001 From: 6543 <6543@obermui.de> Date: Sun, 5 Dec 2021 14:47:33 +0100 Subject: [PATCH] move upstream into own package --- html/error.go | 23 +++ server/domains.go | 9 +- server/handler.go | 303 +++--------------------------------- server/upstream/2rm.go | 10 ++ server/upstream/const.go | 18 +++ server/upstream/helper.go | 53 +++++++ server/upstream/upstream.go | 193 +++++++++++++++++++++++ 7 files changed, 321 insertions(+), 288 deletions(-) create mode 100644 html/error.go create mode 100644 server/upstream/2rm.go create mode 100644 server/upstream/const.go create mode 100644 server/upstream/helper.go create mode 100644 server/upstream/upstream.go diff --git a/html/error.go b/html/error.go new file mode 100644 index 0000000..f831443 --- /dev/null +++ b/html/error.go @@ -0,0 +1,23 @@ +package html + +import ( + "bytes" + "github.com/valyala/fasthttp" + "strconv" +) + +// ReturnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced +// with the provided status code. +func ReturnErrorPage(ctx *fasthttp.RequestCtx, code int) { + ctx.Response.SetStatusCode(code) + ctx.Response.Header.SetContentType("text/html; charset=utf-8") + message := fasthttp.StatusMessage(code) + if code == fasthttp.StatusMisdirectedRequest { + message += " - domain not specified in .domains file" + } + if code == fasthttp.StatusFailedDependency { + message += " - target repo/branch doesn't exist or is private" + } + // TODO: use template engine? + ctx.Response.SetBody(bytes.ReplaceAll(NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+message))) +} diff --git a/server/domains.go b/server/domains.go index 5673960..5aeae96 100644 --- a/server/domains.go +++ b/server/domains.go @@ -1,11 +1,14 @@ package server import ( - "github.com/OrlovEvgeny/go-mcache" - "github.com/valyala/fasthttp" "net" "strings" "time" + + "github.com/OrlovEvgeny/go-mcache" + "github.com/valyala/fasthttp" + + "codeberg.org/codeberg/pages/server/upstream" ) // DnsLookupCacheTimeout specifies the timeout for the DNS lookup cache. @@ -84,7 +87,7 @@ func checkCanonicalDomain(targetOwner, targetRepo, targetBranch, actualDomain, m req.SetRequestURI(giteaRoot + "/api/v1/repos/" + targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/.domains" + "?access_token=" + giteaApiToken) res := fasthttp.AcquireResponse() - err := upstreamClient.Do(req, res) + err := upstream.Client.Do(req, res) if err == nil && res.StatusCode() == fasthttp.StatusOK { for _, domain := range strings.Split(string(res.Body()), "\n") { domain = strings.ToLower(domain) diff --git a/server/handler.go b/server/handler.go index 80c1c79..a6a19f4 100644 --- a/server/handler.go +++ b/server/handler.go @@ -2,21 +2,13 @@ package server import ( "bytes" - "fmt" - "io" - "mime" - "path" - "strconv" "strings" - "time" - - "github.com/OrlovEvgeny/go-mcache" - "github.com/rs/zerolog/log" - "github.com/valyala/fasthttp" - "github.com/valyala/fastjson" "codeberg.org/codeberg/pages/html" + "codeberg.org/codeberg/pages/server/upstream" "codeberg.org/codeberg/pages/server/utils" + "github.com/rs/zerolog/log" + "github.com/valyala/fasthttp" ) // Handler handles a single HTTP request to the web server. @@ -49,7 +41,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp // Block blacklisted paths (like ACME challenges) for _, blacklistedPath := range blacklistedPaths { if bytes.HasPrefix(ctx.Path(), blacklistedPath) { - returnErrorPage(ctx, fasthttp.StatusForbidden) + html.ReturnErrorPage(ctx, fasthttp.StatusForbidden) return } } @@ -74,7 +66,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp // Prepare request information to Gitea var targetOwner, targetRepo, targetBranch, targetPath string - var targetOptions = &upstreamOptions{ + var targetOptions = &upstream.Options{ ForbiddenMimeTypes: map[string]struct{}{}, TryIndexPages: true, } @@ -87,7 +79,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp } // Check if the branch exists, otherwise treat it as a file path - branchTimestampResult := getBranchTimestamp(targetOwner, repo, branch, giteaRoot, giteaApiToken) + branchTimestampResult := upstream.GetBranchTimestamp(targetOwner, repo, branch, giteaRoot, giteaApiToken) if branchTimestampResult == nil { // branch doesn't exist return false @@ -96,9 +88,9 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp // Branch exists, use it targetRepo = repo targetPath = strings.Trim(strings.Join(path, "/"), "/") - targetBranch = branchTimestampResult.branch + targetBranch = branchTimestampResult.Branch - targetOptions.BranchTimestamp = branchTimestampResult.timestamp + targetOptions.BranchTimestamp = branchTimestampResult.Timestamp if canonicalLink != "" { // Hide from search machines & add canonical link @@ -128,8 +120,8 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp } // Try to request the file from the Gitea API - if !upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath, giteaRoot, giteaApiToken, targetOptions) { - returnErrorPage(ctx, ctx.Response.StatusCode()) + if !upstream.Upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath, giteaRoot, giteaApiToken, targetOptions) { + html.ReturnErrorPage(ctx, ctx.Response.StatusCode()) } } @@ -163,7 +155,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp return } log.Debug().Msg("missing branch") - returnErrorPage(ctx, fasthttp.StatusFailedDependency) + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) return } else { log.Debug().Msg("raw domain preparations, now trying with default branch") @@ -206,7 +198,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp log.Debug().Msg("tryBranch, now trying upstream") tryUpstream() } else { - returnErrorPage(ctx, fasthttp.StatusFailedDependency) + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) } return } @@ -219,7 +211,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp log.Debug().Msg("tryBranch, now trying upstream") tryUpstream() } else { - returnErrorPage(ctx, fasthttp.StatusFailedDependency) + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) } return } @@ -244,7 +236,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp } // Couldn't find a valid repo/branch - returnErrorPage(ctx, fasthttp.StatusFailedDependency) + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) return } else { trimmedHostStr := string(trimmedHost) @@ -252,7 +244,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp // Serve pages from external domains targetOwner, targetRepo, targetBranch = getTargetFromDNS(trimmedHostStr, string(mainDomainSuffix)) if targetOwner == "" { - returnErrorPage(ctx, fasthttp.StatusFailedDependency) + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) return } @@ -269,7 +261,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp if tryBranch(targetRepo, targetBranch, pathElements, canonicalLink) { canonicalDomain, valid := checkCanonicalDomain(targetOwner, targetRepo, targetBranch, trimmedHostStr, string(mainDomainSuffix), giteaRoot, giteaApiToken) if !valid { - returnErrorPage(ctx, fasthttp.StatusMisdirectedRequest) + html.ReturnErrorPage(ctx, fasthttp.StatusMisdirectedRequest) return } else if canonicalDomain != trimmedHostStr { // only redirect if the target is also a codeberg page! @@ -278,7 +270,7 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp ctx.Redirect("https://"+canonicalDomain+string(ctx.RequestURI()), fasthttp.StatusTemporaryRedirect) return } else { - returnErrorPage(ctx, fasthttp.StatusFailedDependency) + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) return } } @@ -287,268 +279,9 @@ func Handler(mainDomainSuffix, rawDomain []byte, giteaRoot, rawInfoPage, giteaAp tryUpstream() return } else { - returnErrorPage(ctx, fasthttp.StatusFailedDependency) + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) return } } } } - -// returnErrorPage sets the response status code and writes NotFoundPage to the response body, with "%status" replaced -// with the provided status code. -func returnErrorPage(ctx *fasthttp.RequestCtx, code int) { - ctx.Response.SetStatusCode(code) - ctx.Response.Header.SetContentType("text/html; charset=utf-8") - message := fasthttp.StatusMessage(code) - if code == fasthttp.StatusMisdirectedRequest { - message += " - domain not specified in .domains file" - } - if code == fasthttp.StatusFailedDependency { - message += " - target repo/branch doesn't exist or is private" - } - // TODO: use template engine? - ctx.Response.SetBody(bytes.ReplaceAll(html.NotFoundPage, []byte("%status"), []byte(strconv.Itoa(code)+" "+message))) -} - -// DefaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long. -var DefaultBranchCacheTimeout = 15 * time.Minute - -// BranchExistanceCacheTimeout specifies the timeout for the branch timestamp & existance cache. It should be shorter -// than FileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be -// picked up faster, while still allowing the content to be cached longer if nothing changes. -var BranchExistanceCacheTimeout = 5 * time.Minute - -// branchTimestampCache stores branch timestamps for faster cache checking -var branchTimestampCache = mcache.New() - -type branchTimestamp struct { - branch string - timestamp time.Time -} - -// FileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending -// on your available memory. -var FileCacheTimeout = 5 * time.Minute - -// FileCacheSizeLimit limits the maximum file size that will be cached, and is set to 1 MB by default. -var FileCacheSizeLimit = 1024 * 1024 - -// fileResponseCache stores responses from the Gitea server -// TODO: make this an MRU cache with a size limit -var fileResponseCache = mcache.New() - -type fileResponse struct { - exists bool - mimeType string - body []byte -} - -// getBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch -// (or nil if the branch doesn't exist) -func getBranchTimestamp(owner, repo, branch, giteaRoot, giteaApiToken string) *branchTimestamp { - if result, ok := branchTimestampCache.Get(owner + "/" + repo + "/" + branch); ok { - if result == nil { - return nil - } - return result.(*branchTimestamp) - } - result := &branchTimestamp{} - result.branch = branch - if branch == "" { - // Get default branch - var body = make([]byte, 0) - // TODO: use header for API key? - status, body, err := fasthttp.GetTimeout(body, giteaRoot+"/api/v1/repos/"+owner+"/"+repo+"?access_token="+giteaApiToken, 5*time.Second) - if err != nil || status != 200 { - _ = branchTimestampCache.Set(owner+"/"+repo+"/"+branch, nil, DefaultBranchCacheTimeout) - return nil - } - result.branch = fastjson.GetString(body, "default_branch") - } - - var body = make([]byte, 0) - status, body, err := fasthttp.GetTimeout(body, giteaRoot+"/api/v1/repos/"+owner+"/"+repo+"/branches/"+branch+"?access_token="+giteaApiToken, 5*time.Second) - if err != nil || status != 200 { - return nil - } - - result.timestamp, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp")) - _ = branchTimestampCache.Set(owner+"/"+repo+"/"+branch, result, BranchExistanceCacheTimeout) - return result -} - -var upstreamClient = fasthttp.Client{ - ReadTimeout: 10 * time.Second, - MaxConnDuration: 60 * time.Second, - MaxConnWaitTimeout: 1000 * time.Millisecond, - MaxConnsPerHost: 128 * 16, // TODO: adjust bottlenecks for best performance with Gitea! -} - -// upstreamIndexPages lists pages that may be considered as index pages for directories. -var upstreamIndexPages = []string{ - "index.html", -} - -// upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context. -func upstream(ctx *fasthttp.RequestCtx, targetOwner, targetRepo, targetBranch, targetPath, giteaRoot, giteaApiToken string, options *upstreamOptions) (final bool) { - log := log.With().Strs("upstream", []string{targetOwner, targetRepo, targetBranch, targetPath}).Logger() - - if options.ForbiddenMimeTypes == nil { - options.ForbiddenMimeTypes = map[string]struct{}{} - } - - // Check if the branch exists and when it was modified - if options.BranchTimestamp == (time.Time{}) { - branch := getBranchTimestamp(targetOwner, targetRepo, targetBranch, giteaRoot, giteaApiToken) - - if branch == nil { - returnErrorPage(ctx, fasthttp.StatusFailedDependency) - return true - } - targetBranch = branch.branch - options.BranchTimestamp = branch.timestamp - } - - if targetOwner == "" || targetRepo == "" || targetBranch == "" { - returnErrorPage(ctx, fasthttp.StatusBadRequest) - return true - } - - // Check if the browser has a cached version - if ifModifiedSince, err := time.Parse(time.RFC1123, string(ctx.Request.Header.Peek("If-Modified-Since"))); err == nil { - if !ifModifiedSince.Before(options.BranchTimestamp) { - ctx.Response.SetStatusCode(fasthttp.StatusNotModified) - return true - } - } - log.Debug().Msg("preparations") - - // Make a GET request to the upstream URL - uri := targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/" + targetPath - var req *fasthttp.Request - var res *fasthttp.Response - var cachedResponse fileResponse - var err error - if cachedValue, ok := fileResponseCache.Get(uri + "?timestamp=" + strconv.FormatInt(options.BranchTimestamp.Unix(), 10)); ok && len(cachedValue.(fileResponse).body) > 0 { - cachedResponse = cachedValue.(fileResponse) - } else { - req = fasthttp.AcquireRequest() - req.SetRequestURI(giteaRoot + "/api/v1/repos/" + uri + "?access_token=" + giteaApiToken) - res = fasthttp.AcquireResponse() - res.SetBodyStream(&strings.Reader{}, -1) - err = upstreamClient.Do(req, res) - } - log.Debug().Msg("acquisition") - - // Handle errors - if (res == nil && !cachedResponse.exists) || (res != nil && res.StatusCode() == fasthttp.StatusNotFound) { - if options.TryIndexPages { - // copy the options struct & try if an index page exists - optionsForIndexPages := *options - optionsForIndexPages.TryIndexPages = false - optionsForIndexPages.AppendTrailingSlash = true - for _, indexPage := range upstreamIndexPages { - if upstream(ctx, targetOwner, targetRepo, targetBranch, strings.TrimSuffix(targetPath, "/")+"/"+indexPage, giteaRoot, giteaApiToken, &optionsForIndexPages) { - _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), fileResponse{ - exists: false, - }, FileCacheTimeout) - return true - } - } - // compatibility fix for GitHub Pages (/example → /example.html) - optionsForIndexPages.AppendTrailingSlash = false - optionsForIndexPages.RedirectIfExists = string(ctx.Request.URI().Path()) + ".html" - if upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath+".html", giteaRoot, giteaApiToken, &optionsForIndexPages) { - _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), fileResponse{ - exists: false, - }, FileCacheTimeout) - return true - } - } - ctx.Response.SetStatusCode(fasthttp.StatusNotFound) - if res != nil { - // Update cache if the request is fresh - _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), fileResponse{ - exists: false, - }, FileCacheTimeout) - } - return false - } - if res != nil && (err != nil || res.StatusCode() != fasthttp.StatusOK) { - fmt.Printf("Couldn't fetch contents from \"%s\": %s (status code %d)\n", req.RequestURI(), err, res.StatusCode()) - returnErrorPage(ctx, fasthttp.StatusInternalServerError) - return true - } - - // Append trailing slash if missing (for index files), and redirect to fix filenames in general - // options.AppendTrailingSlash is only true when looking for index pages - if options.AppendTrailingSlash && !bytes.HasSuffix(ctx.Request.URI().Path(), []byte{'/'}) { - ctx.Redirect(string(ctx.Request.URI().Path())+"/", fasthttp.StatusTemporaryRedirect) - return true - } - if bytes.HasSuffix(ctx.Request.URI().Path(), []byte("/index.html")) { - ctx.Redirect(strings.TrimSuffix(string(ctx.Request.URI().Path()), "index.html"), fasthttp.StatusTemporaryRedirect) - return true - } - if options.RedirectIfExists != "" { - ctx.Redirect(options.RedirectIfExists, fasthttp.StatusTemporaryRedirect) - return true - } - log.Debug().Msg("error handling") - - // Set the MIME type - mimeType := mime.TypeByExtension(path.Ext(targetPath)) - mimeTypeSplit := strings.SplitN(mimeType, ";", 2) - if _, ok := options.ForbiddenMimeTypes[mimeTypeSplit[0]]; ok || mimeType == "" { - if options.DefaultMimeType != "" { - mimeType = options.DefaultMimeType - } else { - mimeType = "application/octet-stream" - } - } - ctx.Response.Header.SetContentType(mimeType) - - // Everything's okay so far - ctx.Response.SetStatusCode(fasthttp.StatusOK) - ctx.Response.Header.SetLastModified(options.BranchTimestamp) - - log.Debug().Msg("response preparations") - - // Write the response body to the original request - var cacheBodyWriter bytes.Buffer - if res != nil { - if res.Header.ContentLength() > FileCacheSizeLimit { - err = res.BodyWriteTo(ctx.Response.BodyWriter()) - } else { - // TODO: cache is half-empty if request is cancelled - does the ctx.Err() below do the trick? - err = res.BodyWriteTo(io.MultiWriter(ctx.Response.BodyWriter(), &cacheBodyWriter)) - } - } else { - _, err = ctx.Write(cachedResponse.body) - } - if err != nil { - fmt.Printf("Couldn't write body for \"%s\": %s\n", req.RequestURI(), err) - returnErrorPage(ctx, fasthttp.StatusInternalServerError) - return true - } - log.Debug().Msg("response") - - if res != nil && ctx.Err() == nil { - cachedResponse.exists = true - cachedResponse.mimeType = mimeType - cachedResponse.body = cacheBodyWriter.Bytes() - _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), cachedResponse, FileCacheTimeout) - } - - return true -} - -// upstreamOptions provides various options for the upstream request. -type upstreamOptions struct { - DefaultMimeType string - ForbiddenMimeTypes map[string]struct{} - TryIndexPages bool - AppendTrailingSlash bool - RedirectIfExists string - BranchTimestamp time.Time -} diff --git a/server/upstream/2rm.go b/server/upstream/2rm.go new file mode 100644 index 0000000..f02b452 --- /dev/null +++ b/server/upstream/2rm.go @@ -0,0 +1,10 @@ +package upstream + +import "github.com/OrlovEvgeny/go-mcache" + +// branchTimestampCache stores branch timestamps for faster cache checking +var branchTimestampCache = mcache.New() + +// fileResponseCache stores responses from the Gitea server +// TODO: make this an MRU cache with a size limit +var fileResponseCache = mcache.New() diff --git a/server/upstream/const.go b/server/upstream/const.go new file mode 100644 index 0000000..f51b152 --- /dev/null +++ b/server/upstream/const.go @@ -0,0 +1,18 @@ +package upstream + +import "time" + +// DefaultBranchCacheTimeout specifies the timeout for the default branch cache. It can be quite long. +var DefaultBranchCacheTimeout = 15 * time.Minute + +// BranchExistanceCacheTimeout specifies the timeout for the branch timestamp & existance cache. It should be shorter +// than FileCacheTimeout, as that gets invalidated if the branch timestamp has changed. That way, repo changes will be +// picked up faster, while still allowing the content to be cached longer if nothing changes. +var BranchExistanceCacheTimeout = 5 * time.Minute + +// FileCacheTimeout specifies the timeout for the file content cache - you might want to make this quite long, depending +// on your available memory. +var FileCacheTimeout = 5 * time.Minute + +// FileCacheSizeLimit limits the maximum file size that will be cached, and is set to 1 MB by default. +var FileCacheSizeLimit = 1024 * 1024 diff --git a/server/upstream/helper.go b/server/upstream/helper.go new file mode 100644 index 0000000..eeda2c2 --- /dev/null +++ b/server/upstream/helper.go @@ -0,0 +1,53 @@ +package upstream + +import ( + "time" + + "github.com/valyala/fasthttp" + "github.com/valyala/fastjson" +) + +type branchTimestamp struct { + Branch string + Timestamp time.Time +} + +// GetBranchTimestamp finds the default branch (if branch is "") and returns the last modification time of the branch +// (or nil if the branch doesn't exist) +func GetBranchTimestamp(owner, repo, branch, giteaRoot, giteaApiToken string) *branchTimestamp { + if result, ok := branchTimestampCache.Get(owner + "/" + repo + "/" + branch); ok { + if result == nil { + return nil + } + return result.(*branchTimestamp) + } + result := &branchTimestamp{} + result.Branch = branch + if branch == "" { + // Get default branch + var body = make([]byte, 0) + // TODO: use header for API key? + status, body, err := fasthttp.GetTimeout(body, giteaRoot+"/api/v1/repos/"+owner+"/"+repo+"?access_token="+giteaApiToken, 5*time.Second) + if err != nil || status != 200 { + _ = branchTimestampCache.Set(owner+"/"+repo+"/"+branch, nil, DefaultBranchCacheTimeout) + return nil + } + result.Branch = fastjson.GetString(body, "default_branch") + } + + var body = make([]byte, 0) + status, body, err := fasthttp.GetTimeout(body, giteaRoot+"/api/v1/repos/"+owner+"/"+repo+"/branches/"+branch+"?access_token="+giteaApiToken, 5*time.Second) + if err != nil || status != 200 { + return nil + } + + result.Timestamp, _ = time.Parse(time.RFC3339, fastjson.GetString(body, "commit", "timestamp")) + _ = branchTimestampCache.Set(owner+"/"+repo+"/"+branch, result, BranchExistanceCacheTimeout) + return result +} + +type fileResponse struct { + exists bool + mimeType string + body []byte +} diff --git a/server/upstream/upstream.go b/server/upstream/upstream.go new file mode 100644 index 0000000..a269e06 --- /dev/null +++ b/server/upstream/upstream.go @@ -0,0 +1,193 @@ +package upstream + +import ( + "bytes" + "fmt" + "io" + "mime" + "path" + "strconv" + "strings" + "time" + + "codeberg.org/codeberg/pages/html" + + "github.com/rs/zerolog/log" + "github.com/valyala/fasthttp" +) + +// upstreamIndexPages lists pages that may be considered as index pages for directories. +var upstreamIndexPages = []string{ + "index.html", +} + +// Options provides various options for the upstream request. +type Options struct { + DefaultMimeType string + ForbiddenMimeTypes map[string]struct{} + TryIndexPages bool + AppendTrailingSlash bool + RedirectIfExists string + BranchTimestamp time.Time +} + +var Client = fasthttp.Client{ + ReadTimeout: 10 * time.Second, + MaxConnDuration: 60 * time.Second, + MaxConnWaitTimeout: 1000 * time.Millisecond, + MaxConnsPerHost: 128 * 16, // TODO: adjust bottlenecks for best performance with Gitea! +} + +// Upstream requests a file from the Gitea API at GiteaRoot and writes it to the request context. +func Upstream(ctx *fasthttp.RequestCtx, targetOwner, targetRepo, targetBranch, targetPath, giteaRoot, giteaApiToken string, options *Options) (final bool) { + log := log.With().Strs("upstream", []string{targetOwner, targetRepo, targetBranch, targetPath}).Logger() + + if options.ForbiddenMimeTypes == nil { + options.ForbiddenMimeTypes = map[string]struct{}{} + } + + // Check if the branch exists and when it was modified + if options.BranchTimestamp == (time.Time{}) { + branch := GetBranchTimestamp(targetOwner, targetRepo, targetBranch, giteaRoot, giteaApiToken) + + if branch == nil { + html.ReturnErrorPage(ctx, fasthttp.StatusFailedDependency) + return true + } + targetBranch = branch.Branch + options.BranchTimestamp = branch.Timestamp + } + + if targetOwner == "" || targetRepo == "" || targetBranch == "" { + html.ReturnErrorPage(ctx, fasthttp.StatusBadRequest) + return true + } + + // Check if the browser has a cached version + if ifModifiedSince, err := time.Parse(time.RFC1123, string(ctx.Request.Header.Peek("If-Modified-Since"))); err == nil { + if !ifModifiedSince.Before(options.BranchTimestamp) { + ctx.Response.SetStatusCode(fasthttp.StatusNotModified) + return true + } + } + log.Debug().Msg("preparations") + + // Make a GET request to the upstream URL + uri := targetOwner + "/" + targetRepo + "/raw/" + targetBranch + "/" + targetPath + var req *fasthttp.Request + var res *fasthttp.Response + var cachedResponse fileResponse + var err error + if cachedValue, ok := fileResponseCache.Get(uri + "?timestamp=" + strconv.FormatInt(options.BranchTimestamp.Unix(), 10)); ok && len(cachedValue.(fileResponse).body) > 0 { + cachedResponse = cachedValue.(fileResponse) + } else { + req = fasthttp.AcquireRequest() + req.SetRequestURI(giteaRoot + "/api/v1/repos/" + uri + "?access_token=" + giteaApiToken) + res = fasthttp.AcquireResponse() + res.SetBodyStream(&strings.Reader{}, -1) + err = Client.Do(req, res) + } + log.Debug().Msg("acquisition") + + // Handle errors + if (res == nil && !cachedResponse.exists) || (res != nil && res.StatusCode() == fasthttp.StatusNotFound) { + if options.TryIndexPages { + // copy the options struct & try if an index page exists + optionsForIndexPages := *options + optionsForIndexPages.TryIndexPages = false + optionsForIndexPages.AppendTrailingSlash = true + for _, indexPage := range upstreamIndexPages { + if Upstream(ctx, targetOwner, targetRepo, targetBranch, strings.TrimSuffix(targetPath, "/")+"/"+indexPage, giteaRoot, giteaApiToken, &optionsForIndexPages) { + _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), fileResponse{ + exists: false, + }, FileCacheTimeout) + return true + } + } + // compatibility fix for GitHub Pages (/example → /example.html) + optionsForIndexPages.AppendTrailingSlash = false + optionsForIndexPages.RedirectIfExists = string(ctx.Request.URI().Path()) + ".html" + if Upstream(ctx, targetOwner, targetRepo, targetBranch, targetPath+".html", giteaRoot, giteaApiToken, &optionsForIndexPages) { + _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), fileResponse{ + exists: false, + }, FileCacheTimeout) + return true + } + } + ctx.Response.SetStatusCode(fasthttp.StatusNotFound) + if res != nil { + // Update cache if the request is fresh + _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), fileResponse{ + exists: false, + }, FileCacheTimeout) + } + return false + } + if res != nil && (err != nil || res.StatusCode() != fasthttp.StatusOK) { + fmt.Printf("Couldn't fetch contents from \"%s\": %s (status code %d)\n", req.RequestURI(), err, res.StatusCode()) + html.ReturnErrorPage(ctx, fasthttp.StatusInternalServerError) + return true + } + + // Append trailing slash if missing (for index files), and redirect to fix filenames in general + // options.AppendTrailingSlash is only true when looking for index pages + if options.AppendTrailingSlash && !bytes.HasSuffix(ctx.Request.URI().Path(), []byte{'/'}) { + ctx.Redirect(string(ctx.Request.URI().Path())+"/", fasthttp.StatusTemporaryRedirect) + return true + } + if bytes.HasSuffix(ctx.Request.URI().Path(), []byte("/index.html")) { + ctx.Redirect(strings.TrimSuffix(string(ctx.Request.URI().Path()), "index.html"), fasthttp.StatusTemporaryRedirect) + return true + } + if options.RedirectIfExists != "" { + ctx.Redirect(options.RedirectIfExists, fasthttp.StatusTemporaryRedirect) + return true + } + log.Debug().Msg("error handling") + + // Set the MIME type + mimeType := mime.TypeByExtension(path.Ext(targetPath)) + mimeTypeSplit := strings.SplitN(mimeType, ";", 2) + if _, ok := options.ForbiddenMimeTypes[mimeTypeSplit[0]]; ok || mimeType == "" { + if options.DefaultMimeType != "" { + mimeType = options.DefaultMimeType + } else { + mimeType = "application/octet-stream" + } + } + ctx.Response.Header.SetContentType(mimeType) + + // Everything's okay so far + ctx.Response.SetStatusCode(fasthttp.StatusOK) + ctx.Response.Header.SetLastModified(options.BranchTimestamp) + + log.Debug().Msg("response preparations") + + // Write the response body to the original request + var cacheBodyWriter bytes.Buffer + if res != nil { + if res.Header.ContentLength() > FileCacheSizeLimit { + err = res.BodyWriteTo(ctx.Response.BodyWriter()) + } else { + // TODO: cache is half-empty if request is cancelled - does the ctx.Err() below do the trick? + err = res.BodyWriteTo(io.MultiWriter(ctx.Response.BodyWriter(), &cacheBodyWriter)) + } + } else { + _, err = ctx.Write(cachedResponse.body) + } + if err != nil { + fmt.Printf("Couldn't write body for \"%s\": %s\n", req.RequestURI(), err) + html.ReturnErrorPage(ctx, fasthttp.StatusInternalServerError) + return true + } + log.Debug().Msg("response") + + if res != nil && ctx.Err() == nil { + cachedResponse.exists = true + cachedResponse.mimeType = mimeType + cachedResponse.body = cacheBodyWriter.Bytes() + _ = fileResponseCache.Set(uri+"?timestamp="+strconv.FormatInt(options.BranchTimestamp.Unix(), 10), cachedResponse, FileCacheTimeout) + } + + return true +}