From 498266e08e4d3312789d54e3e78849df5e78bad3 Mon Sep 17 00:00:00 2001 From: wjqserver <114663932+WJQSERVER@users.noreply.github.com> Date: Tue, 11 Mar 2025 18:07:17 +0800 Subject: [PATCH] 25w16c --- CHANGELOG.md | 6 ++ DEV-VERSION | 2 +- config/config.go | 6 +- config/config.toml | 2 +- gitclone/smart-http.go | 5 + go.mod | 2 + go.sum | 4 + main.go | 25 ++--- proxy/chunkreq.go | 6 +- proxy/gitreq.go | 89 +++++++++-------- proxy/handler.go | 216 ++++++++++++++++++++++++----------------- proxy/proxy.go | 10 -- 12 files changed, 207 insertions(+), 166 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b75f043..b743b69 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # 更新日志 +25w16c +--- +- PRE-RELEASE: 此版本是v2.4.0的预发布版本,请勿在生产环境中使用; +- CHANGE: 使用更高性能的Buffer Pool 实现 +- CHANGE: 改进路由匹配 + 25w16b --- - PRE-RELEASE: 此版本是v2.4.0的预发布版本,请勿在生产环境中使用; diff --git a/DEV-VERSION b/DEV-VERSION index 5a6a9e9..7ad1669 100644 --- a/DEV-VERSION +++ b/DEV-VERSION @@ -1 +1 @@ -25w16b +25w16c diff --git a/config/config.go b/config/config.go index e3d0cd5..2d55564 100644 --- a/config/config.go +++ b/config/config.go @@ -53,11 +53,11 @@ type HttpcConfig struct { /* [gitclone] mode = "bypass" # bypass / cache -dir = "./repos" +smartGitAddr = ":8080" */ type GitCloneConfig struct { - Mode string `toml:"mode"` - Dir string `toml:"dir"` + Mode string `toml:"mode"` + SmartGitAddr string `toml:"smartGitAddr"` } /* diff --git a/config/config.toml b/config/config.toml index 2d8e8a4..a176144 100644 --- a/config/config.toml +++ b/config/config.toml @@ -14,7 +14,7 @@ maxConnsPerHost = 0 # only for advanced mode [gitclone] mode = "bypass" # bypass / cache -dir = "./repos" +smartGitAddr = ":8080" [pages] mode = "internal" # "internal" or "external" diff --git a/gitclone/smart-http.go b/gitclone/smart-http.go index aa448b1..cb06f0c 100644 --- a/gitclone/smart-http.go +++ b/gitclone/smart-http.go @@ -1,5 +1,8 @@ package gitclone +/* +package gitclone + import ( "compress/gzip" "ghproxy/config" @@ -157,3 +160,5 @@ func HttpGitUploadPack(cfg *config.Config) gin.HandlerFunc { } } } + +*/ diff --git a/go.mod b/go.mod index ffa3791..fcae7de 100644 --- a/go.mod +++ b/go.mod @@ -10,6 +10,7 @@ require ( github.com/go-git/go-git/v5 v5.14.0 github.com/pierrec/lz4 v2.6.1+incompatible github.com/satomitouka/touka-httpc v0.3.0 + github.com/valyala/bytebufferpool v1.0.0 golang.org/x/net v0.37.0 golang.org/x/time v0.11.0 ) @@ -18,6 +19,7 @@ require ( dario.cat/mergo v1.0.1 // indirect github.com/Microsoft/go-winio v0.6.2 // indirect github.com/ProtonMail/go-crypto v1.1.6 // indirect + github.com/WJQSERVER-STUDIO/go-utils/copyb v0.0.3 // indirect github.com/WJQSERVER-STUDIO/go-utils/log v0.0.1 // indirect github.com/bytedance/sonic v1.13.1 // indirect github.com/bytedance/sonic/loader v0.2.4 // indirect diff --git a/go.sum b/go.sum index 4eed234..76348d1 100644 --- a/go.sum +++ b/go.sum @@ -7,6 +7,8 @@ github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERo github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= github.com/ProtonMail/go-crypto v1.1.6 h1:ZcV+Ropw6Qn0AX9brlQLAUXfqLBc7Bl+f/DmNxpLfdw= github.com/ProtonMail/go-crypto v1.1.6/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= +github.com/WJQSERVER-STUDIO/go-utils/copyb v0.0.3 h1:S1tFRwMZkrAswOJxF1X2yTvL6Tz+6IeOBuqmycDnydw= +github.com/WJQSERVER-STUDIO/go-utils/copyb v0.0.3/go.mod h1:FZ6XE+4TKy4MOfX1xWKe6Rwsg0ucYFCdNh1KLvyKTfc= github.com/WJQSERVER-STUDIO/go-utils/log v0.0.1 h1:gJEQspQPB527Vp2FPcdOrynQEj3YYtrg1ixVSB/JvZM= github.com/WJQSERVER-STUDIO/go-utils/log v0.0.1/go.mod h1:j9Q+xnwpOfve7/uJnZ2izRQw6NNoXjvJHz7vUQAaLZE= github.com/WJQSERVER-STUDIO/go-utils/logger v1.5.0 h1:Uk4N7Sh4OPth3am3xVv17JlAm7tsna97ZLQRpQj7r5c= @@ -136,6 +138,8 @@ github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= +github.com/valyala/bytebufferpool v1.0.0 h1:GqA5TC/0021Y/b9FG4Oi9Mr3q7XYx6KllzawFIhcdPw= +github.com/valyala/bytebufferpool v1.0.0/go.mod h1:6bBcMArwyJ5K/AmCkWv1jt77kVWyCJ6HpOuEn7z0Csc= github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= golang.org/x/arch v0.15.0 h1:QtOrQd0bTUnhNVNndMpLHNWrDmYzZ2KDqSrEymqInZw= diff --git a/main.go b/main.go index 3102d4e..f8adcc2 100644 --- a/main.go +++ b/main.go @@ -12,7 +12,6 @@ import ( "ghproxy/api" "ghproxy/auth" "ghproxy/config" - "ghproxy/gitclone" "ghproxy/middleware/loggin" "ghproxy/middleware/timing" "ghproxy/proxy" @@ -225,23 +224,6 @@ func init() { setupPages(cfg, router) - if cfg.GitClone.Mode == "cache" { - router.GET("/github.com/:username/:repo/info/refs", gitclone.HttpInfoRefs(cfg)) - //router.GET("/https://github.com/:username/:repo/info/refs", gitclone.HttpInfoRefs(cfg)) - - router.POST("/github.com/:username/:repo/git-upload-pack", gitclone.HttpGitUploadPack(cfg)) - //router.POST("/https://github.com/:username/:repo/git-upload-pack", gitclone.HttpGitUploadPack(cfg)) - } else { - // 3. GitHub Info/Git- - Use distinct path segments for type (or combine under a common prefix) - - router.GET("/github.com/:username/:repo/info/*filepath", func(c *gin.Context) { // Distinct path for info - proxy.NoRouteHandler(cfg, limiter, iplimiter, runMode)(c) - }) - router.GET("/github.com/:username/:repo/git-*filepath", func(c *gin.Context) { // Distinct path for git-* (or a more specific prefix) - proxy.NoRouteHandler(cfg, limiter, iplimiter, runMode)(c) - }) - } - // 1. GitHub Releases/Archive - Use distinct path segments for type router.GET("/github.com/:username/:repo/releases/*filepath", func(c *gin.Context) { // Distinct path for releases proxy.NoRouteHandler(cfg, limiter, iplimiter, runMode)(c) @@ -260,6 +242,13 @@ func init() { proxy.NoRouteHandler(cfg, limiter, iplimiter, runMode)(c) }) + router.GET("/github.com/:username/:repo/info/*filepath", func(c *gin.Context) { // Distinct path for info + proxy.NoRouteHandler(cfg, limiter, iplimiter, runMode)(c) + }) + router.GET("/github.com/:username/:repo/git-upload-pack", func(c *gin.Context) { + proxy.NoRouteHandler(cfg, limiter, iplimiter, runMode)(c) + }) + // 4. Raw GitHubusercontent - Keep as is (assuming it's distinct enough) router.GET("/raw.githubusercontent.com/:username/:repo/*filepath", func(c *gin.Context) { proxy.NoRouteHandler(cfg, limiter, iplimiter, runMode)(c) diff --git a/proxy/chunkreq.go b/proxy/chunkreq.go index 988d62f..20a92f2 100644 --- a/proxy/chunkreq.go +++ b/proxy/chunkreq.go @@ -126,11 +126,7 @@ func ChunkedProxyRequest(c *gin.Context, u string, cfg *config.Config, mode stri c.Status(resp.StatusCode) - // 使用固定32KB缓冲池 - buffer := BufferPool.Get().([]byte) - defer BufferPool.Put(buffer) - - _, err = io.CopyBuffer(c.Writer, resp.Body, buffer) + _, err = io.CopyBuffer(c.Writer, resp.Body, nil) if err != nil { logError("%s %s %s %s %s Failed to copy response body: %v", c.ClientIP(), method, u, c.Request.Header.Get("User-Agent"), c.Request.Proto, err) return diff --git a/proxy/gitreq.go b/proxy/gitreq.go index 4324fa0..d2971cf 100644 --- a/proxy/gitreq.go +++ b/proxy/gitreq.go @@ -6,7 +6,9 @@ import ( "ghproxy/config" "io" "net/http" + "net/url" "strconv" + "strings" "github.com/gin-gonic/gin" ) @@ -15,38 +17,15 @@ func GitReq(c *gin.Context, u string, cfg *config.Config, mode string, runMode s method := c.Request.Method logInfo("%s %s %s %s %s", c.ClientIP(), method, u, c.Request.Header.Get("User-Agent"), c.Request.Proto) - // 发送HEAD请求, 预获取Content-Length - headReq, err := client.NewRequest("HEAD", u, nil) - if err != nil { - HandleError(c, fmt.Sprintf("Failed to create request: %v", err)) - return - } - setRequestHeaders(c, headReq) - AuthPassThrough(c, cfg, headReq) - - headResp, err := client.Do(headReq) - if err != nil { - HandleError(c, fmt.Sprintf("Failed to send request: %v", err)) - return - } - - // defer headResp.Body.Close() - defer func(Body io.ReadCloser) { - if err := Body.Close(); err != nil { - logError("Failed to close response body: %v", err) - } - }(headResp.Body) - - contentLength := headResp.Header.Get("Content-Length") - sizelimit := cfg.Server.SizeLimit * 1024 * 1024 - if contentLength != "" { - size, err := strconv.Atoi(contentLength) - if err == nil && size > sizelimit { - finalURL := headResp.Request.URL.String() - c.Redirect(http.StatusMovedPermanently, finalURL) - logWarning("%s %s %s %s %s Final-URL: %s Size-Limit-Exceeded: %d", c.ClientIP(), c.Request.Method, c.Request.URL.String(), c.Request.Header.Get("User-Agent"), c.Request.Proto, finalURL, size) + logInfo("U:%s", u) + if cfg.GitClone.Mode == "cache" { + userPath, repoPath, remainingPath, err := extractParts(u) + if err != nil { + HandleError(c, fmt.Sprintf("Failed to extract parts from URL: %v", err)) return } + // 构建新url + u = cfg.GitClone.SmartGitAddr + userPath + repoPath + remainingPath } body, err := readRequestBody(c) @@ -77,9 +56,10 @@ func GitReq(c *gin.Context, u string, cfg *config.Config, mode string, runMode s } }(resp.Body) - contentLength = resp.Header.Get("Content-Length") + contentLength := resp.Header.Get("Content-Length") if contentLength != "" { size, err := strconv.Atoi(contentLength) + sizelimit := cfg.Server.SizeLimit * 1024 * 1024 if err == nil && size > sizelimit { finalURL := resp.Request.URL.String() c.Redirect(http.StatusMovedPermanently, finalURL) @@ -104,14 +84,6 @@ func GitReq(c *gin.Context, u string, cfg *config.Config, mode string, runMode s resp.Header.Del(header) } - /* - if cfg.CORS.Enabled { - c.Header("Access-Control-Allow-Origin", "*") - } else { - c.Header("Access-Control-Allow-Origin", "") - } - */ - switch cfg.Server.Cors { case "*": c.Header("Access-Control-Allow-Origin", "*") @@ -136,4 +108,43 @@ func GitReq(c *gin.Context, u string, cfg *config.Config, mode string, runMode s } else { c.Writer.Flush() // 确保刷入 } + + /* + _, err = copyb.CopyBuffer(c.Writer, resp.Body, nil) + if err != nil { + logError("%s %s %s %s %s Failed to copy response body: %v", c.ClientIP(), method, u, c.Request.Header.Get("User-Agent"), c.Request.Proto, err) + return + } else { + c.Writer.Flush() // 确保刷入 + } + */ +} + +// extractParts 从给定的 URL 中提取所需的部分 +func extractParts(rawURL string) (string, string, string, error) { + // 解析 URL + parsedURL, err := url.Parse(rawURL) + if err != nil { + return "", "", "", err + } + + // 获取路径部分并分割 + pathParts := strings.Split(parsedURL.Path, "/") + + // 提取所需的部分 + if len(pathParts) < 3 { + return "", "", "", fmt.Errorf("URL path is too short") + } + + // 提取 /WJQSERVER-STUDIO 和 /go-utils.git + repoOwner := "/" + pathParts[1] + repoName := "/" + pathParts[2] + + // 剩余部分 + remainingPath := strings.Join(pathParts[3:], "/") + if remainingPath != "" { + remainingPath = "/" + remainingPath + } + + return repoOwner, repoName, remainingPath, nil } diff --git a/proxy/handler.go b/proxy/handler.go index 9e3d37d..86b6b06 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -12,121 +12,159 @@ import ( "github.com/gin-gonic/gin" ) +var exps = []*regexp.Regexp{ + regexp.MustCompile(`^(?:https?://)?github\.com/([^/]+)/([^/]+)/(?:releases|archive)/.*`), // 匹配 GitHub Releases 或 Archive 链接 + regexp.MustCompile(`^(?:https?://)?github\.com/([^/]+)/([^/]+)/(?:blob|raw)/.*`), // 匹配 GitHub Blob 或 Raw 链接 + regexp.MustCompile(`^(?:https?://)?github\.com/([^/]+)/([^/]+)/(?:info|git-).*`), // 匹配 GitHub Info 或 Git 相关链接 (例如 .gitattributes, .gitignore) + regexp.MustCompile(`^(?:https?://)?raw\.github(?:usercontent|)\.com/([^/]+)/([^/]+)/.+?/.+`), // 匹配 raw.githubusercontent.com 链接 + regexp.MustCompile(`^(?:https?://)?gist\.github(?:usercontent|)\.com/([^/]+)/.+?/.+`), // 匹配 gist.githubusercontent.com 链接 + regexp.MustCompile(`^(?:https?://)?api\.github\.com/repos/([^/]+)/([^/]+)/.*`), // 匹配 api.github.com/repos 链接 (GitHub API) +} + +// NoRouteHandler 是 Gin 框架的 NoRoute 处理器函数,用于处理所有未匹配到预定义路由的请求 +// 此函数实现了请求的频率限制、URL 路径解析、白名单/黑名单检查、URL 类型匹配和最终的代理请求处理 func NoRouteHandler(cfg *config.Config, limiter *rate.RateLimiter, iplimiter *rate.IPRateLimiter, runMode string) gin.HandlerFunc { return func(c *gin.Context) { - // 限制访问频率 - if cfg.RateLimit.Enabled { + // **频率限制处理** + if cfg.RateLimit.Enabled { // 检查是否启用频率限制 - var allowed bool + var allowed bool // 用于标记是否允许请求 - switch cfg.RateLimit.RateMethod { - case "ip": - allowed = iplimiter.Allow(c.ClientIP()) - case "total": - allowed = limiter.Allow() - default: - logWarning("Invalid RateLimit Method") - return + switch cfg.RateLimit.RateMethod { // 根据配置的频率限制方法选择 + case "ip": // 基于 IP 地址的频率限制 + allowed = iplimiter.Allow(c.ClientIP()) // 使用 IPRateLimiter 检查客户端 IP 是否允许请求 + case "total": // 基于总请求量的频率限制 + allowed = limiter.Allow() // 使用 RateLimiter 检查总请求量是否允许请求 + default: // 无效的频率限制方法 + logWarning("Invalid RateLimit Method") // 记录警告日志 + return // 中断请求处理 } - if !allowed { - c.JSON(http.StatusTooManyRequests, gin.H{"error": "Too Many Requests"}) - logWarning("%s %s %s %s %s 429-TooManyRequests", c.ClientIP(), c.Request.Method, c.Request.URL.RequestURI(), c.Request.Header.Get("User-Agent"), c.Request.Proto) - return + if !allowed { // 如果请求被频率限制阻止 + c.JSON(http.StatusTooManyRequests, gin.H{"error": "Too Many Requests"}) // 返回 429 状态码和错误信息 + logWarning("%s %s %s %s %s 429-TooManyRequests", c.ClientIP(), c.Request.Method, c.Request.URL.RequestURI(), c.Request.Header.Get("User-Agent"), c.Request.Proto) // 记录警告日志 + return // 中断请求处理 } } - //rawPath := strings.TrimPrefix(c.Request.URL.Path, "/") // 去掉前缀/ - rawPath := strings.TrimPrefix(c.Request.URL.RequestURI(), "/") // 去掉前缀/ - re := regexp.MustCompile(`^(http:|https:)?/?/?(.*)`) // 匹配http://或https://开头的路径 - matches := re.FindStringSubmatch(rawPath) // 匹配路径 + rawPath := strings.TrimPrefix(c.Request.URL.RequestURI(), "/") // 去掉 URL 前缀的斜杠 '/', 获取原始路径 (例如: /https://github.com/user/repo -> https://github.com/user/repo) + re := regexp.MustCompile(`^(http:|https:)?/?/?(.*)`) // 定义正则表达式,匹配以 http:// 或 https:// 开头的路径,并捕获协议和剩余部分 + matches := re.FindStringSubmatch(rawPath) // 使用正则表达式匹配原始路径 - // 匹配路径错误处理 - if len(matches) < 3 { - errMsg := fmt.Sprintf("%s %s %s %s %s Invalid URL", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto) - logWarning(errMsg) - c.String(http.StatusForbidden, "Invalid URL Format. Path: %s", rawPath) - return + // **路径匹配错误处理** + if len(matches) < 3 { // 如果匹配结果少于 3 个子串 (完整匹配 + 协议 + 剩余部分),则说明 URL 格式无效 + errMsg := fmt.Sprintf("%s %s %s %s %s Invalid URL", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto) // 构建错误日志信息 + logWarning(errMsg) // 记录警告日志 + c.String(http.StatusForbidden, "Invalid URL Format. Path: %s", rawPath) // 返回 403 状态码和错误信息,提示 URL 格式无效 + return // 中断请求处理 } - // 制作url - rawPath = "https://" + matches[2] + // **构建完整的 URL** + rawPath = "https://" + matches[2] // 从匹配结果中提取 URL 的剩余部分,并添加 https:// 协议头,构建完整的 URL - username, repo := MatchUserRepo(rawPath, cfg, c, matches) // 匹配用户名和仓库名 + username, repo := MatchUserRepo(rawPath, cfg, c, matches) // 调用 MatchUserRepo 函数,从 URL 中提取用户名和仓库名 - logInfo("%s %s %s %s %s Matched-Username: %s, Matched-Repo: %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, username, repo) + logInfo("%s %s %s %s %s Matched-Username: %s, Matched-Repo: %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, username, repo) // 记录 info 日志,包含匹配到的用户名和仓库名 // dump log 记录详细信息 c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, full Header - LogDump("%s %s %s %s %s %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, c.Request.Header) - repouser := fmt.Sprintf("%s/%s", username, repo) + LogDump("%s %s %s %s %s %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, c.Request.Header) // 记录 dump 日志,包含更详细的请求头信息 + repouser := fmt.Sprintf("%s/%s", username, repo) // 构建 "用户名/仓库名" 格式的字符串 - // 白名单检查 - if cfg.Whitelist.Enabled { - whitelist := auth.CheckWhitelist(username, repo) - if !whitelist { - logErrMsg := fmt.Sprintf("%s %s %s %s %s Whitelist Blocked repo: %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, repouser) - errMsg := fmt.Sprintf("Whitelist Blocked repo: %s", repouser) - c.JSON(http.StatusForbidden, gin.H{"error": errMsg}) - logWarning(logErrMsg) - return + // **白名单检查** + if cfg.Whitelist.Enabled { // 检查是否启用白名单 + whitelist := auth.CheckWhitelist(username, repo) // 调用 CheckWhitelist 函数检查当前仓库是否在白名单中 + if !whitelist { // 如果仓库不在白名单中 + logErrMsg := fmt.Sprintf("%s %s %s %s %s Whitelist Blocked repo: %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, repouser) // 构建错误日志信息 + errMsg := fmt.Sprintf("Whitelist Blocked repo: %s", repouser) // 构建返回给客户端的错误信息 + c.JSON(http.StatusForbidden, gin.H{"error": errMsg}) // 返回 403 状态码和 JSON 错误信息 + logWarning(logErrMsg) // 记录警告日志 + return // 中断请求处理 } } - // 黑名单检查 - if cfg.Blacklist.Enabled { - blacklist := auth.CheckBlacklist(username, repo) - if blacklist { - logErrMsg := fmt.Sprintf("%s %s %s %s %s Blacklist Blocked repo: %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, repouser) - errMsg := fmt.Sprintf("Blacklist Blocked repo: %s", repouser) - c.JSON(http.StatusForbidden, gin.H{"error": errMsg}) - logWarning(logErrMsg) - return + // **黑名单检查** + if cfg.Blacklist.Enabled { // 检查是否启用黑名单 + blacklist := auth.CheckBlacklist(username, repo) // 调用 CheckBlacklist 函数检查当前仓库是否在黑名单中 + if blacklist { // 如果仓库在黑名单中 + logErrMsg := fmt.Sprintf("%s %s %s %s %s Blacklist Blocked repo: %s", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, repouser) // 构建错误日志信息 + errMsg := fmt.Sprintf("Blacklist Blocked repo: %s", repouser) // 构建返回给客户端的错误信息 + c.JSON(http.StatusForbidden, gin.H{"error": errMsg}) // 返回 403 状态码和 JSON 错误信息 + logWarning(logErrMsg) // 记录警告日志 + return // 中断请求处理 } } - matches = CheckURL(rawPath, c) - if matches == nil { - c.AbortWithStatus(http.StatusNotFound) - logWarning("%s %s %s %s %s 404-NOMATCH", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto) - return - } - - // 若匹配api.github.com/repos/用户名/仓库名/路径, 则检查是否开启HeaderAuth - if exps[5].MatchString(rawPath) { - if cfg.Auth.AuthMethod != "header" || !cfg.Auth.Enabled { - c.JSON(http.StatusForbidden, gin.H{"error": "HeaderAuth is not enabled."}) - logError("%s %s %s %s %s HeaderAuth-Error: HeaderAuth is not enabled.", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto) - return - } - } - - // 处理blob/raw路径 - if exps[1].MatchString(rawPath) { - rawPath = strings.Replace(rawPath, "/blob/", "/raw/", 1) - } - - // 鉴权 - authcheck, err := auth.AuthHandler(c, cfg) - if !authcheck { - c.AbortWithStatusJSON(401, gin.H{"error": "Unauthorized"}) - logWarning("%s %s %s %s %s Auth-Error: %v", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, err) - return - } - - // IP METHOD URL USERAGENT PROTO MATCHES - logDebug("%s %s %s %s %s Matches: %v", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, matches) + var matchedIndex = -1 // 用于存储匹配到的正则表达式索引,初始化为 -1 表示未匹配 + // **优化的 URL 匹配逻辑:基于关键词分类匹配** switch { - case exps[0].MatchString(rawPath), exps[1].MatchString(rawPath), exps[3].MatchString(rawPath), exps[4].MatchString(rawPath): - //ProxyRequest(c, rawPath, cfg, "chrome", runMode) - ChunkedProxyRequest(c, rawPath, cfg, "chrome", runMode) // dev test chunk - case exps[2].MatchString(rawPath): - //ProxyRequest(c, rawPath, cfg, "git", runMode) - GitReq(c, rawPath, cfg, "git", runMode) - default: - c.String(http.StatusForbidden, "Invalid input.") - fmt.Println("Invalid input.") + case strings.Contains(rawPath, "/releases/") || strings.Contains(rawPath, "/archive/"): // 检查 URL 中是否包含 "/releases/" 或 "/archive/" 关键词 + matchedIndex = 0 // 如果包含,则匹配 exps[0] (GitHub Releases/Archive 链接) + case strings.Contains(rawPath, "/blob/") || strings.Contains(rawPath, "/raw/"): // 检查 URL 中是否包含 "/blob/" 或 "/raw/" 关键词 + matchedIndex = 1 // 如果包含,则匹配 exps[1] (GitHub Blob/Raw 链接) + case strings.Contains(rawPath, "/info/") || strings.Contains(rawPath, "/git-"): // 检查 URL 中是否包含 "/info/" 或 "/git-" 关键词 + matchedIndex = 2 // 如果包含,则匹配 exps[2] (GitHub Info/Git 相关链接) + case strings.Contains(rawPath, "raw.githubusercontent.com"): // 检查 URL 中是否包含 "raw.githubusercontent.com" 域名 + matchedIndex = 3 // 如果包含,则匹配 exps[3] (raw.githubusercontent.com 链接) + case strings.Contains(rawPath, "gist.githubusercontent.com"): // 检查 URL 中是否包含 "gist.githubusercontent.com" 域名 + matchedIndex = 4 // 如果包含,则匹配 exps[4] (gist.githubusercontent.com 链接) + case strings.Contains(rawPath, "api.github.com/repos/"): // 检查 URL 中是否包含 "api.github.com/repos/" 路径前缀 + matchedIndex = 5 // 如果包含,则匹配 exps[5] (api.github.com/repos 链接) + } + + if matchedIndex == -1 { // 如果没有任何关键词匹配到,则说明 URL 类型无法识别 + c.AbortWithStatus(http.StatusNotFound) // 返回 404 状态码 + logWarning("%s %s %s %s %s 404-NOMATCH", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto) // 记录警告日志 + return // 中断请求处理 + } + + // **使用分类匹配到的正则表达式进行精确匹配** + exp := exps[matchedIndex] + matches = exp.FindStringSubmatch(rawPath) + if len(matches) == 0 { + // 如果精确匹配失败 (例如,关键词匹配到 releases,但实际 URL 格式不符合 releases 的正则) + c.AbortWithStatus(http.StatusNotFound) + logWarning("%s %s %s %s %s 404-NOMATCH-ExpSpecific", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto) // 记录警告日志,表明是特定正则匹配失败 return } + + // **HeaderAuth 鉴权检查 (仅针对 api.github.com/repos 链接)** + if matchedIndex == 5 { // 如果匹配的是 api.github.com/repos 链接 (对应 exps[5]) + if cfg.Auth.AuthMethod != "header" || !cfg.Auth.Enabled { // 检查是否启用了 HeaderAuth 并且 AuthMethod 配置为 "header" + c.JSON(http.StatusForbidden, gin.H{"error": "HeaderAuth is not enabled."}) // 返回 403 状态码和错误信息,提示 HeaderAuth 未启用 + logError("%s %s %s %s %s HeaderAuth-Error: HeaderAuth is not enabled.", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto) // 记录错误日志 + return // 中断请求处理 + } + } + + // **处理 blob/raw 路径** + if matchedIndex == 1 { // 如果匹配的是 GitHub Blob/Raw 链接 (对应 exps[1]) + rawPath = strings.Replace(rawPath, "/blob/", "/raw/", 1) // 将 URL 中的 "/blob/" 替换为 "/raw/",获取 raw 链接 (用于下载原始文件内容) + } + + // **通用鉴权处理** + authcheck, err := auth.AuthHandler(c, cfg) // 调用 AuthHandler 函数进行通用鉴权检查 (例如,基于 Cookie 或 Header 的鉴权) + if !authcheck { // 如果鉴权失败 + c.AbortWithStatusJSON(401, gin.H{"error": "Unauthorized"}) // 返回 401 状态码和 JSON 错误信息,提示未授权 + logWarning("%s %s %s %s %s Auth-Error: %v", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, err) // 记录警告日志,包含鉴权错误信息 + return // 中断请求处理 + } + + // **Debug 日志记录匹配结果** + logDebug("%s %s %s %s %s Matches: %v", c.ClientIP(), c.Request.Method, rawPath, c.Request.Header.Get("User-Agent"), c.Request.Proto, matches) // 记录 debug 日志,包含匹配结果信息 + + // **根据匹配到的 URL 类型,进行不同的代理请求处理** + switch matchedIndex { + case 0, 1, 3, 4: // 如果匹配的是 Releases/Archive, Blob/Raw, raw.githubusercontent.com 或 gist.githubusercontent.com 链接 (对应 exps[0], exps[1], exps[3], exps[4]) + //ProxyRequest(c, rawPath, cfg, "chrome", runMode) // 原始的 ProxyRequest 函数 (可能一次性读取全部响应) + ChunkedProxyRequest(c, rawPath, cfg, "chrome", runMode) // 使用 ChunkedProxyRequest 函数进行分块代理 (更高效,特别是对于大文件) + case 2: // 如果匹配的是 Info/Git 相关链接 (对应 exps[2]) + //ProxyRequest(c, rawPath, cfg, "git", runMode) // 原始的 ProxyRequest 函数 + GitReq(c, rawPath, cfg, "git", runMode) // 使用 GitReq 函数处理 Git 相关请求 (针对 .gitattributes, .gitignore 等) + default: // 如果匹配到其他类型 (理论上不应该发生,因为前面的 matchedIndex == -1 已经处理了未识别类型) + c.String(http.StatusForbidden, "Invalid input.") // 返回 403 状态码和错误信息,提示无效输入 + fmt.Println("Invalid input.") // 打印错误信息到控制台 + return // 中断请求处理 + } } } diff --git a/proxy/proxy.go b/proxy/proxy.go index 3b9d3ff..eb1ac82 100644 --- a/proxy/proxy.go +++ b/proxy/proxy.go @@ -4,7 +4,6 @@ import ( "fmt" "io" "net/http" - "regexp" "github.com/WJQSERVER-STUDIO/go-utils/logger" "github.com/gin-gonic/gin" @@ -20,15 +19,6 @@ var ( logError = logger.LogError ) -var exps = []*regexp.Regexp{ - regexp.MustCompile(`^(?:https?://)?github\.com/([^/]+)/([^/]+)/(?:releases|archive)/.*`), - regexp.MustCompile(`^(?:https?://)?github\.com/([^/]+)/([^/]+)/(?:blob|raw)/.*`), - regexp.MustCompile(`^(?:https?://)?github\.com/([^/]+)/([^/]+)/(?:info|git-).*`), - regexp.MustCompile(`^(?:https?://)?raw\.github(?:usercontent|)\.com/([^/]+)/([^/]+)/.+?/.+`), - regexp.MustCompile(`^(?:https?://)?gist\.github(?:usercontent|)\.com/([^/]+)/.+?/.+`), - regexp.MustCompile(`^(?:https?://)?api\.github\.com/repos/([^/]+)/([^/]+)/.*`), -} - // 读取请求体 func readRequestBody(c *gin.Context) ([]byte, error) { body, err := io.ReadAll(c.Request.Body)