From 5dde21a403320031b43cd4b38189542e66ac11f0 Mon Sep 17 00:00:00 2001 From: wjqserver <114663932+WJQSERVER@users.noreply.github.com> Date: Mon, 9 Jun 2025 06:58:21 +0800 Subject: [PATCH] optimize matcher performance --- CHANGELOG.md | 5 + DEV-VERSION | 2 +- proxy/chunkreq.go | 2 +- proxy/match.go | 339 ++++++++++++++++++++++++++++-------------- proxy/matcher_test.go | 303 +++++++++++++++++++++++++++++++++++++ 5 files changed, 536 insertions(+), 115 deletions(-) create mode 100644 proxy/matcher_test.go diff --git a/CHANGELOG.md b/CHANGELOG.md index 7922247..4df6a10 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # 更新日志 +25w45a - 2025-06-09 +--- +- PRE-RELEASE: 此版本是v3.5.1预发布版本,请勿在生产环境中使用; +- CHANGE: 大幅优化`Matcher`的性能, 实现零分配, 大幅提升性能; 单次操作时间: `254.3 ns/op` => `29.59 ns/op` + 3.5.0 - 2025-06-05 --- - CHANGE: 更新许可证 v2.0 => v2.1 diff --git a/DEV-VERSION b/DEV-VERSION index c4bec82..ebe0357 100644 --- a/DEV-VERSION +++ b/DEV-VERSION @@ -1 +1 @@ -25w44a \ No newline at end of file +25w45a \ No newline at end of file diff --git a/proxy/chunkreq.go b/proxy/chunkreq.go index c760b9a..c50b7fb 100644 --- a/proxy/chunkreq.go +++ b/proxy/chunkreq.go @@ -110,7 +110,7 @@ func ChunkedProxyRequest(ctx context.Context, c *app.RequestContext, u string, c bodyReader = limitreader.NewRateLimitedReader(bodyReader, bandwidthLimit, int(bandwidthBurst), ctx) } - if MatcherShell(u) && matchString(matcher, matchedMatchers) && cfg.Shell.Editor { + if MatcherShell(u) && matchString(matcher) && cfg.Shell.Editor { // 判断body是不是gzip var compress string if resp.Header.Get("Content-Encoding") == "gzip" { diff --git a/proxy/match.go b/proxy/match.go index cee4eaa..a28520e 100644 --- a/proxy/match.go +++ b/proxy/match.go @@ -6,152 +6,265 @@ import ( "net/url" "regexp" "strings" + "sync" ) +// Matcher 从原始URL路径中高效地解析并匹配代理规则. func Matcher(rawPath string, cfg *config.Config) (string, string, string, *GHProxyErrors) { - var ( - user string - repo string - matcher string - ) - // 匹配 "https://github.com"开头的链接 - if strings.HasPrefix(rawPath, "https://github.com") { - remainingPath := strings.TrimPrefix(rawPath, "https://github.com") - /* - if strings.HasPrefix(remainingPath, "/") { - remainingPath = strings.TrimPrefix(remainingPath, "/") - } - */ - remainingPath = strings.TrimPrefix(remainingPath, "/") - // 预期格式/user/repo/more... - // 取出user和repo和最后部分 - parts := strings.Split(remainingPath, "/") - if len(parts) <= 2 { - errMsg := "Not enough parts in path after matching 'https://github.com*'" - return "", "", "", NewErrorWithStatusLookup(400, errMsg) - } - user = parts[0] - repo = parts[1] - // 匹配 "https://github.com"开头的链接 - if len(parts) >= 3 { - switch parts[2] { - case "releases", "archive": - matcher = "releases" - case "blob": - matcher = "blob" - case "raw": - matcher = "raw" - case "info", "git-upload-pack": - matcher = "clone" - default: - errMsg := "Url Matched 'https://github.com*', but didn't match the next matcher" - return "", "", "", NewErrorWithStatusLookup(400, errMsg) - } - } - return user, repo, matcher, nil + if len(rawPath) < 18 { + return "", "", "", NewErrorWithStatusLookup(404, "path too short") } - // 匹配 "https://raw"开头的链接 - if strings.HasPrefix(rawPath, "https://raw") { - remainingPath := strings.TrimPrefix(rawPath, "https://") - parts := strings.Split(remainingPath, "/") - if len(parts) <= 3 { - errMsg := "URL after matched 'https://raw*' should have at least 4 parts (user/repo/branch/file)." - return "", "", "", NewErrorWithStatusLookup(400, errMsg) - } - user = parts[1] - repo = parts[2] - matcher = "raw" - return user, repo, matcher, nil - } - // 匹配 "https://gist"开头的链接 - if strings.HasPrefix(rawPath, "https://gist") { - remainingPath := strings.TrimPrefix(rawPath, "https://") - parts := strings.Split(remainingPath, "/") - if len(parts) <= 3 { - errMsg := "URL after matched 'https://gist*' should have at least 4 parts (user/gist_id)." - return "", "", "", NewErrorWithStatusLookup(400, errMsg) + // 匹配 "https://github.com/" + if strings.HasPrefix(rawPath, "https://github.com/") { + remaining := rawPath[19:] + i := strings.IndexByte(remaining, '/') + if i <= 0 { + return "", "", "", NewErrorWithStatusLookup(400, "malformed github path: missing user") + } + user := remaining[:i] + remaining = remaining[i+1:] + i = strings.IndexByte(remaining, '/') + if i <= 0 { + return "", "", "", NewErrorWithStatusLookup(400, "malformed github path: missing repo") + } + repo := remaining[:i] + remaining = remaining[i+1:] + if len(remaining) == 0 { + return "", "", "", NewErrorWithStatusLookup(400, "malformed github path: missing action") + } + i = strings.IndexByte(remaining, '/') + action := remaining + if i != -1 { + action = remaining[:i] + } + var matcher string + switch action { + case "releases", "archive": + matcher = "releases" + case "blob": + matcher = "blob" + case "raw": + matcher = "raw" + case "info", "git-upload-pack": + matcher = "clone" + default: + return "", "", "", NewErrorWithStatusLookup(400, fmt.Sprintf("unsupported github action: %s", action)) } - user = parts[1] - repo = "" - matcher = "gist" return user, repo, matcher, nil } - // 匹配 "https://api.github.com/"开头的链接 + + // 匹配 "https://raw.githubusercontent.com/" + if strings.HasPrefix(rawPath, "https://raw.githubusercontent.com/") { + remaining := rawPath[34:] + // 这里的逻辑与 github.com 的类似, 需要提取 user, repo, branch, file... + // 我们只需要 user 和 repo + i := strings.IndexByte(remaining, '/') + if i <= 0 { + return "", "", "", NewErrorWithStatusLookup(400, "malformed raw url: missing user") + } + user := remaining[:i] + remaining = remaining[i+1:] + i = strings.IndexByte(remaining, '/') + if i <= 0 { + return "", "", "", NewErrorWithStatusLookup(400, "malformed raw url: missing repo") + } + repo := remaining[:i] + // raw 链接至少需要 user/repo/branch 三部分 + remaining = remaining[i+1:] + if len(remaining) == 0 { + return "", "", "", NewErrorWithStatusLookup(400, "malformed raw url: missing branch/commit") + } + return user, repo, "raw", nil + } + + // 匹配 "https://gist.github.com/" + if strings.HasPrefix(rawPath, "https://gist.github.com/") { + remaining := rawPath[24:] + i := strings.IndexByte(remaining, '/') + if i <= 0 { + // case: https://gist.github.com/user + // 这种情况下, gist_id 缺失, 但我们仍然可以认为 user 是有效的 + if len(remaining) > 0 { + return remaining, "", "gist", nil + } + return "", "", "", NewErrorWithStatusLookup(400, "malformed gist url: missing user") + } + // case: https://gist.github.com/user/gist_id... + user := remaining[:i] + return user, "", "gist", nil + } + + // 匹配 "https://api.github.com/" if strings.HasPrefix(rawPath, "https://api.github.com/") { - matcher = "api" - remainingPath := strings.TrimPrefix(rawPath, "https://api.github.com/") - - parts := strings.Split(remainingPath, "/") - if parts[0] == "repos" { - user = parts[1] - repo = parts[2] + if !cfg.Auth.ForceAllowApi && (cfg.Auth.Method != "header" || !cfg.Auth.Enabled) { + return "", "", "", NewErrorWithStatusLookup(403, "API proxy requires header authentication") } - if parts[0] == "users" { - user = parts[1] - } - if !cfg.Auth.ForceAllowApi { - if cfg.Auth.Method != "header" || !cfg.Auth.Enabled { - //return "", "", "", ErrAuthHeaderUnavailable - errMsg := "AuthHeader Unavailable, Need to open header auth to enable api proxy" - return "", "", "", NewErrorWithStatusLookup(403, errMsg) + remaining := rawPath[23:] + var user, repo string + if strings.HasPrefix(remaining, "repos/") { + parts := strings.SplitN(remaining[6:], "/", 3) + if len(parts) >= 2 { + user = parts[0] + repo = parts[1] + } + } else if strings.HasPrefix(remaining, "users/") { + parts := strings.SplitN(remaining[6:], "/", 2) + if len(parts) >= 1 { + user = parts[0] } } - return user, repo, matcher, nil + return user, repo, "api", nil } - //return "", "", "", ErrNotFound - errMsg := "Didn't match any matcher" - return "", "", "", NewErrorWithStatusLookup(404, errMsg) + + return "", "", "", NewErrorWithStatusLookup(404, "no matcher found for the given path") } -var ( - matchedMatchers = []string{ - "blob", - "raw", - "gist", +/* + func Matcher(rawPath string, cfg *config.Config) (string, string, string, *GHProxyErrors) { + var ( + user string + repo string + matcher string + ) + // 匹配 "https://github.com"开头的链接 + if strings.HasPrefix(rawPath, "https://github.com") { + remainingPath := strings.TrimPrefix(rawPath, "https://github.com") + + //if strings.HasPrefix(remainingPath, "/") { + // remainingPath = strings.TrimPrefix(remainingPath, "/") + //} + + remainingPath = strings.TrimPrefix(remainingPath, "/") + // 预期格式/user/repo/more... + // 取出user和repo和最后部分 + parts := strings.Split(remainingPath, "/") + if len(parts) <= 2 { + errMsg := "Not enough parts in path after matching 'https://github.com*'" + return "", "", "", NewErrorWithStatusLookup(400, errMsg) + } + user = parts[0] + repo = parts[1] + // 匹配 "https://github.com"开头的链接 + if len(parts) >= 3 { + switch parts[2] { + case "releases", "archive": + matcher = "releases" + case "blob": + matcher = "blob" + case "raw": + matcher = "raw" + case "info", "git-upload-pack": + matcher = "clone" + default: + errMsg := "Url Matched 'https://github.com*', but didn't match the next matcher" + return "", "", "", NewErrorWithStatusLookup(400, errMsg) + } + } + return user, repo, matcher, nil + } + // 匹配 "https://raw"开头的链接 + if strings.HasPrefix(rawPath, "https://raw") { + remainingPath := strings.TrimPrefix(rawPath, "https://") + parts := strings.Split(remainingPath, "/") + if len(parts) <= 3 { + errMsg := "URL after matched 'https://raw*' should have at least 4 parts (user/repo/branch/file)." + return "", "", "", NewErrorWithStatusLookup(400, errMsg) + } + user = parts[1] + repo = parts[2] + matcher = "raw" + + return user, repo, matcher, nil + } + // 匹配 "https://gist"开头的链接 + if strings.HasPrefix(rawPath, "https://gist") { + remainingPath := strings.TrimPrefix(rawPath, "https://") + parts := strings.Split(remainingPath, "/") + if len(parts) <= 3 { + errMsg := "URL after matched 'https://gist*' should have at least 4 parts (user/gist_id)." + return "", "", "", NewErrorWithStatusLookup(400, errMsg) + } + user = parts[1] + repo = "" + matcher = "gist" + return user, repo, matcher, nil + } + // 匹配 "https://api.github.com/"开头的链接 + if strings.HasPrefix(rawPath, "https://api.github.com/") { + matcher = "api" + remainingPath := strings.TrimPrefix(rawPath, "https://api.github.com/") + + parts := strings.Split(remainingPath, "/") + if parts[0] == "repos" { + user = parts[1] + repo = parts[2] + } + if parts[0] == "users" { + user = parts[1] + } + if !cfg.Auth.ForceAllowApi { + if cfg.Auth.Method != "header" || !cfg.Auth.Enabled { + //return "", "", "", ErrAuthHeaderUnavailable + errMsg := "AuthHeader Unavailable, Need to open header auth to enable api proxy" + return "", "", "", NewErrorWithStatusLookup(403, errMsg) + } + } + return user, repo, matcher, nil + } + //return "", "", "", ErrNotFound + errMsg := "Didn't match any matcher" + return "", "", "", NewErrorWithStatusLookup(404, errMsg) } +*/ +var ( + proxyableMatchersMap map[string]struct{} + initMatchersOnce sync.Once ) -// matchString 检查目标字符串是否在给定的字符串集合中 -func matchString(target string, stringsToMatch []string) bool { - matchMap := make(map[string]struct{}, len(stringsToMatch)) - for _, str := range stringsToMatch { - matchMap[str] = struct{}{} - } - _, exists := matchMap[target] +func initMatchers() { + initMatchersOnce.Do(func() { + matchers := []string{"blob", "raw", "gist"} + proxyableMatchersMap = make(map[string]struct{}, len(matchers)) + for _, m := range matchers { + proxyableMatchersMap[m] = struct{}{} + } + }) +} + +// matchString 与原始版本签名兼容 +func matchString(target string) bool { + initMatchers() + _, exists := proxyableMatchersMap[target] return exists } -// extractParts 从给定的 URL 中提取所需的部分 +// extractParts 与原始版本签名兼容 func extractParts(rawURL string) (string, string, string, url.Values, error) { - // 解析 URL parsedURL, err := url.Parse(rawURL) if err != nil { return "", "", "", nil, err } - // 获取路径部分并分割 - pathParts := strings.Split(parsedURL.Path, "/") + path := parsedURL.Path + if len(path) > 0 && path[0] == '/' { + path = path[1:] + } - // 提取所需的部分 - if len(pathParts) < 3 { + parts := strings.SplitN(path, "/", 3) + + if len(parts) < 2 { return "", "", "", nil, fmt.Errorf("URL path is too short") } - // 提取 /WJQSERVER-STUDIO 和 /go-utils.git - repoOwner := "/" + pathParts[1] - repoName := "/" + pathParts[2] - - // 剩余部分 - remainingPath := strings.Join(pathParts[3:], "/") - if remainingPath != "" { - remainingPath = "/" + remainingPath + repoOwner := "/" + parts[0] + repoName := "/" + parts[1] + var remainingPath string + if len(parts) > 2 { + remainingPath = "/" + parts[2] } - // 查询参数 - queryParams := parsedURL.Query() - - return repoOwner, repoName, remainingPath, queryParams, nil + return repoOwner, repoName, remainingPath, parsedURL.Query(), nil } var urlPattern = regexp.MustCompile(`https?://[^\s'"]+`) diff --git a/proxy/matcher_test.go b/proxy/matcher_test.go new file mode 100644 index 0000000..21329c3 --- /dev/null +++ b/proxy/matcher_test.go @@ -0,0 +1,303 @@ +package proxy + +import ( + "ghproxy/config" + "net/url" + "reflect" + "testing" +) + +func TestMatcher_Compatibility(t *testing.T) { + // --- 准备各种配置用于测试 --- + cfgWithAuth := &config.Config{ + Auth: config.AuthConfig{Enabled: true, Method: "header", ForceAllowApi: false}, + } + cfgNoAuth := &config.Config{ + Auth: config.AuthConfig{Enabled: false}, + } + cfgApiForceAllowed := &config.Config{ + Auth: config.AuthConfig{ForceAllowApi: true}, + } + cfgWrongAuthMethod := &config.Config{ + Auth: config.AuthConfig{Enabled: true, Method: "none"}, + } + + testCases := []struct { + name string + rawPath string + config *config.Config + expectedUser string + expectedRepo string + expectedMatcher string + expectError bool + expectedErrCode int + }{ + { + name: "GH Releases Path", + rawPath: "https://github.com/owner/repo/releases/download/v1.0/asset.zip", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo", expectedMatcher: "releases", + }, + { + name: "GH Archive Path", + rawPath: "https://github.com/owner/repo.git/archive/main.zip", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo.git", expectedMatcher: "releases", + }, + { + name: "GH Blob Path", + rawPath: "https://github.com/owner/repo/blob/main/path/to/file.go", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo", expectedMatcher: "blob", + }, + { + name: "GH Raw Path", + rawPath: "https://github.com/owner/repo/raw/main/image.png", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo", expectedMatcher: "raw", + }, + { + name: "GH Clone Info Refs", + rawPath: "https://github.com/owner/repo.git/info/refs?service=git-upload-pack", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo.git", expectedMatcher: "clone", + }, + { + name: "GH Clone Git Upload Pack", + rawPath: "https://github.com/owner/repo/git-upload-pack", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo", expectedMatcher: "clone", + }, + + { + name: "RawGHUserContent Path", + rawPath: "https://raw.githubusercontent.com/owner/repo/branch/file.sh", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo", expectedMatcher: "raw", + }, + { + name: "Gist Path", + rawPath: "https://gist.github.com/user/abcdef1234567890", + config: cfgWithAuth, + expectedUser: "user", expectedRepo: "", expectedMatcher: "gist", + }, + { + name: "API Repos Path (with Auth)", + rawPath: "https://api.github.com/repos/owner/repo/pulls", + config: cfgWithAuth, + expectedUser: "owner", expectedRepo: "repo", expectedMatcher: "api", + }, + { + name: "API Users Path (with Auth)", + rawPath: "https://api.github.com/users/someuser/repos", + config: cfgWithAuth, + expectedUser: "someuser", expectedRepo: "", expectedMatcher: "api", + }, + { + name: "API Other Path (with Auth)", + rawPath: "https://api.github.com/octocat", + config: cfgWithAuth, + expectedUser: "", expectedRepo: "", expectedMatcher: "api", + }, + { + name: "API Path (Force Allowed)", + rawPath: "https://api.github.com/repos/owner/repo", + config: cfgApiForceAllowed, // Auth disabled, but force allowed + expectedUser: "owner", expectedRepo: "repo", expectedMatcher: "api", + }, + { + name: "Malformed GH Path (no repo)", + rawPath: "https://github.com/owner/", + config: cfgWithAuth, + expectError: true, expectedErrCode: 400, + }, + { + name: "Malformed GH Path (no action)", + rawPath: "https://github.com/owner/repo", + config: cfgWithAuth, + expectError: true, expectedErrCode: 400, + }, + { + name: "Malformed GH Path (empty user)", + rawPath: "https://github.com//repo/blob/main/file.go", + config: cfgWithAuth, + expectError: true, expectedErrCode: 400, + }, + { + name: "Malformed Raw Path (no repo)", + rawPath: "https://raw.githubusercontent.com/owner/", + config: cfgWithAuth, + expectError: true, expectedErrCode: 400, + }, + { + name: "Malformed Gist Path (no user)", + rawPath: "https://gist.github.com/", + config: cfgWithAuth, + expectError: true, expectedErrCode: 400, + }, + { + name: "Unsupported GH Action", + rawPath: "https://github.com/owner/repo/issues/123", + config: cfgWithAuth, + expectError: true, expectedErrCode: 400, + }, + { + name: "API Path (No Auth)", + rawPath: "https://api.github.com/user", + config: cfgNoAuth, + expectError: true, expectedErrCode: 403, + }, + { + name: "API Path (Wrong Auth Method)", + rawPath: "https://api.github.com/user", + config: cfgWrongAuthMethod, + expectError: true, expectedErrCode: 403, + }, + { + name: "No Matcher Found (other domain)", + rawPath: "https://bitbucket.org/owner/repo", + config: cfgWithAuth, + expectError: true, expectedErrCode: 404, + }, + { + name: "No Matcher Found (path too short)", + rawPath: "https://a.co", + config: cfgWithAuth, + expectError: true, expectedErrCode: 404, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + user, repo, matcher, ghErr := Matcher(tc.rawPath, tc.config) + + if tc.expectError { + if ghErr == nil { + t.Fatalf("Expected a GHProxyErrors error, but got nil") + } + if ghErr.StatusCode != tc.expectedErrCode { + t.Errorf("Expected error code %d, but got %d (msg: %s)", + tc.expectedErrCode, ghErr.StatusCode, ghErr.ErrorMessage) + } + } else { + if ghErr != nil { + t.Fatalf("Expected no error, but got: %s", ghErr.ErrorMessage) + } + if user != tc.expectedUser { + t.Errorf("user: got %q, want %q", user, tc.expectedUser) + } + if repo != tc.expectedRepo { + t.Errorf("repo: got %q, want %q", repo, tc.expectedRepo) + } + if matcher != tc.expectedMatcher { + t.Errorf("matcher: got %q, want %q", matcher, tc.expectedMatcher) + } + } + }) + } +} + +func TestExtractParts_Compatibility(t *testing.T) { + testCases := []struct { + name string + rawURL string + expectedOwner string + expectedRepo string + expectedRem string + expectedQuery url.Values + expectError bool + }{ + { + name: "Standard git clone URL", + rawURL: "https://github.com/WJQSERVER-STUDIO/go-utils.git/info/refs?service=git-upload-pack", + expectedOwner: "/WJQSERVER-STUDIO", + expectedRepo: "/go-utils.git", + expectedRem: "/info/refs", + expectedQuery: url.Values{"service": []string{"git-upload-pack"}}, + }, + { + name: "No remaining path", + rawURL: "https://example.com/owner/repo", + expectedOwner: "/owner", + expectedRepo: "/repo", + expectedRem: "", + expectedQuery: url.Values{}, + }, + { + name: "Root path only", + rawURL: "https://example.com/", + expectError: true, // Path is too short + }, + { + name: "One level path", + rawURL: "https://example.com/owner", + expectError: true, // Path is too short + }, + { + name: "Empty path segments", + rawURL: "https://example.com//repo/a", // Will be treated as /repo/a + expectedOwner: "", // First part is empty + expectedRepo: "/repo", + expectedRem: "/a", + }, + { + name: "Invalid URL format", + rawURL: "://invalid", + expectError: true, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + owner, repo, rem, query, err := extractParts(tc.rawURL) + + if (err != nil) != tc.expectError { + t.Fatalf("extractParts() error = %v, expectError %v", err, tc.expectError) + } + + if !tc.expectError { + if owner != tc.expectedOwner { + t.Errorf("owner: got %q, want %q", owner, tc.expectedOwner) + } + if repo != tc.expectedRepo { + t.Errorf("repo: got %q, want %q", repo, tc.expectedRepo) + } + if rem != tc.expectedRem { + t.Errorf("remaining path: got %q, want %q", rem, tc.expectedRem) + } + if !reflect.DeepEqual(query, tc.expectedQuery) { + t.Errorf("query: got %v, want %v", query, tc.expectedQuery) + } + } + }) + } +} + +func TestMatchString_Compatibility(t *testing.T) { + testCases := []struct { + target string + expected bool + }{ + {"blob", true}, {"raw", true}, {"gist", true}, + {"clone", false}, {"releases", false}, + } + for _, tc := range testCases { + t.Run(tc.target, func(t *testing.T) { + if got := matchString(tc.target); got != tc.expected { + t.Errorf("matchString('%s') = %v; want %v", tc.target, got, tc.expected) + } + }) + } +} + +func BenchmarkMatcher(b *testing.B) { + cfg := &config.Config{} + path := "https://github.com/WJQSERVER/speedtest-ex/releases/download/v1.2.0/speedtest-linux-amd64.tar.gz" + + b.ReportAllocs() + b.ResetTimer() + + for i := 0; i < b.N; i++ { + _, _, _, _ = Matcher(path, cfg) + } +}