diff --git a/CHANGELOG.md b/CHANGELOG.md index ec6cdd3..29e7720 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,5 @@ # 更新日志 -4.3.5-rc.0 - 2025-09-14 ---- -- PRE-RELEASE: v4.3.5-rc.0是v4.3.5的预发布版本,请勿在生产环境中使用; -- CHANGE: 改进`nest`实现, 减少内存分配`10371 B/op -> 1852 B/op` `43 allocs/op -> 14 allocs/op` -- CHANGE: 为`nest`加入`dispatcher`实现, 为不同情况分配适合的处理器以保证性能与兼容性 -- CHANGE: 改进路径匹配热点的内存分配 - -4.3.4 - 2025-09-14 ---- -- CHANGE: 改进嵌套加速实现, 增强稳定性 - 4.3.3 - 2025-09-10 --- - CHANGE: 增强对[wanf](https://github.com/WJQSERVER/wanf)的支持 @@ -32,7 +21,7 @@ 4.3.0-rc.0 - 2025-08-11 --- -- PRE-RELEASE: v4.3.0-rc.0是v4.3.0的预发布版本,请勿在生产环境中使用; +- PRE-RELEASE: v4.3.0-rc.0是v4.3.0发布版本,请勿在生产环境中使用; - CHANGE: 为OCI镜像(Docker)代理带来自动library附加功能 - CHANGE(refactor): 改进OCI镜像(Docker)代理路径组成流程 - ADD: 新增[WANF](https://github.com/WJQSERVER/wanf)配置文件格式支持 diff --git a/DEV-VERSION b/DEV-VERSION index 332d6f7..51ff7c7 100644 --- a/DEV-VERSION +++ b/DEV-VERSION @@ -1 +1 @@ -4.3.5-rc.0 \ No newline at end of file +4.3.0-rc.0 \ No newline at end of file diff --git a/VERSION b/VERSION index a6695ff..2533cac 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -4.3.4 \ No newline at end of file +4.3.3 \ No newline at end of file diff --git a/go.mod b/go.mod index c6fca3b..d43cb00 100644 --- a/go.mod +++ b/go.mod @@ -18,7 +18,7 @@ require ( github.com/fenthope/ipfilter v0.0.1 github.com/fenthope/reco v0.0.4 github.com/fenthope/record v0.0.4 - github.com/go-json-experiment/json v0.0.0-20250910080747-cc2cfa0554c3 + github.com/go-json-experiment/json v0.0.0-20250813233538-9b1f9ea2e11b github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/infinite-iroha/touka v0.3.7 github.com/wjqserver/modembed v0.0.1 diff --git a/go.sum b/go.sum index 5d934f6..97337f6 100644 --- a/go.sum +++ b/go.sum @@ -20,8 +20,6 @@ github.com/fenthope/record v0.0.4 h1:/1JHNCxiXGLL/qCh4LEGaAvhj4CcKsb6siTxjLmjdO4 github.com/fenthope/record v0.0.4/go.mod h1:G0a6KCiCDyX2SsC3nfzSN651fJKxH482AyJvzlnvAJU= github.com/go-json-experiment/json v0.0.0-20250813233538-9b1f9ea2e11b h1:6Q4zRHXS/YLOl9Ng1b1OOOBWMidAQZR3Gel0UKPC/KU= github.com/go-json-experiment/json v0.0.0-20250813233538-9b1f9ea2e11b/go.mod h1:TiCD2a1pcmjd7YnhGH0f/zKNcCD06B029pHhzV23c2M= -github.com/go-json-experiment/json v0.0.0-20250910080747-cc2cfa0554c3 h1:02WINGfSX5w0Mn+F28UyRoSt9uvMhKguwWMlOAh6U/0= -github.com/go-json-experiment/json v0.0.0-20250910080747-cc2cfa0554c3/go.mod h1:uNVvRXArCGbZ508SxYYTC5v1JWoz2voff5pm25jU1Ok= github.com/hashicorp/golang-lru/v2 v2.0.7 h1:a+bsQ5rvGLjzHuww6tVxozPZFVghXaHOwFs4luLUK2k= github.com/hashicorp/golang-lru/v2 v2.0.7/go.mod h1:QeFd9opnmA6QUJc5vARoKUSoFhyfM2/ZepoAG6RGpeM= github.com/infinite-iroha/touka v0.3.7 h1:bIIZW5Weh7lVpyOWh4FmyR9UOfb5FOt+cR9yQ30FJLA= diff --git a/proxy/chunkreq.go b/proxy/chunkreq.go index 544a312..9227b78 100644 --- a/proxy/chunkreq.go +++ b/proxy/chunkreq.go @@ -127,14 +127,18 @@ func ChunkedProxyRequest(ctx context.Context, c *touka.Context, u string, cfg *c defer bodyReader.Close() if MatcherShell(u) && matchString(matcher) && cfg.Shell.Editor { + // 判断body是不是gzip + var compress string + if resp.Header.Get("Content-Encoding") == "gzip" { + compress = "gzip" + } c.Debugf("Use Shell Editor: %s %s %s %s %s", c.ClientIP(), c.Request.Method, u, c.UserAgent(), c.Request.Proto) - c.DelHeader("Content-Length") - c.DelHeader("Content-Encoding") + c.Header("Content-Length", "") var reader io.Reader - reader, _, err = processLinks(bodyReader, c.Request.Host, cfg, c, bodySize) + reader, _, err = processLinks(bodyReader, compress, c.Request.Host, cfg, c) c.WriteStream(reader) if err != nil { c.Errorf("%s %s %s %s %s Failed to copy response body: %v", c.ClientIP(), c.Request.Method, u, c.UserAgent(), c.Request.Proto, err) @@ -142,6 +146,7 @@ func ChunkedProxyRequest(ctx context.Context, c *touka.Context, u string, cfg *c return } } else { + if contentLength != "" { c.SetHeader("Content-Length", contentLength) c.WriteStream(bodyReader) diff --git a/proxy/handler.go b/proxy/handler.go index ac89662..b15f1b5 100644 --- a/proxy/handler.go +++ b/proxy/handler.go @@ -9,32 +9,6 @@ import ( "github.com/infinite-iroha/touka" ) -// buildHandlerPath 使用 strings.Builder 来高效地构建最终的 URL. -// 这避免了使用标准字符串拼接时发生的多次内存分配. -func buildHandlerPath(path, matcher string) string { - var sb strings.Builder - sb.Grow(len(path) + 50) - - if matcher == "blob" && strings.HasPrefix(path, "github.com") { - sb.WriteString("https://raw.githubusercontent.com") - if len(path) > 10 { // len("github.com") - pathSegment := path[10:] // skip "github.com" - if i := strings.Index(pathSegment, "/blob/"); i != -1 { - sb.WriteString(pathSegment[:i]) - sb.WriteString("/") - sb.WriteString(pathSegment[i+len("/blob/"):]) - } else { - sb.WriteString(pathSegment) - } - } - } else { - sb.WriteString("https://") - sb.WriteString(path) - } - - return sb.String() -} - var re = regexp.MustCompile(`^(http:|https:)?/?/?(.*)`) // 匹配http://或https://开头的路径 func NoRouteHandler(cfg *config.Config) touka.HandlerFunc { @@ -58,16 +32,21 @@ func NoRouteHandler(cfg *config.Config) touka.HandlerFunc { } // 制作url - path := matches[2] + rawPath = "https://" + matches[2] + + var ( + user string + repo string + matcher string + ) + var matcherErr *GHProxyErrors - user, repo, matcher, matcherErr := Matcher("https://"+path, cfg) + user, repo, matcher, matcherErr = Matcher(rawPath, cfg) if matcherErr != nil { ErrorPage(c, matcherErr) return } - rawPath = buildHandlerPath(path, matcher) - shoudBreak = listCheck(cfg, c, user, repo, rawPath) if shoudBreak { return @@ -78,7 +57,11 @@ func NoRouteHandler(cfg *config.Config) touka.HandlerFunc { return } + // 处理blob/raw路径 if matcher == "blob" { + rawPath = rawPath[18:] + rawPath = "https://raw.githubusercontent.com" + rawPath + rawPath = strings.Replace(rawPath, "/blob/", "/", 1) matcher = "raw" } diff --git a/proxy/nest.go b/proxy/nest.go index 921af3a..4f93f20 100644 --- a/proxy/nest.go +++ b/proxy/nest.go @@ -2,78 +2,15 @@ package proxy import ( "bufio" - "bytes" + "compress/gzip" "fmt" "ghproxy/config" "io" "strings" - "sync" "github.com/infinite-iroha/touka" ) -var ( - prefixGithub = []byte("https://github.com") - prefixRawUser = []byte("https://raw.githubusercontent.com") - prefixRaw = []byte("https://raw.github.com") - prefixGistUser = []byte("https://gist.githubusercontent.com") - prefixGist = []byte("https://gist.github.com") - prefixAPI = []byte("https://api.github.com") - prefixHTTP = []byte("http://") - prefixHTTPS = []byte("https://") -) - -func EditorMatcherBytes(rawPath []byte, cfg *config.Config) (bool, error) { - if bytes.HasPrefix(rawPath, prefixGithub) { - return true, nil - } - if bytes.HasPrefix(rawPath, prefixRawUser) { - return true, nil - } - if bytes.HasPrefix(rawPath, prefixRaw) { - return true, nil - } - if bytes.HasPrefix(rawPath, prefixGistUser) { - return true, nil - } - if bytes.HasPrefix(rawPath, prefixGist) { - return true, nil - } - if cfg.Shell.RewriteAPI { - if bytes.HasPrefix(rawPath, prefixAPI) { - return true, nil - } - } - return false, nil -} - -func modifyURLBytes(url []byte, host []byte, cfg *config.Config) []byte { - matched, err := EditorMatcherBytes(url, cfg) - if err != nil || !matched { - return url - } - - var u []byte - if bytes.HasPrefix(url, prefixHTTPS) { - u = url[len(prefixHTTPS):] - } else if bytes.HasPrefix(url, prefixHTTP) { - u = url[len(prefixHTTP):] - } else { - u = url - } - - newLen := len(prefixHTTPS) + len(host) + 1 + len(u) - newURL := make([]byte, newLen) - - written := 0 - written += copy(newURL[written:], prefixHTTPS) - written += copy(newURL[written:], host) - written += copy(newURL[written:], []byte("/")) - copy(newURL[written:], u) - - return newURL -} - func EditorMatcher(rawPath string, cfg *config.Config) (bool, error) { // 匹配 "https://github.com"开头的链接 if strings.HasPrefix(rawPath, "https://github.com") { @@ -128,126 +65,116 @@ func modifyURL(url string, host string, cfg *config.Config) string { return url } -var bufferPool = sync.Pool{ - New: func() interface{} { - return new(bytes.Buffer) - }, -} - -// processLinksStreamingInternal is a link processing function that reads the input line by line. -// It is memory-safe for large files but less performant due to numerous small allocations. -func processLinksStreamingInternal(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) { - pipeReader, pipeWriter := io.Pipe() +// processLinks 处理链接,返回包含处理后数据的 io.Reader +func processLinks(input io.ReadCloser, compress string, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) { + pipeReader, pipeWriter := io.Pipe() // 创建 io.Pipe readerOut = pipeReader - go func() { + go func() { // 在 Goroutine 中执行写入操作 defer func() { - if err != nil { - pipeWriter.CloseWithError(err) - } else { - pipeWriter.Close() + if pipeWriter != nil { // 确保 pipeWriter 关闭,即使发生错误 + if err != nil { + if closeErr := pipeWriter.CloseWithError(err); closeErr != nil { // 如果有错误,传递错误给 reader + c.Errorf("pipeWriter close with error failed: %v, original error: %v", closeErr, err) + } + } else { + if closeErr := pipeWriter.Close(); closeErr != nil { // 没有错误,正常关闭 + c.Errorf("pipeWriter close failed: %v", closeErr) + if err == nil { // 如果之前没有错误,记录关闭错误 + err = closeErr + } + } + } } }() - defer input.Close() - bufReader := bufio.NewReader(input) - bufWriter := bufio.NewWriterSize(pipeWriter, 4096) - defer bufWriter.Flush() + defer func() { + if err := input.Close(); err != nil { + c.Errorf("input close failed: %v", err) + } + }() + + var bufReader *bufio.Reader + + if compress == "gzip" { + // 解压gzip + gzipReader, gzipErr := gzip.NewReader(input) + if gzipErr != nil { + err = fmt.Errorf("gzip解压错误: %v", gzipErr) + return // Goroutine 中使用 return 返回错误 + } + defer gzipReader.Close() + bufReader = bufio.NewReader(gzipReader) + } else { + bufReader = bufio.NewReader(input) + } + + var bufWriter *bufio.Writer + var gzipWriter *gzip.Writer + + // 根据是否gzip确定 writer 的创建 + if compress == "gzip" { + gzipWriter = gzip.NewWriter(pipeWriter) // 使用 pipeWriter + bufWriter = bufio.NewWriterSize(gzipWriter, 4096) //设置缓冲区大小 + } else { + bufWriter = bufio.NewWriterSize(pipeWriter, 4096) // 使用 pipeWriter + } + + //确保writer关闭 + defer func() { + var closeErr error // 局部变量,用于保存defer中可能发生的错误 + + if gzipWriter != nil { + if closeErr = gzipWriter.Close(); closeErr != nil { + c.Errorf("gzipWriter close failed %v", closeErr) + // 如果已经存在错误,则保留。否则,记录此错误。 + if err == nil { + err = closeErr + } + } + } + if flushErr := bufWriter.Flush(); flushErr != nil { + c.Errorf("writer flush failed %v", flushErr) + // 如果已经存在错误,则保留。否则,记录此错误。 + if err == nil { + err = flushErr + } + } + }() + + // 使用正则表达式匹配 http 和 https 链接 for { line, readErr := bufReader.ReadString('\n') - if readErr != nil && readErr != io.EOF { - err = fmt.Errorf("read error: %w", readErr) - return + if readErr != nil { + if readErr == io.EOF { + break // 文件结束 + } + err = fmt.Errorf("读取行错误: %v", readErr) // 传递错误 + return // Goroutine 中使用 return 返回错误 } + // 替换所有匹配的 URL modifiedLine := urlPattern.ReplaceAllStringFunc(line, func(originalURL string) string { - return modifyURL(originalURL, host, cfg) + return modifyURL(originalURL, host, cfg) // 假设 modifyURL 函数已定义 }) - var n int - n, err = bufWriter.WriteString(modifiedLine) - written += int64(n) - if err != nil { - err = fmt.Errorf("write error: %w", err) - return + n, writeErr := bufWriter.WriteString(modifiedLine) + written += int64(n) // 更新写入的字节数 + if writeErr != nil { + err = fmt.Errorf("写入文件错误: %v", writeErr) // 传递错误 + return // Goroutine 中使用 return 返回错误 } + } - if readErr == io.EOF { - break + // 在返回之前,再刷新一次 (虽然 defer 中已经有 flush,但这里再加一次确保及时刷新) + if flushErr := bufWriter.Flush(); flushErr != nil { + if err == nil { // 避免覆盖之前的错误 + err = flushErr } + return // Goroutine 中使用 return 返回错误 } }() - return readerOut, written, nil -} - -// processLinks acts as a dispatcher, choosing the best processing strategy based on file size. -// It uses a memory-safe streaming approach for large or unknown-size files, -// and a high-performance buffered approach for smaller files. -func processLinks(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context, bodySize int) (readerOut io.Reader, written int64, err error) { - const sizeThreshold = 256 * 1024 // 256KB - - // Use streaming for large or unknown size files to prevent OOM - if bodySize == -1 || bodySize > sizeThreshold { - c.Debugf("Using streaming processor for large/unknown size file (%d bytes)", bodySize) - return processLinksStreamingInternal(input, host, cfg, c) - } else { - c.Debugf("Using buffered processor for small file (%d bytes)", bodySize) - return processLinksBufferedInternal(input, host, cfg, c) - } -} - -// processLinksBufferedInternal a link processing function that reads the entire content into a buffer. -// It is optimized for performance on smaller files but carries an OOM risk for large files. -func processLinksBufferedInternal(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) { - pipeReader, pipeWriter := io.Pipe() - readerOut = pipeReader - hostBytes := []byte(host) - - go func() { - // 在 goroutine 退出时, 根据 err 是否为 nil, 带错误或正常关闭 pipeWriter - defer func() { - if closeErr := input.Close(); closeErr != nil { - c.Errorf("input close failed: %v", closeErr) - } - }() - defer func() { - if err != nil { - if closeErr := pipeWriter.CloseWithError(err); closeErr != nil { - c.Errorf("pipeWriter close with error failed: %v", closeErr) - } - } else { - if closeErr := pipeWriter.Close(); closeErr != nil { - c.Errorf("pipeWriter close failed: %v", closeErr) - } - } - }() - - buf := bufferPool.Get().(*bytes.Buffer) - buf.Reset() - defer bufferPool.Put(buf) - - // 将全部输入读入复用的缓冲区 - if _, err = buf.ReadFrom(input); err != nil { - err = fmt.Errorf("reading input failed: %w", err) - return - } - - // 使用 ReplaceAllFunc 和字节版本辅助函数, 实现准零分配 - modifiedBytes := urlPattern.ReplaceAllFunc(buf.Bytes(), func(originalURL []byte) []byte { - return modifyURLBytes(originalURL, hostBytes, cfg) - }) - - // 将处理后的字节写回管道 - var n int - n, err = pipeWriter.Write(modifiedBytes) - if err != nil { - err = fmt.Errorf("writing to pipe failed: %w", err) - return - } - written = int64(n) - }() - - return readerOut, written, nil + return readerOut, written, nil // 返回 reader 和 written,error 由 Goroutine 通过 pipeWriter.CloseWithError 传递 } diff --git a/proxy/nest_bench_test.go b/proxy/nest_bench_test.go deleted file mode 100644 index d254290..0000000 --- a/proxy/nest_bench_test.go +++ /dev/null @@ -1,65 +0,0 @@ -package proxy - -import ( - "ghproxy/config" - "io" - "strings" - "testing" -) - -const benchmarkInput = ` -Some text here. -Link to be replaced: http://github.com/user/repo -Another link: https://google.com -And one more: http://example.com/some/path -This should not be replaced: notalink -End of text. -` - -func BenchmarkProcessLinksStreaming(b *testing.B) { - cfg := &config.Config{} - host := "my-proxy.com" - - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - b.StopTimer() - input := io.NopCloser(strings.NewReader(benchmarkInput)) - b.StartTimer() - - reader, _, err := processLinksStreamingInternal(input, host, cfg, nil) - if err != nil { - b.Fatalf("processLinksStreamingInternal failed: %v", err) - } - - _, err = io.ReadAll(reader) - if err != nil { - b.Fatalf("Failed to read from processed reader: %v", err) - } - } -} - -func BenchmarkProcessLinksBuffered(b *testing.B) { - cfg := &config.Config{} - host := "my-proxy.com" - - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - b.StopTimer() - input := io.NopCloser(strings.NewReader(benchmarkInput)) - b.StartTimer() - - reader, _, err := processLinksBufferedInternal(input, host, cfg, nil) - if err != nil { - b.Fatalf("processLinksBufferedInternal failed: %v", err) - } - - _, err = io.ReadAll(reader) - if err != nil { - b.Fatalf("Failed to read from processed reader: %v", err) - } - } -} diff --git a/proxy/reqheader.go b/proxy/reqheader.go index 57d8542..c89dc76 100644 --- a/proxy/reqheader.go +++ b/proxy/reqheader.go @@ -27,7 +27,6 @@ var ( "CDN-Loop": {}, "Upgrade": {}, "Connection": {}, - "Accept-Encoding": {}, } cloneHeadersToRemove = map[string]struct{}{ @@ -44,7 +43,7 @@ var ( var ( defaultHeaders = map[string]string{ "Accept": "*/*", - "Accept-Encoding": "", + "Accept-Encoding": "gzip", "Transfer-Encoding": "chunked", "User-Agent": "GHProxy/1.0", } diff --git a/proxy/routing.go b/proxy/routing.go index 48a4c69..7a5748f 100644 --- a/proxy/routing.go +++ b/proxy/routing.go @@ -7,33 +7,6 @@ import ( "github.com/infinite-iroha/touka" ) -// buildRoutingPath 使用 strings.Builder 来高效地构建最终的 URL. -// 这避免了使用标准字符串拼接时发生的多次内存分配. -func buildRoutingPath(rawPath, matcher string) string { - var sb strings.Builder - // 预分配内存以提高性能 - // (This comment is in Chinese as requested by the user) - sb.Grow(len(rawPath) + 30) - sb.WriteString("https://") - - if matcher == "blob" { - sb.WriteString("raw.githubusercontent.com") - if len(rawPath) > 10 { // len("github.com") - pathSegment := rawPath[10:] - if i := strings.Index(pathSegment, "/blob/"); i != -1 { - sb.WriteString(pathSegment[:i]) - sb.WriteString("/") - sb.WriteString(pathSegment[i+len("/blob/"):]) - } else { - sb.WriteString(pathSegment) - } - } - } else { - sb.WriteString(rawPath) - } - return sb.String() -} - func RoutingHandler(cfg *config.Config) touka.HandlerFunc { return func(c *touka.Context) { @@ -71,11 +44,17 @@ func RoutingHandler(cfg *config.Config) touka.HandlerFunc { return } - rawPath = buildRoutingPath(rawPath, matcher) + // 处理blob/raw路径 if matcher == "blob" { + rawPath = rawPath[10:] + rawPath = "raw.githubusercontent.com" + rawPath + rawPath = strings.Replace(rawPath, "/blob/", "/", 1) matcher = "raw" } + // 为rawpath加入https:// 头 + rawPath = "https://" + rawPath + switch matcher { case "releases", "blob", "raw", "gist", "api": ChunkedProxyRequest(ctx, c, rawPath, cfg, matcher)