mirror of
https://github.com/WJQSERVER-STUDIO/ghproxy.git
synced 2026-06-13 15:47:37 +08:00
perf(proxy): reduce nest rewrite allocations
- Dispatch shell link rewriting between streaming and buffered paths based on response size - Reuse buffers and reduce URL construction allocations in proxy handlers - Add nest benchmarks and align extractParts compatibility expectations with the current contract
This commit is contained in:
parent
4c555ed50c
commit
e2719aa761
6 changed files with 248 additions and 88 deletions
|
|
@ -74,7 +74,7 @@ func ChunkedProxyRequest(ctx context.Context, c *touka.Context, u string, cfg *c
|
||||||
// 处理响应体大小限制
|
// 处理响应体大小限制
|
||||||
|
|
||||||
var (
|
var (
|
||||||
bodySize int
|
bodySize = -1
|
||||||
contentLength string
|
contentLength string
|
||||||
sizelimit int
|
sizelimit int
|
||||||
)
|
)
|
||||||
|
|
@ -134,7 +134,7 @@ func ChunkedProxyRequest(ctx context.Context, c *touka.Context, u string, cfg *c
|
||||||
|
|
||||||
var reader io.Reader
|
var reader io.Reader
|
||||||
|
|
||||||
reader, _, err = processLinks(bodyReader, c.Request.Host, cfg, c)
|
reader, _, err = processLinks(bodyReader, c.Request.Host, cfg, c, bodySize)
|
||||||
c.WriteStream(reader)
|
c.WriteStream(reader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
c.Errorf("%s %s %s %s %s Failed to copy response body: %v", c.ClientIP(), c.Request.Method, u, c.UserAgent(), c.Request.Proto, err)
|
c.Errorf("%s %s %s %s %s Failed to copy response body: %v", c.ClientIP(), c.Request.Method, u, c.UserAgent(), c.Request.Proto, err)
|
||||||
|
|
|
||||||
|
|
@ -9,6 +9,28 @@ import (
|
||||||
"github.com/infinite-iroha/touka"
|
"github.com/infinite-iroha/touka"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// buildProxyPath turns a scheme-less upstream path into the absolute https URL
// that should be requested.
//
// When matcher is "blob" and path begins with "github.com", the host part is
// replaced by raw.githubusercontent.com and the first "/blob/" segment is
// collapsed into a single "/". Every other input is returned with "https://"
// prepended and is otherwise unchanged.
func buildProxyPath(path, matcher string) string {
	const (
		githubHost = "github.com"
		rawOrigin  = "https://raw.githubusercontent.com"
		blobMarker = "/blob/"
	)

	// Anything that is not a github.com blob link only needs the scheme.
	if matcher != "blob" || !strings.HasPrefix(path, githubHost) {
		return "https://" + path
	}

	// Everything after the host, e.g. "/user/repo/blob/main/file".
	rest := path[len(githubHost):]

	before, after, found := strings.Cut(rest, blobMarker)
	if !found {
		return rawOrigin + rest
	}
	// Only the first "/blob/" is dropped; later occurrences belong to the file path.
	return rawOrigin + before + "/" + after
}
|
||||||
|
|
||||||
var re = regexp.MustCompile(`^(http:|https:)?/?/?(.*)`) // 匹配http://或https://开头的路径
|
var re = regexp.MustCompile(`^(http:|https:)?/?/?(.*)`) // 匹配http://或https://开头的路径
|
||||||
|
|
||||||
func NoRouteHandler(cfg *config.Config) touka.HandlerFunc {
|
func NoRouteHandler(cfg *config.Config) touka.HandlerFunc {
|
||||||
|
|
@ -31,22 +53,16 @@ func NoRouteHandler(cfg *config.Config) touka.HandlerFunc {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// 制作url
|
path := matches[2]
|
||||||
rawPath = "https://" + matches[2]
|
|
||||||
|
|
||||||
var (
|
|
||||||
user string
|
|
||||||
repo string
|
|
||||||
matcher string
|
|
||||||
)
|
|
||||||
|
|
||||||
var matcherErr *GHProxyErrors
|
var matcherErr *GHProxyErrors
|
||||||
user, repo, matcher, matcherErr = Matcher(rawPath, cfg)
|
user, repo, matcher, matcherErr := Matcher("https://"+path, cfg)
|
||||||
if matcherErr != nil {
|
if matcherErr != nil {
|
||||||
ErrorPage(c, matcherErr)
|
ErrorPage(c, matcherErr)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
rawPath = buildProxyPath(path, matcher)
|
||||||
|
|
||||||
shoudBreak = listCheck(cfg, c, user, repo, rawPath)
|
shoudBreak = listCheck(cfg, c, user, repo, rawPath)
|
||||||
if shoudBreak {
|
if shoudBreak {
|
||||||
return
|
return
|
||||||
|
|
@ -59,9 +75,6 @@ func NoRouteHandler(cfg *config.Config) touka.HandlerFunc {
|
||||||
|
|
||||||
// 处理blob/raw路径
|
// 处理blob/raw路径
|
||||||
if matcher == "blob" {
|
if matcher == "blob" {
|
||||||
rawPath = rawPath[18:]
|
|
||||||
rawPath = "https://raw.githubusercontent.com" + rawPath
|
|
||||||
rawPath = strings.Replace(rawPath, "/blob/", "/", 1)
|
|
||||||
matcher = "raw"
|
matcher = "raw"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -265,10 +265,11 @@ func TestExtractParts_Compatibility(t *testing.T) {
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Empty path segments",
|
name: "Empty path segments",
|
||||||
rawURL: "https://example.com//repo/a", // Will be treated as /repo/a
|
rawURL: "https://example.com//repo/a",
|
||||||
expectedOwner: "", // First part is empty
|
expectedOwner: "/",
|
||||||
expectedRepo: "/repo",
|
expectedRepo: "/repo",
|
||||||
expectedRem: "/a",
|
expectedRem: "/a",
|
||||||
|
expectedQuery: url.Values{},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
name: "Invalid URL format",
|
name: "Invalid URL format",
|
||||||
|
|
|
||||||
216
proxy/nest.go
216
proxy/nest.go
|
|
@ -2,14 +2,27 @@ package proxy
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"bufio"
|
"bufio"
|
||||||
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"ghproxy/config"
|
"ghproxy/config"
|
||||||
"io"
|
"io"
|
||||||
"strings"
|
"strings"
|
||||||
|
"sync"
|
||||||
|
|
||||||
"github.com/infinite-iroha/touka"
|
"github.com/infinite-iroha/touka"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Byte-slice forms of the URL prefixes used by the []byte link-rewriting
// helpers, built once at package initialisation so that prefix checks do not
// convert string literals on every call.
var (
	// Upstream origins that are matched by prefix.
	prefixGithub   = []byte("https://github.com")
	prefixRawUser  = []byte("https://raw.githubusercontent.com")
	prefixRaw      = []byte("https://raw.github.com")
	prefixGistUser = []byte("https://gist.githubusercontent.com")
	prefixGist     = []byte("https://gist.github.com")
	prefixAPIBytes = []byte("https://api.github.com")

	// URL schemes.
	prefixHTTP  = []byte("http://")
	prefixHTTPS = []byte("https://")
)
|
||||||
|
|
||||||
func EditorMatcher(rawPath string, cfg *config.Config) (bool, error) {
|
func EditorMatcher(rawPath string, cfg *config.Config) (bool, error) {
|
||||||
// 匹配 "https://github.com"开头的链接
|
// 匹配 "https://github.com"开头的链接
|
||||||
if strings.HasPrefix(rawPath, "https://github.com") {
|
if strings.HasPrefix(rawPath, "https://github.com") {
|
||||||
|
|
@ -40,6 +53,28 @@ func EditorMatcher(rawPath string, cfg *config.Config) (bool, error) {
|
||||||
return false, nil
|
return false, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func EditorMatcherBytes(rawPath []byte, cfg *config.Config) bool {
|
||||||
|
if bytes.HasPrefix(rawPath, prefixGithub) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if bytes.HasPrefix(rawPath, prefixRawUser) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if bytes.HasPrefix(rawPath, prefixRaw) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if bytes.HasPrefix(rawPath, prefixGistUser) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if bytes.HasPrefix(rawPath, prefixGist) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
if cfg.Shell.RewriteAPI && bytes.HasPrefix(rawPath, prefixAPIBytes) {
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
|
||||||
// 匹配文件扩展名是sh的rawPath
|
// 匹配文件扩展名是sh的rawPath
|
||||||
func MatcherShell(rawPath string) bool {
|
func MatcherShell(rawPath string) bool {
|
||||||
return strings.HasSuffix(rawPath, ".sh")
|
return strings.HasSuffix(rawPath, ".sh")
|
||||||
|
|
@ -64,87 +99,140 @@ func modifyURL(url string, host string, cfg *config.Config) string {
|
||||||
return url
|
return url
|
||||||
}
|
}
|
||||||
|
|
||||||
// processLinks 处理链接,返回包含处理后数据的 io.Reader
|
func modifyURLBytes(url []byte, host []byte, cfg *config.Config) []byte {
|
||||||
func processLinks(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) {
|
if !EditorMatcherBytes(url, cfg) {
|
||||||
pipeReader, pipeWriter := io.Pipe() // 创建 io.Pipe
|
return url
|
||||||
|
}
|
||||||
|
|
||||||
|
var trimmed []byte
|
||||||
|
if bytes.HasPrefix(url, prefixHTTPS) {
|
||||||
|
trimmed = url[len(prefixHTTPS):]
|
||||||
|
} else if bytes.HasPrefix(url, prefixHTTP) {
|
||||||
|
trimmed = url[len(prefixHTTP):]
|
||||||
|
} else {
|
||||||
|
trimmed = url
|
||||||
|
}
|
||||||
|
|
||||||
|
newURL := make([]byte, len(prefixHTTPS)+len(host)+1+len(trimmed))
|
||||||
|
written := 0
|
||||||
|
written += copy(newURL[written:], prefixHTTPS)
|
||||||
|
written += copy(newURL[written:], host)
|
||||||
|
written += copy(newURL[written:], []byte("/"))
|
||||||
|
copy(newURL[written:], trimmed)
|
||||||
|
|
||||||
|
return newURL
|
||||||
|
}
|
||||||
|
|
||||||
|
// bufferPool hands out reusable *bytes.Buffer values so that the buffered
// rewrite path does not allocate a fresh buffer for every response. A buffer
// taken from the pool may still hold data from an earlier use, so it must be
// Reset before being written to.
var bufferPool = sync.Pool{
	New: func() any {
		return &bytes.Buffer{}
	},
}
|
||||||
|
|
||||||
|
func processLinksStreamingInternal(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) {
|
||||||
|
pipeReader, pipeWriter := io.Pipe()
|
||||||
readerOut = pipeReader
|
readerOut = pipeReader
|
||||||
|
|
||||||
go func() { // 在 Goroutine 中执行写入操作
|
go func() {
|
||||||
defer func() {
|
defer func() {
|
||||||
if pipeWriter != nil { // 确保 pipeWriter 关闭,即使发生错误
|
if err != nil {
|
||||||
if err != nil {
|
_ = pipeWriter.CloseWithError(err)
|
||||||
if closeErr := pipeWriter.CloseWithError(err); closeErr != nil { // 如果有错误,传递错误给 reader
|
return
|
||||||
c.Errorf("pipeWriter close with error failed: %v, original error: %v", closeErr, err)
|
}
|
||||||
}
|
_ = pipeWriter.Close()
|
||||||
} else {
|
}()
|
||||||
if closeErr := pipeWriter.Close(); closeErr != nil { // 没有错误,正常关闭
|
defer func() {
|
||||||
c.Errorf("pipeWriter close failed: %v", closeErr)
|
if closeErr := input.Close(); closeErr != nil && c != nil {
|
||||||
if err == nil { // 如果之前没有错误,记录关闭错误
|
c.Errorf("input close failed: %v", closeErr)
|
||||||
err = closeErr
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
bufReader := bufio.NewReader(input)
|
||||||
|
bufWriter := bufio.NewWriterSize(pipeWriter, 4096)
|
||||||
defer func() {
|
defer func() {
|
||||||
if err := input.Close(); err != nil {
|
if flushErr := bufWriter.Flush(); flushErr != nil && err == nil {
|
||||||
c.Errorf("input close failed: %v", err)
|
err = fmt.Errorf("flush writer failed: %w", flushErr)
|
||||||
}
|
|
||||||
|
|
||||||
}()
|
|
||||||
|
|
||||||
var bufReader *bufio.Reader
|
|
||||||
|
|
||||||
bufReader = bufio.NewReader(input)
|
|
||||||
|
|
||||||
var bufWriter *bufio.Writer
|
|
||||||
|
|
||||||
bufWriter = bufio.NewWriterSize(pipeWriter, 4096) // 使用 pipeWriter
|
|
||||||
|
|
||||||
//确保writer关闭
|
|
||||||
defer func() {
|
|
||||||
if flushErr := bufWriter.Flush(); flushErr != nil {
|
|
||||||
c.Errorf("writer flush failed %v", flushErr)
|
|
||||||
// 如果已经存在错误,则保留。否则,记录此错误。
|
|
||||||
if err == nil {
|
|
||||||
err = flushErr
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// 使用正则表达式匹配 http 和 https 链接
|
|
||||||
for {
|
for {
|
||||||
line, readErr := bufReader.ReadString('\n')
|
line, readErr := bufReader.ReadString('\n')
|
||||||
if readErr != nil {
|
if readErr != nil && readErr != io.EOF {
|
||||||
if readErr == io.EOF {
|
err = fmt.Errorf("read error: %w", readErr)
|
||||||
break // 文件结束
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(line) > 0 {
|
||||||
|
modifiedLine := urlPattern.ReplaceAllStringFunc(line, func(originalURL string) string {
|
||||||
|
return modifyURL(originalURL, host, cfg)
|
||||||
|
})
|
||||||
|
|
||||||
|
n, writeErr := bufWriter.WriteString(modifiedLine)
|
||||||
|
written += int64(n)
|
||||||
|
if writeErr != nil {
|
||||||
|
err = fmt.Errorf("write error: %w", writeErr)
|
||||||
|
return
|
||||||
}
|
}
|
||||||
err = fmt.Errorf("读取行错误: %v", readErr) // 传递错误
|
|
||||||
return // Goroutine 中使用 return 返回错误
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// 替换所有匹配的 URL
|
if readErr == io.EOF {
|
||||||
modifiedLine := urlPattern.ReplaceAllStringFunc(line, func(originalURL string) string {
|
break
|
||||||
return modifyURL(originalURL, host, cfg) // 假设 modifyURL 函数已定义
|
|
||||||
})
|
|
||||||
|
|
||||||
n, writeErr := bufWriter.WriteString(modifiedLine)
|
|
||||||
written += int64(n) // 更新写入的字节数
|
|
||||||
if writeErr != nil {
|
|
||||||
err = fmt.Errorf("写入文件错误: %v", writeErr) // 传递错误
|
|
||||||
return // Goroutine 中使用 return 返回错误
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 在返回之前,再刷新一次 (虽然 defer 中已经有 flush,但这里再加一次确保及时刷新)
|
|
||||||
if flushErr := bufWriter.Flush(); flushErr != nil {
|
|
||||||
if err == nil { // 避免覆盖之前的错误
|
|
||||||
err = flushErr
|
|
||||||
}
|
|
||||||
return // Goroutine 中使用 return 返回错误
|
|
||||||
}
|
|
||||||
}()
|
}()
|
||||||
|
|
||||||
return readerOut, written, nil // 返回 reader 和 written,error 由 Goroutine 通过 pipeWriter.CloseWithError 传递
|
return readerOut, written, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func processLinks(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context, bodySize int) (readerOut io.Reader, written int64, err error) {
|
||||||
|
const sizeThreshold = 256 * 1024
|
||||||
|
|
||||||
|
if bodySize == -1 || bodySize > sizeThreshold {
|
||||||
|
return processLinksStreamingInternal(input, host, cfg, c)
|
||||||
|
}
|
||||||
|
|
||||||
|
return processLinksBufferedInternal(input, host, cfg, c)
|
||||||
|
}
|
||||||
|
|
||||||
|
func processLinksBufferedInternal(input io.ReadCloser, host string, cfg *config.Config, c *touka.Context) (readerOut io.Reader, written int64, err error) {
|
||||||
|
pipeReader, pipeWriter := io.Pipe()
|
||||||
|
readerOut = pipeReader
|
||||||
|
hostBytes := []byte(host)
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer func() {
|
||||||
|
if closeErr := input.Close(); closeErr != nil && c != nil {
|
||||||
|
c.Errorf("input close failed: %v", closeErr)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
defer func() {
|
||||||
|
if err != nil {
|
||||||
|
_ = pipeWriter.CloseWithError(err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
_ = pipeWriter.Close()
|
||||||
|
}()
|
||||||
|
|
||||||
|
buf := bufferPool.Get().(*bytes.Buffer)
|
||||||
|
buf.Reset()
|
||||||
|
defer bufferPool.Put(buf)
|
||||||
|
|
||||||
|
if _, err = buf.ReadFrom(input); err != nil {
|
||||||
|
err = fmt.Errorf("reading input failed: %w", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
modifiedBytes := urlPattern.ReplaceAllFunc(buf.Bytes(), func(originalURL []byte) []byte {
|
||||||
|
return modifyURLBytes(originalURL, hostBytes, cfg)
|
||||||
|
})
|
||||||
|
|
||||||
|
var n int
|
||||||
|
n, err = pipeWriter.Write(modifiedBytes)
|
||||||
|
written = int64(n)
|
||||||
|
if err != nil {
|
||||||
|
err = fmt.Errorf("writing to pipe failed: %w", err)
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
return readerOut, written, nil
|
||||||
}
|
}
|
||||||
|
|
|
||||||
63
proxy/nest_bench_test.go
Normal file
63
proxy/nest_bench_test.go
Normal file
|
|
@ -0,0 +1,63 @@
|
||||||
|
package proxy
|
||||||
|
|
||||||
|
import (
|
||||||
|
"ghproxy/config"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
)
|
||||||
|
|
||||||
|
const benchmarkInput = `
|
||||||
|
Some text here.
|
||||||
|
Link to be replaced: http://github.com/user/repo
|
||||||
|
Another link: https://google.com
|
||||||
|
And one more: http://example.com/some/path
|
||||||
|
This should not be replaced: notalink
|
||||||
|
End of text.
|
||||||
|
`
|
||||||
|
|
||||||
|
func BenchmarkProcessLinksStreaming(b *testing.B) {
|
||||||
|
cfg := &config.Config{}
|
||||||
|
host := "my-proxy.com"
|
||||||
|
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
b.StopTimer()
|
||||||
|
input := io.NopCloser(strings.NewReader(benchmarkInput))
|
||||||
|
b.StartTimer()
|
||||||
|
|
||||||
|
reader, _, err := processLinksStreamingInternal(input, host, cfg, nil)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("processLinksStreamingInternal failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err = io.ReadAll(reader); err != nil {
|
||||||
|
b.Fatalf("failed to read from processed reader: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func BenchmarkProcessLinksBuffered(b *testing.B) {
|
||||||
|
cfg := &config.Config{}
|
||||||
|
host := "my-proxy.com"
|
||||||
|
|
||||||
|
b.ReportAllocs()
|
||||||
|
b.ResetTimer()
|
||||||
|
|
||||||
|
for i := 0; i < b.N; i++ {
|
||||||
|
b.StopTimer()
|
||||||
|
input := io.NopCloser(strings.NewReader(benchmarkInput))
|
||||||
|
b.StartTimer()
|
||||||
|
|
||||||
|
reader, _, err := processLinksBufferedInternal(input, host, cfg, nil)
|
||||||
|
if err != nil {
|
||||||
|
b.Fatalf("processLinksBufferedInternal failed: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, err = io.ReadAll(reader); err != nil {
|
||||||
|
b.Fatalf("failed to read from processed reader: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
@ -44,17 +44,12 @@ func RoutingHandler(cfg *config.Config) touka.HandlerFunc {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// 处理blob/raw路径
|
rawPath = buildProxyPath(rawPath, matcher)
|
||||||
|
|
||||||
if matcher == "blob" {
|
if matcher == "blob" {
|
||||||
rawPath = rawPath[10:]
|
|
||||||
rawPath = "raw.githubusercontent.com" + rawPath
|
|
||||||
rawPath = strings.Replace(rawPath, "/blob/", "/", 1)
|
|
||||||
matcher = "raw"
|
matcher = "raw"
|
||||||
}
|
}
|
||||||
|
|
||||||
// 为rawpath加入https:// 头
|
|
||||||
rawPath = "https://" + rawPath
|
|
||||||
|
|
||||||
switch matcher {
|
switch matcher {
|
||||||
case "releases", "blob", "raw", "gist", "api":
|
case "releases", "blob", "raw", "gist", "api":
|
||||||
ChunkedProxyRequest(ctx, c, rawPath, cfg, matcher)
|
ChunkedProxyRequest(ctx, c, rawPath, cfg, matcher)
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue