Compare commits
19 Commits
0ff15efcd7
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 4e4cb0759f | |||
| 707267e471 | |||
| f5df0b5d54 | |||
| bb1d8c6e7d | |||
| 4687c67685 | |||
| f50d0c9f28 | |||
| 9e5ff37980 | |||
| 2772d6075f | |||
| a036f103af | |||
| 5352dc7200 | |||
| 249619abc5 | |||
| 9c6527dc14 | |||
| a7ee7577c4 | |||
| 6d54fe2715 | |||
| 2d66baaf3c | |||
| 184362429a | |||
| ff1d53e2a1 | |||
| 739d36203c | |||
| 4577fa1dd7 |
@@ -8,12 +8,35 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
-
|
||||
name: Build docker file and publish
|
||||
image: plugins/docker
|
||||
settings:
|
||||
name: Checkout
|
||||
uses: actions/checkout@v4
|
||||
-
|
||||
name: Docker meta
|
||||
id: meta
|
||||
uses: docker/metadata-action@v5
|
||||
with:
|
||||
context: git
|
||||
-
|
||||
name: Login to Gitea
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
registry: lab.5b5.eu
|
||||
repo: ade9/llm-proxy-metrics
|
||||
tags: latest
|
||||
secret: id=mysecret,src=secret-file
|
||||
username: ${{ vars.PACKAGE_USERNAME }}
|
||||
password: ${{ secrets.PACKAGE_TOKEN }}
|
||||
password: ${{ secrets.PACKAGE_TOKEN }}
|
||||
images: |
|
||||
ade9/llm-proxy-metrics
|
||||
-
|
||||
name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
-
|
||||
name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
-
|
||||
name: Build and push
|
||||
uses: docker/build-push-action@v6
|
||||
with:
|
||||
context: .
|
||||
file: ./Dockerfile
|
||||
push: true
|
||||
tags: lab.5b5.eu/ade9/llm-proxy-metrics/${{ steps.meta.outputs.tags }}
|
||||
labels: ${{ steps.meta.outputs.labels }}
|
||||
20
.vscode/launch.json
vendored
Normal file
20
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
|
||||
{
|
||||
"name": "Launch Package",
|
||||
"type": "go",
|
||||
"request": "launch",
|
||||
"mode": "debug",
|
||||
"program": "llmproxymetrics.go",
|
||||
"env": {
|
||||
"PORT": "6677",
|
||||
"BASE_URL": "http://10.0.0.10:11434"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
12
go.mod
12
go.mod
@@ -2,7 +2,17 @@ module ade9/llmproxymetrics
|
||||
|
||||
go 1.23.3
|
||||
|
||||
require github.com/caarlos0/env v3.5.0+incompatible
|
||||
|
||||
require (
|
||||
github.com/caarlos0/env v3.5.0+incompatible
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/caarlos0/env/v11 v11.3.1 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/prometheus/client_golang v1.22.0 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.62.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
golang.org/x/sys v0.30.0 // indirect
|
||||
google.golang.org/protobuf v1.36.5 // indirect
|
||||
)
|
||||
|
||||
18
go.sum
18
go.sum
@@ -1,4 +1,22 @@
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs=
|
||||
github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y=
|
||||
github.com/caarlos0/env/v11 v11.3.1 h1:cArPWC15hWmEt+gWk7YBi7lEXTXCvpaSdCiZE2X5mCA=
|
||||
github.com/caarlos0/env/v11 v11.3.1/go.mod h1:qupehSf/Y0TUTsxKywqRt/vJjN5nz6vauiYEUUr8P4U=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
|
||||
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io=
|
||||
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
|
||||
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||
|
||||
@@ -1,18 +1,22 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"log"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"net/http/httputil"
|
||||
"net/url"
|
||||
"strconv"
|
||||
"time"
|
||||
"strings"
|
||||
|
||||
"github.com/caarlos0/env/v11"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
var cfg config
|
||||
@@ -24,81 +28,162 @@ type config struct {
|
||||
|
||||
func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
r.URL.Path = strings.TrimPrefix(r.URL.Path, "/proxy")
|
||||
r.Host = target.Host
|
||||
r.URL.Scheme = target.Scheme
|
||||
r.URL.Host = target.Host
|
||||
|
||||
lrw := &LoggingResponseWriter{ResponseWriter: w, body: new(bytes.Buffer)}
|
||||
proxy := httputil.NewSingleHostReverseProxy(target)
|
||||
|
||||
startTime := time.Now()
|
||||
recorder := httptest.NewRecorder()
|
||||
proxy.ServeHTTP(recorder, r)
|
||||
|
||||
responseBody := recorder.Body.Bytes()
|
||||
|
||||
var jsonResponse map[string]interface{}
|
||||
err := json.Unmarshal(responseBody, &jsonResponse)
|
||||
if err != nil {
|
||||
log.Printf("Error unmarshalling JSON response: %v", err)
|
||||
lrw.Write(responseBody)
|
||||
return
|
||||
director := func(req *http.Request) {
|
||||
req.Header.Set("X-Forwarded-For", r.RemoteAddr)
|
||||
req.URL = r.URL
|
||||
req.Host = target.Host
|
||||
req.URL.Scheme = target.Scheme
|
||||
req.URL.Host = target.Host
|
||||
}
|
||||
wasstreamed := -1
|
||||
data, err := io.ReadAll(r.Body)
|
||||
if err == nil {
|
||||
r.Body = io.NopCloser(bytes.NewReader(data))
|
||||
var jsonData map[string]interface{}
|
||||
json.NewDecoder(r.Body).Decode(&jsonData)
|
||||
|
||||
// Add your metrics metadata here
|
||||
jsonResponse["metrics"] = map[string]interface{}{
|
||||
"requestPath": r.URL.Path,
|
||||
"statusCode": recorder.Code,
|
||||
"responseTime": time.Since(startTime).Milliseconds(),
|
||||
// Add more metrics as needed
|
||||
}
|
||||
|
||||
modifiedResponseBody, err := json.Marshal(jsonResponse)
|
||||
if err != nil {
|
||||
log.Printf("Error marshalling modified JSON response: %v", err)
|
||||
lrw.Write(responseBody)
|
||||
return
|
||||
}
|
||||
|
||||
for name, values := range recorder.Header() {
|
||||
for _, value := range values {
|
||||
w.Header().Add(name, value)
|
||||
if jsonData["stream"] != nil {
|
||||
if !jsonData["stream"].(bool) {
|
||||
wasstreamed = 0
|
||||
} else {
|
||||
wasstreamed = 1
|
||||
}
|
||||
}
|
||||
} else {
|
||||
fmt.Println("Error reading body:", err)
|
||||
}
|
||||
r.Body = io.NopCloser(bytes.NewReader(data))
|
||||
|
||||
modifyResponse := func(response *http.Response) error {
|
||||
pr, pw := io.Pipe()
|
||||
body := response.Body
|
||||
response.Body = pr
|
||||
|
||||
go func() {
|
||||
defer pw.Close()
|
||||
|
||||
reader := bufio.NewReader(body)
|
||||
for {
|
||||
line, err := reader.ReadBytes('\n')
|
||||
if err != nil {
|
||||
if err == io.EOF {
|
||||
handleJsonLine([]byte(string(line)), wasstreamed)
|
||||
pw.Write(line)
|
||||
break
|
||||
}
|
||||
return
|
||||
}
|
||||
handleJsonLine(line, wasstreamed)
|
||||
pw.Write(line)
|
||||
}
|
||||
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
w.WriteHeader(recorder.Code)
|
||||
lrw.Write(modifiedResponseBody)
|
||||
proxy := httputil.NewSingleHostReverseProxy(target)
|
||||
proxy.Director = director
|
||||
if r.URL.Path == "/api/generate" || r.URL.Path == "/api/chat" {
|
||||
proxy.ModifyResponse = modifyResponse
|
||||
concurrent_requests.Inc()
|
||||
}
|
||||
|
||||
log.Printf("Response with metrics: %s", lrw.Body())
|
||||
proxy.ServeHTTP(w, r)
|
||||
}
|
||||
}
|
||||
|
||||
type LoggingResponseWriter struct {
|
||||
http.ResponseWriter
|
||||
body *bytes.Buffer
|
||||
func handleJsonLine(line []byte, wasstreamed int) {
|
||||
if len(line) == 0 {
|
||||
return
|
||||
}
|
||||
var jsonData map[string]interface{}
|
||||
err := json.Unmarshal([]byte(line), &jsonData)
|
||||
if err != nil {
|
||||
fmt.Println("Error parsing JSON:", err)
|
||||
return
|
||||
}
|
||||
|
||||
if jsonData["done"].(bool) {
|
||||
duration := jsonData["eval_duration"].(float64) / 1000000000.0
|
||||
fmt.Printf("Duration: %.2f seconds\n", duration)
|
||||
opsProcessed.Inc()
|
||||
tokens_out.Add(jsonData["eval_count"].(float64))
|
||||
tokens_in.Add(jsonData["prompt_eval_count"].(float64))
|
||||
prompt_length.Observe(jsonData["prompt_eval_count"].(float64))
|
||||
eval_length.Observe(jsonData["eval_count"].(float64))
|
||||
eval_time.Observe(duration)
|
||||
if wasstreamed == 1 {
|
||||
streamed.Inc()
|
||||
} else if wasstreamed == 0 {
|
||||
notStreamed.Inc()
|
||||
}
|
||||
concurrent_requests.Dec()
|
||||
}
|
||||
}
|
||||
|
||||
func (lrw *LoggingResponseWriter) Write(b []byte) (int, error) {
|
||||
lrw.body.Write(b)
|
||||
return lrw.ResponseWriter.Write(b)
|
||||
}
|
||||
var opsProcessed = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Name: "llmproxymetrics_total_requests",
|
||||
Help: "The total number of processed events",
|
||||
})
|
||||
|
||||
func (lrw *LoggingResponseWriter) Body() string {
|
||||
return lrw.body.String()
|
||||
}
|
||||
var notStreamed = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Name: "llmproxymetrics_non_streamed_requests",
|
||||
Help: "The total number of processed non-streamed events",
|
||||
})
|
||||
var streamed = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Name: "llmproxymetrics_streamed_requests",
|
||||
Help: "The total number of processed streamed events",
|
||||
})
|
||||
var tokens_out = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Name: "llmproxymetrics_tokens_out",
|
||||
Help: "Tokens generated.",
|
||||
})
|
||||
var tokens_in = promauto.NewCounter(prometheus.CounterOpts{
|
||||
Name: "llmproxymetrics_tokens_in",
|
||||
Help: "Tokens input.",
|
||||
})
|
||||
var eval_time = promauto.NewHistogram(prometheus.HistogramOpts{
|
||||
Name: "llmproxymetrics_eval_time",
|
||||
Help: "Seconds spent evaluating the response.",
|
||||
Buckets: prometheus.LinearBuckets(5, 5, 23),
|
||||
})
|
||||
var prompt_length = promauto.NewHistogram(prometheus.HistogramOpts{
|
||||
Name: "llmproxymetrics_prompt_length",
|
||||
Help: "Prompt length",
|
||||
Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
|
||||
})
|
||||
var eval_length = promauto.NewHistogram(prometheus.HistogramOpts{
|
||||
Name: "llmproxymetrics_eval_length",
|
||||
Help: "Eval length",
|
||||
Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
|
||||
})
|
||||
var concurrent_requests = promauto.NewGauge(prometheus.GaugeOpts{
|
||||
Name: "llmproxymetrics_concurrent_requests",
|
||||
Help: "Concurrent requests",
|
||||
})
|
||||
|
||||
func main() {
|
||||
env.Parse(&cfg)
|
||||
err := env.Parse(&cfg)
|
||||
if err != nil {
|
||||
log.Fatalf("Error parsing environment variables: %v", err)
|
||||
}
|
||||
|
||||
targetURL, err := url.Parse(cfg.BaseURL)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
http.HandleFunc("/", createProxy(targetURL))
|
||||
http.HandleFunc("/proxy/", createProxy(targetURL))
|
||||
http.Handle("/metrics", promhttp.Handler())
|
||||
|
||||
log.Printf("Starting proxy server on :%s", strconv.Itoa(cfg.Port))
|
||||
err = http.ListenAndServe(fmt.Sprintf(":%s", strconv.Itoa(cfg.Port)), nil)
|
||||
err = http.ListenAndServe(fmt.Sprintf(":%d", cfg.Port), nil)
|
||||
if err != nil {
|
||||
log.Fatal(err)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user