Compare commits
21 Commits
537a8fb29f
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 4e4cb0759f | |||
| 707267e471 | |||
| f5df0b5d54 | |||
| bb1d8c6e7d | |||
| 4687c67685 | |||
| f50d0c9f28 | |||
| 9e5ff37980 | |||
| 2772d6075f | |||
| a036f103af | |||
| 5352dc7200 | |||
| 249619abc5 | |||
| 9c6527dc14 | |||
| a7ee7577c4 | |||
| 6d54fe2715 | |||
| 2d66baaf3c | |||
| 184362429a | |||
| ff1d53e2a1 | |||
| 739d36203c | |||
| 4577fa1dd7 | |||
| 0ff15efcd7 | |||
| 7f1f6ccea8 |
@@ -36,6 +36,7 @@ jobs:
|
|||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
with:
|
with:
|
||||||
context: .
|
context: .
|
||||||
|
file: ./Dockerfile
|
||||||
push: true
|
push: true
|
||||||
tags: ${{ steps.meta.outputs.tags }}
|
tags: lab.5b5.eu/ade9/llm-proxy-metrics/${{ steps.meta.outputs.tags }}
|
||||||
labels: ${{ steps.meta.outputs.labels }}
|
labels: ${{ steps.meta.outputs.labels }}
|
||||||
20
.vscode/launch.json
vendored
Normal file
20
.vscode/launch.json
vendored
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
|
||||||
|
{
|
||||||
|
"name": "Launch Package",
|
||||||
|
"type": "go",
|
||||||
|
"request": "launch",
|
||||||
|
"mode": "debug",
|
||||||
|
"program": "llmproxymetrics.go",
|
||||||
|
"env": {
|
||||||
|
"PORT": "6677",
|
||||||
|
"BASE_URL": "http://10.0.0.10:11434"
|
||||||
|
}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
12
go.mod
12
go.mod
@@ -2,7 +2,17 @@ module ade9/llmproxymetrics
|
|||||||
|
|
||||||
go 1.23.3
|
go 1.23.3
|
||||||
|
|
||||||
|
require github.com/caarlos0/env v3.5.0+incompatible
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/caarlos0/env v3.5.0+incompatible
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
github.com/caarlos0/env/v11 v11.3.1 // indirect
|
github.com/caarlos0/env/v11 v11.3.1 // indirect
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
|
github.com/prometheus/client_golang v1.22.0 // indirect
|
||||||
|
github.com/prometheus/client_model v0.6.1 // indirect
|
||||||
|
github.com/prometheus/common v0.62.0 // indirect
|
||||||
|
github.com/prometheus/procfs v0.15.1 // indirect
|
||||||
|
golang.org/x/sys v0.30.0 // indirect
|
||||||
|
google.golang.org/protobuf v1.36.5 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
18
go.sum
18
go.sum
@@ -1,4 +1,22 @@
|
|||||||
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs=
|
github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs=
|
||||||
github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y=
|
github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y=
|
||||||
github.com/caarlos0/env/v11 v11.3.1 h1:cArPWC15hWmEt+gWk7YBi7lEXTXCvpaSdCiZE2X5mCA=
|
github.com/caarlos0/env/v11 v11.3.1 h1:cArPWC15hWmEt+gWk7YBi7lEXTXCvpaSdCiZE2X5mCA=
|
||||||
github.com/caarlos0/env/v11 v11.3.1/go.mod h1:qupehSf/Y0TUTsxKywqRt/vJjN5nz6vauiYEUUr8P4U=
|
github.com/caarlos0/env/v11 v11.3.1/go.mod h1:qupehSf/Y0TUTsxKywqRt/vJjN5nz6vauiYEUUr8P4U=
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||||
|
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
|
||||||
|
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
|
||||||
|
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||||
|
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||||
|
github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io=
|
||||||
|
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
|
||||||
|
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||||
|
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||||
|
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
|
||||||
|
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
|
||||||
|
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
|
||||||
|
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
|
||||||
|
|||||||
@@ -1,18 +1,22 @@
|
|||||||
package main
|
package main
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bufio"
|
||||||
"bytes"
|
"bytes"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"io"
|
||||||
"log"
|
"log"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/http/httptest"
|
|
||||||
"net/http/httputil"
|
"net/http/httputil"
|
||||||
"net/url"
|
"net/url"
|
||||||
"strconv"
|
"strconv"
|
||||||
"time"
|
"strings"
|
||||||
|
|
||||||
"github.com/caarlos0/env/v11"
|
"github.com/caarlos0/env/v11"
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promauto"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
)
|
)
|
||||||
|
|
||||||
var cfg config
|
var cfg config
|
||||||
@@ -24,81 +28,162 @@ type config struct {
|
|||||||
|
|
||||||
func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
|
func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
r.URL.Path = strings.TrimPrefix(r.URL.Path, "/proxy")
|
||||||
r.Host = target.Host
|
r.Host = target.Host
|
||||||
r.URL.Scheme = target.Scheme
|
r.URL.Scheme = target.Scheme
|
||||||
r.URL.Host = target.Host
|
r.URL.Host = target.Host
|
||||||
|
|
||||||
lrw := &LoggingResponseWriter{ResponseWriter: w, body: new(bytes.Buffer)}
|
director := func(req *http.Request) {
|
||||||
proxy := httputil.NewSingleHostReverseProxy(target)
|
req.Header.Set("X-Forwarded-For", r.RemoteAddr)
|
||||||
|
req.URL = r.URL
|
||||||
startTime := time.Now()
|
req.Host = target.Host
|
||||||
recorder := httptest.NewRecorder()
|
req.URL.Scheme = target.Scheme
|
||||||
proxy.ServeHTTP(recorder, r)
|
req.URL.Host = target.Host
|
||||||
|
|
||||||
responseBody := recorder.Body.Bytes()
|
|
||||||
|
|
||||||
var jsonResponse map[string]interface{}
|
|
||||||
err := json.Unmarshal(responseBody, &jsonResponse)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Error unmarshalling JSON response: %v", err)
|
|
||||||
lrw.Write(responseBody)
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
wasstreamed := -1
|
||||||
|
data, err := io.ReadAll(r.Body)
|
||||||
|
if err == nil {
|
||||||
|
r.Body = io.NopCloser(bytes.NewReader(data))
|
||||||
|
var jsonData map[string]interface{}
|
||||||
|
json.NewDecoder(r.Body).Decode(&jsonData)
|
||||||
|
|
||||||
// Add your metrics metadata here
|
if jsonData["stream"] != nil {
|
||||||
jsonResponse["metrics"] = map[string]interface{}{
|
if !jsonData["stream"].(bool) {
|
||||||
"requestPath": r.URL.Path,
|
wasstreamed = 0
|
||||||
"statusCode": recorder.Code,
|
} else {
|
||||||
"responseTime": time.Since(startTime).Milliseconds(),
|
wasstreamed = 1
|
||||||
// Add more metrics as needed
|
}
|
||||||
}
|
|
||||||
|
|
||||||
modifiedResponseBody, err := json.Marshal(jsonResponse)
|
|
||||||
if err != nil {
|
|
||||||
log.Printf("Error marshalling modified JSON response: %v", err)
|
|
||||||
lrw.Write(responseBody)
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
for name, values := range recorder.Header() {
|
|
||||||
for _, value := range values {
|
|
||||||
w.Header().Add(name, value)
|
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
fmt.Println("Error reading body:", err)
|
||||||
|
}
|
||||||
|
r.Body = io.NopCloser(bytes.NewReader(data))
|
||||||
|
|
||||||
|
modifyResponse := func(response *http.Response) error {
|
||||||
|
pr, pw := io.Pipe()
|
||||||
|
body := response.Body
|
||||||
|
response.Body = pr
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
defer pw.Close()
|
||||||
|
|
||||||
|
reader := bufio.NewReader(body)
|
||||||
|
for {
|
||||||
|
line, err := reader.ReadBytes('\n')
|
||||||
|
if err != nil {
|
||||||
|
if err == io.EOF {
|
||||||
|
handleJsonLine([]byte(string(line)), wasstreamed)
|
||||||
|
pw.Write(line)
|
||||||
|
break
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
handleJsonLine(line, wasstreamed)
|
||||||
|
pw.Write(line)
|
||||||
|
}
|
||||||
|
|
||||||
|
}()
|
||||||
|
|
||||||
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
w.WriteHeader(recorder.Code)
|
proxy := httputil.NewSingleHostReverseProxy(target)
|
||||||
lrw.Write(modifiedResponseBody)
|
proxy.Director = director
|
||||||
|
if r.URL.Path == "/api/generate" || r.URL.Path == "/api/chat" {
|
||||||
|
proxy.ModifyResponse = modifyResponse
|
||||||
|
concurrent_requests.Inc()
|
||||||
|
}
|
||||||
|
|
||||||
log.Printf("Response with metrics: %s", lrw.Body())
|
proxy.ServeHTTP(w, r)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
type LoggingResponseWriter struct {
|
func handleJsonLine(line []byte, wasstreamed int) {
|
||||||
http.ResponseWriter
|
if len(line) == 0 {
|
||||||
body *bytes.Buffer
|
return
|
||||||
|
}
|
||||||
|
var jsonData map[string]interface{}
|
||||||
|
err := json.Unmarshal([]byte(line), &jsonData)
|
||||||
|
if err != nil {
|
||||||
|
fmt.Println("Error parsing JSON:", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
if jsonData["done"].(bool) {
|
||||||
|
duration := jsonData["eval_duration"].(float64) / 1000000000.0
|
||||||
|
fmt.Printf("Duration: %.2f seconds\n", duration)
|
||||||
|
opsProcessed.Inc()
|
||||||
|
tokens_out.Add(jsonData["eval_count"].(float64))
|
||||||
|
tokens_in.Add(jsonData["prompt_eval_count"].(float64))
|
||||||
|
prompt_length.Observe(jsonData["prompt_eval_count"].(float64))
|
||||||
|
eval_length.Observe(jsonData["eval_count"].(float64))
|
||||||
|
eval_time.Observe(duration)
|
||||||
|
if wasstreamed == 1 {
|
||||||
|
streamed.Inc()
|
||||||
|
} else if wasstreamed == 0 {
|
||||||
|
notStreamed.Inc()
|
||||||
|
}
|
||||||
|
concurrent_requests.Dec()
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (lrw *LoggingResponseWriter) Write(b []byte) (int, error) {
|
var opsProcessed = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
lrw.body.Write(b)
|
Name: "llmproxymetrics_total_requests",
|
||||||
return lrw.ResponseWriter.Write(b)
|
Help: "The total number of processed events",
|
||||||
}
|
})
|
||||||
|
|
||||||
func (lrw *LoggingResponseWriter) Body() string {
|
var notStreamed = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
return lrw.body.String()
|
Name: "llmproxymetrics_non_streamed_requests",
|
||||||
}
|
Help: "The total number of processed non-streamed events",
|
||||||
|
})
|
||||||
|
var streamed = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
|
Name: "llmproxymetrics_streamed_requests",
|
||||||
|
Help: "The total number of processed streamed events",
|
||||||
|
})
|
||||||
|
var tokens_out = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
|
Name: "llmproxymetrics_tokens_out",
|
||||||
|
Help: "Tokens generated.",
|
||||||
|
})
|
||||||
|
var tokens_in = promauto.NewCounter(prometheus.CounterOpts{
|
||||||
|
Name: "llmproxymetrics_tokens_in",
|
||||||
|
Help: "Tokens input.",
|
||||||
|
})
|
||||||
|
var eval_time = promauto.NewHistogram(prometheus.HistogramOpts{
|
||||||
|
Name: "llmproxymetrics_eval_time",
|
||||||
|
Help: "Seconds spent evaluating the response.",
|
||||||
|
Buckets: prometheus.LinearBuckets(5, 5, 23),
|
||||||
|
})
|
||||||
|
var prompt_length = promauto.NewHistogram(prometheus.HistogramOpts{
|
||||||
|
Name: "llmproxymetrics_prompt_length",
|
||||||
|
Help: "Prompt length",
|
||||||
|
Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
|
||||||
|
})
|
||||||
|
var eval_length = promauto.NewHistogram(prometheus.HistogramOpts{
|
||||||
|
Name: "llmproxymetrics_eval_length",
|
||||||
|
Help: "Eval length",
|
||||||
|
Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
|
||||||
|
})
|
||||||
|
var concurrent_requests = promauto.NewGauge(prometheus.GaugeOpts{
|
||||||
|
Name: "llmproxymetrics_concurrent_requests",
|
||||||
|
Help: "Concurrent requests",
|
||||||
|
})
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
env.Parse(&cfg)
|
err := env.Parse(&cfg)
|
||||||
|
if err != nil {
|
||||||
|
log.Fatalf("Error parsing environment variables: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
targetURL, err := url.Parse(cfg.BaseURL)
|
targetURL, err := url.Parse(cfg.BaseURL)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|
||||||
http.HandleFunc("/", createProxy(targetURL))
|
http.HandleFunc("/proxy/", createProxy(targetURL))
|
||||||
|
http.Handle("/metrics", promhttp.Handler())
|
||||||
|
|
||||||
log.Printf("Starting proxy server on :%s", strconv.Itoa(cfg.Port))
|
log.Printf("Starting proxy server on :%s", strconv.Itoa(cfg.Port))
|
||||||
err = http.ListenAndServe(fmt.Sprintf(":%s", strconv.Itoa(cfg.Port)), nil)
|
err = http.ListenAndServe(fmt.Sprintf(":%d", cfg.Port), nil)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Fatal(err)
|
log.Fatal(err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user