Compare commits

...

37 Commits

Author SHA1 Message Date
4e4cb0759f more metrics added 2025-04-28 13:46:58 +03:00
707267e471 only count completed requests as streamed or not 2025-04-25 09:40:10 +03:00
f5df0b5d54 fix stream metrics timing 2025-04-25 09:30:34 +03:00
bb1d8c6e7d fixes 2025-04-25 09:06:47 +03:00
4687c67685 nullability fix 2025-04-25 08:43:03 +03:00
f50d0c9f28 few more metrics added 2025-04-25 08:31:06 +03:00
9e5ff37980 update tokens out 2025-04-24 15:25:31 +03:00
2772d6075f bucket for tokens out 2025-04-24 15:10:57 +03:00
a036f103af add chat endpoint 2025-04-23 15:32:32 +03:00
5352dc7200 only modify certain requests 2025-04-23 15:23:13 +03:00
249619abc5 prometheus metrics endpoint 2025-04-23 15:15:34 +03:00
9c6527dc14 impl streaming 2025-04-23 14:46:49 +03:00
a7ee7577c4 updates 2025-04-22 19:31:34 +03:00
6d54fe2715 ah vars dont exist 2025-04-22 18:44:32 +03:00
2d66baaf3c maybe 2025-04-22 18:40:35 +03:00
184362429a updated 2025-04-22 18:36:35 +03:00
ff1d53e2a1 might work 2025-04-22 18:30:02 +03:00
739d36203c basd 2025-04-22 18:28:14 +03:00
4577fa1dd7 asd 2025-04-22 18:25:54 +03:00
0ff15efcd7 Merge branch 'main' of https://lab.5b5.eu/Ade9/LLM-Proxy-Metrics 2025-04-22 18:24:59 +03:00
7f1f6ccea8 simplify ci 2025-04-22 18:24:57 +03:00
537a8fb29f Update README.md 2025-04-22 11:05:42 +03:00
2481c1d872 idk man 2025-04-20 17:22:45 +03:00
d2a8421194 god help me 2025-04-20 17:08:49 +03:00
6ffd059b5b another one 2025-04-20 17:03:53 +03:00
0b3ab25e39 another try 2025-04-20 17:00:57 +03:00
b38b29369d go workflow go workflow go 2025-04-20 16:49:50 +03:00
8213071ae6 workflow test 2025-04-20 16:45:14 +03:00
aa78cf072a uppercase user 2025-04-20 16:39:31 +03:00
59e4311d6e change tags to latest 2025-04-20 16:39:21 +03:00
1221712c5d workflow permission clause removed 2025-04-20 16:37:28 +03:00
98c0d1a26e update workflow and dockerfile 2025-04-20 16:35:26 +03:00
2cb599ae74 update dockerfile 2025-04-20 16:17:11 +03:00
f2cf934e83 workflow continues 2025-04-20 16:05:50 +03:00
bb2012e79f fix workflow case 2025-04-20 16:01:35 +03:00
eae6745cb4 update tags 2025-04-20 15:56:18 +03:00
e99991f6c6 context fix for workflow 2025-04-20 15:55:08 +03:00
7 changed files with 206 additions and 61 deletions

View File

@@ -5,10 +5,17 @@ on:
jobs:
docker:
permissions:
id-token: write
runs-on: ubuntu-latest
steps:
-
name: Checkout
uses: actions/checkout@v4
-
name: Docker meta
id: meta
uses: docker/metadata-action@v5
with:
context: git
-
name: Login to Gitea
uses: docker/login-action@v3
@@ -16,6 +23,8 @@ jobs:
registry: lab.5b5.eu
username: ${{ vars.PACKAGE_USERNAME }}
password: ${{ secrets.PACKAGE_TOKEN }}
images: |
ade9/llm-proxy-metrics
-
name: Set up QEMU
uses: docker/setup-qemu-action@v3
@@ -26,5 +35,8 @@ jobs:
name: Build and push
uses: docker/build-push-action@v6
with:
context: .
file: ./Dockerfile
push: true
tags: user/app:latest
tags: lab.5b5.eu/ade9/llm-proxy-metrics/${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}

20
.vscode/launch.json vendored Normal file
View File

@@ -0,0 +1,20 @@
{
// Use IntelliSense to learn about possible attributes.
// Hover to view descriptions of existing attributes.
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
"version": "0.2.0",
"configurations": [
{
"name": "Launch Package",
"type": "go",
"request": "launch",
"mode": "debug",
"program": "llmproxymetrics.go",
"env": {
"PORT": "6677",
"BASE_URL": "http://10.0.0.10:11434"
}
}
]
}

View File

@@ -1,13 +1,13 @@
FROM golang:1.24.2-alpine3.21 as builder
FROM golang:1.24 AS builder
ARG CGO_ENABLED=0
WORKDIR /app
COPY go.mod go.sum ./
RUN go mod download
COPY . .
COPY *.go ./
RUN go build
RUN CGO_ENABLED=0 GOOS=linux go build -o /docker-llmpm
FROM scratch
COPY --from=builder /app/server /server
ENTRYPOINT ["/server"]
COPY --from=builder /docker-llmpm /docker-llmpm
ENTRYPOINT ["/docker-llmpm"]

View File

@@ -1,3 +1,3 @@
# LLM-Proxy-Metrics
Rust impl of a proxy for Ollama and OpenAI endpoints that gathers prometheus metrics.
Go implementation of a proxy for Ollama and OpenAI endpoints that gathers Prometheus metrics.

12
go.mod
View File

@@ -2,7 +2,17 @@ module ade9/llmproxymetrics
go 1.23.3
require github.com/caarlos0/env v3.5.0+incompatible
require (
github.com/caarlos0/env v3.5.0+incompatible
github.com/beorn7/perks v1.0.1 // indirect
github.com/caarlos0/env/v11 v11.3.1 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_golang v1.22.0 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.62.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
golang.org/x/sys v0.30.0 // indirect
google.golang.org/protobuf v1.36.5 // indirect
)

18
go.sum
View File

@@ -1,4 +1,22 @@
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/caarlos0/env v3.5.0+incompatible h1:Yy0UN8o9Wtr/jGHZDpCBLpNrzcFLLM2yixi/rBrKyJs=
github.com/caarlos0/env v3.5.0+incompatible/go.mod h1:tdCsowwCzMLdkqRYDlHpZCp2UooDD3MspDBjZ2AD02Y=
github.com/caarlos0/env/v11 v11.3.1 h1:cArPWC15hWmEt+gWk7YBi7lEXTXCvpaSdCiZE2X5mCA=
github.com/caarlos0/env/v11 v11.3.1/go.mod h1:qupehSf/Y0TUTsxKywqRt/vJjN5nz6vauiYEUUr8P4U=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/prometheus/client_golang v1.22.0 h1:rb93p9lokFEsctTys46VnV1kLCDpVZ0a/Y92Vm0Zc6Q=
github.com/prometheus/client_golang v1.22.0/go.mod h1:R7ljNsLXhuQXYZYtw6GAE9AZg8Y7vEW5scdCXrWRXC0=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.62.0 h1:xasJaQlnWAeyHdUBeGjXmutelfJHWMRr+Fg4QszZ2Io=
github.com/prometheus/common v0.62.0/go.mod h1:vyBcEuLSvWos9B1+CyL7JZ2up+uFzXhkqml0W5zIY1I=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc=
golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
google.golang.org/protobuf v1.36.5 h1:tPhr+woSbjfYvY6/GPufUoYizxw1cF/yFoxJ2fmpwlM=
google.golang.org/protobuf v1.36.5/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=

View File

@@ -1,18 +1,22 @@
package main
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"log"
"net/http"
"net/http/httptest"
"net/http/httputil"
"net/url"
"strconv"
"time"
"strings"
"github.com/caarlos0/env/v11"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
var cfg config
@@ -24,81 +28,162 @@ type config struct {
// createProxy returns an HTTP handler that forwards each incoming request to
// target through an httputil reverse proxy. For the paths "/api/generate" and
// "/api/chat" it also installs a ModifyResponse hook that reads the upstream
// response line by line, hands every line to handleJsonLine, and relays the
// same bytes to the client through an io.Pipe.
//
// NOTE(review): this span was captured from a rendered diff, so statements
// from more than one revision appear together (for example two ServeHTTP
// calls and repeated host/scheme rewrites) - confirm against the checked-in
// file before relying on the exact statement order.
func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
return func(w http.ResponseWriter, r *http.Request) {
// Drop the "/proxy" mount prefix and point the request at the target.
r.URL.Path = strings.TrimPrefix(r.URL.Path, "/proxy")
r.Host = target.Host
r.URL.Scheme = target.Scheme
r.URL.Host = target.Host
lrw := &LoggingResponseWriter{ResponseWriter: w, body: new(bytes.Buffer)}
// director rewrites the outbound request: it records the caller's address
// in X-Forwarded-For and retargets URL and Host to the upstream.
director := func(req *http.Request) {
req.Header.Set("X-Forwarded-For", r.RemoteAddr)
req.URL = r.URL
req.Host = target.Host
req.URL.Scheme = target.Scheme
req.URL.Host = target.Host
}
// wasstreamed encodes the request's "stream" field: -1 when the field is
// absent (or the body could not be read), 0 for false, 1 for true.
wasstreamed := -1
data, err := io.ReadAll(r.Body)
if err == nil {
r.Body = io.NopCloser(bytes.NewReader(data))
var jsonData map[string]interface{}
// The Decode error is discarded; on failure jsonData may stay nil, and
// indexing a nil map yields nil, so wasstreamed remains -1.
json.NewDecoder(r.Body).Decode(&jsonData)
if jsonData["stream"] != nil {
// NOTE(review): single-value type assertion - panics if "stream" is
// present but is not a JSON boolean.
if !jsonData["stream"].(bool) {
wasstreamed = 0
} else {
wasstreamed = 1
}
}
} else {
fmt.Println("Error reading body:", err)
}
// Rewind the body so the proxy can send the bytes that were consumed above.
r.Body = io.NopCloser(bytes.NewReader(data))
// modifyResponse swaps the upstream body for the read end of a pipe and
// starts a goroutine that copies the original body into the write end one
// '\n'-terminated line at a time.
modifyResponse := func(response *http.Response) error {
pr, pw := io.Pipe()
body := response.Body
response.Body = pr
go func() {
// Closing the write end makes the pipe reader see EOF when this
// goroutine finishes, whichever way it exits.
defer pw.Close()
// NOTE(review): the original upstream body is not closed anywhere in
// this goroutine, and pw.Write errors are ignored - confirm whether
// that is intended.
reader := bufio.NewReader(body)
for {
line, err := reader.ReadBytes('\n')
if err != nil {
if err == io.EOF {
// Process and forward whatever came after the last newline.
handleJsonLine([]byte(string(line)), wasstreamed)
pw.Write(line)
break
}
// Any other read error stops the copy without forwarding the
// partial line.
return
}
handleJsonLine(line, wasstreamed)
pw.Write(line)
}
}()
return nil
}
proxy := httputil.NewSingleHostReverseProxy(target)
proxy.Director = director
// Only the generation/chat endpoints are inspected and counted.
if r.URL.Path == "/api/generate" || r.URL.Path == "/api/chat" {
proxy.ModifyResponse = modifyResponse
// NOTE(review): no matching Dec is visible in this function; the only
// visible decrement is in handleJsonLine on a line whose "done" is true.
concurrent_requests.Inc()
}
startTime := time.Now()
recorder := httptest.NewRecorder()
proxy.ServeHTTP(recorder, r)
proxy.ServeHTTP(w, r)
}
}
responseBody := recorder.Body.Bytes()
var jsonResponse map[string]interface{}
err := json.Unmarshal(responseBody, &jsonResponse)
// handleJsonLine parses one line of an upstream response as a JSON object
// and, when its "done" field is true, records the request's metrics:
// opsProcessed is incremented, "eval_count" and "prompt_eval_count" are added
// to the token counters and observed by the length histograms, and
// "eval_duration" divided by 1e9 is observed by eval_time. wasstreamed selects
// which completion counter is bumped: 1 increments streamed, 0 increments
// notStreamed, any other value increments neither. Empty lines are ignored.
//
// NOTE(review): this span was captured from a rendered diff, so statements
// from an earlier revision (those referring to lrw, jsonResponse, recorder,
// startTime and w) appear between the current ones - confirm against the
// checked-in file.
func handleJsonLine(line []byte, wasstreamed int) {
if len(line) == 0 {
return
}
var jsonData map[string]interface{}
err := json.Unmarshal([]byte(line), &jsonData)
if err != nil {
log.Printf("Error unmarshalling JSON response: %v", err)
lrw.Write(responseBody)
fmt.Println("Error parsing JSON:", err)
return
}
// Add your metrics metadata here
jsonResponse["metrics"] = map[string]interface{}{
"requestPath": r.URL.Path,
"statusCode": recorder.Code,
"responseTime": time.Since(startTime).Milliseconds(),
// Add more metrics as needed
// NOTE(review): the type assertions below are the single-value form, so a
// line without a boolean "done" (or a final line missing one of the numeric
// fields) panics instead of being skipped.
if jsonData["done"].(bool) {
// The result is printed with the unit "seconds" below; presumably
// eval_duration is reported in nanoseconds - TODO confirm.
duration := jsonData["eval_duration"].(float64) / 1000000000.0
fmt.Printf("Duration: %.2f seconds\n", duration)
opsProcessed.Inc()
tokens_out.Add(jsonData["eval_count"].(float64))
tokens_in.Add(jsonData["prompt_eval_count"].(float64))
prompt_length.Observe(jsonData["prompt_eval_count"].(float64))
eval_length.Observe(jsonData["eval_count"].(float64))
eval_time.Observe(duration)
if wasstreamed == 1 {
streamed.Inc()
} else if wasstreamed == 0 {
notStreamed.Inc()
}
modifiedResponseBody, err := json.Marshal(jsonResponse)
if err != nil {
log.Printf("Error marshalling modified JSON response: %v", err)
lrw.Write(responseBody)
return
}
for name, values := range recorder.Header() {
for _, value := range values {
w.Header().Add(name, value)
// NOTE(review): the gauge is decremented only on this "done" path, so a
// response that never delivers such a line would leave it raised - confirm.
concurrent_requests.Dec()
}
}
w.WriteHeader(recorder.Code)
lrw.Write(modifiedResponseBody)
// opsProcessed is the counter exported as llmproxymetrics_total_requests.
var opsProcessed = promauto.NewCounter(
	prometheus.CounterOpts{
		Help: "The total number of processed events",
		Name: "llmproxymetrics_total_requests",
	},
)
log.Printf("Response with metrics: %s", lrw.Body())
}
}
// LoggingResponseWriter wraps an http.ResponseWriter and keeps a copy of
// every byte written through it so the response body can be inspected later.
type LoggingResponseWriter struct {
	http.ResponseWriter
	body *bytes.Buffer // copy of everything passed to Write
}

// Write appends b to the captured copy and then forwards it to the wrapped
// ResponseWriter. The returned count and error are those of the wrapped
// writer; the result of the buffer write is discarded.
func (l *LoggingResponseWriter) Write(b []byte) (int, error) {
	l.body.Write(b)
	written, err := l.ResponseWriter.Write(b)
	return written, err
}

// Body returns everything written so far as a string.
func (l *LoggingResponseWriter) Body() string {
	captured := l.body.String()
	return captured
}
// Prometheus collectors created through promauto. They are declared in one
// group, in the same order as before.
var (
	// notStreamed is bumped by handleJsonLine when wasstreamed is 0.
	notStreamed = promauto.NewCounter(prometheus.CounterOpts{
		Name: "llmproxymetrics_non_streamed_requests",
		Help: "The total number of processed non-streamed events",
	})

	// streamed is bumped by handleJsonLine when wasstreamed is 1.
	streamed = promauto.NewCounter(prometheus.CounterOpts{
		Name: "llmproxymetrics_streamed_requests",
		Help: "The total number of processed streamed events",
	})

	// tokens_out accumulates the "eval_count" value of finished responses.
	tokens_out = promauto.NewCounter(prometheus.CounterOpts{
		Name: "llmproxymetrics_tokens_out",
		Help: "Tokens generated.",
	})

	// tokens_in accumulates the "prompt_eval_count" value of finished responses.
	tokens_in = promauto.NewCounter(prometheus.CounterOpts{
		Name: "llmproxymetrics_tokens_in",
		Help: "Tokens input.",
	})

	// eval_time is a histogram of evaluation durations with 23 linear
	// buckets starting at 5 and spaced 5 apart.
	eval_time = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "llmproxymetrics_eval_time",
		Help:    "Seconds spent evaluating the response.",
		Buckets: prometheus.LinearBuckets(5, 5, 23),
	})

	// prompt_length is a histogram of "prompt_eval_count" values with 20
	// exponential buckets spanning 100 to 32000.
	prompt_length = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "llmproxymetrics_prompt_length",
		Help:    "Prompt length",
		Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
	})

	// eval_length is a histogram of "eval_count" values with 20 exponential
	// buckets spanning 100 to 32000.
	eval_length = promauto.NewHistogram(prometheus.HistogramOpts{
		Name:    "llmproxymetrics_eval_length",
		Help:    "Eval length",
		Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
	})

	// concurrent_requests is a gauge raised in createProxy for inspected
	// endpoints and lowered in handleJsonLine on a finished response.
	concurrent_requests = promauto.NewGauge(prometheus.GaugeOpts{
		Name: "llmproxymetrics_concurrent_requests",
		Help: "Concurrent requests",
	})
)
func main() {
env.Parse(&cfg)
err := env.Parse(&cfg)
if err != nil {
log.Fatalf("Error parsing environment variables: %v", err)
}
targetURL, err := url.Parse(cfg.BaseURL)
if err != nil {
log.Fatal(err)
}
http.HandleFunc("/", createProxy(targetURL))
http.HandleFunc("/proxy/", createProxy(targetURL))
http.Handle("/metrics", promhttp.Handler())
log.Printf("Starting proxy server on :%s", strconv.Itoa(cfg.Port))
err = http.ListenAndServe(fmt.Sprintf(":%s", strconv.Itoa(cfg.Port)), nil)
err = http.ListenAndServe(fmt.Sprintf(":%d", cfg.Port), nil)
if err != nil {
log.Fatal(err)
}