Add more metrics (prompt/eval length histograms, concurrent-requests gauge)

This commit is contained in:
2025-04-28 13:46:58 +03:00
parent 707267e471
commit 4e4cb0759f

View File

@@ -91,6 +91,7 @@ func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
proxy.Director = director proxy.Director = director
if r.URL.Path == "/api/generate" || r.URL.Path == "/api/chat" { if r.URL.Path == "/api/generate" || r.URL.Path == "/api/chat" {
proxy.ModifyResponse = modifyResponse proxy.ModifyResponse = modifyResponse
concurrent_requests.Inc()
} }
proxy.ServeHTTP(w, r) proxy.ServeHTTP(w, r)
@@ -114,12 +115,15 @@ func handleJsonLine(line []byte, wasstreamed int) {
opsProcessed.Inc() opsProcessed.Inc()
tokens_out.Add(jsonData["eval_count"].(float64)) tokens_out.Add(jsonData["eval_count"].(float64))
tokens_in.Add(jsonData["prompt_eval_count"].(float64)) tokens_in.Add(jsonData["prompt_eval_count"].(float64))
prompt_length.Observe(jsonData["prompt_eval_count"].(float64))
eval_length.Observe(jsonData["eval_count"].(float64))
eval_time.Observe(duration) eval_time.Observe(duration)
if wasstreamed == 1 { if wasstreamed == 1 {
streamed.Inc() streamed.Inc()
} else if wasstreamed == 0 { } else if wasstreamed == 0 {
notStreamed.Inc() notStreamed.Inc()
} }
concurrent_requests.Dec()
} }
} }
@@ -146,8 +150,22 @@ var tokens_in = promauto.NewCounter(prometheus.CounterOpts{
}) })
var eval_time = promauto.NewHistogram(prometheus.HistogramOpts{ var eval_time = promauto.NewHistogram(prometheus.HistogramOpts{
Name: "llmproxymetrics_eval_time", Name: "llmproxymetrics_eval_time",
Help: "Tokens generated.", Help: "Seconds spent evaluating the response.",
Buckets: prometheus.LinearBuckets(0, 2.5, 20), Buckets: prometheus.LinearBuckets(5, 5, 23),
})
// prompt_length tracks the distribution of prompt sizes, in tokens
// (fed from the upstream response's "prompt_eval_count" field).
// Exponential buckets from 100 to 32000 cover typical context windows.
var prompt_length = promauto.NewHistogram(prometheus.HistogramOpts{
	Name:    "llmproxymetrics_prompt_length",
	Help:    "Prompt length in tokens.",
	Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
})
// eval_length tracks the distribution of generated-response sizes, in
// tokens (fed from the upstream response's "eval_count" field).
// Exponential buckets from 100 to 32000 mirror prompt_length.
var eval_length = promauto.NewHistogram(prometheus.HistogramOpts{
	Name:    "llmproxymetrics_eval_length",
	Help:    "Generated response length in tokens.",
	Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
})
var concurrent_requests = promauto.NewGauge(prometheus.GaugeOpts{
Name: "llmproxymetrics_concurrent_requests",
Help: "Concurrent requests",
}) })
func main() { func main() {