more metrics added
@@ -91,6 +91,7 @@ func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
 		proxy.Director = director
 		if r.URL.Path == "/api/generate" || r.URL.Path == "/api/chat" {
 			proxy.ModifyResponse = modifyResponse
+			concurrent_requests.Inc()
 		}
 
 		proxy.ServeHTTP(w, r)
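The gauge only counts requests on the generate/chat paths, and it is decremented later in handleJsonLine (next hunk) rather than in the handler itself. A minimal sketch of the more conventional defer-paired variant (the instrument wrapper is illustrative, not part of this commit):

package main

import (
	"net/http"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/prometheus/client_golang/prometheus/promauto"
)

var concurrent_requests = promauto.NewGauge(prometheus.GaugeOpts{
	Name: "llmproxymetrics_concurrent_requests",
	Help: "Concurrent requests",
})

// instrument pairs Inc with a deferred Dec, so the gauge cannot leak
// if the wrapped handler returns early or panics.
func instrument(next http.HandlerFunc) http.HandlerFunc {
	return func(w http.ResponseWriter, r *http.Request) {
		concurrent_requests.Inc()
		defer concurrent_requests.Dec()
		next(w, r)
	}
}

Decrementing in handleJsonLine instead ties the gauge to "response fully parsed" rather than "handler returned", which is presumably the intent here; the trade-off is that a request that fails before producing a final JSON line would leave the gauge stuck high.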
@@ -114,12 +115,15 @@ func handleJsonLine(line []byte, wasstreamed int) {
 		opsProcessed.Inc()
 		tokens_out.Add(jsonData["eval_count"].(float64))
 		tokens_in.Add(jsonData["prompt_eval_count"].(float64))
+		prompt_length.Observe(jsonData["prompt_eval_count"].(float64))
+		eval_length.Observe(jsonData["eval_count"].(float64))
 		eval_time.Observe(duration)
 		if wasstreamed == 1 {
 			streamed.Inc()
 		} else if wasstreamed == 0 {
 			notStreamed.Inc()
 		}
+		concurrent_requests.Dec()
 	}
 }
 
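Each Add/Observe here uses a bare type assertion (jsonData["eval_count"].(float64)), which panics if the field is absent or not a number; the /api/generate and /api/chat paths suggest an Ollama-style upstream, where streamed messages typically only carry eval_count and prompt_eval_count on the final chunk. A defensive comma-ok variant, sketched with a hypothetical asFloat helper that is not in the commit:

// asFloat reads a numeric field from decoded JSON without risking a panic
// on missing or mistyped keys (hypothetical helper, not in this commit).
func asFloat(m map[string]interface{}, key string) (float64, bool) {
	v, ok := m[key].(float64) // comma-ok assertion: ok is false instead of panicking
	return v, ok
}

handleJsonLine could then guard each metric update, e.g. if n, ok := asFloat(jsonData, "eval_count"); ok { tokens_out.Add(n); eval_length.Observe(n) }.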
@@ -146,8 +150,22 @@ var tokens_in = promauto.NewCounter(prometheus.CounterOpts{
 })
 var eval_time = promauto.NewHistogram(prometheus.HistogramOpts{
 	Name: "llmproxymetrics_eval_time",
-	Help: "Tokens generated.",
-	Buckets: prometheus.LinearBuckets(0, 2.5, 20),
+	Help: "Seconds spent evaluating the response.",
+	Buckets: prometheus.LinearBuckets(5, 5, 23),
+})
+var prompt_length = promauto.NewHistogram(prometheus.HistogramOpts{
+	Name: "llmproxymetrics_prompt_length",
+	Help: "Prompt length",
+	Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
+})
+var eval_length = promauto.NewHistogram(prometheus.HistogramOpts{
+	Name: "llmproxymetrics_eval_length",
+	Help: "Eval length",
+	Buckets: prometheus.ExponentialBucketsRange(100, 32000, 20),
+})
+var concurrent_requests = promauto.NewGauge(prometheus.GaugeOpts{
+	Name: "llmproxymetrics_concurrent_requests",
+	Help: "Concurrent requests",
 })
 
 func main() {
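The revised bucket layouts: LinearBuckets(5, 5, 23) yields 23 upper bounds at 5s, 10s, ..., 115s (the old LinearBuckets(0, 2.5, 20) topped out at 47.5s), while ExponentialBucketsRange(100, 32000, 20) spaces 20 bounds geometrically between 100 and 32000 tokens. A throwaway sketch to print the exact edges (not part of the commit):

package main

import (
	"fmt"

	"github.com/prometheus/client_golang/prometheus"
)

func main() {
	// 23 linear bucket upper bounds: 5, 10, ..., 115 (seconds)
	fmt.Println(prometheus.LinearBuckets(5, 5, 23))
	// 20 geometrically spaced bounds from 100 to 32000 (token counts)
	fmt.Println(prometheus.ExponentialBucketsRange(100, 32000, 20))
}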