diff --git a/llmproxymetrics.go b/llmproxymetrics.go
index 52b1643..fc11625 100644
--- a/llmproxymetrics.go
+++ b/llmproxymetrics.go
@@ -2,6 +2,7 @@ package main
 
 import (
 	"bufio"
+	"bytes"
 	"encoding/json"
 	"fmt"
 	"io"
@@ -39,16 +40,23 @@ func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
 		req.URL.Scheme = target.Scheme
 		req.URL.Host = target.Host
 	}
-	var jsonData map[string]interface{}
-	json.NewDecoder(r.Body).Decode(&jsonData)
+	data, err := io.ReadAll(r.Body)
+	if err == nil {
+		r.Body = io.NopCloser(bytes.NewReader(data))
+		var jsonData map[string]interface{}
+		json.NewDecoder(r.Body).Decode(&jsonData)
 
-	if jsonData["stream"] != nil {
-		if !jsonData["stream"].(bool) {
-			notStreamed.Inc()
-		} else {
-			streamed.Inc()
+		if jsonData["stream"] != nil {
+			if !jsonData["stream"].(bool) {
+				notStreamed.Inc()
+			} else {
+				streamed.Inc()
+			}
 		}
+	} else {
+		fmt.Println("Error reading body:", err)
 	}
+	r.Body = io.NopCloser(bytes.NewReader(data))
 
 	modifyResponse := func(response *http.Response) error {
 		pr, pw := io.Pipe()
@@ -89,6 +97,9 @@ func createProxy(target *url.URL) func(http.ResponseWriter, *http.Request) {
 }
 
 func handleJsonLine(line []byte) {
+	if len(line) == 0 {
+		return
+	}
 	var jsonData map[string]interface{}
 	err := json.Unmarshal([]byte(line), &jsonData)
 	if err != nil {
@@ -100,7 +111,8 @@ func handleJsonLine(line []byte) {
 		duration := jsonData["eval_duration"].(float64) / 1000000000.0
 		fmt.Printf("Duration: %.2f seconds\n", duration)
 		opsProcessed.Inc()
-		tokens_out.Add(jsonData["prompt_eval_count"].(float64))
+		tokens_out.Add(jsonData["eval_count"].(float64))
+		tokens_in.Add(jsonData["prompt_eval_count"].(float64))
 		eval_time.Observe(duration)
 	}
 }
@@ -122,7 +134,10 @@ var tokens_out = promauto.NewCounter(prometheus.CounterOpts{
 	Name:    "llmproxymetrics_tokens_out",
 	Help:    "Tokens generated.",
 })
-
+var tokens_in = promauto.NewCounter(prometheus.CounterOpts{
+	Name: "llmproxymetrics_tokens_in",
+	Help: "Tokens input.",
+})
 var eval_time = promauto.NewHistogram(prometheus.HistogramOpts{
 	Name: "llmproxymetrics_eval_time",
 	Help: "Tokens generated.",