diff --git a/cmd/mock_tsdb/main.go b/cmd/mock_tsdb/main.go index 43f947d..9158d37 100644 --- a/cmd/mock_tsdb/main.go +++ b/cmd/mock_tsdb/main.go @@ -3,10 +3,13 @@ package main import ( "encoding/json" "fmt" + "hash/fnv" "log" + "math" "net/http" "regexp" "slices" + "strconv" "strings" "time" @@ -22,20 +25,24 @@ var ( regexpUUID = regexp.MustCompile("(?:.+?)[^gpu]uuid=[~]{0,1}\"(?P[a-zA-Z0-9-|]+)\"(?:.*)") ) -// filterResults returns the filtered results based on uuids slice -func filterResults(uuids []string, allResults []interface{}) []interface{} { - // Return results corresponding to UUIDs - var responseResults []interface{} - for _, result := range allResults { - if m, ok := result.(map[string]interface{})["metric"]; ok { - if uuid, ok := m.(map[string]string)["uuid"]; ok { - if slices.Contains(uuids, uuid) { - responseResults = append(responseResults, result) - } - } - } +// hash returns hash of a given string +func hash(s string) uint32 { + h := fnv.New32a() + h.Write([]byte(s)) + return h.Sum32() +} + +// lenLoop returns number of digits in an integer +func lenLoop(i uint32) int { + if i == 0 { + return 1 + } + count := 0 + for i != 0 { + i /= 10 + count++ } - return responseResults + return count } // QueryHandler handles queries @@ -79,246 +86,99 @@ func QueryHandler(w http.ResponseWriter, r *http.Request) { // log.Println("Query", query, "UUIDs", uuids) - var allResults []interface{} + var results []interface{} if slices.Contains( []string{ "avg_cpu_usage", "avg_cpu_mem_usage", "avg_gpu_usage", "avg_gpu_mem_usage", "total_cpu_energy_usage_kwh", "total_gpu_energy_usage_kwh", "total_cpu_emissions_gms", "total_gpu_emissions_gms", }, query) { - allResults = []interface{}{ - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1479763", - }, - "value": []interface{}{ - 12345, "14.79", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1481508", - }, - "value": []interface{}{ - 12345, "14.58", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "147975", - }, - "value": []interface{}{ - 12345, "14.79", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "11508", - }, - "value": []interface{}{ - 12345, "11.50", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "81510", - }, - "value": []interface{}{ - 12345, "81.51", - }, - }, + // Convert uuid into hash and transform that hash number into float64 between 0 and 100 + for _, uuid := range uuids { + h := hash(uuid) + numDigits := lenLoop(h) + value := float64(h) / math.Pow(10, float64(numDigits)-2) + results = append(results, + map[string]interface{}{ + "metric": map[string]string{ + "uuid": uuid, + }, + "value": []interface{}{ + 12345, strconv.FormatFloat(value, 'f', -1, 64), + }, + }) } } else if slices.Contains( []string{ "total_io_read_stats_bytes", "total_io_write_stats_bytes", }, query) { - allResults = []interface{}{ - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1479763", - }, - "value": []interface{}{ - 12345, "1479763", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1481508", - }, - "value": []interface{}{ - 12345, "1481508", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "147975", - }, - "value": []interface{}{ - 12345, "147975", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "11508", - }, - "value": []interface{}{ - 12345, "11508", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "81510", - }, - "value": []interface{}{ - 12345, "81510", - }, - }, + for _, uuid := range uuids { + h := hash(uuid) + results = append(results, + map[string]interface{}{ + "metric": map[string]string{ + "uuid": uuid, + }, + "value": []interface{}{ + 12345, strconv.FormatUint(uint64(h), 10), + }, + }) } } else if slices.Contains( []string{ "total_io_read_stats_requests", "total_io_write_stats_requests", }, query) { - allResults = []interface{}{ - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1479763", - }, - "value": []interface{}{ - 12345, "14797630", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1481508", - }, - "value": []interface{}{ - 12345, "14815080", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "147975", - }, - "value": []interface{}{ - 12345, "1479750", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "11508", - }, - "value": []interface{}{ - 12345, "115080", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "81510", - }, - "value": []interface{}{ - 12345, "815100", - }, - }, + for _, uuid := range uuids { + h := hash(uuid) + results = append(results, + map[string]interface{}{ + "metric": map[string]string{ + "uuid": uuid, + }, + "value": []interface{}{ + 12345, strconv.FormatUint(uint64(h)*10, 10), + }, + }) } } else if slices.Contains( []string{ "total_ingress_stats_bytes", "total_outgress_stats_bytes", }, query) { - allResults = []interface{}{ - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1479763", - }, - "value": []interface{}{ - 12345, "147976300", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1481508", - }, - "value": []interface{}{ - 12345, "148150800", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "147975", - }, - "value": []interface{}{ - 12345, "14797500", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "11508", - }, - "value": []interface{}{ - 12345, "1150800", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "81510", - }, - "value": []interface{}{ - 12345, "8151000", - }, - }, + for _, uuid := range uuids { + h := hash(uuid) + results = append(results, + map[string]interface{}{ + "metric": map[string]string{ + "uuid": uuid, + }, + "value": []interface{}{ + 12345, strconv.FormatUint(uint64(h)*100, 10), + }, + }) } } else if slices.Contains( []string{ "total_ingress_stats_packets", "total_outgress_stats_packets", }, query) { - allResults = []interface{}{ - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1479763", - }, - "value": []interface{}{ - 12345, "1479763000", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "1481508", - }, - "value": []interface{}{ - 12345, "1481508000", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "147975", - }, - "value": []interface{}{ - 12345, "147975000", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "11508", - }, - "value": []interface{}{ - 12345, "11508000", - }, - }, - map[string]interface{}{ - "metric": map[string]string{ - "uuid": "81510", - }, - "value": []interface{}{ - 12345, "81510000", - }, - }, + for _, uuid := range uuids { + h := hash(uuid) + results = append(results, + map[string]interface{}{ + "metric": map[string]string{ + "uuid": uuid, + }, + "value": []interface{}{ + 12345, strconv.FormatUint(uint64(h)*1000, 10), + }, + }) } } - responseResults := filterResults(uuids, allResults) + // responseResults := filterResults(uuids, esults) response = tsdb.Response{ Status: "success", Data: map[string]interface{}{ "resultType": "vector", - "result": responseResults, + "result": results, }, } if err := json.NewEncoder(w).Encode(&response); err != nil { diff --git a/go.mod b/go.mod index 4b0e83a..d8c3bd7 100644 --- a/go.mod +++ b/go.mod @@ -14,6 +14,7 @@ require ( github.com/prometheus/common v0.45.0 github.com/prometheus/exporter-toolkit v0.10.0 github.com/prometheus/procfs v0.12.0 + github.com/stretchr/testify v1.9.0 github.com/swaggo/http-swagger/v2 v2.0.2 github.com/swaggo/swag v1.16.3 github.com/zeebo/xxh3 v1.0.2 diff --git a/pkg/api/db/db.go b/pkg/api/db/db.go index 37ee03c..da496af 100644 --- a/pkg/api/db/db.go +++ b/pkg/api/db/db.go @@ -169,7 +169,6 @@ updatetime: setup: // Setup manager struct that retrieves unit data manager, err := c.ResourceManager(c.Logger) - fmt.Printf("%#v\n", manager) if err != nil { level.Error(c.Logger).Log("msg", "Resource manager setup failed", "err", err) return nil, err @@ -259,6 +258,9 @@ setup: // Collect unit stats func (s *statsDB) Collect() error { + // Measure elapsed time + defer common.TimeTrack(time.Now(), "Data collection", s.logger) + var currentTime = time.Now() // If duration is less than 1 day do single update @@ -452,7 +454,7 @@ func (s *statsDB) execStatements( clusterProjects []models.ClusterProjects, ) { // Measure elapsed time - defer common.TimeTrack(time.Now(), "DB insertions", s.logger) + defer common.TimeTrack(time.Now(), "DB insertion", s.logger) var ignore = 0 var err error diff --git a/pkg/api/db/db_test.go b/pkg/api/db/db_test.go index ca7aa52..3672f55 100644 --- a/pkg/api/db/db_test.go +++ b/pkg/api/db/db_test.go @@ -4,7 +4,6 @@ import ( "fmt" "os" "path/filepath" - "reflect" "testing" "time" @@ -14,6 +13,9 @@ import ( "github.com/mahendrapaipuri/ceems/pkg/api/resource" "github.com/mahendrapaipuri/ceems/pkg/api/updater" "github.com/prometheus/common/model" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" ) type mockFetcherOne struct { @@ -560,94 +562,67 @@ func populateDBWithMockData(s *statsDB) { func TestNewUnitStatsDB(t *testing.T) { tmpDir := t.TempDir() c := prepareMockConfig(tmpDir) + var err error lastUnitsUpdateTimeFile := filepath.Join(c.Data.Path, "lastupdatetime") // Make new stats DB c.Data.LastUpdateTime, _ = time.Parse("2006-01-02", "2023-12-20") - _, err := NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + _, err = NewStatsDB(c) + require.NoError(t, err, "Failed to create new statsDB") // Check if last update time file has been written - if _, err := os.Stat(lastUnitsUpdateTimeFile); err != nil { - t.Errorf("Last update time file not created") - } + _, err = os.Stat(lastUnitsUpdateTimeFile) + require.NoError(t, err, "Last update time file not created") // Check content of last update time file - if timeString, _ := os.ReadFile(lastUnitsUpdateTimeFile); string(timeString) != "2023-12-20T00:00:00" { - t.Errorf("Last update time string test failed. Expected %s got %s", "2023-12-20T00:00:00", string(timeString)) - } + timeString, _ := os.ReadFile(lastUnitsUpdateTimeFile) + assert.Equal(t, string(timeString), "2023-12-20T00:00:00", "Expected last update time string is 2023-12-20T00:00:00") // Check DB file exists - if _, err := os.Stat(c.Data.Path); err != nil { - t.Errorf("DB file not created") - } + _, err = os.Stat(c.Data.Path) + require.NoError(t, err, "DB file not created") // Make again a new stats DB with new lastUpdateTime c.Data.LastUpdateTime, _ = time.Parse("2006-01-02", "2023-12-21") _, err = NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "Failed to create new statsDB") // Check content of last update time file. It should not change - if timeString, _ := os.ReadFile(lastUnitsUpdateTimeFile); string(timeString) != "2023-12-20T00:00:00" { - t.Errorf("Last update time string test failed. Expected %s got %s", "2023-12-20T00:00:00", string(timeString)) - } + timeString, _ = os.ReadFile(lastUnitsUpdateTimeFile) + assert.Equal(t, string(timeString), "2023-12-20T00:00:00", "Expected last update time is 2023-12-20T00:00:00") // Remove read permissions on lastupdatetimefile err = os.Chmod(lastUnitsUpdateTimeFile, 0200) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Make again a new stats DB with new lastUpdateTime c.Data.LastUpdateTime, _ = time.Parse("2006-01-02", "2023-12-21") _, err = NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "Failed to create new statsDB") // Add back read permissions on lastupdatetimefile err = os.Chmod(lastUnitsUpdateTimeFile, 0644) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Check content of last update time file. It should change - if timeString, err := os.ReadFile(lastUnitsUpdateTimeFile); string(timeString) != "2023-12-21T00:00:00" { - t.Errorf( - "Last update time string test failed. Expected %s got %s %s", - "2023-12-21T00:00:00", - string(timeString), - err, - ) - } + timeString, err = os.ReadFile(lastUnitsUpdateTimeFile) + require.NoError(t, err) + assert.Equal(t, string(timeString), "2023-12-21T00:00:00", "Expected last update time string is 2023-12-21T00:00:00") // Remove last update time file err = os.Remove(lastUnitsUpdateTimeFile) - if err != nil { - t.Fatal(err) - } + require.NoError(t, err) // Make again a new stats DB with new lastUpdateTime c.Data.LastUpdateTime, _ = time.Parse("2006-01-02", "2023-12-22") _, err = NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "Failed to create new statsDB") // Check content of last update time file. It should change - if timeString, err := os.ReadFile(lastUnitsUpdateTimeFile); string(timeString) != "2023-12-22T00:00:00" { - t.Errorf( - "Last update time string test failed. Expected %s got %s %s", - "2023-12-22T00:00:00", - string(timeString), - err, - ) - } + timeString, err = os.ReadFile(lastUnitsUpdateTimeFile) + require.NoError(t, err) + assert.Equal(t, string(timeString), "2023-12-22T00:00:00", "Expected last update time string is 2023-12-22T00:00:00") } func TestUnitStatsDBEntries(t *testing.T) { @@ -656,35 +631,25 @@ func TestUnitStatsDBEntries(t *testing.T) { // Make new stats DB s, err := NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "Failed to create new statsDB") // Fetch units var expectedUnits []models.ClusterUnits expectedUnits = append(expectedUnits, mockUnitsOne...) expectedUnits = append(expectedUnits, mockUnitsTwo...) fetchedUnits, err := s.manager.FetchUnits(time.Now(), time.Now()) - if !reflect.DeepEqual(fetchedUnits, expectedUnits) { - t.Errorf("expected %#v, got %#v", expectedUnits, fetchedUnits) - } - if err == nil { - t.Errorf("expected one err from fetcher got none") - } + require.Error(t, err, "expected one error from fetching units") + assert.ElementsMatch(t, fetchedUnits, expectedUnits, "expected and got cluster units differ") // Try to insert data err = s.Collect() - if err != nil { - t.Errorf("Failed to collect units data: %s", err) - } + require.NoError(t, err, "Failed to collect units data") // Make units query rows, err := s.db.Query( "SELECT uuid,username,project,total_time_seconds,avg_cpu_usage,avg_cpu_mem_usage,total_cpu_energy_usage_kwh,total_cpu_emissions_gms,avg_gpu_usage,avg_gpu_mem_usage,total_gpu_energy_usage_kwh,total_gpu_emissions_gms FROM units ORDER BY uuid", ) - if err != nil { - t.Errorf("Failed to make DB query") - } + require.NoError(t, err, "Failed to make DB query") defer rows.Close() var units []models.Unit @@ -711,18 +676,13 @@ func TestUnitStatsDBEntries(t *testing.T) { for _, units := range mockUpdatedUnits { expectedUpdatedUnits = append(expectedUpdatedUnits, units.Units...) } - - if !reflect.DeepEqual(units, expectedUpdatedUnits) { - t.Errorf("expected %#v, \n\n\n got %#v", expectedUpdatedUnits, units) - } + assert.ElementsMatch(t, units, expectedUpdatedUnits, "expected and got updated cluster units differ") // Make usage query rows, err = s.db.Query( "SELECT avg_cpu_usage,num_updates FROM usage WHERE username = 'foo1' AND cluster_id = 'slurm-0'", ) - if err != nil { - t.Errorf("Failed to make DB query: %s", err) - } + require.NoError(t, err, "Failed to make DB query") defer rows.Close() // // For debugging @@ -741,17 +701,13 @@ func TestUnitStatsDBEntries(t *testing.T) { } } - if cpuUsage["usage"] < 15 { - t.Errorf("expected 15, \n got %f", cpuUsage["usage"]) - } + assert.Equal(t, cpuUsage["usage"], models.JSONFloat(15), "expected cpuUsage = 15") // Make projects query rows, err = s.db.Query( "SELECT users FROM projects WHERE name = 'fooprj' AND cluster_id = 'slurm-0'", ) - if err != nil { - t.Errorf("Failed to make DB query: %s", err) - } + require.NoError(t, err, "Failed to make DB query") defer rows.Close() var users models.List @@ -760,9 +716,7 @@ func TestUnitStatsDBEntries(t *testing.T) { t.Errorf("failed to scan row: %s", err) } } - if !reflect.DeepEqual(models.List{"foo1", "foo2"}, users) { - t.Errorf("expected users %#v, got %#v", models.List{"foo1", "foo2"}, users) - } + assert.ElementsMatch(t, models.List{"foo1", "foo2"}, users, "expected and got users differ") // Close DB s.Stop() @@ -774,21 +728,15 @@ func TestUnitStatsDBLock(t *testing.T) { // Make new stats DB s, err := NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "Failed to create new statsDB") // Beging exclusive transcation to lock DB _, err = s.db.Exec("BEGIN EXCLUSIVE;") - if err != nil { - t.Errorf("Failed to lock DB due to %s", err) - } + require.NoError(t, err) // Try to insert data. It should fail err = s.Collect() - if err == nil { - t.Errorf("Failed to skip data insertion when DB is locked") - } + require.Error(t, err, "expected error due to DB lock") s.db.Exec("COMMIT;") // Close DB @@ -801,18 +749,14 @@ func TestUnitStatsDBVacuum(t *testing.T) { // Make new stats DB s, err := NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "Failed to create new statsDB") // Populate DB with data populateDBWithMockData(s) // Run vacuum err = s.vacuum() - if err != nil { - t.Errorf("Failed to vacuum DB due to %s", err) - } + require.NoError(t, err, "failed to vacuum DB") } func TestUnitStatsDBBackup(t *testing.T) { @@ -821,9 +765,7 @@ func TestUnitStatsDBBackup(t *testing.T) { // Make new stats DB s, err := NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "Failed to create new statsDB") // Populate DB with data populateDBWithMockData(s) @@ -831,13 +773,10 @@ func TestUnitStatsDBBackup(t *testing.T) { // Run backup expectedBackupFile := filepath.Join(c.Data.BackupPath, "backup.db") err = s.backup(expectedBackupFile) - if err != nil { - t.Errorf("Failed to backup DB %s", err) - } + require.NoError(t, err, "failed to backup DB") - if _, err := os.Stat(expectedBackupFile); err != nil { - t.Errorf("Backup DB file not found") - } + _, err = os.Stat(expectedBackupFile) + require.NoError(t, err, "Backup DB file not found") // Check contents of backed up DB var numRows int @@ -849,9 +788,7 @@ func TestUnitStatsDBBackup(t *testing.T) { for rows.Next() { numRows += 1 } - if numRows != 7 { - t.Errorf("Backup DB check failed. Expected rows 7 , Got %d.", numRows) - } + assert.Equal(t, numRows, 7, "Backup DB check failed. Expected rows 7") } func TestStatsDBBackup(t *testing.T) { @@ -860,9 +797,7 @@ func TestStatsDBBackup(t *testing.T) { // Make new stats DB s, err := NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "failed to create new statsDB") // Make backup dir non existent s.storage.dbBackupPath = tmpDir @@ -871,9 +806,8 @@ func TestStatsDBBackup(t *testing.T) { populateDBWithMockData(s) // Run backup - if err := s.createBackup(); err != nil { - t.Errorf("Failed to backup DB: %s", err) - } + err = s.createBackup() + require.NoError(t, err, "failed to backup DB") } func TestUnitStatsDeleteOldUnits(t *testing.T) { @@ -883,9 +817,7 @@ func TestUnitStatsDeleteOldUnits(t *testing.T) { // Make new stats DB s, err := NewStatsDB(c) - if err != nil { - t.Errorf("Failed to create new statsDB struct due to %s", err) - } + require.NoError(t, err, "failed to create new statsDB") // Add new row that should be deleted units := []models.ClusterUnits{ @@ -905,31 +837,22 @@ func TestUnitStatsDeleteOldUnits(t *testing.T) { } tx, _ := s.db.Begin() stmtMap, err := s.prepareStatements(tx) - if err != nil { - t.Errorf("Failed to prepare SQL statements: %s", err) - } + require.NoError(t, err) s.execStatements(stmtMap, time.Now(), units, nil, nil) // Now clean up DB for old units err = s.purgeExpiredUnits(tx) - if err != nil { - t.Errorf("Failed to delete old entries in DB") - } + require.NoError(t, err, "failed to delete old netries in DB") tx.Commit() // Query for deleted unit result, err := s.db.Prepare( fmt.Sprintf("SELECT COUNT(uuid) FROM %s WHERE uuid = ?;", base.UnitsDBTableName), ) - if err != nil { - t.Errorf("Failed to prepare SQL statement") - } + require.NoError(t, err) + var numRows string err = result.QueryRow(unitID).Scan(&numRows) - if err != nil { - t.Errorf("Failed to get query result due to %s.", err) - } - if numRows != "0" { - t.Errorf("Deleting old units failed. Expected 0 rows. Returned %s", numRows) - } + require.NoError(t, err, "failed to query DB") + assert.Equal(t, numRows, "0", "expected 0 rows after deletion") } diff --git a/pkg/api/http/server.go b/pkg/api/http/server.go index dd772a7..539a7c4 100644 --- a/pkg/api/http/server.go +++ b/pkg/api/http/server.go @@ -485,6 +485,8 @@ func (s *CEEMSServer) unitsQuerier( q.param([]string{queryWindowTS["to"]}) queryUnits: + // Sort by uuid + q.query(" ORDER BY cluster_id ASC, uuid ASC ") // Get all user units in the given time window units, err := s.queriers.unit(s.db, q, s.logger) @@ -711,7 +713,12 @@ func (s *CEEMSServer) clustersAdmin(w http.ResponseWriter, r *http.Request) { // Make query q := Query{} - q.query(fmt.Sprintf("SELECT DISTINCT cluster_id, resource_manager FROM %s", base.UnitsDBTableName)) + q.query( + fmt.Sprintf( + "SELECT DISTINCT cluster_id, resource_manager FROM %s ORDER BY cluster_id ASC", + base.UnitsDBTableName, + ), + ) // Make query and get list of cluster ids clusterIDs, err := s.queriers.cluster(s.db, q, s.logger) @@ -750,6 +757,9 @@ func (s *CEEMSServer) usersQuerier(users []string, w http.ResponseWriter, r *htt q.param(clusterIDs) } + // Sort by cluster_id and name + q.query(" ORDER BY cluster_id ASC, name ASC ") + // Make query and check for users returned in usage userModels, err := s.queriers.user(s.db, q, s.logger) if err != nil { @@ -863,6 +873,9 @@ func (s *CEEMSServer) projectsQuerier(users []string, w http.ResponseWriter, r * q.param(clusterIDs) } + // Sort by cluster_id and name + q.query(" ORDER BY cluster_id ASC, name ASC ") + // Make query projectModels, err := s.queriers.project(s.db, q, s.logger) if err != nil { @@ -1008,6 +1021,9 @@ func (s *CEEMSServer) currentUsage(users []string, fields []string, w http.Respo groupby = slices.Compact(groupby) q.query(fmt.Sprintf(" GROUP BY %s", strings.Join(groupby, ","))) + // Sort by cluster_id, username and project + q.query(" ORDER BY cluster_id ASC, username ASC, project ASC ") + // Make query and check for returned number of rows usage, err := s.queriers.usage(s.db, q, s.logger) if err != nil { @@ -1046,6 +1062,9 @@ func (s *CEEMSServer) globalUsage(users []string, queriedFields []string, w http // Add common query parameters q = s.getCommonQueryParams(&q, r.URL.Query()) + // Sort by cluster_id, username and project + q.query(" ORDER BY cluster_id ASC, username ASC, project ASC ") + // Make query and check for returned number of rows usage, err := s.queriers.usage(s.db, q, s.logger) if err != nil { diff --git a/pkg/api/models/types.go b/pkg/api/models/types.go index 7a8b1fc..bee0592 100644 --- a/pkg/api/models/types.go +++ b/pkg/api/models/types.go @@ -127,46 +127,6 @@ func (m *MetricMap) Scan(v interface{}) error { return nil } -// // MarshalJSON marshals JSONFloat into byte array -// func (m *MetricMap) MarshalJSON() ([]byte, error) { -// newMetricMap := *m -// for k, v := range newMetricMap { -// vFloat := v.(float64) -// if math.IsInf(vFloat, 0) || math.IsNaN(vFloat) { -// newMetricMap[k] = vFloat -// } -// } -// return json.Marshal(newMetricMap) // marshal result -// } - -// // UnmarshalJSON unmarshals byte array into MetricMap after converting string to -// // float64 -// func (m *MetricMap) UnmarshalJSON(v []byte) error { -// // just a regular float value -// var mv map[string]interface{} -// if err := json.Unmarshal(v, &mv); err != nil { -// return err -// } - -// // Iterate over map and convert all non floats to zero -// for key, valueInt := range mv { -// switch value := valueInt.(type) { -// case float64: -// mv[key] = value -// case string: -// if vFloat, err := strconv.ParseFloat(value, 64); err != nil { -// mv[key] = float64(0) -// } else { -// mv[key] = common.SanitizeFloat(vFloat) -// } -// default: -// mv[key] = 0 -// } -// } -// *m = mv -// return nil -// } - // JSONFloat is a custom float64 that can handle Inf and NaN during JSON (un)marshalling type JSONFloat float64 @@ -219,6 +179,8 @@ func (j *JSONFloat) Scan(v interface{}) error { } // MarshalJSON marshals JSONFloat into byte array +// The custom marshal interface will truncate the float64 to 2 decimals as storing +// all decimals will bring a very low value and high DB storage func (j JSONFloat) MarshalJSON() ([]byte, error) { v := float64(j) if math.IsInf(v, 0) || math.IsNaN(v) { @@ -226,7 +188,14 @@ func (j JSONFloat) MarshalJSON() ([]byte, error) { s := "0" return []byte(s), nil } - return json.Marshal(v) // marshal result as standard float64 + + // If v is actually a int, use json.Marshal else truncate the decimals to 2 + if v == float64(int(v)) { + return json.Marshal(v) + } else { + // Convert to bytes by truncating to 2 decimals + return []byte(fmt.Sprintf("%.2f", v)), nil + } } // UnmarshalJSON unmarshals byte array into JSONFloat diff --git a/pkg/api/resource/manager.go b/pkg/api/resource/manager.go index 967b12e..f1612c2 100644 --- a/pkg/api/resource/manager.go +++ b/pkg/api/resource/manager.go @@ -8,6 +8,7 @@ import ( "path/filepath" "slices" "strings" + "sync" "time" "github.com/go-kit/log" @@ -40,6 +41,12 @@ var ( factories = make(map[string]func(cluster models.Cluster, logger log.Logger) (Fetcher, error)) ) +// Mutex lock +var ( + unitFetcherLock = sync.RWMutex{} + userFetcherLock = sync.RWMutex{} +) + // RegisterManager registers the resource manager into factory func RegisterManager( manager string, @@ -148,37 +155,59 @@ func NewManager(logger log.Logger) (*Manager, error) { // FetchUnits implements collection jobs between start and end times func (b Manager) FetchUnits(start time.Time, end time.Time) ([]models.ClusterUnits, error) { // Measure elapsed time - defer common.TimeTrack(time.Now(), "resource manager units", b.Logger) + defer common.TimeTrack(time.Now(), "units fetcher", b.Logger) var clusterUnits []models.ClusterUnits var errs error + var wg sync.WaitGroup + wg.Add((len(b.Fetchers))) for _, fetcher := range b.Fetchers { - units, err := fetcher.FetchUnits(start, end) - if err != nil { - errs = errors.Join(errs, err) - continue - } - clusterUnits = append(clusterUnits, units...) + go func(f Fetcher) { + units, err := f.FetchUnits(start, end) + if err != nil { + unitFetcherLock.Lock() + errs = errors.Join(errs, err) + unitFetcherLock.Unlock() + wg.Done() + return + } + unitFetcherLock.Lock() + clusterUnits = append(clusterUnits, units...) + unitFetcherLock.Unlock() + wg.Done() + }(fetcher) } + wg.Wait() return clusterUnits, errs } // FetchUsersProjects fetches latest projects and users for each cluster func (b Manager) FetchUsersProjects(currentTime time.Time) ([]models.ClusterUsers, []models.ClusterProjects, error) { // Measure elapsed time - defer common.TimeTrack(time.Now(), "resource manager users and projects", b.Logger) + defer common.TimeTrack(time.Now(), "users and projects fetcher", b.Logger) var clusterUsers []models.ClusterUsers var clusterProjects []models.ClusterProjects var errs error + var wg sync.WaitGroup + wg.Add((len(b.Fetchers))) for _, fetcher := range b.Fetchers { - users, projects, err := fetcher.FetchUsersProjects(currentTime) - if err != nil { - errs = errors.Join(errs, err) - continue - } - clusterUsers = append(clusterUsers, users...) - clusterProjects = append(clusterProjects, projects...) + go func(f Fetcher) { + users, projects, err := f.FetchUsersProjects(currentTime) + if err != nil { + userFetcherLock.Lock() + errs = errors.Join(errs, err) + userFetcherLock.Unlock() + wg.Done() + return + } + userFetcherLock.Lock() + clusterUsers = append(clusterUsers, users...) + clusterProjects = append(clusterProjects, projects...) + userFetcherLock.Unlock() + wg.Done() + }(fetcher) } + wg.Wait() return clusterUsers, clusterProjects, errs } diff --git a/pkg/api/testdata/config.yml b/pkg/api/testdata/config.yml index d5fc9e5..73b58bd 100644 --- a/pkg/api/testdata/config.yml +++ b/pkg/api/testdata/config.yml @@ -12,6 +12,7 @@ clusters: manager: slurm updaters: - tsdb-0 + - tsdb-1 cli: path: pkg/api/testdata @@ -65,6 +66,14 @@ updaters: rte_total: total_cpu_emissions_gms{uuid=~"{{.UUIDs}}",provider="rte"} emaps_total: total_cpu_emissions_gms{uuid=~"{{.UUIDs}}",provider="emaps"} + - id: tsdb-1 + updater: tsdb + web: + url: http://localhost:9090 + extra_config: + cutoff_duration: 0s + query_batch_size: 1 + queries: # Total IO Read stats total_io_read_stats: bytes: total_io_read_stats_bytes{uuid=~"{{.UUIDs}}"} diff --git a/pkg/api/testdata/output/e2e-test-api-server-admin-query-all.txt b/pkg/api/testdata/output/e2e-test-api-server-admin-query-all.txt index 0c04d98..64a9025 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-admin-query-all.txt +++ b/pkg/api/testdata/output/e2e-test-api-server-admin-query-all.txt @@ -1 +1 @@ -{"status":"success","data":[{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1009248","name":"test_script2","project":"testacc","group":"grp15","user":"testusr","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1015","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"tags":{"exit_code":"0:0","gid":1015,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1015,"workdir":"/home/usr23"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"11508","name":"test_script2","project":"acc1","group":"grp15","user":"usr15","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1015","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":11.5},"avg_cpu_mem_usage":{"global":11.5},"total_cpu_energy_usage_kwh":{"total":11.5},"total_cpu_emissions_gms":{"emaps_total":11.5,"rte_total":11.5},"avg_gpu_usage":{"global":11.5},"avg_gpu_mem_usage":{"global":11.5},"total_gpu_energy_usage_kwh":{"total":11.5},"total_gpu_emissions_gms":{"emaps_total":11.5,"rte_total":11.5},"total_io_write_stats":{"bytes":11508,"requests":115080},"total_io_read_stats":{"bytes":11508,"requests":115080},"total_ingress_stats":{"bytes":1150800,"packets":11508000},"total_outgress_stats":{"bytes":1150800,"packets":11508000},"tags":{"exit_code":"0:0","gid":1015,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1015,"workdir":"/home/usr15"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"14508","name":"test_script2","project":"acc4","group":"grp4","user":"usr4","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1004","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"tags":{"exit_code":"0:0","gid":1004,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1004,"workdir":"/home/usr4"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"147975","name":"test_script1","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T14:37:02+0100","started_at":"2023-02-21T14:37:07+0100","ended_at":"2023-02-21T15:26:29+0100","created_at_ts":1676986622000,"started_at_ts":1676986627000,"ended_at_ts":1676989589000,"elapsed":"00:49:22","state":"CANCELLED by 1003","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"avg_cpu_usage":{"global":14.79},"avg_cpu_mem_usage":{"global":14.79},"total_cpu_energy_usage_kwh":{"total":14.79},"total_cpu_emissions_gms":{"emaps_total":14.79,"rte_total":14.79},"avg_gpu_usage":{"global":14.79},"avg_gpu_mem_usage":{"global":14.79},"total_gpu_energy_usage_kwh":{"total":14.79},"total_gpu_emissions_gms":{"emaps_total":14.79,"rte_total":14.79},"total_io_write_stats":{"bytes":147975,"requests":1479750},"total_io_read_stats":{"bytes":147975,"requests":1479750},"total_ingress_stats":{"bytes":14797500,"packets":147975000},"total_outgress_stats":{"bytes":14797500,"packets":147975000},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1479765","name":"test_script1","project":"acc1","group":"grp8","user":"usr8","created_at":"2023-02-21T14:37:02+0100","started_at":"2023-02-21T14:37:07+0100","ended_at":"2023-02-21T15:26:29+0100","created_at_ts":1676986622000,"started_at_ts":1676986627000,"ended_at_ts":1676989589000,"elapsed":"00:49:22","state":"CANCELLED by 1008","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"tags":{"exit_code":"0:0","gid":1008,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1008,"workdir":"/home/usr8"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1481508","name":"test_script2","project":"acc2","group":"grp2","user":"usr2","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1002","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":14.58},"avg_cpu_mem_usage":{"global":14.58},"total_cpu_energy_usage_kwh":{"total":14.58},"total_cpu_emissions_gms":{"emaps_total":14.58,"rte_total":14.58},"avg_gpu_usage":{"global":14.58},"avg_gpu_mem_usage":{"global":14.58},"total_gpu_energy_usage_kwh":{"total":14.58},"total_gpu_emissions_gms":{"emaps_total":14.58,"rte_total":14.58},"total_io_write_stats":{"bytes":1481508,"requests":14815080},"total_io_read_stats":{"bytes":1481508,"requests":14815080},"total_ingress_stats":{"bytes":148150800,"packets":1481508000},"total_outgress_stats":{"bytes":148150800,"packets":1481508000},"tags":{"exit_code":"0:0","gid":1002,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1002,"workdir":"/home/usr2"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1481510","name":"test_script2","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1003","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"81510","name":"test_script2","project":"acc1","group":"grp15","user":"usr15","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1015","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":81.51},"avg_cpu_mem_usage":{"global":81.51},"total_cpu_energy_usage_kwh":{"total":81.51},"total_cpu_emissions_gms":{"emaps_total":81.51,"rte_total":81.51},"avg_gpu_usage":{"global":81.51},"avg_gpu_mem_usage":{"global":81.51},"total_gpu_energy_usage_kwh":{"total":81.51},"total_gpu_emissions_gms":{"emaps_total":81.51,"rte_total":81.51},"total_io_write_stats":{"bytes":81510,"requests":815100},"total_io_read_stats":{"bytes":81510,"requests":815100},"total_ingress_stats":{"bytes":8151000,"packets":81510000},"total_outgress_stats":{"bytes":8151000,"packets":81510000},"tags":{"exit_code":"0:0","gid":1015,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1015,"workdir":"/home/usr23"}}]} +{"status":"success","data":[{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1009248","name":"test_script2","project":"testacc","group":"grp15","user":"testusr","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1015","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":21.23},"avg_cpu_mem_usage":{"global":21.23},"total_cpu_energy_usage_kwh":{"total":21.23},"total_cpu_emissions_gms":{"emaps_total":21.23,"rte_total":21.23},"avg_gpu_usage":{"global":21.23},"avg_gpu_mem_usage":{"global":21.23},"total_gpu_energy_usage_kwh":{"total":21.23},"total_gpu_emissions_gms":{"emaps_total":21.23,"rte_total":21.23},"tags":{"exit_code":"0:0","gid":1015,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1015,"workdir":"/home/usr23"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"11508","name":"test_script2","project":"acc1","group":"grp15","user":"usr15","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1015","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":17.80},"avg_cpu_mem_usage":{"global":17.80},"total_cpu_energy_usage_kwh":{"total":17.80},"total_cpu_emissions_gms":{"emaps_total":17.80,"rte_total":17.80},"avg_gpu_usage":{"global":17.80},"avg_gpu_mem_usage":{"global":17.80},"total_gpu_energy_usage_kwh":{"total":17.80},"total_gpu_emissions_gms":{"emaps_total":17.80,"rte_total":17.80},"tags":{"exit_code":"0:0","gid":1015,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1015,"workdir":"/home/usr15"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"14508","name":"test_script2","project":"acc4","group":"grp4","user":"usr4","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1004","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":14.03},"avg_cpu_mem_usage":{"global":14.03},"total_cpu_energy_usage_kwh":{"total":14.03},"total_cpu_emissions_gms":{"emaps_total":14.03,"rte_total":14.03},"avg_gpu_usage":{"global":14.03},"avg_gpu_mem_usage":{"global":14.03},"total_gpu_energy_usage_kwh":{"total":14.03},"total_gpu_emissions_gms":{"emaps_total":14.03,"rte_total":14.03},"tags":{"exit_code":"0:0","gid":1004,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1004,"workdir":"/home/usr4"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"147975","name":"test_script1","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T14:37:02+0100","started_at":"2023-02-21T14:37:07+0100","ended_at":"2023-02-21T15:26:29+0100","created_at_ts":1676986622000,"started_at_ts":1676986627000,"ended_at_ts":1676989589000,"elapsed":"00:49:22","state":"CANCELLED by 1003","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"avg_cpu_usage":{"global":29.72},"avg_cpu_mem_usage":{"global":29.72},"total_cpu_energy_usage_kwh":{"total":29.72},"total_cpu_emissions_gms":{"emaps_total":29.72,"rte_total":29.72},"avg_gpu_usage":{"global":29.72},"avg_gpu_mem_usage":{"global":29.72},"total_gpu_energy_usage_kwh":{"total":29.72},"total_gpu_emissions_gms":{"emaps_total":29.72,"rte_total":29.72},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1479765","name":"test_script1","project":"acc1","group":"grp8","user":"usr8","created_at":"2023-02-21T14:37:02+0100","started_at":"2023-02-21T14:37:07+0100","ended_at":"2023-02-21T15:26:29+0100","created_at_ts":1676986622000,"started_at_ts":1676986627000,"ended_at_ts":1676989589000,"elapsed":"00:49:22","state":"CANCELLED by 1008","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"avg_cpu_usage":{"global":20.21},"avg_cpu_mem_usage":{"global":20.21},"total_cpu_energy_usage_kwh":{"total":20.21},"total_cpu_emissions_gms":{"emaps_total":20.21,"rte_total":20.21},"avg_gpu_usage":{"global":20.21},"avg_gpu_mem_usage":{"global":20.21},"total_gpu_energy_usage_kwh":{"total":20.21},"total_gpu_emissions_gms":{"emaps_total":20.21,"rte_total":20.21},"tags":{"exit_code":"0:0","gid":1008,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1008,"workdir":"/home/usr8"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1481508","name":"test_script2","project":"acc2","group":"grp2","user":"usr2","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1002","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":53.48},"avg_cpu_mem_usage":{"global":53.48},"total_cpu_energy_usage_kwh":{"total":53.48},"total_cpu_emissions_gms":{"emaps_total":53.48,"rte_total":53.48},"avg_gpu_usage":{"global":53.48},"avg_gpu_mem_usage":{"global":53.48},"total_gpu_energy_usage_kwh":{"total":53.48},"total_gpu_emissions_gms":{"emaps_total":53.48,"rte_total":53.48},"tags":{"exit_code":"0:0","gid":1002,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1002,"workdir":"/home/usr2"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"1481510","name":"test_script2","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1003","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":50.14},"avg_cpu_mem_usage":{"global":50.14},"total_cpu_energy_usage_kwh":{"total":50.14},"total_cpu_emissions_gms":{"emaps_total":50.14,"rte_total":50.14},"avg_gpu_usage":{"global":50.14},"avg_gpu_mem_usage":{"global":50.14},"total_gpu_energy_usage_kwh":{"total":50.14},"total_gpu_emissions_gms":{"emaps_total":50.14,"rte_total":50.14},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"cluster_id":"slurm-1","resource_manager":"slurm","uuid":"81510","name":"test_script2","project":"acc1","group":"grp15","user":"usr15","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1015","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":18.57},"avg_cpu_mem_usage":{"global":18.57},"total_cpu_energy_usage_kwh":{"total":18.57},"total_cpu_emissions_gms":{"emaps_total":18.57,"rte_total":18.57},"avg_gpu_usage":{"global":18.57},"avg_gpu_mem_usage":{"global":18.57},"total_gpu_energy_usage_kwh":{"total":18.57},"total_gpu_emissions_gms":{"emaps_total":18.57,"rte_total":18.57},"tags":{"exit_code":"0:0","gid":1015,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1015,"workdir":"/home/usr23"}}]} diff --git a/pkg/api/testdata/output/e2e-test-api-server-admin-query.txt b/pkg/api/testdata/output/e2e-test-api-server-admin-query.txt index b8600f3..b994d35 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-admin-query.txt +++ b/pkg/api/testdata/output/e2e-test-api-server-admin-query.txt @@ -1 +1 @@ -{"status":"success","data":[{"cluster_id":"slurm-0","resource_manager":"slurm","uuid":"147975","name":"test_script1","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T14:37:02+0100","started_at":"2023-02-21T14:37:07+0100","ended_at":"2023-02-21T15:26:29+0100","created_at_ts":1676986622000,"started_at_ts":1676986627000,"ended_at_ts":1676989589000,"elapsed":"00:49:22","state":"CANCELLED by 1003","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"avg_cpu_usage":{"global":14.79},"avg_cpu_mem_usage":{"global":14.79},"total_cpu_energy_usage_kwh":{"total":14.79},"total_cpu_emissions_gms":{"emaps_total":14.79,"rte_total":14.79},"avg_gpu_usage":{"global":14.79},"avg_gpu_mem_usage":{"global":14.79},"total_gpu_energy_usage_kwh":{"total":14.79},"total_gpu_emissions_gms":{"emaps_total":14.79,"rte_total":14.79},"total_io_write_stats":{"bytes":147975,"requests":1479750},"total_io_read_stats":{"bytes":147975,"requests":1479750},"total_ingress_stats":{"bytes":14797500,"packets":147975000},"total_outgress_stats":{"bytes":14797500,"packets":147975000},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"cluster_id":"slurm-0","resource_manager":"slurm","uuid":"1481510","name":"test_script2","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1003","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}}]} +{"status":"success","data":[{"cluster_id":"slurm-0","resource_manager":"slurm","uuid":"147975","name":"test_script1","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T14:37:02+0100","started_at":"2023-02-21T14:37:07+0100","ended_at":"2023-02-21T15:26:29+0100","created_at_ts":1676986622000,"started_at_ts":1676986627000,"ended_at_ts":1676989589000,"elapsed":"00:49:22","state":"CANCELLED by 1003","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"avg_cpu_usage":{"global":29.72},"avg_cpu_mem_usage":{"global":29.72},"total_cpu_energy_usage_kwh":{"total":29.72},"total_cpu_emissions_gms":{"emaps_total":29.72,"rte_total":29.72},"avg_gpu_usage":{"global":29.72},"avg_gpu_mem_usage":{"global":29.72},"total_gpu_energy_usage_kwh":{"total":29.72},"total_gpu_emissions_gms":{"emaps_total":29.72,"rte_total":29.72},"total_io_write_stats":{"bytes":2972084252,"requests":29720842520},"total_io_read_stats":{"bytes":2972084252,"requests":29720842520},"total_ingress_stats":{"bytes":297208425200,"packets":2972084252000},"total_outgress_stats":{"bytes":297208425200,"packets":2972084252000},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"cluster_id":"slurm-0","resource_manager":"slurm","uuid":"1481510","name":"test_script2","project":"acc3","group":"grp3","user":"usr3","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:00:17","state":"CANCELLED by 1003","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":50.14},"avg_cpu_mem_usage":{"global":50.14},"total_cpu_energy_usage_kwh":{"total":50.14},"total_cpu_emissions_gms":{"emaps_total":50.14,"rte_total":50.14},"avg_gpu_usage":{"global":50.14},"avg_gpu_mem_usage":{"global":50.14},"total_gpu_energy_usage_kwh":{"total":50.14},"total_gpu_emissions_gms":{"emaps_total":50.14,"rte_total":50.14},"total_io_write_stats":{"bytes":501362011,"requests":5013620110},"total_io_read_stats":{"bytes":501362011,"requests":5013620110},"total_ingress_stats":{"bytes":50136201100,"packets":501362011000},"total_outgress_stats":{"bytes":50136201100,"packets":501362011000},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}}]} diff --git a/pkg/api/testdata/output/e2e-test-api-server-current-usage-admin-query.txt b/pkg/api/testdata/output/e2e-test-api-server-current-usage-admin-query.txt index 7a3ec4a..0bb711e 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-current-usage-admin-query.txt +++ b/pkg/api/testdata/output/e2e-test-api-server-current-usage-admin-query.txt @@ -1 +1 @@ -{"status":"success","data":[{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":2,"project":"acc1","group":"grp15","user":"usr15","total_time_seconds":{"alloc_cpumemtime":325713920,"alloc_cputime":15904,"alloc_gpumemtime":994,"alloc_gputime":7952,"walltime":994},"avg_cpu_usage":{"global":46.505},"avg_cpu_mem_usage":{"global":46.505},"total_cpu_energy_usage_kwh":{"total":93.01},"total_cpu_emissions_gms":{"emaps_total":93.01,"rte_total":93.01},"avg_gpu_usage":{"global":46.505},"avg_gpu_mem_usage":{"global":46.505},"total_gpu_energy_usage_kwh":{"total":93.01},"total_gpu_emissions_gms":{"emaps_total":93.01,"rte_total":93.01},"total_io_write_stats":{"bytes":93018,"requests":930180},"total_io_read_stats":{"bytes":93018,"requests":930180},"total_ingress_stats":{"bytes":9301800,"packets":93018000},"total_outgress_stats":{"bytes":9301800,"packets":93018000}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":1,"project":"acc1","group":"grp8","user":"usr8","total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":2,"project":"acc3","group":"grp3","user":"usr3","total_time_seconds":{"alloc_cpumemtime":1133445120,"alloc_cputime":31648,"alloc_gpumemtime":3459,"alloc_gputime":27672,"walltime":3459},"avg_cpu_usage":{"global":14.79},"avg_cpu_mem_usage":{"global":14.79},"total_cpu_energy_usage_kwh":{"total":14.79},"total_cpu_emissions_gms":{"emaps_total":14.79,"rte_total":14.79},"avg_gpu_usage":{"global":14.79},"avg_gpu_mem_usage":{"global":14.79},"total_gpu_energy_usage_kwh":{"total":14.79},"total_gpu_emissions_gms":{"emaps_total":14.79,"rte_total":14.79},"total_io_write_stats":{"bytes":147975,"requests":1479750},"total_io_read_stats":{"bytes":147975,"requests":1479750},"total_ingress_stats":{"bytes":14797500,"packets":147975000},"total_outgress_stats":{"bytes":14797500,"packets":147975000}}]} +{"status":"success","data":[{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":2,"project":"acc1","group":"grp15","user":"usr15","total_time_seconds":{"alloc_cpumemtime":325713920,"alloc_cputime":15904,"alloc_gpumemtime":994,"alloc_gputime":7952,"walltime":994},"avg_cpu_usage":{"global":18.18},"avg_cpu_mem_usage":{"global":18.18},"total_cpu_energy_usage_kwh":{"total":36.37},"total_cpu_emissions_gms":{"emaps_total":36.37,"rte_total":36.37},"avg_gpu_usage":{"global":18.18},"avg_gpu_mem_usage":{"global":18.18},"total_gpu_energy_usage_kwh":{"total":36.37},"total_gpu_emissions_gms":{"emaps_total":36.37,"rte_total":36.37}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":2,"project":"acc3","group":"grp3","user":"usr3","total_time_seconds":{"alloc_cpumemtime":1133445120,"alloc_cputime":31648,"alloc_gpumemtime":3459,"alloc_gputime":27672,"walltime":3459},"avg_cpu_usage":{"global":34.85},"avg_cpu_mem_usage":{"global":32.65},"total_cpu_energy_usage_kwh":{"total":79.86},"total_cpu_emissions_gms":{"emaps_total":79.86,"rte_total":79.86},"avg_gpu_usage":{"global":32.65},"avg_gpu_mem_usage":{"global":32.65},"total_gpu_energy_usage_kwh":{"total":79.86},"total_gpu_emissions_gms":{"emaps_total":79.86,"rte_total":79.86}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":1,"project":"acc1","group":"grp8","user":"usr8","total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"avg_cpu_usage":{"global":20.21},"avg_cpu_mem_usage":{"global":20.21},"total_cpu_energy_usage_kwh":{"total":20.21},"total_cpu_emissions_gms":{"emaps_total":20.21,"rte_total":20.21},"avg_gpu_usage":{"global":20.21},"avg_gpu_mem_usage":{"global":20.21},"total_gpu_energy_usage_kwh":{"total":20.21},"total_gpu_emissions_gms":{"emaps_total":20.21,"rte_total":20.21}}]} diff --git a/pkg/api/testdata/output/e2e-test-api-server-current-usage-query.txt b/pkg/api/testdata/output/e2e-test-api-server-current-usage-query.txt index 9088b35..e3b6aab 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-current-usage-query.txt +++ b/pkg/api/testdata/output/e2e-test-api-server-current-usage-query.txt @@ -1 +1 @@ -{"status":"success","data":[{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":2,"project":"acc1","group":"grp15","user":"usr15","total_time_seconds":{"alloc_cpumemtime":325713920,"alloc_cputime":15904,"alloc_gpumemtime":994,"alloc_gputime":7952,"walltime":994},"avg_cpu_usage":{"global":46.505},"avg_cpu_mem_usage":{"global":46.505},"total_cpu_energy_usage_kwh":{"total":93.01},"total_cpu_emissions_gms":{"emaps_total":93.01,"rte_total":93.01},"avg_gpu_usage":{"global":46.505},"avg_gpu_mem_usage":{"global":46.505},"total_gpu_energy_usage_kwh":{"total":93.01},"total_gpu_emissions_gms":{"emaps_total":93.01,"rte_total":93.01},"total_io_write_stats":{"bytes":93018,"requests":930180},"total_io_read_stats":{"bytes":93018,"requests":930180},"total_ingress_stats":{"bytes":9301800,"packets":93018000},"total_outgress_stats":{"bytes":9301800,"packets":93018000}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":1,"project":"acc1","group":"grp8","user":"usr8","total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":1,"project":"acc2","group":"grp2","user":"usr2","total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":14.58},"avg_cpu_mem_usage":{"global":14.580000000000002},"total_cpu_energy_usage_kwh":{"total":14.58},"total_cpu_emissions_gms":{"emaps_total":14.58,"rte_total":14.58},"avg_gpu_usage":{"global":14.58},"avg_gpu_mem_usage":{"global":14.58},"total_gpu_energy_usage_kwh":{"total":14.58},"total_gpu_emissions_gms":{"emaps_total":14.58,"rte_total":14.58},"total_io_write_stats":{"bytes":1481508,"requests":14815080},"total_io_read_stats":{"bytes":1481508,"requests":14815080},"total_ingress_stats":{"bytes":148150800,"packets":1481508000},"total_outgress_stats":{"bytes":148150800,"packets":1481508000}}]} +{"status":"success","data":[{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":2,"project":"acc1","group":"grp15","user":"usr15","total_time_seconds":{"alloc_cpumemtime":325713920,"alloc_cputime":15904,"alloc_gpumemtime":994,"alloc_gputime":7952,"walltime":994},"avg_cpu_usage":{"global":18.18},"avg_cpu_mem_usage":{"global":18.18},"total_cpu_energy_usage_kwh":{"total":36.37},"total_cpu_emissions_gms":{"emaps_total":36.37,"rte_total":36.37},"avg_gpu_usage":{"global":18.18},"avg_gpu_mem_usage":{"global":18.18},"total_gpu_energy_usage_kwh":{"total":36.37},"total_gpu_emissions_gms":{"emaps_total":36.37,"rte_total":36.37}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":1,"project":"acc2","group":"grp2","user":"usr2","total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":53.48},"avg_cpu_mem_usage":{"global":53.48},"total_cpu_energy_usage_kwh":{"total":53.48},"total_cpu_emissions_gms":{"emaps_total":53.48,"rte_total":53.48},"avg_gpu_usage":{"global":53.48},"avg_gpu_mem_usage":{"global":53.48},"total_gpu_energy_usage_kwh":{"total":53.48},"total_gpu_emissions_gms":{"emaps_total":53.48,"rte_total":53.48}},{"cluster_id":"slurm-1","resource_manager":"slurm","num_units":1,"project":"acc1","group":"grp8","user":"usr8","total_time_seconds":{"alloc_cpumemtime":970588160,"alloc_cputime":23696,"alloc_gpumemtime":2962,"alloc_gputime":23696,"walltime":2962},"avg_cpu_usage":{"global":20.21},"avg_cpu_mem_usage":{"global":20.21},"total_cpu_energy_usage_kwh":{"total":20.21},"total_cpu_emissions_gms":{"emaps_total":20.21,"rte_total":20.21},"avg_gpu_usage":{"global":20.21},"avg_gpu_mem_usage":{"global":20.21},"total_gpu_energy_usage_kwh":{"total":20.21},"total_gpu_emissions_gms":{"emaps_total":20.21,"rte_total":20.21}}]} diff --git a/pkg/api/testdata/output/e2e-test-api-server-running-query.txt b/pkg/api/testdata/output/e2e-test-api-server-running-query.txt index a88004f..3d65fe9 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-running-query.txt +++ b/pkg/api/testdata/output/e2e-test-api-server-running-query.txt @@ -1 +1 @@ -{"status":"success","data":[{"uuid":"1481510","started_at":"2023-02-21T15:49:06+0100","state":"CANCELLED by 1003","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"uuid":"147975","started_at":"2023-02-21T14:37:07+0100","state":"CANCELLED by 1003","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"uuid":"2009248","started_at":"2023-02-21T15:49:06+0100","state":"RUNNING","allocation":{"billing":0,"cpus":0,"gpus":0,"mem":0,"nodes":2},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part2","qos":"qos3","uid":1003,"workdir":"/home/usr3"}}]} +{"status":"success","data":[{"uuid":"147975","started_at":"2023-02-21T14:37:07+0100","state":"CANCELLED by 1003","allocation":{"billing":80,"cpus":8,"gpus":8,"mem":343597383680,"nodes":1},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-0","nodelistexp":"compute-0","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"uuid":"1481510","started_at":"2023-02-21T15:49:06+0100","state":"CANCELLED by 1003","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1003,"workdir":"/home/usr3"}},{"uuid":"2009248","started_at":"2023-02-21T15:49:06+0100","state":"RUNNING","allocation":{"billing":0,"cpus":0,"gpus":0,"mem":0,"nodes":2},"tags":{"exit_code":"0:0","gid":1003,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part2","qos":"qos3","uid":1003,"workdir":"/home/usr3"}}]} diff --git a/pkg/api/testdata/output/e2e-test-api-server-uuid-query.txt b/pkg/api/testdata/output/e2e-test-api-server-uuid-query.txt index e20d20f..1792784 100644 --- a/pkg/api/testdata/output/e2e-test-api-server-uuid-query.txt +++ b/pkg/api/testdata/output/e2e-test-api-server-uuid-query.txt @@ -1 +1 @@ -{"status":"success","data":[{"cluster_id":"slurm-0","resource_manager":"slurm","uuid":"1481508","name":"test_script2","project":"acc2","group":"grp2","user":"usr2","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1002","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":14.58},"avg_cpu_mem_usage":{"global":14.58},"total_cpu_energy_usage_kwh":{"total":14.58},"total_cpu_emissions_gms":{"emaps_total":14.58,"rte_total":14.58},"avg_gpu_usage":{"global":14.58},"avg_gpu_mem_usage":{"global":14.58},"total_gpu_energy_usage_kwh":{"total":14.58},"total_gpu_emissions_gms":{"emaps_total":14.58,"rte_total":14.58},"total_io_write_stats":{"bytes":1481508,"requests":14815080},"total_io_read_stats":{"bytes":1481508,"requests":14815080},"total_ingress_stats":{"bytes":148150800,"packets":1481508000},"total_outgress_stats":{"bytes":148150800,"packets":1481508000},"tags":{"exit_code":"0:0","gid":1002,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1002,"workdir":"/home/usr2"}}]} +{"status":"success","data":[{"cluster_id":"slurm-0","resource_manager":"slurm","uuid":"1481508","name":"test_script2","project":"acc2","group":"grp2","user":"usr2","created_at":"2023-02-21T15:48:20+0100","started_at":"2023-02-21T15:49:06+0100","ended_at":"2023-02-21T15:57:23+0100","created_at_ts":1676990900000,"started_at_ts":1676990946000,"ended_at_ts":1676991443000,"elapsed":"00:08:17","state":"CANCELLED by 1002","allocation":{"billing":160,"cpus":16,"gpus":8,"mem":343597383680,"nodes":2},"total_time_seconds":{"alloc_cpumemtime":162856960,"alloc_cputime":7952,"alloc_gpumemtime":497,"alloc_gputime":3976,"walltime":497},"avg_cpu_usage":{"global":53.48},"avg_cpu_mem_usage":{"global":53.48},"total_cpu_energy_usage_kwh":{"total":53.48},"total_cpu_emissions_gms":{"emaps_total":53.48,"rte_total":53.48},"avg_gpu_usage":{"global":53.48},"avg_gpu_mem_usage":{"global":53.48},"total_gpu_energy_usage_kwh":{"total":53.48},"total_gpu_emissions_gms":{"emaps_total":53.48,"rte_total":53.48},"total_io_write_stats":{"bytes":534770154,"requests":5347701540},"total_io_read_stats":{"bytes":534770154,"requests":5347701540},"total_ingress_stats":{"bytes":53477015400,"packets":534770154000},"total_outgress_stats":{"bytes":53477015400,"packets":534770154000},"tags":{"exit_code":"0:0","gid":1002,"nodelist":"compute-[0-2]","nodelistexp":"compute-0|compute-1|compute-2","partition":"part1","qos":"qos1","uid":1002,"workdir":"/home/usr2"}}]} diff --git a/pkg/api/updater/tsdb.go b/pkg/api/updater/tsdb.go index 78046d4..f6e2a19 100644 --- a/pkg/api/updater/tsdb.go +++ b/pkg/api/updater/tsdb.go @@ -262,7 +262,7 @@ func (t *tsdbUpdater) update(startTime time.Time, endTime time.Time, units []mod } } level.Debug(t.Logger).Log( - "msg", "TSDB updater progress", "batch_id", iBatch, "total_batches", numBatches, + "msg", "progress", "batch_id", iBatch, "total_batches", numBatches, ) }