#!/usr/bin/env python3
"""Compute GPU utilization for one Pacific day from the local Parquet clone.

Mirrors scripts/utilization.sh logic:
- overlap proration: RawHours * overlap_secs / ElapsedSecs
- effective end: Start + ElapsedSecs (not End)
- QOS='RESERVE' deduped by (ReservationId, JobID) keeping max prorated per group
- capacity: MAX(ZoneSize) over records overlapping the day
- denominator: capacity * day_length_hours
"""
import os, sys, duckdb
from datetime import date, timedelta

STORE = os.environ.get("USAGE_STORE", "/pscratch/sd/w/wbhimji/nersc-usage")
day = date.fromisoformat(sys.argv[1])

con = duckdb.connect()
con.execute(f"""
CREATE OR REPLACE VIEW jobs AS
SELECT * FROM read_parquet('{STORE}/jobs/**/*.parquet', hive_partitioning=true)
""")
con.execute("SET TimeZone = 'US/Pacific'")

q = f"""
WITH bounds AS (
  SELECT
    TIMESTAMP '{day} 00:00:00' AT TIME ZONE 'US/Pacific' AS win_start,
    TIMESTAMP '{day + timedelta(days=1)} 00:00:00' AT TIME ZONE 'US/Pacific' AS win_end
),
overlapped AS (
  SELECT
    j."QOS"            AS qos,
    j."ReservationId"  AS rid,
    j."JobID"          AS jid,
    j."RawHours"       AS rawhours,
    j."ElapsedSecs"    AS elapsed,
    j."ZoneSize"       AS zonesize,
    CAST(j."Start" AS TIMESTAMP) AT TIME ZONE 'UTC' AS start_ts,
    CAST(j."Start" AS TIMESTAMP) AT TIME ZONE 'UTC'
      + INTERVAL (j."ElapsedSecs") SECOND          AS end_ts,
    b.win_start, b.win_end
  FROM jobs j, bounds b
  WHERE j."ZoneName" = 'gpu' AND j."hostname" = 'perlmutter gpu'
    AND j."ElapsedSecs" > 0
    AND CAST(j."Start" AS TIMESTAMP) AT TIME ZONE 'UTC' < b.win_end
    AND (CAST(j."Start" AS TIMESTAMP) AT TIME ZONE 'UTC'
         + INTERVAL (j."ElapsedSecs") SECOND) > b.win_start
),
prorated AS (
  SELECT
    qos, rid, jid, elapsed, rawhours, zonesize,
    date_diff(
      'second',
      GREATEST(start_ts, win_start),
      LEAST(end_ts,   win_end)
    ) AS overlap_secs,
    win_start, win_end
  FROM overlapped
),
prorated_hours AS (
  SELECT qos, rid, jid, zonesize,
         rawhours * overlap_secs / elapsed AS prorated_rh,
         win_start, win_end
  FROM prorated
  WHERE overlap_secs > 0
),
nonres AS (
  SELECT SUM(prorated_rh) AS rh FROM prorated_hours WHERE qos <> 'RESERVE'
),
res_groups AS (
  SELECT COALESCE(NULLIF(rid, ''), jid) AS group_key,
         MAX(prorated_rh) AS prorated_rh
  FROM prorated_hours
  WHERE qos = 'RESERVE'
  GROUP BY group_key
),
res AS (
  SELECT SUM(prorated_rh) AS rh FROM res_groups
),
cap AS (
  SELECT MAX(zonesize) AS capacity FROM prorated_hours
),
win AS (
  SELECT (epoch(win_end) - epoch(win_start)) / 3600.0 AS hours
  FROM bounds
)
SELECT
  COALESCE(nonres.rh, 0) + COALESCE(res.rh, 0) AS prorated_raw_hours,
  cap.capacity AS capacity_nodes,
  win.hours AS day_hours,
  100.0 * (COALESCE(nonres.rh, 0) + COALESCE(res.rh, 0))
        / (cap.capacity * win.hours) AS utilization_pct
FROM nonres, res, cap, win
"""
row = con.execute(q).fetchone()
prh, cap, hrs, pct = row
print(f"date           : {day}")
print(f"prorated_rh    : {prh:.2f}")
print(f"capacity_nodes : {cap}")
print(f"day_hours      : {hrs:.2f}")
print(f"utilization_pct: {pct:.4f}")
