#!/usr/bin/env bash
set -euo pipefail

# ============================================================
# Source (EAMxx) and destination (EAM/CAM-style) directories
# ============================================================
SRC=$1
DST=$2

FORCE=false
[[ "${3:-}" == "--force" ]] && FORCE=true

mkdir -p "$DST"

# ============================================================
# Requirements
# ============================================================
command -v ncks     >/dev/null 2>&1 || { echo "ERROR: ncks not found (need NCO)"; exit 1; }
command -v ncrename >/dev/null 2>&1 || { echo "ERROR: ncrename not found (need NCO)"; exit 1; }
command -v ncap2    >/dev/null 2>&1 || { echo "ERROR: ncap2 not found (need NCO)"; exit 1; }
command -v ncatted  >/dev/null 2>&1 || { echo "ERROR: ncatted not found (need NCO)"; exit 1; }

# ============================================================
# EAMxx -> EAM/CAM history shortname mapping
# ============================================================
declare -A var_2d=(
  # Surface / near-sfc
  [surface_upward_latent_heat_flux]="LHFLX"
  [surf_sens_flux]="SHFLX"
  [surf_evap]="QFLX"
  [surf_radiative_T]="TS"
  [T_2m]="TREFHT"
  [qv_2m]="QREFHT"
  [U_at_10m_above_surface]="U10"
  [wind_speed_10m]="WIND10"
  [SeaLevelPressure]="PSL"
  [ps]="PS"

  # Pressure-level diagnostics
  [omega_at_500hPa]="OMEGA500"
  [omega_at_700hPa]="OMEGA700"
  [omega_at_850hPa]="OMEGA850"
  [T_mid_at_700hPa]="T700"
  [z_mid_at_700hPa]="Z700"

  # ---- Clear-sky shortwave ----
  [SW_clrsky_flux_dn_at_model_bot]="FSDSC"
  # [SW_clrsky_flux_dn_at_model_top]="SOLIN"   # redundant w/ all-sky; keep commented unless you really want it
  [SW_clrsky_flux_up_at_model_bot]="FSUSC"
  [SW_clrsky_flux_up_at_model_top]="FSUTC"

  # ---- All-sky shortwave ----
  [SW_flux_dn_at_model_bot]="FSDS"
  [SW_flux_dn_at_model_top]="SOLIN"
  [SW_flux_up_at_model_bot]="FSUS"
  [SW_flux_up_at_model_top]="FSUT"

  # ---- Clear-sky longwave ----
  [LW_clrsky_flux_dn_at_model_bot]="FLDSC"
  [LW_clrsky_flux_up_at_model_top]="FLUTC"

  # ---- All-sky longwave ----
  [LW_flux_dn_at_model_bot]="FLDS"
  [LW_flux_up_at_model_bot]="FLUS"
  [LW_flux_up_at_model_top]="FLUT"

  # Clouds
  [LongwaveCloudForcing]="LWCF"
  [ShortwaveCloudForcing]="SWCF"
  [IceWaterPath]="TGCLDIWP"
  [LiqWaterPath]="TGCLDLWP"
  [VapWaterPath]="TMQ"
  #[isccp_cldtot]="CLDTOT"

  # surface stress
  #[surf_mom_flux_U]="TAUX"
  #[surf_mom_flux_V]="TAUY"

  # Precip components (keep explicit)
  [precip_liq_surf_mass_flux]="PRECLIQ"
  [precip_ice_surf_mass_flux]="PRECICE"
)
echo "2D vars: ${var_2d[*]}"

declare -A var_3d=(
  [T_mid]="T"
  [qv]="Q"
  [U]="U"
  [V]="V"
  [RelativeHumidity]="RELHUM"
  [omega]="OMEGA"
  [p_mid]="PMID"
  [z_mid]="Z3"
  [cldfrac_tot_for_analysis]="CLOUD"
)
echo "3D vars: ${var_3d[*]}"

declare -A MAP=()
# copy 2D
for k in "${!var_2d[@]}"; do
  MAP["$k"]="${var_2d[$k]}"
done
# copy 3D
for k in "${!var_3d[@]}"; do
  MAP["$k"]="${var_3d[$k]}"
done

# ============================================================
# Helpers for derived variables
# ============================================================
derive1 () {
  local outvar="$1" out="$2" v1="$3" f1="$4" expr="$5" units="$6" lname="$7" sname="${8:-}"

  [[ -f "$f1" ]] || return 0

  cp -f "$f1" "$out"

  ncap2 -O -s "${outvar}=${expr}" "$out" "$out"

  # Drop input variable only if it is different from output variable
  if [[ "$v1" != "$outvar" ]]; then
    ncks -O -x -v "$v1" "$out" "$out"
  fi

  ncatted -O \
    -a units,"$outvar",o,c,"$units" \
    -a long_name,"$outvar",o,c,"$lname" \
    "$out"

  if [[ -n "$sname" ]]; then
    ncatted -O -a standard_name,"$outvar",o,c,"$sname" "$out"
  fi

  echo "DERIVED: $(basename "$out")"
}

derive2 () {
  local outvar="$1" out="$2" v1="$3" f1="$4" v2="$5" f2="$6" expr="$7" units="$8" lname="$9" sname="${10}"

  [[ -f "$f1" && -f "$f2" ]] || return 0

  cp -f "$f1" "$out"
  ncks -A "$f2" "$out"

  ncap2 -O -s "${outvar}=${expr}" "$out" "$out"
  # keep output clean: drop the inputs (comment out if you want to keep them)
  ncks -O -x -v "$v1,$v2" "$out" "$out"

  ncatted -O \
    -a units,"$outvar",o,c,"$units" \
    -a long_name,"$outvar",o,c,"$lname" \
    -a standard_name,"$outvar",o,c,"$sname" \
    "$out"

  echo "DERIVED: $(basename "$out")"
}

derive3 () {
  local outvar="$1" out="$2" v1="$3" f1="$4" v2="$5" f2="$6" v3="$7" f3="$8" expr="$9" units="${10}" lname="${11}" sname="${12}"

  [[ -f "$f1" && -f "$f2" && -f "$f3" ]] || return 0

  cp -f "$f1" "$out"
  ncks -A "$f2" "$out"
  ncks -A "$f3" "$out"

  ncap2 -O -s "${outvar}=${expr}" "$out" "$out"
  ncks -O -x -v "$v1,$v2,$v3" "$out" "$out"

  ncatted -O \
    -a units,"$outvar",o,c,"$units" \
    -a long_name,"$outvar",o,c,"$lname" \
    -a standard_name,"$outvar",o,c,"$sname" \
    "$out"

  echo "DERIVED: $(basename "$out")"
}

derive6 () {
  local outvar="$1" out="$2" \
        v1="$3" f1="$4" v2="$5" f2="$6" v3="$7" f3="$8" v4="$9" f4="${10}" \
        v5="${11}" f5="${12}" v6="${13}" f6="${14}" \
        expr="${15}" units="${16}" lname="${17}" sname="${18}"

  [[ -f "$f1" && -f "$f2" && -f "$f3" && -f "$f4" && -f "$f5" && -f "$f6" ]] || return 0

  cp -f "$f1" "$out"
  ncks -A "$f2" "$out"
  ncks -A "$f3" "$out"
  ncks -A "$f4" "$out"
  ncks -A "$f5" "$out"
  ncks -A "$f6" "$out"

  ncap2 -O -s "${outvar}=${expr}" "$out" "$out"
  ncks -O -x -v "$v1,$v2,$v3,$v4,$v5,$v6" "$out" "$out"

  ncatted -O \
    -a units,"$outvar",o,c,"$units" \
    -a long_name,"$outvar",o,c,"$lname" \
    -a standard_name,"$outvar",o,c,"$sname" \
    "$out"

  echo "DERIVED: $(basename "$out")"
}

# ============================================================
# Step 1: Rename/copy EAMxx outputs to EAM/CAM-style shortnames
# ============================================================
for f in "$SRC"/*.nc; do
  [[ -f "$f" ]] || continue

  fname="$(basename "$f")"
  base="${fname%.nc}"

  # Strip trailing _YYYYMM_YYYYMM
  var="${base%_[0-9][0-9][0-9][0-9][0-9][0-9]_[0-9][0-9][0-9][0-9][0-9][0-9]}"
  tail="${base#${var}}"

  newvar="${MAP[$var]:-}"
  [[ -n "$newvar" ]] || continue
  
  echo $var $newvar 

  out="$DST/${newvar}${tail}.nc"

  # Guard: avoid overwriting if two inputs map to same output name
  # Guard: avoid overwriting unless --force is given
  if [[ ! -f "$out" || "${FORCE:-false}" == "true" ]]; then
    # Copy first (never touch originals)
    cp -f "$f" "$out"
    # Only rename if needed AND the source var actually exists in the file
    if [[ "$var" != "$newvar" ]]; then
      if ncks -m -v "$var" "$out" >/dev/null 2>&1; then
        ncrename -O -v "${var},${newvar}" "$out"
        echo "$fname  →  $(basename "$out")  (renamed ${var}→${newvar})"
      else
        echo "WARN: $fname copied, but variable '$var' not found inside file (no rename)"
      fi
    else
      echo "$fname  →  $(basename "$out")  (copied; no rename)"
    fi
  fi 

  # 3D variables: PS rename + add P0
  if [[ -f "$out" && -n "${var_3d[$var]:-}" ]]; then
    echo "3D postprocess for $(basename "$out") ..."

    # ps -> PS if present (avoid conflict if PS already exists)
    if ncks -m -v ps "$out" >/dev/null 2>&1 && ! ncks -m -v PS "$out" >/dev/null 2>&1; then
      ncrename -O -v ps,PS "$out"
    fi

    # Add scalar P0=100000 Pa if missing
    if ! ncks -m -v P0 "$out" >/dev/null 2>&1; then
      ncap2 -O -s 'P0=100000.0' "$out" "$out"
      ncatted -O \
        -a long_name,P0,o,c,"reference pressure" \
        -a units,P0,o,c,"Pa" \
        "$out"
    fi
  fi
done

# ============================================================
# Step 2: Derive common EAM variables (per matching date tail)
# ============================================================
for fsol in "$DST"/SOLIN_*.nc; do
  [[ -f "$fsol" ]] || continue
  tail="${fsol#${DST}/SOLIN}"   # includes leading "_" + dates + ".nc"

  # --- TOA LW net (E3SM convention): FLNT = FLUT ---
  fflut="$DST/FLUT${tail}"
  outflnt="$DST/FLNT${tail}"
  if [[ -e "$outflnt" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outflnt")"
  else
    derive1 "FLNT" "$outflnt" "FLUT" "$fflut" "FLUT" \
      "W/m2" "TOA net longwave flux (upward positive; E3SM convention)" "toa_net_upward_longwave_flux"
    [[ -f "$outflnt" ]] && cp -f "$outflnt" "$DST/FLNTOA${tail}"
  fi

  # --- TOA LW net clear-sky (E3SM convention): FLNTC = FLUTC ---
  fflutc="$DST/FLUTC${tail}"
  outflntc="$DST/FLNTC${tail}"
  if [[ -e "$outflntc" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outflntc")"
  else
    derive1 "FLNTC" "$outflntc" "FLUTC" "$fflutc" "FLUTC" \
      "W/m2" "TOA clear-sky net longwave flux (upward positive; E3SM convention)" "toa_net_upward_longwave_flux"
    [[ -f "$outflntc" ]] && cp -f "$outflntc" "$DST/FLNTOAC${tail}"
  fi

  # --- Surface zonal stress: TAUX = -surf_mom_flux_U ---
  fu="$SRC/surf_mom_flux_U${tail}"
  outtaux="$DST/TAUX${tail}"
  if [[ -e "$outtaux" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outtaux")"
  else
    derive1 "TAUX" "$outtaux" "surf_mom_flux_U" "$fu" "-surf_mom_flux_U" \
      "N/m2" "Zonal surface stress" "surface_downward_eastward_stress"
  fi

  # --- Surface meridional stress: TAUY = -surf_mom_flux_V ---
  fv="$SRC/surf_mom_flux_V${tail}"
  outtauy="$DST/TAUY${tail}"
  if [[ -e "$outtauy" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outtauy")"
  else
    derive1 "TAUY" "$outtauy" "surf_mom_flux_V" "$fv" "-surf_mom_flux_V" \
      "N/m2" "Meridional surface stress" "surface_downward_northward_stress"
  fi

  # --- Total cloud fraction: CLDTOT = isccp_cldtot / 100 ---
  fcld="$SRC/isccp_cldtot${tail}"
  outcld="$DST/CLDTOT${tail}"
  if [[ -e "$outcld" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outcld")"
  else
    derive1 "CLDTOT" "$outcld" "isccp_cldtot" "$fcld" "isccp_cldtot/100.0" \
      "fraction" "Total cloud fraction" "cloud_area_fraction"
  fi

  # --- PRECT (m/s, water-equivalent) ---
  fliq="$DST/PRECLIQ${tail}"
  fice="$DST/PRECICE${tail}"
  outp="$DST/PRECT${tail}"
  if [[ -e "$outp" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $fname  →  $(basename "$out")"
    continue
  else
    derive2 "PRECT" "$outp" "PRECLIQ" "$fliq" "PRECICE" "$fice" "PRECLIQ+PRECICE" \
      "m/s" "Total precipitation rate (liq+ice)" "precipitation_flux"
  fi 

  # --- Surface LW net (E3SM diags convention): FLNS = FLUS - FLDS ---
  fflds="$DST/FLDS${tail}"
  fflus="$DST/FLUS${tail}"
  outflns="$DST/FLNS${tail}"
  if [[ -e "$outflns" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outflns")"
    continue
  else
    derive2 "FLNS" "$outflns" "FLUS" "$fflus" "FLDS" "$fflds" "FLUS-FLDS" \
      "W/m2" "Surface net longwave flux (up - down; consistent with e3sm_diags netlw)" "surface_net_upward_longwave_flux"
  fi

  # --- Surface LW net clear-sky (E3SM diags convention): FLNSC = FLUS - FLDSC ---
  ffldsc="$DST/FLDSC${tail}"
  fflus="$DST/FLUS${tail}"
  outflnsc="$DST/FLNSC${tail}"
  if [[ -e "$outflnsc" && "${FORCE:-false}" != true ]]; then
    echo "SKIP (exists): $(basename "$outflnsc")"
    continue
  else
    derive2 "FLNSC" "$outflnsc" "FLUS" "$fflus" "FLDSC" "$ffldsc" "FLUS-FLDSC" \
      "W/m2" "Surface clear-sky net longwave flux (up - down; consistent with e3sm_diags netlw)" "surface_net_upward_longwave_flux"
  fi

  # --- Surface SW net: FSNS = FSDS - FSUS ---
  ffsds="$DST/FSDS${tail}"
  ffsus="$DST/FSUS${tail}"
  outfsns="$DST/FSNS${tail}"
  if [[ -e "$outfsns" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outfsns")"
    continue
  else
    derive2 "FSNS" "$outfsns" "FSDS" "$ffsds" "FSUS" "$ffsus" "FSDS-FSUS" \
      "W/m2" "Surface net shortwave flux (down - up)" "surface_net_downward_shortwave_flux"
  fi
  
  # --- Surface SW net (clear-sky): FSNSC = FSDSC - FSUSC ---
  ffsds="$DST/FSDSC${tail}"
  ffsus="$DST/FSUSC${tail}"
  outfsns="$DST/FSNSC${tail}"
  if [[ -e "$outfsns" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outfsns")"
    continue
  else
    derive2 "FSNSC" "$outfsns" "FSDSC" "$ffsds" "FSUSC" "$ffsus" "FSDSC-FSUSC" \
      "W/m2" "Surface clear-sky net shortwave flux (down - up)" "surface_net_downward_shortwave_flux"
  fi

  # --- TOA SW net: FSNT = SOLIN - FSUT ---
  ffsut="$DST/FSUT${tail}"
  outfsnt="$DST/FSNT${tail}"
  if [[ -e "$outfsnt" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outfsnt")"
    continue
  else
    derive2 "FSNT" "$outfsnt" \
      "SOLIN" "$fsol" \
      "FSUT"  "$ffsut" \
      "SOLIN-FSUT" \
      "W/m2" \
      "TOA net shortwave flux (down - up)" \
      "toa_net_downward_shortwave_flux"
    cp "$outfsnt" "$DST/FSNTOA${tail}"
  fi

  # --- TOA SW net (clear-sky): FSNTC = SOLIN - FSUTC ---
  ffsutc="$DST/FSUTC${tail}"
  outfsntc="$DST/FSNTC${tail}"
  if [[ -e "$outfsntc" && "${FORCE:-false}" != true ]]; then
    echo "SKIP (exists): $(basename "$outfsntc")"
    continue
  else
    derive2 "FSNTC" "$outfsntc" \
      "SOLIN" "$fsol" \
      "FSUTC" "$ffsutc" \
      "SOLIN-FSUTC" \
      "W/m2" \
      "TOA clear-sky net shortwave flux (down - up)" \
      "toa_net_downward_shortwave_flux"
    cp "$outfsntc" "$DST/FSNTOAC${tail}"
  fi

  # --- Surface LW net: FLNS = FLDS - FLUS ---
  fflds="$DST/FLDS${tail}"
  fflus="$DST/FLUS${tail}"
  outflns="$DST/FLNS${tail}"
  if [[ -e "$outflns" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outflns")"
    continue
  else
    derive2 "FLNS" "$outflns" "FLDS" "$fflds" "FLUS" "$fflus" "FLDS-FLUS" \
      "W/m2" "Surface net longwave flux (down - up)" "surface_net_downward_longwave_flux"
  fi

  # --- Surface LW net (clear-sky): FLNSC = FLDSC - FLUS (since FLUSC ≡ FLUS at surface) ---
  fflds="$DST/FLDSC${tail}"
  fflus="$DST/FLUS${tail}"
  outflns="$DST/FLNSC${tail}"
  if [[ -e "$outflns" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outflns")"
    continue
  else
    derive2 "FLNSC" "$outflns" \
      "FLDSC" "$fflds" \
      "FLUS"  "$fflus" \
      "FLDSC-FLUS" \
      "W/m2" \
      "Surface clear-sky net longwave flux (down - up)" \
      "surface_net_downward_longwave_flux"
  fi

  # --- RESTOM: net TOA radiation = SOLIN - FSUT - FLUT ---
  fflut="$DST/FLUT${tail}"
  outrest="$DST/RESTOM${tail}"
  if [[ -e "$outrest" && "$FORCE" != true ]]; then
    echo "SKIP (exists): $(basename "$outrest")"
    continue
  else
    derive3 "RESTOM" "$outrest" "SOLIN" "$fsol" "FSUT" "$ffsut" "FLUT" "$fflut" "SOLIN-FSUT-FLUT" \
      "W/m2" "Net TOA radiative flux (down - up)" "toa_net_downward_radiative_flux"
  fi

  # --- Surface net flux: NET_FLUX_SRF = FSDS-FSUS + FLDS-FLUS - LHFLX - SHFLX ---
  ffsds="$DST/FSDS${tail}"
  ffsus="$DST/FSUS${tail}"
  fflds="$DST/FLDS${tail}"
  fflus="$DST/FLUS${tail}"
  fflh="$DST/LHFLX${tail}"
  ffsh="$DST/SHFLX${tail}"
  outnet="$DST/NET_FLUX_SRF${tail}"

  if [[ -e "$outnet" && "${FORCE:-false}" != true ]]; then
    echo "SKIP (exists): $(basename "$outnet")"
    continue
  else
    derive6 "NET_FLUX_SRF" "$outnet" \
      "FSDS"  "$ffsds" \
      "FSUS"  "$ffsus" \
      "FLDS"  "$fflds" \
      "FLUS"  "$fflus" \
      "LHFLX" "$fflh" \
      "SHFLX" "$ffsh" \
      "FSDS-FSUS+FLDS-FLUS-LHFLX-SHFLX" \
      "W/m2" "Surface net energy flux (SW+LW-LH-SH; downward positive)" ""
  fi
  
done

