#!/usr/bin/env perl
#-----------------------------------------------------------------------------------------------
#
# build-namelist
#
# This script builds the namelists for the CISM configuration of CESM1.
#
# build-namelist uses a config_cache.xml file that current contains the glc grid information.
# build-namelist reads this file to obtain information it needs to provide
# default values that are consistent with the CISM library.  For example, the grid resolution
# is obtained from the cache file and used to determine appropriate defaults for namelist input
# that is resolution dependent.
#
# The simplest use of build-namelist is to execute it from the build directory where configure
# was run.  By default it will use the config_cache.xml file that was written by configure to
# determine the build time properties of the executable, and will write the files that contain 
# the output namelists in that same directory.  
#
#
# Date        Contributor      Modification
# -------------------------------------------------------------------------------------------
# 2012-01-30  Vertenstein      Original version
#--------------------------------------------------------------------------------------------
use strict;
use Cwd;
use English;
use Getopt::Long;
use IO::File;
#-----------------------------------------------------------------------------------------------

sub usage {
    die <<EOF;
SYNOPSIS
     build-namelist [options]
OPTIONS
     -infile "filepath"    Specify a file containing namelists to read values from.
     -namelist "namelist"  Specify namelist settings directly on the commandline by supplying 
                           a string containing FORTRAN namelist syntax, e.g.,
                              -namelist "&cism_nml dt=1800 /"
     -help [or -h]         Print usage to STDOUT.
     -silent [-s]          Turns on silent mode - only fatal messages issued.
     -verbose              Turn on verbose echoing of informational messages.
     -caseroot             CASEROOT directory variable
     -scriptsroot          SCRIPTSROOT directory variable
     -inst_string          INST_STRING variable
     -paramfile            name of top-level configuration file
     -lnd_grid             LND_GRID variable
     -glc_grid             CISM_GRID variable
     -cism_phys            CISM_PHYS variable

NOTE: The precedence for setting the values of namelist variables is (highest to lowest):
      1. namelist values set by specific command-line options, e.g., paramfile
      2. values set on the command-line using the -namelist option,
      3. values read from the file specified by -infile,
      4. values from the namelist defaults file - or values specifically set in build-namelist 
EOF
}

#-----------------------------------------------------------------------------------------------
# Set some important constants, such as important values of certain config file parameters

# value of 'sigma' that signifies that values are read from a sigma section
my $SIGMA_IN_CONFIG_FILE = 2;

# value of 'gthf' signifying that we should calculate gthf, and thus
# we should have a 'gthf' section
my $GTHF_CALCULATE = 2;

# value of 'isostasy' signifying that isostasy is enabled, and thus we
# should have an 'isostasy' section
my $ISOSTASY_ON = 1;

# regular expressions to match fortran true/false
my $TRUE  = qr/\.true\./i;
my $FALSE = qr/\.false\./i;

#-----------------------------------------------------------------------------------------------
# Set the directory that contains the CISM configuration scripts.  If the command was
# issued using a relative or absolute path, that path is in $ProgDir.  Otherwise assume the
# command was issued from the current working directory.

(my $ProgName = $0) =~ s!(.*)/!!;      # name of this script
my $ProgDir = $1;                      # name of directory containing this script -- may be a
                                       # relative or absolute path, or null if the script is in
                                       # the user's PATH
my $cwd = getcwd();                    # current working directory
my $cfgdir;                            # absolute pathname of directory that contains this script
if ($ProgDir) { 
    $cfgdir = absolute_path($ProgDir);
} else {
    $cfgdir = $cwd;
}

#-----------------------------------------------------------------------------------------------
# Process command-line options.

my %opts = ( help        => 0,
	     silent      => 0,
	     caseroot    => undef,
	     scriptsroot => undef,
	     inst_string => undef,
	     paramfile   => undef,
	     lnd_grid    => undef,
	     glc_grid    => undef,
	     cism_phys   => undef,
	    );

GetOptions(
    "h|help"        => \$opts{'help'},
    "infile=s"      => \$opts{'infile'},
    "namelist=s"    => \$opts{'namelist'},
    "s|silent"      => \$opts{'silent'},
    "v|verbose"     => \$opts{'verbose'},
    "caseroot=s"    => \$opts{'caseroot'},
    "scriptsroot=s" => \$opts{'scriptsroot'},
    "inst_string=s" => \$opts{'inst_string'},	   
    "paramfile=s"   => \$opts{'paramfile'},
    "lnd_grid=s"    => \$opts{'lnd_grid'},
    "glc_grid=s"    => \$opts{'glc_grid'},
    "cism_phys=s"   => \$opts{'cism_phys'},
)  or usage();

# Give usage message.
usage() if $opts{'help'};

# Check for unparsed arguments
if (@ARGV) {
    print "ERROR: unrecognized arguments: @ARGV\n";
    usage();
}

# Define print levels:
# 0 - only issue fatal error messages
# 1 - only informs what files are created (default)
# 2 - verbose
my $print = 1;
if ($opts{'silent'})  { $print = 0; }
if ($opts{'verbose'}) { $print = 2; }
my $eol = "\n";

if ($print>=2) { print "Setting CISM configuration script directory to $cfgdir$eol"; }

my $CASEROOT    = $opts{'caseroot'}; 
my $SCRIPTSROOT = $opts{'scriptsroot'};       
my $INST_STRING = $opts{'inst_string'}; 
my $LND_GRID    = $opts{'lnd_grid'}; 
my $GLC_GRID    = $opts{'glc_grid'}; 
my $CISM_PHYS   = $opts{'cism_phys'};

# Validate some of the commandline option values.
validate_options("commandline", \%opts);

#-----------------------------------------------------------------------------------------------
# Build empty config_cache.xml file (needed below)  

my $bldconfdir = "$CASEROOT/Buildconf/datmconf";
if ( $opts{'debug'} ) {
   my $cmd = "mkdir -p $bldconfdir";
   print "Execute: $cmd\n";
   system( "$cmd" );
   chdir( "$bldconfdir" );
}

# build config_cache.xml file (needed below)
my $config_cache = "${CASEROOT}/Buildconf/cismconf/config_cache.xml";
my  $fh = new IO::File;
$fh->open(">$config_cache") or die "** can't open file: $config_cache\n";
print $fh  <<"EOF";
<?xml version="1.0"?>
<config_definition>
<entry id="lnd_grid"  value="$LND_GRID">
<entry id="glc_grid"  value="$GLC_GRID">
<entry id="cism_phys" value="$CISM_PHYS">
</config_definition>
EOF
$fh->close;
if ($print>=2) { print "Wrote file $config_cache $eol"; }
(-f "config_cache.xml")  or  die <<"EOF";
** $ProgName - Cannot find configuration cache file: config_cache.xml\" **
EOF

#-----------------------------------------------------------------------------------------------
# Make sure we can find required perl modules, definition, and defaults files.
# Look for them under the directory that contains the configure script.

# The root directory for the perl5 required utilities 
my $perl5lib_dir = "${SCRIPTSROOT}/ccsm_utils/Tools/perl5lib";

# The XML::Lite module is required to parse the XML files.
(-f "$perl5lib_dir/XML/Lite.pm")  or  die <<"EOF";
** $ProgName - Cannot find perl module \"XML/Lite.pm\" in directory \"$perl5lib_dir\" **
EOF

# The Build::Config module provides utilities to access the configuration information
# in the config_cache.xml file (see below)
(-f "$perl5lib_dir/Build/Config.pm")  or  die <<"EOF";
** $ProgName - Cannot find perl module \"Build/Config.pm\" in directory \"$perl5lib_dir\" **
EOF

# The Build::NamelistDefinition module provides utilities to validate that the output
# namelists are consistent with the namelist definition file
(-f "$perl5lib_dir/Build/NamelistDefinition.pm")  or  die <<"EOF";
** $ProgName - Cannot find perl module \"Build/NamelistDefinition.pm\" in directory \"$perl5lib_dir\" **
EOF

# The Build::NamelistDefaults module provides a utility to obtain default values of namelist
# variables based on finding a best fit with the attributes specified in the defaults file.
(-f "$perl5lib_dir/Build/NamelistDefaults.pm")  or  die <<"EOF";
** $ProgName - Cannot find perl module \"Build/NamelistDefaults.pm\" in directory \"$perl5lib_dir\" **
EOF

# The Build::Namelist module provides utilities to parse input namelists, to query and modify
# namelists, and to write output namelists.
(-f "$perl5lib_dir/Build/Namelist.pm")  or  die <<"EOF";
** $ProgName - Cannot find perl module \"Build/Namelist.pm\" in directory \"$perl5lib_dir\" **
EOF

# The namelist definition file contains entries for all namelist variables that
# can be output by build-namelist.  The version of the file that is associate with a
# fixed CISM tag is $cfgdir/namelist_files/namelist_definition.xml.  To aid developers
# who make use of the SourceMods/src.cism directory - we allow the definition file 
# to come from that directory
my $nl_definition_file;
if (-f "${CASEROOT}/SourceMods/src.cism/namelist_definition_cism.xml") {
    $nl_definition_file = "${CASEROOT}/SourceMods/src.cism/namelist_definition_cism.xml";
}
if (! defined $nl_definition_file) {
    # default location of namelist definition file
    $nl_definition_file = "$cfgdir/namelist_files/namelist_definition_cism.xml";
    (-f "$nl_definition_file")  or  die <<"EOF";
    ** $ProgName - ERROR: Cannot find namelist definition file \"$nl_definition_file\" **
EOF
}
if ($print>=2) { print "Using namelist definition file $nl_definition_file$eol"; }

# The namelist defaults file contains default values for all required namelist variables.
my $nl_defaults_file = "$cfgdir/namelist_files/namelist_defaults_cism.xml";
(-f "$nl_defaults_file")  or  die <<"EOF";
** $ProgName - Cannot find namelist defaults file \"$nl_defaults_file\" **
EOF
if ($print>=2) { print "Using namelist defaults file $nl_defaults_file$eol"; }

#-----------------------------------------------------------------------------------------------
# Add $perl5lib_dir to the list of paths that Perl searches for modules
unshift @INC, "$perl5lib_dir";
require XML::Lite;
require Build::Config;
require Build::NamelistDefinition;
require Build::NamelistDefaults;
require Build::Namelist;

#-----------------------------------------------------------------------------------------------
# Create a configuration object from the CISM config_cache.xml file-  created by 
# cism.cpl7.template in $CASEROOT/Buildconf/cismconf
my $cfg = Build::Config->new('config_cache.xml');

# Create a namelist definition object.  This object provides a method for verifying that the
# output namelist variables are in the definition file, and are output in the correct
# namelist groups.
my $definition = Build::NamelistDefinition->new($nl_definition_file);

# Create a namelist defaults object.  This object provides default values for variables
# contained in the input defaults file.  The configuration object provides attribute
# values that are relevent for the CISM library for which the namelist is being produced.
my $defaults = Build::NamelistDefaults->new($nl_defaults_file, $cfg);

# Create an empty namelist object.  Add values to it in order of precedence.
my $nl = Build::Namelist->new();

#-----------------------------------------------------------------------------------------------
# Process the user input in order of precedence.  
# At each point we'll only add new values to the namelist and not overwrite previously 
# specified specified values which have higher precedence.

# Process command-line options
my $val;
my $group;
my $var;

# paramfile
# Note special handling of paramfile: This namelist item, which appears in cism_in, cannot be
# set by the user, but instead must be specified as a command-line option. This is because this
# file (cism.config) is created by build-namelist, and then copied by the calling script into a
# new location. Thus, the calling script needs to know the name of the paramfile; to keep things
# simple, we have the calling script set this value and do not allow the user to override it.
$var='paramfile';
if (defined $opts{$var}) {
  $val = $opts{$var};
  $val = quote_string($val);
  $group = $definition->get_group_name($var);
  $nl->set_variable_value($group, $var, $val);
} else {
  die "$ProgName - ERROR: paramfile must be specified";
}
  

# Process the -namelist arg.
if (defined $opts{'namelist'}) {
    # Parse commandline namelist
    my $nl_arg = Build::Namelist->new($opts{'namelist'});

    # Validate input namelist -- trap exceptions
    my $nl_arg_valid;
    eval { $nl_arg_valid = $definition->validate($nl_arg); };
    if ($@) {
	die "$ProgName - ERROR: Invalid namelist variable in commandline arg '-namelist'.\n $@";
    }

    # Merge input values into namelist.  Previously specified values have higher precedence
    # and are not overwritten.
    $nl->merge_nl($nl_arg_valid);
}

# Process the -infile arg.
if (defined $opts{'infile'}) {
    # Parse namelist input from a file
    my $nl_infile = Build::Namelist->new($opts{'infile'});

    # Validate input namelist -- trap exceptions
    my $nl_infile_valid;
    eval { $nl_infile_valid = $definition->validate($nl_infile); };
    if ($@) {
	die "$ProgName - ERROR: Invalid namelist variable in '-infile' $opts{'infile'}.\n $@";
    }

    # Merge input values into namelist.  Previously specified values have higher precedence
    # and are not overwritten.
    $nl->merge_nl($nl_infile_valid);
}

#-----------------------------------------------------------------------------------------------
# Determine namelist 
#-----------------------------------------------------------------------------------------------

#-----------------------------------------------------------------------------------------------
####################################
# Required xml variables           #
####################################

my %xmlvars = ();
my @files = <${CASEROOT}/*xml>;
foreach my $file (@files) {
    my $xml = XML::Lite->new($file);
    my @e = $xml->elements_by_name('entry');
    while ( my $e = shift @e ) {
	my %a = $e->get_attributes();
	$xmlvars{$a{'id'}} = $a{'value'};
    }
}
foreach my $attr (keys %xmlvars) {
    if ( $xmlvars{$attr} =~ m/\$/ ) {$xmlvars{$attr} = expand_env_xml($xmlvars{$attr});}
}

my $CASE                   = $xmlvars{'CASE'};
my $CALENDAR               = $xmlvars{'CALENDAR'};
my $EXEROOT                = $xmlvars{'EXEROOT'};
my $CODEROOT               = $xmlvars{'CODEROOT'};
my $SCRIPTSROOT            = $xmlvars{'SCRIPTSROOT'};
my $CASEROOT               = $xmlvars{'CASEROOT'};
my $DIN_LOC_ROOT           = $xmlvars{'DIN_LOC_ROOT'};
my $RUN_TYPE               = $xmlvars{'RUN_TYPE'};
my $RUN_STARTDATE          = $xmlvars{'RUN_STARTDATE'};
my $RUN_REFCASE            = $xmlvars{'RUN_REFCASE'};
my $RUN_REFDATE            = $xmlvars{'RUN_REFDATE'};
my $CONTINUE_RUN           = $xmlvars{'CONTINUE_RUN'};
my $NCPL_BASE_PERIOD       = $xmlvars{'NCPL_BASE_PERIOD'};
my $GLC_NCPL               = $xmlvars{'GLC_NCPL'};
my $GLC_TWO_WAY_COUPLING   = $xmlvars{'GLC_TWO_WAY_COUPLING'};
my $CISM_USE_TRILINOS      = $xmlvars{'CISM_USE_TRILINOS'};
my $CISM_OBSERVED_IC       = $xmlvars{'CISM_OBSERVED_IC'};

(-d $DIN_LOC_ROOT)  or mkdir $DIN_LOC_ROOT;
if ($print>=2) { print "CESM inputdata root directory: $DIN_LOC_ROOT$eol"; }

####################################
# namelist group: time_manager_nml #
####################################

my $startdate = $RUN_STARTDATE;
if ($RUN_TYPE eq 'branch') { $startdate = $RUN_REFDATE; }

my $iyear0  = `echo $startdate | cut -c1-4  | sed -e 's/^0*//'`;
$iyear0 =~ s/\n/ /g; # remove imbedded newline   
my $imonth0 = `echo $startdate | cut -c6-7  | sed -e 's/^0*//'`;
$imonth0 =~ s/\n/ /g; # remove imbedded newline   
my $iday0   = `echo $startdate | cut -c9-10 | sed -e 's/^0*//'`;
$iday0 =~ s/\n/ /g; # remove imbedded newline   
my $ihour0  = 0;

my ($dt_option, $dt_count) = get_glc_dt($NCPL_BASE_PERIOD, $GLC_NCPL);

add_default($nl, 'runid', 'val'=>"$CASE");
add_default($nl, 'dt_option', 'val'=>$dt_option);
add_default($nl, 'dt_count', 'val'=>$dt_count);
add_default($nl, 'allow_leapyear', 'calendar'=>"$CALENDAR");
add_default($nl, 'iyear0',  'val'=>$iyear0);
add_default($nl, 'imonth0', 'val'=>$imonth0);
add_default($nl, 'iday0'  , 'val'=>$iday0);
add_default($nl, 'ihour0' , 'val'=>$ihour0);
add_default($nl, 'iminute0','val'=>0);
add_default($nl, 'isecond0','val'=>0);
add_default($nl, 'date_separator');
add_default($nl, 'stop_option');

####################################
# namelist group: grid_nml #
####################################

add_default($nl, 'horiz_grid_opt');
add_default($nl, 'horiz_grid_file');
add_default($nl, 'mask_varname');
add_default($nl, 'frac_varname');

####################################
# namelist group: cism_params      #
####################################

add_default($nl, 'paramfile', 'noprepend'=>'1' );
add_default($nl, 'cism_debug');
add_default($nl, 'cesm_history_vars');
add_default($nl, 'ice_flux_routing');

my $hotstart;
if ($RUN_TYPE eq 'startup') {
   add_default($nl, 'cisminputfile'); 
   $hotstart = 0;
} 
elsif ($CISM_OBSERVED_IC eq 'TRUE') {
   if ($RUN_TYPE eq 'branch') {
      die "$ProgName - ERROR: CISM_OBSERVED_IC=TRUE not allowed for branch runs - only for hybrid runs\n";
   }

   add_default($nl, 'cisminputfile'); 
   $hotstart = 0;
}
elsif ($RUN_TYPE eq 'branch' || $RUN_TYPE eq 'hybrid') {
   add_default($nl, 'cisminputfile',
               'val'=>"${RUN_REFCASE}.cism.r.${RUN_REFDATE}-00000.nc", 'noprepend'=>'1');
   $hotstart = 1;
}
else {
   die "$ProgName - ERROR: Unknown RUN_TYPE: $RUN_TYPE\n";
}

####################################
# namelist group: glc_override_nml #
####################################

add_default($nl, 'enable_frac_overrides');
if ($nl->get_value('enable_frac_overrides') =~ $TRUE) {
   add_default($nl, 'override_delay');
   add_default($nl, 'decrease_frac');
   add_default($nl, 'increase_frac');
   add_default($nl, 'rearrange_freq');
}

#-----------------------------------------------------------------------------------------------
# *** Write output namelist file (cism_in) and input dataset list (cism.input_data_list) ***
#-----------------------------------------------------------------------------------------------
# Set namelist groups to be written out

my @groups = qw(cism_params grid_nml time_manager_nml glc_override_nml); 

# Write out all groups  to cism_in
my $outfile = "./cism_in";
$nl->write($outfile, 'groups'=>\@groups);
if ($print>=2) { print "Writing cism glc component namelist to $outfile $eol"; }

# Write  input dataset list.
check_input_files($nl, $DIN_LOC_ROOT, "../cism.input_data_list");

####################################
# create cism.config in run dir    #
####################################

# Write cism.config
my $fh = new IO::File;
$fh->open(">cism.config") or die "** can't open file: cism.config\n";

print $fh "[grid]\n";
add_default($nl, 'ewn');
add_default($nl, 'nsn');
add_default($nl, 'upn');
add_default($nl, 'dew');
add_default($nl, 'dns');
$nl->write_cism_config($fh, "cism_config_grid");

print $fh "\n[GLINT climate]\n";
add_default($nl, 'evolve_ice');
add_default($nl, 'precip_mode');
add_default($nl, 'acab_mode');
add_default($nl, 'test_coupling');
add_default($nl, 'zero_gcm_fluxes', 'glc_two_way_coupling'=>"$GLC_TWO_WAY_COUPLING");
$nl->write_cism_config($fh, "cism_config_climate");

print $fh "\n[projection]\n";
add_default($nl, 'type');
add_default($nl, 'centre_latitude');
add_default($nl, 'centre_longitude');
add_default($nl, 'false_easting');
add_default($nl, 'false_northing');
add_default($nl, 'standard_parallel');
$nl->write_cism_config($fh, "cism_config_projection");

print $fh "\n[options]\n";
add_default($nl, 'dycore');
add_default($nl, 'temperature');
add_default($nl, 'temp_init');
add_default($nl, 'flow_law');
add_default($nl, 'basal_water');
add_default($nl, 'basal_mass_balance');
add_default($nl, 'gthf');
add_default($nl, 'isostasy');
add_default($nl, 'marine_margin');
add_default($nl, 'slip_coeff');
add_default($nl, 'evolution');
add_default($nl, 'vertical_integration');
add_default($nl, 'sigma');
add_default($nl, 'hotstart', 'val'=>$hotstart);
$nl->write_cism_config($fh, "cism_config_options");

# The [sigma] section only applies if we're using user-defined sigma
# levels -- if not, leave it out to avoid confusion
if ($nl->get_value('sigma') == $SIGMA_IN_CONFIG_FILE) {
    print $fh "\n[sigma]\n";
    add_default($nl, 'sigma_levels');
    $nl->write_cism_config($fh, "cism_config_sigma");
}
else {
    confirm_empty("cism_config_sigma", "items in sigma section can only be set if sigma is set to $SIGMA_IN_CONFIG_FILE");
}
  

print $fh "\n[time]\n";
add_default($nl, 'dt');
add_default($nl, 'ntem');
add_default($nl, 'nvel');
add_default($nl, 'dt_diag');
add_default($nl, 'idiag');
add_default($nl, 'jdiag');
$nl->write_cism_config($fh, "cism_config_time");

print $fh "\n[parameters]\n";
add_default($nl, 'log_level');
add_default($nl, 'ice_limit');
add_default($nl, 'marine_limit');
add_default($nl, 'geothermal');
add_default($nl, 'flow_factor');
add_default($nl, 'hydro_time');
add_default($nl, 'basal_tract_const');
$nl->write_cism_config($fh, "cism_config_parameters");

# The ho_options section only applies if dycore is not 0
if ($nl->get_value('dycore') != 0) {
    print $fh "\n[ho_options]\n";
    add_default($nl, 'which_ho_babc');
    add_default($nl, 'which_ho_efvs');
    add_default($nl, 'which_ho_resid');
    add_default($nl, 'which_disp');
    add_default($nl, 'which_ho_sparse', 'cism_use_trilinos'=>"$CISM_USE_TRILINOS");
    add_default($nl, 'which_ho_nonlinear');
    $nl->write_cism_config($fh, "cism_config_ho_options");
}
else {
    confirm_empty("cism_config_ho_options", "items in ho_options_section can only be set if dycore is not 0");
}

# The [GTHF] section only applies if the 'gthf' option is set to
# GTHF_CALCULATE -- if not, we leave it out to avoid confusion
if ($nl->get_value('gthf') == $GTHF_CALCULATE) {
    print $fh "\n[GTHF]\n";
    $nl->write_cism_config($fh, "cism_config_gthf");
}
else {
    confirm_empty("cism_config_gthf", "items in gthf section can only be set if gthf is set to $GTHF_CALCULATE");
}

# The [isostasy] section only applies if the 'isostasy' option is
# turned on -- if not, we leave it out to avoid confusion
if ($nl->get_value('isostasy') == $ISOSTASY_ON) {
    print $fh "\n[isostasy]\n";
    $nl->write_cism_config($fh, "cism_config_isostasy");
}
else {
    confirm_empty("cism_config_isostasy", "items in isostasy section can only be set if isostasy is set to $ISOSTASY_ON");
}


print $fh <<"EOF";

########################################
# I/O configuration follows
########################################

\[CF default]
title: Glimmer-CISM simulation
institution: Community Earth System Model
EOF

my $cisminputfile= $nl->get_value('cisminputfile');
$cisminputfile =~ s/\'//g;
if ($CONTINUE_RUN eq 'FALSE') {
print $fh  <<"EOF";

[CF input]
name: $cisminputfile
EOF
}
$fh->close;

check_consistency($nl, \%xmlvars);


#-----------------------------------------------------------------------------------------------
# END OF MAIN SCRIPT
#===============================================================================================

#===============================================================================================
sub add_default {

# Add a value for the specified variable to the specified namelist object.  The variables
# already in the object have the higher precedence, so if the specified variable is already
# defined in the object then don't overwrite it, just return.
#
# This method checks the definition file and adds the variable to the correct
# namelist group.
#
# The value can be provided by using the optional argument key 'val' in the
# calling list.  Otherwise a default value is obtained from the namelist
# defaults object.  If no default value is found this method throws an exception
# unless the 'nofail' option is set true.
#
# Additional optional keyword=>value pairs may be specified.  If the keyword 'val' is
# not present, then any other keyword=>value pairs that are specified will be used to
# match attributes in the defaults file.
#
# Example 1: Specify the default value $val for the namelist variable $var in namelist
#            object $nl:
#
#  add_default($nl, $var, 'val'=>$val)
#
# Example 2: Add a default for variable $var if an appropriate value is found.  Otherwise
#            don't add the variable
#
#  add_default($nl, $var, 'nofail'=>1)
#
#
# ***** N.B. ***** This routine assumes the following variables are in package main::
#  $definition        -- the namelist definition object
#  $DIN_LOC_ROOT -- CCSM inputdata root directory

    my $nl = shift;     # namelist object
    my $var = shift;    # name of namelist variable
    my %opts = @_;      # options

    my $val = undef;

    # Query the definition to find which group the variable belongs to.  Exit if not found.
    my $group = $definition->get_group_name($var);
    unless ($group) {
	my $fname = $definition->get_file_name();
	die "$ProgName - ERROR: variable \"$var\" not found in namelist definition file $fname.\n";
    }

    # check whether the variable has a value in the namelist object -- if so then return
    $val = $nl->get_variable_value($group, $var);
    if (defined $val) {return;}

    # Look for a specified value in the options hash
    if (defined $opts{'val'}) {
	$val = $opts{'val'};
    }
    # or else get a value from namelist defaults object.
    # Note that if the 'val' key isn't in the hash, then just pass anything else
    # in %opts to the get_value method to be used as attributes that are matched
    # when looking for default values.
    else {
	$val = get_default_value($var, \%opts);
    }

    # if no value is found then exit w/ error (unless 'nofail' option set)
    unless (defined $val) {
	unless ($opts{'nofail'}) {
	    print "$ProgName - ERROR: No default value found for $var\n".
		"user defined attributes:\n";
	    foreach my $key (keys(%opts)) {
		if ($key ne 'nofail' and $key ne 'val') {
		    print "key=$key  val=$opts{$key}\n";
		}
	    }
	    die;
	}
	else {
	    return;
	}
    }

    # query the definition to find out if the variable is an input pathname
    my $is_input_pathname = $definition->is_input_pathname($var);

    # The default values for input pathnames are relative.  If the namelist
    # variable is defined to be an absolute pathname, then prepend
    # the CCSM inputdata root directory.
    # TODO: unless ignore_abs is passed as argument 
    if ($is_input_pathname eq 'abs') {
	unless ($opts{'noprepend'}){
	    $val = set_abs_filepath($val, $DIN_LOC_ROOT);
	}
    }

    # query the definition to find out if the variable takes a string value.
    # The returned string length will be >0 if $var is a string, and 0 if not.
    my $str_len = $definition->get_str_len($var);

    # If the variable is a string, then add quotes if they're missing
    if ($str_len > 0) {
	$val = quote_string($val);
    }

    # set the value in the namelist
    $nl->set_variable_value($group, $var, $val);
}

#-----------------------------------------------------------------------------------------------

sub get_default_value {

# Return a default value for the requested variable.
# Return undef if no default found.
#
# ***** N.B. ***** This routine assumes the following variables are in package main::
#  $defaults          -- the namelist defaults object
#  $uc_defaults       -- the use CASE defaults object

    my $var_name    = lc(shift);   # name of namelist variable (CASE insensitive interface)
    my $usr_att_ref = shift;       # reference to hash containing user supplied attributes

    # Check in the namelist defaults
    return $defaults->get_value($var_name, $usr_att_ref);

}

#-----------------------------------------------------------------------------------------------

sub confirm_empty {

# Confirm that a namelist group is empty (i.e., has no defined
# namelist items).
# Die if it isn't empty
#
# Usage: confirm_empty(group, errmsg)
#
# - group: name of namelist group
# - errmsg: error message to print if group is not empty

    my $group = shift;
    my $errmsg = shift;

    my ($numvars, $varnames) = $nl->get_defined_vars_in_group($group);
    if ($numvars > 0) {
	print "$ProgName: ERROR: $errmsg\n";
	die "$ProgName: ERROR: This applies to: $varnames\n";
    }
}

#-----------------------------------------------------------------------------------------------

sub check_input_files {

# For each variable in the namelist which is an input dataset, check to see if it
# exists locally.
#
# ***** N.B. ***** This routine assumes the following variables are in package main::
#  $definition        -- the namelist definition object

    my $nl = shift;     # namelist object
    my $inputdata_rootdir = shift;    # if false prints test, else creates inputdata file
    my $outfile = shift;
    open(OUTFILE, ">$outfile") if defined $inputdata_rootdir;

    # Look through all namelist groups
    my @groups = $nl->get_group_names();
    foreach my $group (@groups) {

	# Look through all variables in each group
	my @vars = $nl->get_variable_names($group);
	foreach my $var (@vars) {

	    # Is the variable an input dataset?
	    my $input_pathname_type = $definition->is_input_pathname($var);

	    # If it is, check whether it exists locally and print status
	    if ($input_pathname_type) {
		# Get pathname of input dataset
		my $pathname = $nl->get_variable_value($group, $var);
		# Need to strip the quotes
		$pathname =~ s/[\'\"]//g;

		if ($input_pathname_type eq 'abs') {
                    if ($inputdata_rootdir) {
                        print OUTFILE "$var = $pathname\n";
                    }
		}
		elsif ($input_pathname_type =~ m/rel:(.+)/o) {
		    # The match provides the namelist variable that contains the
		    # root directory for a relative filename
		    my $rootdir_var = $1;
		    my $rootdir = $nl->get_variable_value($group, $rootdir_var);
		    $rootdir =~ s/[\'\"]//g;
                    if ($inputdata_rootdir) {
                        $pathname = "$rootdir/$pathname";
                        print OUTFILE "$var = $pathname\n";
                    }
		}
	    }
	}
    }
    close OUTFILE if defined $inputdata_rootdir;
    return 0 if defined $inputdata_rootdir;
}

#-----------------------------------------------------------------------------------------------

sub set_abs_filepath {

# check whether the input filepath is an absolute path, and if it isn't then
# prepend a root directory

    my ($filepath, $rootdir) = @_;

    # strip any leading/trailing whitespace
    $filepath =~ s/^\s+//;
    $filepath =~ s/\s+$//;
    $rootdir  =~ s/^\s+//;
    $rootdir  =~ s/\s+$//;

    # strip any leading/trailing quotes
    $filepath =~ s/^['"]+//;
    $filepath =~ s/["']+$//;
    $rootdir =~ s/^['"]+//;
    $rootdir =~ s/["']+$//;

    my $out = $filepath;
    unless ( $filepath =~ /^\// ) {  # unless $filepath starts with a /
	$out = "$rootdir/$filepath"; # prepend the root directory
    }
    return $out;
}

#-----------------------------------------------------------------------------------------------


sub absolute_path {
#
# Convert a pathname into an absolute pathname, expanding any . or .. characters.
# Assumes pathnames refer to a local filesystem.
# Assumes the directory separator is "/".
#
  my $path = shift;
  my $cwd = getcwd();  # current working directory
  my $abspath;         # resulting absolute pathname

# Strip off any leading or trailing whitespace.  (This pattern won't match if
# there's embedded whitespace.
  $path =~ s!^\s*(\S*)\s*$!$1!;

# Convert relative to absolute path.

  if ($path =~ m!^\.$!) {          # path is "."
      return $cwd;
  } elsif ($path =~ m!^\./!) {     # path starts with "./"
      $path =~ s!^\.!$cwd!;
  } elsif ($path =~ m!^\.\.$!) {   # path is ".."
      $path = "$cwd/..";
  } elsif ($path =~ m!^\.\./!) {   # path starts with "../"
      $path = "$cwd/$path";
  } elsif ($path =~ m!^[^/]!) {    # path starts with non-slash character
      $path = "$cwd/$path";
  }

  my ($dir, @dirs2);
  my @dirs = split "/", $path, -1;   # The -1 prevents split from stripping trailing nulls
                                     # This enables correct processing of the input "/".

  # Remove any "" that are not leading.
  for (my $i=0; $i<=$#dirs; ++$i) {
      if ($i == 0 or $dirs[$i] ne "") {
	  push @dirs2, $dirs[$i];
      }
  }
  @dirs = ();

  # Remove any "."
  foreach $dir (@dirs2) {
      unless ($dir eq ".") {
	  push @dirs, $dir;
      }
  }
  @dirs2 = ();

  # Remove the "subdir/.." parts.
  foreach $dir (@dirs) {
    if ( $dir !~ /\.\./ ) {
        push @dirs2, $dir;
    } else {
        pop @dirs2;   # remove previous dir when current dir is ..
    }
  }
  if ($#dirs2 == 0 and $dirs2[0] eq "") { return "/"; }
  $abspath = join '/', @dirs2;
  return( $abspath );
}

#-------------------------------------------------------------------------------

sub valid_option {

    my ($val, @expect) = @_;
    my ($expect);

    $val =~ s/^\s+//;
    $val =~ s/\s+$//;
    foreach $expect (@expect) {
	if ($val =~ /^$expect$/i) { return $expect; }
    }
    return undef;
}

#-------------------------------------------------------------------------------

sub validate_options {

    my $source = shift;   # text string declaring the source of the options being validated
    my $opts   = shift;   # reference to hash that contains the options

    my ($opt, $old, @expect);

}

#-------------------------------------------------------------------------------

sub quote_string {
    my $str = shift;
    $str =~ s/^\s+//;
    $str =~ s/\s+$//;
    unless ($str =~ /^['"]/) {        #"'
        $str = "\'$str\'";
    }
    return $str;
}

#-------------------------------------------------------------------------------

sub expand_env_xml {

    my $value = shift;

    if ($value =~ /\$([\w_]+)(.*)$/) {
	my $subst = $xmlvars{$1};
	$value =~ s/\$${1}/$subst/g;
    }
    return $value; 
}	 

#-------------------------------------------------------------------------------

sub get_glc_dt {

# Usage: ($dt_option, $dt_count) = get_glc_dt($ncpl_base_period, $glc_ncpl)
#
# Given the number of coupling intervals for GLC and the coupling base period,
# returns appropriate values of dt_option and dt_count for CISM's namelist. Note
# that this gives the dt values in cism_in - NOT the values in cism.config.
#
# This assumes that we want one timestep per coupling interval.

  my $ncpl_base_period = shift;
  my $glc_ncpl = shift;

  my ($dt_option, $dt_count);

  if ($ncpl_base_period eq 'hour') {
    if (3600 % $glc_ncpl != 0) { die "$ProgName: ERROR glc NCPL doesn't divide an hour evenly\n";}
    $dt_option = 'seconds';
    $dt_count = 3600 / $glc_ncpl;
  } elsif ($ncpl_base_period eq 'day') {
    $dt_option = 'steps_per_day';
    $dt_count = $glc_ncpl;
  } elsif ($ncpl_base_period eq 'year') {
    $dt_option = 'steps_per_year';
    $dt_count = $glc_ncpl;
  } elsif ($ncpl_base_period eq 'decade') {
    $dt_option = 'steps_per_year';
    $dt_count = $glc_ncpl / 10;
  } else {
    die "$ProgName: ERROR invalid NCPL_BASE_PERIOD $ncpl_base_period\n";
  }

  return ($dt_option, $dt_count);
}
  
#-------------------------------------------------------------------------------

sub check_consistency {

# Usage: check_consistency($nl, \%xmlvars)
# (note that xmlvars is a reference to a hash)
#
# Checks various options for consistency; dies if any errors are found

    my ($nl, $xmlvars) = @_;
    my $num_errors = 0;

    $num_errors += check_cism_dt($nl->get_value('dt'));

# ----------------------------------------------------------------------
# Requirements for evolve_ice = .false.
# ----------------------------------------------------------------------

    if ($nl->get_value('evolve_ice') == 0) {
       if ($nl->get_value('zero_gcm_fluxes') == 0) {
          print "$ProgName: ERROR: for evolve_ice = 0, you must also set zero_gcm_fluxes = 1\n";
          print "(This is because evolve_ice = 0 implies that there will be no fluxes,\n";
          print "and you must explicitly set zero_gcm_fluxes = 1 for the sake of logic\n";
          print "that depends on whether these fluxes will be zero - particularly, the creation\n";
          print "of icemask_coupled_fluxes used by CLM).\n";
          $num_errors += 1;
       }
    }

# ----------------------------------------------------------------------
# Requirements for use of trilinos solver
# ----------------------------------------------------------------------

    if ($nl->get_value('which_ho_sparse') == 4) {
	if ($xmlvars->{'CISM_USE_TRILINOS'} eq 'FALSE') {
	    print "$ProgName: ERROR: Use of the trilinos solver (which_ho_sparse=4) requires building with trilinos (CISM_USE_TRILINOS=TRUE)\n";
	    $num_errors += 1;
	}

	if ($nl->get_value('dycore') == 0) {
	    print "$ProgName: ERROR: Can't use trilinos solver (which_ho_sparse=4) with glide dycore (dycore=0)\n";
	    $num_errors += 1;
	}
    }

# ----------------------------------------------------------------------
# Requirements for running with more than one MPI task per ensemble member
# ----------------------------------------------------------------------

    if ($xmlvars->{'NTASKS_GLC'} > $xmlvars->{'NINST_GLC'}) {
	if ($nl->get_value('dycore') == 0) {
	    print "$ProgName: ERROR: With dycore=0 (glide dycore), can only have 1 GLC task per instance\n";
	    print "NTASKS_GLC = " . $xmlvars->{'NTASKS_GLC'} . ", NINST_GLC = " . $xmlvars->{'NINST_GLC'} . "\n";
	    $num_errors += 1;
	}

	if ($nl->get_value('dycore') == 1 && $nl->get_value('which_ho_sparse') != 4) {
	    print "$ProgName: ERROR: To run with more than 1 GLC task per instance, with dycore=1, must use trilinos solver (which_ho_sparse=4)\n";
	    print "NTASKS_GLC = " . $xmlvars->{'NTASKS_GLC'} . ", NINST_GLC = " . $xmlvars->{'NINST_GLC'} . "\n";
	    print "which_ho_sparse = " . $nl->get_value('which_ho_sparse') . "\n";
	    $num_errors += 1;
	}
    }


    die if ($num_errors > 0);
}

#-------------------------------------------------------------------------------

sub check_cism_dt {

# Usage: check_cism_dt($dt)
#
# Checks cism's dt value: i.e., the dt variable in the time section of cism.config.
# Returns 0 if okay, > 0 if errors found (i.e., if dt is an inappropriate value). Also, if
# any errors are found, an error message is printed.

  my $dt = shift;
  my $num_errors = 0;


# Ensure that dt translates into an integer number of hours

  my $dt_hours = $dt * 365. * 24.;
  # round to nearest integer:
  my $dt_hours_int = sprintf("%.0f", $dt_hours);
  
  # make sure difference is basically 0, by comparing relative difference with a value near machine epsilon
  if (abs($dt_hours - $dt_hours_int)/$dt_hours > 1.e-15) {
    print "$ProgName: ERROR: dt (in years) must translate into an integer number of hours\n";
    print "dt = $dt\n";
    print "dt (hours) = $dt_hours\n";
    $num_errors += 1;
  }

  return $num_errors;
}

