## requirements

1. Tracks/Reference files must be readable/executable (chmod a+rx) 
2. Tracks/References must be in a path below the "base_dir" where the HTML file is written (can be changed as long as it is in `/global/cfs/cdirs/m342/www/`)
3. Bam files must be indexed
4. Works best with bigwigs (instead of bedGraph)
5. gff3 files must be sorted and indexed
## function to sort / index gff3 (make a copy or symlink to gff3 before running)

```
#!/bin/bash
function sort_and_index_gff3() {
    # Sort a GFF3 file by sequence ID and start position, compress it with bgzip
    # and index it with tabix.
    #
    # Arguments:
    #   $1 - path to the GFF3 file (work on a copy or symlink of the original)
    # Output:
    #   "<name>.sorted.gff3.gz" plus the tabix index created for it
    # Returns:
    #   0 on success, 1 on a usage error or as soon as any step fails

    # Check if a filename has been provided
    if [ -z "$1" ]; then
        echo "Usage: sort_and_index_gff3 <filename>" >&2
        return 1
    fi

    # Variables
    local filename="$1"
    local sorted_filename="${filename%.gff3}.sorted.gff3"
    local bgzipped_filename="${sorted_filename}.gz"

    if [ ! -r "$filename" ]; then
        echo "Error: cannot read '$filename'" >&2
        return 1
    fi

    # Sort the GFF3 file by chromosome and then by start position
    # This assumes the GFF3 file is structured in a standard way where:
    # - Column 1 is the chromosome
    # - Column 4 is the start position
    # Header/comment lines (starting with '#') are kept at the top, unsorted.
    # Columns are split on tabs only, so stray spaces cannot shift the sort keys.
    echo "Sorting the GFF3 file..."
    if ! {
        grep '^#' "$filename"
        grep -v '^#' "$filename" | sort -t $'\t' -k1,1 -k4,4n
    } > "$sorted_filename"; then
        echo "Error: sorting '$filename' failed" >&2
        rm -f "$sorted_filename"
        return 1
    fi

    # Compress the sorted file using bgzip
    echo "Compressing the sorted GFF3 file..."
    if ! bgzip -c "$sorted_filename" > "$bgzipped_filename"; then
        echo "Error: bgzip failed for '$sorted_filename'" >&2
        rm -f "$sorted_filename" "$bgzipped_filename"
        return 1
    fi
    rm "$sorted_filename"  # the uncompressed intermediate is no longer needed

    # Index the compressed file using tabix
    echo "Indexing the compressed GFF3 file..."
    if ! tabix -p gff "$bgzipped_filename"; then
        echo "Error: tabix failed for '$bgzipped_filename'" >&2
        return 1
    fi

    echo "Sorting and indexing complete. Output: $bgzipped_filename"
}

# usage (the path is quoted so file names containing spaces are passed as one argument)
sort_and_index_gff3 "${my_copy_of_gff3}"


```

## see links below for more info
https://github.com/igvteam/igv.js

https://github.com/igvteam/igv.js/wiki/Tracks-1.0

https://github.com/igvteam/igv.js/wiki/Tracks-2.0

## example HTML

/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/athaliana_igv.html

https://portal.nersc.gov/cfs/m342/jgi_usa/drna_tracks/athaliana_igv.html


## Track dictionary class to hold tracks

In [22]:
import pandas as pd
import os
import json

# container that accumulates tracks and their parameters
class trackDict:
    """Ordered collection of track definitions, each stored as a plain dict."""

    def __init__(self):
        self.tracks = []

    def addTrack(self, **kwargs):
        """Append one track whose parameters are the given keyword arguments."""
        self.tracks.append(dict(kwargs))

    def getTracks(self):
        """Return the list of track dicts (the internal list itself, not a copy)."""
        return self.tracks

    def save_to_json(self, file_path):
        """Write all tracks to ``file_path`` as indented, UTF-8 encoded JSON."""
        with open(file_path, 'w', encoding='utf-8') as handle:
            handle.write(json.dumps(self.tracks, ensure_ascii=False, indent=4))
            

## Load refs and GFFs

In [23]:

# directories holding the reference FASTA files and the GFF3 annotations
fasta_dir = "/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/genomes/"
gffdir = "/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/gff3/"

# annotation file names, one per species
alygff = "Alyrata_384_v2.1.gene_exons.sorted.gff3.gz"
athgff = "Athaliana_447_Araport11.gene_exons.sorted.gff3.gz"
crgff = "Crubella_474_v1.1.gene_exons.sorted.gff3.gz"
csgff = "CsativaCAM116_802_v1.1.gene_exons.sorted.gff3.gz"

# species key -> (reference FASTA name, annotation name); the keys should match
# the values in the species column of the track organizer loaded below
_species_files = {
    'alyrata': ("Alyrata_384_v1.fa", alygff),
    'athaliana': ("Athaliana_447_TAIR10.fa", athgff),
    'crubella': ("Crubella_474_v1.fa", crgff),
    'camelina': ("CsativaCAM116_802_v1.0.fa", csgff),
}

# species organizer: full paths to the reference and annotation of every species
sporg = {
    species: {"fasta_file": os.path.join(fasta_dir, fasta_name),
              'gff_fn': os.path.join(gffdir, gff_name)}
    for species, (fasta_name, gff_name) in _species_files.items()
}

sporg

{'alyrata': {'fasta_file': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/genomes/Alyrata_384_v1.fa',
  'gff_fn': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/gff3/Alyrata_384_v2.1.gene_exons.sorted.gff3.gz'},
 'athaliana': {'fasta_file': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/genomes/Athaliana_447_TAIR10.fa',
  'gff_fn': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/gff3/Athaliana_447_Araport11.gene_exons.sorted.gff3.gz'},
 'crubella': {'fasta_file': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/genomes/Crubella_474_v1.fa',
  'gff_fn': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/gff3/Crubella_474_v1.1.gene_exons.sorted.gff3.gz'},
 'camelina': {'fasta_file': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/genomes/CsativaCAM116_802_v1.0.fa',
  'gff_fn': '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/gff3/CsativaCAM116_802_v1.1.gene_exons.sorted.gff3.gz'}}

## load track organizer

In [24]:
# tab-delimited track organizer; preview the first rows after loading
track_gfn = "/global/cfs/cdirs/m342/www/jgi_usa/igv_generator/igv_track_guide.txt"
tgdf = pd.read_csv(track_gfn, delimiter="\t")
tgdf.head()

Unnamed: 0,species,track_path,track_name,track_color,track_height,autoscale,min,max
0,athaliana,/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks...,ont_pseu_X0290_negative,"rgb(100,233,136)",30,False,0.2,1
1,athaliana,/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks...,ont_pseu_X0290_positive,"rgb(100,233,136)",30,False,0.2,1
2,athaliana,/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks...,ont_m6a_X0290_negative,"rgb(200,100,200)",30,False,0.2,1
3,athaliana,/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks...,ont_m6a_X0290_positive,"rgb(200,100,200)",30,False,0.2,1
4,athaliana,/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks...,ont_pseu_X0289_negative,"rgb(100,233,136)",30,False,0.2,1


## Function to generate HTML using the class and the track organizer

In [25]:
import os
import pandas as pd
import json

def generate_species_igv_html(tgdf, sporg, base_dir):
    """Write one igv.js browser page per species found in the track organizer.

    For every distinct value in ``tgdf['species']`` a page named
    ``<species>_igv_temp.html`` is written into ``base_dir``. The page loads
    igv.js from a CDN and embeds a configuration holding the reference
    sequence, the GFF3 annotation track and one track per organizer row of
    that species. Every URL in the configuration is expressed relative to
    ``base_dir``.

    Parameters
    ----------
    tgdf : pandas.DataFrame
        Track organizer. The columns read are ``species``, ``track_path``,
        ``track_name``, ``track_color``, ``track_height``, ``autoscale``,
        ``min`` and ``max``.
    sporg : dict
        Maps each species to a dict with the keys ``'fasta_file'`` and
        ``'gff_fn'``. The index files are referenced as ``<fasta_file>.fai``
        and ``<gff_fn>.tbi``.
    base_dir : str
        Directory the pages are written to and the URLs are relative to.

    Raises
    ------
    KeyError
        If a species of ``tgdf`` has no entry in ``sporg``. This is checked
        before anything is written, so no partial set of pages is produced.
    """
    # Helper function to create HTML with IGV browser
    def generate_igv_html(config, filename):
        """Embed ``config`` as JSON into a minimal igv.js page saved as ``filename``."""
        config_json = json.dumps(config, indent=4)
        html_template = f"""
<html>
<head>
    <meta charset="utf-8">
    <title>IGV.js - {config['reference']['name']} Browser</title>
</head>
<body>
<div id="igvDiv"></div>
<script src="https://cdn.jsdelivr.net/npm/igv@2.15.5/dist/igv.min.js"></script>
<script type="module">
    const config = {config_json}
    const browser = await igv.createBrowser(document.getElementById('igvDiv'), config)
</script>
</body>
</html>
"""
        # explicit encoding so the page does not depend on the platform default
        with open(filename, 'w', encoding='utf-8') as file:
            file.write(html_template)
        print(f"HTML file '{filename}' has been created.")

    # Helper function to turn one organizer row into igv.js track options
    def track_options(row):
        """Return the track options for ``row``; the track type is chosen from the file extension."""
        track_path = row['track_path']
        options = dict(name=row['track_name'],
                       url=os.path.relpath(track_path, start=base_dir),  # Get relative path
                       color=row['track_color'],
                       height=row['track_height'])
        # compare case-insensitively so e.g. ".BW" or ".bigwig" are recognised too
        extension = os.path.splitext(track_path)[1].lower()
        if extension in ('.bw', '.bigwig'):
            options.update(type='wig', format='bigWig', displayMode='EXPANDED')
            if not row['autoscale']:
                # A real boolean serialises to JSON `false`; the string 'false'
                # would be a truthy value on the JavaScript side.
                options.update(autoscale=False, min=row['min'], max=row['max'])
        elif extension == '.bam':
            options.update(type='alignment', displayMode='SQUISHED')
        # any other extension keeps the basic options only
        return options

    species_names = list(tgdf['species'].unique())

    # Fail early, with a readable message, when reference information is missing
    missing = [species for species in species_names if species not in sporg]
    if missing:
        raise KeyError(f"no reference/annotation entry in sporg for species: {missing}")

    # Loop through each species in the dataframe
    for species in species_names:
        species_tracks_df = tgdf[tgdf['species'] == species]
        gff_url = os.path.relpath(sporg[species]['gff_fn'], start=base_dir)
        fasta_url = os.path.relpath(sporg[species]['fasta_file'], start=base_dir)

        # Initialize the trackDict for this species
        tracks = trackDict()
        # add gff3
        tracks.addTrack(name="GFF3 annotation",
                        url=gff_url,
                        indexURL=gff_url + ".tbi",
                        height=125,
                        displayMode="EXPANDED",
                        searchable=True)  # boolean, serialised as JSON `true`
        # add each track of this species
        for idx, row in species_tracks_df.iterrows():
            tracks.addTrack(**track_options(row))

        # Create IGV config for this species
        config = {
            "reference": {
                "id": species,
                "name": f"{species} Genome",
                "fastaURL": fasta_url,
                "indexURL": fasta_url + '.fai',
                "tracks": tracks.getTracks()
            }
        }

        # Specify the filename for the HTML file
        html_filename = os.path.join(base_dir, f"{species}_igv_temp.html")
        # generate the HTML browser
        generate_igv_html(config, html_filename)

## Example usage

### HTML file names are generated automatically from the species name (`<species>_igv_temp.html`) and written to `base_dir`

In [26]:
# directory that receives the generated pages; track/reference URLs are made relative to it
base_dir = '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks'
generate_species_igv_html(tgdf=tgdf, sporg=sporg, base_dir=base_dir)

HTML file '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/athaliana_igv.html' has been created.
HTML file '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/alyrata_igv.html' has been created.
HTML file '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/crubella_igv.html' has been created.
HTML file '/global/cfs/cdirs/m342/www/jgi_usa/drna_tracks/camelina_igv.html' has been created.
