B
    8\	!                 @   s   d dl Z d dlmZmZmZ d dlZd dlZd dlZd dlmZ d dl	Z
d dlZd dlmZ d dlmZmZ ddlmZ ddlmZmZ dd	lmZ G d
d dZG dd deZG dd deZG dd deZG dd deZG dd deZdS )    N)DatasetConcatDatasetSampler)glob)colored)CounterOrderedDict   )util)	TraceModePriorInflation)ConcurrentShelfc               @   s,   e Zd Zdd Zdd Zdd Zdd Zd	S )
Batchc             C   sz   || _ t|| _i }xR|D ]J}|jdkr0tdddd |jD }||krVg ||< || | qW t|	 | _
d S )Nr   zTrace of length zero. c             S   s   g | ]
}|j qS  )address).0variabler   r   S/global/project/projectdirs/dasrepo/etalumis/conda_envs/pyprob/pyprob/nn/dataset.py
<listcomp>   s    z"Batch.__init__.<locals>.<listcomp>)traceslensizelength
ValueErrorjoinvariables_controlledappendlistvaluessub_batches)selfr   r    trace
trace_hashr   r   r   __init__   s    


zBatch.__init__c             C   s
   t | jS )N)r   r   )r!   r   r   r   __len__   s    zBatch.__len__c             C   s
   | j | S )N)r   )r!   keyr   r   r   __getitem__"   s    zBatch.__getitem__c             C   s    x| j D ]}|j|d qW d S )N)device)r   to)r!   r(   r"   r   r   r   r)   %   s    zBatch.toN)__name__
__module____qualname__r$   r%   r'   r)   r   r   r   r   r      s   r   c               @   s@   e Zd ZdejfddZdd Zdd Zedd	 Z	d
d Z
dS )OnlineDatasetNc             C   s&   || _ |d krtd}|| _|| _d S )Ng    .A)_modelint_length_prior_inflation)r!   modelr   prior_inflationr   r   r   r$   +   s
    zOnlineDataset.__init__c             C   s   | j S )N)r0   )r!   r   r   r   r%   2   s    zOnlineDataset.__len__c             C   s   t | jjtj| jdS )N)
trace_moder3   )nextr.   _trace_generatorr   PRIORr1   )r!   idxr   r   r   r'   5   s    zOnlineDataset.__getitem__c             C   s   | ` | `| `| `| `| `| `| `| `| `	| `
| `x6| jD ],}|`|`|`	|`|`|`|`|`|`|`q8W xF| j D ]8\}}|`|`|`|`|`	|`|`|`|`|`|`|`qtW d S )N)	variablesvariables_uncontrolledvariables_replacedvariables_observedvariables_observablevariables_taggedvariables_dict_addressvariables_dict_address_baseresultlog_problog_prob_observedexecution_time_secr   address_baseinstancecontrolreplacename
observableobservedreusedtaggednamed_variablesitemsdistributionr   )r"   r   _r   r   r   _prune_trace8   sH    zOnlineDataset._prune_tracec          	   O   s   t || }td||||d d}x||k r||7 }tj|d|tt	
 }t|}	|	jdd xRt|D ]F}
t| jj|tj| jd|}| | ||	t|
< |
d |	d	< qxW |	  t| q,W t  d S )
Nz?Saving offline dataset, traces:{}, traces per file:{}, files:{}Tracesr   zpyprob_traces_{}_{}T)write)r4   r3      __length)mathceilr
   progress_bar_initformatospathr   struuiduuid4r   lockranger5   r.   r6   r   r7   r1   rR   unlockprogress_bar_updateprogress_bar_end)r!   dataset_dir
num_tracesnum_traces_per_fileargskwargsZ	num_filesi	file_nameshelfjr"   r   r   r   save_datasetn   s     
 
zOnlineDataset.save_dataset)r*   r+   r,   r   DISABLEDr$   r%   r'   staticmethodrR   rn   r   r   r   r   r-   *   s
   6r-   c               @   s$   e Zd Zdd Zdd Zdd ZdS )OfflineDatasetFilec             C   s    || _ t|| _| jd | _d S )NrV   )
_file_namer   _shelfr0   )r!   rk   r   r   r   r$      s    
zOfflineDatasetFile.__init__c             C   s   | j S )N)r0   )r!   r   r   r   r%      s    zOfflineDatasetFile.__len__c             C   s   | j t| S )N)rs   r]   )r!   r8   r   r   r   r'      s    zOfflineDatasetFile.__getitem__N)r*   r+   r,   r$   r%   r'   r   r   r   r   rq      s   rq   c                   s$   e Zd Z fddZdd Z  ZS )OfflineDatasetc                s  || _ tttj| j d}g }xh|D ]`}yt|}|| W q( tk
r } z&t	| t	t
d|ddgd W d d }~X Y q(X q(W t | t	d| j  tj| j d}t|}d|kr<t	d	| |d
 | _|d | _t| jr
| j  | _t| jt| krrtdt| jt| |n6t	d| |  \}	}
|	|d< |
|d
< |
| _|	| _t	dt|  t	dtt| j ttt| j }t	d x&| D ]\}}t	d|| qW t	  d S )Nzpyprob_traces_*z7Warning: dataset file potentially corrupt, omitting: {}redbold)attrszOfflineDataset at: {}Zpyprob_hasheshashesz Using pre-computed hashes in: {}sorted_indiceszLength of pre-computed hashes ({}) and length of offline dataset ({}) do not match. Dataset files have been altered. Delete and re-generate pre-computed hash file: {}z,No pre-computed hashes found, generating: {}zNum. traces     : {:,}zNum. trace types: {:,}zTrace hash	Countz	{:.8f}	{})Z_dataset_dirsortedr   r[   r\   r   rq   r   	Exceptionprintr   rZ   superr$   r   _sorted_indicesZ_hashestorch	is_tensorcpunumpyr   RuntimeError_compute_hashessetr   r   rO   )r!   re   filesZdatasetsfiledataseterk   Zhashes_filerx   ry   Zhashes_and_countshashcount)	__class__r   r   r$      sF    
0


zOfflineDataset.__init__c             C   s   dd }t t| }tdt| d x.tt| D ]}|| | ||< t| q6W t  td t 	|\}}td |
  |
  fS )Nc             S   s6   t ddd | jD tj d }td| j|S )Nr   c             S   s   g | ]
}|j qS r   )r   )r   r   r   r   r   r      s    zFOfflineDataset._compute_hashes.<locals>.trace_hash.<locals>.<listcomp>rU   z{}.{})r   r   r   sysmaxsizefloatrZ   length_controlled)r"   hr   r   r   r#      s    $z2OfflineDataset._compute_hashes.<locals>.trace_hashz#Hashing offline dataset for sortingrS   zSorting offline datasetzSorting done)r   zerosr   r
   rY   ra   rc   rd   r|   sortr   r   )r!   r#   rx   rj   rQ   ry   r   r   r   r      s    zOfflineDataset._compute_hashes)r*   r+   r,   r$   r   __classcell__r   r   )r   r   rt      s   'rt   c               @   s$   e Zd Zdd Zdd Zdd ZdS )SortedTraceSamplerc             C   s   |j | _ d S )N)r~   )r!   offline_datasetr   r   r   r$      s    zSortedTraceSampler.__init__c             C   s
   t | jS )N)iterr~   )r!   r   r   r   __iter__   s    zSortedTraceSampler.__iter__c             C   s
   t | jS )N)r   Z_offline_dataset)r!   r   r   r   r%      s    zSortedTraceSampler.__len__N)r*   r+   r,   r$   r   r%   r   r   r   r   r      s   r   c               @   s&   e Zd Zd	ddZdd Zdd ZdS )
SortedTraceBatchSamplerFc             C   s   t t|j|| _|| _d S )N)r   r
   chunksr~   _batches_shuffle)r!   r   
batch_sizeshuffler   r   r   r$      s    z SortedTraceBatchSampler.__init__c             C   s   | j rtj| j t| jS )N)r   nprandomr   r   r   )r!   r   r   r   r      s    z SortedTraceBatchSampler.__iter__c             C   s
   t | jS )N)r   r   )r!   r   r   r   r%      s    zSortedTraceBatchSampler.__len__N)F)r*   r+   r,   r$   r   r%   r   r   r   r   r      s   
r   )r   Ztorch.utils.datar   r   r   rW   r[   r   r   r   r   r^   	termcolorr   collectionsr   r   r   r
   r   r   concurrencyr   r   r-   rq   rt   r   r   r   r   r   r   <module>   s$   W8