B
    ÛÖ8\I  ã               @   sˆ   d dl Z d dlZd dlZd dlZd dlZd dlZd dlm	Z
 d dlZd dlZd dlmZ ddlmZmZ ddlmZ G dd„ deƒZdS )	é    N)Úcoloredé   )ÚDistributionÚCategoricalé   )Úutilc            
       sŽ  e Zd ZdZ‡ fdd„	Zdd„ Zdd	„ Zd
d„ Zdd„ Zedd„ ƒZ	dd„ Z
d[dd„Zdd„ Zdd„ Zd\dd„Zd]dd„Zdd„ Zdd„ Zd d!„ Zd"d#„ Zd$d%„ Zd^d&d'„Zd(d)„ Zd*d+„ Zd,d-„ Zd.d/„ Zd0d1„ Zd_d2d3„Zd`d4d5„Zed6d7„ ƒZed8d9„ ƒZed:d;„ ƒZd<d=„ Z d>d?„ Z!ed@dA„ ƒZ"dBdC„ Z#dDdE„ Z$edFdG„ ƒZ%edHdI„ ƒZ&dJdK„ Z'e(dadLdM„ƒZ)dNdO„ Z*dPdQ„ Z+dRdS„ Z,dbdXdY„Z-‡  Z.S )cÚ	EmpiricalNéè  Fc                s  d| _ d| _d | _g | _d| _|d kr4d| _g | _nvd| _|| _tj	| j|d| _
d| j
kr–d| j
krp| j
d }| j
d | _| j
d | _t| jƒ| _nd| _|| _| j| _d | _d | _d | _d | _d | _d | _d| _tƒ  |¡ |d k	rt|ƒdkr|  |||¡ |  ¡  d S )	NFr   T)Z	writebackÚlog_weightsÚnameÚlast_keyéÿÿÿÿ)Ú
_finalizedÚ_closedÚ_categoricalÚ_log_weightsÚ_lengthÚ_on_diskÚ_valuesÚ
_file_nameÚshelveÚopenÚ_shelfÚ_file_last_keyÚlenÚ_file_sync_timeoutÚ_file_sync_countdownÚ_meanÚ	_varianceÚ_modeÚ_minÚ_maxÚ_effective_sample_sizeÚ_uniform_weightsÚsuperÚ__init__Úadd_sequenceÚfinalize)ÚselfÚvaluesr
   ÚweightsÚ	file_nameZfile_sync_timeoutZfile_writebackr   )Ú	__class__© ú`/global/project/projectdirs/dasrepo/etalumis/conda_envs/pyprob/pyprob/distributions/empirical.pyr%      s@    



zEmpirical.__init__c             C   s   | S )Nr-   )r(   r-   r-   r.   Ú	__enter__5   s    zEmpirical.__enter__c             C   s   | j s|  ¡  d S )N)r   Úclose)r(   Zexception_typeZexception_valueÚ	tracebackr-   r-   r.   Ú__exit__8   s    zEmpirical.__exit__c             C   s   | j s|  ¡  d S )N)r   r0   )r(   r-   r-   r.   Ú__del__<   s    zEmpirical.__del__c             C   s   | j S )N)r   )r(   r-   r-   r.   Ú__len__@   s    zEmpirical.__len__c             C   s   | j S )N)r   )r(   r-   r-   r.   ÚlengthC   s    zEmpirical.lengthc             C   s(   | j r$|  ¡  | js$| j ¡  d| _d S )NT)r   r'   r   r   r0   )r(   r-   r-   r.   r0   G   s
    
zEmpirical.closec             C   sÞ   |   ¡  | jrœ|d kr<td | j¡ƒ t|  ¡ | j| jdS td | j|¡ƒ t|| jd}x0t	| j
ƒD ]"}|j| jt|ƒ | j| d qhW | ¡  |S n>|d kr¶tdƒ t | ¡S td |¡ƒ t| j| j|| jdS d S )	Nz;Copying Empirical(file_name: {}) to Empirical(on memory)...)r)   r
   r   z?Copying Empirical(file_name: {}) to Empirical(file_name: {})...)r+   r   )ÚvalueÚ
log_weightz7Copying Empirical(on memory) to Empirical(on memory)...z;Copying Empirical(on memory) to Empirical(file_name: {})...)r)   r
   r+   r   )Ú_check_finalizedr   ÚprintÚformatr   r   Ú
get_valuesr   r   Úranger   Úaddr   Ústrr'   Úcopyr   )r(   r+   ÚretÚir-   r-   r.   r?   N   s     "
zEmpirical.copyc             C   s†   t jjtj| jt jdd| _t  | jj	| jj	d ¡ 
¡ | _t| jƒ| _| jr|| j| jd< | j| jd< | j| jd< | j ¡  d| _d S )N)Údtype)Úlogitsr   r   r
   r   T)ÚtorchÚdistributionsr   r   Ú	to_tensorr   Úfloat64r   ÚeqrC   Úallr#   r   r   r   r   r   r   Úsyncr   )r(   r-   r-   r.   r'   c   s    
zEmpirical.finalizec             C   s   | j stdƒ‚d S )Nz-Empirical not finalized. Call finalize first.)r   ÚRuntimeError)r(   r-   r-   r.   r8   n   s    zEmpirical._check_finalizedc             C   sØ   d| _ d | _d | _d | _d | _d | _d | _|d k	rF| j t	 
|¡¡ n4|d k	rh| j t t	 
|¡¡¡ n| j t	 
d¡¡ | jrÈ|  jd7  _|| jt| jƒ< |  jd8  _| jdkrÔ|  ¡  | j| _n| j |¡ d S )NFg        r   r   )r   r   r   r   r    r!   r"   r   Úappendr   rF   rD   Úlogr   r   r   r>   r   r'   r   r   )r(   r6   r7   Úweightr-   r-   r.   r=   r   s(    

zEmpirical.addc             C   s”   |d k	r6x†t t|ƒƒD ]}| j|| || d qW nZ|d k	rlxPt t|ƒƒD ]}| j|| || d qLW n$x"t t|ƒƒD ]}|  || ¡ qzW d S )N)r7   )rN   )r<   r   r=   )r(   r)   r
   r*   rA   r-   r-   r.   r&   ‹   s    zEmpirical.add_sequencec             C   s   || _ | jr| j | jd< | S )Nr   )r   r   r   )r(   r   r-   r-   r.   Úrename–   s    zEmpirical.renamec             C   s:   | j r,|dk r|  | j| ¡S | jt|ƒ S | j| S d S )Nr   )r   Ú
_get_valuer   r   r>   r   )r(   Úindexr-   r-   r.   rP   œ   s
    zEmpirical._get_valuec             C   s   | j j| S )N)r   rC   )r(   rQ   r-   r-   r.   Ú_get_log_weight¤   s    zEmpirical._get_log_weightc             C   s   | j j| S )N)r   Úprobs)r(   rQ   r-   r-   r.   Ú_get_weight§   s    zEmpirical._get_weightc                s0   ˆ   ¡  ˆ jr&‡ fdd„tˆ jƒD ƒS ˆ jS d S )Nc                s   g | ]}ˆ j t|ƒ ‘qS r-   )r   r>   )Ú.0rA   )r(   r-   r.   ú
<listcomp>­   s    z(Empirical.get_values.<locals>.<listcomp>)r8   r   r<   r   r   )r(   r-   )r(   r.   r;   ª   s    zEmpirical.get_valuesc             C   sh   |   ¡  | jr:|d krd}|d kr,| jd }t ||¡}n$|d k	sJ|d k	rPtƒ ‚t| j ¡ ƒ}|  	|¡S )Nr   r   )
r8   r#   r   ÚrandomÚrandintÚNotImplementedErrorÚintr   ÚsamplerP   )r(   Ú	min_indexÚ	max_indexrQ   r-   r-   r.   r[   ±   s    
zEmpirical.samplec             c   s,   |   ¡  xt| jƒD ]}|  |¡V  qW d S )N)r8   r<   r   rP   )r(   rA   r-   r-   r.   Ú__iter__¿   s    zEmpirical.__iter__c             C   sH   |   ¡  t|tƒr:| jrtƒ ‚t| j| | j| | jdS |  	|¡S d S )N)r)   r
   r   )
r8   Ú
isinstanceÚslicer   rY   r   r   r   r   rP   )r(   rQ   r-   r-   r.   Ú__getitem__Ä   s    
zEmpirical.__getitem__c             C   s¼   |   ¡  d}| jrVxžt| jƒD ]2}|tj|| jt|ƒ ƒtj	d| j
j|  7 }qW n\| jrttt|| jƒƒ| j }n>x<t| jƒD ].}|tj|| j| ƒtj	d| j
j|  7 }q€W t |¡S )Ng        )rB   )r8   r   r<   r   r   rF   r   r>   rD   rG   r   rS   r#   ÚsumÚmapr   )r(   Úfuncr@   rA   r-   r-   r.   ÚexpectationÍ   s    4.zEmpirical.expectationc             O   sN   |   ¡  g }x&t| jƒD ]}| ||  |¡ƒ¡ qW t||| j| jdœ|—ŽS )N)r)   r
   r   )r8   r<   r   rL   rP   r   r   r   )r(   rd   ÚargsÚkwargsr)   rA   r-   r-   r.   rc   Û   s
    zEmpirical.mapc             O   sz   |   ¡  | jdkr| S g }g }x>t| jƒD ]0}|  |¡}||ƒr*| |¡ | |  |¡¡ q*W t|f|ž|| jdœ|—ŽS )Nr   )r
   r   )	r8   r5   r<   r   rP   rL   rR   r   r   )r(   rd   rf   rg   Zfiltered_valuesZfiltered_log_weightsrA   r6   r-   r-   r.   Úfilterâ   s    


zEmpirical.filterc       
      O   s°   |   ¡  |d krdd„ }g }d |d kr,dn
dt|ƒ |d krBdn
dt|ƒ ¡}t ||d¡ x2t|ƒD ]&}	t |	¡ | || jd d dƒ¡ qhW t 	¡  t
||| jd	œ|—ŽS )
Nc             S   s   | S )Nr-   )Úxr-   r-   r.   Ú<lambda>ó   s    z$Empirical.resample.<locals>.<lambda>zResampling{}{}...Ú z, min_index: z, max_index: ÚSamples)r\   r]   )r)   r   )r8   r:   r>   r   Úprogress_bar_initr<   Úprogress_bar_updaterL   r[   Úprogress_bar_endr   r   )
r(   Únum_samplesÚmap_funcr\   r]   rf   rg   r)   ÚmessagerA   r-   r-   r.   Úresampleï   s    4
zEmpirical.resamplec             O   s  |   ¡  |d krdd„ }|d kr$d}|d kr2| j}tdt || | ¡ƒ}t|||ƒ}g }	g }
d ||d krpdn
dt|ƒ |d kr†dn
dt|ƒ ¡}t 	|t
|ƒd	¡ xJtt
|ƒƒD ]:}t |¡ |	 ||  || ¡ƒ¡ |
 |  || ¡¡ q´W t ¡  t||	|
| jd
œ|—ŽS )Nc             S   s   | S )Nr-   )ri   r-   r-   r.   rj      s    z Empirical.thin.<locals>.<lambda>r   r   zThinning, step: {}{}{}...rk   z, min_index: z, max_index: rl   )r)   r
   r   )r8   r5   ÚmaxÚmathÚfloorr<   r:   r>   r   rm   r   rn   rL   rP   rR   ro   r   r   )r(   rp   rq   r\   r]   rf   rg   ÚstepÚindicesr)   r
   rr   rA   r-   r-   r.   Úthiný   s&    6
zEmpirical.thinc             C   s    | j d kr|  dd„ ¡| _ | j S )Nc             S   s   | S )Nr-   )ri   r-   r-   r.   rj     s    z Empirical.mean.<locals>.<lambda>)r   re   )r(   r-   r-   r.   Úmean  s    
zEmpirical.meanc                s*   | j d kr$| j‰ |  ‡ fdd„¡| _ | j S )Nc                s   | ˆ  d S )Nr   r-   )ri   )rz   r-   r.   rj     s    z$Empirical.variance.<locals>.<lambda>)r   rz   re   )r(   r-   )rz   r.   Úvariance  s    
zEmpirical.variancec             C   sà   |   ¡  | jd krÚ| jr´i }t d| jd¡ ttdddgdƒ xHt| jƒD ]:}t 	|¡ |  
|¡}||kr~||  d7  < qLd||< qLW t ¡  t| ¡ dd	„ d
dd d | _n&t | j¡ d¡\}}|  
t|ƒ¡| _| jS )NzComputing mode...ÚValueszYWarning: weights are uniform and mode is correct only if values in Empirical are hashableÚredÚbold)Úattrsr   c             S   s   | d S )Nr   r-   )ri   r-   r-   r.   rj   /  s    z Empirical.mode.<locals>.<lambda>T)ÚkeyÚreverser   r   )r8   r   r#   r   rm   r   r9   r   r<   rn   rP   ro   ÚsortedÚitemsrF   r   rt   rZ   )r(   ÚcountsrA   r6   Ú_r]   r-   r-   r.   Úmode  s"    


"zEmpirical.modec             C   sx   |   ¡  ||  d¡ƒ}d}t d| jd¡ x:t| jƒD ],}t |¡ ||  |¡ƒ}||kr6|}|}q6W t ¡  |  |¡S )Nr   zComputing arg_max...r|   )r8   rP   r   rm   r   r<   rn   ro   )r(   rq   Úmax_valÚmax_irA   Úvalr-   r-   r.   Úarg_max5  s    
zEmpirical.arg_maxc             C   sx   |   ¡  ||  d¡ƒ}d}t d| jd¡ x:t| jƒD ],}t |¡ ||  |¡ƒ}||kr6|}|}q6W t ¡  |  |¡S )Nr   zComputing arg_min...r|   )r8   rP   r   rm   r   r<   rn   ro   )r(   rq   Úmin_valZmin_irA   r‰   r-   r-   r.   Úarg_minC  s    
zEmpirical.arg_minc             C   s4   |   ¡  | jd kr.| jj}d| d¡ ¡  | _| jS )Ng      ð?r   )r8   r"   r   rS   Úpowrb   )r(   r*   r-   r-   r.   Úeffective_sample_sizeQ  s
    
zEmpirical.effective_sample_sizec             O   s"   |   ¡  t||  ¡ | jdœ|—ŽS )N)r)   r   )r8   r   r;   r   )r(   rf   rg   r-   r-   r.   Ú
unweighted[  s    zEmpirical.unweightedc             C   sD   y*t tt|  ¡ ƒƒ}|d | _|d | _W n   tdƒ‚Y nX d S )Nr   r   z»Cannot compute the minimum and maximum of values in this Empirical. Make sure the distribution is over values that are scalar or castable to scalar, e.g., a PyTorch tensor of one element.)r‚   rc   Úfloatr;   r    r!   rK   )r(   Zsorted_valuesr-   r-   r.   Ú_find_min_max_  s    
zEmpirical._find_min_maxc             C   s   | j d kr|  ¡  | j S )N)r    r‘   )r(   r-   r-   r.   Úming  s    
zEmpirical.minc             C   s   | j d kr|  ¡  | j S )N)r!   r‘   )r(   r-   r-   r.   rt   m  s    
zEmpirical.maxc          	   O   sú   |   ¡  | jrtƒ ‚nàt t¡}t | jd ¡}|rîxˆt	| j
ƒD ]z}d}xX| ¡ D ]L\}}t t |¡t | j| ¡¡rRtjt || j| f¡dd||< d}qRW |s@| j| || j| < q@W t| ¡ ƒ}	t| ¡ ƒ}
t||	|
| jdœ|—ŽS tdƒ‚d S )Nr   F)ÚdimT)r)   r
   r   z^The values in this Empirical as not hashable. Combining of duplicates not currently supported.)r8   r   rY   ÚcollectionsÚdefaultdictr   r   Úis_hashabler   r<   r5   rƒ   rD   ÚequalrF   Ú	logsumexpÚstackr   ÚlistÚkeysr)   r   r   rK   )r(   rf   rg   ÚdistributionZhashablerA   Úfoundr€   r6   r)   r
   r-   r-   r.   Úcombine_duplicatess  s$    
"zEmpirical.combine_duplicatesc       	      C   sþ   | d j }x0| D ](}|j |kr&tdƒ‚t|tƒstdƒ‚qW |r¦|d krPtdƒ‚t|d}x>| D ]6}x0t|jƒD ]"}|j|jt	|ƒ |j
| d qpW q`W | ¡  |S g }g }| d j}x2| D ]*}|j|krÔtdƒ‚||j7 }||j
7 }q¾W t|||dS d S )	Nr   zAExpecting all Empirical distributions to be on disk or in memory.z>Combination is only supported between Empirical distributions.z8Expecting a target file_name for the combined Empirical.)r+   )r6   r7   zNCombination is only supported between Empirical distributions of equal length.)r)   r
   r+   )r   rK   r_   r   Ú	TypeErrorr<   r   r=   r   r>   r   r'   r5   r   )	Zempirical_distributionsr+   Zon_diskÚdistr@   rA   r)   r
   r5   r-   r-   r.   Úcombine‹  s0    





&



zEmpirical.combinec          	   C   sT   |   ¡  yt |  ¡ ¡ ¡  ¡ S    yt |  ¡ ¡S    tdƒ‚Y nX Y nX d S )NzCannot convert values to numpy.)	r8   rD   r™   r;   ÚcpuÚnumpyÚnpÚarrayrK   )r(   r-   r-   r.   Úvalues_numpy¨  s    zEmpirical.values_numpyc             C   s   |   ¡  t | jj¡S )N)r8   r   Úto_numpyr   rS   )r(   r-   r-   r.   Úweights_numpy²  s    zEmpirical.weights_numpyc             C   s   |   ¡  t | jj¡S )N)r8   r   r§   r   rC   )r(   r-   r-   r.   Úlog_weights_numpy¶  s    zEmpirical.log_weights_numpy©é
   é   Ú	FrequencyTr   c             O   sà   |	sdt jd< t d¡ tj|d}|  ¡ }|  ¡ }tj|f|ž||
dœ|—Ž |r^t d¡ |rptj	ddd |d k	r‚t 
|¡ |d k	r”t 
|¡ |d kr¢| j}t |¡ t |¡ | ¡  |d k	rÐt |¡ |	rÜt ¡  d S )	NFzaxes.unicode_minusÚagg)Úfigsize)r*   ÚdensityrM   Úclip)Znonposy)ÚmplÚrcParamsÚpltÚswitch_backendÚfigurer¦   r¨   ÚhistZxscaleZyscaleÚxticksr   ÚxlabelÚylabelÚtight_layoutÚsavefigÚshow)r(   r¯   r¹   rº   r¸   ZyticksZ
log_xscaleZ
log_yscaler+   r½   r°   rf   rg   Úfigr)   r*   r-   r-   r.   Úplot_histogramº  s0    







zEmpirical.plot_histogram)NNNNr	   Fr   )N)NN)NN)NN)NNN)NNN)N)
rª   Nr­   NNFFNTr   )/Ú__name__Ú
__module__Ú__qualname__r%   r/   r2   r3   r4   Úpropertyr5   r0   r?   r'   r8   r=   r&   rO   rP   rR   rT   r;   r[   r^   ra   re   rc   rh   rs   ry   rz   r{   r†   rŠ   rŒ   rŽ   r   r‘   r’   rt   rž   Ústaticmethodr¡   r¦   r¨   r©   r¿   Ú__classcell__r-   r-   )r,   r.   r      sT   $



	



r   )rD   r£   r¤   r?   r   r”   Ú
matplotlibr²   Úmatplotlib.pyplotÚpyplotr´   rW   ru   Ú	termcolorr   rk   r   r   r   r   r-   r-   r-   r.   Ú<module>   s   