B
    -]dÏ  ã               @   s  d dl Z d dlZd dlmZmZ d dlZd dlZd dl	m
Z d dlZd dlZd dlZd dlmZ ddlmZmZ ddlmZ ddlmZ ddlmZ d,d
d„Zd-dd„Zd.dd„Zd/dd„Zd0dd„Zd1dd„Zdd„ Zd2dd „Z d3d!d"„Z!d4d#d$„Z"d5d%d&„Z#d6d(d)„Z$d7d*d+„Z%dS )8é    N)ÚOrderedDictÚdefaultdict)Úkl_divergenceé   )Ú__version__Úutil)Ú	Empirical)ÚGraph)ÚTraceTc             C   sb  i }i }|d k	r"|d }|d }ni }i }xFt | jƒD ]6}|  |¡}t|  |¡ƒ}	x|jD ]
}
|
j}|
j}|rz|n|}||kr°|| d  d7  < || d  |	7  < q`||krÂ|| }n|rô| d¡rÞ|dd … }ndt	t
|ƒd ƒ }nV| d¡r|dd … }n<||kr4dt	t
|ƒd ƒ }|||< || d	 t	|
jƒ }|||< d|	||
d
œ||< |
||< q`W q8W tt| ¡ dd„ dƒ}tƒ }t|d< tj|d< t
| ƒ|d< t
|ƒ|d< t
dd„ t| ¡ ƒD ƒƒ|d< t
dd„ t| ¡ ƒD ƒƒ|d< t
dd„ t| ¡ ƒD ƒƒ|d< t
dd„ t| ¡ ƒD ƒƒ|d< t
dd„ t| ¡ ƒD ƒƒ|d< |||||dœS )NÚaddress_idsÚaddress_base_idsÚcountr   ÚweightZ__Aé   ÚAÚ__)r   r   Ú
address_idÚvariablec             S   s   t  | d d ¡S )Nr   r   )r   Úaddress_id_to_int)Úv© r   úS/global/project/projectdirs/dasrepo/etalumis/pyprob_saeid-dev/pyprob/diagnostics.pyÚ<lambda>9   ó    z _address_stats.<locals>.<lambda>)ÚkeyÚpyprob_versionÚtorch_versionZnum_distribution_elementsÚ	addressesc             S   s   g | ]}|d  j rd‘qS )r   r   )Úcontrol)Ú.0Úvaluer   r   r   ú
<listcomp>?   s    z"_address_stats.<locals>.<listcomp>Zaddresses_controlledc             S   s   g | ]}|d  j rd‘qS )r   r   )Úreplace)r   r    r   r   r   r!   @   s    Zaddresses_replacedc             S   s   g | ]}|d  j rd‘qS )r   r   )Ú
observable)r   r    r   r   r   r!   A   s    Zaddresses_observablec             S   s   g | ]}|d  j rd‘qS )r   r   )Úobserved)r   r    r   r   r   r!   B   s    Zaddresses_observedc             S   s   g | ]}|d  j rd‘qS )r   r   )Útagged)r   r    r   r   r   r!   C   s    Zaddresses_tagged)r   Úaddresses_extrar   r   Úaddress_id_to_variable)ÚrangeÚlengthÚ
_get_valueÚfloatÚ_get_weightÚ	variablesÚaddress_baseÚaddressÚ
startswithÚstrÚlenÚinstancer   ÚsortedÚitemsr   ÚtorchÚlistÚvalues)Ú
trace_distÚuse_address_baseÚreuse_ids_from_address_statsr   r'   r   r   ÚiÚtraceÚtrace_weightr   r.   r/   r   r   Zaddress_base_idr&   r   r   r   Ú_address_stats   sX    





r?   c          
      sÜ  t | ˆ|d}|d ‰ i }|d k	r,|d }ni }xÚt| jƒD ]Ì}|  |¡}t|  |¡ƒ}	d ‡fdd„|jD ƒ¡}
|
|krà|
|krŒ||
 }ndtt	|ƒd ƒ }|||
< d	g‡ ‡fd
d„|jD ƒ dg }d|	|||dœ||
< q<||
 d  d7  < ||
 d  |	7  < q<W t
t| ¡ dd„ ddƒ}dd„ tt	ˆ ƒƒD ƒ}g }x$ˆ  ¡ D ]\}}| |d ¡ qJW t||dd}dd„ tt	|ƒƒD ƒ}g }x$| ¡ D ]\}}| |d ¡ q˜W t||dd}|  dd„ ¡ ¡  d¡}|  dd„ ¡ ¡  d¡}|  dd„ ¡ ¡  d¡}t
ƒ }t	|ƒ|d< t|jƒ|d< t|jƒ|d < t|jƒ|d!< t|jƒ|d"< t|jƒ|d#< t|jƒ|d$< t|jƒ|d%< t|jƒ|d&< t|jƒ|d'< t|jƒ|d(< t|jƒ|d)< t|jƒ|d*< |||||||||d+œ	S ),N)r:   r;   r   Ú	trace_idsÚ c                s   g | ]}ˆ r|j n|j‘qS r   )r.   r/   )r   r   )r:   r   r   r!   R   s    z _trace_stats.<locals>.<listcomp>ÚTr   ZSTARTc                s$   g | ]}ˆ ˆr|j n|j d  ‘qS )r   )r.   r/   )r   r   )r   r:   r   r   r!   Y   s    ÚEND)r   r   Útrace_idr=   Úaddress_id_sequencer   r   c             S   s   | d d S )Nr   r   r   )r   r   r   r   r   ^   r   z_trace_stats.<locals>.<lambda>T)r   Úreversec             S   s   g | ]}|‘qS r   r   )r   r<   r   r   r   r!   _   s    z
Address ID)ÚweightsÚnamec             S   s   g | ]}|‘qS r   r   )r   r<   r   r   r   r!   d   s    zUnique trace IDc             S   s   | j S )N)r)   )r=   r   r   r   r   i   r   zTrace length (all)c             S   s   | j S )N)Úlength_controlled)r=   r   r   r   r   j   r   zTrace length (controlled)c             S   s   | j S )N)Úexecution_time_sec)r=   r   r   r   r   k   r   zTrace execution time (s)Ztrace_typesZtrace_length_minZtrace_length_maxZtrace_length_meanZtrace_length_stddevZtrace_length_controlled_minZtrace_length_controlled_maxZtrace_length_controlled_meanZtrace_length_controlled_stddevZtrace_execution_time_minZtrace_execution_time_maxZtrace_execution_time_meanZtrace_execution_time_stddev)	ÚtracesÚtraces_extrar@   Úaddress_statsÚtrace_id_distÚtrace_length_distÚtrace_length_controlled_distÚtrace_execution_time_distÚaddress_id_dist)r?   r(   r)   r*   r+   r,   Újoinr-   r1   r2   r   r4   r5   Úappendr   ÚmapÚ
unweightedÚrenameÚminÚmaxÚmeanÚstddev)r9   r:   r;   Zreuse_ids_from_trace_statsrM   rK   r@   r<   r=   r>   Z	trace_strrD   rE   r   Zaddress_weightsr   r    rR   Zunique_trace_idsZtrace_weightsÚ_rN   rO   rP   rQ   rL   r   )r   r:   r   Ú_trace_statsG   s`    


"r]   ©é
   é   é   Fc             C   s"  t | |d}|d }|d }	|r|s:dtjd< t d¡ tjdd|d\}
}|d	  ¡ }|d	  ¡ }|d	 j}|d
 j	||d|d |d
  
|¡ |d
  d¡ |d
 jddd |d  ¡ }|d  ¡ }|d j}|d j	||d|d |d  
|¡ |d jddd |d  ¡ }|d  ¡ }|d j}|d j	||dt|ƒd |d  
|¡ |d  d¡ |d jddd |d  ¡ }|d  ¡ }|d j}|d j	||d|d |d  
|¡ |d jddd tj| jdddddd tjddddgd  |d k	r|d! }td" |¡ƒ t |¡ |d# }td$ |¡ƒ t|d%ƒ:}| d&¡ x(|	 ¡ D ]\}}| d' ||¡¡ qZW W d Q R X |d( }td) |¡ƒ t|d%ƒf}| d*¡ xT| ¡ D ]H\}}| d+ |d, |d- t|d. jƒt|d. jƒd/ |d0 ¡¡¡ qºW W d Q R X |rt ¡  d S )1N)r:   rK   rL   Fzaxes.unicode_minusÚaggr   )ÚfigsizerO   )r   r   r   )rG   ÚdensityÚbinsÚ	FrequencyÚlogÚclip)ÚnonposyrP   )r   r   rR   )r   r   rQ   )r   r   g        g®Gáz®ï?ÚleftÚtopr_   )ÚxÚyÚhorizontalalignmentÚverticalalignmentÚfontsizer   g¸…ëQ¸ž?gffffffî?)Úrectz.pdfzPlotting to file: {}z.txtzSaving trace report to file: {}Úwzpyprob diagnostics
z{}: {}
z.csvzSaving traces to file: {}z@trace_id, count, length, length_controlled, address_id_sequence
z{}, {}, {}, {}, {}
rD   r   r=   ú rE   )r]   ÚmplÚrcParamsÚpltÚswitch_backendÚsubplotsÚvalues_numpyÚweights_numpyrH   ÚhistÚ
set_xlabelÚ
set_ylabelÚ
set_yscaler2   ÚsuptitleÚtight_layoutÚprintÚformatÚsavefigÚopenÚwriter5   r-   Úvariables_controlledrS   Úshow)r9   r:   rc   re   ÚplotÚ	plot_showÚ	file_nameZtrace_statsrK   rL   ÚfigÚaxr8   rG   rH   Úplot_file_nameÚreport_file_nameÚfiler   r    Ztraces_file_namer   r   r   Útrace_histograms}   sl    








"
Nr   ©é   é   c       )         s¢  t | tƒs| g} i }d }	i }
xÖ| D ]Ì}td |j¡ƒ t|||	d}	|	d }x>| ¡ D ]2\}}||
kr„|
| d  |d 7  < qZ||
|< qZW |	d }d}t dt	|ƒd¡ x8| ¡ D ]*\}}t 
|¡ |d	7 }|d
 }|d }d}y¸|r.|j‰| ‡fdd„¡ ‡fdd„¡ dd„ ¡ dd„ ¡}n:|j‰ | ‡ fdd„¡ ‡ fdd„¡ dd„ ¡ dd„ ¡}| |jd kr€|d nd ||j¡¡ |jdkr d}W n tk
r¼   d}Y nX |rº||krÔi ||< ||f|| |j< qºW t ¡  q$W |rž|sdtjd< t d¡ dtjd< t t	|ƒ¡\}}tj|||d\}}| ¡ }d}ttddd d!d"d#d$d%d&d'd(d)gƒƒ}i }t d*t	|ƒd+¡ x~| ¡ D ]p\}}t 
|¡ xP| ¡ D ]B\}}|d }|d	 }| ¡ }| ¡ }||krúd } || }!n|} | ¡ }!|!||< t|jd,ƒr6t |jj!ƒt |jj"ƒf}"nd }"|| j#||d	||!| d-|"d. || j$|jdd/d0 || j%d1d2d3 |d k	r¸d }#|r¸|j‰ˆ|j&krÚt |j&ˆ j'ƒ}#n"|j‰ ˆ |j(krÚt |j(ˆ  j'ƒ}#|#d k	r¸|| j)|#d4d5d-d6 q¸W |d	7 }q˜W t ¡  | *¡  tj+d7d/d7d/d8d9d: |d k	r|d; }$td< |$¡ƒ t ,|$¡ |d= }%td> |%¡ƒ t-|%d?ƒR}&|& .d@¡ |& .|r˜dAnddB ¡ x(| ¡ D ]\}}|& .dC ||¡¡ q¬W W d Q R X |dD }'tdE |'¡ƒ t-|'d?ƒ”}&|& .dF |r
dGndH¡¡ xr|
 ¡ D ]f\}}|d jd kr8dn|d j}(|& .dI |d
 |d |(|d j/|d j0|d j1|d j2|¡¡ qW W d Q R X |ržt 3¡  d S )JNz&Collecting values for distribution: {})r:   r;   r   r   r&   r   zCollecting valuesZ	Addressesr   r   r   Tc                s
   ˆ | j kS )N)Úvariables_dict_address_base)r=   )r.   r   r   r   Ú   r   z$address_histograms.<locals>.<lambda>c                s   t  | jˆ  j¡S )N)r   Ú	to_tensorr”   r    )r=   )r.   r   r   r   Ú   r   c             S   s
   t  | ¡S )N)r6   Ú	is_tensor)r   r   r   r   r   Ú   r   c             S   s   |   ¡ dkS )Nr   )Únelement)r   r   r   r   r   Ú   r   c                s
   ˆ | j kS )N)Úvariables_dict_address)r=   )r/   r   r   r   Ý   r   c                s   t  | jˆ  j¡S )N)r   r•   r˜   r    )r=   )r/   r   r   r   Ý   r   c             S   s
   t  | ¡S )N)r6   r–   )r   r   r   r   r   Ý   r   c             S   s   |   ¡ dkS )Nr   )r—   )r   r   r   r   r   Ý   r   rA   z{} ({})Fzaxes.unicode_minusrb   é   z	font.size)rc   z#1f77b4z#ff7f0ez#2ca02cz#d62728z#9467bdz#8c564bz#e377c2z#7f7f7fz#bcbd22z#17becfÚbÚkzPlotting histogramsÚ
HistogramsÚlowg      è?)rG   rd   re   ÚcolorÚlabelÚalphar(   gffffffî?)rp   rm   g        r   )Úpadr)   ÚdashedÚgray)rl   Ú	linestylerž   Ú	linewidthgš™™™™™©?g      ø?g333333ë?)rj   ÚrightÚbottomrk   ÚhspaceÚwspacez.pdfzPlotting to file: {}z.txtz!Saving address report to file: {}rr   zpyprob diagnostics
zaggregated zaddress report
z{}: {}
z.csvzSaving addresses to file: {}zHaddress_id, count, name, controlled, replaced, observable, observed, {}
r.   r/   z{}, {}, {}, {}, {}, {}, {}, {}
)4Ú
isinstancer7   r   r‚   rH   r?   r5   r   Úprogress_bar_initr2   Úprogress_bar_updater.   ÚfilterrU   r/   rW   r)   Ú	ExceptionÚprogress_bar_endrt   ru   rv   rw   Útile_rows_colsrx   ÚflattenÚreversedry   rz   ÚpopÚhasattrÚdistributionr+   r   Úhighr{   Ú	set_titleÚtick_paramsr”   r    r˜   ÚaxvlineÚlegendÚsubplots_adjustrƒ   r„   r…   r   r"   r#   r$   r‡   ))Útrace_distsÚground_truth_tracerc   re   r:   rˆ   r‰   rŠ   ÚdistsrM   Úaddress_stats_combinedr9   r   r   Úvalr&   r<   r    r   r   Z
can_renderÚdistÚrowsÚcolsr‹   rŒ   Úhist_color_cycleÚhist_colorsZtrace_dist_namer   r8   rG   rŸ   rž   r(   Zvline_xr   rŽ   r   Zaddresses_file_namerH   r   )r/   r.   r   Úaddress_histograms¿   sÐ    

64(




$







"NrÆ   c       $   	   C   s‚  | j | j }| j| j }| j| j  }| jd }| jd }|| }|| j }|| j  }	|| j }
t| jƒdkr¦| jd }| jd }|| }|| j }|| j  }|| j }tƒ }t|d< tj|d< | j	|d< | j
d |d< | j|d< | j|d< | j|d	< t| jƒ|d
< | jd k	|d< | j|d< | j|d< t| jƒ|d< | j|d< | j|d< | j|d< | j|d< | j|d< | j |d< ||d< ||d< ||d< ||d< ||d< ||d< |	|d< |
|d< t| jƒdkræ||d< ||d< ||d< ||d < ||d!< |d k	r~tj |¡std" |¡ƒ t |¡ tj |d#¡}td$ |¡ƒ t|d%ƒZ}| d&¡ x(| ¡ D ]\}}| d' ||¡¡ qRW | d(¡ | tt |  !¡ ƒƒ¡ W d Q R X d)t"j#d*< t$ %d+¡ tj |d,¡}td- |¡ƒ t$j&d.d/}t$ 'd0¡}|j(| j)| jd1d2 |j(| j*| jd3d2 | +¡  t$ ,d4¡ t$ -d5¡ t$ .¡  | /¡  t$ 0|¡ tj |d6¡}td7 |¡ƒ t$j&d.d/}t$ 'd0¡}|j(| j1| j
d1d2 t$ ,d4¡ t$ -d8¡ t$ .¡  | /¡  t$ 0|¡ tj |d9¡}tj |¡sètd" |¡ƒ t |¡ tj |d:¡}t|d%ƒv}| d;¡ tt2|  3¡ ƒƒ}t4 5d<|d=¡ x<t6|  3¡ ƒD ]*\}}t4 7|d> ¡ tƒ  tj |d? |¡¡}|d } | d@ tj 8|¡| ¡¡ tdA || ¡ƒ |d>  9¡  :¡  ;¡ }!|!j<d>krÎt= >|!d>¡}!n8|!j<dBkrtdC | |!j<¡ƒ |!j?d }"t= @|!|"df¡}!t$j&d.d/}t$ 'd0¡}|jA|!t$jBjCdD}#| D¡  t$ ,dE | |!j?¡¡ t$ E|#¡ t$ 0|¡ t$ F¡  q<W t4 G¡  W d Q R X |S )FNr   éÿÿÿÿzpyprob versionztorch versionznetwork typeznumber of parameterszpre-generated layersÚmodifiedZupdatesztrained on devicezdistributed trainingzdistributed backendzdistributed world sizeÚ	optimizerzlearning rateÚmomentumz
batch sizeztotal train. secondsztotal train. tracesztotal train. iterationsztrain. iter. per secondztrain. traces per secondztrain. traces per iter.ztrain. loss initialztrain. loss finalztrain. loss change per secondztrain. loss change per iter.ztrain. loss change per tracezvalid. loss initialzvalid. loss finalzvalid. loss change per secondzvalid. loss change per iter.zvalid. loss change per tracez&Directory does not exist, creating: {}zinference_network_stats.txtz%Saving diagnostics information to {} rr   zpyprob diagnostics report
z{}: {}
zarchitecture:
Fzaxes.unicode_minusrb   zloss.pdfzPlotting loss to file: {})r_   é   )rc   éo   ZTraining)rŸ   Z
ValidationzTraining tracesZLossznum_params.pdfz*Plotting number of parameters to file: {} zNumber of parametersÚparamsz
params.csvzfile_name, param_name
z%Plotting inference network parametersZ
Parametersr   zparam_{}.pngz{}, {}
z#Plotting to file: {}  parameter: {}r   z3Warning: reshaping parameter {} to 2D for plotting.)Úcmapz{} {})HÚ_total_train_iterationsÚ_total_train_secondsÚ_total_train_tracesÚ_history_train_lossr2   Ú_history_valid_lossr   r   r6   Ú_network_typeÚ_history_num_paramsÚ_layers_pre_generatedÚ	_modifiedÚ_updatesr1   Ú_deviceÚ_distributed_backendÚ_distributed_world_sizeÚ_optimizer_typeÚ_learning_rateÚ	_momentumÚ_batch_sizeÚosÚpathÚexistsr   r‚   ÚmakedirsrS   r„   r…   r5   ÚnextÚmodulesrt   ru   rv   rw   ÚfigureÚsubplotrˆ   Ú_history_train_loss_traceÚ_history_valid_loss_tracerº   ÚxlabelÚylabelÚgridr€   rƒ   Ú_history_num_params_tracer7   Únamed_parametersr   r«   Ú	enumerater¬   ÚbasenameÚcpuÚdetachÚnumpyÚndimÚnpÚexpand_dimsÚshapeÚreshapeÚpcolorÚcmÚjetÚinvert_yaxisÚcolorbarÚcloser¯   )$Úinference_networkÚsave_dirZtrain_iter_per_secZtrain_traces_per_secZtrain_traces_per_iterZtrain_loss_initialZtrain_loss_finalZtrain_loss_changeZtrain_loss_change_per_secZtrain_loss_change_per_iterZtrain_loss_change_per_traceZvalid_loss_initialZvalid_loss_finalZvalid_loss_changeZvalid_loss_change_per_secZvalid_loss_change_per_iterZvalid_loss_change_per_traceÚstatsZfile_name_statsr   r   r    Zfile_name_lossr‹   rŒ   Zfile_name_num_paramsZsave_dir_paramsZfile_name_paramsÚ
num_paramsÚindexÚparamZfile_name_paramÚ
param_nameZ	param_valÚcZheatmapr   r   r   Únetwork1  sì    


























 















r  c       	      C   sd   t | ||||d}|d k	r`|j||d x6| ¡ D ]*\}}|jd ||¡|d krT|n|d q2W |S )N)r9   r:   Ún_most_frequentÚ
base_graphÚnormalize_weights)Zbackground_graphz{}_{})r	   Zrender_to_fileZtrace_graphsr‚   )	r9   r:   r  r	  rŠ   r
  ÚgraphrD   Ztrace_graphr   r   r   r  ±  s    &r  c          	   C   s€   t d |¡ƒ t |¡ t|dƒT}| d¡ xB| j ¡ D ]4\}}| d¡r:| 	dd¡}|}| d ||¡¡ q:W W d Q R X d S )Nz&Saving address_id, address pairs to {}rr   zaddress_id, address
Ú__id__rA   z{}, {}
)
r   r‚   r   Úcreate_pathr„   r…   Ú_shelfr5   r0   r"   )Úaddress_dictionaryrŠ   r   r   r    r   r/   r   r   r   r  º  s    


r  éè  Ú	IterationúLog probabilityc                sú  t ˆ ƒtkrtdƒ‚|d kr d}g }g }x†ttˆ ƒƒD ]t}t ˆ | d ƒtkrZtdƒ‚|d krnˆ | j}ntˆ | j|ƒ}|| }| tt||t	dt
|| ƒƒƒƒ¡ t ¡ }d}tt|ƒƒ}tdƒ td d |d d ¡¡ƒ g }x²|| D ]¦}| ˆ |  |¡j¡ t ¡ | }|| tjks6||d krô|}|d | }td t |¡t || | ¡t |d |¡t|d ƒ |¡||¡d	d
 tj ¡  qôW tƒ  | |¡ q8W |
ræ|sÒdtjd< t d¡ tj|d}|d kr‡ fdd„tt|ƒƒD ƒ}x>tt|ƒƒD ].}tj|| || f|ž|d|| i—Ž qW |rRt  d¡ |	rftj!ddd |d k	rzt "|¡ |d k	rŽt "|¡ t #|¡ t $|¡ tj%dd | &¡  |d k	rØtd |¡ƒ t '|¡ |ræt (¡  t) *|¡t) *|¡fS )NzbExpecting a list of posterior trace distributions, each from a call to a Model's posterior_traces.r   r   z)Loading trace log-probabilities to memoryzBTime spent  | Time remain.| Progress             | {} | Traces/secr
   r   z%{} | {} | {} | {}/{} | {:,.2f}       ú)ÚendFzaxes.unicode_minusrb   )rc   c                s   g | ]}d   ˆ | j¡‘qS )z{})r‚   rH   )r   r<   )r¼   r   r   r!   í  s    zlog_prob.<locals>.<listcomp>rŸ   rg   rh   )ri   Úbest)ÚloczPlotting to file: {})+Útyper7   Ú	TypeErrorr(   r2   r
   r)   rX   rT   rY   ÚintÚtimer1   r   r‚   Úljustr*   Úlog_probr   Ú_print_refresh_rateÚdays_hours_mins_secs_strÚprogress_barÚrjustÚsysÚstdoutÚflushrt   ru   rv   rw   ræ   rˆ   ÚxscaleÚyscaleÚxticksrê   rë   rº   r€   rƒ   r‡   rõ   Úarray)r¼   Ú
resolutionÚnamesrc   rê   rë   r&  ÚyticksÚ
log_xscaleÚ
log_yscalerˆ   r‰   rŠ   Ú	min_indexÚ	max_indexÚargsÚkwargsÚmin_iÚitersÚ	log_probsÚjÚmax_iÚ
num_tracesÚ
time_startÚprev_durationÚlen_str_num_tracesÚvalsr<   ÚdurationÚtraces_per_secondr‹   r   )r¼   r   r  Æ  sr    $J


.








r  c       	         sè   t tƒ}ˆ d kr| j‰ t dˆ d¡ xTtˆ ƒD ]H}|  |¡}t |¡ x.|jD ]$}|j	 
¡ dkrN||j  d7  < qNW q.W t ¡  ‡ fdd„| ¡ D ƒ}tt| ¡ dd„ dd	ƒ}g }x,t|ƒD ] \}}| |¡ |d |krÀP qÀW |S )
Nz"Collecting most frequent addressesÚTracesr   c                s   i | ]\}}|ˆ kr||“qS r   r   )r   r›   r   )r6  r   r   ú
<dictcomp>  s    z._n_most_frequent_addresses.<locals>.<dictcomp>c             S   s   | d S )Nr   r   )rl   r   r   r   r     r   z,_n_most_frequent_addresses.<locals>.<lambda>T)r   rF   )r   r  r)   r   r«   r(   r*   r¬   r-   r    r—   r/   r¯   r5   r   r4   rï   rT   )	r9   r  r6  Zaddress_countsr<   r=   r   Úretr/   r   )r6  r   Ú_n_most_frequent_addresses  s&    


r@  c                sÒ  ˆd kr| j ‰|d krrttƒ‰ x>tˆƒD ]2}|  |¡}x"|j ¡ D ]}ˆ |  d7  < qBW q(W ‡ ‡fdd„ˆ D ƒ}i }x\|D ]T}| d j| }|j ¡ dkr||j	|krÂd t
 ˆ¡t
j dœ||j	< |||j	 d< q|W |d k	rJt| |ˆƒ}	x^|	D ]V}
| d j|
 }|j ¡ dkrð|j	|kr8d t
 ˆ¡t
j dœ||j	< |||j	 d< qðW t|ƒdkr`tdƒ‚t dˆd	¡ xVtˆƒD ]J}|  |¡}t |¡ x.| ¡ D ]"\}
}t|j|
 jƒ|d
 |< qšW qxW t ¡  |S )Nr   c                s   g | ]}ˆ | ˆkr|‘qS r   r   )r   rH   )Úname_countsr6  r   r   r!   $  s    z$_variable_values.<locals>.<listcomp>r   )r   r8   r   zNo variables with scalar value.z$Loading selected variables to memoryr=  r8   )r)   r   r  r(   r*   Únamed_variablesÚkeysr    r—   r/   rõ   ÚonesÚnanr@  r˜   r2   ÚRuntimeErrorr   r«   r¬   r5   r+   r¯   )r9   r)  r  r6  r<   r=   rH   Úvariable_valuesr   r   r/   r   r   )rA  r6  r   Ú_variable_values  sD    






$rH  c          	   O   s2  t | ƒtkrtdƒ‚t | d ƒtkr,tdƒ‚dd„ }| j}|d krdt t dt |d ¡¡ 	t
¡¡}t| ||ƒ}xVt| ¡ ƒD ]F\}\}}td |d j|d j|d t|ƒ¡ƒ ||d	 |ƒ|d
< q~W |r*|	sædtjd< t d¡ tj|d}tjdddd d}x’| ¡ D ]†\}}|d j}|d
 }|d krrd }|sPd t|ƒ¡}d}tj||f|ž|dd|dœ—Ž n"tj||f|ž|d|d ji—Ž qW |rªt d¡ |d k	r¾t |¡ |d k	rÒt |¡ t d¡ t d¡ tjdd | ¡  |
d k	rtd |
¡ƒ t  |
¡ |	r*t !¡  ||fS )NzExpecting a trace distribution.r   c                s   t  ‡ fdd„|D ƒ¡}|S )Nc                s>   g | ]6}|d krdn$t  ˆ |d… ˆ d| … ¡d  d ‘qS )r   g      ð?Nr   )rõ   Úcorrcoef)r   Úlag)r8   r   r   r!   M  s    z=autocorrelation.<locals>._autocorrelation.<locals>.<listcomp>)rõ   r'  )r8   Úlagsr?  r   )r8   r   Ú_autocorrelationL  s    z)autocorrelation.<locals>._autocorrelationr   zGComputing autocorrelation for variable address: {}, name: {} ({} of {})r   r   r8   ÚautocorrelationFzaxes.unicode_minusrb   )rc   Úblack)rm   r¥   rž   z{} most frequent addressesTr£   )r¥   rž   rŸ   rŸ   rg   ZLagZAutocorrelationr  )r  zPlotting to file: {})"r  r   r  r
   r)   rõ   ÚuniqueÚlogspaceÚlog10Úastyper  rH  rï   r5   r   r‚   r/   rH   r2   rt   ru   rv   rw   ræ   Úaxhlinerˆ   r$  r&  rê   rë   rº   r€   rƒ   r‡   )r9   r)  rK  r  rc   r&  r*  r+  rˆ   r‰   rŠ   r/  r0  rL  r6  rG  r<   r/   r   r‹   Úother_legend_addedrH   rM  rŸ   r   r   r   rM  F  sZ    "(



"(








rM  é2   c          	      s¬  dd„ ‰ ‡ fdd„}dd„ | D ƒ}t |ƒ}t|ƒ|krDtd |¡ƒ |d krjt t dt |¡¡ t	¡¡}i }xd| D ]\}t
||||ƒ}xH| ¡ D ]<\}}||krÄt || d	 |d	 f¡|| d	< q|||< qW qtW xVt| ¡ ƒD ]F\}\}}td
 |d j|d j|d t|ƒ¡ƒ ||d	 |ƒ|d< qâW |	r¤|
sLdtjd< t d¡ tj|d}tjdddd d}x’| ¡ D ]†\}}|d j}|d }|d krØd }|s¶d t|ƒ¡}d}tj||f|ž|dd|dœ—Ž n"tj||f|ž|d|d ji—Ž qvW |rt d¡ |r$tjddd |d k	r8t |¡ |d k	rLt |¡ t d¡ t d¡ tjdd | ¡  |d k	r–td  |¡ƒ t |¡ |
r¤t  ¡  ||fS )!Nc             S   sˆ   | j d | j d  }}|dk r&tdƒ‚|tjtj| ddddd }tjtj| ddddd}|d | | ||  }t || ¡}|S )Nr   r   r   z4Gelman-Rubin diagnostic requires at least two chains)Úaxis)rV  Úddof)r÷   Ú
ValueErrorrõ   ÚvarrZ   Úsqrt)r8   ÚmÚnrš   rr   Zv_hatZr_hatr   r   r   Ú_r_hat  s    zgelman_rubin.<locals>._r_hatc                sD   t j|td}x0t|ƒD ]$\}}ˆ | d d …d |…f ƒ||< qW |S )N)Údtype)rõ   Ú
zeros_liker+   rï   )r8   r2  r?  r<   Úiter)r]  r   r   Ú_r_hats‹  s     zgelman_rubin.<locals>._r_hatsc             S   s   g | ]
}|j ‘qS r   )r)   )r   r=   r   r   r   r!   ‘  s    z gelman_rubin.<locals>.<listcomp>zDDistributions have unequal length, setting the length to minimum: {}r   r8   z=Computing R-hat for variable address: {}, name: {} ({} of {})r   r   ÚrhatFzaxes.unicode_minusrb   )rc   rN  )rm   r¥   rž   z{} most frequent addressesTr£   )r¥   rž   rŸ   rŸ   rg   rh   )ri   r  zR-hatr  )r  zPlotting to file: {})!rX   rY   r   r‚   rõ   rO  rP  rQ  rR  r  rH  r5   Úvstackrï   r/   rH   r2   rt   ru   rv   rw   ræ   rS  rˆ   r$  r%  r&  rê   rë   rº   r€   rƒ   r‡   )r¼   r)  r2  r  rc   r&  r*  r+  r,  rˆ   r‰   rŠ   r/  r0  ra  Útrace_lengthsr6  rG  r9   Úvvr/   r   r<   r‹   rT  rH   rb  rŸ   r   )r]  r   Úgelman_rubin€  sj    

$(



"(








rf  c       4         s  dd„ }dd„ }dd„ ‰dd„ ‰ ‡ ‡fd	d
„}t |tƒs<t‚|| |g|||ƒ\\}}}i }t| ¡ ƒt| ¡ ƒ@ }t dt|ƒd¡ xðt|ƒD ]â\}}t 	|¡ d 
|| d j|| d j|d t|ƒ¡}|| d }|| d }tt|ƒt|ƒƒ}|dk rtd 
||¡ƒ q| ¡ }| ¡ }tt |¡t |¡ƒtt |¡t |¡ƒf}tj||fžŽ } | d | d  }!tj||ddd }"tj||ddd }#|"d| 7 }"|#d| 7 }#tjjjt |"¡d}$tjjjt |#¡d}%t|$|%ƒ ¡ }&t|%|$ƒ ¡ }'|&|' d }(i })|(|)d< x,|| D ] }*|*dkr|| |* |)|*< qW i |)d< ||)d | j< ||)d |j< |!|)d< |)||< qW t ¡  |r ||||||||	|
||ƒ
 |d k	r|d }+td 
|+¡ƒ t|+dƒ2},dd d!d"d#d$g}-tj|,|-d%}.|. ¡  x€|  ¡ D ]t\}/})i }0|)d j|0d< |/|0d < |)d j|0d!< |)d |0d"< t|)d t!t"|)d ƒƒ ƒ|0d#< |)d |0d$< |. #|0¡ qþW d&d'„ | $¡ D ƒ}1t %|1¡}2t &|1¡}3|. #|-d d(|-d |i¡ |. #|-d d)|-d |2i¡ |. #|-d d*|-d |3i¡ td+ 
|2|3¡ƒ W d Q R X |S ),Nc
             S   st  |sdt jd< t d¡ dt jd< ttdddd	d
dddddddgƒƒ}
tj|d\}}| d¡ | d¡ |j	dd„ |  
¡ D ƒd|
d d t ¡  |	d k	r¼|	d }td |¡ƒ t |¡ |rÈt ¡  ttdddd	d
dddddddgƒƒ}
t t| ƒ¡\}}tj|||d\}}| ¡ }d}i }t dt| ƒd¡ xÜ|  ¡ D ]Ð\}}t |¡ x²|d  ¡ D ]¢\}}||kr~d }|| }n|}|
 ¡ }|||< t |¡t |¡f}|| j	|d |||d|d! || jd" | | d# j|d$ ¡dd%d& || jd'd(d) q^W |d 7 }q>W t ¡  | ¡  tj d*d%d*d%d+d,d- |	d k	rb|	d. }td |¡ƒ t |¡ |rpt ¡  d S )/NFzaxes.unicode_minusrb   r™   z	font.sizez#1f77b4z#ff7f0ez#2ca02cz#d62728z#9467bdz#8c564bz#e377c2z#7f7f7fz#bcbd22z#17becfrš   r›   )rc   ú#u   Jensenâ€“Shannonc             S   s   g | ]}|d  ‘qS )Ú
divergencer   )r   r   r   r   r   r!   Ý  s    z5jensen_shannon.<locals>.plot_func.<locals>.<listcomp>g      è?rÇ   )r    rž   z_divergence_hist.pdfzPlotting to file: {}r   zPlotting histogramsrœ   r8   r   )rd   re   rž   rŸ   r    r(   z{} / {:.2f}rÁ   rh  gffffffî?)rp   rm   g        r   )r¡   r)   gš™™™™™©?g      ø?g333333ë?)rj   r¦   r§   rk   r¨   r©   z_address.pdf)!rt   ru   rv   rw   r7   r²   rx   r}   r|   r{   r8   r€   r   r‚   rƒ   r‡   r   r°   r2   r±   r«   r5   r¬   r³   rõ   rX   rY   r·   rH   r¸   r¯   rº   r»   )Úvariable_infor¿   rc   re   r&  r*  r+  r,  r‰   rŠ   rÄ   r‹   Úax1r   rÂ   rÃ   rŒ   r<   rÅ   r   r   Z
dist_labelr8   rŸ   rž   Úrange_r   r   r   Ú	plot_funcÑ  s\    


$

"
$


*

z!jensen_shannon.<locals>.plot_funcc             S   sJ   | j r"|t| ƒkr|  |¡S | S n$|r0|  |¡S ttjj| |ddƒS dS )a-  
        Given an empirical distribution, shrinks it to final_size.
        If final size is bigger than the current size, it will be unchanged
        If it has unifrom weights, we assume it comes from some sort of MCMC, therefore it will be thinned.
        Otherwise, it will be resampled.
        F)r"   N)Ú_uniform_weightsr2   ÚthinÚresampler   rõ   ÚrandomÚchoice)Úempirical_distÚ
final_sizeÚposterior_flagr   r   r   Ú_shrink  s    

zjensen_shannon.<locals>._shrinkc             S   sò   t dd„ ƒ}| j}t d|d¡ x²t|ƒD ]¦}|  |¡}t |¡ |  |¡}x‚|D ]z}|rb|j}	n|j	}	||	krR|	| j
 ¡ dkrR|| d dkr¢|	| || d< || d  |	| j
¡ || d	  |¡ qRW q*W d
d„ | ¡ D ƒ}
t ¡  |
S )aÅ  
        Arguments
        ---------
        trace_dist          Empirical distribution over traces
        chosen_addresses    List of chosen addresses (could be address bases, depending on use_address_base)
        num_traces          length of the empirical distribution to consider
        use_address_base    If True, uses address base as variable identifier.

        Returns
        -------
        A dictionary of the same addresses (could be a subset of addresses, if no sample exist for that variable)
        to another dictionary:
            'variable'  ->  variable information for the variable at the address
            'dist'      ->  Empirical distribution over this single variable
        c               S   s   d g g dœS )N)r   r8   Úlog_weightsr   r   r   r   r   r   3  r   zFjensen_shannon.<locals>.generate_variable_empiricals.<locals>.<lambda>z$Loading selected variables to memoryr=  r   r   Nr8   rv  c             S   sL   i | ]D\}}|d  dk	rt |d ƒdkr|d  t|d |d ddœ|“qS )r   Nr8   r   rv  )rv  )r   rÁ   )r2   r   )r   r/   Úinfor   r   r   r>  G  s   zHjensen_shannon.<locals>.generate_variable_empiricals.<locals>.<dictcomp>)r   r)   r   r«   r(   r*   r¬   Ú_get_log_weightr”   r˜   r    r—   rT   r5   r¯   )r9   Úchosen_addressesr:   ri  r6  r<   r=   r>   r/   Ztrace_variables_dictZret_valr   r   r   Úgenerate_variable_empiricals#  s*    



z4jensen_shannon.<locals>.generate_variable_empiricalsc             S   sn   | d }t t| ¡ dd„ ddƒ}g }d}x>| ¡ D ]2\}}t|ƒ|krJP |d j ¡ dkr4| |¡ q4W |S )	Nr   c             S   s   | d d S )Nr   r   r   )rl   r   r   r   r   P  r   zEjensen_shannon.<locals>._n_most_frequent_from_stats.<locals>.<lambda>T)r   rF   r   r   r   )r   r4   r5   r2   r    r—   rT   )rM   r\  r   Zordered_addressesÚresr<   Zcandidate_addressri  r   r   r   Ú_n_most_frequent_from_statsN  s    z3jensen_shannon.<locals>._n_most_frequent_from_statsc                sØ   g }d}xÆ| D ]¾}t d |j¡ƒ t|||d}ˆ ||ƒ}ˆ|||ƒ}t| ¡ ƒ}	x&|	D ]}
|d |
 }| |
¡||< qXW xF| ¡ D ]:\}}|d }|d  |jdkr®|d nd ||j¡¡ q„W | 	|¡ qW ||fS )	a  
        Arguments
        ---------
        trace_dists         List of trace distributions
        names               List of chosen variable names (if any)
        n_most_frequent     Number of most frequent variables to include in the result
        use_address_base    If True, uses address base as variable identifier.

        Returns
        -------
        variable_empiricals A dictionary from variable address_id to:
                            A dictionary with the keys ['dist', 'variable']
                            The value associated with 'dist' is the distribution over
                            the corresponding variable, renamed to be used as plot title
        address_stats       Combined address_stats for trace distributions in the given trace_dists.

        NzComputing address stats for {})r:   r;   r   r   rÁ   rA   z{} ({}))
r   r‚   rH   r?   r7   rC  r³   r5   rW   rT   )r¼   r)  r  r:   Zvariable_empiricalsrM   r9   ry  Zvariable_empiricalrC  r›   Znew_keyr   ri  r   )r|  rz  r   r   Úget_renamed_variable_empiricals\  s     


.z7jensen_shannon.<locals>.get_renamed_variable_empiricalsz#Computing Jensen-Shannon divergenceZ	Variablesz address: {}, name: {} ({} of {})r   r   rÁ   r_   z)
Too few samples for {} ({}). Skipping...r   T)re   rd   g#B’¡œÇ;)Úprobsr   rh  r8   Ú	bin_widthz	_info.csvu2   Saving Jensenâ€“Shannon diagnostic info to CSV: {}rr   ÚNameÚIDÚAddressZ
Divergencezsample-sizez	bin width)Ú
fieldnamesc             S   s   g | ]}|d  ‘qS )rh  r   )r   r   r   r   r   r!   æ  s    z"jensen_shannon.<locals>.<listcomp>zNumber of binsu   Jensenâ€“Shannon meanu   Jensenâ€“Shannon varianceu:   Jensenâ€“Shannon mean = {}, Jensenâ€“Shannon variance = {})'rª   r  ÚAssertionErrorÚsetrC  r   r«   r2   rï   r¬   r‚   r/   rH   rX   r   ry   rõ   rY   ÚlinspaceÚ	histogramr6   ÚdistributionsÚcategoricalÚCategoricalr•   r   Úitemr¯   r„   ÚcsvÚ
DictWriterÚwriteheaderr5   rä   r`  Úwriterowr8   rZ   rY  )4Ztrace_dist_pZtrace_dist_qr)  r  r:   rc   re   r&  r*  r+  r,  rˆ   r‰   rŠ   rt  rl  ru  r}  Zvariable_empirical_pZvariable_empirical_qr¿   ri  Zcommon_address_idsr<   r   Zvariable_info_logZ
var_dist_pZ
var_dist_qÚnum_samplesZv_pZv_qrk  Zbins_seqr  Zp_probsZq_probsZp_categoricalZq_categoricalZkl_pqZkl_qprh  r   Zvv_keyZdivergence_info_csvÚcsvfileÚ
csv_titlesÚ
csv_writerr›   rw  Zdivergence_valuesZdivergence_meanZdivergence_varr   )r|  rz  r   Újensen_shannonÍ  sŽ    :+;
,

,

 

r”  )TN)TNN)Tr^   ra   FTN)Nr‘   ra   TFTN)N)TNNNT)r  Nr^   r  r  NNFFFTNNN)N)NNN)
NNNr^   NNTFTN)NNrU  r^   NNFFFTN)NrU  Fr^   ra   NNFTFTNF)&r6   rà   Úcollectionsr   r   ró   rõ   Ú
matplotlibrt   Úmatplotlib.pyplotÚpyplotrv   r  r!  rŒ  Ztorch.distributions.klr   rA   r   r   rˆ  r   r  r	   r=   r
   r?   r]   r   rÆ   r  r  r  r@  rH  rM  rf  r”  r   r   r   r   Ú<module>   s<   
5
6
B
r
 
	
?

+
:
M   