1
2 import matplotlib
3 matplotlib.use('Agg')
4 import matplotlib.pyplot as plt
5 from scipy import stats
6 import numpy as np
7 import string
8 import sys
9 from pylal import bayespputils as bppu
10 from lalapps.combine_evidence import combine_evidence
11 import os
12
def get_data_col(data, param_arr, param):
    """
    Return a one-dimensional list holding the column of *data* that
    corresponds to the named parameter.

    data      -- 2-D sequence: rows of samples, columns of parameters.
    param_arr -- sequence of parameter names, one per column of data.
    param     -- name of the column to extract.

    Returns an empty list when param does not appear in param_arr.
    """
    # NOTE(review): the original 'def' line was lost in this paste; the
    # signature is reconstructed from the call sites in this file and
    # the body's free variables.
    ret = []
    for index, name in enumerate(param_arr):
        if name == param:
            # Collect the whole column; if the name appears more than
            # once the columns are concatenated, as in the original.
            ret.extend(row[index] for row in data)
    return ret
25
def merge_files(data, param_arr, writefile):
    """
    Merge a list of sample arrays into one whitespace-separated file,
    appending a 'chain' column that labels which run each row came from.

    data      -- sequence of runs; each run is a 2-D sequence of rows.
    param_arr -- parameter names, written as the header line.
    writefile -- path of the output file (overwritten if it exists).
    """
    # NOTE(review): the original 'def' line was lost in this paste; the
    # signature is reconstructed from the caller
    # merge_files(pos_samples, param_arr, writefile).
    with open(writefile, 'w') as wf:
        # Header: the parameter names, plus a 'chain' column appended
        # only when it is not already one of the parameters.
        chain_found = False
        for name in param_arr:
            wf.write(name + ' ')
            if name == 'chain':
                chain_found = True
        if not chain_found:
            wf.write('chain')
        # BUG FIX: the original wrote the header's terminating newline
        # only together with the 'chain' label, so when 'chain' was
        # already a parameter the first data row was glued onto the
        # header line.
        wf.write('\n')

        # One label per run, incremented once per input file.
        run_num = 0
        for run in data:
            for row in run:
                for value in row:
                    wf.write(str(value) + ' ')
                wf.write(str(run_num) + '\n')
            run_num += 1
49
50
def kstest(pos_samples, param_arr, outdir):
    """
    Compute the two-sample Kolmogorov-Smirnov p-value for each parameter
    in param_arr over every ordered pair of runs in pos_samples, and
    histogram the off-diagonal p-values, one figure per parameter.

    pos_samples -- sequence of runs; each run is a 2-D sample array.
    param_arr   -- parameter names (columns of each run).
    outdir      -- output directory; plots are saved under <outdir>/ks/.

    Returns a list with one entry per parameter, each entry being the
    runs*runs p-values in row-major (i, j) order.
    """
    # (A stray diff '-' preceded the original 'def' line; removed.)
    runs = len(pos_samples)
    ksdir = os.path.join(outdir, 'ks')
    if not os.path.isdir(ksdir):
        os.makedirs(ksdir)

    fig_num = 1
    ks_arr = []
    for param in param_arr:
        p_arr = []
        p_plot_arr = []
        for i in range(runs):
            data1 = get_data_col(pos_samples[i], param_arr, param)
            for j in range(runs):
                data2 = get_data_col(pos_samples[j], param_arr, param)
                D, p = stats.ks_2samp(data1, data2)
                p_arr.append(p)
                # Only off-diagonal pairs are informative (a run against
                # itself always gives p == 1).  BUG FIX: the original
                # used 'i is not j', which only works because CPython
                # caches small ints; '!=' is the correct comparison.
                if i != j:
                    p_plot_arr.append(p)

        ks_arr.append(p_arr)
        plt.figure(fig_num)
        # 'normed' was removed in matplotlib 3.1; 'density=True' is the
        # exact equivalent.
        plt.hist(p_plot_arr, bins=20, density=True)
        plt.xlabel('p-value')
        plt.ylabel('probability density')
        plt.title(param)
        plt.savefig(os.path.join(ksdir, param + '_ks'))
        fig_num += 1
    return ks_arr
88
def gelman_rubin(pos_samples, param_arr, outdir):
    """
    Compute the Gelman-Rubin R statistic for each parameter in
    param_arr across the runs in pos_samples.

    pos_samples -- sequence of runs; each run is a 2-D sample array.
    param_arr   -- parameter names (columns of each run).
    outdir      -- directory where the merged chain file is written.

    Returns a list of R values, one per parameter.
    """
    # NOTE(review): the original 'def' line was lost in this paste; the
    # name and signature are reconstructed from the body's free
    # variables and the docstring -- confirm against callers.
    writefile = os.path.join(outdir, 'merged_files.dat')
    merge_files(pos_samples, param_arr, writefile)
    # Parse the merged file once.  The original re-opened and re-parsed
    # it for every parameter (and never closed the file handle) even
    # though nothing changes between iterations.
    parser = bppu.PEOutputParser('common')
    with open(writefile) as infile:
        data_obj = parser.parse(infile)
    posterior = bppu.Posterior(data_obj)
    return [posterior.gelman_rubin(param) for param in param_arr]
105
def compare_maxl(pos_samples, param_arr):
    """
    Find the maximum log-likelihood of each run and the spread
    (max - min) of those maxima across runs.

    pos_samples -- sequence of runs; each run is a 2-D sample array.
    param_arr   -- parameter names (columns of each run).

    Returns (maxl_arr, max_diff).
    """
    # NOTE(review): the original 'def' line was lost in this paste; the
    # name and signature are reconstructed from the body -- confirm
    # against callers.
    maxl_arr = []
    for samples in pos_samples:
        logl = get_data_col(samples, param_arr, 'logl')
        if not logl:
            # Some samplers label the column 'likelihood' instead.
            logl = get_data_col(samples, param_arr, 'likelihood')
        maxl_arr.append(max(logl))
    max_diff = max(maxl_arr) - min(maxl_arr)
    return (maxl_arr, max_diff)
121
def compare_maxposterior(pos_samples, param_arr):
    """
    Compute the maximum posterior value (maximum of logl + logprior)
    for each run and the spread (max - min) of those maxima across runs.

    pos_samples -- sequence of runs; each run is a 2-D sample array.
    param_arr   -- parameter names (columns of each run).

    Returns (max_pos_arr, max_diff).
    """
    # (A stray diff '-' preceded the original 'def' line; removed.)
    max_pos_arr = []
    for samples in pos_samples:
        logl = get_data_col(samples, param_arr, 'logl')
        logp = get_data_col(samples, param_arr, 'prior')
        # Element-wise sum of log-likelihood and log-prior; its maximum
        # over the samples is the run's maximum log-posterior.
        max_pos_arr.append(max(l + p for l, p in zip(logl, logp)))
    max_diff = max(max_pos_arr) - min(max_pos_arr)
    return (max_pos_arr, max_diff)
142