Package pylal :: Module PopStatement
[hide private]
[frames] | [no frames]

Source Code for Module pylal.PopStatement

  1  # Copyright (C) 2008  Nickolas Fotopoulos and Alexander Dietz 
  2  # 
  3  # This program is free software; you can redistribute it and/or modify it 
  4  # under the terms of the GNU General Public License as published by the 
  5  # Free Software Foundation; either version 2 of the License, or (at your 
  6  # option) any later version. 
  7  # 
  8  # This program is distributed in the hope that it will be useful, but 
  9  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 10  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General 
 11  # Public License for more details. 
 12  # 
 13  # You should have received a copy of the GNU General Public License along 
 14  # with this program; if not, write to the Free Software Foundation, Inc., 
 15  # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA. 
 16  # 
 17  from __future__ import division 
 18   
 19  import itertools 
 20  import os 
 21  import sys 
 22  import random 
 23   
 24  import numpy as np 
 25  from scipy import optimize 
 26  from scipy import special 
 27  from scipy import stats 
 28   
 29  from glue import iterutils 
 30  from pylal import plotutils 
 31  from lal import rate 
 32  from pylal.stats import rankdata 
 33   
 34  #################################################### 
 35  ## class GRBdata 
 36  #################################################### 
class GRBdata(object):
    """
    Container for the statistic samples belonging to a single GRB:
    the per-trial injection values, the per-trial off-source values and
    the single on-source value.  Provides selection of one representative
    value via choice().
    """

    # -------------------------------------------------
    def __init__(self, grb_name, inj_by_trial, off_by_trial, on_source):
        """
        Initializes this object by passing the injection/offsource
        and onsource data.

        @param grb_name: name of the GRB
        @param inj_by_trial: per-trial statistic values from injections
        @param off_by_trial: per-trial statistic values from off-source trials
        @param on_source: the single on-source statistic value
        """

        # store the grb name
        self.grb_name = grb_name

        # store the likelihood data
        self.inj_by_trial = inj_by_trial
        self.off_by_trial = off_by_trial
        self.onsource = on_source

        # sort the given data in place (ascending); required by the
        # percentile-based selections in choice()
        self.inj_by_trial.sort()
        self.off_by_trial.sort()

    # -------------------------------------------------
    def choice(self, type):
        """
        Returns a particular number from any sample.

        @param type: selection mode; one of 'box', 'random', 'max',
            'injectMax', 'injectUpper', 'injectMedian', 'injectLower',
            'injectRange', 'injectRandom'
        @raise ValueError: for any unknown selection type
        """

        if type == 'box':
            return self.onsource
        elif type == 'random':
            # draw a random off-source trial
            index = random.randrange(len(self.off_by_trial))
            return self.off_by_trial[index]
        elif type == 'max':
            return max(self.off_by_trial)
        elif type == 'injectMax':
            return max(self.inj_by_trial)
        elif type == 'injectUpper':
            # random draw from the top 30% of the (sorted) injections
            n = len(self.inj_by_trial)
            index = random.randrange(int(0.7 * n), n)
            return self.inj_by_trial[index]
        elif type == 'injectMedian':
            # random draw around the median (30%-50% quantile range)
            n = len(self.inj_by_trial)
            index = random.randrange(int(0.3 * n), int(0.5 * n))
            return self.inj_by_trial[index]
        elif type == 'injectLower':
            # random draw from the bottom 40%
            n = len(self.inj_by_trial)
            index = random.randrange(0, int(0.4 * n))
            return self.inj_by_trial[index]
        elif type == 'injectRange':
            # random draw restricted to the fixed value range (3.5, 5.5)
            pseudo_list = [x for x in self.inj_by_trial if x > 3.5 and x < 5.5]
            return random.choice(pseudo_list)
        elif type == 'injectRandom':
            index = random.randrange(len(self.inj_by_trial))
            return self.inj_by_trial[index]
        else:
            # call-form raise; the original py2-only statement form is a
            # SyntaxError under python3
            raise ValueError("Unknown type %s for trigger selection. " % type)
94 95 96 #################################################### 97 # 98 ####################################################
class PopStatement(object):
    """
    Collects the per-GRB statistic samples (likelihood and IFAR), selects
    the on-source values for a chosen test type, and performs a
    Mann-Whitney U-test of the on-source population against the off-source
    population.  Also provides several diagnostic plots.
    """

    # -------------------------------------------------
    def __init__(self, grb_data, name_suffix):
        """
        Initializes the class with the GRB data.

        @param grb_data: mapping GRB name -> info dict; the 'ifos' entry
            is used to choose line styles in the plots
        @param name_suffix: suffix used in plot titles and names
        """

        # the data
        self.off_list = []
        self.on_list = []

        # the main lists containing the data objects
        self.lik_by_grb = []
        self.ifar_by_grb = []

        # sets the statistic to be used: lik or ifar
        self.stat = None

        # sets the type of data to be used:
        # box or offsource/injection data
        self.type = None

        # a list of used GRBs (names only)
        self.list_grbs = []

        # a list of all GRBs
        self.grb_data = grb_data

        # two counters
        self.n_grb = 0
        self.n_off = 0

        # a name suffix for plot creation
        self.name_suffix = name_suffix

        # the statistical results
        self.p_one_sided = None
        self.p_two_sided = None
        self.u = None
        self.z = None

        # cycle of matplotlib color codes for the multi-GRB plots
        self.colors = itertools.cycle(['b', 'g', 'r', 'c', 'm', 'y', 'k'])

    # -------------------------------------------------
    def add_data(self, grb_name, pop_injections_by_trial,
                 pop_background_by_trial, pop_onsource):
        """
        Adds one GRB's worth of data, storing both the raw likelihood
        sample and the derived IFAR sample.
        """

        # append the GRB to the list
        self.list_grbs.append(grb_name)

        # create an instance for the likelihood data
        data_lik = GRBdata(grb_name, pop_injections_by_trial,
                           pop_background_by_trial, pop_onsource)

        # calculate the IFAR as well, always against the background sample
        off_ifar = self.calculate_sample_to_ifar(pop_background_by_trial,
                                                 pop_background_by_trial)
        inj_ifar = self.calculate_sample_to_ifar(pop_injections_by_trial,
                                                 pop_background_by_trial)
        on_ifar = self.calculate_sample_to_ifar([pop_onsource],
                                                pop_background_by_trial)

        # and an instance for the IFAR data
        data_ifar = GRBdata(grb_name, inj_ifar, off_ifar, on_ifar[0])

        # store the samples
        self.lik_by_grb.append(data_lik)
        self.ifar_by_grb.append(data_ifar)

    # -------------------------------------------------
    def finalize(self):
        """
        Just sets some numbers: the GRB count and the total number of
        off-source trials.
        """

        self.n_grb = len(self.lik_by_grb)

        self.n_off = 0
        for data_list in self.lik_by_grb:
            self.n_off += len(data_list.off_by_trial)

    # -------------------------------------------------
    def calculate_sample_to_ifar(self, sample, sample_ref):
        """
        Calculates a list of inverse FARs, one per item in sample, each
        measured against the reference sample.
        """

        vector_ifar = []
        for item in sample:
            ifar = self.calculate_ifar(item, sample_ref)
            vector_ifar.append(ifar)

        return vector_ifar

    # -------------------------------------------------
    def calculate_ifar(self, value, sample):
        """
        Inverse FAR of a single value: len(sample) divided by the number
        of sample entries at least as loud, with the count floored at 1.
        """

        # accept plain python lists as well as numpy arrays; the original
        # comparison broke with an AttributeError for plain lists
        sample = np.asarray(sample)

        n_sample = float(len(sample))
        count_louder = (sample >= value).sum(axis=0)

        # never allow a zero count: the loudest value gets IFAR n_sample/1
        count_louder = max(count_louder, 1)
        return n_sample / count_louder

    # -------------------------------------------------
    def check_off_distribution(self, pop_min, pop_max, far = False):
        """
        Creates a cumulative plot of the off-source distributions of all
        GRBs, either of the likelihood or (far=True) of log10(IFAR).
        """

        data_list_by_grb = self.lik_by_grb
        if far:
            data_list_by_grb = self.ifar_by_grb

        # prepare the plot
        if far:
            tag = 'log(IFAR)'
            sname = 'far'
        else:
            tag = 'Likelihood'
            sname = 'lik'
        plot = plotutils.SimplePlot(tag, r"cumulative sum",
                                    r"Cumulative distribution offsource")

        # create the hist data in a consistent binning
        # NOTE(review): sname, bins and px are currently unused; kept for
        # fidelity with the original code
        nbins = 20
        bins = rate.LinearBins(pop_min, pop_max, nbins)
        px = bins.lower()

        for data_list in data_list_by_grb:

            grb_name = data_list.grb_name

            # drop infinite entries before plotting
            tmp_pop = data_list.off_by_trial
            tmp_arr = np.array(tmp_pop, dtype=float)
            off_pop = tmp_arr[~np.isinf(tmp_arr)]
            off_pop.sort()
            py = range(len(off_pop), 0, -1)
            if far:
                off_pop = np.log10(off_pop)

            # choose the line style by detector combination
            ifos = self.grb_data[grb_name]['ifos']
            if ifos == 'H1L1':
                linestyle = '-'
            elif ifos == 'H1H2':
                linestyle = '-.'
            else:
                linestyle = ':'

            # add content to the plot
            plot.add_content(off_pop, py, color = next(self.colors),
                             linestyle = linestyle, label = grb_name)
        plot.finalize()
        plot.ax.set_yscale("log")
        return plot

    # -------------------------------------------------
    # NOTE(review): the 'def' lines of the following two one-line wrappers
    # were lost when this file was extracted; the names below are
    # reconstructed from the call pattern -- confirm against the upstream
    # pylal source.
    def check_off_distribution_lik(self):
        return self.check_off_distribution(0.0, 25.0, far = False)

    # -------------------------------------------------
    def check_off_distribution_far(self):
        return self.check_off_distribution(0.0, 3.0, far = True)

    # -------------------------------------------------
    def set_stat(self, stat):
        """
        Choose your statistics, either lik or ifar.
        """
        self.stat = stat

    # -------------------------------------------------
    def select_onsource(self, type, reject_empty = False):
        """
        Selecting fake trials from the set of offsource
        trials for each GRB.

        @param type: 'box', 'random', 'max', 'single' or an 'inject*N'
            string whose trailing digit N gives the number of GRBs that
            receive an injection value
        @param reject_empty: if True, remove infinite (empty-trial) values
        @raise ValueError: for an unknown statistic or selection type
        """

        # select the statistic value to be used
        if self.stat == 'ifar':
            data_list_by_grb = self.ifar_by_grb
        elif self.stat == 'lik':
            data_list_by_grb = self.lik_by_grb
        else:
            raise ValueError("Unknown statistics %s in select_onsource" %
                             self.stat)

        # delete the old items
        self.on_list = []
        self.off_list = []
        self.type = type

        for counter, data_list in enumerate(data_list_by_grb):

            if type == 'box':
                value = data_list.choice(type)

            elif 'inject' in type:
                # trailing digit: how many GRBs get an injection value
                number_inj = int(type[-1])
                if counter < number_inj:
                    if 'Max' in type:
                        value = data_list.choice('injectMax')
                        print("injectMax lik: %s" % value)
                    elif 'Random' in type:
                        value = data_list.choice('injectRandom')
                        print("injectRandom lik: %s" % value)
                    elif 'Upper' in type:
                        value = data_list.choice('injectUpper')
                        print("injectUpper lik: %s" % value)
                    elif 'Median' in type:
                        value = data_list.choice('injectMedian')
                        print("injectMedian lik: %s" % value)
                    elif 'Lower' in type:
                        value = data_list.choice('injectLower')
                        print("injectLower lik: %s" % value)
                    elif 'Range' in type:
                        value = data_list.choice('injectRange')
                        print("injectRange lik: %s" % value)
                    else:
                        # the original py2 three-expression raise would have
                        # passed 'type' as a traceback object
                        raise ValueError("Unknown injection type: %s" % type)
                else:
                    value = data_list.choice('box')
                    print("box lik: %s" % value)

            elif type == 'random' or (type == 'single' and counter > 0):
                value = data_list.choice('random')

            elif type == 'max' or (type == 'single' and counter == 0):
                value = data_list.choice('max')

            else:
                raise ValueError("Unknown selection type: %s" % type)

            # fill the data to be used for the test
            self.on_list.append(value)
            self.off_list.extend(data_list.off_by_trial)

        # convert to arrays
        self.on_list = np.asarray(self.on_list)
        self.off_list = np.asarray(self.off_list)

        # remove empty trials from the lists
        if reject_empty:
            self.on_list = self.remove_empty_trials(self.on_list)
            self.off_list = self.remove_empty_trials(self.off_list)

        # adjust the number
        self.n_grb = len(self.on_list)
        self.n_off = len(self.off_list)

    # -------------------------------------------------
    def remove_empty_trials(self, data):
        """
        Function to remove any infinite value from a given
        array of data.
        """
        inf_ind = np.isinf(data)
        return data[~inf_ind]

    # -------------------------------------------------
    def mannwhitney_u(self, x, y):
        """
        Return the Mann-Whitney U statistic on the provided scores. Copied
        from scipy.stats.mannwhitneyu except that we only return the U such
        that large U means that population x was systematically larger than
        population y, rather than the smaller U between x and y. The two
        possible U values one can report are related by U' = n1*n2 - U.
        """
        x = np.asarray(x)
        y = np.asarray(y)
        if x.ndim != 1 or y.ndim != 1:
            raise ValueError("populations must be rank 1 collections")
        n1 = len(x)
        n2 = len(y)

        # average ranks over the pooled sample; scipy.stats.rankdata is the
        # standard implementation of the average-rank method used here
        ranked = stats.rankdata(np.concatenate((x, y)))
        rankx = ranked[0:n1]                                # get the x-ranks
        u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - rankx.sum()  # calc U for x
        self.u = n1 * n2 - u1                               # return U for y
        return self.u

    # -------------------------------------------------
    def mannwhitney_u_zscore(self, pop_test, pop_ref):
        """
        Return the z-score of a given sample.
        Not appropriate for n1 + n2 < ~20.
        """

        n1 = len(pop_test)
        n2 = len(pop_ref)
        u_value = self.mannwhitney_u(pop_test, pop_ref)
        mean_U = n1 * n2 / 2.
        stdev_U = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.)
        self.z = (u_value - mean_U) / stdev_U

        return self.z

    # -------------------------------------------------
    def compute_wmu(self):
        """
        Computes the WMU z-score for the both cases
        using likelihood and the IFAR.
        """

        # compute the z-value
        z_value = self.mannwhitney_u_zscore(self.on_list, self.off_list)

        # sf = 1 - cdf
        self.p_one_sided = stats.distributions.norm.sf(z_value)

        # erfc = 1 - erf; scipy.stats no longer exports erfc, so use
        # scipy.special.erfc (imported at module level)
        self.p_two_sided = special.erfc(abs(z_value) / np.sqrt(2.))

        return z_value

    # -------------------------------------------------
    def float_to_latex(self, x, format="%g"):
        """
        Convert a floating point number to a latex representation.  In
        particular, scientific notation is handled gracefully: e -> 10^
        """
        base_str = format % x
        if "e" not in base_str:
            return base_str
        mantissa, exponent = base_str.split("e")
        exponent = str(int(exponent))  # remove leading 0 or +
        # the extracted original fell through here and returned None for
        # any value rendered in scientific notation
        return r"%s \times 10^{%s}" % (mantissa, exponent)

    # -------------------------------------------------
    def create_plot_hist(self):
        """
        Create the histogram comparison of on-source vs off-source.
        """
        plot_title = r"$m_2 \in [%s), U=%d, z_U=%s, p_1=%s, p_2=%s$" \
            % (self.name_suffix, int(self.u),
               self.float_to_latex(self.z, "%5.2g"),
               self.float_to_latex(self.p_one_sided, "%5.2g"),
               self.float_to_latex(self.p_two_sided, "%5.2g"))
        plot = plotutils.VerticalBarHistogram(r"$IFAR(m_2 \in [%s))$" %
                                              self.name_suffix, "PDF",
                                              plot_title)
        plot.add_content(self.on_list, color='r', label = r'On source',
                         bottom = 1.0e-4)
        plot.add_content(self.off_list, color='b', label = r'Off source',
                         bottom = 1.0e-4)
        plot.finalize(normed=True)
        plot.ax.set_yscale('log')
        return plot

    # -------------------------------------------------
    def create_plot_qq(self):
        """
        Create the QQ plot of on-source against off-source.
        """
        plot_title = r"$m_2 \in [%s), U=%d, z_U=%s, p_1=%s, p_2=%s$" \
            % (self.name_suffix, int(self.u),
               self.float_to_latex(self.z, "%5.2g"),
               self.float_to_latex(self.p_one_sided, "%5.2g"),
               self.float_to_latex(self.p_two_sided, "%5.2g"))
        # the title is suppressed for the opened box
        if self.type == "box":
            plot_title = ""
        plot = plotutils.QQPlot(r"self quantile", "combined quantile",
                                plot_title)
        plot.add_bg(self.off_list, linewidth = 3, label="\"Off source\"")
        plot.add_fg(self.on_list, color='r', marker = 'o',
                    label = r'On source',
                    linestyle='None', markersize=10)
        plot.finalize()
        return plot

    # -------------------------------------------------
    def create_cdf_plot(self):
        """
        Cumulative distribution of on- and off-source values, with the
        off-source curve rescaled to the on-source maximum.
        """

        # create the cumulative data set
        self.off_list.sort()
        on_data = np.asarray(self.on_list)
        off_data = np.asarray(self.off_list)

        y_on = []
        y_off = []
        x_onoff = []
        counter = 0
        for value in off_data[::-1]:
            counter += 1
            x_onoff.append(value)
            y_off.append(counter)
            y_on.append((on_data > value).sum())

        # replace the infinite values by 0.0 for plotting purposes
        x_onoff = np.asarray(x_onoff)
        y_on = np.asarray(y_on)
        y_off = np.asarray(y_off)
        inf_ind = np.isinf(x_onoff)
        x_onoff[inf_ind] = 0.0

        # scale the values correspondingly
        scale = y_on.max() / y_off.max()
        y_off_scale = y_off * scale

        # create the plot
        plot = plotutils.SimplePlot(r"Likelihood", r"Cumulative sum",
                                    r"Cumulative distribution")
        plot.add_content(x_onoff, y_off_scale, color = 'r',
                         linewidth = 2, label = 'off-source')
        plot.add_content(x_onoff, y_on, color = 'b',
                         linewidth = 3, label = 'on-source')

        plot.finalize()

        # make the plot nice
        plot.ax.set_xscale('log')
        plot.ax.axis([2, 20, 0.0, 22.0])
        plot.ax.set_xticks([2, 3, 5, 10])
        plot.ax.set_xticklabels(['2', '3', '5', '10'])

        return plot

    # -------------------------------------------------
    def create_pdf_plot(self):
        """
        Create a pdf plot of the used data.
        """

        # get the data and sort them in reverse order
        data_off = np.sort(self.off_list)[::-1]
        data_on = np.sort(self.on_list)[::-1]

        # remove infinities
        data_on = self.remove_empty_trials(data_on)
        data_off = self.remove_empty_trials(data_off)
        n_grb = len(data_on)

        # prepare lists
        pdf_x = []
        pdf_off_y = []
        pdf_on_y = []
        pdf_on_r = []

        # prepare the main loop
        index = 0
        sum_off = 0.0
        sum_tot = 0
        for d in data_off:

            # add one counter to the offsource sum
            sum_off += 1
            sum_tot += 1

            # have we reached an onsource statistics value?
            if d <= data_on[index]:

                # compute the values to be stored
                sum_scale = sum_off / self.n_off
                y = 1.0 / n_grb
                r = y - sum_scale

                # store these values in the lists
                pdf_x.append(d)
                pdf_off_y.append(sum_scale)
                pdf_on_y.append(y)
                pdf_on_r.append(r)

                # check breaking condition
                if index == n_grb - 1:
                    break

                # reset the sum and increase the index
                index += 1
                sum_off = 0.0

        # double check the data
        remain_data = data_off[data_off <= d]
        sum_off = sum(pdf_off_y) + len(remain_data) / self.n_off
        sum_on = sum(pdf_on_y)
        print("Sum of the onsource: %.1f offsource: %.1f" %
              (sum_on, sum_off))

        # create the plot
        plot = plotutils.SimplePlot(r"Likelihood", r"pdf",
                                    r"PDF Distribution")
        plot.add_content(pdf_x, pdf_off_y, color = 'r',
                         linewidth = 2, label = 'off-source')
        plot.add_content(pdf_x, pdf_on_y, color = 'b', marker = 'o',
                         markersize = 10.0, label = 'on-source')
        plot.finalize()

        return plot

    # -------------------------------------------------
    def create_hist_plot(self, n_bin, range = None):
        """
        Histogram of on-source vs (rescaled) off-source values with
        1/2/3-sigma statistical error boxes around the on-source counts.
        NOTE(review): 'range' shadows the builtin but is kept for
        interface compatibility.
        """

        def create_area(x, dx, y, dy):
            # corners of a rectangle of size (dx, dy) centred on (x, y)
            px = [x - dx / 2, x + dx / 2, x + dx / 2, x - dx / 2, x - dx / 2]
            py = [y - dy / 2, y - dy / 2, y + dy / 2, y + dy / 2, y - dy / 2]
            return px, py

        def draw_error_boxes(plot, x, dx, y, dy, col):
            # shaded 1/2/3-sigma boxes around a data point
            bx, by = create_area(x, dx, y, dy)
            plot.ax.fill(bx, by, ec='w', fc = col, alpha = 0.2)

            bx, by = create_area(x, dx, y, 2 * dy)
            plot.ax.fill(bx, by, ec='w', fc = col, alpha = 0.2)

            bx, by = create_area(x, dx, y, 3 * dy)
            plot.ax.fill(bx, by, ec='k', fc = col, alpha = 0.2)

            return plot

        # set the surroundings of the parameter space
        if range is None:
            data_on = np.asarray(self.on_list)
            inf_ind = np.isinf(data_on)
            val_min = data_on[~inf_ind].min()
            val_max = data_on.max()
        else:
            val_min = range[0]
            val_max = range[1]

        # create the hists
        hist_on = np.zeros(n_bin)
        hist_off = np.zeros(n_bin)

        # create the rate bins
        lik_bins = rate.LinearBins(val_min, val_max, n_bin)

        # and fill the histograms
        for x in self.off_list:
            if x >= val_min and x <= val_max:
                hist_off[lik_bins[x]] += 1

        for x in self.on_list:
            if x >= val_min and x <= val_max:
                hist_on[lik_bins[x]] += 1

        # get the centres
        px = lik_bins.centres()
        dx = px[1] - px[0]

        # norm the histograms
        norm = self.n_grb / self.n_off
        hist_on_norm = hist_on
        hist_off_norm = norm * hist_off

        # create the plot
        plot = plotutils.SimplePlot(r"Likelihood", r"counts",
                                    r"Histogramm of on/offsource with %d bins" %
                                    (n_bin))

        # the norm of the normed histograms: 1.0
        plot.add_content(px, hist_on, color = 'r', marker = 'o',
                         markersize = 10.0, label = 'on-source')
        plot.add_content(px, hist_off_norm, color = 'b', marker = 's',
                         markersize = 10, label = 'off-source')

        # add the error shading
        for x, n in zip(px, hist_on):

            # calculate the error (just the statistic error)
            dn = np.sqrt(n)
            plot = draw_error_boxes(plot, x, dx, n, dn, 'r')

        plot.finalize()
        plot.ax.axis([val_min, val_max, 0.0, 20.0])

        # insert the lines of the data itself
        for x in self.on_list:
            if x >= val_min and x <= val_max:
                plot.ax.plot([x, x], [0.0, 1.0], 'k')
        plot.ax.axis([val_min, val_max, 0.0, 20.0])

        return plot

#####################################################################
# A simpler, function-based approach to population statements

from scipy import stats
def mannwhitney_u(x, y):
    """
    Return the Mann-Whitney U statistic on the provided scores. Copied from
    scipy.stats.mannwhitneyu except that we only return the U such that
    large U means that population x was systematically larger than population
    y, rather than the smaller U between x and y. The two possible U values
    one can report are related by U' = n1*n2 - U.

    @param x: rank-1 collection of scores
    @param y: rank-1 collection of scores
    @raise ValueError: if either input is not one-dimensional
    """
    x = np.asarray(x)
    y = np.asarray(y)
    if x.ndim != 1 or y.ndim != 1:
        # call-form raise; the py2-only statement form is a SyntaxError
        # under python3
        raise ValueError("populations must be rank 1 collections")
    n1 = len(x)
    n2 = len(y)

    # average ranks over the pooled sample; scipy.stats.rankdata is the
    # standard implementation of the average-rank method used here
    ranked = stats.rankdata(np.concatenate((x, y)))
    rankx = ranked[0:n1]                                # get the x-ranks
    u1 = n1 * n2 + (n1 * (n1 + 1)) / 2.0 - rankx.sum()  # calc U for x
    return n1 * n2 - u1                                 # return U for y
700
def mannwhitney_p(U, n1, n2):
    """
    Return the right-tailed probability of the U value, assuming that
    N is sufficiently high.
    """
    # normal approximation to the null distribution of U
    expected = n1 * n2 / 2.
    sigma = np.sqrt(n1 * n2 * (n1 + n2 + 1) / 12.)
    z_score = (U - expected) / sigma
    return stats.norm.sf(z_score)
709
def grbbinomial_Pmin_raw(localProb, Ndraws):
    """
    Return the smallest cumulative binomial probability over all tail
    lengths of localProb, together with the tail length (1-based) at
    which it occurs.
    """
    probs = np.asarray(localProb)
    n_tail = len(probs)

    # Cumulative binomial probability of getting (1+,2+,...,Ntail+) events
    # this improbable.
    # NB: stats.binom.sf maps to the lower level special.bdtrc
    tail_probs = special.bdtrc(np.arange(n_tail), Ndraws, probs)
    best = tail_probs.argmin()

    return tail_probs[best], best + 1
721
def grbbinomialtest(localProb, Ndraws, Nmc, discreteness=None):
    """
    Adapted from https://trac.ligo.caltech.edu/xpipeline/browser/trunk/utilities/grbbinomialtest.m

    localProb is a *sorted* array of FAP values, one per GRB to be tested
    Ndraws is a scalar saying how many GRBs were analyzed in total
    Nmc is the number of Monte-Carlo simulations to perform in assessing
    significance.
    discreteness is optional, but allows you to draw FAP values uniformly
    from multiples of 1 / discreteness

    Pmin_raw   Lowest cumulative binomial probability of the input set
               localProb. Note that this number does not account for the
               trials factor when length(localProb)>1.
    Pmin       Probability that the tail of length(localProb) of a set of
               Ndraws uniformly distributed random numbers will give a
               cumulative binomial probability less than or equal to
               Pmin_raw.
    Nmin       Number of tail values to include at which the binomial
               probability Pmin_raw occurs.
    """
    n_tail = len(localProb)
    Pmin_raw, Nmin = grbbinomial_Pmin_raw(localProb, Ndraws)

    # Monte-Carlo draw of Nmc null experiments, each with Ndraws FAPs
    if discreteness is None:
        mc_faps = stats.uniform.rvs(size=(Nmc, Ndraws))
    else:
        # uniform over the multiples of 1/discreteness in [0, 1]
        mc_faps = stats.randint.rvs(0, discreteness + 1,
                                    size=(Nmc, Ndraws)) / discreteness

    # keep only the n_tail most significant (smallest) values per experiment
    mc_faps.sort(axis=1)
    mc_faps = mc_faps[:, :n_tail]

    # cumulative binomial probability per tail length and experiment
    # NB: stats.binom.sf maps to the lower level special.bdtrc
    mc_P = special.bdtrc(np.arange(n_tail)[None, :], Ndraws, mc_faps)
    mc_Pmin = mc_P.min(axis=1)

    # fraction of null experiments at least as extreme as the observation
    Pmin = (mc_Pmin <= Pmin_raw).mean()

    return Pmin_raw, Pmin, Nmin
761
def grbbinomialtest_threshold(Ndraws, Ntail, percentile, Nmc, discreteness=None, blocksize=10000):
    """
    Adapted from https://trac.ligo.caltech.edu/xpipeline/browser/trunk/utilities/grbbinomialtest_threshold.m

    Ndraws is a scalar saying how many GRBs were analyzed in total
    Ntail is the number of loudest GRB events kept
    percentile is the desired percentile of the binomial probability
    distribution (This should be between 0 and 100!)
    Nmc is the number of Monte-Carlo simulations to perform in assessing
    significance
    discreteness is optional, but allows you to draw FAP values uniformly
    from multiples of 1 / discreteness
    blocksize caps how many Monte-Carlo rows are drawn at once, bounding
    peak memory use

    Return the threshold on Pmin for the given percentile.
    NOTE(review): this docstring previously also promised "an array of the
    FAPs corresponding to that threshold", but only the scalar threshold
    is returned.
    """
    assert Ntail <= Ndraws
    # draw(n) yields an (n, Ndraws) array of FAPs under the null hypothesis
    if discreteness is None:
        draw = lambda n: stats.uniform.rvs(size=(n, Ndraws))
    else:
        draw = lambda n: stats.randint.rvs(0, discreteness + 1, size=(n, Ndraws)) / discreteness

    PminMC = []
    num_drawn = 0
    while num_drawn < Nmc:  # draw random numbers in blocks to reduce memory
        num_to_draw = min(Nmc - num_drawn, blocksize)
        localProbMC = draw(num_to_draw)

        # keep Ntail most significant values of this block
        localProbMC.sort(axis=1)
        localProbMC = localProbMC[:, :Ntail]

        # NB: stats.binom.sf maps to the lower level special.bdtrc
        PMC = special.bdtrc(np.arange(Ntail)[None, :], Ndraws, localProbMC)
        PminMC.extend(PMC.min(axis=1))
        num_drawn += num_to_draw

    # determine threshold on Pmin
    PminMC = np.asarray(PminMC)
    Pmin_thresh = stats.scoreatpercentile(PminMC, percentile)
    return Pmin_thresh
804