1
2
3 import os
4 import sys
5 import tempfile
6 from glue import pipeline
7 import tempfile
8
def construct_command(node):
    """
    Build the list of command-line tokens for a Condor DAG node.

    Concatenates the node's executable path with its command-line string and
    splits on single spaces, discarding empty/whitespace-only tokens.

    @param node: object providing job().get_executable() and get_cmd_line()
        (e.g. a pipeline.CondorDAGNode).
    @return: list of non-empty command-line tokens, executable first.
    """
    command_string = node.job().get_executable() + " " + node.get_cmd_line()
    return [opt for opt in command_string.split(" ") if opt.strip()]
12
13
def __init__(self, basename, log_path):
    """
    Condor DAG for an auxmvc analysis.

    @param basename: base name used for the DAG file and the Condor log file.
    @param log_path: directory in which the Condor log file is created.
    """
    self.basename = basename
    # Preserve the historical module-level tempfile defaults in case any
    # other code in this pipeline relies on them being set here.
    tempfile.tempdir = log_path
    tempfile.template = self.basename + '.dag.log.'
    # mkstemp creates the log file atomically, fixing the race inherent in
    # the original mktemp() + open() sequence.
    fd, logfile = tempfile.mkstemp(dir=log_path, prefix=self.basename + '.dag.log.')
    os.close(fd)
    pipeline.CondorDAG.__init__(self, logfile)
    self.set_dag_file(self.basename)
    # mapping of jobs in this DAG; populated elsewhere
    self.jobsDict = {}
25
def add_node(self, node):
    """Register *node* with this DAG (delegates to pipeline.CondorDAG)."""
    pipeline.CondorDAG.add_node(self, node)
29
30
31
32
33
34
36 """
37 A basic auxmvc job class. Sets common attributes needed for any auxmvc job. It uses a config parser object to
38 set the options.
39 """
def __init__(self, cp, sections, exec_name, tag_base='', id='', extension='', dax=False, short_opts=False):
    """
    A basic auxmvc job. Sets common attributes needed for any auxmvc job,
    reading its options from a ConfigParser object.

    cp = ConfigParser object from which options are read.
    sections = sections of the ConfigParser that get added to the opts
    exec_name = exec_name name in ConfigParser
    tag_base = prefix for the submit/stdout/stderr file names.
    id = suffix appended to tag_base in the stdout/stderr file names.
    extension = file extension associated with this job's output.
    dax = passed through to pipeline.AnalysisJob.
    short_opts = if True, ini options are added as short (single-dash) options.
    """
    self.__exec_name = exec_name
    self.__extension = extension
    self.tag_base = tag_base
    universe = cp.get('condor', 'universe')
    executable = cp.get('condor', exec_name)
    pipeline.CondorDAGJob.__init__(self, universe, executable)
    pipeline.AnalysisJob.__init__(self, cp, dax)
    self.add_condor_cmd('copy_to_spool', 'False')
    self.add_condor_cmd('getenv', 'True')
    # force serial execution inside MKL/OpenMP runtimes
    self.add_condor_cmd('environment', "KMP_LIBRARY=serial;MKL_SERIAL=yes")
    self.__use_gpus = cp.has_option('condor', 'use-gpus')
    for sec in sections:
        if cp.has_section(sec):
            if short_opts:
                self.add_short_ini_opts(cp, sec)
            else:
                self.add_ini_opts(cp, sec)
        else:
            # fix: original used the Python-2-only `print >>sys.stderr`
            # statement; this form emits the same text and is valid in
            # both Python 2 and 3
            sys.stderr.write("warning: config file is missing section [" + sec + "]\n")
    self.set_stdout_file('logs/' + tag_base + id + '.out')
    self.set_stderr_file('logs/' + tag_base + id + '.err')
    self.set_sub_file(tag_base + '.sub')
69
71 """
72 Set the exec_name name
73 """
74 self.__exec_name = exec_name
75
77 """
78 Set the exec_name name
79 """
80 self.__exec_name = exec_name
81
83 """
84 Get the exec_name name
85 """
86 return self.__exec_name
87
89 """
90 Set the file extension
91 """
92 self.__extension = extension
93
95 """
96 Get the extension for the file name
97 """
98 return self.__extension
99
101 """
102 Get whether this job was requested to run on a GPU node
103 """
104 return self.__use_gpus
105
107 """
108 Parse command line options from a given section in an ini file and
109 pass to the executable as short options.
110 @param cp: ConfigParser object pointing to the ini file.
111 @param section: section of the ini file to add to the options.
112 """
113 for opt in cp.options(section):
114 arg = cp.get(section,opt)
115 self.add_short_opt(opt,arg)
116
117
118
120 """
121 Job for building auxmvc feature vectors.
122 """
def __init__(self, cp, main_channel, channels=None, unsafe_channels=None):
    """
    Job for building auxmvc feature vectors.

    @param cp: ConfigParser with a [build_auxmvc_vectors] section.
    @param main_channel: name of the main (target) channel.
    @param channels: optional channel list passed as --channels.
    @param unsafe_channels: optional list passed as --unsafe-channels.
    """
    auxmvc_analysis_job.__init__(
        self,
        cp,
        ['build_auxmvc_vectors'],
        'idq_build_auxmvc_vectors',
        tag_base='build_vectors',
    )
    self.add_opt('main-channel', main_channel)
    if channels:
        self.add_opt('channels', channels)
    if unsafe_channels:
        self.add_opt('unsafe-channels', unsafe_channels)
133
134
136 """
137 Dag node for building auxmvc feature vector.
138 """
def __init__(self, job, trigdir, gps_start_time, gps_end_time, output_file, dq_segments=None, dq_segments_name="", p_node=[]):
    """
    Dag node for building auxmvc feature vectors.

    @param job: build_auxmvc_vectors_job instance.
    @param trigdir: directory containing trigger files.
    @param gps_start_time: segment start (GPS seconds).
    @param gps_end_time: segment end (GPS seconds).
    @param output_file: path of the feature-vector file to produce.
    @param dq_segments: optional data-quality segments file.
    @param dq_segments_name: DQ flag name inside dq_segments.
    @param p_node: parent nodes this node depends on.
    """
    duration = str(gps_end_time - gps_start_time)
    log_stem = 'logs/' + job.tag_base + '-' + str(gps_start_time) + '-' + duration
    job.set_stdout_file(log_stem + '.out')
    job.set_stderr_file(log_stem + '.err')
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_output_file(output_file)
    self.add_var_opt('trigger-dir', trigdir)
    self.add_var_opt('gps-start-time', gps_start_time)
    self.add_var_opt('gps-end-time', gps_end_time)
    self.add_var_opt('output-file', output_file)
    if dq_segments:
        self.add_var_opt('dq-segments', dq_segments)
        self.add_var_opt('dq-segments-name', dq_segments_name)
    for parent in p_node:
        self.add_parent(parent)
153
155 """
156 Job for preparing training auxmvc samples.
157 """
159 """
160 """
161 sections = ['prepare_training_auxmvc_samples']
162 exec_name = 'idq_prepare_training_auxmvc_samples'
163 tag_base = 'training_auxmvc'
164 auxmvc_analysis_job.__init__(self,cp,sections,exec_name,tag_base=tag_base)
165
167 """
168 Node for preparing training auxmvc samples job.
169 """
def __init__(self, job, source_dir, gps_start_time, gps_end_time, output_file, dq_segments="", dq_segments_name="", p_node=[]):
    """
    Node for preparing training auxmvc samples job.

    @param job: prepare_training_auxmvc_samples_job instance.
    @param source_dir: directory to read samples from.
    @param gps_start_time: segment start (GPS seconds).
    @param gps_end_time: segment end (GPS seconds).
    @param output_file: .pat file to produce; also names the log files.
    @param dq_segments: optional data-quality segments file.
    @param dq_segments_name: DQ flag name (only used together with dq_segments).
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(output_file)[1]
    job.set_stdout_file('logs/' + base.replace('.pat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.pat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_output_file(output_file)
    self.add_var_opt('source-directory', source_dir)
    self.add_var_opt('gps-start-time', gps_start_time)
    self.add_var_opt('gps-end-time', gps_end_time)
    if dq_segments and dq_segments_name:
        self.add_var_opt('dq-segments', dq_segments)
        self.add_var_opt('dq-segments-name', dq_segments_name)
    self.add_var_opt('output-file', output_file)
    for parent in p_node:
        self.add_parent(parent)
184
185
187 """
188 Job for preparing training auxmvc samples.
189 """
191 """
192 """
193 sections = ['add_file_to_cache']
194 exec_name = 'add_file_to_cache'
195 tag_base = 'add_file_to_cache'
196 auxmvc_analysis_job.__init__(self, cp, sections, exec_name, tag_base=tag_base)
197
199 """
200 Node for preparing training auxmvc samples job.
201 """
def __init__(self, job, files, cache, p_node=[]):
    """
    Node that appends the given files to a cache file.

    Command line is: <cache> <file1> <file2> ...

    @param job: add_file_to_cache_job instance.
    @param files: non-empty list of file paths to add.
    @param cache: path of the cache file.
    @param p_node: parent nodes this node depends on.
    """
    stem = os.path.split(files[0])[1].split(".")[0]
    job.set_stdout_file('logs/' + stem + '_adding_to_cache.out')
    job.set_stderr_file('logs/' + stem + '_adding_to_cache.err')
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_var_arg(cache)
    for fname in files:
        self.add_var_arg(fname)
    for parent in p_node:
        self.add_parent(parent)
211
212
213
215 """
216 Training job for random forest (MVSC).
217 """
219 """
220 """
221 sections = ['train_forest']
222 exec_name = 'SprBaggerDecisionTreeApp'
223 tag_base = 'train_forest'
224 auxmvc_analysis_job.__init__(self,cp,sections,exec_name,tag_base=tag_base, short_opts=True)
225
226
228 """
229 Dag node for training the random forest (MVSC).
230 """
def __init__(self, job, training_data_file, trainedforest_filename, p_node=[]):
    """
    Dag node for training the random forest (MVSC).

    @param job: train_forest_job instance.
    @param training_data_file: input .pat file with training samples.
    @param trainedforest_filename: output file for the trained forest.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(training_data_file)[1]
    job.set_stdout_file('logs/' + base.replace('.pat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.pat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(training_data_file)
    self.training_data_file = self.get_input_files()[0]
    self.trainedforest = trainedforest_filename
    self.add_output_file(self.trainedforest)
    # -f <output forest file>, then the training .pat file as a positional arg
    self.add_var_opt("f", self.trainedforest, short=True)
    self.add_file_arg(" %s" % self.training_data_file)
    for parent in p_node:
        self.add_parent(parent)
243
245 """
246 Job using random forest to evaluate unclassified data.
247 """
249 """
250 """
251 sections = ['forest_evaluate']
252 exec_name = 'SprOutputWriterApp'
253 tag_base = 'forest_evaluate'
254 auxmvc_analysis_job.__init__(self,cp, sections, exec_name, tag_base=tag_base, short_opts=True)
255 self.add_short_opt("A", "")
256
258 """
259 Node for random forest evaluation job.
260 """
def __init__(self, job, trainedforest, file_to_rank, ranked_file, p_node=[]):
    """
    Node for random forest evaluation job.

    Positional arguments passed to the executable:
    <trained forest> <input .pat file> <output .dat file>

    @param job: use_forest_job instance.
    @param trainedforest: trained forest file.
    @param file_to_rank: .pat file to be ranked.
    @param ranked_file: output .dat file; also names the log files.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(ranked_file)[1]
    job.set_stdout_file('logs/' + base.replace('.dat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.dat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(trainedforest)
    self.add_input_file(file_to_rank)
    self.add_output_file(ranked_file)
    self.trainedforest, self.file_to_rank = self.get_input_files()[:2]
    self.ranked_file = ranked_file
    self.add_file_arg(" %s %s %s" % (self.trainedforest, self.file_to_rank, self.ranked_file))
    for parent in p_node:
        self.add_parent(parent)
274
275
276
278 """
279 A simple fix job that adds the variables excluded by forest (MVSC) from classification into the output file.
280 Need to be run right after use_forest job.
281 """
283 """
284 """
285 sections = ['forest_add_excluded_vars']
286 exec_name = 'forest_add_excluded_vars'
287 tag_base = 'forest_add_excluded_vars'
288 auxmvc_analysis_job.__init__(self,cp, sections, exec_name, tag_base=tag_base)
289 self.add_opt('excluded-variables', cp.get('forest_evaluate', 'z'))
290
292 """
293 Node for forest_add_excluded_vars_job.
294 """
def __init__(self, job, patfile, datfile, p_node=[]):
    """
    Node for forest_add_excluded_vars_job.

    @param job: forest_add_excluded_vars_job instance.
    @param patfile: .pat file with the excluded variables.
    @param datfile: .dat file to be amended; also names the log files.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(datfile)[1]
    job.set_stdout_file('logs/' + base.replace('.dat', 'faev.out'))
    job.set_stderr_file('logs/' + base.replace('.dat', 'faev.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(patfile)
    self.add_input_file(datfile)
    self.add_var_opt('pat-file', patfile)
    self.add_var_opt('dat-file', datfile)
    for parent in p_node:
        self.add_parent(parent)
305
306
307
308
310 """
311 Job that makes various plots and histograms using the significance of the auxiliary channels.
312 """
def __init__(self, cp):
    """
    Job that makes various plots and histograms using the significance of
    the auxiliary channels.

    @param cp: ConfigParser with a [plot-forest-channels-significance] section.
    """
    auxmvc_analysis_job.__init__(
        self,
        cp,
        ['plot-forest-channels-significance'],
        'auxmvc_plot_mvsc_channels_significance',
        tag_base='plot_channels_signif',
    )
319
321 """
322 Node for plot_channels_significance_job.
323 """
def __init__(self, job, input, p_node=[]):
    """
    Node for plot_channels_significance_job.

    @param job: plot_channels_significance_job instance.
    @param input: value passed as --input to the executable (parameter name
        shadows the builtin but is kept for caller compatibility).
    @param p_node: parent nodes this node depends on.
    """
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_var_opt("input", input)
    for parent in p_node:
        self.add_parent(parent)
329
330
331
333 """
334 Job that makes plots based on results of evaluation e.g. ROC curves.
335 """
def __init__(self, cp, tag_base='RESULT_PLOTS'):
    """
    Job that makes plots based on results of evaluation, e.g. ROC curves.

    NOTE(review): the tag_base argument is immediately overwritten below,
    so the value passed by callers is ignored; kept as-is to preserve the
    original behavior.

    @param cp: ConfigParser with a [result_plots] section.
    """
    sections = ['result_plots']
    exec_name = 'auxmvc_result_plots'
    tag_base = 'auxmvc_result_plots'
    auxmvc_analysis_job.__init__(self, cp, sections, exec_name, tag_base=tag_base)
343
344
346 """
347 Node for result_plots_job.
348 """
def __init__(self, job, datfiles, p_node=[]):
    """
    Node for result_plots_job.

    @param datfiles: sequence of indexable entries; the first element of
        each entry is added as a file argument (matches original behavior —
        presumably each entry is a 1-tuple/list of a path; TODO confirm
        against callers).
    @param p_node: parent nodes this node depends on.
    """
    pipeline.CondorDAGNode.__init__(self, job)
    for entry in datfiles:
        self.add_file_arg(entry[0])
    for parent in p_node:
        self.add_parent(parent)
355
356
357
358
359
361 """
362 """
364 """
365 """
366 sections = ['svm_evaluate']
367 exec_name = 'svm_evaluate_cmd'
368 tag_base = 'svm_evaluate'
369 auxmvc_analysis_job.__init__(self, cp, sections, exec_name, tag_base=tag_base)
370
371
373 """
374 Node for SVM evaluation job.
375 """
def __init__(self, job, cp, test_file, range_file, svm_model, predict_file, p_node=[]):
    """
    Node for SVM evaluation job.

    @param job: auxmvc_svm_evaluate job instance.
    @param cp: ConfigParser object (unused here; kept for caller compatibility).
    @param test_file: input file with samples to evaluate.
    @param range_file: scaling-range file produced by training.
    @param svm_model: trained SVM model file.
    @param predict_file: output file with predictions; also names the logs.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(predict_file)[1]
    job.set_stdout_file('logs/' + base.replace('.dat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.dat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(test_file)
    self.add_input_file(range_file)
    self.add_input_file(svm_model)
    self.add_output_file(predict_file)

    self.test_file, self.range_file, self.svm_model = self.get_input_files()[:3]
    self.predict_file = self.get_output_files()[0]
    # short options consumed by the svm_evaluate command
    self.add_var_opt('i', self.test_file, short=True)
    self.add_var_opt('r', self.range_file, short=True)
    self.add_var_opt('m', self.svm_model, short=True)
    self.add_var_opt('o', self.predict_file, short=True)

    for parent in p_node:
        self.add_parent(parent)
398
399
401 """
402 Training job for svm.
403 """
405 """
406 """
407 sections = ['svm_train']
408 exec_name = 'svm_train_cmd'
409 tag_base = 'svm_train'
410 auxmvc_analysis_job.__init__(self, cp, sections, exec_name, tag_base=tag_base)
411
412
414 """
415 Node for SVM train job.
416 """
def __init__(self, job, cp, train_file, range_file, model_file, p_node=[]):
    """
    Node for SVM train job.

    @param job: auxmvc_svm_train job instance.
    @param cp: ConfigParser object (unused here; kept for caller compatibility).
    @param train_file: input .pat file with training samples.
    @param range_file: output scaling-range file.
    @param model_file: output SVM model file.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(train_file)[1]
    job.set_stdout_file('logs/' + base.replace('.pat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.pat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(train_file)
    self.add_output_file(range_file)
    self.add_output_file(model_file)

    self.train_file = self.get_input_files()[0]
    self.range_file = self.get_output_files()[0]
    self.model_file = self.get_output_files()[1]

    # intermediate files produced during training; removed by the post script
    self.train_file_svm = os.path.abspath(self.train_file) + '.mid'
    self.scale_file = os.path.abspath(self.train_file) + '.scale'
    for opt, val in (
        ("train-file", self.train_file),
        ("train-file-svm", self.train_file_svm),
        ("scale-file", self.scale_file),
        ("range-file", self.range_file),
        ("model-file", self.model_file),
    ):
        self.add_var_opt(opt, val)

    # clean up the intermediate files once the job completes
    self.set_post_script("/bin/rm ")
    self.add_post_script_arg(self.train_file_svm)
    self.add_post_script_arg(self.scale_file)

    for parent in p_node:
        self.add_parent(parent)
446
447
448
449
451 """
452 Job for converting pat files to FANN type files.
453 """
455 """
456 """
457 sections = ['ann_convert']
458 exec_name = 'ConvertSprToFann'
459 tag_base = 'ann_convert'
460 auxmvc_analysis_job.__init__(self,cp,sections,exec_name,tag_base=tag_base, short_opts=False)
461
462
464 """
465 Dag node for converting pat files to FANN type files.
466 """
def __init__(self, job, pat_file, p_node=[]):
    """
    Dag node for converting pat files to FANN type files.

    Bug fix: the original referenced an undefined name `training_data_file`
    when building the stdout/stderr log file names (a NameError at runtime);
    the parameter is `pat_file`, so that is used instead.

    @param job: ann_convert_job instance.
    @param pat_file: input .pat file to convert; the output FANN file is
        the same path with the extension replaced by .ann.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(pat_file)[1]
    job.set_stdout_file('logs/' + base.replace('.pat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.pat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(pat_file)
    self.pat_file = self.get_input_files()[0]
    self.fann_file = pat_file.replace(".pat", ".ann")
    self.add_output_file(self.fann_file)
    self.add_file_arg(" %s" % (self.pat_file))
    for p in p_node:
        self.add_parent(p)
478
480 """
481 Training job for Artificial Neural Networks (ANN) with iRPROP- algorithm.
482 """
484 """
485 """
486 sections = ['train_ann']
487 exec_name = 'TrainNeuralNet'
488 tag_base = 'train_ann'
489 auxmvc_analysis_job.__init__(self,cp,sections,exec_name,tag_base=tag_base, short_opts=False)
490
491
493 """
494 Dag node for training the Artificial Neural Networks (ANN) with iRPROP- algorithm.
495 """
def __init__(self, job, training_data_file, trained_ann_filename, p_node=[]):
    """
    Dag node for training the Artificial Neural Network (ANN) with the
    iRPROP- algorithm.

    Command line: -t <training file> -s <output network file>

    @param job: train_ann_job instance.
    @param training_data_file: input .pat file with training samples.
    @param trained_ann_filename: output file for the trained network.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(training_data_file)[1]
    job.set_stdout_file('logs/' + base.replace('.pat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.pat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(training_data_file)
    self.training_data_file = self.get_input_files()[0]
    self.trained_ann = trained_ann_filename
    self.add_output_file(self.trained_ann)
    self.add_file_arg(" -t %s -s %s" % (self.training_data_file, self.trained_ann))
    for parent in p_node:
        self.add_parent(parent)
507
509 """
510 Job using ANN to evaluate unclassified data.
511 """
513 """
514 """
515 sections = ['ann_evaluate']
516 exec_name = 'EvaluateNeuralNet'
517 tag_base = 'ann_evaluate'
518 auxmvc_analysis_job.__init__(self,cp, sections, exec_name, tag_base=tag_base, short_opts=False)
519 self.add_short_opt("", "")
520
522 """
523 Node for ANN evaluation job.
524 """
def __init__(self, job, trained_ann, file_to_rank, ranked_file, p_node=[]):
    """
    Node for ANN evaluation job.

    Command line: -n <network> -e <evaluation input> -s <score output>

    @param job: ann_evaluate_job instance.
    @param trained_ann: trained network file.
    @param file_to_rank: input file to be ranked.
    @param ranked_file: output .dat file; also names the log files.
    @param p_node: parent nodes this node depends on.
    """
    base = os.path.split(ranked_file)[1]
    job.set_stdout_file('logs/' + base.replace('.dat', '.out'))
    job.set_stderr_file('logs/' + base.replace('.dat', '.err'))
    pipeline.CondorDAGNode.__init__(self, job)
    self.add_input_file(trained_ann)
    self.add_input_file(file_to_rank)
    self.add_output_file(ranked_file)
    self.trained_ann, self.file_to_rank = self.get_input_files()[:2]
    self.ranked_file = ranked_file
    self.add_file_arg(" -n %s -e %s -s %s" % (self.trained_ann, self.file_to_rank, self.ranked_file))
    for parent in p_node:
        self.add_parent(parent)
538