当前位置:   article > 正文

一份开源的果蝇算法C++源代码_果蝇算法c代码

果蝇算法c代码

仅供参考

编译此代码之前,您的系统上需要已经安装 GNU Scientific Library(GSL)。

  1. #include <string>
  2. #include <iostream>
  3. #include <fstream>
  4. #include <vector>
  5. #include <sstream>
  6. #include <math.h>
  7. #include <cstdlib>
  8. #include <map>
  9. #include <gsl/gsl_randist.h>
  10. #include <gsl/gsl_cdf.h>
  11. //#include <gd.h>
  12. //#define DEBUG1
  13. //#define DEBUG2
  14. //#define DEBUG3
  15. //#define DEBUG4
  16. //#define DEBUG5
  17. //#define DEBUG6
  18. //#define DEBUG7
  19. //#define DEBUG8
  20. //#define DEBUG9
  21. //#define MATRIX_TRANSLATOR
  22. //#define BOUND_OLIGOS
  23. using namespace std;
  24. //CONSTANTS DEFINITIONS
  // Floating-point type used for every score / statistic in this file.
  typedef double m_point;

  // Numeric limits and sizes.
  const int ALPHABET_SIZE = 4;
  const int SWITCH_TO_Z_VALUE = 500;
  const int MAX_DOTS = 150000;
  const int DA_TOP = 20;

  // ALPHABET[i] and C_ALPHABET[i] form the pairs A/T, C/G, G/C, T/A.
  const char ALPHABET[4] = {'A', 'C', 'G', 'T'};
  const char C_ALPHABET[4] = {'T', 'G', 'C', 'A'};

  const m_point MIN_ADD_FREQ = 0.01;
  const m_point INF = 1000000000;
  const m_point CONSERVATION_THRESHOLD = 0.75;

  // Glob patterns handed to get_file_names() by main().
  const char *matrix_files = "*.pro *.pfm";

  // Numeric index of each base (A = 0 ... T = 3).
  enum {A, C, G, T};

  // Character-indexed lookup tables, large enough to be indexed by 'T';
  // main() fills the 'A', 'C', 'G' and 'T' slots before anything else runs.
  char VI['T' + 1];
  char rc_VI['T' + 1];

  //*fasta_files = "*.fasta",

  // Marker strings; note that the first ends in the digit zero and the
  // second in the capital letter O.
  const char *m_start_identifier = "P0";
  const char *m_start_identifier_for_free_transfac = "PO";
  const char *m_data_start_identifier = "AC";

  //ERROR CODES (passed as the first argument of error_handler)
  enum
  {
      NO_FILES,
      BAD_MATRIX_FILE,
      MATRIX_NOT_ALLOCATED,
      MISSING_FASTA_FILE,
      BAD_FASTA_SEQUENCE,
      NO_MATRIX_FILE,
      BAD_MATRIX,
      NO_JASPAR_MATRIX_LIST,
      JASPAR_FILE_NOT_IN_LIST,
      CANT_COMPUTE_SCORES,
      READING_OUT_OF_MATRIX,
      NEED_COMPLETE_MATRIX,
      NO_BACKGROUND_AVAILABLE,
      NO_SEQUENCE,
      NO_TFBS,
      CANT_DO_POSITIONAL,
      SEQUENCE_SHORTER_THAN_WINDOW,
      BAD_BIG_VALUES,
      CANTOPENOUTPUT,
      TOO_MANY_EDGES,
      NO_BOF,
      BAD_FILE_FORMAT,
      NO_INDEX_FOUND,
      EVEN_PROBE,
      MISSING_CHIP_FILE,
      INCONSISTENT_CHIP,
      NO_MATRIX_FOR_DISTANCE_ANALYSIS
  };

  const m_point step = 0.025; //bins step!!
  39. //END
  40. //OBJECTS DECLARATION
  41. #ifdef BOUND_OLIGOS
  42. class bound_oligos
  43. {
  44. private:
  45. void build_matrix();
  46. void build_pos_cor_matrix();
  47. void w2_vector(vector<string>*);
  48. int char_to_index(char);
  49. int observed_cooccurrences(int, char, int, char);
  50. vector<string> raw_mat;
  51. m_point **pos_cor_mat;
  52. string NAME;
  53. int **mat, x_size, y_size;
  54. public:
  55. vector<string> assign(vector<string>);
  56. string show_pos_cor_mat();
  57. };
  58. #endif
  59. class tfbs
  60. {
  61. private:
  62. bool raw_tfbsdata_parser();
  63. void j_raw_tfbsdata_parser();
  64. bool raw_matrix_parser();
  65. bool j_raw_matrix_parser();
  66. bool matrix_normalizer();
  67. void matrix_to_log();
  68. void matrix_min_max();
  69. void bns_sum();
  70. void matrix_average();
  71. void build_conservation_vector();
  72. vector<bool> v_conservation;
  73. vector<string> raw_matrix;
  74. vector<int> bns;
  75. vector<m_point> row;
  76. vector<string> raw_data;
  77. m_point bin_freq(int);
  78. string j_raw_data;
  79. m_point **matrix;
  80. m_point *mat_avg;
  81. m_point avg_row;
  82. m_point stddev_row;
  83. string file_name;
  84. // string raw_data;
  85. string ID;
  86. string NAME;
  87. string AC;
  88. int rowsize;
  89. int x_size;
  90. int bin_sum;
  91. m_point max_score;
  92. m_point min_score;
  93. bool matrix_alloc;
  94. bool norm_sec_step;
  95. public:
  96. bool assign(const char*);
  97. bool j_assign(const char*);
  98. bool w2_assign(vector<string>);
  99. #ifdef BOUND_OLIGOS
  100. bound_oligos bo;
  101. #endif
  102. string name();
  103. string id();
  104. string file();
  105. string matrix_id();
  106. string ac();
  107. string show_raw_matrix();
  108. #ifdef MATRIX_TRANSLATOR
  109. string matrix_translator();
  110. #endif
  111. string show_matrix();
  112. int get_bin_sum();
  113. m_point max();
  114. m_point min();
  115. void gen_bns();
  116. string show_bns();
  117. string show_bns_line();
  118. string show_conservation_vector(int);
  119. int m_length();
  120. bool assign_short_matrix(string);
  121. void row_average();
  122. void row_stddev();
  123. m_point get_value(int,int);
  124. m_point get_matrix_average(int);
  125. void row_push(m_point);
  126. void row_v_push(vector<m_point>);
  127. int row_size();
  128. int row_actual_size();
  129. m_point row_get(int);
  130. vector<m_point> row_get();
  131. m_point row_avg_get();
  132. m_point row_stddev_get();
  133. m_point bin_prob(int);
  134. vector<m_point>::iterator row_iter();
  135. unsigned int bin_hm(int);
  136. vector<int> bin_vector();
  137. // void draw_background();
  138. };
  139. class sequence
  140. {
  141. private:
  142. vector<vector<m_point> > seq_rel_scores;
  143. vector<vector<char > > seq_r_strand;
  144. vector<m_point> max_score;
  145. vector<m_point> rel_score;
  146. vector<m_point> rel_score2;
  147. vector<int> max_pos;
  148. vector<int> max_pos2;
  149. vector<char> max_strand;
  150. string NAME;
  151. string SEQ;
  152. string WRAP_NAME;
  153. string ID;
  154. string CHR;
  155. unsigned long int ABS_START;
  156. unsigned long int ABS_END;
  157. char STRAND;
  158. virtual void SCAN(vector<tfbs>::iterator,bool);
  159. void SCAN_SS(vector<tfbs>::iterator,bool);
  160. void store_complete_seq_scores(vector<m_point>::iterator,vector<m_point>::iterator,int,vector<tfbs>::iterator);
  161. void store_complete_seq_scores_ss(vector<m_point>::iterator,int,vector<tfbs>::iterator);
  162. // void set_seq_rel_max_pos();
  163. bool absolute_position_parser();
  164. protected:
  165. bool seq_cleaner();
  166. int char_to_index(char);
  167. int comp_char_to_index(char);
  168. m_point max_v(vector<m_point>*,int*);
  169. m_point max_v2(vector<m_point>*,int*);
  170. m_point sum_v(vector<m_point>*);
  171. public:
  172. m_point get_seq_rel_score_at_pos(int,int);
  173. m_point get_seq_rel_max_pos(int,int*,char*);
  174. unsigned long int get_abs_start();
  175. unsigned long int get_abs_end();
  176. string get_chr();
  177. char get_strand();
  178. bool assign(string);
  179. virtual void scan(vector<tfbs>*,int,bool);
  180. string name();
  181. string seq();
  182. string w_name();
  183. string get_id();
  184. m_point max(int);
  185. m_point rel(int);
  186. m_point rel2(int);
  187. int mpos(int);
  188. int mpos2(int);
  189. char mstrand(int);
  190. void reverse();
  191. };
  192. class big_sequence : public sequence
  193. {
  194. private:
  195. vector<vector <m_point> > MAX_REL_SCORE;
  196. vector<vector <int> > MAX_POS;
  197. vector<vector <string> > MAX_MOTIF;
  198. vector<vector<char> > MOTIF_STRAND;
  199. void SCAN(vector<tfbs>::iterator,bool);
  200. public:
  201. // vector<m_point> row_return(int);
  202. void scan(vector<tfbs>*,int,bool);
  203. string track_output(int);
  204. string inline_output();
  205. };
  206. class query
  207. {
  208. private:
  209. // m_point **correlation_matrix;
  210. int dam, bigst_seq_size;
  211. vector<sequence> QUERY;
  212. vector<tfbs> QTFBS;
  213. vector<m_point> ts;
  214. vector<m_point> zt;
  215. vector<m_point> bt;
  216. vector<m_point> min_binom_bins;
  217. vector<vector <m_point> > binom_bins;
  218. vector<int> seq_bin_num;
  219. vector<int> soil_pos;
  220. void distance_analysis();
  221. char comp_char(char);
  222. string revcomp(string, char);
  223. m_point t_den_calc(int,m_point,int,m_point);
  224. int max_v_pos(vector<m_point>);
  225. int min_v_pos(vector<m_point>);
  226. // void build_correlation_matrix(vector<tfbs>*);
  227. // void correlation_matrix_output(vector<tfbs>*,string);
  228. // void correlation_output(vector<tfbs>*,string);
  229. // void correlation_matrix_copy(m_point**, int);
  230. vector<int> matrix_line_max_pos(m_point*,int,int);
  231. vector<int> matrix_line_min_pos(m_point*,int,int);
  232. string positional_output();
  233. m_point seq_stats(int, m_point*);
  234. char flip_strand(char);
  235. // void draw_distr_graph(vector<m_point>*, vector<int>*, int, string);
  236. public:
  237. void assign(vector<sequence>,vector<tfbs>);
  238. void scan();
  239. void t_stud_calc(vector<tfbs>*);
  240. void z_test(vector<tfbs>*);
  241. void b_test(vector<tfbs>*);
  242. void bins_stuff(vector<tfbs>*);
  243. // void correlation(vector<tfbs>*);
  244. void pearson_corr();
  245. string binom_bins_line(int);
  246. void output(int,vector<tfbs>*);
  247. // void img_output(vector<tfbs>*);
  248. void zvectors_output(vector<tfbs>*);
  249. };
  250. class chip
  251. {
  252. private:
  253. vector<m_point> Log_Ratio;
  254. unsigned short int noe;
  255. vector<string> Chr;
  256. vector<m_point> exp_bg_means;
  257. vector<m_point> exp_fg_means;
  258. // vector<m_point> bin_label;
  259. vector<unsigned int> bins;
  260. vector<unsigned int> Start;
  261. vector<unsigned int> Stop;
  262. vector<m_point> Score;
  263. string current_chr, current_chr_seq;
  264. ofstream fout;
  265. bool consistency_check(vector<string> *);
  266. void stack_processor(vector<vector<string> > *);
  267. void set_bins();
  268. void display_bins();
  269. void Exp_Push_Mean(vector<string>*);
  270. void get_current_chr_seq(string, string*);
  271. public:
  272. chip();
  273. };
  274. //END
  275. //GLOBAL VARIABLES DECLARATION
  276. bool VERBOSE = false, BINS = false, COMPLETE_MATRIX = false, SHORT = false, SHOW_BINS_BINOM = false, USE_N = false, CORRELATION = false,
  277. COMPLETE_CORRELATION_MATRIX = false, GENERATE_MATRIX = false, POSITIONAL = false, USE_SUM = false, BIG_SEQ = false, PEARSON = false,
  278. NO_BACKGROUND = false, DOUBLE_STRAND = true, IMG_OUTPUT = false, TRASH_N = false, ZVECTOR_OUTPUT = false, USE_T = false,
  279. SHOW_CONSERVATION_VECTOR = false, REVERSE_STRAND = false, BED_OUTPUT = false, DRAW_BACKGROUND = false, BUILD_INDEX = false, NO_HEADER = false, CHIP = false;
  280. int BIG_WINDOW = 500, BIG_STEP = 250, SEQ_EDGE_START = 0, SEQ_EDGE_END = 0, MIN_SEQ_LENGTH = 0, TSS_POS = 0;
  281. string q_idlist, idfile, queryfile, promfile, matrixfile, usethismatrix, usethismatrixname, matrixlist, trackbg, w2file, fasta_matrix_file, bound_oligos_file, bg_for_img, magic, index_file, distance_analysis_matrix;
  282. vector<string> chip_files;
  283. vector<m_point> bins;
  284. double TRACK_CUTOFF = 0, CHIP_CUTOFF = 0.05, DISTANCE_CUTOFF = 0.000001;
  285. unsigned int SPLIT[2] = {0,0}, PROBE = 3;
  286. //END
  287. //FUNCTIONS DECLARATION
  288. vector<string> get_file_names(const char *);
  289. void error_handler(int, string);
  290. vector<tfbs> v_tfbs_builder(vector<string>);
  291. vector<sequence> v_sequence_builder(vector<string>, bool);
  292. vector<big_sequence> v_big_sequence_builder(vector<string>, bool);
  293. vector<string> fasta_reader(const char*);
  294. vector<vector <string> > w2_mat_reader(string);
  295. vector<vector <string> > wil_mat_reader(string);
  296. void v_tfbs_add_bof(vector<tfbs>*);
  297. void generate_output_matrix(vector<tfbs>*, vector<sequence>*);
  298. void generate_output_bins(vector<tfbs>*, vector<sequence>*);
  299. void generate_output_big_sequence(vector<tfbs>*, vector<big_sequence>*);
  300. void generate_tfbs_rows(vector<tfbs>*, vector<sequence>*, bool);
  301. //void generate_tfbs_rows(vector<tfbs>*, vector<big_sequence>*);
  302. void generate_tfbs_rows_from_matrix(vector<tfbs>*);
  303. void generate_tfbs_rows_from_short_matrix(vector<tfbs>*,ifstream*);
  304. void bins_gen();
  305. void command_line_parser(int, char**);
  306. void min_seq_length_fixer(vector<tfbs>*);
  307. void idfile_handler(vector<sequence> *);
  308. string build_sequence_index(vector<sequence>*);
  309. string show_conservation(vector<tfbs>*);
  310. bool id_check(vector<string> *, vector<sequence>::iterator);
  311. bool qlist_check(string, vector<string>*);
  312. void display_help();
  313. //END
  314. int main(int argc, char **argv)
  315. {
  316. vector<tfbs> TFBS;
  317. vector<sequence> SEQUENCE;
  318. vector<big_sequence> BIG_SEQUENCE;
  319. query Q;
  320. VI['A'] = 0;
  321. VI['C'] = 1;
  322. VI['G'] = 2;
  323. VI['T'] = 3;
  324. rc_VI['T'] = 0;
  325. rc_VI['G'] = 1;
  326. rc_VI['C'] = 2;
  327. rc_VI['A'] = 3;
  328. command_line_parser(argc, argv);
  329. bins_gen();
  330. if(CHIP)
  331. {
  332. chip C;
  333. }
  334. TFBS = v_tfbs_builder(get_file_names(matrix_files));
  335. min_seq_length_fixer(&TFBS);
  336. if((int)promfile.size() == 0 && (int)matrixfile.size() == 0 && usethismatrix.size() == 0 && !PEARSON && (int)usethismatrixname.size() == 0 && distance_analysis_matrix.empty())
  337. // SEQUENCE = v_sequence_builder(get_file_names(fasta_files),false);
  338. error_handler(NO_BACKGROUND_AVAILABLE,"");
  339. else if((int)promfile.size() != 0 && (int)matrixfile.size() == 0)
  340. {
  341. vector<string> pfile;
  342. pfile.push_back(promfile);
  343. if(!BIG_SEQ)
  344. {
  345. SEQUENCE = v_sequence_builder(pfile,false);
  346. if(!idfile.empty())
  347. idfile_handler(&SEQUENCE);
  348. }
  349. else
  350. {
  351. if(BIG_STEP > BIG_WINDOW || BIG_STEP < 1)
  352. error_handler(BAD_BIG_VALUES,"");
  353. BIG_SEQUENCE = v_big_sequence_builder(pfile,false);
  354. }
  355. }
  356. if((int)queryfile.size() != 0 && BIG_SEQUENCE.size() == 0)
  357. {
  358. vector<string> qfile;
  359. vector<sequence> QUERY;
  360. qfile.push_back(queryfile);
  361. QUERY = v_sequence_builder(qfile,true);
  362. Q.assign(QUERY,TFBS);
  363. QUERY.clear();
  364. qfile.clear();
  365. }
  366. else if(BIG_SEQUENCE.size() != 0)
  367. {
  368. cerr << "Processing..." << endl;
  369. for(int S = 0; S < BIG_SEQUENCE.size(); S++)
  370. BIG_SEQUENCE[S].scan(&TFBS,S,false);
  371. generate_output_big_sequence(&TFBS, &BIG_SEQUENCE);
  372. }
  373. if((int)matrixfile.size() == 0 && promfile.size() != 0)
  374. {
  375. for(int S = 0; S < SEQUENCE.size(); S++)
  376. SEQUENCE[S].scan(&TFBS,S,false);
  377. generate_tfbs_rows(&TFBS, &SEQUENCE, false);
  378. }
  379. else if(matrixfile.size() != 0 && promfile.size() == 0)
  380. generate_tfbs_rows_from_matrix(&TFBS);
  381. else if(matrixfile.size() == 0 && promfile.size() == 0)
  382. NO_BACKGROUND = true;
  383. if((int)queryfile.size() != 0)
  384. {
  385. Q.scan();
  386. if(PEARSON)
  387. Q.pearson_corr();
  388. if(!POSITIONAL && !NO_BACKGROUND)
  389. {
  390. Q.t_stud_calc(&TFBS);
  391. Q.bins_stuff(&TFBS);
  392. Q.z_test(&TFBS);
  393. Q.b_test(&TFBS);
  394. }
  395. Q.output(TFBS[0].row_size(),&TFBS);
  396. // if(IMG_OUTPUT && !POSITIONAL)
  397. // Q.img_output(&TFBS);
  398. if(ZVECTOR_OUTPUT)
  399. Q.zvectors_output(&TFBS);
  400. }
  401. // if(CORRELATION && TFBS.size() > 1)
  402. // Q.correlation(&TFBS);
  403. if((int)matrixfile.size() == 0 && (GENERATE_MATRIX || queryfile.size() == 0))
  404. generate_output_matrix(&TFBS, &SEQUENCE);
  405. else if((int)matrixfile.size() != 0 && !SHORT && (GENERATE_MATRIX || queryfile.size() == 0))
  406. {
  407. COMPLETE_MATRIX = false;
  408. promfile = matrixfile;
  409. generate_output_matrix(&TFBS, &SEQUENCE);
  410. }
  411. if(BINS)
  412. generate_output_bins(&TFBS, &SEQUENCE);
  413. cerr << endl;
  414. exit(EXIT_SUCCESS);
  415. }
  416. void display_help()
  417. {
  418. ifstream in("HELP.txt");
  419. if(!in)
  420. {
  421. cerr << "\nCan't find file: \"HELP.txt\" in this folder. Please check your installation." << endl;
  422. exit(1);
  423. }
  424. string line;
  425. while(getline(in,line))
  426. cerr << line << endl;
  427. in.close();
  428. /* cerr << endl << "SYNOPSIS" << endl
  429. << "\tpscan -q multifastafile -p multifastafile [options]" << endl
  430. << "\tpscan -p multifastafile [options]" << endl
  431. << "\tpscan -q multifastafile -M matrixfile [options]" << endl << endl
  432. << "OPTIONS" << endl
  433. << "\t[-q file] | Specify the multifasta file containing the foreground sequences." << endl << endl
  434. << "\t[-p file] | Specify the multifasta file containing the background sequences." << endl << endl
  435. << "\t[-m file] | Use it if the background data are already available in a file (see -g option)." << endl << endl
  436. << "\t[-M file] | Scan the foreground sequences using only the Jaspar/Transfac matrix file contained in the specified file." << endl << endl
  437. << "\t[-l file] | Use the matrices contained in the file (for matrix file format see below)." << endl << endl
  438. << "\t[-N name] | Use only the matrix with that name (usable only in association with -l)." << endl << endl
  439. << "\t[-ss] | Perform single strand only analysis." << endl << endl
  440. << "\t[-rs] | Perform single strand only analysis on the reverse strand." << endl << endl
  441. << "\t[-split num1 num2] | Sequences are scanned only from position num1 and for num2 nucleotides." << endl << endl
  442. << "\t[-trashn] | Discards sequences containing \"N\"." << endl << endl
  443. << "\t[-n] | Oligos containing \"N\" will not be discarded. Instead a \"N\" will obtain an \"average\" score." << endl << endl
  444. << "\t[-g] | If a background sequences file is used than a file will be written containing the data calculated" << endl <<
  445. "\t\tfor that background and the current set of matrices." << endl <<
  446. "\t\tFrom now on one can use that file (-m option) instead of the sequences file for faster calculations." << endl << endl
  447. << "\t[-ui file] | An index of the background file will be used to avoid duplicated sequences." << endl<< endl
  448. << "\t[-bi] | Build an index of the background sequences file (to be used later with the -ui option)." << endl <<
  449. "\t\tThis is useful when you have duplicated sequences in your background that may introduce a bias in your results." << endl << endl
  450. << "\t[-h] | Display this help." << endl << endl
  451. << "NOTES" << endl
  452. << "\tThe sequences to be used with Pscan have to be promoter sequences." << endl
  453. << "\tTo obtain meaningful results it's critical that the background and the foreground sequences are consistent between them either in size" << endl
  454. << "\tand in position (with respect to the transcription start site). For optimal results the foreground set should be a subset of the background set." << endl << endl
  455. << "\tIf the \"-l\" option is not used Pscan will try to find Jaspar/Transfac matrix files in the current folder." << endl
  456. << "\tJaspar files have \".pfm\" extension while Transfac ones have \".pro\" extension." << endl
  457. << "\tIf Jaspar matrix files are used than a file called \"matrix_list.txt\" must be present in the same folder." << endl
  458. << "\tThat file contains required info about the matrices in the \".pfm\" files." << endl << endl
  459. << "\tFor info on how Pscan works pleare refer to the paper." << endl << endl
  460. << "EXAMPLES" << endl << endl
  461. << "1)\tpscan -p human_450_50.fasta -bi" << endl << endl
  462. << "\tThis command will scan the file \"human_450_50.fasta\" using the matrices in the current folder." << endl
  463. << "\tIt is handy to use that command the first time one uses a set of matrices with a given background sequences file." << endl
  464. << "\tA file called human_450_50.short_matrix will be written and it can be used from now on every time you want to use" << endl
  465. << "\tthe same background sequences with the same set of matrices. A file called human_450_50.index will be written too" << endl
  466. << "\tand it will be useful every time you will use the same background file." << endl << endl
  467. << "2)\tpscan -q human_nfy_targets.fasta -m human_450_50.short_matrix -ui human_450_50.index" << endl << endl
  468. << "\tThis command will scan the file human_nfy_targets.fasta searching for over-represented binding sites (with respect" << endl
  469. << "\tto the preprocessed background contained in the \"human_450_50.short_matrix\" file) using the matrices in the current folder." << endl
  470. << "\tPlease note that the query file \"human_nfy_targets.fasta\" must be a subset of the sequences contained in the " << endl
  471. << "\tbackground file \"human_450_50.fasta\""
  472. << "\tin order to use the index file with the \"-ui\" option." << endl << "\tThis means that both the sequences and their FASTA headers used" << endl
  473. << "\tin the query file must appear"
  474. << "\tin the background file as well." << endl << "\tUsing the \"-ui\" option when the sequences contained in the query file are not a subset of" << endl
  475. << "\tthe background file will"
  476. << "have undefined/unpredictable outcomes."
  477. << "\tThe output will be a file called \"human_nfy_targets.fasta.res\" where you will find all the used matrices sorted by ascending P-value." << endl
  478. << "\tThe lower the P-value obtained by a matrix, the higher are the chances that the transcription factor associated to that matrix" << endl
  479. << "\tis a regulator of the input promoter sequences." << endl
  480. << "\tThe fields of the output are the following: \"Transcription Factor Name\", \"Matrix ID\", \"Z Score\", \"Pvalue\", \"Foreground Average\", \"Background Average\"." << endl << endl
  481. << "3)\tpscan -q human_nfy_targets.fasta -M MA0108.pfm" << endl << endl
  482. << "\tThis command will scan the sequences file \"human_nfy_targets.fasta\" using the matrix contained in \"MA0108.pfm\"." << endl
  483. << "\tThe result will be written in a file called \"human_nfy_targets.fasta.ris\" where you will find the sequences in input" << endl
  484. << "\tsorted by a descending score (between 1 and 0). The higher the score, the better is the oligo found with respect to the used matrix." << endl
  485. << "\tThe fields of the output are the following: \"Sequence Header\", \"Score\", \"Position from the end of sequence\", \"Oligo that obtained the score\"," << endl
  486. << "\tStrand where the oligo was found\"." << endl << endl
  487. << "4)\tpscan -p human_450_50.fasta -bi -l matrixfile.wil" << endl << endl
  488. << "\tThis command is like Example #1 with the difference that the matrices set to be used is the one contained in the \"matrixfile.wil\" file." << endl
  489. << "\tPlease look at the \"example_matrix_file.wil\" file included in this Pscan distribution to see the correct format for matrices file." << endl << endl
  490. << "5)\tpscan -q human_nfy_targets.fasta -l matrixfile.wil -N MATRIX1" << endl << endl
  491. << "\tThis command is like Example #3 but it will use the matrix called \"MATRIX1\" contained in the \"matrixfile.wil\" file." << endl << endl;
  492. */
  493. exit(1);
  494. }
  495. void command_line_parser(int argc, char **argv)
  496. {
  497. if(argc == 1)
  498. display_help();
  499. for(int i = 1; i < argc; i++)
  500. {
  501. string buf = argv[i];
  502. if(buf == "-q")
  503. {
  504. if(i < argc - 1)
  505. queryfile = argv[++i];
  506. continue;
  507. }
  508. if(buf == "-h")
  509. {
  510. display_help();
  511. continue;
  512. }
  513. /* if(buf == "-Q")
  514. {
  515. if(i < argc - 1)
  516. idfile = argv[++i];
  517. continue;
  518. }
  519. if(buf == "-d")
  520. {
  521. if(i < argc - 1)
  522. distance_analysis_matrix = argv[++i];
  523. continue;
  524. }
  525. */
  526. if(buf == "-ql")
  527. {
  528. if(i < argc - 1)
  529. q_idlist = argv[++i];
  530. continue;
  531. }
  532. /*
  533. if(buf == "-imgm")
  534. {
  535. if(i < argc - 1)
  536. bg_for_img = argv[++i];
  537. continue;
  538. }
  539. if(buf == "-magic")
  540. {
  541. if(i < argc - 1)
  542. magic = argv[++i];
  543. continue;
  544. }
  545. if(buf == "-track")
  546. {
  547. if(i < argc - 1)
  548. trackbg = argv[++i];
  549. continue;
  550. }*/
  551. else if(buf == "-p")
  552. {
  553. if(i < argc - 1)
  554. promfile = argv[++i];
  555. BIG_SEQ = false;
  556. continue;
  557. }
  558. /*
  559. else if(buf == "-P")
  560. {
  561. if(i < argc - 1)
  562. promfile = argv[++i];
  563. BIG_SEQ = true;
  564. // USE_N = true;
  565. continue;
  566. }
  567. else if(buf == "-chip")
  568. {
  569. i++;
  570. while(i < (argc))
  571. {
  572. buf = argv[i];
  573. if(buf[0] == '-')
  574. {
  575. i--;
  576. break;
  577. }
  578. else
  579. {
  580. chip_files.push_back(buf);
  581. i++;
  582. }
  583. }
  584. if(!chip_files.empty())
  585. CHIP = true;
  586. continue;
  587. }
  588. */
  589. else if(buf == "-m")
  590. {
  591. if(i < argc - 1)
  592. matrixfile = argv[++i];
  593. continue;
  594. }
  595. #ifdef BOUND_OLIGOS
  596. else if(buf == "-bo")
  597. {
  598. if(i < argc - 1)
  599. bound_oligos_file = argv[++i];
  600. continue;
  601. }
  602. #endif
  603. /* else if(buf == "-w2")
  604. {
  605. if(i < argc - 1)
  606. w2file = argv[++i];
  607. continue;
  608. }*/
  609. else if(buf == "-l")
  610. {
  611. if(i < argc - 1)
  612. fasta_matrix_file = argv[++i];
  613. continue;
  614. }
  615. else if(buf == "-ui")
  616. {
  617. if(i < argc - 1)
  618. index_file = argv[++i];
  619. continue;
  620. }
  621. else if(buf == "-M")
  622. {
  623. if(i < argc - 1)
  624. usethismatrix = argv[++i];
  625. continue;
  626. }
  627. else if(buf == "-N")
  628. {
  629. if(i < argc - 1)
  630. usethismatrixname = argv[++i];
  631. continue;
  632. }
  633. /*
  634. else if(buf == "-L")
  635. {
  636. if(i < argc - 1)
  637. matrixlist = argv[++i];
  638. continue;
  639. }*/
  640. /* else if(buf == "-start")
  641. {
  642. if(i < argc - 1)
  643. {
  644. SEQ_EDGE_START = atoi(argv[++i]);
  645. MIN_SEQ_LENGTH = SEQ_EDGE_START;
  646. }
  647. continue;
  648. }
  649. else if(buf == "-end")
  650. {
  651. if(i < argc - 1)
  652. {
  653. SEQ_EDGE_END = atoi(argv[++i]);
  654. MIN_SEQ_LENGTH = SEQ_EDGE_END;
  655. }
  656. continue;
  657. }
  658. else if(buf == "-min")
  659. {
  660. if(i < argc - 1)
  661. MIN_SEQ_LENGTH = atoi(argv[++i]);
  662. continue;
  663. }*/
  664. else if(buf == "-split")
  665. {
  666. if(i < argc - 2)
  667. {
  668. SPLIT[0] = atoi(argv[++i]);
  669. SPLIT[1] = atoi(argv[++i]);
  670. }
  671. continue;
  672. }
  673. /* else if(buf == "-w")
  674. {
  675. if(i < argc - 1)
  676. BIG_WINDOW = atoi(argv[++i]);
  677. continue;
  678. }
  679. else if(buf == "-step")
  680. {
  681. if(i< argc - 1)
  682. BIG_STEP = atoi(argv[++i]);
  683. continue;
  684. }
  685. else if(buf == "-probe")
  686. {
  687. if(i< argc - 1)
  688. PROBE = atoi(argv[++i]);
  689. continue;
  690. }
  691. else if(buf == "-tss")
  692. {
  693. if(i< argc - 1)
  694. TSS_POS = -atoi(argv[++i]);
  695. continue;
  696. }
  697. else if(buf == "-cut")
  698. {
  699. istringstream str;
  700. if(i< argc - 1)
  701. str.str(argv[++i]);
  702. str >> TRACK_CUTOFF;
  703. continue;
  704. }
  705. else if(buf == "-chip_cutoff")
  706. {
  707. istringstream str;
  708. if(i < argc - 1)
  709. str.str(argv[++i]);
  710. str >> CHIP_CUTOFF;
  711. }
  712. else if(buf == "-d_cutoff")
  713. {
  714. istringstream str;
  715. if(i < argc - 1)
  716. str.str(argv[++i]);
  717. str >> DISTANCE_CUTOFF;
  718. }
  719. else if(buf == "-v")
  720. {
  721. VERBOSE = true;
  722. continue;
  723. }*/
  724. else if(buf == "-bi")
  725. {
  726. BUILD_INDEX = true;
  727. continue;
  728. }
  729. /* else if(buf == "-drawbg")
  730. {
  731. DRAW_BACKGROUND = true;
  732. continue;
  733. }
  734. else if(buf == "-noheader")
  735. {
  736. NO_HEADER = true;
  737. continue;
  738. }
  739. else if(buf == "-cv")
  740. {
  741. SHOW_CONSERVATION_VECTOR = true;
  742. continue;
  743. }
  744. else if(buf == "-img")
  745. {
  746. IMG_OUTPUT = true;
  747. continue;
  748. }
  749. else if(buf == &
声明:本文内容由网友自发贡献,不代表【wpsshop博客】立场,版权归原作者所有,本站不承担相应法律责任。如您发现有侵权的内容,请联系我们。转载请注明出处:https://www.wpsshop.cn/w/繁依Fanyi0/article/detail/353105
推荐阅读
相关标签
  

闽ICP备14008679号