#!/usr/bin/perl
######################################################################################################################################
# samt99_filter.pl
######################################################################################################################################
#
# Written by Osvaldo Graña, Protein Design Group (CNB-CSIC), December 2000
#
#
# This application changes the format of SAMT99 files to a new format readable by THREADLIZE
# ("http://www.cnb.uam.es/~pazos/threadlize" Pazos, Rost & Valencia (1999) Bioinformatics 15(12):1062-1063)
# It can also include the SAMT99 secondary structure prediction in the output.
#
#
# 1.- HOW TO USE IT WITHOUT SECONDARY STRUCTURE PREDICTION (TWO ARGUMENTS)
#
# You go to "http://www.cse.ucsc.edu/research/compbio/HMM-apps/T99-model-library-search.html", you put your query sequence and your
# email, then you must check the following options: "Pretty Align", "A2M", and "email results".
# When you've got your email answers from SAMT99 you save the "*dbhits.txt" and "*pairwise.pa" files, those are the arguments for
# samt99_filter.pl
#
# For example, if you have saved from SAMT99 the files "namedbhits.txt" and "namepairwise.pa", you type the following in your
# Unix/Linux terminal:
#
#     samt99_filter.pl namedbhits.txt namepairwise.pa > results
#
# in this case your results will be in the file "results", you can use another file name for your results
#
# By doing it this way, you will find in your results file the list of hits
# and the corresponding alignments.
#
#
#
# 2.- HOW TO USE IT INCLUDING SECONDARY STRUCTURE PREDICTION (THREE ARGUMENTS)
#
# You do the same as before, but you also put your query sequence in "http://www.cse.ucsc.edu/research/compbio/HMM-apps/T99-query.html",
# your email, and just check the option "return secondary structure prediction in format(s): CASP" and the option "email results".
# Once you've got your email answers from SAMT99 you save the "*casp.ss" file, it is the third argument for samt99_filter.pl
#
# Taking the last example: "namedbhits.txt", "namepairwise.pa" and "namecasp.ss" you type the following in your Unix/Linux terminal:
#
#     samt99_filter.pl namedbhits.txt namepairwise.pa namecasp.ss > results
#
# with your results in the file "results"
#
# By doing it this way, you will find in your results file the list of hits, the corresponding alignments and the predicted secondary
# structure.
#
#
# IN BOTH CASES, the file "results" can be used as input by THREADLIZE:
#
#     threadlize results &
#
#
# LICENSE TERMS:
#
# This program is free licensed for academic and non-profit users. Private users, please, contact Osvaldo Graña (osvaldog@cnb.uam.es).
#
# Please, do not modify the code.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY.
#
#
#
# Osvaldo Graña
# Protein Design Group (CNB-CSIC)
# Campus Universidad Autonoma.
# Cantoblanco. 28049 Madrid.
# Tlf: +34-91-5854570. Fax: +34-91-5854506.
# osvaldog@cnb.uam.es
# http://gredos.cnb.uam.es/osvaldog
#
#
######################################################################################################################################

use strict;

# Flush STDOUT after every write.
$| = 1;

# File-scope working variables. They are all kept (including the ones this
# part of the script never touches) because later parts of the script may
# rely on them.
my (
    @pairwise,
    @pairwise2,
    $parcial,
    @linea,
    $stop,
    $comprobar,
    $provisional,
    $provisional2,
    $longitud,
    $i,
    $copiar,
    @cadena1,
    @cadena2,
    $ya,
    $j,
    $secuencia1,
    $secuencia2,
    $dosD,
    @secundaria,
    @puntuacion,
    $secuencia,
    $empieza,
    $secund,
    $puntuac,
    $x,
    $k,
    @prov
);

# Opens $path for reading and returns the handle.
# Dies with $message when $path is missing, empty, or cannot be opened.
# The three-argument form is used so that a file name coming from the command
# line is never interpreted as a mode, pipe or dup specification; the explicit
# defined/empty check keeps a missing argument from reaching open() at all.
sub _open_input {
    my ($path, $message) = @_;
    my $fh;
    (defined($path) && $path ne "" && open($fh, '<', $path)) || die $message;
    return $fh;
}

print "3D-PSSM\n\n";

# ---------------------------------------------------------------------------
# Load the pairwise alignment file (second argument) into memory.
# @pairwise is consumed line by line while the header is built below;
# @pairwise2 keeps an untouched copy for the alignment pass further down.
# ---------------------------------------------------------------------------
{
    my $fh = _open_input($ARGV[1], "\n samt99_filter.pl: Cannot find second argument\n\n");
    @pairwise = <$fh>;
    close $fh;
}
@pairwise2 = @pairwise;

$secuencia1 = "";
$comprobar  = 0;
$stop       = 0;

# $dosD is true when a third argument (secondary structure file) was given.
$dosD = (defined($ARGV[2]) && $ARGV[2] ne "") ? 1 : 0;

# ---------------------------------------------------------------------------
# With a third argument: collect columns 2 and 3 of every line found between
# a line starting with "MODEL" and a line starting with "END".
# ---------------------------------------------------------------------------
if ($dosD) {
    my $fh = _open_input($ARGV[2], "\n samt99_filter.pl: Cannot find third argument\n\n");
    $empieza    = 0;
    @secundaria = ();
    @puntuacion = ();
    while (<$fh>) {
        @linea = split;
        my $tag = defined($linea[0]) ? $linea[0] : "";
        if ($tag eq "MODEL") {
            $empieza = 1;
        }
        elsif ($tag eq "END") {
            $empieza = 0;
        }
        elsif ($empieza) {
            push(@secundaria, $linea[1]);
            push(@puntuacion, $linea[2]);
        }
    }
    close $fh;
}

# ---------------------------------------------------------------------------
# Print the Conf/Pred/AA header from the rows of the pairwise file whose first
# field is numerically 1. Scanning stops at the first "Alignment" line seen
# after such a row, or when the input runs out. The "@pairwise" test in the
# loop condition also ends the scan when no such row exists at all (the
# original condition never became true in that case and looped forever).
# ---------------------------------------------------------------------------
while (!$stop && @pairwise) {
    $_ = shift @pairwise;
    chomp;
    @linea = split;

    my $is_row_one = defined($linea[0]) && $linea[0] == 1;

    # Without a secondary structure file the row must also carry a second
    # field containing a word character; with one, any row 1 is accepted.
    if ($is_row_one && ($dosD || (defined($linea[1]) && $linea[1] =~ /\w/))) {
        $secuencia1 = $linea[1];
        $secuencia1 =~ s/-|\.//g;          # drop gap characters
        $secuencia1 = uc($secuencia1);
        $longitud   = length($secuencia1);

        if ($dosD) {
            # One prediction symbol and one score per residue; each score is
            # multiplied by 9 and truncated to an integer.
            $secund  = "";
            $puntuac = "";
            for my $pos (1 .. $longitud) {
                $secund .= shift(@secundaria);
                $x = int(9 * (shift @puntuacion));
                $puntuac .= $x;
            }
            print "Conf: $puntuac\n";
            print "Pred: $secund\n";
        }
        else {
            # No prediction available: one "0" and one blank per residue.
            # NOTE(review): the padding literal was broken across a line break
            # in the damaged source; presumably a single blank - confirm.
            $provisional  = "0" x $longitud;
            $provisional2 = " " x $longitud;
            print "Conf: " . $provisional . "\n";
            print "Pred: " . $provisional2 . "\n";
        }
        print " AA: " . $secuencia1 . "\n\n\n";
        $comprobar = 1;
    }
    elsif ($comprobar && defined($linea[0]) && $linea[0] eq "Alignment") {
        $stop = 1;
    }
}
$stop = 1;    # the original loop could only be left with $stop set

# ---------------------------------------------------------------------------
# Copy the hit list from the dbhits file (first argument). Copying starts
# after the line whose first field is "SeqID". A 4-character identifier gets a
# trailing "-", and the second and third columns are swapped on output.
# ---------------------------------------------------------------------------
$copiar = 0;
{
    my $fh = _open_input($ARGV[0], "\n samt99_filter.pl: Cannot find first argument\n\n");
    while (<$fh>) {
        @linea = split;
        if ($copiar && defined($linea[0]) && $linea[0] ne "") {
            if (length($linea[0]) == 4) {
                $linea[0] = $linea[0] . "-";
            }
            printf(": %5s %14s %16s %16s\n", $linea[0], $linea[2], $linea[1], $linea[3]);
            # Read and discard the line that follows a hit row.
            # NOTE(review): the readline operator was missing in the damaged
            # source; presumably every hit is followed by a line that is not
            # wanted in the output - confirm against a real dbhits file.
            $_ = <$fh>;
        }
        if (defined($linea[0]) && $linea[0] eq "SeqID") {
            $copiar = 1;
            print "SeqID EValue Reverse SeqLabel /domaincoreclass/\n\n";
        }
    }
    close $fh;
}

# ---------------------------------------------------------------------------
# Alignment pass over the untouched copy of the pairwise file.
# NOTE(review): this loop continues beyond the end of the visible source. It
# is reproduced token for token up to the point where the source is cut off.
# ---------------------------------------------------------------------------
@cadena1=();
@cadena2=();
$ya=0;
$i=1;
$secuencia1="";
$secuencia2="";

foreach (@pairwise2) {
    chomp;
    @linea=(split);
    switch:{
        (($linea[0] eq "Seq") && ($linea[1] eq "1:")) && do {
            $cadena1[0]="xxxxx__Seq";
            $ya=1;
            last switch;
        };
        (($linea[0] eq "Seq") && ($linea[1] eq "2:")) && do {
            $cadena2[0]=$linea[2];
            if (length($cadena2[0])==4) {
                $cadena2[0]=$cadena2[0]."-";
            }
            $cadena2[0]=$cadena2[0]."__Seq";
            last switch;
        };
        ($linea[0]==1) && do {
            $cadena1[$i]=$linea[1];
            $secuencia1=$secuencia1.$linea[1];
            last switch;
        };
        ($linea[0]==2) && do {
            $_=$linea[1];
            chomp;
            @prov=split(//,$_);
            $k=0;
            while (($prov[$k]!~ /[A-Z]/) && ($k