/*
 * Source code: t-coffee
 * File: reformat.c
 */

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>
#include <string.h>
#include <ctype.h>
#include <time.h>
#include "io_lib_header.h"
#include "util_lib_header.h"
#include "dp_lib_header.h" 
#include "define_header.h"

/*
 * ACTION(x)    : the x-th entry of action_list, or NULL when fewer than
 *                x+1 actions are available (n_actions < x+1).
 * ACTION2(x,y) : same lookup, but yields y instead of NULL as the fallback.
 *
 * Both macros expand in place and therefore require `n_actions` and
 * `action_list` to be in scope at the point of use.
 * Every argument is parenthesized so that compound expressions
 * (e.g. `i & 1`, `c ? 1 : 0`) are treated as a single operand.
 * x may be evaluated twice: do not pass expressions with side effects.
 */
#define ACTION(x) ((n_actions>=((x)+1))?action_list[(x)]:NULL)
#define ACTION2(x,y) ((n_actions>=((x)+1))?action_list[(x)]:(y))

/**************************************************************************************************/
/*****************************    FORWARD DECLARATIONS     **********************************/
/**************************************************************************************************/
/*
 * Prototypes declared ahead of use.
 * Those marked `static` have internal linkage, so their definitions are
 * expected in this translation unit; the non-static ones have external
 * linkage and are visible to other translation units as well.
 */
int output_transitions(char *outfile, Alignment *A);
static int output_age_matrix ( char *outfile, int val);
int SeqGCGCheckSum(char *seq, int len);
static Sequence *seq2year ( Sequence *S, int modulo);
static Sequence* output_n_pavie_age_channel (Sequence *S, char *name, int n);
static Sequence* output_pavie_age_channel (Sequence *S, char *name, int modulo);

static int output_seq2struc(char *outfile, Alignment *A);
void output_conservation_statistics ( char *file, Alignment *A);
/**************************************************************************************************/
/*****************************    SEQ_REFORMAT     ******************************************/
/**************************************************************************************************/
int seq_reformat ( int argc, char *argv[]) 
      {
      
        Sequence_data_struc *D1=NULL;
      Sequence_data_struc *D2=NULL;
      Sequence_data_struc *D_ST=NULL;
      Action_data_struc  *RAD;
      
      
      
      int a, b;
      
      char *in_format;
      char *in2_format;
      char *out_format;
      char *in_file;
      char *in2_file;
      char *out_file;
      char *out2_file;
      char *struc_in_format;
      char *struc_out_format;
      char *struc_in_file;
      char *struc_out_file;
      char**action_list;
      char **action;
      char *rename_file;
      char *cache;
      char ***rename_list=NULL;
      int code=CODE;

      int n_actions=0;
      int print_format=0;
      /*INITIALIZATIONS*/
      
      RAD=vcalloc ( 1, sizeof ( Action_data_struc));
      RAD->keep_case=1;
      declare_name (cache);sprintf ( cache, "use");
      declare_name(in_file);
      declare_name(in2_file);
      declare_name(out_file);
      declare_name(out2_file);
      declare_name(struc_in_format);
      declare_name(struc_out_format);
      declare_name(RAD->coor_file);
      
      declare_name(struc_in_file);
      declare_name(struc_out_file);
      declare_name(in_format);
      declare_name(in2_format);
      declare_name(out_format);
      declare_name(rename_file);
      
      
      

      action_list=declare_char ( 100, 100);

/*END INITIALIZATION*/

      addrandinit ( (unsigned long) 500);
      
      if ( argc==1 || strm6 ( argv[1], "h", "-h", "help", "-help", "-man", "?"))
            {
            
              fprintf ( stdout, "\n%s (%s,%s,%s [%s])\n",PROGRAM, VERSION,AUTHOR, DATE, URL);
            fprintf ( stdout, "\n***********     MINIMUM SYNTAX        *****************");
            fprintf ( stdout, "\nseq_reformat -in <in_file> -output <out_format>");
            fprintf ( stdout, "\nSome File formats are automatically recognised");
            fprintf ( stdout, "\nSee Format section");
            fprintf ( stdout, "\n");
            fprintf ( stdout, "\n***********        MAIN FLAGS              ******************");
            fprintf ( stdout, "\n-in     name........Name of the file read");
            

            fprintf ( stdout, "\n-input  format......Name of the format read, see Input Format Section");
            fprintf ( stdout, "\n...................Automatic detection, except for seqs of numbers");
            fprintf ( stdout, "\n...................-input number_aln | number_fasta");
            fprintf ( stdout, "\n-in2    fname......Second alignment");
            fprintf ( stdout, "\n-input2 format.....See -input");
            fprintf ( stdout, "\n-out    fname......Output file (defualt is STDOUT");
            fprintf ( stdout, "\n-output format.....Output Format, default is fasta_aln");
            fprintf ( stdout, "\n-struc_in   name...File containing a coded aln");
            fprintf ( stdout, "\n-struc_in_f format.See -input and output format section");
            fprintf ( stdout, "\n-struc_out  fname..Name of the output structure");
            fprintf ( stdout, "\n-struc_out_f symbol");
            fprintf ( stdout, "\n-keep_case=on|off..keep case, On by default");
            fprintf ( stdout, "\n-action +ac1 +ac2..See the action section");
            fprintf ( stdout, "\n-rename <file>.....Rename the sequences following <file> indications");
            fprintf ( stdout, "\n...................File Format: One couple <name1><space><name2>/line");
            fprintf ( stdout, "\n...................Rename order <name1> into <name2>");
            fprintf ( stdout, "\n...................code file: -output code_name");
            fprintf ( stdout, "\n-code   <file>     Rename file <name1> to <name2>");
            fprintf ( stdout, "\n-decode <file>     Rename file <name2> to <name1>");
            fprintf ( stdout, "\n-no_warning........Suppresses all warnings");
            fprintf ( stdout, "\n-cache.............use,ignore,update,local, DirectoryName");
            
            
            fprintf ( stdout, "\n");

            fprintf ( stdout, "\n***********     REFORMAT ACTIONS               *****************");
            
            fprintf ( stdout, "\n     +name2unique_name....replace duplicated name with name_#");
            fprintf ( stdout, "\n     +swap_header........,swapp comments: replace comments/name in 1 by in 2");
            fprintf ( stdout, "\n     +swap_lib_header.F...Replace the sequences in the tc_lib (-in) with those in F");
            fprintf ( stdout, "\n     .....................F is a legal FASTA file");
            
                
            fprintf ( stdout, "\n     +translate[0-2]......Translate on Frame 0, 1, 2 ");
            fprintf ( stdout, "\n     +translate[3]........Translate on the longest frame");
            
            fprintf ( stdout, "\n     +rm_gap n ...........Removes col with n%% gap [n=100]");
            fprintf ( stdout, "\n     +rmgap_col SEQ1:SEQ2.Removes column with a gap in SEQ [#] ");
            fprintf ( stdout, "\n     +clean_cdna..........Undocumented"); 
            fprintf ( stdout, "\n     +clean_maln..........Undocumented"); 
            fprintf ( stdout, "\n     +backtranslate.......Random Backtranslation");
            fprintf ( stdout, "\n     +reorder.............Reorders sequences of <in> (in2 is the master");
            
            fprintf ( stdout, "\n     +trim[_aln_%%%%50_n111_N50_T_Fn_fS_pS] [string2]");
            fprintf ( stdout, "\n     ....................._<seq or aln>, aln is assumed");
            fprintf ( stdout, "\n     ....................._%%%%<max_percent_similarity>");
            
            
            fprintf ( stdout, "\n     ....................._n<max_number_of_sequence>       ");
            fprintf ( stdout, "\n     ....................._N<percent_of_sequences_to_keep>");
            fprintf ( stdout, "\n     ....................._T Reorder the sequences according to a tree BEFORE triming");
            fprintf ( stdout, "\n     ....................._Fn Keep only sequences that have AT LEAST ONE residue aligned");
            
            
            fprintf ( stdout, "\n     ....................._O<min sim> Remove outlayers that have less than min average sim with other sequences"); 
            
            fprintf ( stdout, "\n     .....................in the n first and n last columns. ");
            
            fprintf ( stdout, "\n     .....................Keeping Sequences: Sequences provided via -in2 will be kept");

            fprintf ( stdout, "\n     .....................Keeping Sequences: Sequences whose name contains <string> in field fS will be kept");
            fprintf ( stdout, "\n     ....................._f<NAME|SEQ|COMMENT> designates a field"); 
            fprintf ( stdout, "\n     .....................<string> is a Perl regular expression");

            fprintf ( stdout, "\n     +chain...............Identifies all the intermediate sequences from <-in>");
            fprintf ( stdout, "\n     .....................needed to join every sequence pair in <-in2>");
            fprintf ( stdout, "\n     +trimTC3 <seqfile>...Identifies in the infile aln the ");
            fprintf ( stdout, "\n     .....................sequences that should improve the ");
            fprintf ( stdout, "\n     .....................alignment of the sequences in seqfile");
            fprintf ( stdout, "\n     +trimTC2 N...........Trim the the ALN (-in) with the tree (-in2)");
            fprintf ( stdout, "\n     .....................Make sure N sequences remain");
            fprintf ( stdout, "\n     +trimTC #%%<max_similarity>#K<keep seq>#K<..>#N<MaxNseq>#D<delta_value>");
            fprintf ( stdout, "\n     .....................Trims using the -trim option of T-Coffee");
                fprintf ( stdout, "\n     .....................#%%90: gives all the representatives of the 90%% similarity groups");
            fprintf ( stdout, "\n     .....................#n10: returns the 10 most divergeant sequences");
            fprintf ( stdout, "\n     .....................#K1aab: guarantees sequence 1aab will be in the group");
            fprintf ( stdout, "\n     +aln2cons  mat name..Ouputs a consensus sequence");
            fprintf ( stdout, "\n     .....................The consensus is determined using mat");
            fprintf ( stdout, "\n     .....................By Default, mat=blosum62mt, name=Cons");
            
            fprintf ( stdout, "\n     +collapse_aln <new name> <seq1> <seq2...> | file name");
                fprintf ( stdout, "\n     .....................Replaces a group of sequences with its consensus");
            fprintf ( stdout, "\n     .....................The replacement sequence is named <new_seq>");
            fprintf ( stdout, "\n     .....................List of sequences can be provided via a file");        
            fprintf ( stdout, "\n     .....................File:>new_name seq1 seq2 seq3....");
            fprintf ( stdout, "\n     +original_seqnos.....Keep original seqnos [SWITCH]");
            fprintf ( stdout, "\n     +seqnos..............Print Seqnos [SWITCH]");
            fprintf ( stdout, "\n     +code_dna_aln........Undocumented")  ;
            fprintf ( stdout, "\n     +grep..[NAME|SEQ|COMMENT]..[KEEP|REMOVE]..[string]......");
            fprintf ( stdout, "\n     .....................Keeps or Removes Sequences matching string");
            fprintf ( stdout, "\n     +extract_block <seq> <start> <end> | <seq> <pos> |<filename>");
            fprintf ( stdout, "\n     .....................Extract column pos OR [start to end[");
            fprintf ( stdout, "\n     .....................<filename> Format");
            fprintf ( stdout, "\n     .......................seq start end | seq pos");
            fprintf ( stdout, "\n     .......................# for comments");
            fprintf ( stdout, "\n     .......................! seq offset_value (0 by default)");
            fprintf ( stdout, "\n     .....................Can extract as many positions as needed");
            fprintf ( stdout, "\n     .....................seq=cons: measure positions on the full aln");
            fprintf ( stdout, "\n     +cat_aln.............Concatenates the alignments input via -in and -in2");
            fprintf ( stdout, "\n     +msalist2cat_pwaln.min..max");
            fprintf ( stdout, "\n     .....................extract all pw projections and conctaenates those\n");
            fprintf ( stdout, "\n     .....................where id>=min and id<=max\n");
            fprintf ( stdout, "\n     .....................min and max can be omitted (min=0, max=100)\n");
            
            
            fprintf ( stdout, "\n     +seq2msa <matrix>....makes a standard progressive alignment using matrix");
            fprintf ( stdout, "\n     +realign_block <c1> <c2> <pg>");
            fprintf ( stdout, "\n     .....................Realign column c1 to c2 (non inc.) with pg)");
            fprintf ( stdout, "\n     .....................pg reads fasta and outputs fasta");
            fprintf ( stdout, "\n     .....................pg -infile=<infile> -outfile=<outfile>");
            fprintf ( stdout, "\n     +extract_seq seq_name (start end seq_name start end...) | filename");
            fprintf ( stdout, "\n     .....................seq_name='*': every seq");
            fprintf ( stdout, "\n     .....................start='*'   : real start");
            fprintf ( stdout, "\n     .....................end='*'     : real end");
            fprintf ( stdout, "\n     .....................filename: fasta format");
            fprintf ( stdout, "\n     +extract_seq_list name1 name2");
            fprintf ( stdout, "\n     .....................Extracts entire sequences");
            fprintf ( stdout, "\n     +remove_seq sn1 sn2..Removes sequences sn1, sn2...");
            fprintf ( stdout, "\n     +thread_profile_on_msa <file>");
            fprintf ( stdout, "\n     .....................Threads a list of profiles on corresponding seq");
            fprintf ( stdout, "\n     .....................File: >seqname _R_ <msa file> [nlines]");
            
            fprintf ( stdout, "\n     +thread_dna_on_prot_aln");
            fprintf ( stdout, "\n     .....................-in DNA.seq and -in2 AA.aln"); 
                fprintf ( stdout, "\n     +thread_struc_on_aln");
            fprintf ( stdout, "\n     .....................-in structure and -in2 aln"); 
            fprintf ( stdout, "\n     +use_cons............Use the consensus for n[SWITCH]");
            fprintf ( stdout, "\n     +upper.n|[n1-n2].....n omitted sets everything to upper case");
            fprintf ( stdout, "\n     .....................To use n: provide a number_aln via:");
            fprintf ( stdout, "\n     .....................-struc_in <number_file> -struc_in_f number_aln");
            fprintf ( stdout, "\n     .....................if use_cons is set n, is read on the cons");
            fprintf ( stdout, "\n     .....................n: will upper every residue with a value of n in struc_in");
            fprintf ( stdout, "\n     .....................[n1-n2]: upper residues between n1 and n2");
            fprintf ( stdout, "\n     +lower  n|[n1-n2]....See +upper");
            fprintf ( stdout, "\n     +switchcase  n|[n1-n2]See +upper");
            fprintf ( stdout, "\n     +keep   n|[n1-n2]....Only keep residues that have a score between n1 and n2");
            
            fprintf ( stdout, "\n     +invert..............Inverts the sequences: CAT => TAC");
            fprintf ( stdout, "\n     +rotate name         Rotate an MSA, names each sequence name_col#");
            fprintf ( stdout, "\n     +convert n|[n1-n2] s1 s2 ....");
            fprintf ( stdout, "\n     +merge_annotation.... ");
            
            fprintf ( stdout, "\n     .....................Converts residues with your alignment");
            fprintf ( stdout, "\n     .....................similar to upper");
            fprintf ( stdout, "\n     .....................s1: ABCDe turns every ABCD into e");
            fprintf ( stdout, "\n     .....................s1: #e turns any residue into e");
            fprintf ( stdout, "\n     aln2short_aln L C S..Turns sequences into shorter sequences");
            fprintf ( stdout, "\n     .....................L: list of residues to keep");
            fprintf ( stdout, "\n     .....................S: Size of Streches replaced by symbol C");

                        
            fprintf ( stdout, "\n     +random n l..........Generates N random sequences of len l");
            fprintf ( stdout, "\n     .....................You must provide a file with -in");
            fprintf ( stdout, "\n     +count n|[n1-n2] s1 s2....");
            fprintf ( stdout, "\n     .....................Counts residues with your alignment");
            fprintf ( stdout, "\n     .....................similar to convert");
            fprintf ( stdout, "\n     +print_format........prints the format name");
            fprintf ( stdout, "\n     +keep_name...........Keep the original sequence name on extraction");
            
            fprintf ( stdout, "\n     +remove_aa pos Ml Ncycle Random_len");
            fprintf ( stdout, "\n     .....................Randomly modifies an alignment");
            fprintf ( stdout, "\n     .....................pos=0: chosen randomly");
            fprintf ( stdout, "\n     .....................MaxLen of the deletions, Ncycle: number of cycles");
            fprintf ( stdout, "\n     .....................Random_len: 0 sets the len to maxlen, 1 to a random value");
            fprintf ( stdout, "\n     +remove_nuc.x........Remove Position 1, 2 or 3 of every codon"); 
            fprintf ( stdout, "\n     +evaluate matrix..gop..gep");
            fprintf ( stdout, "\n.................................Make an SP evaluation with matrix");
            fprintf ( stdout, "\n.................................Uses Natural Gap penalties");
            fprintf ( stdout, "\n.................................gop and gep must be negative");
            fprintf ( stdout, "\n.................................use -output=color_ascii, color_html to get a color display");
            fprintf ( stdout, "\n     +evaluate matrix.....Make an SP evaluation with a matrix");
            
            fprintf ( stdout, "\n     +msa_weight proc.....Computes weights using the procedure");
            fprintf ( stdout, "\nTree Analysis___________________________________________________");
            
            fprintf ( stdout, "\n     +force_aln filename|seq1 res1 seq2 res2");
            fprintf ( stdout, "\n     .....................Forces residue 1 of seq1 to be aligned with res2 of seq 2");
            fprintf ( stdout, "\n     .....................In a file, there must be one pair of interaction/line");
            fprintf ( stdout, "\n     +tree_prune..........Prune the tree -in using the sequences provided via -in2");
            fprintf ( stdout, "\n     +tree_cmp............Compares the tree -in and the tree -in2");
            fprintf ( stdout, "\n     .....................-in and -in2 can contain different taxons");
            fprintf ( stdout, "\n     +treelist2groups......count all topologies within a list of trees");
            fprintf ( stdout, "\n     .....................-in is in fasta format with each name being a newick file");
            fprintf ( stdout, "\n     +treelist2splits......List all the splits in a list of files");
            fprintf ( stdout, "\n     ......................splits can be restricted to a list of sequences provided via -in2");
            fprintf ( stdout, "\n     +treelist2dmat.......outputs a diatance matrix for a list of trees");

            fprintf ( stdout, "\n     +tree_compute n s....Computes a tree using the MSA provided with -in");
            fprintf ( stdout, "\n     ....................n:0-9, controls the way the MSA is filtered");
            fprintf ( stdout, "\n     ....................s:pam250mt|blosum62mt|categories|enthropy");
            fprintf ( stdout, "\n     ....................s:controls the column evaluation in MSA");
            fprintf ( stdout, "\n     +change_distances.f.f:float, sets all the distances to f in the tree");
            fprintf ( stdout, "\n     +change_bootstrap n..:n=0 removes all the bootstrap values");
            fprintf ( stdout, "\n     .....................:n!=0 adds a the value n to every node");
            fprintf ( stdout, "\n     +tree2dpatree........Replaces tree distances with the minimum %%ID in");
            fprintf ( stdout, "\n     .....................the depending subgroup. The ID is measured on an");
            fprintf ( stdout, "\n     .....................-in=TREE -in2=ALN");
            fprintf ( stdout, "\n     +unroot..............Removes the root in the input tree");
            fprintf ( stdout, "\n     +tree2group.N.I.P....Reports all the tree subgroup with at most Nseq");
            fprintf ( stdout, "\n     .....................and at min I%% identity. Output format can be read by");
            fprintf ( stdout, "\n     .....................collapse_tree. New groups are named P_1, P_2...");           
            fprintf ( stdout, "\n     +collapse_tree.F.....Collapses trees. F is either a file or a list");
            fprintf ( stdout, "\n     .....................<new name> <seq1> <seq2>...");
            fprintf ( stdout, "\n     +aln2tree............Computes a tree");
            fprintf ( stdout, "\n     ..ktupN|aln|sarmat   ktupN: match size N to estimate distances");
            fprintf ( stdout, "\n     .....................aln: Measures distances on aln");
            fprintf ( stdout, "\n     .....................sarmat: expects in to be a SAR matrix of O and I");
            fprintf ( stdout, "\n     ..nj | cw............Runs Neighbor Joining OR Cw to compute Tree");
            fprintf ( stdout, "\n     ..dpa................Turns the tree into a daptree (+tree2dpatree)");
            fprintf ( stdout, "\n     +node_sort..<name>...Sort leafs of tree n1, by node distance");
                    
            fprintf ( stdout, "\nStructure Analysis___________________________________________________");
            fprintf ( stdout, "\n     +struc2contacts.A.B D.Displays in capitals all the residues of A");
            fprintf ( stdout, "\n     ......................Less than D Angs from a residue of B");
            fprintf ( stdout, "\n     ......................A and B are pdb file, D is a distance in Angs");
            fprintf ( stdout, "\n     +seq2contacts.A.D.....Identifies all the residues in contact with ligands");
            fprintf ( stdout, "\n     ......................Ligands are in the FASTA header of struc in");
            fprintf ( stdout, "\n     ......................>Name _S_ [Target Struc] [Ligand1] [Chain] ...");
            fprintf ( stdout, "\n     ......................Output: number_fasta: 0=no contact, 1=ligand 1...");
            fprintf ( stdout, "\n     ......................9: residues in contact with more than 1 ligand");
            fprintf ( stdout, "\n     ......................Use -output=color_html/ascii to display result");
            fprintf ( stdout, "\n     +struc2nb...D.........Display a list of all the residues D appart");
            fprintf ( stdout, "\n     +rm_template...V......Removes _[S|G|R]_[template] to sequence names");
            fprintf ( stdout, "\n     ......................V: omitted | sequences <=> Output sequences");
            fprintf ( stdout, "\n     ......................V: template <=> Output templates");
                  
            fprintf ( stdout, "\n     +add_template.F.......Add _[S|G|R]_[template] to sequence names");
            fprintf ( stdout, "\n     ......................F can either be a fasta file or an executable");
            fprintf ( stdout, "\n     ......................F: File: >name _S_ template");
            fprintf ( stdout, "\n     ......................F: executable: pg -infile=<seq> -outfile=<tagged>");
            fprintf ( stdout, "\nMatrix Comparison___________________________________________________");
            fprintf ( stdout, "\n    +mat2cmp...............Returns the correlation coefficient between two matrices");
            fprintf ( stdout, "\n    .......................-in mat1 -input matrix, -in2 mat2 -input2 matrix");
            fprintf ( stdout, "\n***********  INPUT FORMATS: Alignments *****************");
            fprintf ( stdout, "\n     AUTOMATIC RECOGNITION");
            fprintf ( stdout, "\n     perl_xxx:............. runs xxx onto the input file");
            fprintf ( stdout, "\n     xxxx <file> > outfile..xxx reads any formats, outputs fasta");
            fprintf ( stdout, "\n     amps_aln       saga_aln      ");
            fprintf ( stdout, "\n     clustal_aln    fasta_aln     msf_aln  ");
            fprintf ( stdout, "\n     dali_aln       gotoh_aln     pima_aln");
            fprintf ( stdout, "\n     dialign_aln    matrix");    
            fprintf ( stdout, "\n     NON AUTOMATIC RECOGNITION (use the -input file to specify the format");
            fprintf ( stdout, "\n     number_aln     newick_tree");
            fprintf ( stdout, "\n");
            fprintf ( stdout, "\n***********  INPUT FORMATS: Sequences *****************");
            fprintf ( stdout, "\n     fasta_seq      dali_seq       pir_seq");
            fprintf ( stdout, "\n     barton_list_tc amps_sd_scores EST_fasta");
            fprintf ( stdout, "\n     gor_seq        gor_struc      number_fasta[*]");
            fprintf ( stdout, "\n     swissprot      tc_lib         pdb_struc");
            fprintf ( stdout, "\n");
            fprintf ( stdout, "\n***********  INPUT FORMATS: Structures   *****************");
            fprintf ( stdout, "\n    rna_number");
            fprintf ( stdout, "\n");
            fprintf ( stdout, "\n***********  OUTPUT FORMATS: Alignments ******************");
            fprintf ( stdout, "\n     compressed_aln saga_aln        clustal_aln");
            fprintf ( stdout, "\n     phylip_aln     msf_aln         fasta_aln ");
            fprintf ( stdout, "\n     pir_aln        ");
            fprintf ( stdout, "\n     color_html,color_ps......colored using the struc_in file  ");
            fprintf ( stdout, "\n     color_protogene..........colors codons");
            fprintf ( stdout, "\n     color_exoset.............mixes conservation (gray) and introns (RGB)");
                        
            fprintf ( stdout, "\n     color_pdf      pw_lib_saga_aln tdna_aln");
            fprintf ( stdout, "\n     thread_dna_on_prot_aln");
            fprintf ( stdout, "\n");
            fprintf ( stdout, "\n*********** OUTPUT FORMATS: sequence  ******************");
            fprintf ( stdout, "\n     fasta_seq      fasta_seq1     gotoh_seq");
            fprintf ( stdout, "\n     gor_seq        cache_id");
            fprintf ( stdout, "\n");
            fprintf ( stdout, "\n*********** OUTPUT FORMATS: weights ******************");
            fprintf ( stdout, "\n     constraints    saga_pw_sd_weights  nseq\n");
            fprintf ( stdout, "\n");
            fprintf ( stdout, "\n*********** OUTPUT Formats: special  ****************");
            fprintf ( stdout, "\n     len             name               statistics<_hnrglNL>");
            fprintf ( stdout, "\n      sim............outputs a similarity matrix based on an id comparison of -in");
            fprintf ( stdout, "\n      sim_sarmat.....in is sar matrix");
            fprintf ( stdout, "\n      sim_idscore....makes dp alignment of the sequences using Blosum62mt");
            fprintf ( stdout, "\n      sim_idscoreDNA.makes dp alignment of the sequences using idmat");
            fprintf ( stdout, "\n      sim............if -in2 is set: in1 vs in2, idscore");
            
            fprintf ( stdout, "\n     code_name......Outputs a compact list of names for code/decode");

            
            
            fprintf ( stdout, "\n");
      
            
            fprintf ( stdout, "\n");
            myexit (EXIT_SUCCESS);
            }

      argv=standard_initialisation (argv, &argc);
      for ( a=1; a< argc; a++)
            {
            if ( strcmp ( argv[a], "-in_f")==0 ||strm(argv[a],"-input") )
                  {
                  if ( strcmp ( argv[a], "-in_f")==0) fprintf ( stdout,"\nWARNING: %s deprecated, use -input instead", argv[a]);
            
                  sprintf ( in_format, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp ( argv[a], "-cache")==0 )
                  {
                  sprintf (cache, "%s", argv[a+1]);
            
                  a++;
                  }
            else if ( strcmp ( argv[a], "-in2_f")==0||strm(argv[a],"-input2") )
                  {
                    if ( strcmp ( argv[a], "-in_f")==0) fprintf ( stdout,"\nWARNING: %s deprecated, use -input2 instead", argv[a]);
            
                  sprintf ( in2_format, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp ( argv[a], "-seqnos")==0)
                  {
                  sprintf (action_list[n_actions++], "seqnos");
                  }
            
            else if ( strcmp( argv[a], "-action")==0)
                    {
                  while ((a+1)<argc && argv[a+1][0]!='-')
                    {
                      sprintf (action_list[n_actions++], "%s", argv[a+1]);
                      a++;
                    }
                  }
            else if ( strcmp ( argv[a], "-keep_case")==0)
                    {
                    if(!NEXT_ARG_IS_FLAG)RAD->keep_case=1;
                    else RAD->keep_case=(strm3(argv[a], "on","ON","On"))?1:0;
                    
                  }
            
            else if ( strcmp ( argv[a], "-conv")==0)
                      {
                  if ( strncmp ( argv[a+1],"set",3)==0)RAD->symbol_list=make_symbols (argv[++a],&(RAD->n_symbol));
                  else
                      {
                      RAD->symbol_list=declare_char (STRING, STRING);
                      while(!NEXT_ARG_IS_FLAG)
                            { 
                          sprintf ( RAD->symbol_list[RAD->n_symbol], "%s", argv[++a]);
                          RAD->n_symbol++; 
                          }
                      }
                  }
            else if ( strcmp ( argv[a], "-struc_in_f")==0)
                  {
                  sprintf ( struc_in_format, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp ( argv[a], "-out_f")==0 ||strm(argv[a],"-output") )
                  {
                  if ( strcmp ( argv[a], "-out_f")==0) fprintf (stdout, "\nWARNING: %s deprecated, use -output instead", argv[a]);
                  sprintf ( out_format, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strm ( argv[a], "-struc_out_f") || strm ( argv[a], "-output_struc") )
                  {
                  sprintf ( struc_out_format, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp (argv[a],"-in")==0)
                  {
                  sprintf( in_file, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp (argv[a],"-rename")==0)
                  {
                  sprintf( rename_file, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp (argv[a],"-code")==0)
                  {
                  code=CODE;
                  sprintf( rename_file, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp (argv[a],"-decode")==0)
                  {
                    code=DECODE;
                    sprintf( rename_file, "%s", argv[a+1]);
                    a++;
                  }
            else if ( strcmp (argv[a],"-in2")==0)
                  {
                  sprintf( in2_file, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp (argv[a],"-coor")==0)
                  {                 
                  sprintf( RAD->coor_file, "%s", argv[a+1]);
                  a++;
                  }
            else if (strcmp (argv[a],"-out")==0) 
                  {
                  sprintf (out_file, "%s", argv[a+1]);
                  a++;
                  }
            else if (strcmp (argv[a],"-out2")==0) 
                  {
                  sprintf (out2_file, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp (argv[a],"-struc_in")==0)
                  {
                  sprintf( struc_in_file, "%s", argv[a+1]);
                  a++;
                  }
            else if (strcmp (argv[a],"-struc_out")==0) 
                  {
                  sprintf (struc_out_file, "%s", argv[a+1]);
                  a++;
                  }
            else if ( strcmp ( argv[a], "-rm_gap")==0)
                  {
                  RAD->rm_gap=1;
                  }
            else if ( strcmp ( argv[a], "-print_format")==0)
                  {
                  print_format=1;
                  }
            else if ( strcmp ( argv[a], "-no_warning")==0)
                    {
                  set_warning_mode (NO);
                  }
            
            else  
                  {
                  fprintf ( stdout, "\nUNKNOWN OPTION: %s", argv[a]);
                  myexit(EXIT_FAILURE);
                  }
            }
/****************************************************************/
/*                                                              */
/*                          Data Preparation                    */
/*                                                              */
/*                                                              */
/****************************************************************/
      
      prepare_cache (cache);    
/****************************************************************/
/*                                                              */
/*                          INPUT SEQ/ALN                       */
/*                                                              */
/*                                                              */
/****************************************************************/
      

      if ( strm (out_format, "hasch"))
        {
          fprintf ( stdout, "%d\n", hash_file(in_file));
          myexit (EXIT_SUCCESS);
        }

      if ( rename_file[0])
        {
          rename_list=read_rename_file ( rename_file,code);
        }


      if ((D1=read_data_structure (in_format, in_file,RAD))!=NULL)
        {
          in_format=(in_format && in_format[0])?in_format:identify_seq_format(in_file);
          
          if (print_format)fprintf ( stdout, "\nFILE:%s FORMAT:%s\n", in_file, in_format);
        }
      else if ( in_file[0])
              {
              fprintf ( stdout, "\nFORMAT of file %s Not Supported[FATAL:%s]\n", in_file, PROGRAM);
            myexit(EXIT_FAILURE);
            }
      
      if ((D2=read_data_structure (in2_format, in2_file,RAD))!=NULL){if (print_format)fprintf ( stderr, "\nFILE:%s FORMAT:%s\n", in2_file, (in2_format&&in2_format[0])?in2_format:identify_seq_format(in2_file));}

      else if (!D2 && in2_file[0])
              {
              fprintf ( stderr, "\nFORMAT of file %s Not Supported [FATAL:%s]\n", in2_file, PROGRAM);
            myexit(EXIT_FAILURE);
            }
      
/*STRUCTURE INPUT*/     
      
      
      if ((D_ST=read_data_structure (struc_in_format, struc_in_file,RAD)))
          {

            if ( D_ST->CL)
            {
              Constraint_list *CL;
              int *entry;

              CL=D_ST->CL;
              
              entry=vcalloc ( LIST_N_FIELDS, sizeof (int));
              
              for (a=0; a<CL->ne; a++)
                {
                  entry=extract_entry (entry, a, CL);
                  if ( D_ST->S)(D_ST->S)->seq[entry[SEQ1]][entry[R1]-1]=entry[WE];
                }
              thread_seq_struc2aln (D_ST->A, D_ST->S);
            }
            else if ( name_is_in_list ("cons", ((D_ST)->A)->name, ((D_ST)->A)->nseq, 100));
            else
            {
              D_ST->A=copy_aln ( D1->A, D_ST->A);
              
              thread_seq_struc2aln (D_ST->A, D_ST->S);
            }
          }
      else if ((strcmp (struc_in_format, "rna_number")==0) && in_file[0])
            {
            D_ST->RNA_ST=read_rna_struc_number((D1->A),struc_in_file);
            }
      else if ( struc_in_format[0] && struc_in_file[0])
              {
                
            fprintf ( stderr, "\nSTRUC %s UNKNOWN[FATAL]", struc_in_format);
            myexit(EXIT_FAILURE);
            }
      else
        {
          D_ST=vcalloc ( 1, sizeof (Sequence_data_struc));
        }

      action=declare_char(100, 100);
      for ( a=0; a< n_actions;)
        {
         if (action_list[a][0]!='+')
            {
            fprintf ( stderr, "\nWARNING: Action %s Unknown. Actions start with a +", action_list[a]);
            myexit (EXIT_FAILURE);
            }
         else
           {
           b=0;
           sprintf ( action[b++], "%s", action_list[a++]+1);
           while ( a<n_actions && action_list[a][0]!='+')sprintf ( action[b++], "%s", action_list[a++]);
           modify_data( D1, D2, D_ST, action,b, RAD);
           }
        }

      if (rename_list)
        {
          if (D1)D1->A= rename_seq_in_aln(D1->A, rename_list);
          if (D2)D2->A=rename_seq_in_aln (D2->A, rename_list);
          if (D_ST)D_ST->A=rename_seq_in_aln (D_ST->A,rename_list);
          
          if (D1)D1->T  =rename_seq_in_tree (D1->T, rename_list);
          if (D2)D2->T  =rename_seq_in_tree (D2->T, rename_list);
          if (D_ST)D_ST->T=rename_seq_in_tree (D_ST->T,rename_list);
        }


      if ( !out_format[0] && ! struc_out_format[0])sprintf ( out_format, "%s", (in_format && in_format[0])?in_format:"fasta_aln");
      main_output  ( D1, D2, D_ST, out_format, out_file);
      main_output  ( D1, D2, D_ST, struc_out_format, struc_out_file);
      myexit (EXIT_SUCCESS);
      return EXIT_SUCCESS;
      }
      



/**************************************************************************************************/
/*****************************    FORMAT GUESSING     ******************************************/
/**************************************************************************************************/
/*
 * read_data_structure
 * Reads in_file according to in_format and returns the result wrapped in a
 * newly allocated Sequence_data_struc. When in_format is NULL or empty the
 * format is detected with identify_seq_format().
 *
 * in_format : format name (e.g. "clustal_aln", "fasta_seq", "tc_lib"), or ""/NULL
 * in_file   : file to read; NULL or "" means "nothing to read"
 * RAD       : supplies keep_case and rm_gap, which are read below
 *
 * Returns NULL when there is no file, when no format can be determined, when
 * a BLAST alignment cannot be read, or when the format is not handled here.
 * The record is only allocated once the input has been validated, so the
 * "no file"/"no format" exits do not leave an orphaned allocation behind.
 * NOTE(review): the two late NULL returns (BLAST failure, unknown format)
 * still leave D and D->A unreleased.
 */
Sequence_data_struc *read_data_structure ( char *in_format, char *in_file,    Action_data_struc  *RAD) 
         
        {
      Sequence_data_struc *D;
      char **seq_name=NULL, **sequences=NULL;
      int nseq=0, a;

      
      if (!in_file || !in_file[0])return NULL; 
      if (!in_format || !in_format[0])
        {
          in_format=identify_seq_format(in_file);
        }
      if (!in_format || !in_format[0])return NULL;
      
      /* Input looks usable: allocate the record only now */
      D=vcalloc ( 1, sizeof (Sequence_data_struc));

      D->A=declare_Alignment(NULL);             
      if ( RAD->keep_case)(D->A)->residue_case=KEEP_CASE;
      
      D->rm_gap=RAD->rm_gap;
      sprintf ( D->format, "%s", in_format);
      sprintf ( D->file, "%s", in_file);
      
      /* Dispatch on the format name; each branch fills the relevant members of D */
      if ( strm2(in_format,"saga_aln","clustal_aln"))
            {
            read_aln (in_file, D->A);
            D->S=aln2seq(D->A);
            
            }
      else if ( strm (in_format, "file_list"))
        {
          D->S=get_file_list(in_file);
        }
      else if (strm (in_format, "matrix"))
        {
          D->M=read_matrice (in_file);
        }
      else if (strm4 (in_format, "newick_tree", "newick", "nh", "new_hampshire"))
        {
          D->T=main_read_tree (in_file);
        }
      else if (strm (in_format, "blast_aln"))
              {
            if (read_blast_aln (in_file, D->A))
              {
                D->S=aln2seq(D->A);
              }
            else
              {
                return NULL;
              }
            }
      else if ( strm( in_format,"number_aln"))
            {           
            read_number_aln (in_file, D->A);
            D->S=aln2seq(D->A);
            }
      
      else if ( strm( in_format,"gotoh_aln"))
            {           
            read_gotoh_aln (in_file, D->A);
            D->S=aln2seq(D->A);
            }
      
      else if ( strm ( in_format, "msf_aln"))
            {
            read_msf_aln (in_file, D->A);
            D->S=aln2seq(D->A);
            }
      else if ( strm ( in_format, "amps_aln"))
            {
            read_amps_aln (in_file, D->A);
            D->S=aln2seq(D->A);
            }
      else if ( strm (in_format, "excel_seq"))
            {           
              D->S=perl_reformat2fasta ("excel2fasta.pl",in_file);
              (D->S)->contains_gap=0;
              D->A=seq2aln(D->S, D->A,RAD->rm_gap);
            }
      else if ( strm (in_format, "pavie_seq"))
            {           
              D->S=perl_reformat2fasta ("pavie2fasta.pl",in_file);
              (D->S)->contains_gap=0;
              D->A=seq2aln(D->S, D->A,RAD->rm_gap);
            }
      else if ( strncmp (in_format, "perl_",5 )==0)
            {           
              /* "perl_<script>": the text after the prefix is handed to perl_reformat2fasta */
              D->S=perl_reformat2fasta (in_format+5,in_file);
              (D->S)->contains_gap=0;
              D->A=seq2aln(D->S, D->A,RAD->rm_gap);
            }
      else if ( strm (in_format, "number_fasta"))
            {           
            D->S=get_fasta_sequence_num (in_file, NULL);
            (D->S)->contains_gap=0;
            D->A=seq2aln(D->S, D->A,RAD->rm_gap);
            }     
      else if ( strm (in_format, "raw_fasta"))
            {           
            D->S=get_fasta_sequence_raw (in_file, NULL);
            (D->S)->contains_gap=0;
            D->A=seq2aln(D->S, D->A,RAD->rm_gap);
            }     
      
      else if ( strm2 (in_format, "fasta_aln", "fasta_seq"))
            {
            
            D->S=get_fasta_sequence (in_file, NULL);
            if ( strcmp (in_format, "fasta_aln")==0)(D->S)->contains_gap=0;
            D->A=seq2aln(D->S, D->A,RAD->rm_gap);
            }     
      else if ( strm (in_format, "pdb") || strm (in_format, "pdb_struc"))
            {
                D->S=get_pdb_sequence (in_file);
                if ( D->S==NULL)
                  {
                  add_warning (stderr, "FAILED TO find PDB File %s", in_file);
                  myexit (EXIT_FAILURE);
                  }
                D->A=seq2aln(D->S, D->A,RAD->rm_gap);
            }
      else if ( strm2(in_format, "pir_seq", "pir_aln"))
            {
            D->S=get_pir_sequence ( in_file,NULL );
            seq2aln(D->S, D->A, RAD->rm_gap);
            }
        else if ( strm(in_format, "gor_seq") )
            {
            D->S=get_gor_sequence ( in_file,NULL );
            seq2aln(D->S, D->A, RAD->rm_gap);
            }
      else if ( strm2 ( in_format, "dali_aln", "dali_seq"))
            {
            D->S=get_sequence_dali ( in_file);
            seq2aln(D->S, D->A, RAD->rm_gap);
            }
      else if ( strm (in_format, "barton_list_tc"))
            {
            get_barton_list_tc_seq ( in_file);
            }
      else if ( strm (in_format, "amps_sd_scores"))
            {
            D->W=get_amps_sd_scores ( in_file);
            }
      
      else if ( strm ( in_format, "pima_aln"))
            {
            D->S=get_pima_sequence ( in_file);
            seq2aln (D->S, D->A, RAD->rm_gap);
            }
      else if ( strm( in_format, "gor_struc"))
              {
            D->S=get_struc_gor ( in_file);
            seq2aln(D->S, D->A, RAD->rm_gap);
            }
      else if ( strm( in_format, "dialign_aln"))
            {
            D->S=get_dialign_sequence ( in_file);
            seq2aln (D->S, D->A, RAD->rm_gap);
            }
      else if ( strm( in_format, "tc_lib") ||  strm( in_format, "mocca_lib") ||  strm( in_format, "lib"))
              {
              /* Library: the sequences come first, then the constraint list from the same file */
              read_seq_in_list (in_file,&nseq,&sequences,&seq_name); 
              D->S=fill_sequence_struc ( nseq, sequences, seq_name);
              D->CL=declare_constraint_list ( D->S,NULL, NULL, 0,NULL, NULL); 
              D->CL=read_constraint_list_file(D->CL,in_file);
              seq2aln (D->S, D->A, RAD->rm_gap);
              free_char (sequences,-1);
              free_char (seq_name, -1);
            }
      else if ( strm( in_format,"swissprot_seq"))
              {
              D->S=get_swissprot_sequence ( in_file,NULL);
              seq2aln (D->S, D->A, RAD->rm_gap);
            }
      else
              {
            return NULL; 
            }

      /* Record the source file on every entry */
      if ( D->A)
        {
          for ( a=0; a<(D->A)->nseq; a++)sprintf ( (D->A)->file[a], "%s", in_file);
        }
      if ( D->S && D->A)
        {
          /* NOTE(review): the bound is the alignment's nseq, not (D->S)->nseq;
             confirm this is intended for inputs that only fill D->S */
          for ( a=0; a<(D->A)->nseq; a++)sprintf ( (D->S)->file[a], "%s", in_file);
        }
      
      return D;
      }
/* read_sequences: alias of main_read_seq(); the result is passed back unchanged */
Sequence *read_sequences (char *name)
{
  Sequence *S=main_read_seq (name);

  return S;
}
/*
 * main_read_seq
 * Reads the sequences stored in file `name`, whatever format
 * identify_seq_format() reports for it:
 *   fasta_seq / pir_seq / swissprot_seq -> the dedicated readers
 *   any format containing "aln"         -> main_read_aln(), then ungapped
 *   any format containing "tc_lib"      -> the sequence section of the library
 *   anything else                       -> cw_read_sequences()
 * Every returned sequence gets `name` recorded in S->file[], is ungapped and
 * passed through clean_sequence().
 *
 * Returns NULL when no reader produced a sequence set (for instance when
 * main_read_aln() rejects an alignment format it does not handle) instead of
 * dereferencing a NULL set.
 */
Sequence  * main_read_seq ( char *name)
       {
       char *format=NULL;
       Sequence *S=NULL;
       Alignment *A=NULL;
       int a;


       format=identify_seq_format (name);
       

       if ( getenv4debug ("DEBUG_REFORMAT"))fprintf ( stderr, "\n\nFormat %s\n", format?format:"(null)"); 

       if (format &&strm(format, "fasta_seq"))        
       {
         S= get_fasta_sequence ( name, NULL);
       }
       else if (format &&strm(format, "pir_seq"))     S= get_pir_sequence ( name, NULL);
       else if (format &&strm(format,"swissprot_seq"))S= get_swissprot_sequence (name, NULL); 
       else if (format && strstr (format, "aln")) 
       {
         A=main_read_aln ( name, NULL);
         if ( A)
           {
             S=aln2seq(A);
             ungap_seq(S);
             free_aln(A);
           }
       }
       else if ( format && strstr (format, "tc_lib"))
       {
         int nseq;
         char **sequences=NULL, **seq_name=NULL;
         
         read_seq_in_list (name,&nseq,&sequences,&seq_name);             
         S=fill_sequence_struc ( nseq, sequences, seq_name);
         free_char (seq_name, -1);free_char (sequences, -1);
       }
       else
        {
        /*Use The ClustalW routine*/
          S=cw_read_sequences (name);
        }
       
       /* No reader delivered anything: report failure rather than crash below */
       if ( !S)
         {
           vfree (format);
           return NULL;
         }

       /* Common post-processing (also covers the file names of the tc_lib case) */
       for ( a=0; a<S->nseq; a++)sprintf ( S->file[a], "%s", name);
       vfree(format);
       ungap_seq(S);
       S=clean_sequence ( S);
       return S;
       }

/*
 * main_read_aln
 * Reads the alignment stored in file `name` into A (a new alignment is
 * declared when A is NULL) and returns it.
 *
 * If `name` does not exist but name+1 does, a leading 'A' or 'S' is taken to
 * be a type prefix and skipped. Handled formats: saga_aln, clustal_aln,
 * t_coffee_aln, conc_aln, msf_aln, fasta_aln, pir_aln, and fasta_seq/pir_seq
 * when all sequences have the same length.
 *
 * Returns NULL (after free_aln(A)) for any other format or for *_seq files
 * whose sequences differ in length; returns NULL without touching A when
 * `name` is NULL or neither `name` nor name+1 exists.
 *
 * The format string is a plain local released before returning (it used to be
 * a static that was overwritten, hence leaked, on every call), and every
 * format test now runs behind a single NULL check.
 */
Alignment * main_read_aln ( char *name, Alignment *A)
       {
       int a;

       char *format;
       Sequence *S=NULL;
       Sequence *IN_SEQ;
 
       
       if ( !name)return NULL;
       else if (!check_file_exists(name))
       {
         if ( !check_file_exists (name+1))return NULL;
         else if ( name[0]=='A') name++;
         else if ( name[0]=='S') name++;/*Line Added for the -convert flag of T-Coffee*/
       }
       

       if (!A)A=declare_aln(NULL);
       format=identify_seq_format (name);
       
       /* Remember the sequence set attached by the caller, if any */
       IN_SEQ=A->S;
       
       /* An undetermined format is handled like an unsupported one */
       if ( !format)
         {
           free_aln (A);
           return NULL;
         }
              
       if      (strm(format, "saga_aln" ) ||strm(format, "clustal_aln")||strm(format, "t_coffee_aln" ) )
       {

         read_aln ( name, A);

       }
       else if (strm (format, "conc_aln"))A=input_conc_aln (name,NULL);
       else if (strm(format, "msf_aln"  ))read_msf_aln ( name, A);
       else if (strm(format, "fasta_aln"))
                {
            S=get_fasta_sequence ( name, NULL);
            
            S->contains_gap=0;
            seq2aln (S, A, 0);            
            }
       else if (strm(format, "pir_aln"))
                {
            S=get_pir_sequence ( name, NULL);
            S->contains_gap=0;
            seq2aln (S, A, 0);
            } 
       else if (strm(format, "fasta_seq"))
         {
         S=get_fasta_sequence ( name, NULL);
         
         /* Plain sequences only qualify as an alignment when they all have the same length */
         for ( a=1; a<S->nseq; a++)
           {
             if ( strlen (S->seq[a-1])!=strlen (S->seq[a]))
               {
                 free_sequence (S, S->nseq); free_aln (A); vfree (format);
                 return NULL;
               }
           }
         S->contains_gap=0;
         seq2aln (S, A, 0);
         }
       else if (strm(format, "pir_seq"))
         {
         S=get_pir_sequence ( name, NULL);
        
         for ( a=1; a<S->nseq; a++)
           {
             if ( strlen (S->seq[a-1])!=strlen (S->seq[a]))
               {
                 free_sequence (S, S->nseq); free_aln (A); vfree (format);
                 return NULL;
               }
           }
         S->contains_gap=0;
         seq2aln (S, A, 0);
         }
       else
          {
            free_aln(A);
            vfree (format);
            return NULL;        
        }
       vfree (format);

       if ( check_list_for_dup( A->name, A->nseq))
          {
            fprintf ( stderr, "\nWARNING (main_read_aln): %s is duplicated in File %s ", check_list_for_dup( A->name, A->nseq), A->file[0]);
            A=aln2unique_name_aln(A);
        }
       /* Keep the caller's sequence set when there was one, otherwise derive it from A */
       if (IN_SEQ)A->S=IN_SEQ;
       else if (!A->S){A->S=aln2seq(A);}
       
       A->S=ungap_seq(A->S);
       A=fix_aln_seq(A, A->S);     
       compress_aln (A);
       for ( a=0; a< A->nseq; a++) sprintf ( A->file[a], "%s", name);
       
       A=clean_aln (A);
       return A;
       }


/* identify_aln_format: identifies known sequence and alignment formats by
   delegating to identify_seq_format(); its result is returned unchanged */
char * identify_aln_format ( char *file)
       {
       char *format=identify_seq_format (file);

       return format;
       }
/*
 * identify_seq_format
 * Identifies known sequence and alignment formats.
 * Returns a newly allocated 100-char buffer holding the format name, or an
 * empty string when no detector matches. A missing file is reported on
 * stderr and handed to myexit(EXIT_FAILURE).
 *
 * The detectors are tried in a fixed order and the first hit wins. When none
 * matches, the file is tested as a CLUSTAL alignment whose header line is
 * missing; if that succeeds the SOURCE FILE IS OVERWRITTEN with the
 * header-prefixed copy (see the "cp" command below).
 */
char * identify_seq_format ( char *file)
       {
       char *format=vcalloc ( 100, sizeof (char));
       
       if ( !check_file_exists(file))
       {
         fprintf (stderr, "ERROR: %s Does Not Exist [FATAL:%s]\n",file, PROGRAM);
         myexit (EXIT_FAILURE);
       }
       else if ( is_pdb_file(file))         sprintf ( format, "pdb_struc");  
       else if ( format_is_msf(file))       sprintf ( format, "msf_aln");
       else if ( format_is_fasta_seq(file)) sprintf ( format, "fasta_seq");
       else if ( format_is_fasta_aln(file)) sprintf ( format, "fasta_aln");       
       else if ( format_is_pir_aln(file))   sprintf ( format, "pir_aln");
       else if ( format_is_pir_seq(file))   sprintf ( format, "pir_seq");
       else if ( format_is_oligo(file))     sprintf ( format, "oligo_aln");
       else if ( format_is_swissprot(file)) sprintf ( format, "swissprot_seq");
       else if ( format_is_saga(file))      sprintf ( format, "clustal_aln");
       else if ( format_is_conc_aln(file))  sprintf ( format, "conc_aln");
       else if ( is_lib (file))             sprintf ( format, "tc_lib");
       else if ( is_lib_02 (file))          sprintf ( format, "tc_lib_02");
       else if ( is_newick(file))           sprintf ( format, "newick_tree");
       else 
       {
         /* Test the possibility of saga_aln format: adds robustness when
            interleaved MSA headers are missing */
         FILE *in, *out;
         int ch;
         char *with_header, *as_fasta;

         with_header=vtmpnam (NULL);
         as_fasta=vtmpnam (NULL);
         
         in=vfopen (file, "r");
         out=vfopen (with_header, "w");
         
         /* Copy of the input with a CLUSTAL header line in front */
         fprintf ( out, "CLUSTAL W (1.83) multiple sequence alignment\n\n");
         for ( ch=fgetc(in); ch!=EOF; ch=fgetc(in))
           {
             fputc (ch, out);
           }
         vfclose (out);vfclose (in);
         
         /* If the converter turns that copy into FASTA, the header was all that was missing */
         printf_system ( "clustalw_aln2fasta_aln.pl %s > %s",with_header, as_fasta);
         if (format_is_fasta (as_fasta))
           {
             add_warning ( stderr, "\nWARNING: The Source File %s was reformated into a CLUSTAL file (Missing Header)\n", file);
             printf_system ( "cp %s %s", with_header, file);
             sprintf (format, "clustal_aln");
           }
       }
       
       return format;
       }
/*
 * identify_list_format
 * Rewrites, in place, each of the n entries of list as "<mode><name>", where
 * <mode> is the character returned by identify_format() and <name> is the
 * entry as identify_format() left it (a recognised type prefix is skipped).
 * Entries reported as '?' are left untouched and an error is printed on
 * stderr. Returns list.
 */
char **identify_list_format ( char **list, int n)
       {
         char *scratch;
         int i;
      
         declare_name (scratch);
         for ( i=0; i<n; i++)
             {
               char *cursor=list[i];
               char mode;

               /* Keep a copy of the entry for the error message */
               sprintf (scratch, "%s", list[i]);
               mode=identify_format ( &cursor);
               if ( mode=='?')
                 {
                   fprintf ( stderr, "\nERROR: %s not recognised [FATAL:%s]", scratch, PROGRAM);
                   continue;
                 }

               /* cursor points into list[i], so go through scratch before rewriting the entry */
               sprintf ( scratch, "%s", cursor);
               sprintf ( list[i], "%c%s", mode, scratch);
             }
      
         vfree(scratch);
         return list;
       }
             
/*
 * name2type_name
 * Turns <file> into <Sfile>, <Afile>... and returns the result in a newly
 * allocated string of strlen(name)+2 bytes.
 *
 * A name that starts with one of "ALSMXPRW" and does not exist as a file is
 * taken to be typed already and is returned as a plain copy. Otherwise the
 * mode character reported by identify_format() is put in front of name.
 *
 * identify_format() may advance the pointer it is given. It therefore gets a
 * scratch pointer, so the result is always written at, and returned from, the
 * start of the allocation. Handing it new_name itself, as before, could shift
 * that pointer by one: the write then ran one byte past the buffer and the
 * caller received an interior pointer.
 */
char * name2type_name ( char *name)
{
  char *new_name;
  char *probe;
  char mode;
  
  new_name=vcalloc ( strlen (name)+2, sizeof (char));
  sprintf ( new_name, "%s", name);

  if (is_in_set (name[0], "ALSMXPRW") && !check_file_exists(name))
    {
      /* Already typed: the copy made above is the answer */
      return new_name;
    }

  probe=new_name;
  mode=identify_format (&probe);
  sprintf ( new_name, "%c%s", mode,name);
  return new_name;
}        

/*
 * identify_format
 * Returns the one-character mode of the name held in *fname.
 *
 * The first character is accepted as an explicit mode, and *fname is advanced
 * past it, when
 *   - it is one of "ALMSPR" and the rest of the name is an existing file, or
 *   - it is 'X' and the rest is a matrix, or
 *   - it is 'M' and the rest is a method, or
 *   - it is 'W' and the full name is not an existing file.
 * Otherwise *fname is left alone and the mode is derived from the content:
 * 'L', 'P', 'S', 'A', 'X', 'M', or '?' when nothing matches.
 */
char identify_format (char **fname)
       {
         char *s=fname[0];
         char mode=s[0];
         int typed;

         typed=(is_in_set (mode, "ALMSPR") && check_file_exists(s+1))
               || (mode=='X' && is_matrix (s+1))
               || (mode=='M' && is_method (s+1));

         if ( typed || (mode=='W' && !check_file_exists(s)))
           {
             fname[0]++;
             return mode;
           }

         /*WARNING: Order matters => internal methods can be confused with files, must be checked last*/
         if ( is_lib (s))      return 'L';
         if ( is_pdb_file (s)) return 'P';
         if ( is_seq (s))      return 'S';
         if ( is_aln (s))      return 'A';
         if ( is_matrix (s))   return 'X';
         if ( is_method (s))   return 'M';
         return '?';
       }



/*
 * is_pdb_name
 * Asks the external command "extract_from_pdb -is_pdb_name" about name.
 * Returns 1 when the command prints 1, and 0 otherwise.
 *
 * Answers are remembered in a static table of MAX_CACHED entries of
 * MAX_NAME_LEN chars. Names that do not fit, and names seen after the table
 * is full, are still answered but no longer stored, so the table can never be
 * overrun. A name too long for the command buffer is reported as "not a PDB
 * name" without running anything.
 */
int is_pdb_name ( char *name)
    {
      enum { MAX_CACHED=1000, MAX_NAME_LEN=100 };
      char command[1000];
      int result;
      int written;
      char *result_file;
      static char **buf_names;
      static int   *buf_result;
      static int   nbuf;
      FILE *fp;

      
      /*Use the look up*/
      if ( !buf_names)
      {
        buf_names=declare_char (MAX_CACHED, MAX_NAME_LEN);
        buf_result=vcalloc (MAX_CACHED, sizeof (int));
      }
      if ( (result=name_is_in_list ( name, buf_names,nbuf,MAX_NAME_LEN))!=-1)return buf_result[result];
      

      result_file=vtmpnam (NULL);
   
      written=snprintf ( command, sizeof command, "extract_from_pdb -is_pdb_name \'%s\' > %s", name, result_file);
      if ( written<0 || (size_t)written>=sizeof command)return 0;

      if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:is_pdb_name] %s\n", command);
      my_system ( command);
      
      /* An unreadable answer counts as "no" */
      fp=vfopen ( result_file, "r");
      if ( fscanf ( fp, "%d", &result)!=1)result=0;
      vfclose (fp);
      vremove ( result_file);
      
      result=(result==1)?1:0;

      /* Remember the answer only while it fits in the table */
      if ( nbuf<MAX_CACHED && strlen (name)<MAX_NAME_LEN)
        {
          sprintf ( buf_names[nbuf], "%s", name);
          buf_result[nbuf++]=result;
        }

      return result;
      
    }

/*
 * get_pdb_id
 * Receives the name of a pdb file, runs "extract_from_pdb -get_pdb_id" on it
 * and returns the first whitespace-delimited token of the output in a newly
 * allocated string. The copy of the file under get_cache_dir() is used when
 * it exists.
 *
 * All fixed-size buffers are written with explicit bounds. If the cached path
 * does not fit it is ignored; if the command does not fit nothing is run and
 * an empty id is returned. An empty id is also returned when the command
 * output holds no token.
 */
char*  get_pdb_id ( char *file)
{
  char *tmp_name;
  char command[10000];
  char cached [1000];
  char *fname;
  FILE *fp;
  char *id;
  char buf[1000];
  int written;


  tmp_name=vtmpnam(NULL);
  
  /* Prefer the copy stored in the cache directory */
  written=snprintf ( cached, sizeof cached, "%s/%s", get_cache_dir(),file);
  if ( written>=0 && (size_t)written<sizeof cached && check_file_exists(cached))fname=cached;
  else fname=file;

  buf[0]='\0';
  written=snprintf ( command, sizeof command, "extract_from_pdb -get_pdb_id %s > %s",fname, tmp_name);
  if ( written>=0 && (size_t)written<sizeof command)
    {
      if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_id] %s\n", command);
      my_system ( command);
  
      /* The field width keeps the token inside buf (999 chars + terminator) */
      fp=vfopen (tmp_name, "r");
      if ( fscanf ( fp, "\n%999s\n", buf)!=1)buf[0]='\0';
      vfclose (fp);

      if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_id]DONE\n");
    }
  
  id=vcalloc ( strlen (buf)+1, sizeof (char));
  sprintf ( id, "%s", buf);

  return id;  
}
    
  
/*
 * get_pdb_struc
 * Resolves in_name to a PDB file through is_pdb_struc(); when that fails and
 * the name starts with 'P', the lookup is retried without that first letter.
 *
 *  - start==0 && end==0: returns whatever the lookup produced. For a name
 *    that does not start with 'P' this can be NULL, as before.
 *  - otherwise: extracts residues [start, end] of the first chain with
 *    extract_from_pdb into "<in_name>_<start>_<end>.pdb" (written under
 *    get_cache_dir()) and returns that file name.
 *
 * A 'P' name that cannot be resolved either way is reported and handed to
 * crash(). The same now happens when a range is requested for any unresolved
 * name, instead of passing a NULL path on to the extraction command. The name
 * is used directly (no private copy), and the command is built with a bound.
 */
char*  get_pdb_struc(char *in_name, int start, int end)
    {
      char *name1,*name2;
      char command[LONG_STRING];
      char *name=in_name;
      int written;


      name1=is_pdb_struc(name);
      if ( name1==NULL && name[0]=='P')
      {
        /* Second chance: treat the leading 'P' as a type prefix */
        name1=is_pdb_struc (name+1);
        if ( name1==NULL)
          {
            fprintf ( stderr, "\nERROR Could not download structure %s [FATAL:%s]\n", name, PROGRAM);crash("");
            return NULL;
          }
      }

      if ( (start==0) && (end==0))return name1;

      if ( name1==NULL)
      {
        /* A segment cannot be cut out of a structure that was never found */
        fprintf ( stderr, "\nERROR Could not download structure %s [FATAL:%s]\n", name, PROGRAM);crash("");
        return NULL;
      }

      declare_name(name2);
      sprintf ( name2, "%s_%d_%d.pdb", name, start, end);
      written=snprintf ( command, sizeof command, "extract_from_pdb -infile \'%s\' -chain FIRST -coor %d %d > %s%s",check_file_exists(name1),start, end, get_cache_dir(),name2);
      if ( written>=0 && (size_t)written<sizeof command)
      {
        if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_struc] %s\n", command);
        my_system (command);

        if ( is_pdb_file(name2))return name2;
      }

      fprintf ( stderr, "\nERROR Could not extract segment [%d %d] from structure %s [FATAL:%s]\n",start, end, name, PROGRAM);crash("");
      exit (EXIT_FAILURE);

      return NULL;
    }

/* seq_is_pdb_struc: returns the template_file stored in S->T[i]->P, or NULL
   when S, S->T[i] or its P member is missing */
char*  seq_is_pdb_struc ( Sequence *S, int i)
{
  if ( S && S->T[i] && (S->T[i])->P)
    {
      return ((S->T[i])->P)->template_file;
    }
  return NULL;
}
/*
 * is_pdb_struc
 * Receives a name and tries, in this order:
 *   1. <name> as a file containing PDB data,
 *   2. <name>.pdb as a file containing PDB data,
 *   3. if is_pdb_name() accepts the name: fetching it with
 *      "extract_from_pdb -netfile" and copying the result to
 *      <cache dir><name>.pdb.
 * Returns the name of the PDB file, or NULL if everything fails.
 *
 * Results (including failures) are remembered per name in a static table of
 * MAX_CACHED entries. The value returned is the table's own copy of the file
 * name, which stays valid across later calls; the scratch buffers used during
 * the search are overwritten by the next call. Once the table is full the
 * lookup still works but is no longer recorded, and the scratch buffer is
 * returned.
 * Names too long for the internal buffers are reported as "not found".
 */
char*  is_pdb_struc ( char *name)
   {
     enum { MAX_CACHED=1000, FILE_NAME_SIZE=1000 };

     static char *file_name1;
     static char *file_name2;
     static char **buf_names;
     static char **buf_result;
     static int   nbuf;
    
     int s;
     int written;
     char *r=NULL;
     char command[1000];
     

     if ( !name || name[0]=='\0')return NULL;
       

     /*Use the look up*/
     if ( !buf_names)
      {

        buf_names=vcalloc ( MAX_CACHED, sizeof (char*));
        buf_result=vcalloc ( MAX_CACHED, sizeof (char*));
        file_name1=vcalloc ( FILE_NAME_SIZE, sizeof (char));
        file_name2=vcalloc ( FILE_NAME_SIZE, sizeof (char));
      }
     if ( (s=name_is_in_list ( name, buf_names,nbuf,-1))!=-1)return buf_result[s];
      
     /* "<name>.pdb" plus its terminator must fit in the scratch buffers */
     if ( strlen (name)+5>FILE_NAME_SIZE)return NULL;

     sprintf ( file_name1, "%s", name);
     sprintf ( file_name2, "%s.pdb", name);


     if (is_pdb_file(file_name1)){r=file_name1;}
     else if (is_pdb_file(file_name2)){r=file_name2;}
     else if (is_pdb_name (name))
       {
       char *tmpname;
       tmpname=vtmpnam (NULL);

       /* Fetch into a temporary file first, then copy the result into the cache directory */
       written=snprintf ( command, sizeof command, "extract_from_pdb -netfile \'%s\' > %s 2>/dev/null",name,tmpname);
       if ( written>=0 && (size_t)written<sizeof command)
         {
           if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:is_pdb_struc] %s\n", command);
           my_system (command);
       
           written=snprintf ( command, sizeof command, "cp %s %s%s", tmpname, get_cache_dir(), file_name2);
           if ( written>=0 && (size_t)written<sizeof command)
             {
               my_system (command);
               if ( is_pdb_file(file_name2))r=file_name2; 
             }
         }
       }

     /* Table full: answer without recording */
     if ( nbuf>=MAX_CACHED)return r;

      /*Fill the buffer*/
     buf_names[nbuf]=vcalloc ( strlen (name)+1, sizeof (char)); 
     sprintf ( buf_names[nbuf], "%s", name);
     if ( r)
       {
       buf_result[nbuf]=vcalloc ( strlen (r)+1, sizeof (char));
       sprintf (buf_result[nbuf], "%s", r);
       }
     else buf_result[nbuf]=NULL;      

     /* Hand out the stable copy rather than the scratch buffer */
     return buf_result[nbuf++];
   }

/*
 * fix_pdb_file
 * Returns the name of a file holding PDB data for `in`:
 *   - `in` itself when is_pdb_file() already accepts it,
 *   - otherwise a newly allocated copy of the name of a temporary file
 *     produced by running extract_from_pdb on `in`, if is_pdb_file() accepts
 *     that output,
 *   - otherwise (also when `in` is NULL or does not exist) a newly allocated
 *     empty string.
 *
 * Strings are only allocated on the path that returns them, and the command
 * line is built with an explicit bound.
 */
char *fix_pdb_file ( char *in)
{
  char command[10000];
  char *tmp;
  char *fixed;
  int written;

  if ( !in || !check_file_exists (in))return vcalloc(1, sizeof(char));
  if ( is_pdb_file(in))return in;

  tmp=vtmpnam (NULL);
  written=snprintf ( command, sizeof command, "extract_from_pdb %s > %s", check_file_exists(in), tmp);
  if ( written>=0 && (size_t)written<sizeof command)
    {
      my_system (command);
      if ( is_pdb_file (tmp))
        {
          /* The caller gets its own copy of the temporary file's name */
          fixed=vcalloc (strlen (tmp)+1, sizeof (char));
          sprintf (fixed, "%s", tmp);
          return fixed;
        }
    }

  return vcalloc(1, sizeof(char));
}

/* is_sap_file: returns 1 when the existing file `name` yields the token
   "Percent" twice (the second search continues from the stream returned by
   the first), 0 otherwise */
int is_sap_file ( char *name)
      {
      FILE *fp;

      if (!check_file_exists(name))return 0;
      
      fp=find_token_in_file (name, NULL, "Percent");
      if ( fp==NULL)return 0;

      fp=find_token_in_file (name, fp, "Percent");
      if ( fp==NULL)return 0;

      vfclose (fp);
      return 1;
      }


/* is_blast_file: returns 1 when the existing file `name` contains the token
   "Lambda", 0 otherwise */
int is_blast_file ( char *name)
       {
       FILE *fp;

       if ( !check_file_exists(name) ) return 0;

       fp=find_token_in_file (name, NULL, "Lambda");
       if ( fp==NULL)return 0;

       vfclose (fp);
       return 1;
       }
/* is_simple_pdb_file: returns 1 when the token "SIMPLE_PDB_FORMAT" is found
   in file `name`, 0 otherwise */
int is_simple_pdb_file ( char *name)
{
  FILE *fp=find_token_in_file (name, NULL, "SIMPLE_PDB_FORMAT");

  if ( fp==NULL)return 0;
  vfclose (fp);
  return 1;
}


/*
 * is_pdb_file
 * Returns 1 when `name` is an existing file that contains "\nATOM" together
 * with at least one of "\nHEADER" and "\nSEQRES"; returns 0 otherwise
 * (including for a NULL name).
 */
int is_pdb_file ( char *name)
       {
       FILE *fp;
       int hits=0;

       if ( name==NULL || !check_file_exists (name))return 0;

       fp=find_token_in_file (name, NULL, "\nHEADER");
       if ( fp!=NULL){vfclose (fp); hits++;}

       fp=find_token_in_file (name, NULL, "\nSEQRES");
       if ( fp!=NULL){vfclose (fp); hits++;}

       /* Without ATOM records the other markers do not count */
       fp=find_token_in_file (name, NULL, "\nATOM");
       if ( fp==NULL)return 0;
       vfclose (fp);
       hits++;

       return (hits>=2)?1:0;
       }
/* is_seq: returns 1 when `name` exists and the format name reported by
   identify_seq_format() contains "seq", 0 otherwise. The format string is
   released on every path (it used to leak when the format was known but did
   not contain "seq"). */
int is_seq ( char *name)
       {
       char *format;
       int found;

       if ( !check_file_exists(name))return 0;
       
       format= identify_seq_format(name);
       found=(format && format[0]!='\0' && strstr(format, "seq"))?1:0;
       vfree (format);
       return found;
       }
/* is_aln: returns 1 when `name` exists and the format name reported by
   identify_seq_format() contains "aln", 0 otherwise. The format string is
   released on every path (it used to leak when the format was known but did
   not contain "aln"). */
int is_aln ( char *name)
       {
       char *format;     
       int found;

       if ( !check_file_exists       (name))return 0;   
      
       format= identify_seq_format(name);
       found=(format && format[0]!='\0' && strstr(format, "aln"))?1:0;
       vfree (format);
       return found;
       }   

/* is_matrix: returns 1 when read_matrice() can load `name` (the matrix is
   released straight away), 0 otherwise */
int is_matrix (char *name)
       {
       int **m=read_matrice (name);

       if ( m==NULL)return 0;
       free_int (m, -1);
       return 1;
       }
/* is_newick: returns 1 when the very first character of file `name` is '('
   and a ';' occurs somewhere after it, 0 otherwise */
int is_newick (char *name)
   {
     FILE *fp;
     int c;
     int found=0;

     fp=vfopen (name, "r");
     if ( fgetc(fp)=='(')
       {
         /* Stop at the first ';' or at end of file */
         while ( !found && (c=fgetc(fp))!=EOF)
           {
             found=(c==';');
           }
       }
     vfclose (fp);
     return found;
   }

/* is_clustalw_matrix: returns 1 when the token "CLUSTALW_MATRIX" is found in
   file `name`, 0 otherwise */
int is_clustalw_matrix ( char *name)
{
  FILE *fp=find_token_in_file (name, NULL, "CLUSTALW_MATRIX");

  if ( fp==NULL)return 0;
  vfclose(fp);
  return 1;
}
/* is_pavie_matrix: returns 1 when the token "PAVIE_MATRIX" is found in file
   `name`, 0 otherwise */
int is_pavie_matrix ( char *name)
{
  FILE *fp=find_token_in_file (name, NULL, "PAVIE_MATRIX");

  if ( fp==NULL)return 0;
  vfclose(fp);
  return 1;
}
/* is_distance_matrix_file: returns 1 when the token
   "TC_DISTANCE_MATRIX_FORMAT_01" is found in file `name`, 0 otherwise */
int is_distance_matrix_file (char *name)
{
  FILE *fp=find_token_in_file (name, NULL, "TC_DISTANCE_MATRIX_FORMAT_01");

  if ( fp==NULL)return 0;
  vfclose(fp);
  return 1;
}  
/* is_blast_matrix: returns 1 when the token "BLAST_MATRIX" is found in file
   `name`, 0 otherwise */
int is_blast_matrix ( char *name)
{
  FILE *fp=find_token_in_file (name, NULL, "BLAST_MATRIX");

  if ( fp==NULL)return 0;
  vfclose(fp);
  return 1;
}

/* is_single_seq_weight_file: passes back token_is_in_file()'s answer for the
   marker "SINGLE_SEQ_WEIGHT_FORMAT_01" in file `name` */
int is_single_seq_weight_file ( char *name)
{
  int found=token_is_in_file ( name, "SINGLE_SEQ_WEIGHT_FORMAT_01");

  return found;
}

/* is_lib: alias of is_lib_01() */
int is_lib ( char *name)
{
  int found=is_lib_01(name);

  return found;
}
/* is_lib_02: passes back token_is_in_file()'s answer for the marker
   "TC_LIB_FORMAT_02" in file `name` */
int is_lib_02 ( char *name)
{
  int found=token_is_in_file ( name, "TC_LIB_FORMAT_02");

  return found;
}

/* is_lib_01: returns 1 when file `name` contains any of the markers
   "TC_LIB_FORMAT_01", "T-COFFEE_LIB_FORMAT_01" or "SEQ_1_TO_N" (tested in
   that order, stopping at the first hit), 0 otherwise */
int is_lib_01 (char *name)
       {
       return ( token_is_in_file ( name, "TC_LIB_FORMAT_01")
                || token_is_in_file ( name, "T-COFFEE_LIB_FORMAT_01")
                || token_is_in_file ( name, "SEQ_1_TO_N"))?1:0;
       }
/* Returns 1 when check_file_exists() accepts `name` and token_is_in_file()
 * reports the token "TC_LIB_LIST_FORMAT_01"; returns 0 otherwise. */
int is_lib_list ( char *name)
{
  if (check_file_exists (name) && token_is_in_file ( name, "TC_LIB_LIST_FORMAT_01"))
    {
      return 1;
    }
  return 0;
}
/* Returns 1 when `file` designates a method: either the file carries the
 * "TC_METHOD_FORMAT_01" token, or is_in_pre_set_method_list() accepts the
 * name (in which case the file named by the scratch buffer is removed with
 * vremove(), exactly as before). Returns 0 otherwise.
 *
 * The name is copied into a local scratch buffer before being handed to the
 * helpers. The copy is now bounded: the previous unbounded sprintf()
 * overflowed the 200-byte buffer on long names.
 */
int is_method ( char *file)
    {
      char new_file[200];
      int len;

      if ( !file)return 0;

      len=snprintf ( new_file, sizeof (new_file), "%s", file);
      if ( len<0 || len>=(int)sizeof (new_file))
        {
          /* The name does not fit in the scratch buffer. Only run the token
           * test, on the caller's string (as the is_lib_* helpers do), and
           * skip the pre-set list lookup so that a truncated name can never
           * be matched or removed.
           * NOTE(review): assumes pre-set method names are always shorter
           * than the scratch buffer - confirm. */
          if ( token_is_in_file (file, "TC_METHOD_FORMAT_01"))return 1;
          return 0;
        }

      if ( (token_is_in_file(new_file, "TC_METHOD_FORMAT_01"))){return 1;}
      if ( is_in_pre_set_method_list(new_file)) 
          {
            vremove ( new_file);
            return 1;
          }
      else
        {
          return 0;
        }
    }

/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                              SEQUENCE FORMAT IDENTIFIERS                                */
/*                                                                                         */
/***************************************************************************************** */

/* Reads the first line of `file` with vfgets() and returns 1 when strm()
 * matches it against "ALPHABET", 0 otherwise. */
int format_is_oligo(char *file)
    { 
    FILE *fp;
    char *first_line=NULL;
    int is_oligo;

    fp=vfopen ( file, "r");
    first_line=vfgets (first_line, fp);
    vfclose ( fp);

    is_oligo=0;
    if ( strm (first_line, "ALPHABET"))is_oligo=1;

    vfree (first_line);
    return is_oligo;
    }
/* Returns 1 when find_token_in_file_nlines() finds the token "MSF:" (called
 * with a limit argument of 30), 0 otherwise. The stream returned on success
 * is closed.
 *
 * The former fallback that re-opened the file and compared its first word
 * with "MSF:" could never execute (both branches above it returned) and has
 * been removed together with its scratch buffer.
 */
int format_is_msf ( char *file)
    {
    FILE *fp;

    fp=find_token_in_file_nlines (file,NULL,"MSF:", 30);
    if ( fp==NULL)return 0;

    vfclose (fp);
    return 1;
    }

//Fasta and PIR
/* A fasta alignment is a fasta file that format_is_fasta_seq() rejects. */
int format_is_fasta_aln ( char *file)

    {
      if ( !format_is_fasta (file))return 0;
      if ( format_is_fasta_seq (file))return 0;
      return 1;
    }


/* Decides whether a fasta file holds unaligned sequences.
 *   - not fasta at all                          -> 0
 *   - entries of differing length               -> 1
 *   - equal lengths, ungap() shortens an entry  -> 0 (looks like an alignment)
 *   - equal lengths, nothing removed by ungap() -> 1
 */
int format_is_fasta_seq  ( char *file)
    {
      Sequence *S;
      int i, ref_len, len_before, len_after;
      int is_seq=1;

      if ( !format_is_fasta (file))return 0;

      S=get_fasta_sequence (file, NULL);
      ref_len=strlen ( S->seq[0]);

      /* Any length mismatch means the entries cannot form an alignment */
      for ( i=0; i< S->nseq; i++)
        {
          if (strlen (S->seq[i])!=ref_len)
            {
            free_sequence (S, S->nseq);
            return 1;
            }
        }

      /* Same length everywhere: stop at the first entry that ungap() shortens */
      for ( i=0; i< S->nseq && is_seq; i++)
        {
          len_before=strlen ( S->seq[i]);
          ungap (S->seq[i]);
          len_after=strlen ( S->seq[i]);
          if ( len_before!=len_after)is_seq=0;
        }

      free_sequence (S, S->nseq);
      return is_seq;
    }

/* Returns 1 when `file` exists, get_fasta_sequence() can parse it and
 * format_is_pir() does not claim it; returns 0 otherwise.
 *
 * The parsed sequences are only used as a parse test and are released
 * immediately. Unused locals left over from an earlier implementation
 * have been removed.
 */
int format_is_fasta ( char *file)
    {
      Sequence *S;
      
    if ( !check_file_exists(file))return 0;
    if ( !(S=get_fasta_sequence (file, NULL)))return 0;
    free_sequence (S, -1);
    if ( format_is_pir(file)) return 0;
    return 1;
    
    }

/* A PIR alignment is a PIR file that format_is_pir_seq() rejects. */
int format_is_pir_aln ( char *file)

    {
      if ( !format_is_pir (file))return 0;
      if ( format_is_pir_seq (file))return 0;
      return 1;
    }

/* Decides whether a PIR file holds unaligned sequences: returns 1 when the
 * file is PIR and ungap() does not shorten any entry, 0 otherwise.
 *
 * Fixes: the parsed Sequence was leaked on the "return 1" path (it is now
 * released on every path, as format_is_fasta_seq() does), and a NULL result
 * from get_pir_sequence() is no longer dereferenced.
 */
int format_is_pir_seq ( char *file)
    {
      int a, l1, l2;
      int is_seq=1;
      Sequence *S;

      
    if ( !format_is_pir (file))return 0;

    S=get_pir_sequence (file, NULL);
    if ( !S)return 0;

    for ( a=0; a< S->nseq; a++)
      {
        l1=strlen ( S->seq[a]);
        ungap (S->seq[a]);
        l2=strlen ( S->seq[a]);
        if ( l1!=l2)
          {
            /* ungap() removed something: this is an alignment */
            is_seq=0;
            break;
          }
      }
    free_sequence (S, S->nseq);
    return is_seq;
    }
    

/* Returns 1 when every entry parsed by get_fasta_sequence() has a name
 * accepted by is_pir_name() and a sequence whose last character is '*';
 * returns 0 otherwise (including when the file cannot be parsed).
 *
 * Fix: an empty sequence string used to be indexed at strlen()-1, i.e. out
 * of bounds. An empty sequence now simply counts as "no terminating star".
 */
int format_is_pir ( char *file)
    {
      Sequence *S;
      int pir_name=1, star_end=1, a;
      
      S=get_fasta_sequence (file, NULL);
      if (!S)return 0;
      
      for (a=0; a< S->nseq; a++)
      {
        size_t len=strlen (S->seq[a]);

        if (!is_pir_name(S->name[a]))pir_name=0;
        if ( len==0 || S->seq[a][len-1]!='*')star_end=0;
      }
      free_sequence(S,-1);
      if ( pir_name && star_end) return 1;
      else return 0;
    }
/* Returns 1 when `name` contains one of the PIR type tags
 * ("P1;", "F1;", "DL;", "DC;", "RL;", "RC;", "XX;") anywhere, 0 otherwise. */
int is_pir_name (char *name)
{
  static const char *pir_tags[]={"P1;", "F1;", "DL;", "DC;", "RL;", "RC;", "XX;"};
  size_t n_tags=sizeof (pir_tags)/sizeof (pir_tags[0]);
  size_t i;

  for (i=0; i<n_tags; i++)
    {
      if (strstr (name, pir_tags[i])!=NULL)return 1;
    }
  return 0;
}
  
  
/* Returns 1 when find_token_in_file() locates the token "CONC_MSF_FORMAT_01"
 * in `file`, 0 otherwise. The stream handed back on success is closed. */
int format_is_conc_aln (char *file)
{
  FILE *fp=find_token_in_file (file, NULL, "CONC_MSF_FORMAT_01");

  if (!fp)return 0;
  vfclose (fp);
  return 1;
}
/* Returns 1 when find_token_in_file() locates one of the tokens "SAGA",
 * "CLUSTAL", "T-COFFEE_MSA" or "INTERLEAVED_MSA" (tried in that order) in
 * `file`; returns 0 otherwise. The stream returned on success is closed.
 *
 * Everything that used to follow the final "else return 0;" (extra tokens
 * and a block-pattern heuristic based on get_file_block_pattern()) was
 * unreachable and has been removed along with the locals it needed.
 * Behaviour is unchanged.
 */
int format_is_saga ( char *file) 
    {
    static char *tokens[]={"SAGA", "CLUSTAL", "T-COFFEE_MSA", "INTERLEAVED_MSA"};
    int n_tokens=4;
    int a;
    FILE *fp;
    
    for ( a=0; a<n_tokens; a++)
      {
        if ( (fp=find_token_in_file (file, NULL, tokens[a])))
          {
            vfclose (fp);
            return 1;
          }
      }
    return 0;
    }


/* Returns 1 when `name` exists, find_token_in_file_nlines() finds "\nID "
 * (called with a limit argument of 10) and find_token_in_file() finds
 * "\nSQ "; returns 0 otherwise.
 *
 * Fix: the stream returned by the first lookup used to be overwritten by the
 * second lookup without being closed, leaking one FILE handle per call.
 * Each stream is now closed as soon as its lookup has been evaluated.
 */
int format_is_swissprot (char *name)
    {
      FILE *fp;
      
      if ( !check_file_exists(name))return 0;

      fp=find_token_in_file_nlines (name,NULL,"\nID ",10);
      if ( fp==NULL)return 0;
      vfclose (fp);

      fp=find_token_in_file (name,NULL,"\nSQ ");
      if ( fp==NULL)return 0;
      vfclose (fp);

      return 1;
    } 

/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               OUTPUT STUFF                                              */
/*                                                                                         */
/***************************************************************************************** */             
/* Writes alignment `inA` (and, when available, its companion `inEA`) to
 * `name` in the requested `format` by way of main_output().
 *
 * Both inputs are copied first; the copies are expanded, optionally
 * reordered according to their expanded_order, wrapped in temporary
 * Sequence_data_struc containers and released afterwards. Always returns 1.
 */
int output_format_aln ( char *format, Alignment *inA, Alignment *inEA,char *name)
        {
      Alignment *aln=NULL;
      Alignment *eval_aln=NULL;
      Sequence_data_struc *aln_data=NULL;
      Sequence_data_struc *eval_data=NULL;

      /* Work on private copies so the caller's alignments stay untouched */
      aln     =copy_aln (inA, NULL);
      eval_aln=copy_aln (inEA,NULL);

      aln     =expand_aln (aln);
      eval_aln=expand_number_aln (inA,eval_aln);

      if (aln && aln->expanded_order)
        {
          aln=reorder_aln ( aln, aln->expanded_order,aln->nseq);
        }
      if (eval_aln && eval_aln->expanded_order)
        {
          eval_aln=reorder_aln ( eval_aln, eval_aln->expanded_order,eval_aln->nseq);
        }

      /* main_output() expects its alignments wrapped in data structures */
      aln_data=vcalloc ( 1, sizeof (Sequence_data_struc));
      aln_data->A=aln;
      if (eval_aln)
         {
         eval_data=vcalloc ( 1, sizeof (Sequence_data_struc));
         eval_data->A=eval_aln;
         }

      main_output ( aln_data, NULL,eval_data, format, name);

      vfree (aln_data);
      vfree (eval_data);
      free_aln (aln);
      free_aln (eval_aln);
      return 1;
       }



/*
 * main_output: dispatches on `out_format` and writes the requested output
 * to `out_file`.
 *
 *   D1         main data set (alignment A, sequences S, tree T, weights W,
 *              constraint list CL, matrix M are read by the various branches)
 *   D2         second data set (read by thread_dna_on_prot_aln and pair_sim)
 *   DST        structure/evaluation data set (read by the score/color/
 *              filter style formats and a few others)
 *   out_format name of the output format; NULL or "" makes the function
 *              return 0 immediately
 *   out_file   destination handed to the output routines
 *
 * Return value: 0 after a format has been processed (or when out_format is
 * empty), 1 when the format is recognised but D1 is NULL. The latter is what
 * is_out_format_list() relies on when it calls this function with all data
 * pointers NULL. Unknown formats, and several formats once they have
 * produced their output, terminate through myexit().
 *
 * Changes relative to the previous version:
 *   - out_format is checked for NULL before being dereferenced;
 *   - the branches color_exoset, color_protogene, conservation, code_name,
 *     seq2struc, pavie_age_channel, transitions, suchard_aln, pair_sim and
 *     matrix/blast_matrix now carry the same "if (!D1) return 1;" guard as
 *     the other branches instead of dereferencing a NULL D1;
 *   - color_exoset tests DST itself before DST->A;
 *   - color_protogene no longer writes through a NULL DST.
 *
 * NOTE(review): mocca_aln, glalign, rnalign and rna_graph_fasta still read
 * DST, and thread_dna_on_prot_aln still reads D2, without checking them;
 * callers must supply those structures. age_matrix does not use D1 and is
 * therefore left unguarded.
 */
int main_output  (Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char *out_format, char *out_file)

      {  
      FILE *fp;
      int value;

      if ( !out_format || !out_format[0])return 0;
      if ( D1 && D1->rm_gap)ungap_aln ((D1->A));
      
      if ( strm (out_format, ""))return 0;
      else if      ( strncmp (out_format, "score",5)==0 || strm (out_format, "html"))
            {
              Alignment *BUF;

              if (!D1)return 1;
              if ( !DST) 
                {
                  fprintf ( stderr,"\n[You Need an evaluation File: Change the output format or use +evaluate][FATAL:%s]\n", PROGRAM);    
                  myexit(EXIT_FAILURE);
                }
              /* "score_xxx": skip ahead to the "_xxx" suffix */
              if ( !strm ("html", out_format))while ( out_format[0]!='_' && out_format[0]!='\0' )out_format++;
              
              D1->S=aln2seq(D1->A);
              BUF=copy_aln (DST->A, NULL);
              DST->A=aln2number (DST->A);
              
              if     ( strstr ( out_format, "html"  ))output_reliability_html  ( D1->A,  DST->A, out_file);
              else if( strm ( out_format, "_ps"    ))output_reliability_ps    ( D1->A,  DST->A, out_file);
              else if( strm ( out_format, "_pdf"   ))output_reliability_pdf   ( D1->A,  DST->A, out_file);  
              else if( strm ( out_format, "_ascii" ))output_reliability_ascii ( D1->A,  DST->A, out_file);  
              else if( strm ( out_format, "_seq"   ))output_seq_reliability_ascii ( D1->A,  DST->A, out_file);
              else
                {
                  /* Unknown suffix: output the evaluation alignment itself in that format */
                  DST->A=BUF;
                  return main_output (DST, NULL, NULL, out_format+1, out_file);
                }
            }

      else if (strm (out_format, "color_exoset"))
        {
          Alignment *ST, *EX, *A;
          Constraint_list *CL;
          int a, b, n;
          char *buf;
          
          if (!D1)return 1;
          if ( !DST || !DST->A)
            {
            printf_exit ( EXIT_FAILURE, stderr, "\nYou must provide an obj file via the -struc_in flag [FATAL:%s]", PROGRAM);
            }
          EX=DST->A;
          A=D1->A;
          
          CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt"));
          
          ST=  main_coffee_evaluate_output(A, CL, "boxshade");
          
          buf=vcalloc ( EX->len_aln+1, sizeof (int));
          
          for ( a=0; a< A->nseq; a++)
            {
            int i;
                        
            i=name_is_in_list (A->name[a],EX->name, EX->nseq, -1);
            if ( i==-1)continue;
                        
            sprintf ( buf, "%s", EX->seq_al[i]);
            ungap (buf);
            
            /* n walks the ungapped EX sequence, b walks the columns of A */
            for (n=0,b=0; b<A->len_aln; b++)
              {
                if (!is_gap(A->seq_al[a][b]))
                  {
                  if ( buf[n]=='o' || buf[n]=='j' || buf[n]=='b')
                    ST->seq_al[a][b]='0';
                  n++;
                  }
              }
            }
          vfree (buf);
      
          output_color_html  ( A, ST, out_file);
          myexit (EXIT_SUCCESS);
        }
      
      else if (strm (out_format, "color_protogene"))
        {
          int n, a, b;
          Alignment *COL;

          if (!D1)return 1;

          /* The colour codes are built in a copy of the alignment; it is
           * still stored in DST->A when a DST was supplied, as before. */
          COL=copy_aln (D1->A, NULL);
          if (DST)DST->A=COL;

          /* n cycles 1..6 over the columns: code 2 for 1-3, code 9 for 4-6 */
          for (n=1,a=0; a< (D1->A)->len_aln; a++, n++)
            {
            for ( b=0; b<(D1->A)->nseq; b++)
              {
                if (is_gap((D1->A)->seq_al[b][a]));
                else if ( n<=3)COL->seq_al[b][a]=2;
                else if ( n>3)COL->seq_al[b][a]=9;
              }

            if ( n==6)n=0;
            }
          output_color_html  ( D1->A,  COL, out_file);
          myexit (EXIT_SUCCESS);
          
        }
      else if      ( strncmp (out_format, "color",5)==0)
       {
         Alignment *BUF;
        
         if (!D1)return 1;
         
         if ( !DST) 
           {
             fprintf ( stderr,"\n[You Need an evaluation File: Change the output format or use +evaluate][FATAL:%s]\n", PROGRAM);   
             myexit(EXIT_FAILURE);
           }
         /* "color_xxx": skip ahead to the "_xxx" suffix */
         while ( out_format[0]!='_' && out_format[0]!='\0' )out_format++;
         
         BUF=copy_aln (DST->A, NULL);

         if     ( strm ( out_format, "_html"  ))output_color_html  ( D1->A,  DST->A, out_file);
         else if( strm ( out_format, "_ps"    ))output_color_ps    ( D1->A,  DST->A, out_file);
         else if( strm ( out_format, "_pdf"   ))output_color_pdf   ( D1->A,  DST->A, out_file); 
         else if( strm ( out_format, "_ascii"   ))output_color_ascii   ( D1->A,  DST->A, out_file);   
         else
           {
             DST->A=BUF;
             return main_output (DST, NULL, NULL, out_format+1, out_file);
           }
         myexit (EXIT_SUCCESS);
       }
      else if ( strm4  ( out_format, "tc_aln","t_coffee_aln", "t_coffee", "tcoffee"))
        {
          if (!D1)return 1;
          vfclose (output_aln ( D1->A, vfopen (out_file, "w")));
        }
      else if ( strm  ( out_format, "analyse_pdb"))
        {
          if (!D1)return 1;
          if ( !DST) 
            {
            fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM);   
            myexit(EXIT_FAILURE);
            }
          analyse_pdb ( D1->A,DST->A, "stdout");
          (DST->A)=aln2number (DST->A);
          output_reliability_ps    ( D1->A,  DST->A, out_file);
        }
      else if ( strm4 ( out_format, "lower0", "lower1", "lower2", "lower3") || strm4(out_format, "lower4", "lower5", "lower6", "lower7") || strm4 (out_format,"lower8", "lower9", "align_pdb", "malign_pdb") )
        {
          if (!D1)return 1;
          if ( !DST) 
            {
            fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM);   
            myexit(EXIT_FAILURE);
            }
          
          (DST->A)=aln2number (DST->A);
          if ( strm (out_format, "align_pdb"))value=0;
          else if (  strm (out_format, "malign_pdb"))value=5;
          else value=atoi(out_format+5);
          
          D1->A=filter_aln_upper_lower (D1->A, DST->A,0, value);
          output_clustal_aln ( out_file, D1->A);
        }
      else if ( strnm (out_format, "repeat", 6))
        {
          int size;
          int a, b, c;
          Alignment *CONC;
          
          if ( !D1)return 1;
          /* "repeatN": N consecutive sequences are concatenated per output row */
          size=atoi (out_format+6);
          print_aln (D1->A);
          CONC=declare_aln2 ( (D1->A)->nseq, ((D1->A)->len_aln+1)*size+1);

          for ( a=0; a< (D1->A)->nseq; a++)(D1->A)->seq_al[a][(D1->A)->len_aln]='\0';
          for ( c=0,a=0; a< (D1->A)->nseq;c++)
            {
            
            sprintf ( CONC->name[c], "%s", (D1->A)->name[a]);
            for ( b=0; b<size; b++, a++)
              {
                strcat (CONC->seq_al[c], (D1->A)->seq_al[a]);
                strcat (CONC->seq_al[c], "O");
              }
            }
          CONC->nseq=c;CONC->len_aln=strlen (CONC->seq_al[0]);
          output_clustal_aln ( out_file, CONC);
          free_aln (CONC);
        }
      
      else if ( strnm (out_format, "upper", 5))
            {
            
            if (!D1)return 1;
            if ( !DST) 
               {
               fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM);      
               myexit(EXIT_FAILURE);
               }
              
            
            (DST->A)=aln2number (DST->A);

            value=atoi(out_format+5);

            D1->A=filter_aln_lower_upper (D1->A, DST->A,0, value);
            output_clustal_aln ( out_file, D1->A);
            }
      
      else if ( strm4 ( out_format, "filter0", "filter1", "filter2", "filter3"))
             {
             if (!D1)return 1;
             if ( !DST) 
               {
               fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM);      
               myexit(EXIT_FAILURE);
               }
              
             (DST->A)=aln2number (DST->A);
             
             D1->A=filter_aln (D1->A, DST->A, atoi(out_format+6));
             output_clustal_aln ( out_file, D1->A);
             }
      
      else if ( strm3 ( out_format, "phylip_aln", "phylip", "phy"))
            {
            if (!D1)return 1;
            output_phylip_aln ( out_file, D1->A);
            }
      else if ( strm ( out_format, "mocca_aln"))
              {
            if (!D1)return 1;
            output_mocca_aln ( out_file, D1->A, DST->A);
            }       
      else if ( strm ( out_format, "saga_pw_sd_weights") )
            {
            if (!D1)return 1;
            output_pw_weights4saga ((D1->W),(D1->W)->PW_SD, out_file);
            }
      else if ( strm ( out_format, "saga_aln"))
            {
            if (!D1)return 1;
            output_saga_aln (out_file, D1->A);
            }
      else if (strm5 ( out_format, "aln","clustal_aln", "clustalw","clustal", "clustalw_aln") || strm (out_format, "msa"))
            {            
            if (!D1)return 1;
            output_clustal_aln (out_file, D1->A);
            }
      else if ( strm ( out_format, "conc_aln"))
              {
            if (!D1)return 1;
            output_conc_aln (out_file, D1->A);
            }
      else if ( strm2 ( out_format, "lalign_aln","lalign"))
              {
            if (!D1)return 1;
            output_lalign (out_file, D1->A);
            }
      else if ( strm2 ( out_format, "glalign_aln","glalign"))
              {
            if (!D1)return 1;
            output_glalign (out_file, D1->A, DST->A);
            }
      
      else if ( strm2 ( out_format, "fasta_aln","fasta" ))
            {
            if (!D1)return 1;
            output_fasta_aln( out_file, D1->A);
            }
      else if ( strm ( out_format, "est_prf" ))
            {
            if (!D1)return 1;
            output_est_prf( out_file, D1->A);
            }
      else if ( strm ( out_format, "clean_est_fasta_seq" ))
            {
            if (!D1)return 1;
            D1->A=clean_est(D1->A);
            output_fasta_seq(out_file, D1->A);
            
            }
      
      else if ( strm3 ( out_format, "msf_aln", "gcg", "msf"))
            {
            if (!D1)return 1;
            output_msf_aln( out_file, D1->A);
            }
      else if ( strm ( out_format, "rnalign"))
            {
            if (!D1)return 1;
            output_rnalign (out_file, D1->A, DST->S);
            }
      else if ( strm ( out_format, "fasta_seq"))
            {
            if (!D1)return 1;
            output_fasta_seq (out_file,D1->A);
            }
      else if ( strm ( out_format, "gotoh_seq"))
            {
            if (!D1)return 1;
            output_gotoh_seq (out_file,D1->A);
            }
      else if ( strm (out_format, "fasta_seq1"))
              {
            if (!D1)return 1;
            output_fasta_seq1 (out_file, D1->A);
            }
      else if ( strm2 (out_format, "pir_aln", "pir"))
            {
            if (!D1)return 1;
            output_pir_aln (out_file, D1->A);
            }
      else if ( strm (out_format, "pir_seq"))
            {
            if (!D1)return 1;
            output_pir_seq (out_file, D1->A);
            }
        else if ( strm (out_format, "gor_seq"))
            {
                if (!D1)return 1;
            output_gor_seq (out_file, D1->A);
            }
      else if ( strm (out_format, "pir_seq1"))
            {
              if (!D1)return 1;
            output_pir_seq1 (out_file, D1->A);
            }
      else if ( strm (out_format, "pw_lib_saga_aln"))
            {
              if (!D1)return 1;
            output_pw_lib_saga_aln (out_file, D1->A);
            }
      else if ( strm (out_format, "lib"))
            {
              if (!D1)return 1;
            output_lib (out_file, D1->A);
            }
      else if ( strm (out_format, "pdb_constraint_list"))
              {
              if (!D1)return 1;
            output_constraints (out_file, "pdb",D1->A);
            }
      else if ( strm2 (out_format, "constraint_list","tc_lib"))
              {
              
              if (!D1)return 1;
              else if (!D1->CL)output_constraints (out_file,"sim", D1->A);
              else if (D1->CL) vfclose ( save_constraint_list ( D1->CL, 0, (D1->CL)->ne, out_file, NULL, "ascii",(D1->CL)->S)); 
            }
      else if (  strm2 (out_format, "extended_lib","extended_cosmetic"))
              {
              if (!D1)return 1;
              output_constraints (out_file,out_format, D1->A);
            }
      else if ( strncmp (out_format, "extended_pair", 13)==0)
              {
              if (!D1)return 1;
              output_constraints (out_file,out_format, D1->A);
            }
      else if ( strm (out_format, "cache_id"))
              {
              if (!D1)return 1;
              cache_id (D1->A);
            output_saga_aln (out_file, D1->A);
            }
        else if ( strm (out_format, "compress_aln"))
            {
              if (!D1)return 1;
                compress_aln (D1->A);
            output_saga_aln (out_file, D1->A);
            } 
      else if (strm (out_format, "n_seq") ||strm (out_format, "nseq") )
            {
              if (!D1)return 1;
            fp=vfopen ( out_file, "w");
            fprintf ( fp, "%d\n", (D1->A)->nseq);
                vfclose (fp);
            }
      
      else if ( strm ( out_format, "thread_dna_on_prot_aln"))
              {
              if (!D1)return 1;
            D1->A=thread_dnaseq_on_prot_aln (D1->S, D2->A);
            output_saga_aln ( out_file, D1->A);
            }
      else if ( strm ( out_format, "tdna_fasta_seq1"))
              {if (!D1)return 1;
            D1->A=translate_dna_aln (D1->A,0);
            output_fasta_seq1 (out_file, D1->A);
            }
      else if ( strm ( out_format, "tdna_aln"))
              {if (!D1)return 1;          
            D1->A=translate_dna_aln (D1->A,0);
            output_saga_aln ( out_file, D1->A);
            }
      else if ( strm ( out_format, "cdna_fasta_seq1"))
              {if (!D1)return 1;          
            D1->A= gene2prot(D1->A);
            output_fasta_seq1 ( out_file, D1->A);
            }
      else if ( strm ( out_format, "mutate_cdna_aln"))
              {if (!D1)return 1;
                D1->A= mutate_cdna_aln ( D1->A);
                output_clustal_aln ( out_file, D1->A);
            }
      else if ( strm ( out_format, "tdna_sp_aln"))
              { if (!D1)return 1;
              if ( !DST) 
               {
               fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM);      
               myexit(EXIT_FAILURE);
               }          
             (DST->A)=aln2number (DST->A);
            D1->A=translate_splice_dna_aln (D1->A, DST->A);
            output_saga_aln ( out_file, D1->A);
            }
      else if (out_format && out_format[0] && (strcmp ( out_format,"rna_graph_fasta")==0))
            {
              if (!D1)return 1;
            sprintf ( (D1->A)->seq_al[0], "%s",(DST->S)->seq[0]);
            (D1->A)->nseq=0;
            output_fasta_seq (out_file, DST->A);
            }
      else if (strm ( out_format, "freq_mat"))
              {
              if (!D1)return 1;
            output_freq_mat (out_file, D1->A);  
            }
      else if (strm ( out_format, "maln_pval"))
              {if (!D1)return 1;
            output_maln_pval ( out_file, D1->A);
            }
      else if ( strm ( out_format, "model_aln"))
              {
              if (!D1)return 1;
            output_model_aln ( out_file, D1->A);
            }
      else if (strncmp (out_format, "mult",4)==0)
              {
              if (!D1)return 1;
            output_mult_fasta_seq ( out_file, D1->A, atoi(out_format+4));
            }
      else if (strm (out_format, "conservation"))
        {
          if (!D1)return 1;
          output_conservation_statistics (out_file, D1->A);
        }
      else if (strm (out_format, "len"))
              {
              if (!D1)return 1;
            output_statistics (out_file, D1->A, "nr");
            }
      else if ( strm (out_format, "name"))
              {
              if (!D1)return 1;
              if ( D1->A)output_statistics (out_file, D1->A, "n");
              if ( D1->T)
                {
                  Sequence *TS;
                  TS=tree2seq(D1->T, NULL);print_array_char (vfopen(out_file, "w"), TS->name, TS->nseq, "\n");
                }
            }
      else if ( strm (out_format, "code_name"))
              {
              char **nl=NULL;
              int num, n=0;
              Sequence *TS;
              FILE *lfp;

              if (!D1)return 1;
              /* Names come from the alignment; a tree, when present, takes precedence */
              if ( D1->A){n=(D1->A)->nseq, nl=(D1->A)->name;}
              if ( D1->T){TS=tree2seq(D1->T, NULL);nl=TS->name;n=TS->nseq;}
              
              lfp=vfopen (out_file, "w");
              for ( num=0; num<n; num++)
                fprintf (lfp, "\n%s C%d", nl[num], num+1);
              fprintf (lfp, "\n");
              vfclose (lfp);
            }
      else if ( strm ( out_format, "seq2struc"))
              {
                if (!D1)return 1;
                output_seq2struc (out_file, D1->A);
              }
      else if ( strstr  ( out_format, "pavie_age_channel"))
        {
          if (!D1)return 1;
          output_n_pavie_age_channel ( D1->S,out_file, atoi((out_format+strlen ("pavie_age_channel"))));
          myexit (EXIT_SUCCESS);
        }
      else if ( strstr ( out_format, "age_matrix"))
              {
                /* Does not use D1, hence no D1 guard here */
                output_age_matrix (out_file, atoi((out_format+10)));
              }
      else if ( strm ( out_format, "transitions"))
              {
                if (!D1)return 1;
                output_transitions (out_file, D1->A);
              }
      
      else if ( strncmp (out_format, "statistics",10)==0)
              {
              if (!D1)return 1;
              
              output_statistics (out_file, D1->A,out_format+10);
            }
      else if ( strcmp (out_format, "suchard_aln")==0)
              {
              if (!D1)return 1;
              output_suchard_aln (out_file, D1->A);
            }
      else if ( strm4 (out_format, "newick_tree","newick","binary","nh"))
              {
              if (!D1)return 1;
              
              /*D1->T=unroot_tree(D1->T);*/
              vfclose (print_tree ((D1->T), out_format, vfopen ( out_file, "w")));
            }
      else if ( strncmp (out_format, "sim",3)==0)
              {
              if (!D1)return 1;
              output_similarities (out_file, D1->A,out_format);
              
            }
      else if ( strm (out_format, "pair_sim"))
        {
          if (!D1)return 1;
          if ( !D2)
            {
            fprintf ( stderr, "\n-output=pair_sim: provide aln1 via -in and aln2 via -in2 [FATAL:%s]\n", PROGRAM);
            myexit (EXIT_FAILURE);
            }
          output_similarities_pw (out_file, D1->A,D2->A,out_format);
        }
      else if ( strm (out_format, "matrix") || strm (out_format, "blast_matrix"))
        {
          if (!D1)return 1;
          output_blast_mat (D1->M, out_file);
        }
      else 
              {

                fprintf ( stderr, "\n%s is an UNKNOWN OUTPUT FORMAT [FATAL:%s]\n",out_format, PROGRAM); 
                myexit (EXIT_FAILURE);
                
            }
      return 0;
      }
/* Returns 1 when `name` is exactly one of the input format names listed
 * below, 0 otherwise. */
int is_in_format_list ( char *name)
      {
      static const char *in_formats[]=
        {
          "saga_aln",       "number_aln",  "clustal_aln", "fasta_aln",
          "number_fasta",   "fasta_seq",   "pdb",         "msf_aln",
          "dali_aln",       "dali_seq",    "barton_list_tc", "est_prf",
          "gotoh_aln",      "amps_aln",    "pir_aln",     "pir_seq",
          "est_fasta",      "amps_sd_scores", "pima_aln", "dialign_aln",
          "gor_seq",        "gor_struc"
        };
      size_t n_formats=sizeof (in_formats)/sizeof (in_formats[0]);
      size_t i;

      for (i=0; i<n_formats; i++)
        {
          if ( strcmp (name, in_formats[i])==0)return 1;
        }
      return 0;
      }
/* Returns 1 when `name` is exactly "rna_number" or "fasta_seq", 0 otherwise. */
int is_struc_in_format_list ( char *name)
      {
      int known;

      known=(strcmp ( name, "rna_number")==0) || (strcmp ( name, "fasta_seq")==0);
      return known?1:0;
      }
/* Probes main_output() with no data attached and returns whatever it
 * reports for the format `name`. */
int is_out_format_list ( char *name)
      {
        int status;

        status=main_output (NULL, NULL, NULL, name, NULL);
        return status;
      }
      
/* Probes main_output() with no data attached and returns whatever it
 * reports for the structure output format `name`. */
int is_struc_out_format_list ( char *name)
      {
        int status;

        status=main_output (NULL, NULL, NULL, name, NULL);
        return status;
      }     

/**************************************************************************************************/
/*************************************REFORMAT UTIL*************************************************/
/**************************************************************************************************/

/*************************************REFORMAT IN**************************************************/
/**************************************************************************************************/
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               READ COG FILE                                             */
/*                                                                                         */
/***************************************************************************************** */
       
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               INPUT WEIGHTS                                            */
/*                                                                                         */
/***************************************************************************************** */
      
/*
 * get_amps_sd_scores: reads pairwise scores from an AMPS output file into a
 * freshly declared Weights structure.
 *
 * The file is scanned twice from the position set by set_fp_id(fp, "Index"):
 *   pass 1 counts the entries whose first token starts with a digit;
 *   pass 2 reads, for each such entry, the word following the next '>' into
 *          W->seq_name[], then reads rows of 16 numbers and stores column 10
 *          in PW_ID and column 15 in PW_SD (symmetrically) for the 1-based
 *          sequence pair given by columns 1 and 2.
 *
 * Returns the Weights structure (owned by the caller).
 *
 * Fixes relative to the previous version:
 *   - buf2 was never freed;
 *   - "buf=fgets(...)" replaced the heap pointer with NULL at end of file,
 *     leaking the buffer and passing NULL to the next fgets();
 *   - the row loop tested "c!=0", but fscanf() returns EOF (-1) at end of
 *     file, so the loop never terminated and kept storing stale values;
 *     incomplete rows are now treated as the end of the table;
 *   - the token loops stop when fscanf() fails instead of re-testing a stale
 *     buffer, and the reads into buf2 carry a field width;
 *   - sequence indices read from the file are range-checked against nseq
 *     before being used (assumes PW_ID/PW_SD are nseq x nseq, as declared by
 *     declare_weights(nseq) - TODO confirm).
 */
Weights* get_amps_sd_scores ( char *fname)
      {
      FILE *fp;
      char *buf;
      char *buf2;
      int nseq;
      Weights *W;
      int a, b,e;
      int n_read;
      float array[20];
      
      buf=vcalloc ( 1001, sizeof (char));
      buf2=vcalloc ( 1001, sizeof (char));
      
      /* Pass 1: count the sequence entries */
      fp=vfopen ( fname, "r");
      set_fp_id ( fp, "Index");
      if ( !fgets ( buf, 1000, fp))buf[0]='\0';
      
      nseq=0;
      while ( fscanf ( fp, "%1000s", buf2)==1 && isdigit((unsigned char)buf2[0]))
            {
            nseq++;
            if ( !fgets ( buf, 1000, fp))buf[0]='\0';
            }
      vfclose ( fp);
      
      W=declare_weights (nseq);
      
      /* Pass 2: read the names, then the score table */
      fp=vfopen ( fname, "r");
      set_fp_id ( fp, "Index");
      if ( !fgets ( buf, 1000, fp))buf[0]='\0';
      
      a=0;
      /* The token is read before a is tested so that the stream ends up at
       * the same position as in pass 1 (just after the first non-entry token) */
      while ( fscanf ( fp, "%1000s", buf2)==1 && isdigit((unsigned char)buf2[0]) && a<nseq)
            {
            fp=set_fp_after_char (fp, '>');
            fscanf ( fp, "%s",W->seq_name[a]);
            if ( !fgets ( buf, 1000, fp))buf[0]='\0';
            a++;
            }
      if ( !fgets ( buf, 1000, fp))buf[0]='\0';

      for (;;)
            {
            /* One table row = 16 numbers; anything shorter ends the table */
            n_read=0;
            for ( e=0; e< 16; e++)
                  {
                  if ( fscanf ( fp, "%f", &array[e])!=1)break;
                  n_read++;
                  }
            if ( n_read!=16)break;
            
            a=(int)array[0]-1;
            b=(int)array[1]-1;
            if ( a<0 || a>=nseq || b<0 || b>=nseq)continue;

            W->PW_ID[b][a]=W->PW_ID[a][b]=array[9];
            W->PW_SD[b][a]=W->PW_SD[a][b]=array[14];
            }
      vfclose ( fp);
      sprintf ( W->comments, "SD WEIGHTS GENERATED WITH THE PROGRAM AMPS IN PAIRWISE MODE");
      vfree ( buf);
      vfree ( buf2);
      return W;
      }

/*
 * read_seq_weight: builds a Weights structure for the `nseq` sequences named
 * in `name`, taking individual weights from the file `seq_weight`.
 *
 * Expected file layout (must carry the SINGLE_SEQ_WEIGHT_FORMAT_01 token):
 *      * comment
 *      name1 weight1
 *      .....
 *
 * Lines starting with '*' or '#', blank lines and lines that do not parse as
 * "<name> <float>" are ignored.
 *   - sequences listed in the file but absent from `name` are ignored;
 *   - sequences in `name` that are not listed keep the default weight of 1.
 * The header comment of the original code states that weights must lie
 * between 0 and 1; this function does not enforce it.
 *
 * If the file is not in the expected format an error is printed and
 * myexit(EXIT_FAILURE) is called.
 *
 * Fixes: the name token is now scanned into a buffer as large as the line
 * buffer, so a long first word can no longer overflow it (it used to be
 * MAXNAMES bytes); the error tag "FATA" is spelled "FATAL" like every other
 * fatal message in this file.
 */
Weights *read_seq_weight (char **name, int nseq, char* seq_weight)
       {
       int a, p;
       Weights *W;
       float w;
       
       FILE *fp;
       char line[LONG_STRING];
       char sname[LONG_STRING]; /* a token of `line` always fits */
       
       
       if ( !is_single_seq_weight_file (seq_weight))
       {
         fprintf ( stderr, "\nERROR: File %s is not in Format SINGLE_SEQ_WEIGHT_FORMAT_01 [FATAL:%s]", seq_weight,PROGRAM);
         myexit (EXIT_FAILURE);
         return NULL;
       }
       else
       {
         /* Every sequence starts with the default weight of 1 */
         W=declare_weights(nseq);
         for ( a=0; a< nseq; a++)
           {
             sprintf ( W->seq_name[a], "%s", name[a]);
             W->SEQ_W[a]=1;
           }
         sprintf ( W->mode, "%s", seq_weight);
         fp=vfopen (seq_weight, "r");
         
         
         while ( fgets( line,LONG_STRING-1, fp))
           {
             if ( line[0]=='*' ||line[0]=='#' || isblanc(line));
             else
             {
               if (sscanf(line, "%s %f", sname, &w)!=2)continue;
               if ( (p=name_is_in_list ( sname, W->seq_name, nseq, MAXNAMES-1))!=-1)
                 {
                   W->SEQ_W[p]=w;
                 }
             }
           }
         vfclose (fp);
         return W;
       }
       }
       
  
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               INPUT MISC                                               */
/*                                                                                         */
/***************************************************************************************** */

/* Load a two-column, whitespace-separated name table from fname.
 *
 * The returned array is indexed [row][0..1]. With code==CODE the first
 * column of the file lands in slot 0 and the second in slot 1; with
 * code==DECODE the two slots are swapped. Any other value of code leaves
 * the table as allocated. Reading stops at the first line that does not
 * yield two tokens.
 */
char *** read_rename_file ( char *fname, int code)
{
  char ***table=NULL;
  FILE *in;
  int row=0;
  int first, second;

  table=declare_arrayN(3, sizeof (char),count_n_line_in_file(fname) +1,2,MAXNAMES+1);
  in=vfopen (fname, "r");

  if ( code==CODE)
    {
      first=0;
      second=1;
    }
  else if ( code==DECODE)
    {
      first=1;
      second=0;
    }
  else
    {
      /* Unknown mode: nothing is read from the file. */
      vfclose (in);
      return table;
    }

  /* NOTE(review): "%s" is unbounded; tokens are assumed to fit in MAXNAMES+1 bytes. */
  while ( fscanf ( in, "%s %s\n", table[row][first], table[row][second])==2)
    row++;

  vfclose (in);
  return table;
}

/* Split a '#'-delimited multi-entry file into per-entry outputs.
 *
 * Each entry is expected as  <run of '#'> name <run of '#'> body '#'.
 * For every complete entry, process_barton_entry() is called with the body
 * and the name, and the entry name is appended to one of the index files:
 *   barton_nseq0 .. barton_nseq8   entries with that many sequences
 *   barton_seq_list_large          entries with more than 8 sequences
 *   make_dir                       shell commands creating one dir per entry
 *   barton_length                  histogram "nseq: number of entries"
 *
 * An entry whose body is ended by EOF rather than '#' is not processed.
 * Scanning stops cleanly at EOF wherever it occurs.
 */
void get_barton_list_tc_seq ( char *in_file)
      {
      FILE *fp, *fp_make, *fp_length, *fp_long;
      FILE *fp_small[9];
      
      static char *buf;
      static int len_buf=10000; /* kept in step with buf across calls */
      char name[100];
      
      char pwd[100];
      int a,c,nseq;
      int *length;
      int length_size=1000;
      int new_size;
      int longest=0;
      
      c=0;
      length=vcalloc ( length_size, sizeof(int));
      if ( buf==NULL)buf=vcalloc ( len_buf, sizeof (char));
      fp=vfopen (in_file, "r");
      fp_long=vfopen ( "barton_seq_list_large", "w");
      fp_make=vfopen ( "make_dir", "w");
      fp_length=vfopen ( "barton_length", "w");
      for ( a=0; a< 9; a++)
            {
            sprintf ( name, "barton_nseq%d",a);
            fp_small[a]=vfopen ( name, "w");
            }
      get_pwd (pwd);
      
      
      while ( c!=EOF)
            {
            a=0;
            /* Find the run of '#' opening the next header; stop at EOF
               instead of spinning on it forever. */
            while ( (c=fgetc(fp))!='#' && c!=EOF);
            if ( c==EOF)break;
            while ( (c=fgetc(fp))=='#');
            if ( c==EOF)break;
            ungetc ( c, fp);
            
            /* Entry name: everything up to the next '#'. */
            while ( (c=fgetc(fp))!='#' && c!=EOF)
                  {
                  buf[a++]=c;
                  if (a==len_buf)
                        {
                        len_buf+=10000;
                        buf=vrealloc ( buf, len_buf*sizeof (char));
                        }
                  }
            buf[a]='\0';
            if ( c==EOF)break;
            
            /* name is a fixed-size buffer: truncate rather than overflow. */
            snprintf ( name, sizeof (name), "%s", buf);
      
            /* Skip the closing '#' run; the first non-'#' character is dropped. */
            while ( (c=fgetc(fp))=='#');
            
            if ( c!=EOF)
                  {
                  a=0;
                  while ( (c=fgetc(fp))!='#' && c!=EOF)
                        {
                        buf[a++]=c;
                        if (a==len_buf)
                              {
                              len_buf+=10000;
                              buf=vrealloc ( buf, len_buf*sizeof (char));
                              }
                        } 
                  buf[a]='\0';
                  if (c!=EOF)
                        {
                        
                        nseq=process_barton_entry ( buf,name);
                        
                        /* Grow the histogram when an entry has more sequences than it holds. */
                        if ( nseq>=length_size)
                              {
                              new_size=nseq+1000;
                              length=vrealloc ( length, new_size*sizeof (int));
                              for ( a=length_size; a<new_size; a++)length[a]=0;
                              length_size=new_size;
                              }
                        length[nseq]++;
                        longest=(longest<nseq)?nseq:longest;
                        
                        if ( nseq<=8) fprintf ( fp_small[nseq], "%s.pep\n", name);
                        else fprintf ( fp_long, "%s.pep\n",name);
                        fprintf ( fp_make, "mkdir %s\nmv %s.pep %s\nmv %s.check %s\n", name, name, name, name, name);
                        }
                  }
            }
      
      vfclose (fp);
      vfclose (fp_long);
      for ( a=0; a< 9; a++)vfclose (fp_small[a]);
      vfclose (fp_make);
      for ( a=0; a<= longest; a++)fprintf ( fp_length, "%d: %d\n", a, length[a]);
      vfclose ( fp_length);
      vfree (length);
      
      }
      
/* Convert one PIR-like entry held in buf into two files.
 *
 * buf holds records of the form
 *     >P1;<name>
 *     <comment line>
 *     <sequence ...>*
 * The sequences are written to "<name>.pep" via output_fasta_seq() and the
 * comment lines to "<name>.check" via output_pir_check().
 *
 * Returns the number of '>' records found in buf.
 */
int process_barton_entry (char *buf, char *name)                  
    {     
    Alignment *LA;
    Sequence *LS;
    int a,c;
    static char *buf2;
    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=999999;
    int nseq=0;
    int l;
    char fname[100];
    char com_name[100];
    int rm_gap=1;

    /* Bounded formatting: an over-long name is truncated, not written past the buffer. */
    snprintf ( fname, sizeof (fname), "%s.pep", name);
    snprintf ( com_name, sizeof (com_name), "%s.check",name);
    
    if ( buf2==NULL)buf2=vcalloc ( 10000, sizeof (char));
    
    /* Pass 1: count the records and measure the sequences. */
    a=0;          
    while (buf[a]!='\0')
            {
             if (buf[a]=='>')
                  {
                  a=get_string_line (a,2, buf, buf2); 
                  /* Stop at '*' or at the end of the buffer. Every printable
                     character is counted because pass 2 stores all of them. */
                  while ((c=(unsigned char)buf[a])!='*' && c!='\0')
                        {
                        a++;
                        if (isgraph (c))
                              clen++;
                        }
                  if ( c=='*')a++;
                   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
                   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
                   nseq++;
                   clen=0;
                  }
            if ( buf[a]!='\0')a++;
            }
    
    
    LS=declare_sequence (  min_len_seq,  max_len_seq,  nseq); 
    LS->nseq=nseq;
    
    
    /* Pass 2: extract name, comment and residues of each record. */
    for (a=0, current=0; current< nseq; current++) 
      {
      a=get_string_line ( a, 1, buf, buf2);
      /* NOTE(review): "%s" is unbounded; LS->name[] capacity is not visible here. */
      sscanf ( buf2, ">P1;%s", LS->name[current]);
      a=get_string_line ( a, 1, buf, buf2);
      l=strlen ( buf2);
      if ( l>0)buf2[l-1]='\0';
      /* The comment is data, never a format string. */
      sprintf ( LS->seq_comment[current], "%s", buf2);
      
      p=0;
      while ( (c=(unsigned char)buf[a])!='*' && c!='\0')
            {
            a++;
            if (isalpha (c))
                  LS->seq[current][p++]=tolower (c);
            else if ( isgraph(c))
                  LS->seq[current][p++]=(c);
            }     
      if ( c=='*')
            {
            a++;                    /* step over '*' */
            if ( buf[a]!='\0')a++;  /* and the character that follows it */
            }
      }
    
    LA=declare_Alignment(LS);
    seq2aln ( LS, LA,rm_gap);
    output_fasta_seq (fname,LA);
    output_pir_check (com_name,LA->nseq, LA->seq_comment);
    free_Alignment ( LA);
    free_sequence ( LS, nseq);   
    
    return nseq;
    }

      
      

/* Read a numbered RNA structure list from fname.
 *
 * The file starts with one character and the number of entries. Each
 * entry is "i j k c c f": three 1-based integers, two characters and a
 * float. Only the integers are kept, converted to 0-based, in ST->list.
 * Consecutive entries sharing the same first integer form one stem;
 * ST->stem[s] holds {that integer, first entry index, last entry index}.
 *
 * A one-sequence mask of A->len[0] characters is built in ST->S:
 * '.' by default, '-' at every listed position, '>' / '<' at the
 * positions of the first and last entry of each stem.
 *
 * Returns the newly allocated Structure.
 */
Structure *read_rna_struc_number (Alignment *A,char *fname)
      {
      FILE *fp;
      int a;
      char x,y;
      float f;
      Sequence *SA;
      Structure *ST;
      int first, last;
      
      SA=declare_sequence ( A->len_aln, A->len_aln, 1);
      SA->len[0]=A->len[0];
      for ( a=0; a< SA->len[0]; a++)
            SA->seq[0][a]='.';
      ST=declare_rna_structure_num (SA);
      ST->S=SA;
      
      fp=vfopen ( fname, "r");
      fscanf ( fp, "%c\n%d\n",&x, &ST->tot_list);
      /* NOTE(review): tot_list and the positions read below are not checked
         against SA->len[0], which sizes ST->list, ST->stem and the mask. */
      for ( a=0; a<ST->tot_list; a++)
            {
            /* The three integers are mandatory; keep only complete entries. */
            if ( fscanf ( fp, "%d %d %d %c %c %f\n", &ST->list[a][0],&ST->list[a][1],&ST->list[a][2], &x, &y, &f)<3)
                  {
                  ST->tot_list=a;
                  break;
                  }
            ST->list[a][0]--;
            ST->list[a][1]--;
            ST->list[a][2]--;
            if ( a==0)
                  {
                  ST->stem[0][0]=ST->list[a][0];
                  ST->stem[0][1]=a;
                  }
            else if ( ST->stem[ST->tot_stem][0]!=ST->list[a][0])
                  {
                  /* First integer changed: close the current stem, open a new one. */
                  ST->stem[ST->tot_stem][2]=a-1;
                  ST->tot_stem++;
                  ST->stem[ST->tot_stem][0]=ST->list[a][0];
                  ST->stem[ST->tot_stem][1]=a;
                  }
                  
            SA->seq[0][ST->list[a][1]]='-';
            SA->seq[0][ST->list[a][2]]='-';
            }
      vfclose (fp);
      
      /* Close the last stem only if at least one entry was read. */
      if ( a>0)
            {
            ST->stem[ST->tot_stem][2]=a-1;  
            ST->tot_stem++;
            }
      for ( a=0; a< ST->tot_stem; a++)
            {
            first=ST->stem[a][1];
            last=ST->stem[a][2];
            SA->seq[0][ST->list[first][1]]='>';
            SA->seq[0][ST->list[first][2]]='<';
            SA->seq[0][ST->list[last][1]]='>';
            SA->seq[0][ST->list[last][2]]='<';      
            }
      
      return ST;  
      }
              
/* Allocate a zeroed Structure whose list and stem tables each have
 * SA->len[0] rows of 3 integers. */
Structure * declare_rna_structure_num (Sequence *SA)
      {
      Structure *rna;

      rna=vcalloc ( 1, sizeof ( Structure));
      rna->stem=declare_int ( SA->len[0], 3);
      rna->list=declare_int ( SA->len[0], 3);
      return rna;
      }
/* Read a library list file and return its entries.
 *
 * lines[0] of file2lines() is parsed as a count l, and lines[1..l-1] are
 * scanned. Every line that does not contain the format tag
 * "TC_LIB_LIST_FORMAT_01" is stored, in order, in the returned array.
 * n[0] receives the number of entries kept.
 *
 * Returns NULL (with n[0]==0) when the file yields no lines.
 */
char ** read_lib_list (char *name, int *n)
{

  char **lines;
  char **list;
  int a, l;
  
  n[0]=0;
  lines=file2lines (name);
  if ( lines==NULL)return NULL;
  l=atoi (lines[0]);
  
  list=vcalloc (l, sizeof (char*));
  for ( a=1; a<l; a++)
    if ( !strstr (lines[a], "TC_LIB_LIST_FORMAT_01"))list[n[0]++]=lines[a];
  /* Only the array of pointers is released: the kept strings now belong to list.
     NOTE(review): lines[0] and the tag line are not freed here - confirm ownership. */
  vfree (lines);
  return list;
}
  
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               INPUT SEQ                                                */
/*                                                                                         */
/***************************************************************************************** */
char ***read_group ( char *file)
{
  /*Format: Fasta like, the name of the group followed by the names of the sequences
    ><Group name> <First Seq> <second seq> ....
    Groups must NOT be overlapping
    list[group_index][0]="number of sequences"
    list[group_index][1]="group name"
    list[group_index][2...N]="sequence"

    Lines that do not start with '>' are ignored. The returned array has one
    slot per line of the file plus one, so unused trailing slots stay NULL.
  */ 

  FILE *fp;
  char *buf;
  char ***list;
  int a, c, l;


  
  l=measure_longest_line_in_file (file)+1;
  buf=vcalloc (l, sizeof (char)); 
  list=vcalloc ( count_n_line_in_file (file )+1, sizeof (char**));
  
  fp=vfopen (file, "r");
  
  a=0;
  while ((c=fgetc(fp))!=EOF)
    {
      /* c is the first character of the line. An empty line has nothing
	 left to read: reading on would swallow the NEXT line. */
      if ( c=='\n')continue;
      /* Never overwrite buf with the return value: it is NULL at EOF. */
      if ( fgets (buf,l, fp)==NULL)break;
      if ( c=='>')list[a++]=string2list (buf);
    }
  vfclose (fp);
  vfree (buf);
  return list;
}
static Sequence* get_pdb_sequence_from_field   (char *fname, char *field);

/* Extract the sequence of a PDB file.
 *
 * The SEQRES records are tried first; if they yield nothing the ATOM
 * records are used and a warning is recorded. Returns NULL, after a
 * warning, when neither field yields a sequence.
 */
Sequence* get_pdb_sequence   (char *fname)
{
  Sequence *seq;

  seq=get_pdb_sequence_from_field(fname, "SEQRES");
  if ( seq!=NULL)
    return seq;

  seq=get_pdb_sequence_from_field(fname, "ATOM");
  if ( seq!=NULL)
    {
      add_warning (stderr,"Warning: Read Sequence from ATOM field in %s [%s:WARNING]", fname, PROGRAM);
      return seq;
    }

  add_warning ( stderr, "\nWARNING: failed to extract sequence from %s [%s:WARNING]\n", fname, PROGRAM);
  return NULL;
}
static Sequence* get_pdb_sequence_from_field   (char *fname, char *field)
     {
       char *tp_name;
       char *command;
       char *pdbid;
       Sequence *S;


       command=vcalloc ( LONG_STRING, sizeof (char));
       tp_name=vtmpnam (NULL);
       
       sprintf ( command, "extract_from_pdb -seq_field %s -chain FIRST -infile \'%s\' -mode fasta > %s", field, check_file_exists(fname), tp_name);          
       if ( getenv4debug ("DEBUG_EXTRACT_FROM_PDB"))fprintf ( stderr, "\n[DEBUG_EXTRACT_FROM_PDB:get_pdb_seq] %s\n", command);
       my_system ( command);
       

       S=get_fasta_sequence ( tp_name, NULL);
       if (S==NULL)return NULL;
       
       if ( (pdbid=get_pdb_id (fname))){sprintf ( S->name[0], "%s",pdbid);vfree (pdbid);} 
       S->nseq=1;

       sprintf ( S->file[0], "%s", fname);
       S->max_len=S->min_len=S->len[0];
       if ( S->len[0]==0)
         {
           free_sequence (S, -1);
           S=NULL;
         }

       vremove ( tp_name);
       vfree ( command);
       
       return S;
     }

/* Load the whole content of fname into a newly allocated, NUL-terminated
 * string and return it. */
char * get_pdb_file   ( char *fname)
     {
       char *file;
       int a, c;
       FILE *fp;
       

       a=0;
       file=vcalloc ( count_n_char_in_file ( fname)+1, sizeof (char));
       fp=vfopen ( fname, "r");
       while ( (c=fgetc(fp))!=EOF)file[a++]=c;
       file[a]='\0'; 
       vfclose (fp); /* the stream used to be left open */
       return file;
     }
       
/* Read secondary-structure strings from a GOR-style output file.
 *
 * Every record starts with '!' followed by "<name> <length>". The rest of
 * that header block is skipped up to the next newline, then <length> rows
 * are read and the second character field of each row is kept as one
 * symbol of the sequence.
 *
 * The file is read twice: once to count records and find the shortest and
 * longest lengths, once to fill the returned Sequence.
 */
Sequence* get_struc_gor ( char *fname)
    {
    int nseq, min_len, max_len;
    int a, c;
    int len;

    FILE *fp;
    Sequence *S;

    /* Pass 1: count the records and collect the length range. */
    min_len=max_len=-1;
    fp=vfopen ( fname, "r");
    nseq=0;
    while ( (c=fgetc(fp))!=EOF)
	  {
	  if ( c!='!')continue;

	  nseq++;
	  /* The name is not needed in this pass, so it is not stored.
	     A header without a readable length counts as length 0. */
	  if ( fscanf ( fp, "%*s %d", &len)!=1)len=0;
	  if (min_len==-1)min_len=max_len=len;
	  else
	      {
	      min_len=(len>min_len)?min_len:len;
	      max_len=(len>max_len)?len:max_len;
	      }
	  }
    vfclose (fp);
   
    S=declare_sequence (  min_len,  max_len+1,nseq); 
    S->nseq=0;
    
    /* Pass 2: read every record. */
    fp=vfopen (fname,"r");    
     while ( (c=fgetc(fp))!=EOF)
	   {
	   if ( c!='!')continue;

	   fscanf ( fp, "%s %d\n",S->name[S->nseq], &(S->len[S->nseq]));
	    
	   /* Skip one line, stopping at EOF on a truncated file. */
	   while ( (c=fgetc(fp))!='\n' && c!=EOF);
      
	   for ( a=0; a<S->len[S->nseq]; a++)
	       fscanf ( fp, " %*c %c %*f %*f %*f\n",&(S->seq[S->nseq][a]));
	    
	   S->seq[S->nseq][a]='\0';
	   /* Move to the next record and leave its '!' for the outer loop. */
	   while ( (c=fgetc(fp))!='!' && c!=EOF);
	   ungetc (c, fp);
	   S->nseq++;
	   }
    vfclose (fp);
    return S;           
    }
              
/* Read sequences from a DALI-style file.
 *
 * A record starts at a digit: the token beginning there is the name and
 * everything up to the next digit (or EOF) is the sequence. Letters are
 * stored in lower case; '.' and '-' are both stored as '-'. Any other
 * character is dropped.
 *
 * The file is read twice: once to count records and measure them, once to
 * fill the returned Sequence.
 */
Sequence* get_sequence_dali (char *fname)
    {
    Sequence *LS;
    FILE *fp;
    int c;

    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=999999;
    int nseq=0;
    
    if ((fp=vfopen (fname,"r"))==NULL)
       {printf ( "\nCOULDN'T OPEN %s",fname);
        myexit(EXIT_FAILURE);
       }  
    
    /* Pass 1: count and measure. */
    c=fgetc(fp);
    while (c!=EOF)
	    {
	     if (isdigit(c))
		  {
		  ungetc(c, fp);
		  /* The name is only skipped here, so it is not stored:
		     no fixed-size buffer can be overrun by a long token. */
		  fscanf (fp, "%*s");
		  while (!isdigit(c=fgetc(fp)) && c!=EOF)
			if (isalnum (c) || c=='.' || c=='-')
			      clen++;
		   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
		   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
		   nseq++;
		  clen=0;
		  }
	    else
		c=fgetc (fp);
	    }
    vfclose (fp);
	    
    LS=declare_sequence (  min_len_seq,  max_len_seq+1,nseq); 
    LS->nseq=nseq;
    
    /* Pass 2: read names and residues. */
    fp=vfopen (fname,"r");
    
    current=0;
    c=fgetc(fp);
    while (c!=EOF)
	    {
	    if (isdigit(c))
		  {
		  ungetc(c, fp);
		  fp=fscanf_seq_name (fp, LS->name[current]);
		  p=0;
		  while (!isdigit(c=fgetc(fp)) && c!=EOF)
			{
			if (isalpha (c))
			    LS->seq[current][p++]=tolower (c);
			else if ( c=='.' || c=='-')
			    LS->seq[current][p++]='-';
			}	  
		  LS->seq[current][p]='\0';
		  LS->len[current]=strlen ( LS->seq[current]);
		  current++;
		  }
	    else
		c=fgetc ( fp);
	    }

    vfclose (fp);
    
    
    return LS;
    } 

/* Read sequences from a DIALIGN-style FASTA file.
 *
 * After each '>' the name is read, the rest of the header line is
 * discarded, and the sequence runs up to the next '>', space, tab or EOF.
 * Letters are stored in lower case, other printable characters unchanged.
 * A trailing ',' is removed from the name.
 *
 * The file is read twice: once to count records and measure them, once to
 * fill the returned Sequence.
 */
Sequence* get_dialign_sequence (char *fname)
    {
    Sequence *LS;
    FILE *fp;
    int c;

    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=999999;
    int nseq=0, l=0;
    char *buf;
    
    buf=vcalloc ( 1000, sizeof (char));
    if ((fp=vfopen (fname,"r"))==NULL)
       {printf ( "\nCOULDN'T OPEN %s",fname);
        myexit(EXIT_FAILURE);
       }  
    
    /* Pass 1: count and measure. */
    c=fgetc(fp);
    while (c!=EOF)
	    {
	     if (c=='>')
		  {
		  /* The name is only skipped in this pass, so it is not stored. */
		  fscanf (fp, "%*s");
		  
		  /* Discard the rest of the header. The return value must not
		     replace buf: it is NULL at EOF. */
		  fgets ( buf, 1000, fp);
		  /* Count every character pass 2 may store. */
		  while ((c=fgetc(fp))!='>' && c!=EOF && c!=' ' && c!='\t')
			if (isgraph (c)|| is_gap(c))
			      clen++;
		   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
		   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
		   nseq++;
		  clen=0;
		  }
	    else
		c=fgetc (fp);
	    }
    vfclose (fp);
	    
    LS=declare_sequence (  min_len_seq,  max_len_seq, nseq); 
    LS->nseq=nseq;
    
    /* Pass 2: read names and residues. */
    fp=vfopen (fname,"r");
    
    current=0;
    c=fgetc(fp);
    while (c!=EOF)
	    {
	    if (c=='>')
		  {
		    
		  fp=fscanf_seq_name (fp, LS->name[current]);
		  l=strlen ( LS->name[current]);
		  /* NOTE(review): the FASTA readers of this file also strip a
		     trailing ';' - only ',' was ever tested here. */
		  if ( l>0 && LS->name[current][l-1]==',')LS->name[current][l-1]='\0';
		  fgets ( buf, 1000, fp);
		  p=0;
		  while ((c=fgetc(fp))!='>' && c!=EOF && c!=' ' && c!='\t')
			{
			if (isalpha (c))
			    LS->seq[current][p++]=tolower (c);
			else if ( isgraph(c))
			    LS->seq[current][p++]=(c);
			}
		  LS->seq[current][p]='\0';
		  LS->len[current]=strlen ( LS->seq[current]);
		  current++;
		  }
	    else
		c=fgetc ( fp);
	    }

    vfclose (fp);
    vfree (buf);
    return LS;
    }

/* Read sequences from a PIMA-style FASTA file.
 *
 * The part of fname before its first '-' is used as a prefix (the whole
 * of fname when it contains no '-'). Records whose name starts with that
 * prefix are skipped. For the others the name is cut at its first '.',
 * a trailing ',' is removed, the rest of the header line is discarded and
 * the sequence runs up to the next '>' or EOF. Letters are stored in
 * lower case, other printable characters unchanged.
 *
 * The file is read twice: once to count records and measure them, once to
 * fill the returned Sequence.
 */
Sequence* get_pima_sequence (char *fname)
    {
    Sequence *LS;

    FILE *fp;
    int c;

    char name[10000];
    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=999999;
    int nseq=0, l=0, len=0;
    char *buf, *buf2;
    char prefix[1000];
    
    snprintf (  prefix, sizeof (prefix), "%s",fname);
    
    /* A file name without '-' used to dereference NULL here. */
    buf=strstr(prefix, "-");
    if ( buf!=NULL)buf[0]='\0';
    len=strlen (prefix);
      
   
    
    buf=vcalloc ( 1000, sizeof (char));
    if ((fp=vfopen (fname,"r"))==NULL)
       {printf ( "\nCOULDN'T OPEN %s",fname);
        myexit(EXIT_FAILURE);
       }  
    
    /* Pass 1: count and measure the records that will be kept. */
    c=fgetc(fp);
    while (c!=EOF)
	    {
	     if (c=='>')
		  {
		    fp=fscanf_seq_name (fp,name);
		    if ( (int)strlen(name)>=len && strncmp ( name, prefix, len)==0)
			{
			  c=fgetc(fp);
			}
		    else
			{
			/* The return value must not replace buf: it is NULL at EOF. */
			fgets ( buf, 1000, fp);
			/* Count every character pass 2 may store. */
			while ((c=fgetc(fp))!='>' && c!=EOF)
			      if (isgraph (c)|| is_gap(c))
				    clen++;
			 max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
			 min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
			nseq++;
			clen=0;
			}
		  }
	    else
		  c=fgetc (fp);
	    }
    vfclose (fp);
	    
    LS=declare_sequence (  min_len_seq,  max_len_seq, nseq); 
    LS->nseq=nseq;
    
    /* Pass 2: read names and residues. */
    fp=vfopen (fname,"r");
    
    current=0;
    c=fgetc(fp);
    while (c!=EOF)
	    {
	    if (c=='>')
		  {
		  /* Read into the scratch buffer first: a skipped record that
		     follows the last kept one must not touch LS->name[nseq]. */
		  fp=fscanf_seq_name (fp,name);
		  if ( (int)strlen(name)>=len && strncmp ( name, prefix, len)==0)
			c=fgetc (fp);
		  else
			{
			sprintf ( LS->name[current], "%s", name);
			buf2=strstr (LS->name[current], ".");
			if ( buf2!=NULL) buf2[0]='\0';
			 
			l=strlen ( LS->name[current]);
			if ( l>0 && LS->name[current][l-1]==',')LS->name[current][l-1]='\0';
			fgets ( buf, 1000, fp);
			p=0;
			while ((c=fgetc(fp))!='>' && c!=EOF)
			      {
			      if (isalpha (c))
				  LS->seq[current][p++]=tolower (c);
			      else if ( isgraph(c))
				  LS->seq[current][p++]=(c);
			      }
			LS->seq[current][p]='\0';
			LS->len[current]=strlen ( LS->seq[current]);
			current++;
			}
		  }
	    else
		c=fgetc ( fp);
	    }

    vfclose (fp);
    vfree (buf);
    return LS;
    }

/* Convert fname to FASTA with an external command and read the result.
 *
 * Runs "<perl_command> <fname> > <temporary file>" through my_system()
 * after checking that the command is installed, then parses the temporary
 * file with get_fasta_sequence().
 */
Sequence* perl_reformat2fasta (char *perl_command, char *fname)
    {
      char *command;
      char *file;

      file=vtmpnam (NULL);
      
      check_program_is_installed ( perl_command,"", perl_command,MAIL,IS_FATAL);
      
      /* Sized from its parts: the format adds 4 characters plus the final NUL. */
      command=vcalloc ( strlen (perl_command)+strlen (fname)+strlen (file)+6, sizeof (char));
      sprintf ( command, "%s %s > %s", perl_command, fname, file);
      my_system ( command);
      vfree ( command);
      return get_fasta_sequence (file, NULL);
    }
/* Read a FASTA file whose sequences may contain digits.
 *
 * For each '>' record the name is read, the rest of the header line is
 * kept as comment (at most COMMENT_SIZE-1 characters), and the sequence
 * runs up to the next '>' or EOF. Alphanumeric characters and gaps are
 * stored unchanged; everything else is dropped. A trailing ',' or ';' is
 * removed from the name.
 *
 * comment_out is not used.
 * The file is read twice: once to measure, once to fill the Sequence.
 */
Sequence* get_fasta_sequence_num (char *fname, char *comment_out)
    {
    Sequence *LS;
    FILE *fp;
    int a;

    int   c;
    char *name;
    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=0;
    int nseq=0, l=0;
 
    /* NOTE(review): fscanf_seq_name() takes no size; this scratch buffer is
       assumed to be large enough for any name it returns. */
    name=vcalloc ( 100, sizeof (char));

    nseq=count_n_char_x_in_file(fname, '>');
    min_len_seq=count_n_char_in_file(fname);

    /* Pass 1: measure the sequences. */
    fp=vfopen (fname,"r");
    c=fgetc(fp);
    while (c!=EOF)
	    {
	     if (c=='>')
		  {
		  fp=fscanf_seq_name (fp,name);
		  while ((c=fgetc(fp))!='\n' && c!=EOF);
		  while ((c=fgetc(fp))!='>' && c!=EOF)
			if (isalnum (c)|| is_gap(c))
			      clen++;
		   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
		   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
		   clen=0;
		  }
	    else
		c=fgetc (fp);
	     
	    }  

    vfclose (fp);	
    LS=declare_sequence (  min_len_seq,  max_len_seq,nseq); 
    
    LS->nseq=nseq;
    
    /* Pass 2: read names, comments and residues. */
    fp=vfopen (fname,"r");
    current=0;
    c=fgetc(fp);
    while (c!=EOF)
	    {
	    if (c=='>')
		  {
		  
		  fp=fscanf_seq_name (fp,LS->name[current]);
		  l=strlen ( LS->name[current]);
		  if ( l>0 && (LS->name[current][l-1]==','||LS->name[current][l-1]==';'))LS->name[current][l-1]='\0';
		  
		  /* Always consume the whole header line: what does not fit in
		     the comment is dropped instead of leaking into the sequence. */
		  a=0;
		  while ((c=fgetc(fp))!='\n' && c!=EOF)
			{
			if ( a<(COMMENT_SIZE-1))LS->seq_comment[current][a++]=c;
			}
		  LS->seq_comment[current][a]='\0';

		  
		  p=0;
		  while ((c=fgetc(fp))!='>' && c!=EOF)
		        {
			if (isalnum (c) || is_gap(c))
			    LS->seq[current][p++]=c;
			}

		  LS->seq[current][p]='\0';
		  LS->len[current]=strlen ( LS->seq[current]);

		  current++;
		
		  }
	    else
		c=fgetc ( fp);
	    }
		  
    
    vfclose (fp);
    
    vfree (name);
    return LS;
    }
/* Build a Sequence whose names are the first field of each line of fname.
 *
 * One slot is declared per newline in the file. Entries returned by
 * file2list() are copied in order until the list ends or an entry whose
 * first field starts with white space is met.
 */
Sequence *get_file_list ( char *fname)
{
  Sequence *S;
  char ***entries;
  int i;

  S=declare_sequence (1,1,count_n_char_x_in_file(fname, '\n'));
  entries=file2list (fname, "\n");

  S->nseq=0;
  for ( i=0; entries[i] && !isspace(entries[i][1][0]); i++)
    sprintf (S->name[S->nseq++], "%s",entries[i][1]);

  free_arrayN((void ***)entries, 3);
  return S;
}
/* Read a FASTA file without filtering the sequence characters.
 *
 * For each '>' record the name is read, the rest of the header line is
 * kept as comment (at most COMMENT_SIZE-1 characters), and every
 * character up to the next '>' or EOF is stored, line breaks included.
 * Characters below 'A' are shifted up by 'z' before being stored.
 * A trailing ',' or ';' is removed from the name.
 *
 * comment_out is not used.
 * The file is read twice: once to measure, once to fill the Sequence.
 */
Sequence* get_fasta_sequence_raw (char *fname, char *comment_out)
    {
    Sequence *LS;
    FILE *fp;
    int a;

    int   c;
    char *name;
    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=0;
    int nseq=0, l=0;
 
    /* NOTE(review): fscanf_seq_name() takes no size; this scratch buffer is
       assumed to be large enough for any name it returns. */
    name=vcalloc ( 100, sizeof (char));

    nseq=count_n_char_x_in_file(fname, '>');
    min_len_seq=count_n_char_in_file(fname);

    /* Pass 1: measure the sequences. */
    fp=vfopen (fname,"r");
    c=fgetc(fp);
    while (c!=EOF)
	    {
	     if (c=='>')
		  {
		  fp=fscanf_seq_name (fp,name);
		  while ((c=fgetc(fp))!='\n' && c!=EOF);
		  /* Pass 2 stores EVERY character, so every character is counted:
		     counting only printable ones under-sized the buffers. */
		  while ((c=fgetc(fp))!='>' && c!=EOF)
		      clen++;
		   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
		   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
		   clen=0;
		  }
	    else
		c=fgetc (fp);
	     
	    }  

    vfclose (fp);	
    LS=declare_sequence (  min_len_seq,  max_len_seq,nseq); 
    
    LS->nseq=nseq;
    
    /* Pass 2: read names, comments and raw sequences. */
    fp=vfopen (fname,"r");
    current=0;
    c=fgetc(fp);
    while (c!=EOF)
	    {
	    if (c=='>')
		  {
		  
		  fp=fscanf_seq_name (fp,LS->name[current]);
		  l=strlen ( LS->name[current]);
		  if ( l>0 && (LS->name[current][l-1]==','||LS->name[current][l-1]==';'))LS->name[current][l-1]='\0';
		  
		  /* Always consume the whole header line: what does not fit in
		     the comment is dropped instead of leaking into the sequence. */
		  a=0;
		  while ((c=fgetc(fp))!='\n' && c!=EOF)
			{
			if ( a<(COMMENT_SIZE-1))LS->seq_comment[current][a++]=c;
			}
		  LS->seq_comment[current][a]='\0';

		  
		  p=0;
		  while ((c=fgetc(fp))!='>' && c!=EOF)
		        {
			  if (c<'A')c+='z';
			  LS->seq[current][p++]=c;
			}

		  LS->seq[current][p]='\0';
		  LS->len[current]=strlen ( LS->seq[current]);

		  current++;
		
		  }
		  
	    else
		c=fgetc ( fp);
	    }
		  
    
    vfclose (fp);
    
    vfree (name);
    return LS;
    }
/* Read a FASTA file.
 *
 * For each '>' record the name is read, the rest of the header line is
 * kept as comment (at most COMMENT_SIZE-1 characters), and the sequence
 * runs up to the next '>' or EOF. Letters and gaps are stored unchanged;
 * everything else is dropped. A trailing ',' or ';' is removed from the
 * name.
 *
 * A record whose sequence is the placeholder "PDB" is replaced by the
 * sequence obtained from get_pdb_struc() / get_pdb_sequence() for its
 * name; a warning is recorded when that fails.
 *
 * comment_out is not used.
 * Returns NULL when the file contains no '>'.
 * The file is read twice: once to measure, once to fill the Sequence.
 */
Sequence* get_fasta_sequence (char *fname, char *comment_out)
    {
    Sequence *LS;
    Sequence *pdb_S;
    int a;

    char *pdb_name;
    
    FILE *fp;

    int   c;
    char *name;
    int clen=0;
    int current=0;
    int p=0;
    int max;
    int max_len_seq=0;
    int min_len_seq=0;
    int nseq=0, l=0;
    char *sub;
    
    nseq=count_n_char_x_in_file(fname, '>');
    if ( nseq==0)
      {
	return NULL;
      }
    
    name=vcalloc ( 10000, sizeof (char));
    min_len_seq=max=count_n_char_in_file(fname);
    sub=vcalloc (max+1, sizeof (char));

    /* Pass 1: measure the sequences. */
    fp=vfopen (fname,"r");
    c=fgetc(fp);
    while (c!=EOF)
	    {
	     if (c=='>')
		  {
		  fp=fscanf_seq_name (fp,name);
		  while ((c=fgetc(fp))!='\n' && c!=EOF);
		  while ((c=fgetc(fp))!='>' && c!=EOF)
		        {
			  if (isalnum (c)|| is_gap(c))
			      sub[clen++]=c;
			}
		  /* sub is reused for every record: terminate it, otherwise the
		     tail of a longer earlier record hides the "PDB" placeholder. */
		  sub[clen]='\0';
		  
		   if (strm (sub, "PDB"))
		     {
		       /* The real length is that of the fetched PDB sequence. */
		       pdb_name=get_pdb_struc(name,0, 0);
		       pdb_S=get_pdb_sequence (pdb_name);
		       if (pdb_S)
			 {
			 clen=strlen( pdb_S->seq[0]);
			 free_sequence ( pdb_S,1);
			 }
		       else
			 clen=0;
		       
		     }


		   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
		   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
		   clen=0;
		  }
	    else
		c=fgetc (fp);
	     
	    }  

    vfclose (fp); 
    
    
    LS=declare_sequence (  min_len_seq,  max_len_seq,nseq); 
    LS->nseq=nseq;
    
    /* Pass 2: read names, comments and residues. */
    fp=vfopen (fname,"r");
    current=0;
    c=fgetc(fp);
    while (c!=EOF)
	    {
	    if (c=='>')
		  {
		    fp=fscanf_seq_name (fp, LS->name[current]);
		    
		    l=strlen ( LS->name[current]);
		    if ( l>0 && (LS->name[current][l-1]==','||LS->name[current][l-1]==';'))LS->name[current][l-1]='\0';
		    
		    /* Always consume the whole header line: what does not fit in
		       the comment is dropped instead of leaking into the sequence. */
		    a=0;
		    while ((c=fgetc(fp))!='\n' && c!=EOF)
		      {
			if ( a<(COMMENT_SIZE-1))LS->seq_comment[current][a++]=c;
		      }
		    LS->seq_comment[current][a]='\0';
		
		  p=0;
		  while ((c=fgetc(fp))!='>' && c!=EOF)
		        {
			if (isalpha (c) || is_gap(c))
			    LS->seq[current][p++]=c;
			}
		

		  LS->seq[current][p]='\0';
		  
		  if ( strm (LS->seq[current], "PDB"))
		    {
		       
		       pdb_name=get_pdb_struc(LS->name[current],0, 0);
		       pdb_S=get_pdb_sequence (pdb_name);
		       if (pdb_S)
			 {
			 sprintf ( LS->seq[current], "%s", pdb_S->seq[0]);
			 free_sequence ( pdb_S, 1);
			 }
		       else
			 {
			 add_warning (stderr, "WARNING: Could not fetch PDB file: %s", pdb_name);
			 }
		    }
		  
		  
		  LS->len[current]=strlen ( LS->seq[current]);

		  current++;		
		  }
		  
	    else
		c=fgetc ( fp);
	    }
     
    vfclose (fp);
    
    vfree (sub);
    vfree (name);
    //LS=clean_sequence (LS);
    return LS;
    }

/* Read a FASTA file, storing the sequences in lower case.
 *
 * For each '>' record the name is read, the rest of the header line is
 * discarded, and the sequence runs up to the next '>' or EOF. Letters are
 * stored in lower case, gaps unchanged; everything else is dropped.
 * A trailing ',' or ';' is removed from the name.
 *
 * comment_out is not used.
 * The file is read twice: once to measure, once to fill the Sequence.
 */
Sequence* get_sub_fasta_sequence (char *fname, char *comment_out)
    {
    Sequence *LS;
    
    FILE *fp;

    int c;
    char name[100];
    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=0;
    int nseq=0, l=0;

    nseq=count_n_char_x_in_file(fname, '>');
    min_len_seq=count_n_char_in_file(fname);

    /* Pass 1: measure the sequences. */
    fp=vfopen (fname,"r");
    c=fgetc(fp);
    while (c!=EOF)
	    {
	     if (c=='>')
		  {
		  /* NOTE(review): fscanf_seq_name() takes no size; name[100] is
		     assumed to be large enough. */
		  fp=fscanf_seq_name (fp,name);
		  while ((c=fgetc(fp))!='\n' && c!=EOF);
		  /* Measure exactly the span pass 2 stores. An extra line used
		     to be skipped here, which under-sized the buffers. */
		  while ((c=fgetc(fp))!='>' && c!=EOF)
			if (isalnum (c)|| is_gap(c))
			      clen++;
		   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
		   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
		   clen=0;
		  }
	    else
		c=fgetc (fp);
	     
	    }  

    vfclose (fp);	
    LS=declare_sequence (  min_len_seq,  max_len_seq,nseq); 
    LS->nseq=nseq;
    
    /* Pass 2: read names and residues. */
    fp=vfopen (fname,"r");
    current=0;
    c=fgetc(fp);
    while (c!=EOF)
	    {
	    if (c=='>')
		  {
		  
		  fp=fscanf_seq_name (fp,LS->name[current]);
		  l=strlen ( LS->name[current]);
		  if ( l>0 && (LS->name[current][l-1]==','||LS->name[current][l-1]==';'))LS->name[current][l-1]='\0';
		  while ((c=fgetc(fp))!='\n' && c!=EOF);
		
		  p=0;
		  while ((c=fgetc(fp))!='>' && c!=EOF)
		        {
			if (isalpha (c))
			    LS->seq[current][p++]=tolower (c);
			else if (is_gap(c))
			    LS->seq[current][p++]=(c);			
			}

		  LS->seq[current][p]='\0';
		  LS->len[current]=strlen ( LS->seq[current]);

		  current++;
		
		  }
		  
	    else
		c=fgetc ( fp);
	    }
		  
    
    vfclose (fp);
    
    return LS;
    }
Sequence* get_pir_sequence (char *fname, char *comment_out)
    {
    /*
      function documentation: start

      Sequence* get_pir_sequence (char *fname, char *comment_out)

      Reads the '>' delimited records of fname in two passes.
      Pass 1 counts the records and measures the shortest and the longest one,
      these values are handed to declare_sequence.
      Pass 2 fills, for every record:
           name        : first token after '>' (an optional "P...;" prefix is skipped,
                         one trailing ',' or ';' is removed)
           seq_comment : the line that follows the header line
           seq         : letters in lower case; any other character that is
                         neither white space nor '*' is copied unchanged
      When comment_out is not NULL the comments are passed to output_pir_check.

      function documentation: end
    */
    Sequence *LS;

    FILE *fp;
    int c;

    /*NOTE(review): fscanf_seq_name copies up to MAXNAMES characters plus the terminator
      into name; assumes MAXNAMES<100 - TODO confirm*/
    char name[100];
    /*scratch line buffer: kept on the stack so that it cannot leak
      and cannot be overwritten by a NULL fgets result*/
    char line[1000];
    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=999999;
    int nseq=0, l=0;

    if ((fp=vfopen (fname,"r"))==NULL)
       {printf ( "\nCOULDN'T OPEN %s",fname);
        myexit(EXIT_FAILURE);
       }

    /*PASS 1: count the records, measure their length*/
    c=fgetc(fp);
    while (c!=EOF)
            {
             if (c=='>')
                  {
                  if ( (c=fgetc(fp))=='P')
                        {
                        /*stop on EOF as well: a truncated header must not loop for ever*/
                        while ( (c=fgetc(fp))!=';' && c!=EOF);
                        }
                  else ungetc ( c, fp);
                  fp=fscanf_seq_name (fp,name);

                  fgets ( line, sizeof (line), fp);
                  while ((c=fgetc(fp))!='>' && c!=EOF)
                        if (isalnum (c)|| is_gap(c))
                              clen++;
                   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
                   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
                   nseq++;
                  clen=0;
                  }
            else
                c=fgetc (fp);
            }
    vfclose (fp);

    LS=declare_sequence (  min_len_seq,  max_len_seq,nseq);
    LS->nseq=nseq;

    /*PASS 2: read the names, the comments and the residues*/
    fp=vfopen (fname,"r");

    current=0;
    c=fgetc(fp);
      while (c!=EOF)
            {
            if (c=='>')
                  {
                  if ( (c=fgetc(fp))=='P')
                        {
                        while ( (c=fgetc(fp))!=';' && c!=EOF);
                        }
                  else ungetc ( c, fp);

                  fp=fscanf_seq_name (fp,LS->name[current]);

                  /*remove one trailing separator, same rule as the fasta reader of this file;
                    l>0 protects the l-1 index when the name is empty*/
                  l=strlen ( LS->name[current]);
                  if ( l>0 && (LS->name[current][l-1]==','||LS->name[current][l-1]==';'))LS->name[current][l-1]='\0';
                  //translate_name ( LS->name[current]);
                  fgets ( line, sizeof (line), fp);

                  /*the comment buffer is kept when fgets fails (EOF): it only becomes empty*/
                  if ( fgets ( LS->seq_comment[current],COMMENT_SIZE-1, fp)==NULL)
                        LS->seq_comment[current][0]='\0';
                  else
                        {
                        l=strlen(LS->seq_comment[current]);
                        if ( l>0 && LS->seq_comment[current][l-1]=='\n')LS->seq_comment[current][l-1]='\0';
                        }
                  p=0;
                  while ((c=fgetc(fp))!='>' && c!=EOF)
                        if (isalpha (c))
                            LS->seq[current][p++]=tolower (c);
                        else if ( !isspace(c) && c!='*')
                            LS->seq[current][p++]=(c);
                  LS->seq[current][p]='\0';
                  LS->len[current]=strlen ( LS->seq[current]);
                  current++;
                  }
            else
                c=fgetc ( fp);
            }

    vfclose (fp);
    if (comment_out!=NULL) output_pir_check ( comment_out,LS->nseq, LS->seq_comment);
    return LS;
    }

Sequence* get_gor_sequence (char *fname, char *comment_out)
    {
    /*
      function documentation: start

      Sequence* get_gor_sequence (char *fname, char *comment_out)

      Reads the '!' delimited records of fname in two passes.
      Pass 1 counts the records and measures the shortest and the longest one,
      these values are handed to declare_sequence.
      Pass 2 stores the name of each record (first token after '!'), skips the
      rest of the header line and keeps every alphanumerical or gap character
      that follows, passed through tolower.
      comment_out is not used by this function.

      function documentation: end
    */
    Sequence *LS;

    FILE *fp;
    int c;

    /*NOTE(review): fscanf_seq_name copies up to MAXNAMES characters plus the terminator
      into name; assumes MAXNAMES<100 - TODO confirm*/
    char name[100];
    /*scratch line buffer: kept on the stack so that it cannot leak
      and cannot be overwritten by a NULL fgets result*/
    char line[1000];
    int clen=0;
    int current=0;
    int p=0;
    int max_len_seq=0;
    int min_len_seq=99999;
    int nseq=0;

    if ((fp=vfopen (fname,"r"))==NULL)
       {printf ( "\nCOULDN'T OPEN %s",fname);
        myexit(EXIT_FAILURE);
       }

    /*PASS 1: count the records, measure their length*/
    c=fgetc(fp);
    while (c!=EOF)
            {
             if (c=='!')
                  {
                  fp=fscanf_seq_name (fp,name);

                  fgets ( line, sizeof (line), fp);
                  while ((c=fgetc(fp))!='!' && c!=EOF)
                        if (isalnum (c)|| is_gap(c))
                              clen++;
                   max_len_seq=(clen> max_len_seq)?clen: max_len_seq;
                   min_len_seq=(clen< min_len_seq)?clen: min_len_seq;
                   nseq++;
                  clen=0;
                  }
            else
                c=fgetc (fp);
            }
    vfclose (fp);

    LS=declare_sequence (  min_len_seq,  max_len_seq,nseq);
    LS->nseq=nseq;

    /*PASS 2: read the names and the residues*/
    fp=vfopen (fname,"r");

    current=0;
    c=fgetc(fp);
      while (c!=EOF)
            {
            if (c=='!')
                  {
                  fp=fscanf_seq_name (fp,LS->name[current]);
                  //translate_name ( LS->name[current]);
                  fgets ( line, sizeof (line), fp);

                  p=0;
                  while ((c=fgetc(fp))!='!' && c!=EOF)
                        if (isalnum (c)|| is_gap(c))
                            LS->seq[current][p++]=tolower (c);

                  LS->seq[current][p]='\0';
                  LS->len[current]=strlen ( LS->seq[current]);
                  current++;
                  }
            else
                c=fgetc ( fp);
            }

    vfclose (fp);

    return LS;
    }
Sequence* get_swissprot_sequence (char *fname, char *comment_out)
    {
    /*
      function documentation: start

      Sequence* get_swissprot_sequence (char *fname, char *comment_out)

      Pass 1 looks for every "\nSQ" token of fname, skips the end of that line and
      counts the letters found before the next '/' in order to size the Sequence.
      Pass 2 looks for every "\nID" token, reads the name that follows it, moves to
      the next "\nSQ" token and copies the letters found before the next '/'.
      comment_out is not used by this function.

      function documentation: end
    */
    Sequence *LS;
    FILE *fp;
    int c;
    /*scratch line buffer, on the stack so that it does not have to be released*/
    char buf[LONG_STRING+1];
    int nseq=0;
    int len, max_len_seq=0, min_len_seq=0;

    if ( !check_file_exists(fname))
      {printf ( "\nCOULDN'T OPEN %s",fname);
        myexit(EXIT_FAILURE);
      }

    fp=NULL;
    while ( (fp=find_token_in_file(fname,fp,"\nSQ")))
      {
      nseq++;
      fgets (buf, LONG_STRING, fp);
      len=0;
      /*c!=EOF: an entry without its closing '/' must not loop for ever*/
      while ((c=fgetc(fp))!='/' && c!=EOF)if(isalpha(c))len++;
      if ( max_len_seq==0)max_len_seq=min_len_seq=len;
      else
        {
          max_len_seq=MAX(len, max_len_seq);
          min_len_seq=MIN(len, min_len_seq);
        }
      }

    LS=declare_sequence (  min_len_seq,  max_len_seq,nseq);
    LS->nseq=0;

    fp=NULL;
    while ( (fp=find_token_in_file(fname,fp,"\nID")))
      {
      fp=fscanf_seq_name (fp, LS->name[LS->nseq]);
      fp=find_token_in_file(fname,fp,"\nSQ");
      /*an ID entry without any SQ section after it: nothing left to read*/
      if ( !fp)break;
      fgets (buf, LONG_STRING, fp);
      /*NOTE(review): relies on LS->len[] being 0 after declare_sequence - TODO confirm*/
      while ((c=fgetc(fp))!='/' && c!=EOF)if (isalpha(c))LS->seq[LS->nseq][LS->len[LS->nseq]++]=c;
      LS->seq[LS->nseq][LS->len[LS->nseq]]='\0';
      LS->nseq++;
      }


    return LS;
    }
FILE * fscanf_seq_name ( FILE *fp, char *sname)
{
  /*
    Reads the next white space delimited token of fp and copies it into sname,
    truncated to MAXNAMES characters (a warning is issued when truncation occurs).
    Nothing is read when sname is NULL.
    The token is first read into a 10000 byte buffer allocated once and kept
    for the following calls.
    Returns fp.
  */
  static char *name;
  if ( !sname) return fp;

  if ( !name)name=vcalloc ( 10000, sizeof (char));
  /*field width = buffer size-1: a longer token cannot overrun the buffer*/
  fscanf (fp, "%9999s", name);

  if ( strlen (name)>MAXNAMES)
    add_warning (stderr, "\nWARNING: Seq Name Too long: [%s]. Truncated to %d", name, MAXNAMES);
  name[MAXNAMES]='\0';
  sprintf ( sname, "%s", name);
  return fp;
}

/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               INPUT ALN                                                 */
/*                                                                                         */
/***************************************************************************************** */
void read_aln (char *file_name, Alignment *A)
{
  /*
    Runs clustalw_aln2fasta_aln.pl on file_name, redirects its output into a
    temporary file, reads that file with get_fasta_sequence and passes the
    result to seq2aln together with A.
    printf_exit is called when the conversion command reports EXIT_FAILURE.
  */
  Sequence *converted;
  char *fasta_file=vtmpnam (NULL);

  if (printf_system ( "clustalw_aln2fasta_aln.pl %s > %s",file_name, fasta_file)==EXIT_FAILURE)
    {
      printf_exit ( EXIT_FAILURE, stderr, "Could Not Read File %s [FATAL:%s]\n", file_name, PROGRAM);
      return;
    }

  converted=get_fasta_sequence ( fasta_file,NULL);
  A=seq2aln (converted, A, 0);
}
  
Alignment* read_blast_aln ( char *file_name, Alignment *A)
{
  /*
    Converts the blast output file_name into a fasta alignment with
    blast_aln2fasta_aln.pl | fasta_aln2fasta_aln_unique_name.pl and loads it into A.
    Exits when is_blast_file rejects file_name or when the command line does not
    fit in its buffer.
    Returns the result of seq2aln, or NULL when the converted file is not
    recognised by format_is_fasta.
  */
  char command[1000];
  char *tmp_name;
  Sequence *S;
  int n;

  if ( !is_blast_file (file_name))
    {
      myexit (EXIT_FAILURE);
    }


  tmp_name=vtmpnam ( NULL);
  /*snprintf: a long file name can no longer write past the end of command*/
  n=snprintf ( command, sizeof (command), "cat %s | blast_aln2fasta_aln.pl | fasta_aln2fasta_aln_unique_name.pl >%s", file_name, tmp_name);
  if ( n<0 || n>=(int)sizeof (command))
    {
      fprintf ( stderr, "\nERROR (read_blast_aln): command line too long for file %s\n", file_name);
      myexit (EXIT_FAILURE);
    }
  my_system ( command);
  if ( format_is_fasta (tmp_name))
    {

      S=get_fasta_sequence ( tmp_name,NULL);
      A=seq2aln (S, A, 0);
    }
  else
    {
      A=NULL;
    }
  vremove (tmp_name);
  return A;
}


void read_number_aln ( char *file_name, Alignment *A)
   {
    /*
      Reads the alignment stored in file_name into A.
      The file is first copied, character by character, into a temporary file.
      That copy is then scanned three times:
        1-to count the lines of the first block that start with an alphanumerical character (nseq),
        2-to store the leading lines in A->aln_comment and the first token of each line of the first block in A->name,
        3-to append, for each line whose first token is a known name, the residues of that line to A->seq_al.
      Letters are turned to lower case unless A->residue_case==2.

      NOTE(review): several loops below wait for '\n' or for an alphanumerical character
      without testing EOF; they look like they assume a well formed file - TODO confirm.
    */
    FILE *fp, *fp2;
    int * ptr_aln;
    int a,b,d;
    int c;
    char *buf=NULL;

    int tot=0;
    int flag=0;
    char *fname;   
    int n_comment=0;

    int nseq=0;
    int max_len=0;

    
    fp=vfopen ( file_name, "r");
    
    /*work on a private copy of the input*/
    fname=vtmpnam(NULL);
    fp2=vfopen ( fname, "w");
    while ( (c=fgetc(fp))!=EOF)
      {
          fprintf ( fp2, "%c", c);
      }
    vfclose (fp);
    vfclose (fp2);

  
    /*1 Count The number of sequences*/ 
    fp=vfopen ( fname, "r");
    buf=vfgets ( buf,fp);
    /*the next test has an empty body: it has no effect*/
    if ( !isblanc (buf));
    /*skip the blank lines, then the non blank lines that follow them*/
    while ( isblanc (buf))
      {
        buf=vfgets ( buf, fp);
      }
    while (!isblanc (buf))
      {
      buf=vfgets ( buf,fp);
      }
    /*skip every line that does not start with an alphanumerical character*/
    while ( !isalnum ((c=fgetc(fp))))
      {     
      ungetc(c,fp);
      buf=vfgets ( buf,fp);         
      }
    
    if ( c!='\n')ungetc(c,fp);
    
    /*one sequence for each consecutive line starting with an alphanumerical character*/
    while ( isalnum ((c=fgetc(fp))))
      {
      ungetc(c,fp);     
      a=0;
      /*empty loop body: consumes the first token; nseq++ is executed once per line*/
      while ( isgraph ((c=fgetc(fp))));                     
        nseq++;
      buf=vfgets ( buf, fp);
      }    
    vfclose (fp);

    /*DONE*/
    /*2 get_max_len*/
    /*upper estimate of the alignment length: characters in the file divided by the number of sequences
      NOTE(review): nseq is 0 when no sequence line was found - division not guarded*/
    max_len=count_n_char_in_file(fname)/nseq;
    /*NOTE(review): A is a by-value parameter; a different pointer returned here would not reach the caller - TODO confirm realloc_alignment2 returns A*/
    A=realloc_alignment2( A, nseq+1, max_len+1);

    /*DONE*/
        
   
    /*second scan: same walk as above, this time the leading lines are kept as comments*/
    fp=vfopen ( fname, "r");
    buf=vfgets ( buf, fp);
    if ( !isblanc (buf))sprintf (A->aln_comment[n_comment++], "%s", buf);
    while ( isblanc (buf))
      {
        buf=vfgets ( buf,fp);       
      }
    while (!isblanc (buf))
      {
      buf=vfgets ( buf, fp);
      sprintf ( A->aln_comment[n_comment++], "%s", buf);
      
      }
    while ( !isalnum ((c=fgetc(fp))))
      {     
      ungetc(c,fp);
      buf=vfgets ( buf, fp);
      
      }
    
    if ( c!='\n')ungetc(c,fp);
    
    /*first token of each line of the first block = sequence name*/
    while ( isalnum ((c=fgetc(fp))))
      {
      ungetc(c,fp);
      
      fp=fscanf_seq_name (fp, A->name[A->nseq]); 

      /*a name already present in A->name is fatal unless the ALLOW_DUPLICATE environment variable is set*/
      if ( name_is_in_list (A->name[A->nseq], A->name, A->nseq, 100)!=-1)
        {
          fprintf ( stderr, "\nWARNING (read_number_aln): Sequence %s Duplicated in File %s ", A->name[A->nseq], A->file[A->nseq]);
          if (!getenv("ALLOW_DUPLICATE"))
            {
            fprintf ( stderr, " [FATAL:%s]\n", PROGRAM);
            myexit (EXIT_FAILURE);
            }
        }      
      A->nseq++;
      buf=vfgets ( buf,fp);
      }
    
    vfclose (fp);
  
    
     
    /*third scan: read the residues
      NOTE(review): fp is used below even when this message has been printed*/
    if ((fp=vfopen ( fname, "r"))==NULL)
      printf ( "\nCOULDN'T READ %s", fname);
   
    /*ptr_aln[a]: number of residues stored so far for sequence a*/
    ptr_aln=vcalloc ( A->nseq, sizeof(int));
    /*move forward until a '\n' is immediately followed by another '\n'*/
    while ( flag==0)
      {
      while (  (c=fgetc(fp))!='\n');
      if ( (c=fgetc(fp))=='\n')
          flag=1;
      }
    /*go to the first alphanumerical character*/
    while ( !isalnum(c=fgetc(fp)));
    ungetc ( c, fp);
    while ( c!=EOF)
      {
      tot=0;
      /*one block: read lines until A->nseq of them started with a known name*/
      while(tot< A->nseq && c!=EOF)
          {
           b=0;
           while ( !isgraph (c=fgetc(fp)) && c!=EOF);
           if ( c!=EOF)ungetc(c, fp);
           /*first token of the line goes into buf
             NOTE(review): no bound check on b against the size of buf*/
           while ( isgraph((buf[b++]=fgetc(fp))));
           buf[b-1]='\0';
           /*a=index of the sequence named like the token, -1 when there is none*/
           for ( a=-1,d=0; d< A->nseq; d++)
            if ( strcmp (A->name[d], buf)==0)
                {a=d;
                 tot++;
                }

           /*unknown name: the line is ignored*/
           if ( a==-1) while ( (c=fgetc(fp))!='\n' && c!=EOF);
           else
             {
             /*known name: every printable or gap character up to the end of the line is appended*/
             while ( (c=fgetc(fp))!='\n')
               {
                 if ( isgraph(c) || is_gap(c))
                   {if ( isalpha(c))
                   c=(A->residue_case==2)?c:tolower(c);
                   
                   if (!isspace(c))A->seq_al[a][ptr_aln[a]++]=c;
                   }
               }
             }
           }
       /*move to the next alphanumerical character (start of the next block) or to EOF*/
       while ( !isalnum(c=getc(fp)) && c!=EOF);
       if ( c!=EOF)
          ungetc (c, fp);
       }
       
    vfclose (fp);
    
   
    /*terminate the sequences, A->order[a] is set to {a, 0}*/
    for ( a=0; a< A->nseq; a++)
      {A->seq_al[a][ptr_aln[a]]='\0';
       A->order[a][0]=a;
       A->order[a][1]=0;
      }
    
    /*the alignment length is taken from the first sequence*/
    A->len_aln= strlen(A->seq_al[0]);  
    
    vfree (buf);
    vfree(ptr_aln);
    vremove (fname);
    
    }       
void read_amps_aln ( char *in_file, Alignment *A)
      {
      /*
        Reads the alignment section of in_file into A.
        The names and the number of sequences come from get_amps_seq_name.
        After the line identified with set_fp_id (fp, "1*"), each line holds one
        column of the alignment: one character is skipped, then A->nseq
        characters are read, one per sequence; a space is stored as '-', any other
        character is stored unchanged and counted in A->len[].
        Reading stops on a line whose second character is '*', or when the file
        ends (the last incomplete column is then dropped).
      */
      FILE *fp;
      int a, b, c, cont=1;
      int complete;

      A->nseq=get_amps_seq_name ( A->name, in_file);

      fp=vfopen ( in_file, "r");
      fp=set_fp_id(fp, "1*");
      /*c!=EOF in the skipping loops: a truncated file must not loop for ever*/
      while ( (c=fgetc(fp))!='\n' && c!=EOF);
      b=0;
      while ( cont==1)
            {
            c=fgetc ( fp);
            c=fgetc(fp);

            /*read the raw column first, so that nothing is counted for an incomplete one*/
            complete=0;
            if ( c!='*' && c!=EOF)
                  {
                  ungetc (c, fp);
                  for ( a=0; a< A->nseq; a++)
                        {
                        if ( (c=fgetc(fp))==EOF)break;
                        A->seq_al[a][b]=c;
                        }
                  complete=(a==A->nseq);
                  }

            if ( !complete)
                  {
                  cont=0;
                  for ( a=0; a<A->nseq; a++)
                        A->seq_al[a][b]='\0';
                  A->len_aln=b;
                  }
            else
                  {
                  for ( a=0; a< A->nseq; a++)
                        {
                        if ( A->seq_al[a][b]==' ')A->seq_al[a][b]='-';
                        else A->len[a]++;
                        }
                  while ((c=fgetc(fp))!='\n' && c!=EOF);
                  b++;
                  }
            }
      /*the stream was left open by the previous version*/
      vfclose (fp);
      }






int get_amps_seq_name ( char **name, char* fname)
      {
      /*
        Reads the sequence names listed in fname after the line identified with
        set_fp_id (fp, "Index").
        Every following line that starts with a white space character is parsed
        as "<number> ><name>"; the name is written in name[n].
        Returns the number of such lines.
        NOTE(review): the %s conversion is not bounded, name[n] must be large
        enough for the longest name of the file.
      */
      FILE *fp;
      int nseq=0;
      int c;

      fp=vfopen ( fname, "r");
      fp=set_fp_id ( fp, "Index");
      /*c!=EOF in the skipping loops: a truncated file must not loop for ever*/
      while ( (c=fgetc(fp))!='\n' && c!=EOF);
      while ( isspace(fgetc(fp)))
            {fscanf (fp, "%*d >%s", name[nseq++]);
             while ( (c=fgetc(fp))!='\n' && c!=EOF);
            }
      vfclose ( fp);
      return nseq;
      }
Alignment * read_gotoh_aln ( char *fname, Alignment *A)
   {
    /*
      Reads the alignment stored in fname into A and returns A (NULL when fname does not exist).
      The file is scanned three times:
        1-to count the sequence lines of the first block (nseq),
        2-to read the sequence names of the first block,
        3-to append the residues of every block to A->seq_al.
      A sequence line is parsed with "%d %s %s %s": number, residues, name and an optional
      fourth token; when the fourth token is present it is used as the name.
      Letters are turned to lower case unless A->residue_case==2.

      NOTE(review): fgets returns NULL at the end of the file and that value is passed to
      isblanc in several loops below; this looks like it assumes isblanc accepts NULL - TODO confirm.
    */
    FILE *fp;
    int * ptr_aln;
    int a,b,d,e;


    char *buf;
    char buf2[VERY_LONG_STRING+1];
    char buf3[VERY_LONG_STRING+1];
    char buf4[VERY_LONG_STRING+1];

    int tot=0;

    int l;
    int nseq, max_len;
    
   
    if ( !check_file_exists (fname))return NULL;
    fp=vfopen ( fname, "r");

/*1 GET THE NUMBER OF SEQUENCES*/
    nseq=0;
    buf=vcalloc ( VERY_LONG_STRING+1, sizeof (char));    
    /*skip: blank lines, then the lines up to the next blank line, then blank lines again*/
    while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    while (!isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    while ( !isblanc ( buf) && buf!=NULL)
      {
      /*a stays at -1 when the line does not start with a number*/
      a=-1;
      /*A->name[A->nseq] is only used as a scratch area here: A->nseq is not modified in this pass*/
      d=sscanf ( buf, "%d %s %s %s", &a, buf2, A->name[A->nseq],buf3);
      if ( a!=-1)
            {
            /*a name already present in A->name is fatal unless the ALLOW_DUPLICATE environment variable is set*/
            if ( name_is_in_list (A->name[A->nseq], A->name, A->nseq, 100)!=-1)
              {
                fprintf ( stderr, "\nWARNING (get_amps_seq_name): Sequence %s Duplicated in File %s ", A->name[A->nseq], A->file[A->nseq]);
                if (!getenv("ALLOW_DUPLICATE"))
                  {
                  fprintf ( stderr, " [FATAL:%s]\n", PROGRAM);
                  myexit (EXIT_FAILURE);
                  }
              }             
            nseq++;
            fgets(buf, VERY_LONG_STRING, fp);
            }
      /*first line that is not a sequence line: buf=NULL ends the loop (the buffer is not released)*/
      else ( buf=NULL);
      }
    vfclose (fp);
/*2 Get the MAX Len and Reallocate*/
    /*upper estimate of the alignment length: characters in the file divided by the number of sequences
      NOTE(review): nseq is 0 when no sequence line was found - division not guarded*/
    max_len=count_n_char_in_file(fname)/nseq;
    A=realloc_aln2( A, nseq+1, max_len+1);
/*3 Get The Sequences Names*/
    A->nseq=0;
    fp=vfopen ( fname, "r");
    /*NOTE(review): buf may be NULL at this point (see the end of pass 1) and is handed to fgets*/
    while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    while (!isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    while ( !isblanc ( buf) && buf!=NULL)
      {
      a=-1;
      d=sscanf ( buf, "%d %s %s %s", &a, buf2, A->name[A->nseq],buf3);
      if ( a!=-1)
            {
            /*four tokens on the line: the fourth one is the name*/
            if ( d==4)sprintf (A->name[A->nseq],"%s", buf3);      
            A->nseq++;
            fgets(buf, VERY_LONG_STRING, fp);
            }
      else ( buf=NULL);
      }
    vfclose (fp);   

/*READ THE ALN*/     
    fp=vfopen ( fname, "r");

    /*a new line buffer is allocated; the previous one is not released in this function*/
    buf=vcalloc ( VERY_LONG_STRING+1, sizeof (char));;      
    /*ptr_aln[a]: number of residues stored so far for sequence a*/
    ptr_aln=vcalloc ( A->nseq, sizeof(int));
    
    while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    while (!isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    
    
    while ( isblanc (buf=fgets ( buf, VERY_LONG_STRING, fp)));
    
    while (buf!=NULL)
      {
      tot=0;
      /*one block: read lines until A->nseq of them carried a known name*/
      while(tot< A->nseq)
          {
          
          /*e receives the number of parsed fields (it is also the target of the first %d)*/
          e=sscanf (buf, "%d %s %s %s", &e, buf2, buf3, buf4);
          if ( e==4)sprintf( buf3, "%s", buf4);
          
          
          /*a=index of the sequence named buf3
            NOTE(review): a keeps its previous value when no name matches*/
          for ( d=0; d< A->nseq; d++)
            {
            
            if ( strcmp (A->name[d], buf3)==0)
                {a=d;
                 tot++;
                }
            }
           /*a trailing '|' is removed from the residue token*/
           l=strlen (buf2);
           if ( buf2[l-1]=='|')l--;
           buf2[l]='\0';
          
           for (b=0; b<l; b++)
            {
            if ( isgraph (buf2[b]))
                  A->seq_al[a][ptr_aln[a]++]=(A->residue_case==2)?buf2[b]:tolower (buf2[b]);
             }
           buf=fgets(buf, VERY_LONG_STRING, fp);      
           }
       /*skip one line, then the blank lines, up to the next block*/
       if ( buf!=NULL)
            {
            buf=fgets(buf, VERY_LONG_STRING, fp);
            while ( isblanc (buf) && buf!=NULL)
                  {
                  buf=fgets ( buf, VERY_LONG_STRING, fp);
                  }
            }
       
       }
       
    vfclose (fp);
    
   
    for ( a=0; a< A->nseq; a++)
      {A->seq_al[a][ptr_aln[a]]='\0';
      }
    
    /*the alignment length is taken from the first sequence*/
    A->len_aln= strlen(A->seq_al[0]);  
    
   
    
    /*A->len[a] is increased by the number of non gap positions of sequence a*/
    for ( a=0; a< A->nseq; a++)
      {
      for ( b=0; b< A->len_aln; b++)
            A->len[a]+=1-is_gap(A->seq_al[a][b]);
      }
    /*b counts the columns where the first two sequences are both ungapped; the value is not used afterwards*/
    for ( a=0, b=0; a< A->len_aln; a++)
      {
      if ( !is_gap(A->seq_al[0][a]) &&!is_gap(A->seq_al[1][a]))b++;
      }
    return A;
    }
    




void read_msf_aln ( char *fname, Alignment *A)
   {
    /*
      Converts the msf file fname into a fasta alignment with msf_aln2fasta_aln.pl,
      reads the converted file with get_fasta_sequence and passes the result to
      seq2aln together with A. The temporary file is removed afterwards.
      Exits when the conversion command reports EXIT_FAILURE or when the command
      line does not fit in its buffer.
    */
    char command[1000];
    char *tmp_name;
    Sequence *S;
    int n;

    tmp_name=vtmpnam(NULL);
    /*snprintf: a long file name can no longer write past the end of command*/
    n=snprintf ( command, sizeof (command), "msf_aln2fasta_aln.pl %s > %s", fname, tmp_name);
    if ( n<0 || n>=(int)sizeof (command))
      {
      fprintf ( stderr, "\nERROR: file name %s is too long [FATAL:%s]", fname,PROGRAM);
      myexit (EXIT_FAILURE);
      }

    if ( my_system (command)==EXIT_FAILURE)
      {
      fprintf ( stderr, "\nERROR: file %s does not have a legal msf format [FATAL:%s]", fname,PROGRAM);
      myexit (EXIT_FAILURE);
      }

    S=get_fasta_sequence ( tmp_name,NULL);
    A=seq2aln (S, A, 0);
    vremove (tmp_name);
    return;
    }

/**************************************************************************************************/
/*************************************REFORMAT OUT*************************************************/
/**************************************************************************************************/
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               OUTPUT MATRICES                                           */
/*                                                                                         */
/***************************************************************************************** */



int output_freq_mat ( char *outfile, Alignment *A)
    { /*
      function documentation: start

      int output_freq_mat ( char *outfile, Alignment *A)

      Writes in outfile the count matrix returned by aln2count_mat (A):
      one line for each letter from A to Z followed by one line for '-',
      each line holding one value per column of the alignment

      A | 0 0 0 1 0
      B | 1 0 0 0 1
      - | 0 1 1 0 0

      The '-' line is the last one and is not terminated by a new line.
      This format can be piped into:
      The routine used for computing the p-value  gmat-inf-gc-v2c

      Always returns 1.

      function documentation: end
      */

    int **counts;
    int row, col;
    FILE *out;

    counts=aln2count_mat (A);
    out=vfopen ( outfile, "w");

    /*rows 0..25 are the letters, row 26 is the gap row*/
    for ( row=0; row<=26; row++)
      {
      if ( row<26)fprintf (out, "%c |", 'A'+row);
      else fprintf (out, "- |");

      col=0;
      while ( col< A->len_aln)
        {
        fprintf (out,"%d ", counts[row][col]);
        col++;
        }

      if ( row<26)fprintf (out, "\n");
      }

    free_int (counts, -1);
    vfclose ( out);
    return 1;
    }
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               OUTPUT P-Values                                           */
/*                                                                                         */
/***************************************************************************************** */   
float output_maln_pval ( char *outfile, Alignment *A)
    {
      /*
      function documentation: start
      float output_maln_pval ( char *outfile, Alignment *A)

      This function outputs the p-value of a multiple alignment as described
      in Hertz, Stormo, Bioinformatics, 15-7/8, 563/577
          ftp beagle.colorado.edu /pub/cosensus
      Locally
          packages/consensus/gmat-inf-gc-v2c

      The routine used for computing the p-value is the program gmat-inf-gc-v2c:
      the matrix written by output_freq_mat is piped into it and the value that
      follows "ln(p-value):" in its output is written in outfile and returned.

      Returns 0, without writing outfile, when the program produced no result
      file, when the "ln(p-value):" token is missing, or when the command line
      does not fit in its buffer.
      function documentation: end
      */


    char *mat;
    char *result;
    FILE *fp;
    double parsed=0;
    float value;
    char command[LONG_STRING];
    int n;

    mat=vtmpnam (NULL);
    result=vtmpnam (NULL);

    output_freq_mat (mat,A);
    n=snprintf ( command, sizeof (command), "more %s | gmat-inf-gc-v2c -A abcdefghijklmnopqrstuvwxyz> %s",mat, result);
    if ( n<0 || n>=(int)sizeof (command))
      {
      vremove ( mat);
      return 0;
      }
    my_system ( command);

    if ( !check_file_exists(result))
      {
      /*do not leave the matrix file behind*/
      vremove ( mat);
      return 0;
      }
    fp=find_token_in_file ( result, NULL, "ln(p-value):");
    if ( !fp)
      {
      /*token missing from the output: nothing to read*/
      vremove ( mat);
      vremove ( result);
      return 0;
      }

    /*%lf replaces the unbounded %s read followed by atof; the value stays at 0 when no number is found*/
    if ( fscanf ( fp, "%lf",&parsed)!=1)parsed=0;
    value=(float)parsed;
    vfclose ( fp);

    vremove ( mat);
    vremove ( result);

    fp=vfopen ( outfile, "w");
    fprintf ( fp, "%.6f\n", value);
    vfclose ( fp);

    return value;
    }
            
    
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               OUTPUT WEIGHTS                                            */
/*                                                                                         */
/***************************************************************************************** */
int output_seq_weights ( Weights *W, char *wfile)
        {
      /*
        Writes one "<name> <weight>" line (weight with two decimals) in wfile for
        each of the W->nseq sequences.
        Returns 0 when W is NULL or when vfopen returns NULL, 1 otherwise.
      */
      FILE *out;
      int i=0;

      if ( !W)return 0;
      if ( !(out=vfopen (wfile, "w")))return 0;

      while ( i< W->nseq)
            {
            fprintf ( out, "%s %.2f\n", W->seq_name[i],W->SEQ_W[i]);
            i++;
            }

      vfclose ( out);
      return 1;
      }
void output_pw_weights4saga ( Weights *W, float **w_list, char *wfile)
      {
      /*
        Writes in wfile: W->comments, a line holding "$", one
        "<name1> <name2> <weight>" line for every pair of sequences (first<second),
        the weight being w_list[first][second], and a closing "$" line.
      */
      FILE *out;
      int first, second;

      out=vfopen (wfile, "w");
      fprintf ( out, "%s\n$\n", W->comments);

      first=0;
      while ( first< W->nseq-1)
            {
            second=first+1;
            while ( second< W->nseq)
                  {
                  fprintf ( out, "%s %s %f\n", W->seq_name[first], W->seq_name[second],w_list[first][second]);
                  second++;
                  }
            first++;
            }

      fprintf ( out, "$\n");
      vfclose ( out);
      }

FILE * display_weights (Weights *W, FILE *fp)
{
  /*
    Prints the weighting mode and one "<name> <weight>" line per sequence on fp,
    the names being right aligned on the longest one.
    A single line stating that the mode is un-weighted is printed when W is NULL.
    Returns fp.
  */
  int i;
  int width=0;

  if ( !W)
    {
      fprintf ( fp, "\n\nUN-WEIGHTED MODE: EVERY SEQUENCE WEIGHTS 1\n");
      return fp;
    }

  fprintf ( fp, "\n\nWEIGHTED MODE:%s\n\n", W->mode);

  /*width of the name column=length of the longest name*/
  for ( i=0; i< W->nseq; i++)
    {
      int name_len=strlen (W->seq_name[i]);
      if ( name_len>width)width=name_len;
    }

  i=0;
  while ( i< W->nseq)
    {
      fprintf ( fp, "\t%*s %.2f\n", width,W->seq_name[i],W->SEQ_W[i]);
      i++;
    }
  fprintf ( fp, "\n");
  return fp;
}

/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               OUTPUT SEQ                                                */
/*                                                                                         */
/***************************************************************************************** */
int ** input_similarities (char *file, Alignment *A, char *mode)
{
  /*
    Reads the pairwise similarities stored in file and returns them in a
    A->nseq x A->nseq matrix whose diagonal is set to 100.

    When A is provided, every "# SEQ_INDEX <name> <index>" line of the file must
    agree with A->name[index]; NULL is returned on the first mismatch.
    When A is NULL, a temporary alignment is built with similarities_file2aln
    and released before returning.

    The values come from the lines containing "BOT\t" or "TOP\t"
    ("<tag> <i> <j> <score> ..."); score is truncated to an int and stored in
    both [i][j] and [j][i]. NULL is returned when one of these lines cannot be
    parsed or holds an index outside the matrix.

    NULL is also returned when file does not exist or is not recognised by
    is_distance_matrix_file. mode is not used.
  */
  int a, b, i, n;
  int **sim;
  float score;
  char name[1000];
  FILE *fp=NULL;
  char *buf1=NULL, *buf2=NULL;
  int new_aln=0;



  if ( !check_file_exists (file) || !is_distance_matrix_file (file))
    {
      return NULL;
    }

  if ( A)
    {
      fp=vfopen (file, "r");
      while ((buf2=vfgets (buf1,fp))!=NULL )
      {
        /*always keep track of the buffer returned by vfgets*/
        buf1=buf2;
        if (!strstr (buf1, "SEQ_INDEX"))continue;

        /*lines that mention SEQ_INDEX without declaring an index are ignored*/
        i=-1;
        if ( sscanf (buf1, "# SEQ_INDEX %999s %d",name, &i)!=2)continue;

        if ( i<0 || i>=A->nseq || !strm (A->name[i], name))
          {
            /*the file does not describe A: release everything before leaving*/
            vfclose (fp);
            vfree (buf1);
            return NULL;
          }
      }
      vfclose (fp);
    }
  else
    {

      A=similarities_file2aln(file);
      new_aln=1;
    }

  sim=declare_int ( A->nseq, A->nseq);
  for ( a=0; a<A->nseq; a++)sim[a][a]=100;


  fp=find_token_in_file (file, NULL, "PW_SEQ_DISTANCES");
  fp=find_token_in_file (file, fp, "BOT");
  /*fp is NULL when the file holds no BOT line: only the diagonal is filled in that case*/
  while (fp && (buf2=vfgets (buf1,fp))!=NULL )
    {
      buf1=buf2;
      if ( !(strstr (buf1, "BOT\t") || strstr (buf1, "TOP\t")))continue;
      n=sscanf (buf1, "%*s %d %d %f", &a, &b, &score);
      if ( n!=3 || a<0 || b<0 || a>=A->nseq || b>=A->nseq)
      {
        /*malformed line or index outside the matrix*/
        vfclose (fp);
        vfree (buf1);
        free_int (sim, -1);
        if (new_aln)free_aln(A);
        return NULL;
      }
      sim[a][b]=sim[b][a]=(int)score;
    }
  if (fp)vfclose (fp);
  vfree (buf1);
  if (new_aln)free_aln(A);
  return sim;
}

Alignment * similarities_file2aln ( char *file)
{
  /*
    Builds an alignment that only carries the sequence names declared in file:
    every "SEQ_INDEX <name> <index>" entry is copied in A->name[index].
    A->nseq is set to the number of SEQ_INDEX tokens found in the file.
    Entries that cannot be parsed, or whose index is outside 0..nseq, are ignored.
  */
  int nseq=0, i;
  FILE *fp;
  char name[1000];
  Alignment *A;


  fp=vfopen (file, "r");
  while ((fp=find_token_in_file (file,fp, "SEQ_INDEX")))nseq++;

  /*the number of sequences goes first, as in the realloc_aln2 (A, nseq+1, max_len+1)
    calls of this file; the previous call passed (10, nseq+1)
    NOTE(review): assumes declare_aln2 takes (nseq, len) like realloc_aln2 - TODO confirm*/
  A=declare_aln2 (nseq+1, 10);

  /*fp is NULL here: the search starts again from the beginning of the file*/
  while ((fp=find_token_in_file (file,fp, "SEQ_INDEX")))
    {
      if ( fscanf (fp, "%999s %d", name,&i)!=2)continue;
      if ( i<0 || i>nseq)continue;
      /*NOTE(review): name can hold up to 999 characters, A->name[i] must be able to receive it*/
      sprintf ( A->name[i], "%s", name);
    }
  A->nseq=nseq;

  return A;
}
  
void output_similarities (char *file, Alignment *A, char *mode)
{
  /*
    Writes in file the similarity of every pair of sequences of A, in the
    TC_DISTANCE_MATRIX_FORMAT_01 layout:
       # SEQ_INDEX <name> <index>      one line per sequence
       # PW_SEQ_DISTANCES
       BOT / TOP                       two lines per pair (a,b) and (b,a)
       AVG                             one line per sequence: tot[a]/(A->nseq-1)
       TOT                             sum of the pair values divided by the number of pairs

    The measure depends on the substrings found in mode:
       "_sar"      get_sar_sim on the aligned sequences
       "_memory_"  the number that follows is read with %ld and used as the address of an int** matrix
       "_idscore"  idscore_pairseq on the sequences returned by aln2seq ("idscoreDNA" selects the idmat matrix, blosum62mt otherwise)
       otherwise   get_seq_fsim2 with GAP_LIST and mode

    NOTE(review): with less than two sequences the AVG and TOT denominators are 0 - TODO confirm this case cannot occur.
  */
  float s;
  float *tot;
  float bigtot=0;
  int n, max;
  FILE *fp;
  int a, b;
  char *p;
  int **M=NULL;
  /*max=length of the longest name, used as the width of the name columns*/
  for (max=0, a=0; a< A->nseq; a++)max=MAX(max,(strlen (A->name[a])));
  

  /*tot[a]=sum of the similarities involving sequence a*/
  tot=vcalloc ( A->nseq, sizeof (float));
  fp=vfopen (file, "w");
  fprintf (fp, "# TC_DISTANCE_MATRIX_FORMAT_01\n");
  for ( a=0; a<A->nseq; a++)
    fprintf ( fp, "# SEQ_INDEX %s %d\n",A->name[a],a);
  fprintf ( fp, "# PW_SEQ_DISTANCES \n");
  /*n counts the pairs*/
  for (n=0,a=0;a< A->nseq-1; a++)
    {
      for ( b=a+1; b<A->nseq; b++, n++)
      {
        if (strstr (mode, "_sar"))
          {
            s=get_sar_sim (A->seq_al[a], A->seq_al[b]);
          }
        else if ( (p=strstr (mode, "_memory_")))
          {
            int **sim;
            /*the matrix address is decoded from the mode string
              NOTE(review): assumes a pointer fits in a long int - TODO confirm on the target platforms*/
            sscanf ( p, "_memory_%ld", (long int*)&sim);
            s=sim[a][b];
          }
        else if ( strstr (mode, "_idscore"))
          {
            /*S is kept between calls and rebuilt on the first pair of each call*/
            static Sequence *S;
            if (a==0 && b==1)
            {
              free_sequence (S, -1);
              if ( strstr (mode, "idscoreDNA"))
                M=read_matrice ("idmat");
              else
                M=read_matrice("blosum62mt");
            
                S=aln2seq(A);
            }
            s=idscore_pairseq(S->seq[a], S->seq[b], -10,-1, M, "sim");
          }
        else
          {

            s=get_seq_fsim2 (A->seq_al[a], A->seq_al[b],GAP_LIST, mode);
          }
        fprintf (fp, "BOT\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", a,b,s,max,A->name[a], max, A->name[b], s);
        fprintf (fp, "TOP\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", b,a,s,max,A->name[b], max, A->name[a], s);
        tot[a]+=s;
        tot[b]+=s;
        bigtot+=s;
      }
    }
  for ( a=0; a< A->nseq; a++)
    {
      fprintf (fp, "AVG\t %d\t %*s\t %*s\t %5.2f\n", a,max,A->name[a], max, "*", tot[a]/(A->nseq-1));
      
    }
  /*M is only set in the _idscore mode, it is NULL otherwise*/
  vfree (tot);free_int (M, -1);
  fprintf (fp, "TOT\t %*s\t %*s\t %5.2f\n", max,"TOT", max, "*", bigtot/n);
  vfclose (fp);
}

void output_similarities_pw (char *file, Alignment *A, Alignment *B,char *mode)
{
  /*
    Writes in file the idscore_pairseq similarity of every sequence of A against
    every sequence of B, in the TC_DISTANCE_MATRIX_FORMAT_01 layout
    (SEQ_INDEX lines for A, BOT/TOP lines for each pair, AVG lines for A, TOT line).
    The matrix is idmat when mode contains "idscoreDNA", blosum62mt otherwise.

    NOTE(review): the totals are accumulated with both the A index and the B index
    and the AVG lines divide by A->nseq-1, exactly like output_similarities;
    this looks inherited from the single alignment version - TODO confirm the
    intended average before changing the output.
    NOTE(review): the Sequences returned by aln2seq are not released here.
  */
  float s;
  float *tot;
  float bigtot=0;
  int n, max;
  FILE *fp;
  int a, b;

  int **M=NULL;
  Sequence *SA, *SB;

  if ( strstr (mode, "idscoreDNA"))
    M=read_matrice ("idmat");
  else
    M=read_matrice("blosum62mt");

  SA=aln2seq(A);
  SB=aln2seq(B);

  /*max=length of the longest name of A and B, used as the width of the name columns*/
  for (max=0, a=0; a< A->nseq; a++)max=MAX(max,(strlen (A->name[a])));
  for (a=0; a< B->nseq; a++)max=MAX(max,(strlen (B->name[a])));


  /*tot is indexed with a (<A->nseq) and with b (<B->nseq) below:
    it must be able to hold the larger of the two counts*/
  tot=vcalloc ( MAX(A->nseq, B->nseq), sizeof (float));
  fp=vfopen (file, "w");
  fprintf (fp, "# TC_DISTANCE_MATRIX_FORMAT_01\n");
  for ( a=0; a<A->nseq; a++)
    fprintf ( fp, "# SEQ_INDEX %s %d\n",A->name[a],a);
  fprintf ( fp, "# PW_SEQ_DISTANCES \n");
  /*n counts the pairs*/
  for (n=0,a=0;a< A->nseq; a++)
    {
      for ( b=0; b<B->nseq; b++, n++)
      {
        s=idscore_pairseq(SA->seq[a], SB->seq[b], -10,-1, M, "sim");
        fprintf (fp, "BOT\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", a,b,s,max,A->name[a], max, B->name[b], s);
        fprintf (fp, "TOP\t %4d %4d\t %5.2f %*s\t %*s\t %5.2f\n", b,a,s,max,B->name[b], max, A->name[a], s);
        tot[a]+=s;
        tot[b]+=s;
        bigtot+=s;
      }
    }

  for ( a=0; a< A->nseq; a++)
    {
      fprintf (fp, "AVG\t %d\t %*s\t %*s\t %5.2f\n", a,max,A->name[a], max, "*", tot[a]/(A->nseq-1));
    }
  vfree (tot);free_int (M, -1);
  fprintf (fp, "TOT\t %*s\t %*s\t %5.2f\n", max,"TOT", max, "*", bigtot/n);
  vfclose (fp);
}
void output_conservation_statistics ( char *file, Alignment *A)
{
  /*
    Treats the sequences of A as consecutive pairs (0,1), (2,3)... and counts,
    over the columns where both members of a pair are ungapped, how often each
    couple of lower-cased symbols is aligned.
    Prints on stdout, for every couple of symbols of BLAST_AA_ALPHABET,
    (int)(10*log(r)) where r is the observed frequency of the couple divided by
    the product of the frequencies of its two symbols (0 when r is 0).
    A last sequence without partner is ignored. file is not used.

    NOTE(review): the table allocated with declare_double is not released here.
  */
  int a, b, c,c1, c2;
  double **tot;
  char aa[1000];
  int naa;

  sprintf (aa, "%s", BLAST_AA_ALPHABET);
  naa=strlen (aa);

  /*tot[x][y]: aligned couples, tot[x][0]: occurrences of x, tot[0][0]: grand total*/
  tot=declare_double (256, 256);


  /*a+1<A->nseq: with an odd number of sequences the last one has no partner
    and row A->nseq must not be read*/
  for ( a=0; a+1<A->nseq; a+=2)
    {
      b=a+1;
      for ( c=0; c<A->len_aln; c++)
      {
        /*unsigned char: keeps c1 and c2 within 0..255, the range covered by tot*/
        c1=tolower ((unsigned char)A->seq_al[a][c]);
        c2=tolower ((unsigned char)A->seq_al[b][c]);
        if ( !is_gap(c1) && !is_gap(c2))
          {
            tot[c1][c2]++;
            tot[c2][c1]++;
            tot[c1][0]++;
            tot[c2][0]++;
            tot[0][0]++;
          }
      }
    }

  fprintf ( stdout, "# BLAST_MATRIX FORMAT\n#ALPHABET=%s\n",aa);
  for (a=0; a<naa; a++)fprintf ( stdout, "%3c ", toupper((unsigned char)aa[a]));
  fprintf ( stdout, "\n");
  for (a=0; a< naa; a++)
    {
      fprintf (stdout, "%c", toupper((unsigned char)aa[a]));
      for ( b=0; b< naa; b++)
      {
        float f1, f2, f3, r, v;
        c1=tolower((unsigned char)aa[a]);c2=tolower((unsigned char)aa[b]);
        f1=(float)((tot[c1][c2]*2)/tot[0][0]);
        f2=(float)((tot[c1][0])/tot[0][0]);
        f3=(float)((tot[c2][0])/tot[0][0]);
        r=(float)(f2==0 || f3==0)?0:(f1/(f2*f3));
        v=(r==0)?0:((float)10*log((double)r));
        fprintf (stdout, " %5d",(int)v);
      }
      fprintf ( stdout, "\n");
    }
}
void output_statistics (char *file, Alignment *A, char *mode)
    {
      /*
        Writes in file a table describing A. mode is a string of one letter
        switches, read from left to right (default "hnrglNL"; a leading '_' is skipped):
            h (first position only) header line naming the n, l, g, t columns
            n  sequence name
            l  number of non gap positions of the sequence
            g  number of gap stretches of the sequence
            t  length of the aligned sequence (strlen)
            N  number of sequences
            L  length of the alignment
        N and L met before the first 'n' are printed once on a "NSEQ/LEN" line;
        in the per sequence lines they are printed when they come after one of n, l, g, t.
      */
      FILE *fp;
      int a, b, c, d=0, n, k;
      int maxname=0;


      if (!mode || !mode[0])
      mode="hnrglNL";
      else if ( mode[0]=='_')
      mode++;
      for ( a=0; a<A->nseq; a++)maxname=MAX(strlen(A->name[a]), maxname);
      maxname++;


      fp=vfopen (file, "w");

      if (mode[0]=='h')
      {
        b=0;
        while ((c=mode[b++])!='\0')
          {
            if ( c=='n') fprintf (fp, "%-*s ",maxname,"name");
            if ( c=='l') fprintf (fp, "%-*s ",5,"nres");
            if ( c=='g') fprintf (fp, "%-*s ",5,"ngap");
            if ( c=='t') fprintf (fp, "%-*s ",5,"len");
          }
        /*NOTE(review): c is always '\0' at this point, so this test only depends on
          what is_in_set returns for '\0' - kept unchanged, TODO confirm the intent*/
        if (is_in_set ( c, "nlgt"))   fprintf (fp, "\n");
        mode++;
      }
      b=0;
      while ((c=mode[b++])!='\0')
      {
        if ( c=='n')break;
        if ( c=='N'){d=1;fprintf (fp, "NSEQ %d ", A->nseq);}
        if ( c=='L'){d=1;fprintf (fp, "LEN  %d ", A->len_aln);}
      }
      if ( d) fprintf (fp, "\n");

      for (a=0; a<A->nseq; a++)
      {
        b=0;
        /*d: set as soon as one per sequence column has been requested on this line*/
        d=0;
        while ((c=mode[b++])!='\0')
          {
            if (is_in_set ( c, "nlgt"))d=1;

            if (c=='n'){d=1;fprintf ( fp, "%-*s ", maxname,A->name[a]);}
            if (c=='l')
            {
              /*k, not d, walks the columns: the flag is no longer reset when A->len_aln is 0*/
              for (n=0,k=0; k<A->len_aln; k++)n+=!is_gap(A->seq_al[a][k]);
              fprintf ( fp, "%-5d ",n);
            }
            if (c=='g')
            {
              /*a gap stretch is counted on its last position: a gap followed by a non gap or by the end of the string*/
              for (n=0,k=0; k<A->len_aln; k++)n+=((is_gap(A->seq_al[a][k]) && !is_gap(A->seq_al[a][k+1]))||(is_gap(A->seq_al[a][k])&& A->seq_al[a][k+1]=='\0')) ;
              fprintf ( fp, "%-5d ",n);
            }
            if (c=='t')
            {
             /*strlen returns a size_t: it must be converted before being printed with %d*/
             fprintf ( fp, "%-5d ",(int)strlen (A->seq_al[a]));
            }
             if (c=='N' && d)
            {
             fprintf ( fp, "%-5d ",A->nseq);
            }
            if (c=='L'&& d)
            {
             fprintf ( fp, "%-5d ",A->len_aln);
            }
          }
        if (d)fprintf ( fp, "\n");
      }
      vfclose (fp);
    }

/* Build a substitution matrix over the alphabet "abcdefghij-" and hand it
 * to output_mat() for writing to outfile.
 *
 * Any pair in which is_gap() accepts one of the two symbols scores -scale;
 * every other pair scores -(FABS of the rank difference) * scale, where
 * scale is val, or 1 when val is 0.
 * Always returns 1.
 */
int output_age_matrix ( char *outfile, int val)
{
  char alp[]="abcdefghij-";
  int **mat;
  int naa;
  int i, j;

  mat=declare_int ( 256, 256);
  naa=strlen (alp);

  for ( i=0; i<naa; i++)
    {
      for ( j=0; j<naa; j++)
        {
          /* The matrix is indexed directly by character code. */
          int row=(int)alp[i];
          int col=(int)alp[j];

          if (is_gap(alp[i]) ||is_gap(alp[j] ))
            mat[row][col]=((val==0)?1:val)*-1;
          else
            mat[row][col]=(FABS((i-j))*-1)*((val==0)?1:val);
        }
    }

  output_mat ( mat,outfile, alp, 0);
  free_arrayN((void**)mat, 2);
  return 1;
}
     



/* Estimate log-odds scores for pairs of consecutive residues in the
 * sequences of A and write them to outfile.
 *
 * Each sequence is ungapped and lower-cased IN PLACE before counting.
 * The integer matrix (log-odds * 10, or -999 for a pair of observed symbols
 * that never occurs consecutively) is written through output_mat(); the
 * per-symbol and per-pair statistics are then appended to the same file as
 * lines starting with '#'.
 *
 * outfile: path of the file to create.
 * A:       alignment whose seq_al[] entries are counted (and modified).
 * Returns 1.
 */
int output_transitions(char *outfile, Alignment *A)
{
  double table[256][256];
  double symbols[256];
  double tot, freq, expected, log_odd;
  int a, b, len;
  unsigned char *s;
  char *alp;
  int naa=0;
  int **mat;
  float **fmat;
  FILE *fp;

  for ( a=0; a< 256; a++)
    {
      symbols[a]=0;
      for (b=0; b<256; b++) table[a][b]=0;
    }
  alp=vcalloc ( 256, sizeof (char));
  mat=declare_int ( 256,256);
  fmat=declare_float ( 256,256);

  for (tot=0,a=0; a< A->nseq; a++)
    {
      ungap (A->seq_al[a]);
      lower_string (A->seq_al[a]);
      /* Index the count tables with unsigned bytes: a plain char may be
         negative and would address memory before the arrays. */
      s=(unsigned char *)A->seq_al[a];
      len=strlen (A->seq_al[a]);
      if ( s[0]=='\0') continue;
      symbols[s[0]]++;
      for ( b=1; b< len; b++)
        {
          symbols[s[b]]++;
          table[s[b-1]][s[b]]++;
          tot++;
        }
    }

  /* Alphabet = every byte value seen at least once, in increasing order. */
  for (naa=0, a=0; a< 256; a++)
    {
      if (symbols[a])alp[naa++]=a;
    }

  for ( a=0; a< 256; a++)
    for (b=0; b<256; b++)
      {
        int score;

        if (!symbols[a] || !symbols[b]) continue;

        if (table[a][b] && tot>0)
          {
            freq=(table[a][b])/tot;
            expected=(symbols[a]*symbols[b])/(tot*tot);
            log_odd=log (freq/expected);
            score=log_odd*10;
          }
        else
          {
            log_odd=-999;
            score=-999;
          }

        /* fmat is private to this function, so it is indexed directly by
           symbol code. */
        fmat[a][b]=log_odd;

        /* mat is stored with an offset of 'A' (the same offset is passed to
           output_mat below); symbols below 'A' have no valid slot and must
           not be written at a negative index.
           NOTE(review): output_mat presumably reads mat[c-'A'] for every c
           in alp -- confirm how it treats symbols below 'A'. */
        if (a>='A' && b>='A') mat[a-'A'][b-'A']=score;
      }
  output_mat ( mat,outfile, alp, 'A');

  fp=vfopen (outfile, "a");
  for ( a=0; a<256; a++)
    if ( symbols[a])
      {
        fprintf (fp, "# %c tot: %6d freq: %7.5f\n", a, (int)symbols[a],(float)symbols[a]/tot);
      }

  for ( a=0; a< 256; a++)
    for (b=0; b<256; b++)
      {
        if (symbols[a]&& symbols[b])
          {
            freq=(table[a][b])/tot;
            fprintf (fp, "# %c%c tot: %6d freq: %7.5f log_odd: %9.3f\n", a, b, (int)table[a][b],(float)freq,fmat[a][b]);
          }
      }
  vfclose (fp);
  vfree(alp);
  free_arrayN ((void **)mat, 2);
  free_arrayN ((void **)fmat, 2);

  return 1;
}



/* Write the profile attached to A (A->P) to fname as a table with one line
 * per alignment column: the five count[0..4] values followed by the
 * residue of the first aligned sequence in that column.
 *
 * Terminates through myexit(EXIT_FAILURE) when A carries no profile.
 */
void output_est_prf   (char *fname, Alignment *A)
{
  int a;
  FILE *fp;

  if ( !A->P)
    {
      fprintf ( stderr, "\nFormat output_est_prf Impossible: No profile\n");
      myexit(EXIT_FAILURE);
    }

  fp=vfopen ( fname, "w");
  fprintf ( fp, "Consensus Sequence\nReconstructed with %s (%s,%s)\n",PROGRAM,AUTHOR,DATE);
  fprintf ( fp, "%4c %4c %4c %4c %15s    Consensus\n",  'A','G','C','T', "Internal Gaps");

  for ( a=0; a< A->len_aln; a++)
    {
      fprintf (fp, "%4d %4d %4d %4d %15d %c\n", (A->P)->count[0][a],(A->P)->count[1][a],(A->P)->count[2][a], (A->P)->count[3][a], (A->P)->count[4][a],A->seq_al[0][a]);
    }

  /* The stream was previously left open, leaking the handle and leaving the
     last buffered lines unflushed until program exit. */
  vfclose (fp);
  return;
}

        
/* Write the sequences of A to fname: a first line with the number of
 * sequences and A->max_len, then for every sequence a ">name" line, the
 * ungapped residues wrapped by output_string_wrap(50, ...), and a closing
 * "//" line.
 * Side effect: every A->seq_al[] is ungapped in place.
 */
void output_gotoh_seq (char *fname, Alignment*A )
{
  FILE *out;
  int i;

  out=vfopen ( fname, "w");
  fprintf ( out, "%d %d\n",A->nseq, A->max_len);

  i=0;
  while ( i<A->nseq)
    {
      ungap ( A->seq_al[i]);
      fprintf ( out, ">%s\n", A->name[i]);
      out=output_string_wrap ( 50,A->seq_al[i] , out);
      fprintf ( out, "//\n");
      i++;
    }

  vfclose (out);
}

/* Write n FASTA copies of the first sequence of A to fname. The copies are
 * named "<name>_1" ... "<name>_n".
 * Side effect: A->seq_al[0] is ungapped in place.
 */
void output_mult_fasta_seq (char *fname, Alignment*A, int n )
{
  FILE *out;
  int copy;

  out=vfopen (fname, "w");
  ungap(A->seq_al[0]);

  copy=0;
  while (copy<n)
    {
      copy++;
      fprintf (out, ">%s_%d\n%s\n", A->name[0],copy, A->seq_al[0]);
    }

  vfclose (out);
}

/* Write entry i of either S or A as a single FASTA record.
 *
 * name: output file; when NULL a temporary name is obtained from vtmpnam().
 * mode: open mode handed to vfopen().
 * S:    sequence set to read from; takes precedence when non-NULL.
 * A:    alignment to read from when S is NULL; its entry i is ungapped in
 *       place before being written.
 * i:    index of the entry to write.
 *
 * Returns the name of the file that was written. Terminates through
 * myexit(EXIT_FAILURE) when neither S nor A is given or i is out of range.
 */
char * output_fasta_seqX (char *name, char *mode, Sequence *S, Alignment *A, int i)
{
  FILE *fp;

  if (!name)name=vtmpnam (NULL);
  fp=vfopen (name, mode);

  /* Range-check i against the container that is actually used. The old
     test compared against S->nseq even when only A was supplied, which
     dereferenced a NULL S. */
  if ( (!A && !S) || (S && S->nseq<=i) || (!S && A->nseq<=i))
    {
      fprintf ( stderr, "\nERROR in function reformat:output_fasta_seqX[FATAL:%s]", PROGRAM);
      myexit (EXIT_FAILURE);
    }
  else if ( S)
    fprintf ( fp, ">%s %s\n%s\n", S->name[i], S->seq_comment[i], S->seq[i]);
  else if ( A)
    {
      ungap (A->seq_al[i]);
      fprintf ( fp, ">%s %s\n%s\n", A->name[i], A->seq_comment[i], A->seq_al[i]);
    }
  vfclose (fp);
  return name;
}

/* Write every sequence of A to its own FASTA file named
 * "<sequence name>.<extension>".
 *
 * fname: when it starts with "name" and has more characters, everything
 *        from the sixth character on (fname+5) is used as the extension;
 *        otherwise the extension is "seq".
 * A:     alignment to dump; every seq_al[] is ungapped in place.
 */
void output_fasta_seq1 (char *fname, Alignment*A )
{
  char seq_name[VERY_LONG_STRING];
  const char *extension="seq";
  int a;
  FILE *fp;

  /* The extension does not depend on the sequence: resolve it once. */
  if ( fname && strncmp( fname, "name",4)==0 && fname[4]!='\0')
    extension=fname+5;

  for ( a=0; a< A->nseq; a++)
    {
      /* Bounded formatting: a long sequence name must not overrun
         seq_name. */
      snprintf ( seq_name, sizeof (seq_name), "%s.%s", A->name[a], extension);

      ungap ( A->seq_al[a]);
      fp=vfopen (seq_name, "w");
      fprintf (fp, ">%s %s\n", A->name[a], A->seq_comment[a]);
      fp=output_string_wrap ( 50, A->seq_al[a],fp);
      fprintf ( fp, "\n");
      vfclose (fp);
    }
}
/* Write the first nseq strings of comment to fname, one per line.
 * Does nothing when fname is NULL.
 */
void output_pir_check (char *fname,int nseq, char **comment )
{
  FILE *out;
  int i;

  if ( !fname) return;

  out=vfopen ( fname, "w");
  i=0;
  while ( i<nseq)
    {
      fprintf (out, "%s\n", comment[i]);
      i++;
    }
  vfclose (out);
}
/* Write the sequences of A to fname in FASTA format.
 * The header is ">name", followed by a tab and the comment when the comment
 * is non-empty and isblanc() rejects it. Residues are wrapped by
 * output_string_wrap(50, ...).
 * Side effect: every A->seq_al[] is ungapped in place.
 */
void output_fasta_seq (char *fname, Alignment*A )
{
  FILE *out;
  int i;

  out=vfopen ( fname, "w");

  for ( i=0; i< A->nseq; i++)
    {
      int has_comment;

      ungap(A->seq_al[i]);
      fprintf ( out, ">%s", A->name[i]);

      has_comment=(A->seq_comment[i][0] && !isblanc(A->seq_comment[i]));
      if (has_comment)
        fprintf (out,"\t%s\n",A->seq_comment[i]);
      else
        fprintf ( out, "\n");

      out=output_string_wrap ( 50, A->seq_al[i],out);
      fprintf ( out, "\n");
    }
  vfclose (out);
}
/* Write the sequences of A to fname, one record per sequence: a
 * "!name ... length" header, the upper-cased ungapped residues wrapped by
 * output_string_wrap(50, ...), and a closing "@" line.
 * Side effect: every A->seq_al[] is ungapped and upper-cased in place.
 */
void output_gor_seq (char *fname, Alignment*A )
{
  int a;
  FILE *fp;

  fp=vfopen ( fname, "w");

  for ( a=0; a< A->nseq; a++)
    {
      ungap(A->seq_al[a]);
      /* strlen() yields a size_t; convert explicitly so the argument
         matches the %d conversion. */
      fprintf ( fp, "!%s                                                               %d \n", A->name[a], (int)strlen(A->seq_al[a]));
      upper_string ( A->seq_al[a]);
      fp=output_string_wrap ( 50, A->seq_al[a],fp);
      fprintf ( fp, "@\n");
    }
  vfclose (fp);
}
/* Ungap every sequence of A in place, then write the result to fname
 * through output_pir_aln().
 */
void output_pir_seq (char *fname, Alignment*A )
{
  int i=0;

  while ( i<A->nseq)
    {
      ungap(A->seq_al[i]);
      i++;
    }
  output_pir_aln (fname, A);
}
/* Write every sequence of A to its own file named
 * "<type>;<fname>_<sequence name>.seq", where <type> is "DL" when
 * get_string_type() reports "DNA" and "P1" when it reports "PROTEIN".
 * Each file holds a ">name" line, a blank line, the ungapped residues
 * wrapped by output_string_wrap(50, ...), and a "*" terminator line.
 * Side effect: every A->seq_al[] is ungapped in place.
 */
void output_pir_seq1 (char *fname, Alignment*A )
{
  char seq_name[VERY_LONG_STRING];
  int a;
  FILE *fp;
  /* Start from an empty code: the buffer used to be read uninitialized
     when the first sequence was neither DNA nor PROTEIN. A sequence of any
     other type keeps the code of the previous sequence, as before. */
  char type[20]="";
  char *seq_type;

  for ( a=0; a< A->nseq; a++)
    {
      seq_type=get_string_type (A->seq_al[a]);
      if      ( strm ( seq_type,"DNA"))sprintf(type, "DL");
      else if ( strm ( seq_type,"PROTEIN"))sprintf(type, "P1");

      /* Bounded formatting: long names must not overrun seq_name. */
      snprintf ( seq_name, sizeof (seq_name), "%s;%s_%s.seq",type, fname,A->name[a]);
      ungap ( A->seq_al[a]);
      fp=vfopen (seq_name, "w");
      fprintf (fp, ">%s\n\n", A->name[a]);
      fp=output_string_wrap ( 50, A->seq_al[a],fp);
      fprintf ( fp, "\n*\n");
      vfclose (fp);
    }
}
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               OUTPUT ALN                                                */
/*                                                                                         */
/***************************************************************************************** */
/* Write alignment A to outfile in MOCCA layout: a header (version, date,
 * alignment score, number of sequences, length), one "name: score" line per
 * sequence, then the alignment body printed by
 * output_Alignment_without_header().
 *
 * Scores come from S->score_seq[]. The (index, score) rows from the second
 * row on are handed to sort_int_inv(); the first row is left in place. A is
 * then reordered with reorder_aln() to follow the resulting order.
 */
void output_mocca_aln (char *outfile, Alignment *A, Alignment *S)
{
  FILE *out;
  int **ranked;
  char **ordered_names;
  int i, name_width;

  ranked=declare_int (S->nseq, 2);
  ordered_names=declare_char ( S->nseq,MAXNAMES+1);

  /* ranked[i] = { original index, score } */
  for ( i=0; i<A->nseq; i++)
    {
      ranked[i][0]=i;
      ranked[i][1]=S->score_seq[i];
    }
  sort_int_inv (ranked+1,2,1,0,S->nseq-2);

  for ( i=0; i<A->nseq; i++)
    sprintf ( ordered_names[i], "%s", A->name[ranked[i][0]]);

  A=reorder_aln (A, ordered_names, A->nseq);

  out=vfopen (outfile, "w");
  fprintf ( out, "MOCCA,(%s,%s, C. Notredame)\nSCORE %d\nNSEQ  %d\nLEN   %d\n",VERSION,DATE, A->score_aln, A->nseq, A->len_aln);

  name_width=return_maxlen ( ordered_names, A->nseq);

  for (i=0; i< A->nseq; i++)
    fprintf (out, "%-*s: %3d\n", name_width, A->name[i], ranked[i][1]);

  fprintf ( out, "\n");

  out=output_Alignment_without_header ( A, out);
  vfclose (out);
  free_int  (ranked, -1);
  free_char (ordered_names, -1);
  return ;
}
  
/* Print to stderr (through print_aln) the sub-alignment made of the
 * sequences listed in ls[0][0..ns[0]-1] followed by ls[1][0..ns[1]-1].
 * Works on a copy of B, which is released before returning.
 */
void print_sub_aln ( Alignment *B, int *ns, int **ls)
{
  Alignment *sub;
  int group, k, filled;

  sub=copy_aln (B, NULL);
  sub->nseq=0;
  sub->len_aln=strlen ( B->seq_al[ls[0][0]]);

  filled=0;
  for (group=0; group< 2; group++)
    {
      for ( k=0; k<ns[group]; k++)
        {
          int src=ls[group][k];

          sprintf ( sub->seq_al[filled], "%s", B->seq_al[src]);
          sprintf ( sub->name[filled], "%s", B->name[src]);
          filled++;
        }
    }
  sub->nseq=filled;
  /* Empty the name slot just past the last copied sequence. */
  sub->name[sub->nseq][0]='\0';

  print_aln (sub);
  free_aln (sub);
}
/* Print B, and every alignment chained to it through the ->A link, to
 * stderr without a header.
 */
void print_aln ( Alignment *B)
{
  Alignment *cur;

  for (cur=B; cur; cur=cur->A)
    {
      output_Alignment_without_header ( cur, stderr);
    }
}


/* Alias of output_Alignment(): print B with its header to fp. */
FILE * output_aln ( Alignment *B, FILE *fp)
{
  FILE *out;

  out=output_Alignment(B, fp);
  return out;
}
/* Print a header (program, version, date, URL, author, CPU time, score,
 * number of sequences, length) to fp, then the body of B through
 * output_Alignment_without_header(). Returns the stream returned by that
 * call.
 */
FILE * output_Alignment ( Alignment *B, FILE *fp)
{
  fprintf ( fp, "%s, %s (%s) [%s]\n%s\nCPU   %d sec\nSCORE %d\nNSEQ  %d\nLEN   %d\n",PROGRAM,VERSION,DATE,URL,AUTHOR,  (B->cpu+get_time())/1000, B->score_aln, B->nseq, B->len_aln);

  fp=output_Alignment_without_header ( B, fp);
  return fp;
}
  
/* Print the body of alignment B to fp in blocks of
 * get_msa_line_length(0, 0) columns.
 *
 * Each block holds one line per sequence followed by one extra line that
 * uses the name stored in B->name[B->nseq] and whose characters come from
 * analyse_aln_column(). When B->output_res_num is set, every line is framed
 * by running residue counters (initialised from B->order[][1] when
 * output_res_num is 2, from 0 otherwise).
 *
 * Returns fp; does nothing when fp is NULL.
 */
FILE * output_Alignment_without_header ( Alignment *B, FILE *fp)
{
  int start, row, col, stop;
  int name_width=0;
  int line;
  int *n_residues;
  char s;

  if (!fp) return fp;

  for ( row=0; row< B->nseq; row++)
    {
      int l=strlen (B->name[row]);
      if ( l>name_width) name_width=l;
    }
  name_width=MAX(name_width+2, 16);
  line=get_msa_line_length (0, 0);

  /* One counter per sequence plus one for the extra line. */
  n_residues=vcalloc ( B->nseq+1, sizeof (int));
  for ( row=0; row<B->nseq; row++)
    n_residues[row]=(B->output_res_num==2)?B->order[row][1]:0;

  fprintf ( fp, "\n");
  for (start=0; start<B->len_aln; start+=line)
    {
      stop=start+line;
      if (stop>B->len_aln) stop=B->len_aln;

      for (row=0; row<=B->nseq; row++)
        {
          fprintf (fp,"%-*s",name_width,B->name[row]);
          if (B->output_res_num)fprintf (fp, " %4d ", n_residues[row]+1);

          for (col=start; col<stop; col++)
            {
              if (row==B->nseq)
                {
                  n_residues[row]++;
                  s=analyse_aln_column ( B, col);
                }
              else
                {
                  n_residues[row]+=!is_gap(B->seq_al[row][col]);
                  s=GET_CASE(B->residue_case, B->seq_al[row][col]);
                }
              fprintf (fp,"%c",s );
            }

          if (B->output_res_num)fprintf (fp, " %4d", n_residues[row]);
          fprintf (fp,"\n");
        }

      fprintf (fp,"\n");
    }

  fprintf (fp,"\n\n");
  vfree (n_residues);

  return fp;
}
/* Alias of output_Alignment_score(): print the score alignment B to fp. */
FILE * output_aln_score ( Alignment *B, FILE *fp)
{
  FILE *out;

  out=output_Alignment_score(B, fp);
  return out;
}
FILE * output_Alignment_score ( Alignment *B, FILE *fp)
    {
    int a, b, c;
    static int max_len=0;
    static int line;        
    int ch;
    
    if (fp==NULL)return fp;
    if ( max_len==0)
      {
      for ( a=0; a< B->nseq; a++)
          {if ( strlen (B->name[a])>max_len)
            max_len= strlen ( (B->name[a]));
          }
      max_len+=4;

      }     
   line=get_msa_line_length(0, 0);
   sprintf (B->name[B->nseq], "CONS"); 
   fprintf ( fp, "T_COFFEE ALIGNMENT\nCPU TIME:%d sec.\n", (B->cpu+get_time())/1000);  
   fprintf ( fp, "SCORE=%d\n", B->score_aln);
   for ( a=0;a<B->nseq; a++)fprintf ( fp, "%s: %d\n", B->name[a], B->score_seq[a]);
   fprintf ( fp, "\n"); 
    for (a=0; a<B->len_aln; a+=line)
         {for (b=0; b<B->nseq; b++)
            {
            fprintf (fp,"%-*s",max_len,B->name[b]);
            for (c=a;c<a+line && c<B->len_aln;c++)
            {
            ch=B->seq_al[b][c];
            if (ch==NO_COLOR_RESIDUE)fprintf (fp,"-");
            else if ( ch==NO_COLOR_GAP)fprintf (fp,"*");
            else if ( ch<10 && ch>=0)fprintf (fp,"%d",ch);
            else if ( ch>10)fprintf (fp,"#");
            else if ( ch<0)fprintf  (fp,".");
            else fprintf (fp,"9");        
            }           
            fprintf (fp,"\n");            
            }
          fprintf (fp,"\n");
          fprintf (fp,"%-*s",max_len,B->name[b]);
          for (c=a;c<a+line && c<B->len_aln;c++)
            {
            ch=B->seq_al[b][c];
            if (ch==NO_COLOR_RESIDUE)fprintf (fp,"-");
            else if ( ch==NO_COLOR_GAP)fprintf ( fp, "*");
            else if ( ch<10 && ch>=0)fprintf (fp,"%d",ch);
            else if ( ch>10)fprintf (fp,"#");
            else if ( ch<0)fprintf (fp,".");
            else fprintf (fp,"9");        
            }           
          fprintf (fp,"\n\n\n");
         }
    fprintf (fp,"\n\n");
    return fp;
    }
/* Alias of output_Alignment_with_res_number(). */
FILE * output_aln_with_res_number ( Alignment *B, FILE *fp)
{
  FILE *out;

  out=output_Alignment_with_res_number(B, fp);
  return out;
}
/* Print alignment B to fp in blocks of 60 columns. Every line shows the
 * sequence name, order[][0], the running residue counter before the block,
 * the upper-cased residues, and the counter after the block. The counters
 * start from a private copy of B->order, so B->order itself is left
 * untouched.
 *
 * Returns fp; does nothing when fp is NULL.
 */
FILE * output_Alignment_with_res_number ( Alignment *B, FILE *fp)
{
  int a, b, c;
  int max_len=0;
  int line=60;
  int**order;

  if (fp==NULL)return fp;

  /* Recompute the name column width on every call: the former static cache
     kept the width of the first alignment ever printed. */
  for ( a=0; a< B->nseq; a++)
    {
      int l=strlen (B->name[a]);
      if ( l>max_len) max_len=l;
    }
  max_len+=4;

  order=copy_int ( B->order,declare_int ( B->nseq, 2), B->nseq, 2);

  fprintf ( fp, "T_COFFEE ALIGNMENT\nCPU TIME:%d sec.\n", (B->cpu+get_time())/1000);
  fprintf ( fp, "\n");
  for (a=0; a<B->len_aln; a+=line)
    {
      for (b=0; b<B->nseq; b++)
        {
          fprintf (fp,"%-*s %3d %4d ",max_len,B->name[b], order[b][0], order[b][1] );
          for (c=a;c<a+line && c<B->len_aln;c++)
            {
              order[b][1]+=1-is_gap(B->seq_al[b][c]);
              fprintf (fp,"%c",toupper(B->seq_al[b][c]) );
            }
          fprintf (fp," %4d\n", order[b][1] );
        }
      fprintf (fp,"\n");
    }
  fprintf (fp,"\n\n");

  free_int (order, -1);
  return fp;
}

/* Write a constraint library for alignment A to fname.
 *
 * mode selects what is written:
 *  - A has no constraint list, or mode is "pdb": a fresh list is built from
 *    the alignment with aln2constraint_list(), saved, and freed.
 *  - mode starts with "extended_pair": the text after the 14th character is
 *    split on '_' into two sequence names and the extended pair list of
 *    A->CL for those two names is saved. NOTE: mode is truncated in place to
 *    "extended_pair".
 *    NOTE(review): a mode of exactly "extended_pair" (no names) makes
 *    mode+14 point past the terminator -- callers must supply the names.
 *  - mode is "extended_lib" or "extended_cosmetic": A->CL is saved through
 *    save_extended_constraint_list() with mode+9.
 *  - anything else: A->CL is saved as a plain "lib".
 */
void output_constraints ( char *fname, char *mode,Alignment *A)
{
  FILE *fp;
  Constraint_list *CL;
  char *buf;
  char **name_list;

  if ( !A->CL || strm ( mode, "pdb"))
    {
      if (!A->S)
        {
          A->S=aln2seq(A);
        }

      CL=declare_constraint_list ( A->S, NULL, NULL, 0, NULL, NULL);
      CL=aln2constraint_list (A,CL, mode);
      fp=save_constraint_list ( CL, 0, CL->ne,fname, NULL, "lib",A->S);
      vfclose (fp);
      free_constraint_list (CL);
      return;
    }
  else if ( strncmp ( mode, "extended_pair", 13)==0)
    {
      buf=duplicate_string (mode+14);

      /* name_list[] points into buf; both are released once the list has
         been written. */
      name_list=vcalloc(2, sizeof(char*));
      name_list[0]=strtok (buf,"_");
      name_list[1]=strtok (NULL,"_");
      mode[13]='\0';

      fp=save_sub_list_header (vfopen(fname, "w"),2, name_list,CL=(A->CL));
      fp=save_extended_constraint_list_pair (CL, "pair",name_list[0],name_list[1],fp);
      fp=save_list_footer (fp, CL);
      vfree (name_list); /* was leaked */
      vfree (buf);
    }
  else if ( strm2 (mode, "extended_lib","extended_cosmetic"))
    {
      fp=save_extended_constraint_list ( CL=(A->CL),mode+9, vfopen(fname, "w"));
    }
  else
    {
      CL=(Constraint_list *)A->CL;
      fp=save_constraint_list ( CL, 0, CL->ne,fname, NULL, "lib",A->S);
    }
  vfclose ( fp);

  if ( (Constraint_list *)A->CL !=CL)free_constraint_list (CL);

  return;
}
/* Write the dynamic-programming traceback attached to A (A->Dp_result) to
 * fname: one "#STATE x: comment" line for each model state that has a
 * comment, then a FASTA-like record whose sequence is the traceback with
 * state k encoded as the letter 'a'+k.
 *
 * When A carries no Dp_result a warning is printed on stderr and nothing is
 * written.
 */
void output_model_aln (char *fname, Alignment*A )
{
  FILE *fp;
  int a;
  Dp_Model *M;
  Dp_Result *R;
  char *string;

  if ( A->Dp_result==NULL)
    {
      fprintf ( stderr, "\nWARNING Could Not Output Model %s [%s]", fname, PROGRAM);
      /* There is nothing to write; continuing would dereference the NULL
         result just below. */
      return;
    }
  R=A->Dp_result;
  M=R->Dp_model;

  fp=vfopen ( fname, "w");
  for (a=0; a<M->nstate; a++)
    {
      if (M->model_comments[a][0])fprintf ( fp, "#STATE %c: %s\n", 'a'+a, M->model_comments[a]);
    }
  string=vcalloc ( R->len+1, sizeof (char));
  for (a=0; a<R->len; a++)string[a]=R->traceback[a]+'a';
  fprintf ( fp, ">%s\n",fname);
  fp=output_string_wrap ( 50,string, fp);
  vfree(string);
  fprintf ( fp, "\n");

  vfclose (fp);
  return;
}
/* Write the ns sequences of A whose indices are listed in ls to fname, as
 * unwrapped FASTA records (">name comment" then the aligned string).
 * When fname is NULL a temporary name is obtained from vtmpnam().
 * Returns the name of the file that was written.
 */
char * output_fasta_sub_aln (char *fname, Alignment*A, int ns, int *ls  )
{
  FILE *out;
  int k;

  if (!fname) fname=vtmpnam (NULL);
  out=vfopen (fname, "w");

  k=0;
  while (k<ns)
    {
      int idx=ls[k];

      fprintf (out, ">%s %s\n%s\n", A->name[idx],A->seq_comment[idx],A->seq_al[idx]);
      k++;
    }

  vfclose (out);
  return fname;
}
/* Write two groups of sequences of A to fname as unwrapped FASTA records:
 * first the ns[0] sequences listed in ls[0], then the ns[1] sequences
 * listed in ls[1].
 * When fname is NULL a temporary name is obtained from vtmpnam().
 * Returns the name of the file that was written.
 */
char * output_fasta_sub_aln2 (char *fname, Alignment*A, int *ns, int **ls  )
{
  FILE *out;
  int group, k;

  if (!fname) fname=vtmpnam (NULL);
  out=vfopen (fname, "w");

  for ( group=0; group<2; group++)
    {
      int *members=ls[group];

      for (k=0; k<ns[group]; k++)
        {
          int idx=members[k];

          fprintf (out, ">%s %s\n%s\n", A->name[idx],A->seq_comment[idx],A->seq_al[idx]);
        }
    }

  vfclose (out);
  return fname;
}

/* Write a numeric encoding of alignment A to out_file, then terminate the
 * program with exit(EXIT_SUCCESS).
 *
 * A is first passed through back_translate_dna_aln(); every third column
 * (indices 2, 5, 8, ...) is overwritten with '-' and the alignment is then
 * passed through ungap_aln_n(A, 1). Each remaining character is written as
 * a digit: a=1, g=2, c=3, t=4, u=5, anything else=6 (case-insensitive), one
 * line per sequence.
 */
int output_suchard_aln (char *out_file, Alignment *A)
{
  FILE *out;
  int col, s, code;

  A=back_translate_dna_aln (A);

  /* Blank out the third position of every triplet of columns. */
  for ( col=2; col<A->len_aln; col+=3)
    {
      for (s=0; s<A->nseq; s++)
        A->seq_al[s][col]='-';
    }

  A=ungap_aln_n (A, 1);
  out=vfopen (out_file, "w");
  for ( s=0; s< A->nseq; s++)
    {
      for (col=0; col< A->len_aln; col++)
        {
          switch (tolower(A->seq_al[s][col]))
            {
            case 'a': code=1; break;
            case 'g': code=2; break;
            case 'c': code=3; break;
            case 't': code=4; break;
            case 'u': code=5; break;
            default:  code=6; break;
            }
          fprintf ( out, "%d", code);
        }
      fprintf ( out, "\n");
    }
  vfclose (out);
  exit (EXIT_SUCCESS);
}
  
/* Write alignment A to fname in FASTA format, gaps included.
 * The header is ">name", followed by a tab and the comment when the comment
 * is non-empty and isblanc() rejects it. Lines are wrapped at the width
 * returned by get_msa_line_length(0, A->len_aln+1).
 */
void output_fasta_aln (char *fname, Alignment*A )
{
  FILE *out;
  int i;
  int width;

  width=get_msa_line_length (0, A->len_aln+1);
  out=vfopen ( fname, "w");

  i=0;
  while ( i<A->nseq)
    {
      char *comment=A->seq_comment[i];

      fprintf ( out, ">%s", A->name[i]);
      if ( comment[0] && !isblanc (comment))
        {
          fprintf ( out, "\t%s", comment);
        }
      fprintf ( out, "\n");
      out=output_string_wrap ( width,A->seq_al[i] , out);
      fprintf ( out, "\n");
      i++;
    }
  vfclose (out);
}
      
/* Write alignment A to fname, one record per sequence: a
 * ">type;name" line, the comment line, the aligned residues wrapped by
 * output_string_wrap(50, ...), and a "*" terminator line. <type> is "DL"
 * when get_string_type() reports "DNA" and "P1" when it reports "PROTEIN".
 */
void output_pir_aln (char *fname, Alignment*A )
{
  int a;
  FILE *fp;
  /* Start from an empty code: the buffer used to be read uninitialized
     when the first sequence was neither DNA nor PROTEIN. A sequence of any
     other type keeps the code of the previous sequence, as before. */
  char type[20]="";
  char *seq_type;

  fp=vfopen ( fname, "w");
  for ( a=0; a< A->nseq; a++)
    {
      seq_type=get_string_type (A->seq_al[a]);
      if      ( strm ( seq_type,"DNA"))sprintf(type, "DL");
      else if ( strm ( seq_type,"PROTEIN"))sprintf(type, "P1");
      fprintf ( fp, ">%s;%s\n%s\n",type, A->name[a], A->seq_comment[a]);
      fp=output_string_wrap ( 50,A->seq_al[a] , fp);
      fprintf ( fp, "\n*\n");
    }

  vfclose (fp);
}

/* Line-length override consulted by get_msa_line_length():
 *   0  no override,
 *  -1  use the full alignment length,
 *  any other value is used as the line length itself.
 */
int landscape_msa;

/* Set the line-length override. A len of 0 is stored as -1; any other
 * value is stored unchanged. Returns the stored value.
 */
int  set_landscape_msa (int len)
{
  landscape_msa=(len==0)?-1:len;
  return landscape_msa;
}
/* Return the number of alignment columns to print per output line.
 *
 * Precedence:
 *   1. landscape_msa == -1      -> aln_len
 *   2. landscape_msa != 0       -> landscape_msa
 *   3. line != 0                -> line
 *   4. environment variable ALN_LINE_LENGTH, when it parses as a positive
 *      integer that fits in an int
 *   5. the compile-time default ALN_LINE_LENGTH
 *
 * An unparsable, zero or negative environment value falls back to the
 * default: callers advance their column loop by the returned value, so a
 * length <= 0 would keep them from ever terminating.
 */
int get_msa_line_length (int line, int aln_len)
{
  char *env;
  char *end;
  long requested;

  if (landscape_msa==-1) return aln_len;
  if ( landscape_msa)return landscape_msa;
  if (line) return line;

  env=getenv ("ALN_LINE_LENGTH");
  if (!env) return ALN_LINE_LENGTH;

  requested=strtol (env, &end, 10);
  if (end==env || requested<=0 || (long)(int)requested!=requested)
    return ALN_LINE_LENGTH;

  return (int)requested;
}

/* Write alignment B to fname in MSF (PileUp) layout.
 *
 * Residues of B are upper-cased in place and gaps are rendered as '.'.
 * A checksum is computed per sequence with SeqGCGCheckSum(); the header
 * carries the sum of those checksums modulo 10000. The type letter is 'N'
 * when (B->S)->type is "DNA" after get_aln_type(), 'P' otherwise. Sequence
 * weights come from (B->S)->W when present, else 1.00.
 * The body is printed in lines of get_msa_line_length(0, B->len_aln+1)
 * columns, split into groups of 10 separated by a space.
 */
void output_msf_aln (char *fname,Alignment *B)
{
  const int block=10;
  FILE *out;
  char *dotted;
  int *checks;
  long grand_checksum;
  int width;
  int name_width;
  int start, s, col, in_block;
  char aa;

  width=get_msa_line_length (0, B->len_aln+1);

  name_width=0;
  for ( s=0; s< B->nseq; s++) name_width= MAX(strlen ( B->name[s]),name_width);
  name_width+=5;

  out=vfopen (fname, "w");

  dotted =vcalloc(B->len_aln,  sizeof(char));
  checks =vcalloc(B->nseq, sizeof(int));
  for ( s=0; s< B->nseq; s++)
    {
      for ( col=0; col<B->len_aln; col++)
        {
          if ( is_gap(B->seq_al[s][col]))
            dotted[col]='.';
          else
            {
              /* The alignment itself is upper-cased as a side effect. */
              B->seq_al[s][col]=toupper(B->seq_al[s][col]);
              dotted[col]=B->seq_al[s][col];
            }
        }
      checks[s] = SeqGCGCheckSum(dotted, (int)B->len_aln);
    }

  grand_checksum = 0;
  for (s=0; s<B->nseq; s++) grand_checksum += checks[s];
  grand_checksum %= 10000;

  fprintf(out,"PileUp\n\n");
  B=get_aln_type(B);
  fprintf(out,"\n\n   MSF:%5d  Type: ",B->len_aln);
  if (strm ( (B->S)->type, "DNA"))
    fprintf(out,"N");
  else
    fprintf(out,"P");
  fprintf(out,"    Check:%6ld   .. \n\n", (long)grand_checksum);

  for (s=0; s< B->nseq; s++)
    {
      fprintf ( out, " Name: %s oo  Len:%5d  Check:%6ld  Weight:  %.3f\n", B->name[s], B->len_aln,(long)checks[s],(B->S)->W?((B->S)->W)->SEQ_W[s]:1.00);
    }
  fprintf(out,"\n//\n\n");

  for (start=0; start<B->len_aln; start+=width)
    {
      fprintf ( out,"\n\n");
      for (s=0; s<B->nseq; s++)
        {
          fprintf (out,"%-*s ",name_width,B->name[s]);

          in_block=0;
          for (col=start; col<start+width && col<B->len_aln; col++)
            {
              /* Separate groups of `block` residues with a space. */
              if ( in_block==block)
                {
                  fprintf (out, " ");
                  in_block=0;
                }
              in_block++;

              if (is_gap(B->seq_al[s][col])) aa='.';
              else aa=toupper(B->seq_al[s][col]);
              fprintf (out,"%c",aa );
            }
          /* A line ending on a full group also gets its trailing space. */
          if ( in_block==block)
            {
              fprintf (out, " ");
              in_block=0;
            }
          fprintf (out,"\n");
        }
    }
  fprintf ( out,"\n");
  vfclose ( out);

  vfree(dotted);
  vfree(checks);

  return;
}
/* Checksum of the first len characters of seq: the upper-cased character at
 * position i (0-based) is weighted by (i % 57) + 1, the weighted values are
 * summed, and the sum is reduced modulo 10000.
 *
 * seq: characters to checksum (need not be NUL-terminated within len).
 * len: number of characters to read; a value <= 0 yields 0.
 * Returns the checksum.
 */
int SeqGCGCheckSum(char *seq, int len)
{
  long check=0;
  int i;

  for ( i=0; i<len; i++)
    {
      /* toupper() requires a value representable as unsigned char; a plain
         char may be negative. */
      check += (long)((i % 57)+1) * toupper((unsigned char)seq[i]);
    }

  return (int)(check % 10000);
}
/* Older, simplified MSF writer: a fixed header, one "Name:" line per
 * retained sequence, then the alignment in lines of
 * get_msa_line_length(0, B->len_aln+1) columns with '-' rendered as '.' and
 * residues upper-cased.
 *
 * The first sequence is always retained; any other sequence is retained
 * only when it still has characters left after ungapping.
 */
void old_output_msf_aln (char *fname,Alignment *B)
{
  FILE *fp;
  int *put_seq;
  int a, b, c;
  int line=0;
  char aa;
  char *buf;
  int max_len;
  int seq_max_len;

  line=get_msa_line_length (line, B->len_aln+1);

  for ( max_len=0,a=0; a< B->nseq; a++)max_len= MAX(strlen ( B->name[a]),max_len);
  /* Longest aligned string. The running maximum used to be compared with
     max_len (the name width), so only the last sequence was effectively
     taken into account. */
  for ( seq_max_len=0,a=0; a< B->nseq; a++)seq_max_len= MAX(strlen ( B->seq_al[a]),seq_max_len);

  buf=vcalloc(seq_max_len+1, sizeof (char));

  /* Allocated per call. The former static pointer was freed at the end of
     the function but never reset, so a second call wrote into released
     memory. */
  put_seq= vcalloc ( B->nseq, sizeof (int));
  if ( B->nseq>0) put_seq[0]=1;

  for ( b=1; b< B->nseq; b++)
    {
      sprintf ( buf, "%s", B->seq_al[b]);
      ungap(buf);
      put_seq[b]=( strlen (buf)>0)?1:0;
    }

  fp=vfopen ( fname, "w");
  fprintf ( fp, "MSF: %d Type P Check: 5083 ..\n", B->len_aln);
  for ( a=0; a< B->nseq; a++)
    {
      if ( put_seq[a]==1)
        fprintf ( fp,"Name: %s\n",B->name[a]);
    }
  fprintf ( fp, "//\n");
  for (a=0; a<B->len_aln; a+=line)
    {
      for (b=0; b<B->nseq; b++)
        {
          if ( put_seq[b]==1)
            {
              fprintf (fp,"%-*s ",max_len,B->name[b]);
              for (c=a;c<a+line && c<B->len_aln;c++)
                {
                  aa=(B->seq_al[b][c]=='-')?'.': toupper(B->seq_al[b][c]);
                  fprintf (fp,"%c",aa );
                }
              fprintf (fp,"\n");
            }
        }
      fprintf (fp,"\n");
    }
  fprintf ( fp,"\n\n");
  vfclose ( fp);

  vfree (buf);
  vfree(put_seq);
}
      
/* Write alignment B to the file `name` in SAGA layout: a header with the
 * file name, number of sequences and length, then the aligned characters
 * unchanged, in lines of get_msa_line_length(0, B->len_aln+1) columns
 * prefixed by the left-justified sequence name.
 */
void output_saga_aln ( char *name, Alignment *B)
{
  FILE *out;
  int start, s, col;
  int name_width;
  int width;

  width=get_msa_line_length (0, B->len_aln+1);

  name_width=0;
  for ( s=0; s< B->nseq; s++)
    {
      if ( strlen ( B->name[s])>name_width) name_width=strlen ( B->name[s]);
    }

  out= vfopen ( name, "w");

  fprintf (out, "\nSAGA FORMAT\nalignement  %s nseq=%d len=%d\n", name, B->nseq, B->len_aln);
  fprintf (out, "\n\n");

  for (start=0; start<B->len_aln; start+=width)
    {
      for (s=0; s<B->nseq; s++)
        {
          fprintf (out,"%-*s ",name_width,B->name[s]);
          for (col=start; col<start+width && col<B->len_aln; col++)
            fprintf (out,"%c",(B->seq_al[s][col]) );
          fprintf (out,"\n");
        }
      fprintf (out,"\n");
    }
  fprintf (out,"\n\n");
  vfclose ( out);
}
/* Same layout as the SAGA writer, but within each block of columns a
 * sequence is printed only when the sum of 1-is_gap() over the block is
 * positive. Every character accepted by is_gap() is printed as '-'.
 */
void output_compact_aln ( char *name, Alignment *B)
{
  FILE *out;
  int start, s, col;
  int n_res;
  int name_width;
  int width;

  width=get_msa_line_length (0, B->len_aln+1);

  name_width=0;
  for ( s=0; s< B->nseq; s++)
    {
      if ( strlen ( B->name[s])>name_width) name_width=strlen ( B->name[s]);
    }

  out= vfopen ( name, "w");

  fprintf (out, "\nSAGA FORMAT\nalignement  %s nseq=%d len=%d", name, B->nseq, B->len_aln);
  fprintf (out, "\n\n");

  for (start=0; start<B->len_aln; start+=width)
    {
      for (s=0; s<B->nseq; s++)
        {
          /* Count the non-gap positions of this sequence in the block. */
          n_res=0;
          for ( col=start; col<start+width && col<B->len_aln; col++)
            n_res+=1-is_gap(B->seq_al[s][col]);

          if ( n_res<=0) continue;

          fprintf (out,"%-*s ",name_width,B->name[s]);
          for (col=start; col<start+width && col<B->len_aln; col++)
            {
              char ch=B->seq_al[s][col];

              if ( is_gap(ch) && ch!='-' ) fprintf (out,"%c", '-');
              else fprintf (out,"%c",(ch) );
            }
          fprintf (out,"\n");
        }
      fprintf (out,"\n");
    }
  fprintf (out,"\n\n");
  vfclose ( out);
}


/* Write alignment B to the file `name` in CLUSTAL layout.
 *
 * Line width: when the environment variable SEP_4_TCOFFEE is set, a block
 * ends just after the first 'o'/'O' found in the first sequence; when no
 * such character is found, or the variable is unset, the width comes from
 * get_msa_line_length(0, B->len_aln+1).
 *
 * Each block holds one line per sequence (gaps printed as '-', residues
 * through GET_CASE, optionally followed by the running residue count that
 * starts at B->order[][1]) and one line built from analyse_aln_column().
 * An empty alignment prints one "name -" line for every index up to and
 * including B->nseq.
 */
void output_clustal_aln ( char *name, Alignment *B)
{
  FILE *out;
  int *n_residues;
  int name_width=0;
  int width=0;
  int start, s, col;

  if ( getenv ("SEP_4_TCOFFEE"))
    {
      for (; width<B->len_aln; width++)
        {
          if ( B->seq_al[0][width]=='o' || B->seq_al[0][width]=='O') break;
        }
      if ( B->seq_al[0][width]=='O' || B->seq_al[0][width]=='o') width++;
    }
  else
    {
      if ( width<B->len_aln) width=B->len_aln;
    }

  if ( width==B->len_aln) width=get_msa_line_length (0, B->len_aln+1);

  n_residues=vcalloc ( B->nseq+1, sizeof (int));
  for ( s=0; s< B->nseq; s++)
    {
      if ( strlen (B->name[s])>name_width) name_width= strlen ( (B->name[s]));
      n_residues[s]=B->order[s][1];
    }
  name_width=MAX(name_width+2, 16);

  out= vfopen ( name, "w");

  fprintf (out, "CLUSTAL FORMAT for %s %s [%s], CPU=%.2f sec, SCORE=%d, Nseq=%d, Len=%d ", PROGRAM, VERSION,URL, (float)(B->cpu+get_time())/1000, B->score_aln, B->nseq, B->len_aln);
  fprintf (out, "\n\n");

  if ( B->len_aln==0)
    {
      for (s=0; s<=B->nseq; s++)
        fprintf (out,"%-*s -\n",name_width, B->name[s]);
    }
  else
    {
      for (start=0; start<B->len_aln; start+=width)
        {
          /* Sequence lines. */
          for (s=0; s<B->nseq; s++)
            {
              fprintf (out,"%-*s",name_width, B->name[s]);
              for (col=start; col<start+width && col<B->len_aln; col++)
                {
                  if ( is_gap(B->seq_al[s][col]))
                    {
                      fprintf (out,"%c", '-');
                    }
                  else
                    {
                      n_residues[s]++;
                      fprintf (out, "%c", GET_CASE(B->residue_case, B->seq_al[s][col]));
                    }
                }
              if (B->output_res_num)fprintf (out, " %d", n_residues[s]);
              fprintf (out,"\n");
            }

          /* Column-analysis line. */
          fprintf (out,"%-*s",name_width," ");
          for (col=start; col<start+width && col<B->len_aln; col++)
            {
              fprintf ( out, "%c", analyse_aln_column (B, col));
            }
          fprintf (out,"\n");

          fprintf (out,"\n");
        }
    }
  fprintf (out,"\n\n");
  vfree (n_residues);
  vfclose ( out);
}

/* Write alignment B to the file `name` in interleaved PHYLIP layout: a
 * first line with the number of sequences and the length, then blocks of
 * get_msa_line_length(0, 0) columns. The sequence name (cut or padded to
 * 10 characters) is printed in the first block only; later blocks start
 * with blank padding. Residues are grouped by 10, and every character
 * accepted by is_gap() is printed as '-'.
 */
void output_phylip_aln ( char *name, Alignment *B)
{
  int a, b, c, d;
  FILE *fp;
  int *print_name;
  int line;

  line=get_msa_line_length(0, 0);

  /* print_name[b] becomes 1 once the name of sequence b has been written. */
  print_name=vcalloc ( B->nseq, sizeof (int));
  fp= vfopen ( name, "w");

  fprintf (fp, "%3d %d\n", B->nseq, B->len_aln);
  fprintf (fp, "\n");
  for (a=0; a<B->len_aln; a+=line)
    {
      for (b=0; b<B->nseq; b++)
        {
          if ( print_name[b]==0)
            {
              fprintf (fp,"%-10.10s  ",B->name[b]);
              print_name[b]=1;
            }
          else
            {
              fprintf (fp, "%10.10s ", " ");
            }

          for (d=0,c=a;c<a+line && c<B->len_aln;c++, d++)
            {
              if ( d==10)
                {
                  fprintf ( fp, " ");
                  d=0;
                }
              if ( is_gap(B->seq_al[b][c])&& B->seq_al[b][c]!='-' )fprintf (fp,"%c", '-');
              else fprintf (fp,"%c",(B->seq_al[b][c]) );
            }
          fprintf (fp,"\n");
        }
      fprintf (fp,"\n");
    }
  fprintf (fp,"\n\n");
  vfclose ( fp);

  /* The flag array was previously never released. */
  vfree (print_name);
}

/* Write two files derived from alignment A and structure STRUC:
 *
 *  "<out_file>.mss":     an "ST" record holding STRUC->seq[0] spread over
 *                        the columns of the first aligned sequence ('.' in
 *                        gap columns), followed by one "AS name" record for
 *                        every sequence except the last one.
 *  "<out_file>.one_rna": the last sequence of A as a FASTA record.
 */
void output_rnalign (char *out_file, Alignment *A, Sequence *STRUC)
{
  int a, b, len;
  FILE *fp;
  char bank_file[100];
  char pep_file[100];
  char *buf;

  /* Bounded formatting: a long out_file must not overrun the name
     buffers. */
  snprintf ( bank_file, sizeof (bank_file), "%s.mss", out_file);
  snprintf ( pep_file, sizeof (pep_file), "%s.one_rna", out_file);

  /* One byte per column plus the terminator. The original call was
     strlen(A->seq_al[0]+1), which measured the string from its second
     character and so allocated two bytes too few. */
  len=strlen ( A->seq_al[0]);
  buf=vcalloc ( len+1, sizeof (char));

  for ( b=0,a=0; a< len; a++)
    {
      if ( is_gap(A->seq_al[0][a]))
        buf[a]='.';
      else
        buf[a]=STRUC->seq[0][b++];
    }
  buf[a]='\0';

  fp=vfopen ( bank_file, "w");

  fprintf ( fp, "ST\n");
  fp=output_string_wrap ( 50, buf, fp);
  fprintf ( fp, "\n\n");

  for ( a=0; a<A->nseq-1; a++)
    {
      fprintf ( fp, "AS %s\n ", A->name[a]);
      fp=output_string_wrap ( 50, A->seq_al[a], fp);
      fprintf ( fp, "\n\n");
    }
  vfclose ( fp);
  fp=vfopen ( pep_file, "w");
  fprintf ( fp, ">%s\n", A->name[A->nseq-1]);
  fp=output_string_wrap ( 50, A->seq_al[A->nseq-1], fp);
  fprintf ( fp, "\n");
  vfclose (fp);
}

/*
 * For every pair of sequences (i<j) of A, build a two-sequence alignment,
 * pass it through ungap_aln() and write it with output_clustal_aln() to
 * "<pw_lib_saga_aln_name>_<name i>_<name j>.lib".
 */
void output_lib (char *pw_lib_saga_aln_name, Alignment *A )
    {
    char fname[VERY_LONG_STRING];
    Alignment *pair;
    int first, second;

    pair=declare_Alignment (NULL);
    pair->nseq=2;

    for ( first=0; first< A->nseq-1; first++)
      {
      for ( second=first+1; second<A->nseq; second++)
            {
            /* names first, then the aligned strings */
            sprintf ( pair->name[0], "%s", A->name[first]);
            sprintf ( pair->name[1], "%s", A->name[second]);
            sprintf ( pair->seq_al[0], "%s", A->seq_al[first]);
            sprintf ( pair->seq_al[1], "%s", A->seq_al[second]);
            pair->nseq=2;
            pair->len_aln=strlen ( pair->seq_al[0]);

            sprintf ( fname, "%s_%s_%s.lib",pw_lib_saga_aln_name, A->name[first], A->name[second]);

            ungap_aln (pair);
            output_clustal_aln (fname,pair);
            }
      }
    }
/*
 * For every pair of sequences (i<j) of A, build a two-sequence alignment,
 * pass it through ungap_aln() and write it with output_clustal_aln() to
 * "<pw_lib_saga_aln_name>_<name i>_<name j>.pw_lib_saga_aln".
 */
void output_pw_lib_saga_aln (char *pw_lib_saga_aln_name, Alignment *A )
    {
    char fname[VERY_LONG_STRING];
    Alignment *pair;
    int first, second;

    pair=declare_Alignment (NULL);
    pair->nseq=2;

    for ( first=0; first< A->nseq-1; first++)
      {
      for ( second=first+1; second<A->nseq; second++)
            {
            /* names first, then the aligned strings */
            sprintf ( pair->name[0], "%s", A->name[first]);
            sprintf ( pair->name[1], "%s", A->name[second]);
            sprintf ( pair->seq_al[0], "%s", A->seq_al[first]);
            sprintf ( pair->seq_al[1], "%s", A->seq_al[second]);
            pair->nseq=2;
            pair->len_aln=strlen ( pair->seq_al[0]);

            sprintf ( fname, "%s_%s_%s.pw_lib_saga_aln",pw_lib_saga_aln_name, A->name[first], A->name[second]);

            ungap_aln (pair);
            output_clustal_aln (fname,pair);
            }
      }
    }
/*
 * Create (truncate) the file `name` and write the Lalign banner: the mode
 * line, the program version line, and one line for each of the two compared
 * sequences giving its file, name and length as stored in A->S.
 */
void output_lalign_header( char *name, Alignment *A)
    {
    FILE *fp;
    int seq_a, seq_b;   /* indexes of the two sequences inside A->S */

    seq_a=A->order[0][0];
    seq_b=A->order[1][0];

    fp=vfopen ( name, "w");

    fprintf ( fp, " Lalign mode: best local alignments between two sequences\n");
    fprintf ( fp, " %s(%s) [%s]\n\n", VERSION, DATE, URL);
    fprintf ( fp, " Comparison of:\n(A) %s\t%s\t-%d aa\n", (A->S)->file[seq_a],(A->S)->name[seq_a], (A->S)->len[seq_a]);
    fprintf ( fp, "(B) %s\t%s\t-%d aa\n", (A->S)->file[seq_b],(A->S)->name[seq_b], (A->S)->len[seq_b]);

    vfclose ( fp);
    }

/*
 * Write the chain of alignments hanging from B->A in GLALIGN_01 format.
 *
 * name : output file (truncated).
 * B    : list head; the head itself is skipped, output starts at B->A.
 *        Nothing is written (not even the file) when B->A is NULL.
 * S    : optional chain of alignments holding the column reliabilities in
 *        row S->nseq and the score in score_aln. S may be NULL or shorter
 *        than the B chain: missing reliabilities are reported as -1 and the
 *        score then falls back to B->score_aln.
 *
 * For every column one line is written with, for each sequence, either
 * "----" (gap) or the running residue counter seeded from B->order[seq][1],
 * followed by the reliability value.
 */
void output_glalign ( char *name, Alignment *B, Alignment *S)
{
  int a, b, g, s;
  int naln=0;
  FILE *fp;
  int **nr;
  B=B->A;
  if ( B==NULL){return;}
  
  fp=vfopen (name, "w");
  fprintf (fp, "Format: GLALIGN_01 [Generated with %s ]\n", PROGRAM);
  fprintf (fp, "#Each Line corresponds to a column\n");
  fprintf (fp, "#First column coresponds to first genome\n");
  fprintf (fp, "#Last Column gives the column reliability on a 0-9 scale\n");
  fprintf (fp, "#[-1] Indicates that the reliability was not evaluated\n");
  
  fprintf (fp, "Genome List\n");
  for ( a=0; a< B->nseq; a++)
    fprintf (fp, "\tGenome %s\n", B->name[a]);
  fprintf (fp, "Alignment List\n");
  while (B)
    {
      /* S is optional: it used to be dereferenced here unconditionally */
      fprintf (fp, "Alignment %d Len %d Score %d\n", ++naln, B->len_aln, (S)?S->score_aln:B->score_aln);
      /* private copy of the order table: column 1 is used as residue counter */
      nr=duplicate_int (B->order, -1, -1);
      for ( a=0; a< B->len_aln; a++)
      {
        fprintf ( fp, "\t");
        for ( b=0; b< B->nseq; b++)
          {
            g=is_gap (B->seq_al[b][a]);
            nr[b][1]+=1-g;

            if (g)fprintf (fp, "---- ");
            else fprintf ( fp, "%4d ",nr[b][1]);
          }
        s=((S)?S->seq_al[S->nseq][a]:-1);
        if (s==NO_COLOR_RESIDUE)s=-1;
        fprintf ( fp,"[ %d ]",s);
        fprintf ( fp, "\n");  
        
      } 
      free_int (nr, -1);
      B=B->A;
      if (S)S=S->A;
    }
  vfclose ( fp);
}
/*
 * Read a file made of several alignments separated by TC_REC_SEPARATOR and
 * return them as a list chained through the ->A field.
 *
 * name : input file.
 * IN   : optional alignment; when not NULL the first record is copied into
 *        it with copy_aln() and IN becomes the head of the returned list.
 *
 * Returns the head of the list, or NULL when the file holds no record after
 * the first separator. Everything before the first separator is discarded.
 *
 * The file content is tokenised with strtok() on '@': genuine '@' characters
 * are first replaced by a marker, the record separator is turned into '@',
 * and the marker is turned back into '@' in each record before it is parsed.
 */
Alignment *input_conc_aln ( char *name, Alignment *IN)
{
  FILE *fp;
  char *string, *p, *file;
  Alignment *F=NULL,*A=NULL, *B=NULL;
  
  file=vtmpnam (NULL);
 
  string=file2string(name);
  string=substitute ( string, "@", "!Protected!");
  string=substitute ( string, TC_REC_SEPARATOR, "@");
  strtok (string,"@");   /* skip whatever precedes the first record */
  

  while ( (p=strtok (NULL,"@"))!=NULL)
    {
      char *buf;
      buf=vcalloc ( strlen (p)+1, sizeof (char));
      sprintf (buf,"%s", p);
      /* Must be spelled exactly like the marker inserted above: the previous
         lower-case "!protected!" did not match it, so protected '@' were
         presumably never restored (assumes substitute() is case sensitive). */
      buf=substitute (buf,"!Protected!", "@");
      
      /* each record goes through a temporary file to reuse main_read_aln */
      fp=vfopen (file, "w");
      fprintf ( fp, "%s",buf);
      vfclose (fp);
      vfree (buf);
            
      B=main_read_aln (file,NULL);
  
      if ( !A)
      {
        if (IN){copy_aln (B, IN);F=A=IN;}
        else F=A=B;
      }
      else
      {
        A->A=B;
        A=A->A;
      }
    }
  
  vfree (string);
  return F;
}

/*
 * Write a chain of alignments (linked through ->A, starting with B itself)
 * to `name`: a "# CONC_MSF_FORMAT_01" header, then for each alignment a
 * TC_REC_SEPARATOR line followed by its sequences as ">name" / sequence
 * pairs of lines.
 */
void output_conc_aln ( char *name, Alignment *B)
{
  FILE *fp;
  Alignment *cur;
  int seq;

  fp=vfopen (name, "w");
  fprintf (fp, "# CONC_MSF_FORMAT_01\n");

  for ( cur=B; cur!=NULL; cur=cur->A)
    {
      fprintf (fp, "%s\n", TC_REC_SEPARATOR);
      seq=0;
      while ( seq< cur->nseq)
        {
          fprintf ( fp, ">%s\n%s\n", cur->name[seq], cur->seq_al[seq]);
          seq++;
        }
    }

  vfclose (fp);
}

/*
 * Write every alignment chained after B (B->A, B->A->A, ...) to `name` in
 * Lalign format. The banner is written by output_lalign_header() only while
 * the static flag below is 0; calling with an empty chain (B->A == NULL)
 * resets the flag and writes nothing.
 */
void output_lalign ( char *name, Alignment *B)
{
  static int output_header;   /* 1 once the banner has been written */
  Alignment *cur;

  cur=B->A;
  if ( cur==NULL)
    {
      output_header=0;
      return;
    }

  if ( output_header==0)
    {
      output_lalign_header(name, cur);
      output_header=1;
    }

  for ( ; cur!=NULL; cur=cur->A)
    output_lalign_aln ( name, cur);
}
/*
 * Append one pairwise alignment to `name` in Lalign layout.
 *
 * Only the first two sequences of B are used. A summary line gives the
 * percent identity over the columns where both sequences carry a residue,
 * then the alignment is written in slices of 60 columns, each slice made of
 * five lines: ruler of sequence 0, sequence 0, match line, sequence 1,
 * ruler of sequence 1. Residue numbering starts from B->order[seq][1].
 * The record ends with a "----------" separator.
 */
void output_lalign_aln   ( char *name, Alignment *B)
    {
    int a, b, c,d=0, s=0;
    char col;

    float tot=0;
    float id=0;

    FILE *fp;
    int max_len=0;
    int name_len;
    int line;     
    int *n_residues;
    int res;

      
    n_residues=vcalloc ( B->nseq+1, sizeof (int));
    for ( a=0; a< B->nseq; a++)
          {
          name_len=strlen (B->name[a]);
          if ( name_len>max_len)max_len=name_len;
          n_residues[a]=B->order[a][1];
          }
    max_len=MAX(max_len+2, 16);
    line=60;
    

    fp= vfopen ( name, "a");
    
    /* identity is measured on residue/residue columns only */
    for (a=0; a< B->len_aln; a++)
      {
        if ( !is_gap(B->seq_al[0][a]) && !is_gap(B->seq_al[1][a]))
           {
             tot++;
             id+=(B->seq_al[0][a]==B->seq_al[1][a]);
           }
      }
    
    /* no overlapping residue: report 0% rather than dividing 0 by 0 */
    id=(tot>0)?(id*100)/tot:0;
    fprintf (fp, " %.1f%% identity in %d aa overlap; score: %d\n\n", id,(int)tot, B->score_aln);
    
    
    for (a=0; a<B->len_aln; a+=line)
         {for (b=0; b<5; b++)
           {
               if ( b==0 || b==4)
               {
                 /* ruler line: a number every 10th residue of sequence s */
                 s=(b==0)?0:1;
                 fprintf (fp,"%-*s",max_len," ");
                 for (d=0,c=a;c<a+line && c<B->len_aln;c++)
                   {
                   res=!is_gap ( B->seq_al[s][c]);
                   n_residues[s]+=res;
                   /* a printed number takes 4 columns: d counts back up to 0
                      so that the next 3 columns emit nothing */
                   if ( (n_residues[s]%10)==0 && res && (c-a+4)<line){fprintf (fp, "%-4d", n_residues[s]);d=-3;}  
                   else
                     {
                       if ( d==0)fprintf (fp, " ");
                       else d++;
                     }
                   }
                 fprintf (fp,"\n");
               }
             else if (b==1 || b==3)
                {
                  /* sequence line */
                  s=(b==1)?0:1;
                  fprintf (fp,"%-*s",max_len, B->name[s]);
                  for (c=a;c<a+line && c<B->len_aln;c++)
                      {
                        if ( is_gap(B->seq_al[s][c]))fprintf (fp,"%c", '-');
                        else 
                            {
                              fprintf (fp, "%c", GET_CASE(B->residue_case, B->seq_al[s][c]));
                            }
                      }
                  fprintf (fp,"\n");
                }
             else if ( b==2)
                {
                /* match line: column symbols are shifted one level down
                   ('*' -> ':', ':' -> '.', '.' -> ' ') */
                fprintf (fp,"%-*s",max_len," ");                
                for (c=a;c<a+line && c<B->len_aln;c++)
                      {
                      col=analyse_aln_column (B, c);
                      if ( col=='*')col=':';
                      else if ( col==':')col='.';
                      else if ( col=='.')col=' ';
                      fprintf ( fp, "%c", col);
                      }
                fprintf (fp,"\n");
                }
           }
         fprintf (fp,"\n"); 
         }
    
    fprintf (fp,"\n\n----------\n\n");
    vfree (n_residues);
    vfclose ( fp);
    }   


/****************************************************************************************************/
/*************************************UTIL *********************************************************/
/**************************************************************************************************/


/****************************************************************************************************/
/***************************                                    *************************************/
/***************************             PROCESSING         *************************************/
/***************************                                    *************************************/
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                              THREADING                                                  */
/***************************************************************************************** */

/*
 * Return a newly allocated string three times as long as s in which every
 * character of s is followed by two 'o' characters ("MK" -> "MooKoo").
 * The caller owns the returned buffer.
 */
char *thread_aa_seq_on_dna_seq( char *s)
     {
       int n_res, i;
       char *threaded;
       char *out;

       n_res=strlen ( s);
       threaded=vcalloc ( n_res*3 +1, sizeof (char));

       out=threaded;
       for ( i=0; i<n_res; i++)
           {
             *out++=s[i];
             *out++='o';
             *out++='o';
           }
       *out='\0';

       return threaded;
     }

/*
 * Thread DNA sequences on a protein alignment.
 *
 * S : DNA sequences; each one is ungapped in place when it is used.
 * A : protein alignment (left untouched).
 *
 * Returns a copy of A, three times as wide, in which every column of A has
 * become three columns:
 *   - for a sequence whose name is found in S: three gaps for a gap, the
 *     next three nucleotides of the DNA sequence otherwise;
 *   - for a sequence without DNA counterpart: the original symbol followed
 *     by two gaps.
 * When the DNA sequence is too short for the protein, the missing
 * nucleotides are written as gaps instead of reading past its end.
 */
Alignment *thread_dnaseq_on_prot_aln (Sequence *S, Alignment *A)
        {
          Alignment *B=NULL;
          int a, b, c, la, ls, ln, m;

          B=copy_aln ( A, B);
          B=realloc_aln2 ( B, B->nseq, B->len_aln*3 +1);

          for ( a=0; a< A->nseq; a++)
              {
            /* m records whether a DNA sequence with the same name exists */
            for ( m=0,b=0; b< S->nseq; b++)
                {
                if (strm (A->name[a], S->name[b]) )
                   {
                     m=1;
                     ungap ( S->seq[b]);
                     /* la: protein column, ls: output column, ln: nucleotide */
                     for (la=0, ls=0, ln=0; la< A->len_aln; la++)
                         {
                           for (c=0; c< 3; c++)
                               {
                                 if ( is_gap(A->seq_al[a][la]) || S->seq[b][ln]=='\0')
                                     B->seq_al[a][ls++]='-';
                                 else
                                     B->seq_al[a][ls++]=S->seq[b][ln++];
                               }
                         }
                   B->seq_al[a][ls]='\0';
                   }
                }
            if ( m==0)
                   {
                   for (la=0, ls=0; la< A->len_aln; la++)
                         {
                           B->seq_al[a][ls++]=A->seq_al[a][la];
                           B->seq_al[a][ls++]='-';
                           B->seq_al[a][ls++]='-';
                         }
                   /* this branch used to leave the widened row unterminated */
                   B->seq_al[a][ls]='\0';
                   }
            }
          
          B->len_aln=strlen ( B->seq_al[0]);
          return B;
      }
/*
 * Replace, in place, the residues of A by the symbols of the sequences of ST
 * that carry the same name.
 *
 * For every (alignment sequence, ST sequence) pair with identical names the
 * ST sequence is ungapped and its symbols are written, in order, over the
 * non-gap positions of the aligned sequence. Threading stops when the ST
 * sequence is exhausted, so a string shorter than the number of residues
 * leaves the remaining residues unchanged instead of reading past its end.
 *
 * If ST contains a sequence named "Cons" whose length equals A->len_aln, it
 * is appended to A as an extra sequence.
 * NOTE(review): this assumes A has room for one more sequence - confirm.
 */
void thread_seq_struc2aln ( Alignment *A, Sequence *ST)
      {
      int a, b, c,d;
      int len, cons;
      
      for ( a=0; a< A->nseq; a++)
            for ( b=0; b< ST->nseq; b++)
                  {
                  if ( strcmp ( A->name[a], ST->name[b])==0)
                        {
                        ungap (ST->seq[b]);
                        len=strlen(A->seq_al[a]);
                        /* c: alignment column, d: position in the ST string */
                        for ( c=0, d=0; c<len && ST->seq[b][d]!='\0'; c++)
                              {
                              if ( !is_gap(A->seq_al[a][c]))A->seq_al[a][c]=ST->seq[b][d++];
                              }
                        }
                  }
      cons=name_is_in_list ("Cons", ST->name, ST->nseq, 100);
      if ( cons!=-1 && A->len_aln==(int)strlen ( ST->seq[cons]))
        {
          sprintf (A->name[A->nseq], "Cons");
          sprintf (A->seq_al[A->nseq],"%s", ST->seq[cons]);
          A->nseq++;
        }
      }
/*
 * Recode the alignment in place.
 *
 * Pass 1 (column wise): residues of a column without any gap become 'h',
 * all other residues become 'x'; gaps are left alone.
 * Pass 2 (sequence wise, first and last columns excluded): an 'h' is kept
 * only when both its neighbours are 'h' (or 'b'); otherwise it is marked
 * 'b', and every 'b' is finally turned into 'x'.
 */
void cache_id ( Alignment *A)
      {
      int col, seq, pos;
      int n_res, full;
      int left_ok, right_ok;
      char *row;

      /* pass 1: h = column entirely made of residues, x = anything else */
      for ( col=0; col< A->len_aln; col++)
            {
            n_res=0;
            for ( seq=0; seq< A->nseq; seq++)
                  {
                  if ( !is_gap(A->seq_al[seq][col]))n_res++;
                  }
            full=(n_res==A->nseq);

            for ( seq=0; seq< A->nseq; seq++)
                  {
                  if ( is_gap(A->seq_al[seq][col]))continue;
                  A->seq_al[seq][col]=(full)?'h':'x';
                  }
            }

      /* pass 2: demote the 'h' that are not flanked by 'h'/'b' on both sides */
      for ( seq=0; seq< A->nseq; seq++)
            {
            row=A->seq_al[seq];
            for ( pos=1; pos< A->len_aln-1; pos++)
                  {
                  if ( row[pos]!='h')continue;
                  left_ok =(row[pos-1]=='h' || row[pos-1]=='b');
                  right_ok=(row[pos+1]=='h' || row[pos+1]=='b');
                  if ( !(left_ok && right_ok))row[pos]='b';
                  }
            for ( pos=1; pos< A->len_aln-1; pos++)
                  {
                  if ( row[pos]=='b')row[pos]='x';
                  }
            }
      }
                  
                                                       
/*******************************************************************************************/
/*                                                                                         */
/*                                                                                         */
/*                               PROCESSING OF EST                                         */
/*                                                                                         */
/***************************************************************************************** */
/*
 * Orient and cluster a set of EST sequences.
 *
 * S            : the sequences. Every sequence is lower-cased by
 *                invert_seq(); those that SHC() flags with -1 are replaced
 *                by their inverted copy and renamed "i_<name>".
 * cluster_list : output, one entry per sequence, receiving its cluster
 *                number (filled through search_for_cluster()).
 *
 * The clusters and their members are reported on stderr.
 * Returns the number of clusters.
 */
int process_est_sequence ( Sequence *S, int *cluster_list)
      {
      const int threshold=20;   /* score above which two sequences are linked */
      char new_name [VERY_LONG_STRING];
      char **inverted_seq;
      int **sens;     /* best match, both sequences as given   */
      int **a_sens;   /* best match, second sequence inverted  */
      int **best;     /* largest of the two                    */
      int *solution;
      int n_clusters=0;
      int i, j;
      int direct, reverse;
      int added;

      sens=declare_int ( S->nseq,S->nseq);
      a_sens=declare_int ( S->nseq,S->nseq);
      best=declare_int ( S->nseq,S->nseq);

      inverted_seq=vcalloc ( S->nseq, sizeof (char*));
      for ( i=0; i<S->nseq; i++)
            {
            inverted_seq[i]=invert_seq ( S->seq[i]);
            }

      /* symmetric score matrices over all pairs of sequences */
      for ( i=0; i< S->nseq-1; i++)
            {
            for ( j=i+1; j<S->nseq; j++)
                  {
                  direct=get_best_match ( S->seq[i], S->seq[j]);
                  sens[i][j]=sens[j][i]=direct;

                  reverse=get_best_match ( S->seq[i],inverted_seq[j]);
                  a_sens[i][j]=a_sens[j][i]=reverse;

                  best[i][j]=best[j][i]=(direct>reverse)?direct:reverse;
                  }
            }
      solution=SHC ( S->nseq, a_sens, sens);

      /* single linkage clustering on the best scores */
      for ( i=0; i<S->nseq; i++)
            {
            cluster_list[i]=-1;
            }
      for ( i=0; i<S->nseq; i++)
            {
            added=search_for_cluster (i, n_clusters, cluster_list, threshold, S->nseq, best);
            if ( added>0)n_clusters++;
            }

      fprintf ( stderr, "\nTHERE %s %d Independant Cluster(s) in your sequences",(n_clusters>1)?"are":"is",(n_clusters));
      for ( i=0; i<n_clusters; i++)
            {
            fprintf (stderr, "\n");
            for ( j=0; j<S->nseq; j++)
                  {
                  if ( cluster_list[j]==i)fprintf ( stderr, "%s ", S->name[j]);
                  }
            }

      /* swap in the inverted copy of the sequences flagged -1 */
      for ( i=0; i<S->nseq; i++)
            {
            if ( solution[i]!=-1)continue;

            S->seq[i]=inverted_seq[i];
            sprintf ( new_name, "i_%s", S->name[i]);
            sprintf ( S->name[i], "%s", new_name);
            }
      return n_clusters;
      }

/*
 * Put `seq` and, recursively, every still unassigned sequence linked to it
 * into cluster `cluster_number`.
 *
 * cluster_list : current assignment, -1 meaning "not assigned yet".
 * T            : two sequences a and b are linked when S[a][b] > T.
 * nseq         : number of sequences.
 * S            : nseq x nseq score matrix.
 *
 * Returns the number of sequences newly assigned by this call (0 when `seq`
 * already belonged to a cluster and has no unassigned neighbour).
 */
int search_for_cluster ( int seq, int cluster_number, int *cluster_list, int T, int nseq, int **S)
      {
      int added=0;
      int other;

      if ( cluster_list[seq]==-1)
            {
            cluster_list[seq]=cluster_number;
            added=1;
            }

      for ( other=0; other<nseq; other++)
            {
            if ( cluster_list[other]!=-1)continue;   /* already placed */
            if ( S[seq][other]<=T)continue;          /* not linked     */

            cluster_list[other]=cluster_number;
            added+=1+search_for_cluster ( other, cluster_number, cluster_list, T, nseq, S);
            }
      return added;
      }
            
/*
 * Stochastic search for an orientation (+1 / -1) of every sequence.
 *
 * Starts from a random assignment and repeats up to VERY_LONG_STRING*nseq
 * times (or until the score reaches VERY_LONG_STRING): flip one random
 * entry, keep the flip when the score improves or when a random draw in
 * [0, VERY_LONG_STRING) exceeds the current score, undo it otherwise.
 * Scores come from evaluate_sol (sol, nseq, ST, NST); progress is reported
 * on stderr.
 *
 * Returns the newly allocated array of nseq orientations.
 */
int * SHC ( int nseq, int **NST, int **ST)
      {
      int *sol;
      int it, max_it;
      int since_report;
      int flipped;
      int score, new_score;

      sol=vcalloc ( nseq, sizeof (int));
      for ( it=0; it<nseq; it++)
            {
            if ( addrand ((unsigned long)100)>49)sol[it]=1;
            else sol[it]=-1;
            }

      score=evaluate_sol (sol, nseq, ST, NST);
      fprintf ( stderr, "\nI_Score=%d\n", score);

      max_it=VERY_LONG_STRING*nseq;
      since_report=0;
      for ( it=0; it< max_it && score<VERY_LONG_STRING; it++)
            {
            flipped=mutate_sol ( sol,nseq);
            new_score=evaluate_sol (sol, nseq, ST, NST);

            /* the random draw only happens when the move is not an improvement */
            if ( new_score>score || (addrand ((unsigned long)VERY_LONG_STRING))>score)
                  {
                  score=new_score;
                  }
            else
                  {
                  sol[flipped]=sol[flipped]*-1;   /* undo the move */
                  }

            if ( since_report==VERY_LONG_STRING)
                  {
                  since_report=0;
                  fprintf ( stderr, "\nScore=%d", score);
                  }
            since_report++;
            }
      fprintf ( stderr, "\nScore=%d\n", score);
      return sol;
      }

/*
 * Flip the sign of one entry of sol, chosen with addrand (nseq), and return
 * the index of that entry so that the caller can undo the move.
 */
int mutate_sol (int *sol, int nseq)
      {
      int pos;

      pos=addrand ((unsigned long)nseq);
      sol[pos]=-sol[pos];
      return pos;
      }
/*
 * Score an orientation assignment.
 *
 * sol  : nseq entries; a pair (a,b) contributes NST[a][b] when sol[a] and
 *        sol[b] have opposite signs and ST[a][b] otherwise.
 * Returns the sum over all pairs, rescaled to 0..VERY_LONG_STRING relative
 * to the best achievable sum (largest of ST/NST for every pair), or 0 when
 * that best achievable sum is 0.
 *
 * NOTE(review): the normalisation constant is cached in a static the first
 * time it is non zero, so it is only valid while the function keeps being
 * called with the same matrices - confirm this matches the callers.
 */
int evaluate_sol ( int *sol, int nseq, int **ST, int **NST)
      {
      static int max_score;
      int a, b, score=0;
      
      if ( max_score==0)
            {
            for ( a=0; a<nseq-1; a++)
                  for ( b=a+1; b<nseq; b++)
                        {
                        max_score+=(ST[a][b]>NST[a][b])?ST[a][b]:NST[a][b];
                        }
            }

      /* every pair scores 0 (or fewer than two sequences): nothing to
         normalise, and dividing by max_score would be a division by zero */
      if ( max_score==0)return 0;
      
      for ( a=0; a<nseq-1; a++)
            for (b=a+1; b<nseq; b++)
                  {
                  if ( (sol[a]*sol[b])<0)score+=NST[a][b];
                  else score+=ST[a][b];
                  }
      return (score*VERY_LONG_STRING)/max_score;
      }           
            
     
/*
 * Return the reverse complement of a nucleotide sequence as a newly
 * allocated, lower-case string (caller owns it).
 *
 * Side effect: seq itself is converted to lower case in place.
 *
 * a<->t and g<->c are complemented, 'n' stays 'n'. Any other symbol is now
 * copied unchanged: it used to leave a zero byte in the output, which
 * silently truncated the returned sequence at that position.
 */
char * invert_seq ( char *seq)
      {
      int a, b;
      
      char *nseq;
      int l;
      
      
      l=strlen ( seq);
      for ( a=0; a<l; a++)
            seq[a]=tolower ( (unsigned char)seq[a]);
      nseq=vcalloc ( l+1, sizeof (char));
      
      /* a walks the output forward, b walks the input backward */
      for ( a=0, b=l-1; a<l; a++, b--)
            {
            switch (seq[b])
                  {
                  case 'g': nseq[a]='c'; break;
                  case 'c': nseq[a]='g'; break;
                  case 'a': nseq[a]='t'; break;
                  case 't': nseq[a]='a'; break;
                  default : nseq[a]=seq[b]; break;   /* 'n' and unknown symbols */
                  }
            }
            
      nseq[l]='\0';
      return nseq;
      }
            
            
/*
 * Return the length of the longest diagonal stretch of matching positions
 * between seq1 and seq2, as reported by extract_m_diag_streches().
 * Two positions match when the symbols are equal or when either is 'n'.
 *
 * The match matrix is kept in a static square buffer that is reallocated
 * whenever one of the sequences is longer than its current side.
 */
int get_best_match ( char *seq1, char *seq2)
      {
      static int **m;    /* match matrix, ml x ml */
      static int ml;
      int **mdiag;
      int n_mdiag=0;
      int l1, l2, longest;
      int i, j;
      int best;

      l1=strlen ( seq1);
      l2=strlen (seq2);
      longest=(l1>l2)?l1:l2;

      if ( m==NULL)
            {
            ml=longest;
            m=declare_int (ml, ml);
            }
      else if ( ml<l1 || ml<l2)
            {
            free_int (m, ml);
            ml=longest;
            m=declare_int (ml, ml);
            }

      for ( i=0; i<l1; i++)
            {
            for ( j=0; j<l2; j++)
                  {
                  if ( seq1[i]==seq2[j] || seq1[i]=='n' || seq2[j]=='n')m[i][j]=1;
                  else m[i][j]=0;
                  }
            }
      mdiag= extract_m_diag_streches ( m, l1, l2,seq1, seq2, &n_mdiag);

      best=0;
      for ( i=0; i<n_mdiag; i++)
            {
            if ( mdiag[i][0]>best)best=mdiag[i][0];
            }

      return best;
      }

/*
 * Collect the stretches of consecutive positive cells found along the
 * diagonals of the l1 x l2 matrix m.
 *
 * n_mdiag : in/out counter; new stretches are appended at index n_mdiag[0],
 *           which is incremented for every stretch that is kept.
 * A stretch is discarded (its slot is reused) when is_strech ("ta", ...)
 * reports it.
 *
 * Returns a static table that is reused, and grown when needed, across
 * calls; each row describes one stretch:
 *   [0]=len, [1]=x_start, [2]=y_start, [3]=x_end, [4]=y_end
 *
 * An empty sequence (l1<=0 or l2<=0) yields no stretch. Previously l1==0
 * with l2>0 started the scan at x=-1, which indexed m[-1] and never
 * terminated.
 */
int** extract_m_diag_streches ( int ** m, int l1, int l2,char *seq1, char *seq2, int *n_mdiag)
      {
      
      int b, x, y, s1, s2;
      static int **mdiag;
      int in;
      static int max_diag=VERY_LONG_STRING;
       
      if ( mdiag==NULL)
            mdiag=declare_int ( max_diag, 5);

      if ( l1<=0 || l2<=0)return mdiag;
      
      /* diagonals are visited from the one starting at (l1-1,0) up to the
         main one (0,0), then along the first row up to (0,l2-1) */
      for ( s1=l1-1, s2=0;s2<l2;)
            {
            /* in==1 while the walk is inside a stretch */
            for ( in=0,x=s1, y=s2; x<l1 && y<l2; x++, y++)
                  { 
                  if (m[x][y]>0)
                        {
                        if (in==1)
                              mdiag[n_mdiag[0]][0]++;
                        else 
                              {
                              mdiag[n_mdiag[0]][0]=1;
                              mdiag[n_mdiag[0]][1]=x;
                              mdiag[n_mdiag[0]][2]=y;
                              in=1;
                              }
                        }
                  else
                        if (in==1)
                              {
                              in=0;
                              mdiag[n_mdiag[0]][3]=x-1;
                              mdiag[n_mdiag[0]][4]=y-1;
                              if ( !is_strech ( "ta", seq1, seq2,mdiag[n_mdiag[0]][0], mdiag[n_mdiag[0]][1],mdiag[n_mdiag[0]][2]))n_mdiag[0]++;
                              }
                  /* grow the table before the last free row gets used */
                  if (n_mdiag[0]==(max_diag-1))
                        {mdiag=vrealloc (mdiag, (max_diag+VERY_LONG_STRING)*sizeof (int*));
                        for ( b=max_diag; b<max_diag+VERY_LONG_STRING; b++)mdiag[b]=vcalloc ( 5, sizeof (int));
                        max_diag+=VERY_LONG_STRING;
                        }
                  }
            /* move to the next diagonal start */
            s2+= (s1==0)?1:0; 
            s1-= (s1==0)?0:1;
            /* close a stretch that runs to the edge of the matrix */
            if (in==1)
                  {
                  in=0;
                  mdiag[n_mdiag[0]][3]=x-1;
                  mdiag[n_mdiag[0]][4]=y-1;
                  if ( !is_strech ( "ta", seq1, seq2,mdiag[n_mdiag[0]][0], mdiag[n_mdiag[0]][1],mdiag[n_mdiag[0]][2]))n_mdiag[0]++;
                  }
            }
      
      return mdiag;
      }                       
/*
 * Tell whether the aligned segments seq1[x..x+len-1] and seq2[y..y+len-1]
 * are dominated by a single symbol of AA.
 *
 * Returns 1 when, for at least one symbol of AA, strictly more than 70% of
 * the len positions (integer percentage) hold that symbol in BOTH
 * sequences; returns 0 otherwise. len must not be 0.
 */
int is_strech ( char *AA, char *seq1, char *seq2, int len, int x, int y)
      {
      const int threshold=70;
      int n_symbols;
      int k, pos, hits;

      n_symbols=strlen ( AA);
      for ( k=0; k<n_symbols; k++)
            {
            hits=0;
            for ( pos=0; pos<len; pos++)
                  {
                  if ( seq1[x+pos]==AA[k] && seq2[y+pos]==AA[k])hits++;
                  }
            if ( (hits*100)/len>threshold)return 1;
            }
      return 0;
      }
      
      
/************************************************************************************/
/*                                                                                  */
/*                                      STRUC                                       */
/*                                                                                  */
/*                                                                                  */
/************************************************************************************/

char * oneletaa2threeletaa(char aa);
float aa2property   (char aa, char *mode);

/*
 * Build a pseudo structure for the first sequence of A and write it to
 * outfile.
 *
 * The sequence is ungapped in place, then one CA "ATOM" line per residue is
 * written to a temporary file; the coordinates are a running sum of steps
 * derived from the "doolittle" and "volume" values of aa2property(). The
 * temporary file is passed through the external command extract_from_pdb
 * and the result is copied byte for byte into outfile.
 *
 * Always returns 0.
 */
int output_seq2struc(char *outfile, Alignment *A)
{
  FILE *in, *out;
  char *raw_pdb, *clean_pdb;
  char command[1000];
  char *s;
  int i, n_res, ch;
  float hydro, vol;
  float x, y, z;
  float dx, dy, dz;

  raw_pdb=vtmpnam(NULL);
  clean_pdb=vtmpnam(NULL);

  ungap (A->seq_al[0]);
  s=A->seq_al[0];
  n_res=strlen (s);

  out=vfopen (raw_pdb, "w");
  x=y=z=0;
  for ( i=0; i<n_res; i++)
    {
      hydro=aa2property ( s[i], "doolittle"   );
      vol=aa2property (s[i], "volume");

      /*14.398907: peptide bond length*/
      dx=(float)sqrt ((double)(14.398907/(((hydro*hydro)/(vol*vol))+1)));
      dy=dx*(hydro/vol);
      dz=0;

      x+=dx;
      y+=dy;
      z+=dz;
      fprintf (out, "ATOM%7d   CA %s A%4d%12.3f%8.3f%8.3f  1.00   5.30\n",i+1, oneletaa2threeletaa(s[i]),i+1, x, y, z);
    }
  vfclose (out);

  sprintf ( command, "extract_from_pdb -infile %s -force > %s", raw_pdb,  clean_pdb);
  my_system  (command);

  /* plain copy of the processed file into the requested output */
  in=vfopen (clean_pdb, "r");
  out=vfopen (outfile, "w");
  for ( ch=fgetc(in); ch!=EOF; ch=fgetc(in))
    {
      fprintf (out, "%c", ch);
    }
  vfclose (in);
  vfclose (out);

  return 0;
}

/*
 * Convert a one-letter amino acid code (either case) into its upper-case
 * three-letter code. The returned string is a constant: do not free it.
 * For any other character an error is printed on stderr and
 * myexit (EXIT_FAILURE) is called; NULL is returned if that call returns.
 */
char * oneletaa2threeletaa(char aa)
  {
    aa=tolower (aa);
    switch (aa)
      {
      case 'a': return "ALA";
      case 'r': return "ARG";
      case 'n': return "ASN";
      case 'd': return "ASP";
      case 'c': return "CYS";
      case 'q': return "GLN";
      case 'e': return "GLU";
      case 'g': return "GLY";
      case 'h': return "HIS";
      case 'i': return "ILE";
      case 'l': return "LEU";
      case 'k': return "LYS";
      case 'm': return "MET";
      case 'f': return "PHE";
      case 'p': return "PRO";
      case 's': return "SER";
      case 't': return "THR";
      case 'w': return "TRP";
      case 'y': return "TYR";
      case 'v': return "VAL";
      default:
        break;
      }

    fprintf ( stderr, "\nERROR: %c is not an amino acid [FATAL::aa2hydropathy::%s]", aa, PROGRAM);
    myexit (EXIT_FAILURE);
    return NULL;
  } 
  
/*
 * Return a numeric property of an amino acid given by its one-letter code
 * (either case).
 *
 * mode : NULL or "doolittle" selects the first table below, "volume" the
 *        second one.
 *
 * An unknown residue or an unknown mode prints an error on stderr and calls
 * myexit (EXIT_FAILURE); 0 is returned if that call returns.
 */
float aa2property   (char aa, char *mode)
  {
    if ( mode==NULL || strm (mode, "doolittle"))
       {
         aa=tolower (aa);
         switch (aa)
           {
           case 'i': return 4.5;
           case 'v': return 4.2;
           case 'l': return 3.8;
           case 'f': return 2.8;
           case 'c': return 2.5;
           case 'm': return 1.9;
           case 'a': return 1.8;
           case 'g': return -0.4;
           case 't': return -0.7;
           case 'w': return -0.9;
           case 's': return -0.8;
           case 'y': return -1.3;
           case 'p': return -1.6;
           case 'h': return -3.2;
           case 'e': return -3.5;
           case 'q': return -3.5;
           case 'd': return -3.5;
           case 'n': return -3.5;
           case 'k': return -3.9;
           case 'r': return -4.5;
           default:
             fprintf ( stderr, "\nERROR: %c is not an amino acid [FATAL::aa2hydropathy::%s]", aa, PROGRAM);
             myexit (EXIT_FAILURE);
             break;
           }
       }
    else if (strm (mode, "volume"))
       {
         aa=tolower (aa);
         switch (aa)
           {
           case 'a': return 0.915;
           case 'r': return 2.02;
           case 'n': return 1.35;
           case 'd': return 1.24;
           case 'c': return 1.18;
           case 'q': return 1.61;
           case 'e': return 1.55;
           case 'g': return 0.66;
           case 'h': return 1.67;
           case 'i': return 1.69;
           case 'l': return 1.68;
           case 'k': return 1.71;
           case 'm': return 1.70;
           case 'f': return 2.03;
           case 'p': return 1.29;
           case 's': return 0.99;
           case 't': return 1.22;
           case 'w': return 2.37;
           case 'y': return 2.03;
           case 'v': return 1.41;
           default:
             fprintf ( stderr, "\nERROR: %c is not an amino acid [FATAL::aa2hydropathy::%s]", aa, PROGRAM);
             myexit (EXIT_FAILURE);
             break;
           }
       }
    else
      {
      fprintf ( stderr, "\nERROR: %s is an unknown mode [FATAL::aa2hydropathy::%s]", mode  , PROGRAM);
      myexit (EXIT_FAILURE);
      }
  return 0;
  } 
         

        
         
      
/************************************************************************************/
/*                                                                                  */
/*                                      DNA                                         */
/*                                                                                  */
/*                                                                                  */
/************************************************************************************/

/*
 * Recode a DNA alignment in place and return it.
 *
 * Each sequence keeps a running counter. Lower-case letters are replaced by
 * the digit '0', '1' or '2' (counter modulo 3) and advance the counter;
 * '-' and '.' are left as they are but advance the counter as well; '~' is
 * left as it is and does not advance it; every other symbol becomes '4'
 * without advancing it.
 */
Alignment *code_dna_aln (Alignment *A)
       {
       int seq, col;
       int counter;
       int r;

       for ( seq=0; seq< A->nseq; seq++)
         {
           counter=0;
           for ( col=0; col< A->len_aln; col++)
             {
               r=A->seq_al[seq][col];
               switch (r)
                 {
                 case '-':
                 case '.':
                   counter++;
                   break;
                 case '~':
                   break;
                 default:
                   if ( islower(r))
                     {
                       A->seq_al[seq][col]=(counter+3)%3+'0';
                       counter++;
                     }
                   else
                     {
                       A->seq_al[seq][col]='4';
                     }
                   break;
                 }
             }
         }
       return A;
       }

  
Alignment *back_translate_dna_aln (Alignment *A)
       {
       /*Given a set of aligned sequences
         removes the gaps, then, from left to right
         1 aa->3 nuc
         ambiguities are randomly resolved.
         returns the same alignment object, now holding the nucleotide
         sequences (len_aln is multiplied by 3, then compress_aln is applied)
       */    
        int a, l, buf_len;
        char *seq    ;
       
       ungap_aln(A);

       /* Size the work buffer on the longest sequence (3 nuc per residue plus
          the terminator), never going below the historical 10000 bytes: the
          fixed size used to overflow for sequences of 3334 residues or more.
          NOTE(review): assumes realloc_aln() takes the new row length, as the
          original call with the constant 10000 suggests - confirm. */
       buf_len=10000;
       for ( a=0; a< A->nseq; a++)
         {
         l=strlen (A->seq_al[a])*3+1;
         if ( l>buf_len)buf_len=l;
         }

       A=realloc_aln (A, buf_len);
       seq=vcalloc ( buf_len, sizeof (char));

       
       for ( a=0; a< A->nseq; a++)
         {
         seq=back_translate_dna_seq (A->seq_al[a], seq, RANDOM);
         sprintf ( A->seq_al[a], "%s", seq);
         }
       A->len_aln=A->len_aln*3;
       compress_aln (A);
       vfree (seq);
       return A;
       }
/*
 * Back-translate a protein sequence into nucleotides.
 *
 * in_seq  : residues to translate, one call to back_translate_dna_codon()
 *           per character.
 * out_seq : destination buffer, or NULL to have one of 3*len+1 bytes
 *           allocated here. A caller-supplied buffer must be large enough
 *           for the concatenated codons.
 * mode    : passed unchanged to back_translate_dna_codon().
 *
 * Returns out_seq (the buffer that was passed in or the new allocation).
 */
char * back_translate_dna_seq ( char *in_seq,char *out_seq, int mode)     
       {
       int a,len;
       char *end;   /* current end of the output string */

       len=strlen(in_seq);
       
       if (out_seq==NULL)out_seq=vcalloc ( len*3+1, sizeof (char));
       
       out_seq[0]='\0';
       end=out_seq;
       for (a=0; a<len; a++)
         {
         /* append at the known end rather than rescanning the whole output
            for every codon */
         strcat (end,  back_translate_dna_codon (in_seq[a],mode));
         end+=strlen (end);
         }
       
       return out_seq;
       }
int get_longest_frame (char *seq);
Alignment *translate_dna_aln (Alignment *A, int frame)
       {
       /*Given a set of aligned sequences
         starts from left to right
         3 nuc->1 aa
         2nuc+1gap, 1nuc+2gap->3 gaps
         1 stop-> 3gaps
         returns the corresponding amino acid alignment

         frame==3: each sequence is shifted to the frame picked by
                   get_longest_frame
         otherwise: the first `frame` columns are turned into gaps and the
                   alignment is ungapped
         Every codon is then replaced in place by one symbol followed by two
         gaps, and the alignment is passed to compress_aln.
       */
       int seq, col, k;
       int r;
       char first;

       if (frame==3)
         {
           for (seq=0; seq< A->nseq; seq++)
             {
               char *d, *buf, f;

               d=A->seq_al[seq];
               f=get_longest_frame (d);

               /* shift the sequence left by f through a scratch copy */
               buf=vcalloc ( strlen (d)+1, sizeof (char));
               sprintf (buf, "%s", d+f);
               sprintf (d, "%s", buf);
               vfree (buf);
             }
         }
       else
         {
           for ( seq=0; seq< A->nseq; seq++)
             {
               for (k=0; k< frame; k++)
                 {
                   A->seq_al[seq][k]='-';
                 }
             }
           ungap_aln(A);
         }

       for ( seq=0; seq< A->nseq; seq++)
         {
           col=0;
           while ( col< A->len_aln)
             {
               r=translate_dna_codon (A->seq_al[seq]+col, 'z');

               /* symbol standing for the codon */
               if (is_gap(r))first='-';
               else if ( r=='x')first='o';
               else if ( r=='z')first='x';
               else first=r;

               A->seq_al[seq][col++]=first;
               A->seq_al[seq][col++]='-';
               A->seq_al[seq][col++]='-';
             }
         }
       compress_aln (A);

       return A;
       }
/*
 * get_longest_frame: translate seq in frames 0, 1 and 2 with '\0' as the
 * stop symbol and return the frame whose translation is the longest string.
 * Ties are resolved in favour of the lowest frame.
 *
 * NOTE(review): this relies on translate_dna_codon() emitting the `stop`
 * argument for stop codons, so that strlen() measures the translation up to
 * the first stop -- confirm against translate_dna_codon().
 */
int get_longest_frame (char *seq)
{
  int frame;
  int l[3];

  for (frame=0; frame< 3; frame++)
    {
      /* Passing NULL lets translate_dna_seq() size the output itself
         (strlen/3 + 2 bytes). The previous strlen(seq)-byte scratch rows
         were too small for sequences of length 0 or 1. */
      char *prot=translate_dna_seq (seq, frame, '\0', NULL);

      l[frame]=strlen (prot);
      vfree (prot);
    }

  if ( l[0]>=l[1] && l[0]>=l[2]) return 0;
  else if ( l[1]>=l[2]) return 1;
  else return 2;
}
  
/*
 * clean_gdna_aln: annotate each sequence of A with a 4-state Viterbi pass
 * (ORF1, ORF2, ORF3 = the three codon positions, NC = non coding).
 *
 * A is modified in place: every residue is lower-cased and 't' becomes 'u'.
 * The returned alignment T is a copy of A (copy_aln) whose rows are then
 * overwritten: every column starts as '.', columns traced back to state
 * ORF1 receive translate_dna_codon() of the codon starting there, and
 * columns traced back to ORF2/ORF3 receive '-'.
 *
 * All work arrays are released before returning; only T is handed to the
 * caller.
 */
Alignment *clean_gdna_aln (Alignment *A)
       {
         int a, b, c, r1, r2,s, p, n, tn;
         int *col;
         static int **mat;
         Alignment *T=NULL;
         int **score;
         char *buffer;


         /*Viterbi Parameters*/
         int AL=0;        /*Allowed Transition*/
         int F=-1000000; /*Forbidden Transition*/
         int SPLICE_PENALTY=100;
         int ORF1=0, ORF2=1, ORF3=2, NC=3;

         int state, pstate, best_e, best_pstate_p,best_state_p, best_pstate_v, best_state_v, v;
         int nstate=4;
         int **transitions;
         int e;
         int **v_tab_p;
         int **v_tab;
         int * is_dna;

         best_state_p=best_state_v=best_pstate_p=best_pstate_v=best_e=0;
         buffer=vcalloc ( 100000, sizeof (char));
         is_dna=vcalloc ( A->nseq, sizeof (int));
         score=declare_int ( A->nseq+1, A->len_aln);


         if ( !mat)mat=read_matrice("pam250mt");
         T=copy_aln (A, T);
         col=vcalloc ( A->nseq, sizeof (int));

         /* Normalise the input: lower case, 't' -> 'u'. */
         for (a=0; a<= A->len_aln; a++)
             for ( b=0; b< A->nseq; b++)
               {
                 A->seq_al[b][a]=tolower(A->seq_al[b][a]);
                 A->seq_al[b][a]=(A->seq_al[b][a]=='t')?'u':A->seq_al[b][a];
               }

         /* Flag the rows that get_string_type() reports as DNA.
            NOTE(review): buffer holds 100000 bytes; longer rows would
            overflow it. */
         for ( a=0; a< A->nseq; a++)
             {
               sprintf ( buffer, "%s", A->seq_al[a]);
               ungap (buffer);
               is_dna[a]=strm ( get_string_type (buffer), "DNA");
             }


         /* Column scores, stored in the extra row score[A->nseq].
            NOTE(review): tn is never incremented in this function, so the
            normalisation statement below always stores 0; and `break` only
            leaves the inner loop, so a later pair can reset an F score.
            Behaviour is kept as is -- confirm the intent before changing. */
         for (a=0; a< A->len_aln-2; a++)
             {
             for (b=0; b< A->nseq; b++)
                   {
                   if (is_dna[b])col[b]=translate_dna_codon (A->seq_al[b]+a, 'x');
                   else col[b]=tolower ( A->seq_al[b][a]);
                   }

             for (n=0,tn=0,b=0; b< A->nseq; b++)
               for ( c=b; c< A->nseq; c++   )
                   {
                     r1=col[b];
                     r2=col[c];

                     if (r1=='x' || r2=='x'){score[A->nseq][a]=F;break;}
                     else if (r1=='-' && r2=='-');
                     else if (r1=='-' || r2=='-');
                     else
                         {

                           if ( is_dna[b] && is_dna[c])score[A->nseq][a]+= mat[r1-'A'][r2-'A'];
                           else score[A->nseq][a]+=mat[r1-'A'][r2-'A']* (A->nseq*A->nseq);
                         }
                     n+=( !is_gap(r1) && !is_gap(r2));
                     score[A->nseq][a]=(((tn!=0)?score[A->nseq][a]/tn:0));
                   }

             }

         /*initialisation*/

         transitions=declare_int ( nstate, nstate);
         v_tab=declare_int ( A->len_aln+2, nstate       );
         v_tab_p=declare_int ( A->len_aln+2, nstate       );

         for (a=0; a<nstate;a++)
             for (b=0; b<nstate;b++)
                   {transitions[a][b]=F;}

         transitions[ORF1][ORF2]=AL;
         transitions[ORF2][ORF3]=AL;
         transitions[ORF3][ORF1]=AL;

         transitions[ORF3][NC]  =AL-SPLICE_PENALTY;
         transitions[NC][ORF1]  =AL-SPLICE_PENALTY;


         for ( s=0; s<A->nseq; s++)
             {
             /* -1 in the back-pointer table marks "no predecessor". */
             for ( p=0; p<=A->len_aln; p++){for (state=0; state< nstate; state++)v_tab_p[p][state]=-1; }

             /* Forward pass. */
             for (p=1+2; p<= A->len_aln; p++)
                 {

               for (state=0; state< nstate; state++)
                   {

                     if ( state==NC){e=-best_e;}
                     else
                        {
                          e=score[A->nseq][(p-1)-state];
                          if ( state==0)best_e=e;
                          else best_e=MAX(e, best_e);
                        }

                     for ( pstate=0; pstate<nstate; pstate++)
                           {
                           v=e+transitions[pstate][state]+v_tab[p-1][pstate];
                           if (pstate==0 ||(v>best_pstate_v) )
                              {
                               best_pstate_v=v;
                               best_pstate_p=pstate;
                              }
                         }

                     v_tab[p][state]=best_pstate_v;
                     v_tab_p[p][state]=best_pstate_p;
                     if (state==0 ||best_pstate_v>best_state_v )
                        {
                         best_state_p=state;
                         best_state_v=best_pstate_v;
                        }
                   }

               }



             /* Traceback. */
             for (p=0; p< A->len_aln; p++)T->seq_al[s][p]='.';
             for (p=A->len_aln; p>0; p--)
                 {
                   /* Positions 0..2 carry no predecessor (-1): stop here
                      instead of indexing v_tab_p[p][-1]. Remaining columns
                      keep their '.' as before. */
                   if ( best_state_p<0)break;

                   if ( best_state_p==0)T->seq_al[s][p-1]=translate_dna_codon (A->seq_al[s]+(p-1), 'x');
                   else if ( best_state_p==1 || best_state_p==2)T->seq_al[s][p-1]='-';

                   best_state_p=v_tab_p[p][best_state_p];

               }
             }


         /* Release every work array (these were previously leaked). */
         vfree (col);
         vfree (buffer);
         vfree (is_dna);
         free_int (score, -1);
         free_int (transitions, -1);
         free_int (v_tab, -1);
         free_int (v_tab_p, -1);
         return T;
       }

/*
 * clean_cdna_aln: given an alignment of nucleotides, label every position of
 * every sequence as coding (C1/C2/C3 = codon positions) or non coding (NC)
 * with a Viterbi pass, then rewrite A in place and return it:
 *   - NC positions become '.' (or '~' where the original character is a gap);
 *   - codons starting on a C1 position that contain at least one gap are
 *     rewritten the same way ('~' for gaps, '.' for the rest);
 *   - complete codons are left untouched.
 *
 * The emission score of the coding states at a column is the percentage of
 * sequences whose codon starting at that column translates to a non-gap.
 */
Alignment *clean_cdna_aln (Alignment *A)
       {
         int a, b, c,s, p;
         static int **mat;
         int   *emission;
         float em1, em2;
         char *buffer;
         Alignment *B=NULL;


         /*Viterbi Parameters*/
         int AL=0;        /*Allowed Transition*/
         int F=-1000000; /*Forbidden Transition*/
         int PENALTY=30;
         int NC, C1,C2, C3, START, END;
         int nstate=0;
         int state=0,best_state=0, score=0, best_score=0;
         int p_state;
         int e=0;
         int **score_tab;
         int **state_tab;

         int **transitions;
         int n;
         int r1, r2, r3;

         /* State indices: NC=0, C1=1, C2=2, C3=3, START=4, END=5. */
         NC=nstate++;
         C1=nstate++;
         C2=nstate++;
         C3=nstate++;
         START=nstate++;
         END=nstate++;


         B=copy_aln (A, B);
         buffer=vcalloc ( 100000, sizeof (char));
         emission=vcalloc (A->len_aln, sizeof (int));

         if ( !mat)
           {
             mat=read_matrice("pam250mt");
           }

         /*Computation of the emission proba for the coding state*/


         for (a=0; a< A->len_aln; a++)
           {

             /*First component: % occupancy of the column*/
             em1=0;
             for ( b=0; b< A->nseq; b++) em1+=!is_gap(translate_dna_codon (A->seq_al[b]+a, '-'));
             em1=em1/(float)A->nseq;

             /*Second Component: % similarity within column*/
             /* NOTE(review): em2 is computed but not used in the emission
                below; kept so the function does exactly what it did. */
             em2=0;
             for (n=0,b=0; b< A->nseq-1; b++)
             {
               r1=translate_dna_codon (A->seq_al[b]+a, '-');

               for (c=b+1; c<A->nseq; c++)
                 {
                   r2=translate_dna_codon (A->seq_al[c]+a, '-');
                   if (is_gap(r2) || is_gap(r1));
                   else
                   {
                     n++;
                     em2+=((mat[r1-'A'][r2-'A'])>1)?1:0;
                   }
                 }
             }
             em2=em2/(float)((n==0)?1:n);


             emission[a]=(em1*100);

           }



         /*initialisation*/

         transitions=declare_int ( nstate, nstate);
         score_tab=declare_int ( A->len_aln+2, nstate       );
         state_tab=declare_int ( A->len_aln+2, nstate       );

         for (a=0; a<nstate;a++)
             for (b=0; b<nstate;b++)
                   {transitions[a][b]=F;}


         transitions[START][C1]=AL;
         transitions[START][NC]=AL;
         transitions[C3][END]=AL;
         transitions[NC][END]=AL;
         transitions[C1 ][C2 ]=AL;
         transitions[C2 ][C3 ]=AL;
         transitions[C3 ][C1 ]=AL;
         transitions[C3 ][NC ]=AL-PENALTY;
         transitions[NC ][C1 ]=AL-PENALTY;
         transitions[NC][NC]=AL-PENALTY;



         for ( s=0; s< A->nseq; s++)
           {
           for ( p=0; p<=A->len_aln; p++){for (state=0; state< nstate; state++){score_tab[p][state]=F;state_tab[p][state]=-1;} }
           score_tab[0][START]=0;

           /* Forward pass: best predecessor of every state at every position. */
           for (p=1; p<= A->len_aln; p++)
             {
             for (state=0; state< nstate; state++)
               {
                 if ( state==START || state==END)continue;
                 else if      ( state==NC)  e=-10;
                 else if ( state==C1)
                   {
                   e=emission[p-1];
                   }
                 else if ( state ==C2)
                   {
                   if ( p-2<0)e=F;
                   else e=emission[p-2];
                   }
                 else if ( state==C3)
                   {
                   if ( p-3<0)e=F;
                   else e=emission[p-3];
                   }

                 for (p_state=0; p_state<nstate; p_state++)
                   {

                   if (e==F)score=F;
                   else
                     {
                       score=(score_tab[p-1][p_state]==F)?F:(e+transitions[p_state][state]+score_tab[p-1][p_state]);
                     }

                   if(p_state==0 || score>best_score){ best_score=score;best_state=p_state;}

                   }

                 score_tab[p][state]=best_score;
                 state_tab[p][state]=best_state;

               }
             }

           /* Pick the best state allowed to reach END; p is len_aln+1 here,
              so p-1 addresses the last position. */
           best_score=best_state=UNDEFINED;
           for (state=0; state<nstate; state++)
             {
             if (state==START || state==END)continue;
             e=transitions[state][END];
             if (e==F || score_tab[p-1][state]==F)continue;

             if (best_score==UNDEFINED || score_tab[p-1][state]>best_score)
               {
                 best_score=score_tab[p-1][state]+e;
                 best_state=state;
               }

             }

           /* Traceback: store the state of every position as a digit in B. */
           for (p=A->len_aln; p>0;)
             {
             B->seq_al[s][p-1]=best_state+'0';
             best_state=state_tab[p][best_state];
             p--;
             }
           }

         /* Rewrite A according to the labels stored in B. */
         for ( a=0; a< A->nseq; a++)
           for ( b=0; b< A->len_aln;)
             {
             s=B->seq_al[a][b];
             if ( s==C1+'0')
               {
                 r1=A->seq_al[a][b];
                 r2=A->seq_al[a][b+1];
                 r3=A->seq_al[a][b+2];


                 if ( is_gap(r1) ||is_gap(r2) ||  is_gap(r3))
                   {
                   A->seq_al[a][b]=(is_gap(r1))?'~':'.';
                   A->seq_al[a][b+1]=(is_gap(r2))?'~':'.';
                   A->seq_al[a][b+2]=(is_gap(r3))?'~':'.';
                   }
                 b+=3;
               }
             else if ( s==NC+'0')
               {
                 A->seq_al[a][b]=(is_gap(A->seq_al[a][b]))?'~':'.';
                 b++;
               }
             else
               {
                 fprintf (stderr, "\nPROBLEM: [%d %d]->%d", a, b, s-'0');
                 /* Unexpected label: report it once and move on. Without
                    this increment the loop never terminated. */
                 b++;
               }
             }


         free_aln (B);
         free_int (transitions, -1);
         free_int (score_tab, -1);
         free_int (state_tab, -1);
         vfree (emission);
         vfree (buffer);

         return A;
       }




/*
 * translate_splice_dna_aln: annotate each sequence of A with an 8-state
 * Viterbi pass (ORF1..ORF3 = codon positions, SPL1/SPL2 = 'g','t' and
 * SPL3/SPL4 = 'a','g' splice-site positions, NC = non coding).
 *
 * A is modified in place: residues are lower-cased and 't' becomes 'u'.
 * The returned alignment T is a copy of A (copy_aln) whose rows are
 * overwritten: every column starts as '.', columns traced back to ORF1
 * receive the upper-cased translate_dna_codon() of the codon starting there,
 * columns traced back to SPL1..SPL4 receive '-'.
 *
 * ST is not used by this function.
 * All work arrays are released before returning; only T is handed to the
 * caller.
 */
Alignment *translate_splice_dna_aln (Alignment *A, Alignment *ST)
       {
         int a, b, c, r1, r2,s, p, n, tn;
         int *col;
         static int **mat;
         Alignment *T=NULL;
         int **score;

         /*Viterbi Parameters*/
         int AL=0;        /*Allowed Transition*/
         int F=-1000000; /*Forbidden Transition*/
         int ORF1=0, ORF2=1, ORF3=2,SPL1=3, SPL2=4, SPL3=5, SPL4=6, NC=7;
         int SPLICE_PENALTY;
         int frame1, frame2, frame3, best_frame;
         int nstate=8;
         char r;



         int state=0, pstate=0, best_pstate_p=0,best_state_p=0, best_pstate_v=0, best_state_v=0, v=0;

         int **transitions;
         int e=0;
         int **v_tab_p;
         int **v_tab;

         score=declare_int ( A->nseq+1, A->len_aln);


         if ( !mat)mat=read_matrice("pam250mt");
         T=copy_aln (A, T);
         col=vcalloc ( A->nseq, sizeof (int));

         /* Normalise the input: lower case, 't' -> 'u'. */
         for (a=0; a<= A->len_aln; a++)
             for ( b=0; b< A->nseq; b++)
               {
                 A->seq_al[b][a]=tolower(A->seq_al[b][a]);
                 A->seq_al[b][a]=(A->seq_al[b][a]=='t')?'u':A->seq_al[b][a];
               }


         /* Column scores, stored in the extra row score[A->nseq]: sum of the
            matrix values over all pairs of translated codons, divided by the
            number of pairs. A pair containing 'x' resets the running value
            to F. */
         for (a=0; a< A->len_aln-2; a++)
             {
             for (b=0; b< A->nseq; b++)
                   {
                   col[b]=translate_dna_codon (A->seq_al[b]+a, 'x');
                   }

             for (n=0,tn=0,b=0; b< A->nseq-1; b++)
               for ( c=b+1; c< A->nseq; c++, tn++   )
                   {
                     r1=col[b];
                     r2=col[c];

                     if (r1=='x' || r2=='x')score[A->nseq][a]=F;
                     else if (r1=='-' && r2=='-');
                     else if (r1=='-' || r2=='-');
                     else
                         {
                           score[A->nseq][a]+= mat[r1-'A'][r2-'A'];

                         }
                     n+=( !is_gap(r1) && !is_gap(r2));
                   }
             score[A->nseq][a]=(((tn!=0)?score[A->nseq][a]/tn:0));

             }

         /*initialisation*/

         transitions=declare_int ( nstate, nstate);
         v_tab=declare_int ( A->len_aln+2, nstate*nstate);
         v_tab_p=declare_int ( A->len_aln+2, nstate*nstate);

         for (a=0; a<nstate;a++)
           for (b=0; b<nstate;b++)
             {transitions[a][b]=F;}

         /* NOTE(review): SPLICE_PENALTY is negative, so AL-SPLICE_PENALTY
            evaluates to +1000 (a bonus, not a cost) -- confirm the intent. */
         SPLICE_PENALTY=-1000;

         transitions[ORF1][ORF2]    =AL;
         transitions[ORF1][SPL1]    =AL-SPLICE_PENALTY;

         transitions[ORF2][ORF3]    =AL;
         transitions[ORF2][SPL1]    =AL-SPLICE_PENALTY;

         transitions[ORF3][ORF1]    =AL;
         transitions[ORF3][SPL1]    =AL-SPLICE_PENALTY;

         transitions[ORF3][NC]=AL-100;
         transitions[NC][ORF1]=AL-100;


         transitions[SPL1][SPL2]=AL;
         transitions[SPL2][NC  ]=AL-SPLICE_PENALTY;
         transitions[NC  ][NC  ]=AL;
         transitions[NC  ][SPL3]=AL-SPLICE_PENALTY;
         transitions[SPL3][SPL4]=AL;
         transitions[SPL4][ORF1]=AL;
         transitions[SPL4][ORF2]=AL;
         transitions[SPL4][ORF3]=AL;


         for ( s=0; s<A->nseq; s++)
             {
             /* -1 in the back-pointer table marks "no predecessor". */
             for ( p=0; p<=A->len_aln; p++){for (state=0; state< nstate; state++)v_tab_p[p][state]=-1; }

             /* Forward pass. */
             for (p=1+2; p<= A->len_aln; p++)
                 {
                frame1=score[A->nseq][(p-1)];
                frame2=score[A->nseq][(p-1)-1];
                frame3=score[A->nseq][(p-1)-2];
                best_frame=best_int (3, 1, &a, frame1, frame2, frame3);
                for (state=0; state< nstate; state++)
                   {
                   r=tolower (A->seq_al[s][p-1]);
                   r=(r=='u')?'t':r;

                   if      (state==ORF1)e=frame1;
                   else if (state==ORF2)e=frame2;
                   else if (state==ORF3)e=frame3;
                   else if (state==SPL1)e=(r=='g')?best_frame:F;
                   else if (state==SPL2)e=(r=='t')?best_frame:F;
                   else if (state==SPL3)e=(r=='a')?best_frame:F;
                   else if (state==SPL4)e=(r=='g')?best_frame:F;
                   else if (state==NC)e=-best_frame;
                   for ( pstate=0; pstate<nstate; pstate++)
                           {
                           v=e+transitions[pstate][state]+v_tab[p-1][pstate];
                           if (pstate==0 ||(v>best_pstate_v) ){best_pstate_v=v;best_pstate_p=pstate;}
                         }

                     v_tab[p][state]=best_pstate_v;
                     v_tab_p[p][state]=best_pstate_p;
                     if (state==0 ||best_pstate_v>best_state_v ){best_state_p=state; best_state_v=best_pstate_v;}
                   }
               }



             /* Traceback. */
             for (p=0; p< A->len_aln; p++)T->seq_al[s][p]='.';
             for (p=A->len_aln; p>0; p--)
                 {
                   /* Positions 0..2 carry no predecessor (-1): stop here
                      instead of indexing v_tab_p[p][-1]. Remaining columns
                      keep their '.' as before. */
                   if ( best_state_p<0)break;

                   if ( best_state_p==0)T->seq_al[s][p-1]=toupper(translate_dna_codon (A->seq_al[s]+(p-1), 'x'));
                   else if ( best_state_p>=SPL1  && best_state_p<=SPL4)T->seq_al[s][p-1]='-';
                   best_state_p=v_tab_p[p][best_state_p];
               }
             }


         /* Release every work array (all but col were previously leaked). */
         vfree (col);
         free_int (score, -1);
         free_int (transitions, -1);
         free_int (v_tab, -1);
         free_int (v_tab_p, -1);
         return T;
       }

/*
 * mutate_cdna_aln: apply random mutations to a coding nucleotide alignment,
 * in place, and return A.
 *
 * A->S is rebuilt from the alignment (aln2seq) and kept in sync with
 * A->seq_al through the position table returned by aln2pos_simple():
 * pos[a][b] is used as the 1-based index, in the ungapped sequence a, of the
 * residue at column b, with values <=0 being skipped.
 *
 * The amount of each mutation type is read with get_env_variable() and
 * atoi() from NEUTRAL_SUBSTITUTION, RANDOM_SUBSTITUTION, RANDOM_DELETION,
 * AMINO_ACID_DELETION and AMINO_ACID_SUBSTITUTION.
 * NOTE(review): the meaning of the second argument (1) of get_env_variable()
 * and the range returned by addrand() are not visible here -- confirm.
 */
Alignment * mutate_cdna_aln ( Alignment *A)
{
    int a, b, c, n;
    int n1, n2, r1, r2;
    int **pos, ps;
    int seq_len;
    int neutral_substitution=50;
    int random_substitution=0;
    int random_deletion=0;
    int amino_acid_deletion=0;
    int amino_acid_substitution=0;
    char nuc_list[]="agct";
    char *new_codon;

    neutral_substitution=atoi(get_env_variable ("NEUTRAL_SUBSTITUTION", 1));
    random_substitution =atoi(get_env_variable ("RANDOM_SUBSTITUTION", 1));
    random_deletion     =atoi(get_env_variable ("RANDOM_DELETION", 1));
    amino_acid_deletion =atoi(get_env_variable ("AMINO_ACID_DELETION", 1));
    amino_acid_substitution =atoi(get_env_variable ("AMINO_ACID_SUBSTITUTION", 1));


    if (A->S)free_sequence ( A->S, (A->S)->nseq);
    A->S=aln2seq(A);

    addrandinit(time (NULL));


    pos=aln2pos_simple ( A, A->nseq);

    /* 1 Apply neutral substitutions: neutral_substitution passes over the
         alignment; a random nucleotide is kept only when the codon still
         translates to the same, non-stop ('o') symbol. */

    if ( neutral_substitution)
        {
      for (  c=0; c< neutral_substitution; c++)
          {
          for (  a=0; a< A->nseq; a++)
                {

                for ( b=0; b< A->len_aln; b++)
                    {

                  if (pos[a][b]<=0)continue;
                  /* ps: 0-based start of the codon containing the residue */
                  ps=MAX(0,pos[a][b]-(pos[a][b]-1)%3-1);


                  n1=(A->S)->seq[a][pos[a][b]-1];
                  r1=translate_dna_codon ( (A->S)->seq[a]+ps, 'o');

                  n2=nuc_list[(int)addrand((unsigned long) 4)];
                  (A->S)->seq[a][pos[a][b]-1]=n2;
                  r2=translate_dna_codon ( (A->S)->seq[a]+ps, 'o');


                  if ( r1==r2 && r1!='o')A->seq_al[a][b]=n2;

                  else (A->S)->seq[a][pos[a][b]-1]=n1;
                  }
            }
          }
      }

    /* 2 Apply random nucleotide substitutions */
     if ( random_substitution)
        {
      for (  a=0; a< A->nseq; a++)
            {
            for ( b=0; b< A->len_aln; b++)
                {
                if (pos[a][b]<=0)continue;
                if (addrand ((unsigned long) 100)>random_substitution)continue;

                n1=nuc_list[(int)addrand((unsigned long)4)];
                (A->S)->seq[a][pos[a][b]-1]=n1;
                A->seq_al[a][b]=n1;
                }
          }
      }

    /* 3 Apply amino acid substitutions */
      if ( amino_acid_substitution)
        {
      for (  a=0; a< A->nseq; a++)
            {
            seq_len=strlen ((A->S)->seq[a]);
            for ( b=0; b< A->len_aln; b+=3)
                {
                if (pos[a][b]<=0)continue;
                if (addrand ((unsigned long) 100)>amino_acid_substitution)continue;
                ps=MAX(0,pos[a][b]-(pos[a][b]-1)%3-1);

                r1=translate_dna_codon ( (A->S)->seq[a]+ps, 'o');
                new_codon=mutate_amino_acid(r1, "clustalw_col");

                /* Stay inside the ungapped sequence: an incomplete last
                   codon must not overwrite the terminating '\0'. */
                for ( c=ps; c<ps+3 && c<seq_len; c++)(A->S)->seq[a][c]=new_codon[c-ps];
                }
            /* Copy the mutated residues back into the alignment. */
            for ( b=0; b< A->len_aln; b++)
                {
                if (pos[a][b]<=0)continue;
                else A->seq_al[a][b]=(A->S)->seq[a][pos[a][b]-1];
                }
          }
      }
    /* 4 Apply amino acid deletions: blank 1 to 4 consecutive codons */
     if ( amino_acid_deletion)
        {
      for (  a=0; a< A->nseq; a++)
            {
            seq_len=strlen ((A->S)->seq[a]);
            for ( b=0; b< A->len_aln; b+=3)
                {
                if (pos[a][b]<=0)continue;
                if (addrand ((unsigned long) 1000)>amino_acid_deletion)continue;
                ps=MAX(0,pos[a][b]-(pos[a][b]-1)%3-1);
                n=addrand ((unsigned long) 4)+1;

                /* c indexes the ungapped sequence, so it is bounded by that
                   sequence's length rather than by the alignment length
                   (which can be larger and let the write run past the
                   terminating '\0'). */
                for ( c=ps; c<ps+(3*n) && c<seq_len; c++)(A->S)->seq[a][c]='-';
                }
            /* Copy the deleted residues back into the alignment. */
            for ( b=0; b< A->len_aln; b++)
                {
                if (pos[a][b]<=0)continue;
                else A->seq_al[a][b]=(A->S)->seq[a][pos[a][b]-1];
                }
          }
      }
    /* 5 Apply amino acid insertions: not implemented */

/*FRAMESHIFT MUTATIONS*/
    /* 6 Apply nucleotide deletions*/
     if ( random_deletion)
        {
      for (  a=0; a< A->nseq; a++)
            {
            for ( b=0; b< A->len_aln; b++)
                {
                if (pos[a][b]<=0)continue;
                if (addrand ((unsigned long) 1000)>random_deletion)continue;

                n1='-';
                (A->S)->seq[a][pos[a][b]-1]=n1;
                A->seq_al[a][b]=n1;
                }
          }
      }

     free_int (pos, -1);
   return A;

}
    
/*
 * clean_est: edit the first sequence of A column by column using the counts
 * stored in (A->P)->count[0..4][column].
 *
 *   - counts 0..3 sum to zero           -> warning on stderr, column set to '-'
 *   - count[4] is more than 30% of sum  -> column set to '-'
 *   - best of the five counts is less
 *     than 50% of sum                   -> column set to 'n'
 *
 * NOTE(review): count[4] is treated here as the gap count and counts 0..3 as
 * the four nucleotides, following the original rule comment -- confirm
 * against the profile builder.
 */
Alignment* clean_est  ( Alignment *A)
        {
        int column, k, best_index;
        int top_count;
        int total;

        for (column=0; column<A->len_aln; column++)
          {
            total=0;
            for (k=0; k<4; k++)
              {
                total+=(A->P)->count[k][column];
              }

            top_count=best_int (5,1, &best_index,
                                (A->P)->count[0][column],
                                (A->P)->count[1][column],
                                (A->P)->count[2][column],
                                (A->P)->count[3][column],
                                (A->P)->count[4][column]);

            if (total==0)
              {
                fprintf ( stderr, "\nWARNING: POSITION WITH NO INFORMATION [clean_est:%s]", PROGRAM);
                A->seq_al[0][column]='-';
                continue;
              }

            if (((A->P)->count[4][column]*100)/total >30)
              {
                A->seq_al[0][column]='-';
              }
            else if ((top_count*100)/total<50)
              {
                A->seq_al[0][column]='n';
              }
          }
        return A;
        }
         
    

/*
 * make_symbols: build the list of symbol groups registered under `name`.
 *
 * name: one of "3d_ali", "all", "set1", "set2", "any".
 * n:    output, n[0] receives the number of groups written.
 *
 * Returns an array allocated with declare_char (STRING, STRING); entries
 * 0..n[0]-1 hold the groups. For an unrecognised name the array is returned
 * untouched and n[0] is 0 (it used to be left unset, so callers would loop
 * over an indeterminate count).
 */
char **make_symbols ( char *name, int *n)
    {
    char **symbol;

    symbol=declare_char ( STRING, STRING);
    n[0]=0;

    if ( strcmp (name, "3d_ali")==0)
        {
        sprintf ( symbol[n[0]++], "gih");
        sprintf ( symbol[n[0]++], "eb");
        sprintf ( symbol[n[0]++], "x");
        sprintf ( symbol[n[0]++], "#l");
        }
    else if ( strcmp (name, "all")==0)
        {
        /* one group per letter and case ("aa", "AA", ...), then the gap */
        int a;
        for ( a=0; a<26; a++)
          {
            sprintf ( symbol[n[0]++], "%c%c", 'a'+a, 'a'+a);
            sprintf ( symbol[n[0]++], "%c%c", 'A'+a, 'A'+a);
          }
        sprintf ( symbol[n[0]++], "--");
        }
    else if ( strcmp (name, "set1")==0)
        {
        sprintf ( symbol[n[0]++], "ilvmfywhktcagH");
        sprintf ( symbol[n[0]++], "reqdnsP");
        sprintf ( symbol[n[0]++], "--");
        sprintf ( symbol[n[0]++], "#l");
        }
    else if ( strcmp (name, "set2")==0)
        {
        sprintf ( symbol[n[0]++], "gsacT");
        sprintf ( symbol[n[0]++], "ndtvpS");
        sprintf ( symbol[n[0]++], "ilkreqL");
        sprintf ( symbol[n[0]++], "--");
        sprintf ( symbol[n[0]++], "#l");
        }
    else if ( strcmp ( name, "any")==0)
        {
        sprintf ( symbol[n[0]++], "*x");
        }

    return symbol;
    }

/*
 * translate_dna_seq_on3frame: translate the codon starting at EVERY position
 * of dna_seq, so that prot[i] is translate_dna_codon() of the three
 * characters starting at i (all three frames interleaved).
 *
 * dna_seq: input sequence, left untouched (a lower-cased copy with 't'
 *          replaced by 'u' is translated instead).
 * stop:    passed through to translate_dna_codon().
 * prot:    output buffer; when NULL, strlen(dna_seq)+2 bytes are allocated
 *          with vcalloc.
 *
 * Returns prot, '\0'-terminated after strlen(dna_seq) symbols.
 */
char * translate_dna_seq_on3frame (  char *dna_seq, char stop, char *prot)
       {
         int n_nuc, i;
         char *rna, *cur;

         n_nuc=strlen (dna_seq);
         if ( !prot)
           {
             prot=vcalloc ( n_nuc+2, sizeof (char));
           }

         /* working copy: lower case, t -> u */
         rna=vcalloc (n_nuc+4, sizeof (char));
         strcpy (rna, dna_seq);
         lower_string ( rna);
         for (cur=rna; cur<rna+n_nuc; cur++)
           {
             if (*cur=='t')*cur='u';
           }

         i=0;
         while (i<n_nuc)
           {
             prot[i]=translate_dna_codon (rna+i, stop);
             i++;
           }
         vfree (rna);
         prot[i]='\0';

         return prot;
       }
/*
 * translate_dna_seq: translate dna_seq codon by codon, starting at offset
 * `frame` and advancing three characters at a time.
 *
 * dna_seq: input sequence, left untouched (a lower-cased copy with 't'
 *          replaced by 'u' is translated instead).
 * frame:   offset of the first codon.
 * stop:    passed through to translate_dna_codon().
 * prot:    output buffer; when NULL, strlen(dna_seq)/3+2 bytes are allocated
 *          with vcalloc.
 *
 * Returns prot, '\0'-terminated after the last translated codon.
 */
char * translate_dna_seq ( char *dna_seq, int frame, char stop, char *prot)
       {
         int n_nuc, nuc, n_aa;
         char *rna, *cur;

         n_nuc=strlen (dna_seq);
         if ( !prot)
           {
             prot=vcalloc ( n_nuc/3 +2, sizeof (char));
           }

         /* working copy: lower case, t -> u */
         rna=vcalloc (n_nuc+4, sizeof (char));
         strcpy (rna, dna_seq);
         lower_string ( rna);
         for (cur=rna; cur<rna+n_nuc; cur++)
           {
             if (*cur=='t')*cur='u';
           }

         n_aa=0;
         nuc=0+frame;
         while (nuc<n_nuc)
           {
             prot[n_aa]=translate_dna_codon (rna+nuc, stop);
             n_aa++;
             nuc+=3;
           }
         vfree (rna);
         prot[n_aa]='\0';

         return prot;
       }
/*
 * back_translate_dna_codon: return a codon (three lower-case nucleotides)
 * encoding the amino acid aa.
 *
 * aa:            amino acid, any case; a gap gives "---", anything that is
 *                not one of the 20 listed amino acids gives "nnn".
 * deterministic: when non-zero the first codon of the amino acid's list is
 *                always returned; otherwise one is drawn with rand().
 *
 * The returned pointer addresses a buffer owned by this function and
 * overwritten by the next call: copy it if it must survive, never free it.
 *
 * Fix: histidine used to draw a random codon even in deterministic mode.
 */
char * back_translate_dna_codon ( char aa, int deterministic)
        {
        /* Codons in the order the random choice indexes them. */
        static const struct
          {
            char aa;
            int n_codons;
            const char *codon[6];
          } codon_table[]=
          {
            {'a', 4, {"gca", "gcg", "gcc", "gct"}},
            {'c', 2, {"tgc", "tgt"}},
            {'d', 2, {"gac", "gat"}},
            {'e', 2, {"gaa", "gag"}},
            {'f', 2, {"ttc", "ttt"}},
            {'g', 4, {"gga", "ggg", "ggc", "ggt"}},
            {'h', 2, {"cac", "cat"}},
            {'i', 3, {"ata", "atc", "att"}},
            {'k', 2, {"aaa", "aag"}},
            {'l', 6, {"cta", "ctg", "ctc", "ctt", "tta", "ttg"}},
            {'m', 1, {"atg"}},
            {'n', 2, {"aac", "aat"}},
            {'p', 4, {"cca", "ccg", "ccc", "cct"}},
            {'q', 2, {"caa", "cag"}},
            {'r', 6, {"cga", "cgg", "cgc", "cgt", "aga", "agg"}},
            {'s', 6, {"tca", "tcg", "tcc", "tct", "agt", "agc"}},
            {'t', 4, {"aca", "acg", "acc", "act"}},
            {'v', 4, {"gta", "gtg", "gtc", "gtt"}},
            {'w', 1, {"tgg"}},
            {'y', 2, {"tac", "tat"}}
          };
        static char *r;
        int n_entries=(int)(sizeof (codon_table)/sizeof (codon_table[0]));
        int choice, i;

        vsrand(0);
        if ( r==NULL)r=vcalloc (4, sizeof (char));
        if (!is_gap(aa))aa=tolower((unsigned char)aa);

        if (is_gap(aa))
          {
            sprintf (r, "---");
            return r;
          }

        for (i=0; i<n_entries; i++)
          {
            if (codon_table[i].aa!=aa)continue;

            /* rand() is only consumed when there is a real choice to make,
               as before (single-codon amino acids never drew a number). */
            if (deterministic || codon_table[i].n_codons==1)choice=0;
            else choice=rand()%codon_table[i].n_codons;

            sprintf (r, "%s", codon_table[i].codon[choice]);
            return r;
          }

        sprintf (r, "nnn");
        return r;
        }
/*
 * translate_dna_codon
 *
 * Translates the first three characters of `sequence` into a lower-case,
 * one-letter amino-acid code.
 *
 *   sequence : NUL-terminated nucleotide string; only the first three
 *              characters are examined. Case is ignored and 'u' is read
 *              as 't'.
 *   stop     : character returned for a stop codon (tag, taa, tga).
 *
 * Returns:
 *   - when `sequence` is shorter than three characters: 'x' if it contains
 *     any non-gap character, '-' otherwise;
 *   - '-' when any of the three positions is a gap;
 *   - the amino-acid letter for a recognised codon, including the
 *     four-fold degenerate forms gcn, ggn, ctn, ccn, cgn, tcn, acn, gtn;
 *   - `stop` for a stop codon and for any other codon containing 'n'.
 * Any other codon is reported on stderr and the program is terminated
 * through myexit (EXIT_FAILURE).
 */
int translate_dna_codon ( char *sequence, char stop)
{
  char seq[4];
  int a, b;

  b=(int)strlen (sequence);
  if ( b<3)
    {
      /* Incomplete codon: anything that is not pure gap is unknown. */
      for ( a=0; a<b; a++)
        if ( !is_gap(sequence[a]))return 'x';
      return '-';
    }

  /* Normalise: lower case, RNA 'u' read as DNA 't'. */
  for ( a=0; a<3; a++)
    {
      seq[a]=tolower((unsigned char)sequence[a]);
      if ( seq[a]=='u')seq[a]='t';
    }
  seq[3]='\0';

  if ( is_gap(seq[0])||is_gap(seq[1]) || is_gap(seq[2]))return '-';
  else if ( strm5(seq, "gca", "gcg", "gcc", "gct","gcn"))return 'a';
  else if ( strm2(seq, "tgc","tgt"))return 'c';
  else if ( strm2(seq, "gac","gat"))return 'd';
  else if ( strm2(seq, "gaa","gag"))return 'e';
  else if ( strm2(seq, "ttc","ttt"))return 'f';
  else if ( strm5(seq, "gga","ggg","ggc", "ggt", "ggn"))return 'g';
  else if ( strm2(seq, "cac","cat"))return 'h';
  else if ( strm3(seq, "ata","atc","att"))return 'i';
  else if ( strm2(seq, "aaa","aag"))return 'k';
  else if ( strm6(seq, "cta","ctg","ctc", "ctt", "tta", "ttg"))return 'l';
  else if ( strm (seq, "ctn"))return 'l';
  else if ( strm (seq, "atg"))return 'm';
  else if ( strm2(seq, "aac","aat"))return 'n';
  else if ( strm5(seq, "cca","ccg","ccc", "cct","ccn"))return 'p';
  else if ( strm2(seq, "cag","caa"))return 'q';
  else if ( strm6(seq, "cga","cgg","cgc", "cgt","aga","agg"))return 'r';
  else if ( strm (seq, "cgn"))return 'r';
  else if ( strm6(seq, "tca","tcg","tcc", "tct","agc","agt"))return 's';
  /* Degenerate serine is tcn; "ccn" is proline and is handled above. */
  else if ( strm (seq, "tcn"))return 's';
  else if ( strm5(seq, "aca","acg","acc", "act", "acn"))return 't';
  else if ( strm5(seq, "gta","gtg","gtc", "gtt", "gtn"))return 'v';
  else if ( strm (seq, "tgg"))return 'w';
  else if ( strm2(seq, "tac","tat"))return 'y';
  else if ( strm3(seq, "tag","taa","tga"))return stop;
  /* Remaining ambiguous codons are reported with the stop character. */
  else if ( seq[0]=='n' || seq[1]=='n' || seq[2]=='n') return stop;
  else
    {
      fprintf ( stderr, "\n%s is an unknown codon [FATAL:%s]",seq, PROGRAM);
      myexit (EXIT_FAILURE);
      return 1;
    }
}
         
/*
 * mutate_aln
 *
 * Builds a randomly mutated copy of alignment A.
 *
 *   A : input alignment; it is released with free_aln() before returning,
 *       so the caller must only use the returned alignment afterwards.
 *   r : per-residue mutation rate as a decimal string. An empty string
 *       selects 0.01. Values outside [0,1] are clamped to that range.
 *
 * Every column of A is expanded into two columns in the copy: the
 * lower-cased residue followed by a '~' placeholder. Each non-gap residue
 * is then mutated with the requested probability: either the residue is
 * replaced by '.' or the placeholder next to it is replaced by a random
 * letter of the alphabet for the detected sequence type. ungap_aln() is
 * applied to the result (presumably removing the '.'/'~' placeholders --
 * confirm against ungap_aln).
 *
 * Only DNA and PROTEIN sequence types are supported; any other type
 * terminates the program through printf_exit().
 *
 * Returns the mutated alignment.
 */
Alignment * mutate_aln ( Alignment *A, char *r)
{
  int a, b, c, mut, type, ratio;
  double rate;
  char alp[30];
  int alp_size;
  Sequence *S;
  Alignment *B;

  vsrand(0);

  /* Clamp the rate so that the conversion to int below cannot overflow.
     The negated test also catches NaN. */
  rate=( r[0]=='\0')?0.01:atof(r);
  if ( !(rate>=0))rate=0;
  else if ( rate>1)rate=1;
  ratio=(int)(rate*RAND_MAX);

  S=aln2seq(A);
  S=get_sequence_type(S);

  if ( strm(S->type, "DNA"))sprintf (alp, "AGCT");
  else if (  strm(S->type, "PROTEIN"))sprintf (alp, "ACDEFGHIKLMNPQRSTVWY");
  else
    {
      /* Without an alphabet alp would be read uninitialised. */
      printf_exit (EXIT_FAILURE, stderr, "mutate_aln: sequence type must be DNA or PROTEIN\n");
    }

  alp_size=strlen(alp);

  B=copy_aln (A,NULL);
  B=realloc_aln(B, B->len_aln*2+1);

  /* Interleave each residue with a '~' slot that can receive an insertion. */
  for ( a=0, b=0; a< A->len_aln; a++, b+=2)
    {
      for ( c=0; c< A->nseq; c++)
        {
          B->seq_al[c][b]=tolower((unsigned char)A->seq_al[c][a]);
          B->seq_al[c][b+1]='~';
        }
    }

  for ( c=0; c< A->nseq; c++)B->seq_al[c][b]='\0';
  B->len_aln=A->len_aln*2;

  for (a=0; a< B->len_aln; a+=2)
    for ( b=0; b<B->nseq; b++)
      {
        if ( is_gap(B->seq_al[b][a]))continue;
        mut=((rand()%RAND_MAX)>ratio)?0:1;
        if ( !mut)continue;

        /* NOTE(review): rand()%2 never yields 2, so the substitution
           branch below is unreachable as written; confirm whether %3
           was intended before changing it. */
        type=rand()%2;
        if (type==0)/*deletion*/
          {
            B->seq_al[b][a]='.';
          }
        else if ( type==1)/*insertion after the residue*/
          {
            B->seq_al[b][a+1]=alp[rand()%alp_size];
          }
        else if (type==2)/*substitution*/
          {
            B->seq_al[b][a]=alp[rand()%alp_size];
          }
      }
  ungap_aln (B);

  free_sequence (S, S->nseq);
  free_aln (A);
  return B;
}

/*
 * mutate_amino_acid
 *
 * Returns a randomly chosen codon whose translation belongs to the same
 * amino-acid group as `aa`.
 *
 *   aa   : amino-acid letter (case is ignored).
 *   mode : name passed to make_group_aa() to define the groups;
 *          NULL selects "clustalw_col". The groups and lookup tables are
 *          built on the first call only and cached in static storage, so
 *          `mode` is ignored on every later call.
 *
 * Returns a pointer to a NUL-terminated three-letter codon held in static
 * storage (do not free). "nnn" is returned when `aa` is not one of the 20
 * amino-acid letters or when no codon shares a group with it.
 */
char* mutate_amino_acid ( char aa, char *mode)

     {
       int a, b, c, d;
       int idx, n_codons;
       char nucleotide[]="agct";
       char amino_acid[]="acdefghiklmnpqrstvwy";
       static char unknown[]="nnn";
       static char **triplet;
       static char **cw_col;
       static int ng_cw_col;
       static int **amino_acid_list;
       static int *lu;
       char a1, a2;

       aa=tolower((unsigned char)aa);

       /* Table of the 64 codons over the alphabet "agct". */
       if (!triplet)
          {
            triplet=declare_char ( 64, 4);
            for (d=0, a=0; a< 4;a++)
                for ( b=0; b< 4; b++)
                  for ( c=0; c< 4; c++, d++)
                      {
                        triplet[d][0]=nucleotide[a];
                        triplet[d][1]=nucleotide[b];
                        triplet[d][2]=nucleotide[c];
                        triplet[d][3]='\0';
                      }
          }

       /* Amino-acid groups: computed once, so the name buffer is only
          allocated once instead of on every call. */
       if ( !cw_col)
          {
            char *mat;

            declare_name(mat);
            if ( !mode)sprintf (mat, "clustalw_col");
            else sprintf (mat, "%s", mode);
            cw_col=make_group_aa ( &ng_cw_col,mat);
            /* mat is deliberately not released: it is not visible from
               here whether make_group_aa keeps a reference to it. */
          }

       /* amino_acid_list[a][0] is the number of candidate codons for
          amino acid a; slots 1..count hold indices into triplet. */
       if ( !amino_acid_list)
          {
            amino_acid_list=declare_int ( 20, 65);
            for ( a=0; a< 20; a++)
                for ( b=0; b< 64; b++)
                    {
                      a1=translate_dna_codon ( triplet[b], 'x');
                      a2=amino_acid[a];
                      for ( d=0; d< ng_cw_col; d++)
                        if ( is_in_set ( a1, cw_col[d]) && is_in_set ( a2, cw_col[d]))
                           {
                               amino_acid_list[a][++amino_acid_list[a][0]]=b;
                               /* Record each codon once, so a row never
                                  needs more than 64 slots. */
                               break;
                           }
                    }

            /* Letter -> row lookup; -1 marks letters that are not one of
               the 20 amino acids. */
            lu=vcalloc ( 26, sizeof (int));
            for ( a=0; a<26; a++)lu[a]=-1;
            for ( a=0; a<20; a++)
                {
                  lu[amino_acid[a]-'a']=a;
                }
          }

       if ( aa<'a' || aa>'z')return unknown;
       idx=lu[aa-'a'];
       if ( idx<0)return unknown;
       n_codons=amino_acid_list[idx][0];
       if ( n_codons<=0)return unknown;

       /* Pick one of the stored codon indices (slots 1..n_codons) and
          return that codon; addrand(n) is assumed to return a value in
          [0, n) -- confirm against its definition. */
       return triplet [amino_acid_list[idx][addrand((unsigned long)n_codons)+1]];
     }
                         
/**************************************************************************************************/
/********************************                      ********************************************/
/********************************    PROCESSING        ********************************************/
/********************************                      ********************************************/


       
void modify_data  (Sequence_data_struc *D1, Sequence_data_struc *D2, Sequence_data_struc *DST, char **action_list,int n_actions, Action_data_struc *RAD)
     {
       Sequence  *COOR=NULL, *NS=NULL,*BUFS=NULL, *OUT_S=NULL;
       Constraint_list *CL;
       char *s;
       int value,upper_value, lower_value, start, end, a, b,c;
       int *count_table=NULL;
       char *action;
       

       int s1, s2, r1, r2;

       Alignment *BUF;
       
       /*Switches*/

       action=action_list[0];

       if (  strm(action, "seqnos"))
       {
        (D1->A)->output_res_num=1;
       } 
       else if ( strm ( action, "sar2subsar"))
       {
         /*in->sequences
           in2->sar data
         */
         Alignment *subA, *subS;
         
         if ( n_actions==1)
           {
             fprintf ( stderr, "\nin=aln, in2=sar sar2subsar [filter value compound1 compound2...] | [jack1] | [file]\n");
             myexit (EXIT_FAILURE);
           }
         
         sarset2subsarset ( D1->A, D2->A, &subA, &subS, main_read_aln (action_list[2], NULL));
         D1->A=subA;D2->A=subS;
       }
       else if ( strm (action, "display_sar"))
       {
         D1->A=display_sar (D1->A, D2->A, action_list[1]);
       }
       else if ( strm ( action, "sar2simpred"))
       {
         /*in->sequences
           in2->sar data
         */
         sar2simpred ( D1->A, D2->A, action_list[1], action_list[2], atoi(action_list[3]), atoi (action_list[4]));
       }
       else if ( strm ( action, "sar2simpred2"))
       {
         /*in->sequences
           in2->sar data
         */
         if ( n_actions!=5)
           {
             fprintf ( stderr, "\nERROR: +sar2simpred2 seqnamesfile posfile compound limit");
             myexit (EXIT_FAILURE);
           }
         sar2simpred2 ( D1->A, D2->A, action_list[1], action_list[2], action_list[3], atoi (action_list[4]));
       }
        else if ( strm ( action, "sar_analyze"))
       {
         /*in->sequences
           in2->sar data
         */
         sar_analyze ( D1->A, D2->A,action_list[1]);
       }
       else if ( strm ( action, "weight2sar"))
       {
         /*in->sequences
           in2->sar data
         */
         if ( n_actions!=3)
           {
             fprintf ( stderr, "\nERROR: +weight2sar <weight_file> <limit>");
             myexit (EXIT_FAILURE);
           }
         D1->A=weight2sar ( D1->A,D2->A, action_list[1], atoi(action_list[2]));
         
       }
      else if ( strm ( action, "sar_weight"))
       {
         /*in->sequences
           in2->sar data
         */
         if ( n_actions!=3)
           {
             fprintf ( stderr, "\nERROR: +sar_weight <sar_analyze> <compound>");
             myexit (EXIT_FAILURE);
           }
         D1->A=aln2weighted_sar_score ( D1->A,D2->A, action_list[1], action_list[2]);
         D1->S=aln2seq ( D1->A);
       }
       
       else if ( strm (action, "name2unique_name"))
       {
         char *tmp1, *tmp2;
         char command[1000];
         tmp1=vtmpnam (NULL); tmp2=vtmpnam (NULL);
         
         output_fasta_aln (tmp1,D1->A);
         free_aln (D1->A);free_sequence (D1->S, -1);
         sprintf ( command, "fasta_aln2fasta_aln_unique_name.pl %s >%s", tmp1, tmp2);
         my_system ( command);
         D1->S=get_fasta_sequence ( tmp2, NULL);
         D1->A=seq2aln (D1->S,NULL, 1);
       }
       else if ( strm (action, "rm_tag") || strm (action, "rm_template"))
       {

         char **temp_name=NULL,**temp_list=NULL, temp_nseq=0;
         int z;
                  
         if ( D1 && D1->A){temp_name=(D1->A)->name;temp_nseq=(D1->A)->nseq;}
         else if ( D1 && D1->S){temp_name=(D1->S)->name;temp_nseq=(D1->S)->nseq;}
           temp_list=rm_name_tag (temp_name,temp_nseq, NULL);
         if ( n_actions>1 && strm (action_list[1], "template"))
            {
            
               for ( z=0; z<temp_nseq; z++)
            {
            if (temp_list[z][0])
                  {fprintf (stdout, "%s\n", temp_list[z]);}
                  }
                  myexit (EXIT_SUCCESS);
            }     
       }
       else if (strm (action, "add_template") || strm (action, "swap_header"))
       {
         D1->S=seq2template_seq (D1->S, action_list[1], NULL);
          D1->A=seq2aln(D1->S, NULL, 1);
       }
       else if ( strm ( action, "seq2year"))
       {
         D1->S=seq2year (D1->S, (n_actions>1)?atoi(action_list[1]):1);
         D1->A=seq2aln(D1->S, NULL, 1);
       }
       else if ( strm (action, "swap_lib_header"))
       {
         Sequence *S;
         S=main_read_seq (action_list[1]);
         (D1->CL)->S=S;
         
       }
       else if ( strm (action, "weight_lib"))
       {
         int l;
         int w;
         w=atoi (action_list[1]);
         if ( D1->CL)
           {
           for (l=0; l<(D1->CL)->ne; l++)
             (D1->CL)->L[l][WE]=w;
           }
       }
       else if ( strm (action, "struc2nb"))
       {
         int c;
         for ( c=0; c< (D1->S)->nseq; c++)
           {
             struclist2nb ((D1->S)->name[c],(D1->S)->seq[c], (D1->S)->seq_comment[c], atof(action_list[1]),ACTION(2),ACTION(3) );
           }
         myexit (EXIT_SUCCESS);
       }
      
     
      
       else if ( strm(action, "seq2contacts"))
       {
         int z;
         D1->S=swap_header (D1->S, D2->S); 
         for ( z=0; z< (D1->S)->nseq; z++)sprintf ( (D1->A)->name[z], "%s", (D1->S)->name[z]);
         DST->S=seq2contacts (D1->S, atof (action_list[1]));
         DST->A=copy_aln (D1->A, NULL);
         thread_seq_struc2aln ( DST->A,DST->S);
         for (z=0; z< (D1->S)->nseq; z++)
         (DST->A)->S=D1->S;
         
       }
       else if ( strm(action, "struc2contacts"))
       {
         char *seq;
         if ( atof (action_list[3])>0)
           {
             seq=map_contacts  (action_list[1], action_list[2], atof (action_list[3]));
             fprintf ( stderr, "\n>%s %s\n%s",action_list[1], action_list[2],seq);
           }
         else 
           print_contacts  (action_list[1], action_list[2], atof (action_list[3]));
         
         myexit (EXIT_SUCCESS);
       }
       else if ( strm(action, "tree_prune") || strm(action, "prune_tree"))
       {
         D1->T=main_prune_tree ( D1->T, D2->S);
       }
       else if ( strm ( action, "tree2seq"))
       {
         D1->S=tree2seq(D1->T, NULL);
         D1->A=seq2aln (D1->S, D1->A, 1);
         (D1->A)->len_aln=1;
         for ( a=0; a< (D1->A)->nseq; a++)sprintf ( (D1->A)->seq_al[a], "sequence");
       }
       else if ( strm (action, "seq2dpatree"))
       {
         D1->T= seq2dpa_tree(D1->S,"ktup");
       }
       else if ( strm (action, "tree2dpatree"))
       {
         D1->T= tree2dpa_tree(D1->T,(D2 && D2->A)?D2->A:D1->A, (n_actions==1)?"idmat":action_list[1]);
       }
       else if ( strm (action, "tree2group"))
       {
         vfclose (tree2group (D1->T, (tree2seq(D1->T,NULL)), atoi(action_list[1]), atoi(action_list[2]),(n_actions==4)?action_list[3]:NULL, stdout));    
         myexit (EXIT_SUCCESS);
       }
       else if ( strm(action, "unroot"))
       {
         D1->T=unroot_tree(D1->T);
       }
       else if ( strm(action, "treelist2group")|| strm(action, "treelist2groups") )
       {
         treelist2groups (D1->S,(D2)?D2->S:NULL );
       }
       else if ( strm (action, "tree2dist"))
       {
         int ta, tb, ***td;
         Sequence *TS;
         
         TS=(D2)?D2->S:NULL;
         td=tree2dist (D1->T,TS, NULL);
         if (!TS)TS=tree2seq(D1->T, NULL);
         for (ta=0; ta<TS->nseq; ta++)
           {
             fprintf ( stdout, "%-15s ",TS->name[ta]);
             for ( tb=0; tb<TS->nseq; tb++)
             {
               int n=0;
               if ( ACTION(1) && strm (ACTION(1), "length"))n=1;
               
               fprintf (stdout, " %4d", td [n][ta][tb]);
             }
             fprintf ( stdout, "\n");
           }
         exit (EXIT_SUCCESS);
       }
       else if ( strm (action, "treelist2lti"))
       {
         treelist2lti (D1->S, (D2)?D2->S:NULL, ACTION(1));
         exit (0);
       }     
       else if ( strm (action, "treelist2leafgroup"))
       {
         treelist2leafgroup (D1->S, (D2)?D2->S:NULL);
         exit (0);
       }     
       else if ( strm(action, "treelist2splits"))
       {
         treelist2splits (D1->S, (D2)?D2->S:NULL);
       }
       else if ( strm(action, "treelist2dmat"))
       {
         treelist2dmat (D1->S);
       }
       else if ( strm(action, "tree_cmp") || strm (action, "tree_compare"))
       {
         D1->T=main_compare_trees ( D1->T, D2->T, stdout);     
       }
       else if ( strm(action, "node_sort"))
       {
         node_sort ( action_list[1], D1->T);
         exit (EXIT_SUCCESS);
       }
       
       else if ( strm ( action, "avg_bootstrap"))
       {
         display_avg_bootstrap (D1->T);
         myexit (EXIT_SUCCESS);
       }
         
       else if ( strm (action, "tree_cog_cmp"))
       {
         main_compare_cog_tree (D1->T,action_list[1]);
         exit (EXIT_SUCCESS);
       }
       else if ( strm (action, "tree_aln_cmp"))
       {
         main_compare_aln_tree (D1->T, D2->A, stdout);
         exit (EXIT_SUCCESS);
       }
       else if ( strm(action, "change_bootstrap"))
       {
         D1->T=reset_boot_tree ( D1->T, (n_actions>=2)?atoi(action_list[1]):0);
       }
       else if ( strm(action, "change_distances"))
       {
         D1->T=reset_dist_tree ( D1->T, (n_actions>=2)?atof(action_list[1]):0.00);
       }
       
       else if ( strm(action, "aln2tree"))
       {
         D1->T=tree_compute (D1->A, n_actions-1, action_list+1);
       }
       else if ( strm(action, "similarities2tree"))
       {
         D1->T=similarities_file2tree (ACTION(1));
       }
       
       else if (  strm(action, "original_seqnos"))
       {
        (D1->A)->output_res_num=2;
       }
       else if ( strm(action, "evaluate"))
       {
         Alignment *A;


         DST->A=copy_aln (D1->A, NULL);
         DST->S=aln2seq(DST->A);
         if (n_actions>1 && strm (  action_list[1], "categories"))
           {
             CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt"));
             DST->A=  main_coffee_evaluate_output(DST->A, CL, "categories");
           }
         else if (n_actions>1 && strm (  action_list[1], "sar"))
           {
             CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt"));
             DST->A=  main_coffee_evaluate_output(DST->A, CL, "sar");
           }
         else if (n_actions>1 && strstr (  action_list[1], "boxshade"))
           {
             char color_mode[1000];
             sprintf (color_mode,"boxshade_%d", atoi(ACTION2(2,"30")));
             CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice("pam250mt"));
             DST->A=  main_coffee_evaluate_output(DST->A, CL, color_mode);
           }
         
         else
           {
             float score;
             score=(int)sum_pair (DST->A,ACTION(1),atoi(ACTION2(2,"0")),atoi(ACTION2(3,"0")));

             CL=declare_constraint_list ( DST->S,NULL, NULL, 0,NULL, read_matrice((n_actions==1)?"pam250mt":action_list[1]));
             DST->A=  main_coffee_evaluate_output(DST->A, CL, "matrix");
             (D1->A)->score=(D1->A)->score_aln=(DST->A)->score=(DST->A)->score_aln=(int)score;
           }
         
         DST->S=aln2seq ( DST->A);
        
         A=D1->A;
         sprintf ( A->name[A->nseq], "cons");
         sprintf ( A->seq_al[A->nseq], "%s", aln2cons_seq_mat (A, "idmat"));
       }
         
       else if ( strm (action, "aln2cons"))
       {
         char *cons_seq;
         char *cons_name;
         cons_name=vcalloc (100, sizeof (char));
         sprintf(cons_name, "%s", (n_actions<=2)?"Cons":action_list[2]);
         cons_seq=aln2cons_seq_mat (D1->A, (n_actions==1)?"blosum62mt":action_list[1]);
         free_aln (D1->A);free_sequence(D1->S, -1);
         D1->S=fill_sequence_struc (1, &cons_seq, &cons_name);
         /*keep the gaps*/
         (D1->S)->len[0]=strlen (cons_seq); sprintf ( (D1->S)->seq[0], "%s", cons_seq);
         D1->A=seq2aln (D1->S, NULL, KEEP_GAP);
         vfree (cons_name);vfree (cons_seq);
       }
       else if ( strm (action, "seq2filter"))
       {
         D1->S=seq2filter ( D1->S, atoi(action_list[1]), atoi(action_list[2]));
                        
       }
       else if (strm(action, "keep_name"))
       {
         RAD->keep_name=1-RAD->keep_name;
       }
        else if (strm(action, "use_consensus") ||strm(action, "use_cons") )
       {
         RAD->use_consensus=1-RAD->use_consensus;
       }
       else if ( strm(action, "ungap"))
       {
         seq2aln (D1->S, D1->A, 1);
       }
       else if ( strm2(action, "rmgap", "rm_gap"))
       {

         ungap_aln_n (D1->A, (n_actions==1)?100:atoi(action_list[1]));
         free_sequence ( D1->S, (D1->S)->nseq);
         D1->S=aln2seq ( D1->A);
         (D1->A)->S=D1->S;
       }
       else if ( strm(action, "rmgap_col"))
       {
         D1->A=remove_gap_column ( D1->A,action_list[1]);
       }
       else if ( strm(action,"random"))
       {
         
         D1->A= make_random_aln(NULL,(n_actions==1)?1:atoi(action_list[1]),(n_actions==2)?100:atoi(action_list[2]),"acdefghiklmnpqrstvwy");
         
         D1->S=aln2seq ( D1->A);
       }
          
       else if ( strm(action, "landscape"))
        {
          
          set_landscape_msa ((n_actions==1)?0:atoi(action_list[1]));
        }
       else if ( strm(action, "clean_maln"))
        {
          if ( !DST) 
               {
               fprintf ( stderr,"\n[You Need an evaluation File: Change the output format][FATAL:%s]\n", PROGRAM);      
               myexit(EXIT_FAILURE);
               }
          (DST->A)=aln2number (DST->A);
          D1->A=clean_maln(D1->A, DST->A,(n_actions==1)?1:atoi(action_list[1]),(n_actions==1)?1:atoi(action_list[2]));   
        }
       else if ( strm (action, "extract"))
       {
         
         COOR=get_pir_sequence  (RAD->coor_file, NULL);
         D1->S=extract_sub_seq ( COOR, D1->S);
         free_aln (D1->A);
         D1->A=declare_Alignment(D1->S);
         seq2aln (D1->S, D1->A, RAD->rm_gap);
         free_sequence (COOR, COOR->nseq);
       }
       
       else if ( strm (action, "reorder"))
       {
         
         if ( n_actions==2 && strm (action_list[1], "random"))
           {
             D1->A=reorder_aln ( D1->A, NULL, (D1->A)->nseq);
           }
         else
           {
             (D2->A)->S=aln2seq (D2->A);
             (D1->A)->S=aln2seq (D1->A);
             OUT_S=trim_aln_seq_name(D2->A, D1->A);
             D1->A=reorder_aln(D1->A, OUT_S->name, OUT_S->nseq);
             free_sequence (D1->S,(D1->S)->nseq);
             D1->S=aln2seq (D1->A);
           }
       }
       else if ( strm (action, "cat_aln"))
       {
         /*D1->A=aln_cat ( D1->A, D2 ->A);*/
         if (D2 && D2->A)
           D1->A=concatenate_aln (D1->A, D2->A, ACTION(1));
         else
           {
             Alignment *A, *B;
             
             A=main_read_aln ((D1->A)->name[0], NULL);
             
             for ( a=1; a<(D1->A)->nseq; a++)
             {
               B=main_read_aln ((D1->A)->name[a], NULL);
               A=concatenate_aln (A, B, ACTION(1));
               
             }
             D1->A=A;
             D1->S=aln2seq(D1->A);
           }
       }
       else if ( strm ( action, "msalist2cat_pwaln"))
       {
         int a, b, c;
         int sim, min, max;
         
         if (n_actions!=3)
           {
             min=0;
             max=100;
           }
         else
           {
             min=atoi(action_list[1]);
             max=atoi(action_list[2]);
           }
         
         fprintf ( stdout, ">A\n");
         for (a=0;a<(D1->S)->nseq; a++)
           {
             Alignment *A;
             HERE ("process %s",  (D1->S)->name[a]);
             A=main_read_aln((D1->S)->name[a],NULL);
             for (b=0; b<A->nseq-1; b++)
             {
               for ( c=b+1; c<A->nseq; c++)
                 {
                   sim=get_seq_sim (A->seq_al[b], A->seq_al[c], "-", "");
                   if (sim>=min && sim<=max)fprintf (stdout, "xxx%s", A->seq_al[b]);
                 }
             }
             free_aln (A);
           }
         fprintf ( stdout, "\n>B\n");
         for (a=0;a<(D1->S)->nseq; a++)
           {
             Alignment *A;
             HERE ("process %s",  (D1->S)->name[a]);
             A=main_read_aln((D1->S)->name[a],NULL);
             for (b=0; b<A->nseq-1; b++)
             {
               for ( c=b+1; c<A->nseq; c++)
                 {
                   sim=get_seq_sim (A->seq_al[b], A->seq_al[c], "-", "");
                   if (sim>=min && sim<=max)fprintf (stdout, "xxx%s", A->seq_al[c]);
                 }
             }
             free_aln (A);
           }
         
         fprintf ( stdout, "\n");
         exit (EXIT_SUCCESS);
       }
       
       else if ( strm (action, "collapse_tree"))
       {
         D1->T=tree2collapsed_tree (D1->T, n_actions-1, action_list+1);
       }
       else if ( strm (action, "collapse_aln"))
       {
         D1->A=aln2collapsed_aln (D1->A, n_actions-1, action_list+1);
       }
       else if ( strm (action, "extract_aln"))
       {
         D1->A=aln2sub_aln_file (D1->A, n_actions-1, action_list+1);
         myexit (EXIT_SUCCESS);
       }
      
       
      
       else if ( strm (action, "remove_aa"))
       {
         int pos,len, n;
         pos=atoi(action_list[1]);
         len=atoi(action_list[2]);
         n=atoi (action_list[3]);
         if ( atoi (action_list[4])==1)len=-len; 
         if (pos && n>1) 
           {
             fprintf ( stderr, "\nWARNING: rm_aa, position (pos) and iteration number (n) simulatneously defined. Iteration number reset to 1 [%s]\n", PROGRAM);
             n=1;
           }
         for ( a=0; a< n; a++)
           D1->A=probabilistic_rm_aa (D1->A, pos, len);
       }
       else if ( strm (action, "remove_nuc"))
       {
         int pos;
         pos=atoi(action_list[1]);
         
         if ( pos>3 || pos<1)
           printf_exit (EXIT_FAILURE, stderr, "Remove_nuc: indicate a number between 1 and 3\n");
         
         pos--;
         for ( c=0,a=0; a<(D1->A)->len_aln; a++, c++)
           {
             if (c==3)c=0;
             for (b=0; b<(D1->A)->nseq; b++)
             {
             if (c==pos)
               {
                 (D1->A)->seq_al[b][a]='-';
               }
             }
           }
         
         D1->S=aln2seq (D1->A);
       }
       
       else if (strm ( action, "conserved_positions"))
       {
         Alignment *A;
         int  a, b, c;
         int *cache=NULL;
         

         A=D1->A;
         for ( a=0; a< A->nseq && !cache; a++)
           {
             if ( strm (action_list[1], A->name[a]))
             {
               cache=vcalloc ( A->len_aln+1, sizeof (int));
               for ( c=0,b=0; b<A->len_aln; b++)
                 {
                   if ( is_gap (A->seq_al[a][b]))cache[b]=-1;
                   else cache[b]=++c;
                 }
             }
           }

         for ( a=0; a< A->len_aln; a++)
           {
             r1=A->seq_al[0][a];
             if ( is_gap(r1))continue;
             for ( c=0,b=0; b<A->nseq; b++)
             {
               r2=A->seq_al[b][a];
               c+=(r1==r2)?1:0;
             }
             if ( (c*100)/A->nseq>=atoi(action_list[2]))
             fprintf ( stdout, "COL: %d Res: %c %s %d\n", a+1, r1, action_list[1], cache[a]+atoi(action_list[3]));
           }
         exit (EXIT_FAILURE);
       }
       else if (strm ( action, "extract_block") )
       {
         
         BUF=copy_aln (D1->A, NULL);
         if ( check_file_exists(action_list[1]))
           BUF=extract_aln3(BUF,action_list[1]);
         else
           BUF=extract_aln2(BUF,atoi(action_list[2]),atoi(action_list[3]),action_list[1]);
         D1->A=copy_aln (BUF,D1->A);
        
       }
      
       else if ( strm ( action, "seq2msa"))
       {
         D1->A=simple_progressive_aln ( D1->S, NULL, NULL, action_list[1]);
       }
       else if ( strm ( action, "realign_block") )
       {
         D1->A=realign_block ( D1->A, atoi (action_list[1]), atoi (action_list[2]), (n_actions==4)?action_list[3]:NULL);
       }
       else if ( strm (action, "extract_seq"))   
       {
         int is_file;
         if ( check_file_exists (action_list[1]))
           {
             is_file=1;
             BUFS=main_read_seq (action_list[1]);
             action_list=BUFS->name;
             n_actions=BUFS->nseq;         
           }
         else
           {
             is_file=0;
             action_list++;
             n_actions--;
           }
         
         for ( a=0; a< n_actions;)
           {
             s=action_list[a];
             
             if ( n_actions==1 || is_file==1)
             {
               start=1;
               end=0;
               a+=1;
             }
             else
             {

               start=(strm2 (s,"#","*"))?1:(atoi(action_list[a+1]));
               end=  (strm2 (action_list[a+2],"#","*"))?0:(atoi(action_list[a+2]));
               a+=3;
             }
             
             if ( strm2 (s, "#", "*"))
             {
               OUT_S=extract_one_seq((D1->A)->name[0],start, end, D1->A, RAD->keep_name);
               for (b=1; b< (D1->A)->nseq; b++)
                 {
                  NS=extract_one_seq((D1->A)->name[b],start, end, D1->A, RAD->keep_name);
                  if (count_n_res_in_array(NS->seq[0], -1))
                  OUT_S=add_sequence ( NS,OUT_S, 0);
                 }
             }
             else
             {
              if ( a==1)OUT_S=extract_one_seq(s,start, end, D1->A, RAD->keep_name);
              else
                {
                  NS=extract_one_seq(s,start, end, D1->A, RAD->keep_name); 
                  OUT_S=add_sequence ( NS,OUT_S, 0);
                }
             }
           }
         D1->S=OUT_S;
         free_aln (D1->A);
         D1->A=declare_Alignment(D1->S);
         seq2aln (D1->S, D1->A, RAD->rm_gap);
       }
       
       else if ( strm (action, "extract_seq_list"))    
       {
         if ( check_file_exists (action_list[1]))
           {
            
             BUFS=main_read_seq (action_list[1]);
             action_list=BUFS->name;
             n_actions=BUFS->nseq;         
           }
         else
           {
             action_list++;
             n_actions--;
           }
         
         for ( a=0; a< n_actions;a++)
           {
             NS=extract_one_seq(action_list[a],1,0, D1->A, KEEP_NAME); 
             OUT_S=add_sequence ( NS,OUT_S, 0);
           }
         
         D1->S=OUT_S;
         free_aln (D1->A);
         D1->A=declare_Alignment(D1->S);
         seq2aln (D1->S, D1->A, RAD->rm_gap);
       }
        else if ( strm (action, "remove_seq"))   
       {
         
         for ( a=0; a< (D1->A)->nseq; a++)
           {
             for (c=1, b=1; b< n_actions; b++)if ( strm (action_list[b], (D1->A)->name[a])){c=0;break;}
           
             if ( c)
             {
               
               NS=extract_one_seq((D1->A)->name[a],1,0, D1->A,1);
               OUT_S=add_sequence ( NS,OUT_S, 0);
             }
           }
         D1->S=OUT_S;
         free_aln (D1->A);
         D1->A=declare_Alignment(D1->S);
         seq2aln (D1->S, D1->A, RAD->rm_gap);
         
       }
       else if (  strm (action, "clean_cdna"))
       { 
         Alignment *A;
         A=D1->A;
         for (a=0; a< A->nseq; a++)
           {
             char *d, *buf, f;
             
             d=A->seq_al[a];
             f=get_longest_frame (d);
             buf=vcalloc ( strlen (d)+1, sizeof (char));
             sprintf (buf, "%s", d+f);
             sprintf (d, "%s", buf);
             vfree (buf);
           }
       }
       else if ( strm (action, "clean_cdna2"))
       {
         D1->A=clean_cdna_aln ( D1->A);
         free_sequence ( D1->S, (D1->S)->nseq);
         D1->S=aln2seq ( D1->A);
       }
       else if ( strm  (action, "aln2short_aln"))
         {
           D1->A=aln2short_aln (D1->A, action_list[1], action_list[2], atoi(action_list[3]));
           free_sequence ( D1->S, (D1->S)->nseq);
           D1->S=aln2seq ( D1->A);
         }
       else if ( strm ( action, "translate"))
       {
         D1->A=translate_dna_aln( D1->A,(n_actions==1)?0:atoi(action_list[1]));
         free_sequence ( D1->S, (D1->S)->nseq);
         D1->S=aln2seq ( D1->A);
       }
       else if (strm2 ( action, "back_translate","backtranslate"))
       {
        D1->A=back_translate_dna_aln( D1->A);
        free_sequence ( D1->S, (D1->S)->nseq);
        D1->S=aln2seq ( D1->A);
       }
       else if (strm ( action, "rotate"))
       {
         D1->A=rotate_aln( D1->A, action_list[1]);
         free_sequence ( D1->S, (D1->S)->nseq);
         D1->S=aln2seq ( D1->A);
       }
       else if (strm ( action, "invert"))
       {
        D1->A=invert_aln( D1->A);
        free_sequence ( D1->S, (D1->S)->nseq);
        D1->S=aln2seq ( D1->A);
       }
       else if (strm ( action, "code_dna_aln"))
       {
        D1->A=code_dna_aln( D1->A);
        free_sequence ( D1->S, (D1->S)->nseq);
        D1->S=aln2seq ( D1->A);
       }
       
       else if ( strm ( action, "mutate"))
       {
         D1->A=mutate_aln( D1->A,(n_actions==1)?"0":action_list[1]);
         free_sequence ( D1->S, (D1->S)->nseq);
         D1->S=aln2seq (D1->A);
       }
       else if ( strm ( action, "thread_profile_on_msa"))
       {
         D1->A=thread_profile_files2aln (D1->A, action_list[1], NULL);
         D1->S=aln2seq(D1->A);
       }
       else if ( strm ( action, "thread_dna_on_prot_aln"))
        {
          D1->A=thread_dnaseq_on_prot_aln (D1->S, D2->A);
          free_sequence (D1->S,(D1->S)->nseq);
          D1->S=aln2seq (D1->A); 
        }
       else if ( strm ( action, "thread_struc_on_aln"))
       {
         thread_seq_struc2aln ( D2->A, D1->S);
         D1->A=copy_aln(D2->A, D1->A);
         free_sequence ( D1->S, (D1->S)->nseq);
         D1->S=aln2seq (D1->A);
       }
       
       else if ( strm (action, "trim"))
       {
         D1->A=simple_trimseq (D1->A,(D2)?D2->A:NULL, action_list[1], ACTION (2));
      
         free_sequence (D1->S,(D1->S)->nseq);
         D1->S=aln2seq (D1->A); 
       }
       
       else if (strm ( action, "trimTC"))
       {
         value=(n_actions==1)?10:atoi(action_list[1]);
      
         D1->A=tc_trimseq(D1->A,D1->S,action_list[1]);
         free_sequence (D1->S,(D1->S)->nseq);
         D1->S=aln2seq (D1->A); 
       }
       else if (strm ( action, "trimTC2"))
       {
         char *group_file;
         Alignment *B=NULL;
         char trim_mode[100];
         if ( n_actions==1 || !(strm (action_list[1], "NSEQ") ||strm (action_list[1], "MINID")) )
           {
             fprintf ( stderr, "\nTrimTC2 <NSEQ | MINID>  <number sequences| minimum identity> (<matrix>)\n");
             myexit (EXIT_FAILURE);
           }
         sprintf (trim_mode, "%s", action_list[1]);action_list+=2; n_actions-=2;
         
         if ( strm ( trim_mode, "NSEQ"))
           {
             group_file=tree2Ngroup( (D1)?D1->A:NULL, (D2)?D2->T:NULL, atoi (action_list[0]), vtmpnam(NULL), (n_actions==1)?"idmat":action_list[1]);
           }
         else
           {
             group_file=tree2Ngroup( (D1)?D1->A:NULL, (D2)?D2->T:NULL, -1*atoi (action_list[0]), vtmpnam(NULL), (n_actions==1)?"idmat":action_list[1]);
           }
         
         B=copy_aln (D1->A, B);
         B=aln2sub_aln_file (B,1,&group_file);
         B=aln2sub_seq (B, 1, &group_file);
         D1->A=extract_sub_aln2 (D1->A, B->nseq, B->name);
       }
       else if ( strm (action, "chain"))
       {
         D1->A=seq2seq_chain (D1->A,D2->A, ACTION(2));
       }
       
       
       else if (strm ( action, "master_trim"))
       {
         value=(n_actions==1)?10:atoi(action_list[1]);
      
         D1->A=master_trimseq(D1->A,D1->S,action_list[1]);
         free_sequence (D1->S,(D1->S)->nseq);
         D1->S=aln2seq (D1->A); 
       }
        else if ( strm (action, "force_aln"))    
       {
         char ***rlist=NULL;
         int count=0;

         if ( n_actions==2)
           {
             if (!is_lib_02(action_list[1]))
             {
               fprintf ( stderr, "\nERROR: force_aln requires files in TC_LIB_FORMAT_02 [FATAL:%s]", PROGRAM);
               myexit (EXIT_FAILURE);
             }
             else
               rlist=file2list (action_list[1], " ");
           }
         else
           {
             rlist=declare_arrayN(3, sizeof (char),3,7, 10);
             
             strcat (rlist[1][1],action_list[1]);strcat (rlist[1][3],action_list[2]);
             strcat (rlist[1][4],action_list[3]);strcat (rlist[1][6],action_list[4]);
             sprintf ( rlist[2][0], "-1");
           }
         count=1;
         while (rlist[count] && atoi(rlist[count][0])!=-1)
           {
             char st1[100], st2[100], st3[100], st4[100];
             
             sprintf ( st1, "%s", rlist[count][1]);sprintf ( st2, "%s", rlist[count][3]);
             sprintf ( st3, "%s", rlist[count][4]);sprintf ( st4, "%s", rlist[count][6]);
             fprintf ( stderr, "\nFORCE: %s %s %s %s", st1, st2, st3, st4);
             
             if (is_number (st1))s1=atoi (st1)-1;
             else s1=name_is_in_list (st1,(D1->A)->name, (D1->A)->nseq, 100);
             if ( s1<0 || s1>= (D1->A)->nseq)crash ("wrong sequence index");
             r1=atoi (st2)-1;
             
             if (is_number (st3))s2=atoi (st3)-1;
             else s2=name_is_in_list (st3,(D1->A)->name, (D1->A)->nseq, 100);
             if ( s2<0 || s2>= (D1->A)->nseq)crash ("wrong sequence index");
             r2=atoi (st4)-1;
             
             (D1->A)=add_constraint2aln ((D1->A), s1, r1, s2, r2);
             count++;
           }
         fprintf ( stderr, "\n");
         free_arrayN((void*)rlist,3);
       }
      
        else if (strm ( action, "grep"))
        {
          D1->A=grep_seq (D1->A, ACTION(1),ACTION(2), ACTION(3));
          if (D1->A==NULL) myexit (EXIT_SUCCESS);
          else D1->S=aln2seq (D1->A); 
        }
                  
      else if (strm (action, "find"))
        {
          int r, l;
          char *search_string;

          search_string=vcalloc ( 30, sizeof (char));
          if ( strm (action_list[1], "lower"))sprintf ( search_string, "abcdefghijklmnopqrstuvwxyz");
          else if ( strm ( action_list[1], "upper"))sprintf ( search_string, "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
          else 
            {
            vfree (search_string);search_string=vcalloc ( strlen (action_list[1])+1, sizeof (char));
            sprintf (search_string, "%s", action_list[1]);
            }
          
          for (a=0; a<(D1->A)->nseq; a++)
            for ( l=0,b=0; b< (D1->A)->len_aln; b++)
            {
              r=(D1->A)->seq_al[a][b];
              l+=!is_gap(r);
              if ( r!='\0' && strrchr (search_string, r))
                {
                  /*fprintf ( stdout, "%-15s res %c alnpos %4d seqpos %4d\n", (D1->A)->name[a], r, b+1, l);*/
                  fprintf ( stdout, "%s %d %d\n", (D1->A)->name[a], l, l+1);
                }
            }
          myexit (EXIT_SUCCESS);
        }
        else if ( strm (action, "merge_annotation"))
        {
          D1->A=merge_annotation (D1->A, DST?DST->A:NULL, ACTION(1));
          D1->S=aln2seq (D1->A); 
        }
      else if ( strm5 (action, "convert","upper","lower", "keep", "switchcase"))
       {
         b=1;
         if ( n_actions>1 && is_number (action_list[b]))
           {
             lower_value=upper_value=atoi(action_list[b++]);
           }
         else if (n_actions>1 && action_list[b] && action_list[b][0]=='[')
           
           {
             lower_value=atoi(strtok (action_list[b]+1, "-[]"));
             upper_value=atoi(strtok (NULL, "-[]"));
           
             b++;
           }
         else
           {
             lower_value=upper_value=-1;
           }

         if ( n_actions >b ||strm (action, "keep") )
           {
             if ( !RAD->symbol_list)RAD->symbol_list=declare_char (STRING, STRING);
             RAD->n_symbol=0;
             if ( strm (action, "keep") )sprintf ( RAD->symbol_list[RAD->n_symbol++], "#-");
             else
             {
               for (a=b; a< n_actions; a++)
                 {
                   sprintf ( RAD->symbol_list[RAD->n_symbol], "%s", action_list[a]);
                   RAD->n_symbol++;
                 }
             }
           }
         
         for ( value=0; value<=9; value++)
           {
             if ( lower_value==-1)value=-1;
             
             if ( (value>=lower_value && value<=upper_value)|| value==-1)
             {
               if (strm(action,"convert")) D1->A=filter_aln_convert (D1->A, DST?DST->A:NULL,RAD->use_consensus,value,RAD->n_symbol, RAD->symbol_list);
               else if (strm(action,"upper"))D1->A=filter_aln_lower_upper (D1->A, DST?DST->A:NULL,RAD->use_consensus,value);
               else if (strm(action,"lower"))D1->A=filter_aln_upper_lower (D1->A, DST?DST->A:NULL,RAD->use_consensus,value);
               else if (strm(action,"switchcase"))D1->A=filter_aln_switchcase (D1->A, DST?DST->A:NULL,RAD->use_consensus,value);
             }
             else
             {
               if (strm(action,"keep")) D1->A=filter_aln_convert (D1->A, DST?DST->A:NULL,RAD->use_consensus,value,RAD->n_symbol, RAD->symbol_list);
             }
             if (value==-1)break;

           }

         /*free_sequence (D1->S,(D1->S)->nseq);*/
         if (!D1->S)D1->S=aln2seq (D1->A); 
       } 
      else if ( strm ( action, "count_pairs"))
        {
          int a, b,c,v, **matrix;
          Alignment *A;
          matrix=declare_int (300,300);
          A=D1->A;
          for ( a=0; a< A->nseq-1; a++)
            for (b=0; b< A->nseq; b++)
            for (c=0; c<A->len_aln; c++)
              matrix[(int)A->seq_al[a][c]][(int)A->seq_al[b][c]]++;
          for ( a=0; a<255; a++)
            for ( b=a; b<256; b++)
            {
              v=matrix[a][b]+matrix[b][a];
              if (v)fprintf ( stdout, "\n%c %c %d", a, b, v);
            }
          exit (EXIT_SUCCESS);
        }
       else if ( strm (action, "count"))
       {
         b=1;
         if ( n_actions>1 && is_number (action_list[b]))
           {
             lower_value=upper_value=atoi(action_list[b++]);
           }
         else if (n_actions>1 && action_list[b] && action_list[b] && action_list[b][0]=='[')
           
           {
             lower_value=atoi(strtok (action_list[b]+1, "-[]"));
             upper_value=atoi(strtok (NULL, "-[]"));
           
             b++;
           }
         else
           {
             lower_value=upper_value=-1;
           }
         if ( n_actions >b)
           {
             if ( !RAD->symbol_list)RAD->symbol_list=declare_char (STRING, STRING);
             RAD->n_symbol=0;
             for (a=b; a< n_actions; a++)
             {
               sprintf ( RAD->symbol_list[RAD->n_symbol], "%s", action_list[a]);
               RAD->n_symbol++;
             }
           }
         for ( value=lower_value; value<=upper_value; value++)
           {
             count_table=count_in_aln (D1->A, DST?DST->A:NULL,value,RAD->n_symbol, RAD->symbol_list, count_table);             
           }
         for ( a=0; a<RAD->n_symbol; a++)
           {
             fprintf ( stdout, "%s %d\n", RAD->symbol_list[a], count_table[a]);
           }
         free_sequence (D1->S,(D1->S)->nseq);
         D1->S=aln2seq (D1->A); 
         vfree(count_table);
         exit(EXIT_SUCCESS);
       }
       else if ( strm (action, "msa_weight"))
       {
         int random_value;
         char command [LONG_STRING];
         char aln_name[FILENAMELEN];
         char tree_name[FILENAMELEN];
         char dist_matrix_name[FILENAMELEN];
         char weight_name[FILENAMELEN]; 
         char method_4_msa_weights[1000];
         
         if ( n_actions==1)
           {
             fprintf ( stderr, "\nError: msa_weight requires a weight_method");
           }
         
         sprintf ( method_4_msa_weights, "%s", (get_env_variable ("METHOD_4_MSA_WEIGHTS", 0))?get_env_variable ("METHOD_4_MSA_WEIGHTS", 0):METHOD_4_MSA_WEIGHTS);
         
         /*1 Computation of the tree and the distance matrix*/
         random_value=addrand ((unsigned long) 100000)+1;
         sprintf (aln_name, "%d.aln", random_value);
         sprintf (tree_name, "%d.ph", random_value);
         sprintf (dist_matrix_name, "%d.dst", random_value);
         sprintf (weight_name, "%d.weight", random_value);
         output_fasta_aln (aln_name, D1->A);
        
         sprintf ( command, "clustalw -infile=%s -tree -outputtree=dist %s", aln_name, TO_NULL_DEVICE);
         my_system ( command);
         sprintf ( command, "%s -method %s -aln %s -tree %s -dmatrix %s -weightfile %s %s",method_4_msa_weights, action_list[1],aln_name, tree_name, dist_matrix_name,weight_name, TO_NULL_DEVICE); 
         my_system ( command);
         
         (D1->A)->S=aln2seq (D1->A);
         ((D1->A)->S)->W=read_seq_weight ( (D1->A)->name, (D1->A)->nseq,weight_name);        
         vremove (weight_name);
         vremove (aln_name);
         vremove (tree_name);
         vremove (dist_matrix_name);         
       }
       else if ( strm (action, "pavie_seq2random_seq"))
       {
         D1->S=pavie_seq2random_seq (D1->S, action_list[1]);
         D1->A=seq2aln (D1->S,NULL,1);
       }
       else if ( strm ( action, "pavie_seq2noisy_seq"))
       {
         /*<amount of noise: 0-100> (<alp>)*/
         
         D1->S=pavie_seq2noisy_seq (D1->S, atoi(action_list[1]),ACTION(2));
         D1->A=seq2aln (D1->S,NULL,1);
       }
       else if ( strm (action, "pavie_seq2pavie_mat"))
       {

         pavie_seq2trained_pavie_mat ( D1->S, (n_actions==2)?action_list[1]:NULL);
         myexit (EXIT_SUCCESS);
       }
       else if ( strm (action, "pavie_seq2pavie_aln"))
       {

         pavie_seq2pavie_aln ( D1->S, action_list[1], ACTION(2));
         myexit (EXIT_SUCCESS);
       }
       else if ( strm (action, "pavie_seq2pavie_dm"))
       {
          if (strstr (ACTION2(2,""), "_MSA_"))
            D1->S=aln2seq_main(D1->A, KEEP_GAP);
         
               
         pavie_seq2pavie_aln ( D1->S, action_list[1],(n_actions==3)?action_list[2]:"_MATDIST_");
         myexit (EXIT_SUCCESS);
       }
       else if ( strm (action, "pavie_seq2pavie_msa"))
       {
         D1->A=pavie_seq2pavie_msa ( D1->S, action_list[1], (n_actions==3)?action_list[2]:NULL);
       }
       else if ( strm (action, "pavie_seq2pavie_tree"))
       {
         D1->T=pavie_seq2pavie_tree ( D1->S, action_list[1], (n_actions==3)?action_list[2]:NULL);
       }
       else if ( strm (action, "pavie_seq2pavie_sort"))
       {
         D1->A=pavie_seq2pavie_sort ( D1->S, action_list[1], (n_actions==3)?action_list[2]:NULL);
       }
       
       else if ( strm (action, "train_latmat"))
       {
         seq2latmat ( D1->S, "stdout");
         myexit (EXIT_SUCCESS);
       }
       else if ( strm (action , "rm_target_pdb"))
       {
         int i, j;
         char *buf;
         
         for (i=0; i< (D1->A)->nseq; i++)
           {
             j=1;buf=(D1->A)->name[i];
             while (buf[j]!='_' && buf[j-1]!='_' && buf[j]!='\0')j++;
             buf[j]='\0';
           }
       }
             
       else if ( strm ( action, "mat2cmp"))
       {
         double *r;
         r=mat2cmp (D1->M, D2->M);
         fprintf ( stdout, "\nMATRIX COMPARISON: R=%.3f R2=%.3f On %d pairs of values\n", (float)r[0], (float)r[1], (int)r[2]);
         myexit (EXIT_SUCCESS);
       }
       else
       {
         fprintf ( stderr, "\nWARNING: ACTION %s UNKNOWN and IGNORED\n", action);
       }
     }

/*
 * seq2latmat: trains a residue-to-next-residue transition matrix on the
 * sequences of S and writes it to fname in BLAST matrix format.
 *
 * S     : sequences to train on (S->nseq entries, lengths in S->len).
 * fname : name of the output file, opened with vfopen (fname, "w").
 *
 * Every pair of adjacent characters (lower-cased) adds 2 to mat[r0][r1], 1 to
 * the count of each of the two characters and 2 to the grand total.  For each
 * pair of symbols of BLAST_AA_ALPHABET (pairs involving '*' are skipped) the
 * cell is then replaced by (int)10*log(observed/expected), or by 0 when the
 * expected value is 0.  Cells outside the alphabet keep their raw counts.
 *
 * Returns the 256x256 matrix obtained from declare_int; it is not freed here.
 *
 * NOTE(review): when a pair is never observed but its expected value is not
 * 0, log() receives 0 and the conversion of the result to int is not well
 * defined.  This is left as it was.
 */
int **seq2latmat ( Sequence *S, char *fname)
{
  int a, b, r0, r1;
  char *aa;
  int naa;
  int *count, tot;
  int **mat;
  double observed, expected;
  FILE *fp;

  fp=vfopen (fname, "w");

  count=vcalloc ( 256, sizeof (int));
  mat=declare_int (256, 256);

  naa=strlen ( BLAST_AA_ALPHABET);
  aa=vcalloc ( naa+2, sizeof (char));
  sprintf ( aa, "%s", BLAST_AA_ALPHABET);
  lower_string (aa);

  /* Count the adjacent pairs.  The characters are converted to unsigned char
     first so that r0 and r1 always fall in 0..255: a plain char may be
     negative and would otherwise index outside count[] and mat[][]. */
  for ( tot=0,a=0; a< S->nseq; a++)
    for ( b=1; b<S->len[a]; b++)
      {
        r0=tolower((unsigned char)S->seq[a][b-1]);
        r1=tolower((unsigned char)S->seq[a][b]);
        mat[r0][r1]+=2;
        count[r0]++;
        count[r1]++;
        tot+=2;
      }

  /* Turn the counts of the alphabet symbols into log-odds scores */
  for ( a=0; a< naa; a++)
    for (b=0; b< naa; b++)
      {
        if ( aa[a]=='*' || aa[b]=='*');
        else
          {
            expected=((double)count[(int)aa[a]]/(double)tot)* ((double)count[(int)aa[b]]/(double)tot)*(double)tot;
            observed=((double)mat[(int)aa[a]][(int)aa[b]]);

            mat[(int)aa[a]][(int)aa[b]]=(expected==0)?0:((int)10*log((observed/expected)));
          }
      }

  /* Header: format tag, alphabet, number of training sequences, column labels */
  fprintf (fp,"# BLAST_MATRIX FORMAT\n#ALPHABET=%s\n#TRANSITION MATRIX TRAINED ON %d Sequence\n#", BLAST_AA_ALPHABET, S->nseq);
  for (a=0; a< naa; a++)fprintf ( fp, "%3c ", toupper((unsigned char)aa[a]));
  fprintf (fp,"\n");

  /* One row per alphabet symbol: the symbol followed by its scores */
  for (a=0; a< naa; a++)
    {
      fprintf (fp, "%c", toupper((unsigned char)aa[a]));
      for ( b=0; b< naa; b++)
        {
          fprintf (fp, "%3d ", mat[(int)aa[a]][(int)aa[b]]);
        }
      fprintf ( fp, "\n");
    }
  vfclose (fp);
  vfree (count);
  vfree (aa);

  return mat;
}
 
/*
 * mat2cmp: compares two 256x256 integer matrices cell by cell.
 *
 * Every position where at least one of the two matrices holds a non-zero
 * value contributes one (mat1 value, mat2 value) pair.  The pairs are handed
 * to return_r() and its result is returned unchanged; the caller in this file
 * reads entries [0], [1] and [2] of that result.
 *
 * Exits through myexit (EXIT_FAILURE) when either matrix is missing and
 * returns a null pointer when no position qualifies.
 */
double* mat2cmp ( int **mat1, int **mat2)
{
  int row, col;
  int n_pairs;
  int v1, v2;
  double **pairs;
  double *result;

  if ( !(mat1 && mat2))
    {
      fprintf ( stderr, "\nERROR: mat2cmp needs two matrices [FATAL:%s]", PROGRAM);
      myexit (EXIT_FAILURE);
    }

  /* First pass: how many positions are non-zero in at least one matrix */
  n_pairs=0;
  for (row=0; row<256; row++)
    {
      for (col=0; col<256; col++)
        {
          v1=mat1[row][col];
          v2=mat2[row][col];
          if ( v1==0 && v2==0)continue;
          n_pairs++;
        }
    }
  if ( n_pairs==0) return 0;

  pairs=declare_double (n_pairs, 2);

  /* Second pass: collect the pairs in the same row-major order */
  n_pairs=0;
  for (row=0; row<256; row++)
    {
      for (col=0; col<256; col++)
        {
          v1=mat1[row][col];
          v2=mat2[row][col];
          if ( v1==0 && v2==0)continue;

          pairs[n_pairs][0]=v1;
          pairs[n_pairs][1]=v2;
          n_pairs++;
        }
    }

  result=return_r (pairs, n_pairs);
  free_double(pairs, -1);
  return result;
}
   
/*
 * read_blast_matrix: reads a substitution matrix in the BLAST matrix format
 * written by output_mat() and returns it as a 256-row integer matrix.
 *
 * mat_name : name of the file to read.
 *
 * The leading lines whose first character is '#' or a white space are
 * skipped; the alphabet is taken from the "ALPHABET=<symbols>" tag found in
 * one of them.  Each following row holds a symbol and one value per alphabet
 * symbol.  The row symbol must match the alphabet order, otherwise a parsing
 * error is reported and the program exits through myexit (EXIT_FAILURE).
 *
 * Storage:
 *   - two regular symbols: the value is stored at [x-'A'][y-'A'] for the
 *     four upper/lower case combinations of the pair;
 *   - a pair involving a gap symbol (is_gap): GAP_CODE is used as the index
 *     of the gap and the other symbol is indexed by its raw lower and upper
 *     case character codes (no 'A' offset);
 *   - pairs involving '*' are read but not stored.
 *
 * When the file carries no ALPHABET tag the alphabet is empty and the matrix
 * is returned without any value filled in.
 */
int ** read_blast_matrix ( char *mat_name)
        {
        FILE *fp;
        int n_aa,aa1, aa2;
        int a, b, c;
        int **matrix;
        int value=0;
        char sbuf[VERY_LONG_STRING];
        char buf[256];
        char alp[257];

        matrix=declare_int (256,256);
        /* NOTE(review): row 30 is replaced by a 10000 int row; presumably this
           is the GAP_CODE row - confirm against define_header.h */
        vfree ( matrix[30]);
        matrix[30]=vcalloc(10000, sizeof (int));

        /* Without an ALPHABET tag alp must still be a valid (empty) string */
        alp[0]='\0';

        fp=vfopen ( mat_name, "r");
        while ( (c=fgetc(fp))=='#' || isspace(c) )
          {
            char *p;
            fgets ( sbuf, VERY_LONG_STRING, fp);
            if ( (p=strstr (sbuf, "ALPHABET")))
              sscanf (p, "ALPHABET=%256s", alp); /* width bounded by sizeof (alp)-1 */
          }
        ungetc(c, fp);
        lower_string (alp);
        n_aa=strlen (alp);

        for ( a=0; a< n_aa; a++)
            {
            /* Row label.  The width keeps an over-long token inside buf and the
               reset makes a failed read show up as a label mismatch below. */
            buf[0]='\0';
            fscanf ( fp, "%255s ", buf);

            aa1=tolower((unsigned char)buf[0]);

            if ( aa1!=alp[a])
              {
              fprintf ( stderr, "\nParsing_error when reading blast_matrix %s:\n%c %c",mat_name, aa1,alp[a]);
              fprintf ( stderr, "\n%c ", fgetc(fp));
              myexit (EXIT_FAILURE);
              }
            for ( b=0; b<n_aa; b++)
                {
                aa2=tolower ((unsigned char) alp[b]);
                fscanf ( fp, "%d ", &value);
                if (is_gap(aa1) || is_gap(aa2))
                  {
                    int c1, c2;
                    c1=(is_gap(aa1))?GAP_CODE:aa1;
                    c2=(is_gap(aa2))?GAP_CODE:aa2;
                    if ( c1==GAP_CODE && c2==GAP_CODE)
                      matrix[c1][c2]=value;
                    else if ( c1==GAP_CODE)
                      {
                      matrix[c1][tolower(c2)]=value;
                      matrix[c1][toupper(c2)]=value;
                      }
                    else
                      {
                      matrix[tolower(c1)][c2]=value;
                      matrix[toupper(c1)][c2]=value;
                      }
                  }
                else if ( aa1!='*' && aa2!='*')
                  {
                    matrix[tolower(aa1)-'A'][tolower(aa2)-'A']=value;
                    matrix[toupper(aa1)-'A'][toupper(aa2)-'A']=value;
                    matrix[tolower(aa1)-'A'][toupper(aa2)-'A']=value;
                    matrix[toupper(aa1)-'A'][tolower(aa2)-'A']=value;
                  }
                }
            fscanf(fp, "\n");
            }
        fclose (fp);

        return matrix;
        }

/*
 * output_blast_mat: writes mat to fname through output_mat(), using
 * BLAST_AA_ALPHABET as the alphabet and 'A' as the index offset.
 * Returns whatever output_mat() returns.
 */
int output_blast_mat (int **mat, char *fname)
{
  int status;

  status=output_mat (mat, fname, BLAST_AA_ALPHABET, 'A');
  return status;
}
                  
/*
 * output_mat: writes a substitution matrix to fname in BLAST matrix format.
 *
 * mat    : matrix to write; the cell of symbols x and y is read at
 *          mat[x-offset][y-offset], x and y being the lower-cased symbols.
 * fname  : name of the output file (opened with vfopen in "w" mode).
 * alp    : alphabet; gives the order of the rows and columns.
 * offset : value subtracted from a lower-cased symbol to get its index.
 *
 * Returns 1 on success and 0 when the file could not be opened.
 */
int output_mat (int **mat, char *fname, char *alp, int offset)
{
  char *aa;
  int a,b, naa;
  FILE *fp;

  naa=strlen (alp);
  aa=vcalloc ( naa+2, sizeof (char));
  sprintf ( aa, "%s",alp);
  lower_string (aa);
  if (!(fp=vfopen (fname, "w")))
    {
      /* Release the working copy of the alphabet: it used to leak here */
      vfree (aa);
      return 0;
    }
  fprintf (fp,"# BLAST_MATRIX FORMAT\n#ALPHABET=%s\n",alp);

  /* Column labels */
  for (a=0; a< naa; a++)fprintf ( fp, "%3c ", toupper((unsigned char)aa[a]));
  fprintf (fp,"\n");

  /* One row per symbol: the upper-case label followed by its values */
  for (a=0; a< naa; a++)
    {
      fprintf (fp, "%c", toupper((unsigned char)aa[a]));
      for ( b=0; b< naa; b++)
        {
          fprintf (fp, " %5d", mat[aa[a]-offset][aa[b]-offset]);
        }
      fprintf ( fp, "\n");
    }
  vfree (aa);
  vfclose (fp);
  return 1;
}

void output_pavie_mat (int **mat, char *fname, double gep, char *alp)
{
  int n, a, b;
  FILE *fp;
  
  n=strlen (alp);
  fp=vfopen (fname, "w");
  fprintf (fp,"# PAVIE_MATRIX FORMAT\n#ALPHABET=%s\n",alp);
  
  for(a=0; a< n; a++)
     {
       for ( b=a; b<n; b++)
      {       
        fprintf (fp, "%c %c %.3f\n", toupper(alp[a]), toupper(alp[b]), (float)mat[alp[a]-'A'][alp[b]-'A']/PAVIE_MAT_FACTOR);
      }
     }
   if ( gep!=UNDEFINED)fprintf ( fp, "- - %.3f\n", gep/PAVIE_MAT_FACTOR);
   vfclose(fp);
 }

/*
 * read_pavie_matrix: reads a matrix in the PAVIE format written by
 * output_pavie_mat() and returns it as a 256x256 integer matrix.
 *
 * mat_name : name of the file to read.
 *
 * The leading lines whose first character is '#' or a white space are
 * skipped; the alphabet is taken from the one that reads "#ALPHABET=<symbols>".
 * Every following line of the form "<x> <y> <value>" is stored, after
 * multiplication by PAVIE_MAT_FACTOR, at [x-'A'][y-'A'] and [y-'A'][x-'A'] for
 * all upper/lower case combinations.  The line "- - <value>" sets the gap
 * penalty, which is finally copied into the [symbol-'A'][GAP_CODE] cell of
 * every alphabet symbol whose lower-case cell is still 0.
 *
 * Lines whose symbols would give a negative index (characters below 'A') are
 * ignored rather than written outside the matrix.
 */
int ** read_pavie_matrix ( char *mat_name)
        {
        FILE *fp;
        int c, n_aa;
        char aa1, aa2;
        float v;
        int **matrix;
        char sbuf[VERY_LONG_STRING];
        char alp[257];
        int gep=UNDEFINED;

        matrix=declare_int (256,256);

        /* Without an ALPHABET line alp must still be a valid (empty) string */
        alp[0]='\0';

        fp=vfopen ( mat_name, "r");
        while ( (c=fgetc(fp))=='#' || isspace(c) )
          {
            /* fgetc has consumed the first character: sbuf starts right after it */
            fgets ( sbuf, VERY_LONG_STRING, fp);
            sscanf (sbuf, "ALPHABET=%256s", alp); /* width bounded by sizeof (alp)-1 */
          }
        ungetc(c, fp);

        n_aa=strlen (alp);
        while ( fgets ( sbuf, VERY_LONG_STRING, fp)!=NULL)
            {
              aa1=aa2='Z';
              if (sscanf (sbuf, "%c %c %f",&aa1, &aa2, &v)==3)
                {
                  v*=PAVIE_MAT_FACTOR;
                  if (aa1=='-' && aa2=='-')gep=v;
                  else
                    {
                      /* unsigned char: the ctype functions are not defined for
                         negative values */
                      int l1=tolower((unsigned char)aa1)-'A';
                      int u1=toupper((unsigned char)aa1)-'A';
                      int l2=tolower((unsigned char)aa2)-'A';
                      int u2=toupper((unsigned char)aa2)-'A';

                      /* a symbol below 'A' cannot be indexed: skip the line */
                      if ( l1<0 || u1<0 || l2<0 || u2<0)continue;

                      matrix[l1][l2]=v;
                      matrix[u1][u2]=v;
                      matrix[l1][u2]=v;
                      matrix[u1][l2]=v;

                      matrix[l2][l1]=v;
                      matrix[u2][u1]=v;
                      matrix[l2][u1]=v;
                      matrix[u2][l1]=v;
                    }
                }
            }
        if ( gep!=UNDEFINED)
          {
            int a;
            for (a=0; a< n_aa; a++)
              {
                int l=tolower((unsigned char)alp[a])-'A';
                int u=toupper((unsigned char)alp[a])-'A';

                if ( l<0 || u<0)continue;
                if (!matrix[l][GAP_CODE])
                  {
                    matrix[l][GAP_CODE]=gep;
                    matrix[u][GAP_CODE]=gep;
                  }
              }
          }
        vfclose (fp);
        return matrix;
        }

/*
 * seq2year: rewrites, in place, every sequence of S as an "age channel".
 *
 * S      : sequences to convert; S->seq and S->name are modified.
 * modulo : number of consecutive years sharing one symbol; used as a
 *          divisor, so it must not be 0.
 *
 * The year counter of a sequence starts at the number following the
 * "_FIRSTYEAR" tag of its comment, or at 1 when the comment is missing, has
 * no such tag, or the tag is not followed by a number.  Every non-gap
 * residue is replaced by 'a'+((year/modulo)%10) and advances the counter by
 * one; gaps are left untouched.  The sequence name receives the suffix
 * "_agechannel<modulo>", which replaces any existing "_agechannel..." suffix.
 *
 * Returns S.
 */
Sequence *seq2year ( Sequence *S, int modulo)
{
  int a, b, y;
  int first;
  char *s;
  char new_channel[100];

  sprintf( new_channel, "_agechannel%d",modulo);

  for ( a=0; a<S->nseq; a++)
    {
      /* Default start year; it used to be left unset (or inherited from the
         previous sequence) when the tag was present but could not be parsed */
      first=1;
      if (S->seq_comment[a] && (s=strstr(S->seq_comment[a], "_FIRSTYEAR")))
        {
          if ( sscanf (s, "_FIRSTYEAR%d_", &first)!=1)first=1;
        }

      for ( y=first,b=0; b<S->len[a]; b++)
        {
          if ( !is_gap(S->seq[a][b]))
            {
              S->seq[a][b]='a'+((y/modulo))%10;
              y++;
            }
        }

      /* Replace an existing channel suffix, otherwise append the new one */
      if ( (s=strstr ( S->name[a], "_agechannel")))
         {
           sprintf ( s, "%s", new_channel);
         }
      else strcat (S->name[a], new_channel);
    }
  return S;
}

/*
 * output_n_pavie_age_channel: calls output_pavie_age_channel() n times on S,
 * with the modulo values 1, 10, 100, ... (multiplied by 10 after each call).
 * A value of 0 for n stands for 2.
 * Returns the Sequence returned by the last call.
 */
Sequence* output_n_pavie_age_channel (Sequence *S, char *name, int n)
{
  int modulo;
  int done;

  if (n==0)
    {
      n=2;
    }

  modulo=1;
  done=0;
  while (done<n)
    {
      S=output_pavie_age_channel (S, name, modulo);
      done++;
      modulo*=10;
    }
  return S;
}
  



/*
 * output_pavie_age_channel: converts S into the age channel of the given
 * modulo (see seq2year) and records the result in files named after name.
 *
 * S      : sequences; modified in place by seq2year().
 * name   : prefix of the three file names built here:
 *            <name>_pavie_age_matrix.mat_list  list of matrix file names
 *            <name>_age_channel.fasta          accumulated sequences
 *            <name>_age_mat_mod<modulo>.mat    matrix of this modulo
 * modulo : forwarded to output_age_matrix() and seq2year().
 *
 * On the first call of the run (static flag) existing list and fasta files
 * are removed and the two output file names are displayed on stdout.  Each
 * call appends the matrix name to the list and passes the fasta file and a
 * temporary fasta dump of the converted sequences to file_cat().
 *
 * The file names are built with bounded writes; a name too long to fit is a
 * fatal error instead of a buffer overflow.
 *
 * Returns S.
 */
Sequence* output_pavie_age_channel (Sequence *S, char *name, int modulo)
  {
    Alignment *A;
    FILE *fp;
    static int display;
    char mat_list_name[1000];
    char seq_list[1000];
    char mat_name[1000];
    char *tmp;
    int too_long=0;
    int len;

    len=snprintf ( mat_list_name, sizeof (mat_list_name), "%s_pavie_age_matrix.mat_list", name);
    if ( len<0 || len>=(int)sizeof (mat_list_name))too_long=1;
    len=snprintf ( seq_list, sizeof (seq_list), "%s_age_channel.fasta",name);
    if ( len<0 || len>=(int)sizeof (seq_list))too_long=1;
    len=snprintf ( mat_name, sizeof (mat_name), "%s_age_mat_mod%d.mat",name, modulo);
    if ( len<0 || len>=(int)sizeof (mat_name))too_long=1;

    if ( too_long)
      {
        fprintf ( stderr, "\nERROR: output name %s is too long [FATAL:%s]\n", name, PROGRAM);
        myexit (EXIT_FAILURE);
      }

    /* First call only: start from fresh list and sequence files */
    if ( display==0 )
      {
        if (check_file_exists(seq_list))vremove (seq_list);
        if (check_file_exists(mat_list_name))vremove (mat_list_name);
      }
    output_age_matrix ( mat_name, modulo);

    fp=vfopen  ( mat_list_name,"a");
    fprintf ( fp, "%s\n", mat_name);
    vfclose ( fp);

    S=seq2year (S,modulo);
    A=seq2aln (S, NULL, KEEP_GAP);
    output_fasta_seq (tmp=vtmpnam (NULL),A);
    file_cat (seq_list,tmp,seq_list);

    if ( display==0)
      {
        display_output_filename ( stdout, "AGE_MAT_LIST", "MAT_LIST", mat_list_name, CHECK);
        display_output_filename ( stdout, "AGE_SEQ", "FASTA", seq_list, CHECK);
        display=1;
      }
    fprintf ( stderr, "\nModulo:%d years", modulo);
    fprintf ( stderr, "\n");
    free_aln (A);
    return S;
  }
//
// Name manipulation
//

/*
 * clean_aln: cleans the text fields of an alignment in place.
 * The sequence and alignment comments go through clean_string(), the names
 * through translate_names() and the attached Sequence through
 * clean_sequence().  A null alignment is returned untouched.
 */
Alignment *clean_aln (Alignment *A)
{
  int n_seq;

  if ( !A) return A;

  n_seq=A->nseq;
  A->seq_comment=clean_string (n_seq, A->seq_comment);
  A->aln_comment=clean_string (n_seq, A->aln_comment);
  A->name=translate_names (n_seq, A->name);
  A->S=clean_sequence (A->S);

  return A;
}
/*
 * clean_sequence: cleans the text fields of a Sequence in place.
 * The comments go through clean_string() and the names through
 * translate_names().  A null Sequence is returned untouched.
 */
Sequence *clean_sequence ( Sequence *S)
{
  if ( S)
    {
      S->seq_comment=clean_string (S->nseq, S->seq_comment);
      S->name=translate_names (S->nseq, S->name);
    }
  return S;
}
/*
 * translate_names: applies translate_name() to the first n entries of name.
 * The strings are modified in place; the array itself is returned.
 */
char ** translate_names (int n, char **name)
{
  int i;

  i=0;
  while (i<n)
    {
      translate_name (name[i]);
      i++;
    }
  return name;
}
/*
 * translate_name: sanitizes a sequence name in place.
 *
 * name : null-terminated string to modify.
 *
 * A name starting with a single quote is left untouched.  Otherwise, over
 * the original length of the string, every white space character is replaced
 * by '\0' (so the name ends at its first white space) and each of
 * ; : ( ) , is replaced by '_'.
 */
void translate_name ( char *name)
      {

      int len;
      int a;

      len=strlen (name);

      if ( name[0]=='\'')return;
      for ( a=0; a<len; a++)
            {
            /* unsigned char: isspace() is not defined for negative values,
               which a plain char holds for bytes above 127 */
            if ( isspace((unsigned char)name[a]))name[a]='\0';
            else if ( name[a]==';' ||name[a]==':' ||name[a]=='(' || name[a]==')' || name[a]==',')
                  name[a]='_';
            }
      }

/*
 * display_sequences_names: prints a summary of the sequences of S on fp.
 *
 * S                : sequences to list; when null an error is printed on fp
 *                    and the program exits through myexit (EXIT_FAILURE).
 * fp               : stream to write to.
 * check_pdb_status : when non-zero, each sequence is tested with
 *                    seq_is_pdb_struc() and reported as "Struct Yes PDBID
 *                    <id>" or "Struct No"; otherwise "Struct Unchecked".
 * print_templates  : when non-zero, display_sequence_templates() is called
 *                    for each sequence.
 *
 * One line is printed per sequence with its input file, name, length and
 * type; the file and name columns are padded to the longest name.
 *
 * Returns fp (as handed back by display_sequence_templates when used).
 */
FILE * display_sequences_names (Sequence *S, FILE *fp, int check_pdb_status, int print_templates)
        {
            int a;
            int max_len;
            char *r;
            char *type;

            if ( !S)
               {
                 fprintf (fp,"\nERROR: NO SEQUENCE READ [FATAL:%s]\n", PROGRAM); myexit (EXIT_FAILURE);
               }
            /* A missing type is reported the same way in the header and in the
               per-sequence lines instead of handing a null pointer to %s */
            type=(S->type)?S->type:"Unknown type";

            for ( a=0, max_len=0; a< S->nseq; a++)max_len=MAX(max_len, (int)strlen (S->name[a]));
            fprintf ( fp, "\nINPUT SEQUENCES: %d SEQUENCES  [%s]", S->nseq,type);
            for ( a=0; a< S->nseq; a++)
                {
                  /* strlen() returns a size_t: it is converted to match %4d */
                  fprintf (fp, "\n  Input File %-*s Seq %-*s Length %4d type %s",max_len,S->file[a], max_len,S->name[a],(int)strlen ( S->seq[a]), type);
                  if (check_pdb_status)
                    {
                      if ((r=seq_is_pdb_struc (S, a)))fprintf (fp, " Struct Yes PDBID %s", get_pdb_id(r));
                      else fprintf (fp, " Struct No");
                    }
                  else fprintf (fp, " Struct Unchecked");
                  if ( print_templates)fp=display_sequence_templates (S, a, fp);
                }
            fprintf ( fp, "\n");
            return fp;

        }
/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Fri Oct 26 17:03:04     2007. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with T-COFFEE; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*    */
/*********************************COPYRIGHT NOTICE**********************************/

Generated by  Doxygen 1.6.0   Back to index