Logo Search packages:      
Sourcecode: t-coffee version File versions  Download package

aln_convertion_util.c

#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <stdarg.h>
#include <string.h>
 
#include "io_lib_header.h"
#include "util_lib_header.h"
#include "dp_lib_header.h"
#include "define_header.h"

int **index_seq_res      ( Sequence *S1, Sequence *S2, int **name_index)
{
  /*Index the residues of S1 according to S2
    index[seq1 of S1][z]->x, where x is the position of residue z of seq1/S1 in S2->seq[index[Seq1/S1]] 
  */ 
  int a;
  int **index;
  char *seq1, *seq2;
  Alignment *Profile;
  
  index=vcalloc ( S1->nseq, sizeof (int*));
  
  for (a=0; a< S1->nseq; a++)
    {

      seq1=S1->seq[a];
      
      if (name_index[a][0]==-1)
      seq2=NULL;
      else if (name_index[a][1]==-1)
      seq2=S2->seq[name_index[a][0]];
      else if ((Profile=seq2R_template_profile (S2, name_index[a][0])) !=NULL)
      {
        seq2=Profile->seq_al[name_index[a][1]];
      }
      index[a]=get_res_index ( seq1, seq2);
    }
  return index;
}

int **index_seq_name ( Sequence *S1, Sequence *S2)
{
  /*Index the names of S1 according to S2
    index[seq1 of S1][0]->x  if seq1 is in seq2
                        ->-1 if seq1 is nowhere to be found
    index[seq1 of S1][1]->z if seq1 is the zth sequence within the xth profile of S2
  */
  int **index;
  int a, b, x, z;
  Alignment *Profile;
  index=declare_int (S1->nseq, 2);

  
  for ( a=0; a<S1->nseq; a++)
    {
      index[a][0]=index[a][1]=-1;
      x=name_is_in_list (S1->name[a],S2->name,S2->nseq,100);
      if ( x!=-1){index[a][0]=x;index[a][1]=-1;}
      for ( b=0; b<S2->nseq; b++)
          {
            if ((Profile=seq2R_template_profile (S2,b)))
            {
              z=name_is_in_list (S1->name[a],Profile->name,Profile->nseq,100);
              if ( z!=-1){index[a][0]=b;index[a][1]=z;b=S2->nseq;}
            }
          }
    }
  return index;
}
              
              
          
        
int *get_name_index (char **l1, int n1, char **l2, int n2)
{
  int *r;
  int a;
  /*return Array[Index_L1]=Index_L2 */
  r=vcalloc ( n1, sizeof (int));
  for ( a=0; a< n1; a++)
    r[a]=name_is_in_list (l1[a],l2,n2,100);
  return r;
}

int* get_res_index (char *seq0, char *seq1)
{
  int *coor, a;

  if ( !seq0 || !seq1) return NULL;
  

  coor=vcalloc ( strlen (seq0)+1, sizeof (int));
  if (!strm (seq0, seq1))
    {
      int r0, r1 , isr0, isr1;
      int l0=0, l1=0;
      Alignment *A;
      A=align_two_sequences (seq0,seq1,"pam250mt", -10, -1, "myers_miller_pair_wise");
      for ( a=0; a< A->len_aln; a++)
      {
        r0=A->seq_al[0][a];r1=A->seq_al[1][a];
        isr0=!is_gap(r0);
        isr1=!is_gap(r1);
        l0+= isr0;
        l1+= isr1;

        if (isr0 && isr1)coor[l0-1]=l1-1;
        else if (isr0)  coor[l0-1]=-1;
      }
      free_aln (A);
    }
  else
    {
      int l0;
      
      l0=strlen (seq0);
      for ( a=0;a< l0; a++)
      coor[a]=a;
    }
  
  return coor;
}

int change_residue_coordinate ( char *in_seq1, char *in_seq2, int v)
{
  /*Expresses the coordinate of a residue in seq1, in the coordinate system of seq2*/

  
  static char *seq1, *seq2;
  static int *coor;
  static int max;

  if ( seq1 !=in_seq1 || seq2 !=in_seq2)
    {
      int r0, r1 , isr0, isr1;
      int l0=0, l1=0;
      Alignment *A;
      int a;
      
      vfree (coor);
      seq1=in_seq1, seq2=in_seq2;
      A=align_two_sequences (seq1,seq2,"pam250mt", -10, -1, "myers_miller_pair_wise");
            
      coor=vcalloc ( A->len_aln, sizeof (int));
      for ( a=0; a< A->len_aln; a++)
      {
        r0=A->seq_al[0][a];r1=A->seq_al[1][a];
        
        isr0=!is_gap(r0);
        isr1=!is_gap(r1);
        l0+= isr0;
        l1+= isr1;

        if (isr0 && isr1)coor[l0-1]=l1-1;
        else if (isr0)  coor[l0-1]=-1;
      }
      free_aln (A);
    }
  return coor[v];
}
        
   
int ** minimise_repeat_coor (int **coor, int nseq, Sequence *S)
    {
    int **new_coor;
    int a, min;
    new_coor=declare_int ( nseq, 3);
    min=return_min_int (coor, nseq, 2);
    for ( a=0; a< nseq; a++)
        {
      new_coor[a][0]=coor[a][0];
      new_coor[a][1]=coor[a][1];
      new_coor[a][2]=min;
      }
    return new_coor;
    }
int ** get_nol_seq ( Constraint_list *CL, int **coor, int nseq, Sequence *S)
    {
    int a, s, p, l, nl;
    int **buf;
    int **new_coor;
    
    new_coor=declare_int ( nseq+1, 3);
    
    
    buf=get_undefined_list ( CL);
    
    

    for ( a=0; a< nseq; a++)buf[coor[a][0]][coor[a][1]]=1;

    
    for ( a=0; a< nseq; a++)
        {
      s=coor[a][0];
      p=coor[a][1]+1;
      l=strlen(S->seq[s]);
      nl=0;
      while ( p<=l && !buf[s][p++])nl++;
      new_coor[a][0]=s;
      new_coor[a][1]=coor[a][1];
      new_coor[a][2]=nl;
      }
    free_int ( buf, -1);
    return new_coor;
    }



int compare_pos_column( int **pos1,int p1, int **pos2,int p2, int nseq)
    {
    int a,v1, v2;
    int identical=0;


    
    for ( a=0; a< nseq; a++)
      {
      
      v1=pos1[a][p1];
      v2=pos2[a][p2];
      
      if (v1>0 || v2>0) 
          {
          if ( v1!=v2)return 0;
          else identical=1;
          }
      }
    
    return identical;
    }
char *seq2alphabet (Sequence *S)
{
  return array2alphabet (S->seq, S->nseq);
}
char *aln2alphabet (Alignment *A)
{
  return array2alphabet (A->seq_al, A->nseq);
}  

char *array2alphabet (char **array, int n)
{
  int a, b, l;
  int *hasch;
  char *alphabet;

  hasch=vcalloc (256, sizeof (int));
  alphabet=vcalloc ( 257, sizeof (char));
  
  
  for ( a=0; a<n; a++)
    {
      l=strlen (array[a]);
      for ( b=0; b<l; b++)
      hasch[tolower(array[a][b])]++;
    }
  for ( a=0, b=0; a< 256; a++)
    if (hasch[a])alphabet[b++]=a;
  alphabet[b]='\0';
  vfree (hasch);
  return alphabet;
}
  
Alignment * seq2pdb   ( char *name, char *seq,Blast_param *B)
{
  Alignment *R;
  char *new_pdb;
  char *tmp_pdb;
  char *buf_seq;
  char command [LONG_STRING];
  /*1-Reads a sequence
    2-Make a blast against nlr3d
    3-pick up the best match
    4-get the pdb structure and write it in name.pdb
  */

 
  
  R=seq2blast ( name, seq,B);

  if ( R==NULL || R->nseq<2){free_aln (R);R= NULL;}
  else 
    {
      R->nseq=2;
      R->score=R->score_aln=aln2sim (R, "idmat");
      
      buf_seq=vcalloc ( R->len_aln+1, sizeof (char));
      sprintf (buf_seq, "%s", R->seq_al[1]);
      ungap (buf_seq);
        
      tmp_pdb=normalize_pdb_file(is_pdb_struc(R->name[1]),buf_seq, vtmpnam (NULL));
        
      if   ( !tmp_pdb){free_aln(R);R=NULL;}
      else 
      {
        declare_name(new_pdb);
        sprintf (new_pdb, "%s%s.pdb",get_cache_dir(), name);
        if ( !check_file_exists(new_pdb))
          {
            sprintf (command, "cp %s %s", tmp_pdb, new_pdb);
            my_system (command);
            add2file2remove_list (new_pdb); /*make sure this pdb file disappears*/
          }
        vfree (new_pdb);
      }
      vfree (buf_seq);
    }
  return R;      
}

Alignment* seq2blast ( char *name, char *seq, Blast_param *B)
    {

      int **sim=NULL;
      int **cov=NULL;
      char loc_blast_server[100];
      char command [LONG_STRING];
      char *seq_file, *blast_result, *prf_result;
      char  permanent_blast_file[100];
      int n_failure=0;
      int a;

      Alignment *BProfile,*NBProfile;
      FILE *fp;

 
      
      seq_file=vtmpnam (NULL);
      blast_result=vtmpnam (NULL);
      prf_result=vtmpnam (NULL);

      sprintf ( loc_blast_server, "%s", B->blast_server);
      fp=vfopen (seq_file, "w");
      fprintf (fp, ">query\n%s",seq);
      vfclose (fp); 
      
      sprintf ( permanent_blast_file , "%sVs%s.blast_result",name, B->db);

      if (is_blast_file (permanent_blast_file))
      {
        sprintf ( command, "cp %s %s", check_file_exists (permanent_blast_file), blast_result);
        sprintf ( loc_blast_server, "File");
      }
      else
      {

        
        if ( strm2 (loc_blast_server, "sib", "SIB"))
          {
            check_program_is_installed ( "blastall.remote", SIB_BLAST_4_TCOFFEE, "SIB_BLAST_4_TCOFFEE","not public", 1); 
             
            sprintf (command, "%s -p blastp -s  sib-blast.unil.ch -d %s -i %s -m 6 >%s",SIB_BLAST_4_TCOFFEE,B->db, seq_file, blast_result);
          }
        
        else if (strm2 (loc_blast_server,"ncbi","NCBI"))
          {
            check_program_is_installed ( "blastcl3",NCBI_BLAST_4_TCOFFEE, "NCBI_BLAST_4_TCOFFEE","ftp://ftp.ncbi.nih.gov/blast/network/netblast/", 1); 
     
            sprintf (command, "%s -p blastp  -d %s -i %s -m 6 -o %s", NCBI_BLAST_4_TCOFFEE,B->db, seq_file, blast_result);
          }
        else if (strm2 (loc_blast_server,"local","LOCAL"))
          {
            check_program_is_installed ( "blastall",LOCAL_BLAST_4_TCOFFEE, "LOCAL_BLAST_4_TCOFFEE","ftp://ftp.ncbi.nih.gov/blast/", 1); 
            sprintf (command, "%s -p blastp -d %s -i %s -m 6 -o %s",LOCAL_BLAST_4_TCOFFEE,B->db, seq_file, blast_result);
          }
        else
          {
            sprintf ( "ERROR: %s is not a legal blast mode [FATAL:%s]", loc_blast_server, PROGRAM);
            exit ( EXIT_FAILURE);
          }
      }
      
      fprintf (stderr, "\n\t\t[seq2blast: %s Vs %s",name, B->db); 
      while ( !check_file_exists ( blast_result))
      {
        my_system (command);     
        if ( !is_blast_file (blast_result))
          {
            fprintf ( stderr, "\n\t\t***Blast %s Failed on %s: Retry***",loc_blast_server,name );
            n_failure++;
            if ( n_failure==10)exit (EXIT_FAILURE);
          }

        check_program_is_installed ( "seq_reformat",SEQ_REFORMAT_4_TCOFFEE, "SEQ_REFORMAT_4_TCOFFEE",MAIL, 1); 
        sprintf ( command, "%s -in %s -input blast_aln -action +rmgap_col '#1'  -output clustalw_aln > %s",SEQ_REFORMAT_4_TCOFFEE,blast_result, prf_result);
        my_system (command);
      }
      
      fprintf (stderr, "(%s)]", loc_blast_server);
      
      
      if ( !is_blast_file (permanent_blast_file))
      {
        sprintf (command, "cp %s %s%s", blast_result,get_cache_dir(),permanent_blast_file);
        my_system ( command);
      }

     if ( !is_aln(prf_result))
       {
       BProfile=NULL;
       return NULL;
       }
     else 
       {
       
       BProfile=main_read_aln(prf_result, NULL);
       }

     if ( getenv4debug ("DEBUG_BLAST")){fprintf ( stderr, "\n[DEBUG_BLAST:seq2blast]");print_aln (BProfile);}
     
     NBProfile=declare_aln (NULL);
     NBProfile=realloc_aln2 (NBProfile,100,BProfile->len_aln);
     NBProfile->nseq=0;
     NBProfile->len_aln=BProfile->len_aln;
     
     sprintf ( NBProfile->seq_al[NBProfile->nseq], "%s", BProfile->seq_al[0]);
     sprintf ( NBProfile->name[NBProfile->nseq], "%s", BProfile->name[0]);
     NBProfile->nseq++;

     sim=get_sim_master_aln_array (BProfile,0, "idmat");
     cov=get_cov_master_aln_array (BProfile,0, "idmat");

     for ( a=1; a< BProfile->nseq && NBProfile->nseq <100; a++)
       {
       if ( sim[0][a]>=B->min_id && sim[0][a]<=B->max_id && cov[0][a]>=B->min_cov)
         {
           sprintf ( NBProfile->seq_al[NBProfile->nseq], "%s", BProfile->seq_al[a]);
           sprintf ( NBProfile->name[NBProfile->nseq], "%s", BProfile->name[a]);
           NBProfile->nseq++;
         }
       }
     print_aln (NBProfile);
     free_int (sim, -1);
     free_int (cov, -1);
     free_sequence (free_aln (BProfile), -1);

     return NBProfile;
    }
Sequence * seq2unique_name_seq ( Sequence *S)
      {
      int duplicated=0;
      int a, b;
            
      for (a=0; a< S->nseq-1; a++)
          for (b=a+1; b<S->nseq; b++)
            {
            if ( strm (S->name[a], S->name[b]))
                  {
                  fprintf ( stderr, "\nWarning: Sequence %s is duplicated in file %s. The sequence will be renamed [WARNING:%s]", S->name[a], S->file[a], PROGRAM);
                  duplicated=1;
                  }
            }
      if (duplicated==0) return S;
      else
            {
            Alignment *A;
            char *tmp1, *tmp2;
            char command[1000];

            A=seq2aln (S, NULL, 0);
            tmp1=vtmpnam (NULL);tmp2=vtmpnam (NULL);
            output_fasta_seq (tmp1, A);
            sprintf ( command, "t_coffee -other_pg seq_reformat -in %s -action +name2unique_name > %s", tmp1, tmp2);
            my_system (command);
            free_aln (A);
            free_sequence (S, S->nseq);
            S=get_fasta_sequence (tmp2, NULL);
            return S;
            }
      }

Sequence * seq2blast_profile ( Sequence *S, int n, Blast_param *B)
    {
      Alignment *A;
      char *tmp;
      
      
      A=seq2blast ( S->name[n], S->seq[n],B);
      A=local_maln2global_maln ( S->seq[n], A); 
      output_clustal_aln ( tmp=vtmpnam (NULL), A);
            
      (S->T[n])->RB=fill_X_template ( S->name[n], tmp, "_RB_");
      fprintf (stderr, " [Kept %d Seqs]\n",A->nseq);
      
      if ( getenv4debug ( "DEBUG_BLAST")){fprintf ( stderr, "\n[DEBUG_BLAST:seq2blast_profile]");print_aln (A);}
      free_sequence (free_aln (A), -1);
    
      return S;
      }
      

      
int ** seq2aln_pos      (Alignment *A, int *ns, int **l_s)
    {
    int **code;
    int a, b,c, d,l, p , g;
    

    l=MAX(strlen (A->seq_al[l_s[0][0]]), strlen (A->seq_al[l_s[1][0]]));
    code=declare_int ((A->S)->nseq,l+1);
    
    for (c=0; c<2; c++)
        {
      l=strlen (A->seq_al[l_s[c][0]]);
      for (d=0; d<ns[c]; d++)
          {
          a=A->order[l_s[c][d]][0];
          for (p=0, b=0; b<l; b++)
              {
                g=is_gap (A->seq_al[l_s[c][d]][b]);
                if (!g){p++; code[a][p]=b+1;}
            }
          }
      }
    return code;
    }

Alignment *local_maln2global_maln (char *seq, Alignment *A)
    {
      /*inputs a BLAST alignmnent where the master sequence may be partila
      outputs the same alignment, while amkeing sure the profile is perfectly in sink with its master sequence
      */

      int a, b, c;
      int start, end, rend;
      char qname[100], *p;
      Alignment *B=NULL;
      
      sprintf ( qname, "%s", A->name[0]);
      p=strtok (qname, "_");
      if ( !strm (p, "QUERY"))
         {
           fprintf ( stderr, "\nUnappropriate format for the alignment [%s:FATAL]", PROGRAM);
           exit (EXIT_FAILURE);
         }
           
      start=atoi(strtok (NULL, "_"));
      end=atoi(strtok (NULL, "_"));
      rend=strlen (seq);
      
      B=copy_aln (A,NULL);
      if ( start>1 || end<rend )A=realloc_aln (A,rend+1);

      for (a=0; a<start-1; a++)
      {
        A->seq_al[0][a]=seq[a];
        for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-';
      }
      
      for (c=0,a=start-1; a< end; a++, c++)
      {
        A->seq_al[0][a]=seq[a];
        for ( b=1; b< A->nseq; b++)
          {
            A->seq_al[b][a]=B->seq_al[b][c];
          }
      }
      for ( a=end; a<rend; a++)
      {
        A->seq_al[0][a]=seq[a];
        for ( b=1; b< A->nseq; b++)A->seq_al[b][a]='-';
      }
      for ( a=0; a< A->nseq; a++) A->seq_al[a][rend]='\0';
      free_aln (B);
      
      A->len_aln=rend;
      return A;
    }
  
int *  seq2inv_pos ( char *seq)
{
  /*returns a list where each value gives the index of the corresponding residue in seq*/
  /*Numbering: 1 to L : Analogy to the aln2pos*/
  
  int a,l1, l2;
  int *pos;
  
  l1=strlen ( seq);
  for ( l2=a=0; a< l1; a++)l2+=1-is_gap(seq[a]);
  pos=vcalloc (l2+1, sizeof (int));
  for ( l2=a=0; a< l1; a++)if (!is_gap(seq[a]))pos[++l2]=a+1;
  return pos;
}
  
  
int ** aln2pos_simple_2 (Alignment *A)
    {
    int **pos1;
    int **pos2;
    pos1=aln2pos_simple (A, A->nseq);
    pos2=duplicate_int  (pos1, A->nseq,read_size_int (pos1[0],sizeof (int)));
    pos1=aln2pos_simple (NULL, 0);
    return pos2;
    }
int ** aln2pos_simple (Alignment *A, int n_nseq, ...)
    {
    /*
    function documentation: start
    int ** aln2pos_simple (Alignment *A, int n_nseq, ...)

####with two parameter only: Alignment *A, int n_nseq

    this function turns A into pos, a matrix where each residue is replace by its inice acvcording to the complete sequence.
    the indices in pos are computed using A->order[x][1] that contains the indice of the first residue of seq x of A
    
    n_nseq MUST not be null

####with more than two param:
     int ** aln2pos_simple (Alignment *A, int n_nseq, int *ns, int **ls)
     n_nseq must be set to 0 for the param 3 and four to be read
     
     ns[x]=number seq in group 
     ls[x]=list of the sequences in group x ( size=ns[x])
     
    The computation of the indices is only carried out on the scpecified residues

####IMPORTANT
      in pos, the numbering of the residues goes from 1 to L:
        pos[0][0]=3, means that the first position of the first sequence 
      in the alignmnet contains residue #3 from sequence A->order[0][0];
      
    function documentation: end
    */

    int a, b,c, p, g,l;
    int **T;

    int max_nseq;
    int n_len=0;

    int *list=NULL;
    int *ns=NULL;
    int **ls=NULL;



    va_list ap;
    
    
    if ( A==NULL)
       {
       return NULL;
       }
    else
       {
       if ( n_nseq>0)
          {
        list=vcalloc(n_nseq, sizeof (int));
        for ( a=0; a< n_nseq; a++)list[a]=a;
        }
       else
          { 
        va_start (ap, n_nseq);
        ns=va_arg(ap, int * );
        ls=va_arg(ap, int **);
        va_end(ap);
        list=vcalloc ( ns[0]+ns[1], sizeof (int));
        n_nseq=0;
        for ( a=0; a< ns[0]; a++)list[n_nseq++]=ls[0][a];
        for ( a=0; a< ns[1]; a++)list[n_nseq++]=ls[1][a];

        }
       max_nseq=MAX(read_size_int(A->order,sizeof (int*)),return_max_int (A->order, read_size_int(A->order,sizeof (int*)),0))+1;
       n_len=get_longest_string ( A->seq_al,A->max_n_seq, NULL, NULL)+1;  

       T=declare_int (max_nseq, n_len);
      
       for ( c=0; c< n_nseq; c++)
           {
         a=list[c];            
         l=strlen ( A->seq_al[a]);
         
         for ( p=A->order[a][1],b=0; b<l; b++)
             {
             g=1-is_gap(A->seq_al[a][b]);
             p+=g;
             T[a][b]=(g==1)?p:-(1+p);
             if ( A->seq_al[a][b]==UNDEFINED_RESIDUE)T[a][b]=0;
             if ( A->seq_cache && T[a][b]>0)T[a][b]=A->seq_cache[A->order[a][0]][T[a][b]];
             } 
         }
       vfree (list);
       }
   
   return T;
   }
Alignment ** split_seq_in_aln_list ( Alignment **aln, Sequence *S, int n_seq, char **seq_list)
        {
      int a, b, c;
      char * long_seq=NULL;
      int    len,l;
      int  **translation;
      int  **table;


      

      if ( aln==NULL)return NULL;
      translation=declare_int ( S->nseq,2);
      
      for (len=0,a=0; a< S->nseq; a++)
          {
          if((b=name_is_in_list (S->name[a],seq_list, n_seq, 100))!=-1)
             {
             l=strlen(S->seq[a])+1;
             long_seq=vrealloc(long_seq,(len+l+1)*sizeof(char));
             long_seq=strcat(long_seq, S->seq[a]);
             long_seq=strcat(long_seq, "*");   
             
             translation[a][0]=b;
             translation[a][1]=len;
             len+=l;
             }
          else translation[a][0]=-1;
          }

      long_seq[len-1]='\0';
      len--;

      table=declare_int ( len+1, 2);

      for ( b=0,a=0; a< S->nseq; a++)
          {
          if ( translation[a][0]!=-1)
             {
             c=1;
             while (long_seq[b]!='\0' && long_seq[b]!='*')
               {
               table[b+1][1]=c++;
               table[b+1][0]=translation[a][0];
               b++;
               }
             table[b][1]=c++;
             table[b][0]=translation[a][0];
             b++;
             }
          }

      for ( a=0; a< (aln[-1])->nseq; a++)
          {
          for ( b=0; b< (aln[a])->nseq; b++)
              {
            
            (aln[a])->order[b][0]=table[(aln[a])->order[b][1]][0];
            (aln[a])->order[b][1]=table[(aln[a])->order[b][1]][1];
            sprintf ( (aln[a])->name[b],"%s_%d_%d", S->name[(aln[a])->order[b][0]],a+1,b+1); 
            }
          }
      free_int (translation, -1);
      free_int (table,       -1);
      return aln;
      }



Sequence  *  fill_sequence_struc ( int nseq, char **sequences, char **seq_name)
      {
      int a;
      Sequence *S;
      int shortest, longuest;

      if ( nseq>1)
        {
          shortest=get_shortest_string( sequences, nseq, NULL, NULL);
          longuest=get_longest_string (sequences, nseq, NULL, NULL);
        }
      else if ( nseq==1)
        {
          shortest=longuest=strlen (sequences[0]);
        }
      else
        {
          return NULL;
        }
      

      S=declare_sequence (shortest, longuest,nseq);
      S->nseq=nseq;

      S->seq=copy_char ( sequences, S->seq, nseq, -1);
      S->name=copy_char ( seq_name, S->name,nseq, -1);
      
      ungap_array (S->seq,nseq);
      for ( a=0; a< S->nseq; a++)S->len[a]=strlen(S->seq[a]);
      return S;
      }

Alignment * expand_aln (Alignment *A)
  {
  /*This function expands the profiles within an alignment*/
  
  
  int a, b, d, e;
  Alignment *MAIN=NULL, *SUB=NULL;
  int n_sub_seq=0;
  int new_nseq=0;
  int *list;
  Alignment *Profile;
  
  if ( !A)return A;


  
  list=vcalloc (A->nseq, sizeof (int)); 
  for ( a=0; a< A->nseq; a++)
    {
      Profile=seq2R_template_profile (A->S, A->order[a][0]);
      if (Profile)new_nseq+=Profile->nseq;
      else 
      {
        new_nseq++;
        list[n_sub_seq++]=a;
      }      
    }
  
  if ( n_sub_seq==A->nseq){vfree(list);return A;}
  else if (n_sub_seq==0){MAIN=copy_aln (A, MAIN);MAIN->nseq=0;}
  else
    {
      MAIN=extract_sub_aln (A, n_sub_seq, list);
    }
  vfree(list);
  
  
  for ( a=0; a< A->nseq; a++)
    {
      Profile=seq2R_template_profile (A->S, A->order[a][0]);
      if ( Profile)
      {
        SUB=copy_aln (Profile,SUB);
        SUB=realloc_aln2(SUB, SUB->nseq, A->len_aln+1);
        
        for ( e=0,b=0; b<= A->len_aln; b++)
          {
            if ( is_gap(A->seq_al[a][b]))
            {for (d=0; d< SUB->nseq; d++)SUB->seq_al[d][b]='-';}
            else 
               {
               for(d=0; d<SUB->nseq; d++)SUB->seq_al[d][b]=Profile->seq_al[d][e];
               e++;
             }
               
          }
        MAIN=stack_aln(MAIN, SUB);
      }
    }
  free_aln (SUB);
  free_aln (A);
  return MAIN;
  }
Alignment * expand_number_aln (Alignment *A,Alignment *EA)
  {
  /*This function expands the profiles within an alignment*/
  
    
  int a, b, d, e;
  Alignment *MAIN=NULL, *SUB=NULL, *C=NULL;
  int n_sub_seq=0;
  int new_nseq=0;
  int *list;
  Alignment *Profile;

  if ( !EA || !A)return EA;

  if ( EA->nseq<A->nseq){fprintf (stderr, "\n[ERROR:expand_number_aln] Using as a master an expanded aln [FATAL:%s]", PROGRAM);exit (EXIT_FAILURE);}
  

  list=vcalloc (EA->nseq, sizeof (int)); 
  for ( a=0; a< EA->nseq; a++)
    {
      Profile=seq2R_template_profile (EA->S, EA->order[a][0]);
      if (Profile)new_nseq+=Profile->nseq;
      else 
      {
        new_nseq++;
        list[n_sub_seq++]=a;
      }      
    }
  
  if ( n_sub_seq==EA->nseq){vfree(list);return EA;}
  else if (n_sub_seq==0){MAIN=copy_aln (EA, MAIN);MAIN->nseq=0;}
  else
    {
      MAIN=extract_sub_aln (EA, n_sub_seq, list);      
    }
  
  
  list[0]=EA->nseq;
  C=extract_sub_aln (EA,1, list);  
  vfree(list);
  
  
  
  for ( a=0; a< EA->nseq; a++)
    {
      Profile=seq2R_template_profile (EA->S, EA->order[a][0]);
      if ( Profile)
      {
        SUB=copy_aln (Profile,SUB);
        SUB=realloc_aln2(SUB, SUB->nseq, EA->len_aln+1);
        
        for ( e=0,b=0; b<= EA->len_aln; b++)
          {
            if (is_gap(A->seq_al[a][b]))
            {
            for ( d=0; d<SUB->nseq; d++)
              SUB->seq_al[d][b]=NO_COLOR_RESIDUE;
            }
            else
            {
              for ( d=0; d<SUB->nseq; d++)
                {

                  if ( is_gap (Profile->seq_al[d][e]))
                  {
                    SUB->seq_al[d][b]=NO_COLOR_RESIDUE;
                  }
                  else SUB->seq_al[d][b]=EA->seq_al[a][b];
                }
              e++;
            }
          }
        for (d=0; d< SUB->nseq; d++)SUB->score_seq[d]=EA->score_seq[a];
        
        MAIN=stack_aln(MAIN, SUB);
      }
    }
  
  MAIN=stack_aln(MAIN, C);
  MAIN->nseq--;
  MAIN->score=MAIN->score_aln=EA->score_aln;
  
  free_aln (SUB);
  free_aln (EA);
  
  free_aln (C);
  
  return MAIN;
  }

Alignment * probabilistic_rm_aa ( Alignment *A, int pos, int len)
{
  int random_len=0;
  int a, b;
  int left, right;

  if ( len<0)
    {
      random_len=1;
      len=-len;
    }
  
  vsrand(0);

  if (pos==0)pos= (rand()%(A->len_aln-(2*len+len))) +len;
  
   
  for ( a=0; a< A->nseq; a++)
      {
        if (random_len)left =rand()%len;
        else left=len;
        if (random_len)right=rand()%len;
        else right=len;
        if ( (pos-right)<0 || (pos+left)>A->len_aln)
          {
            fprintf ( stderr, "\nWarning: probabilistic_rm_aa, pos out of range [%s]\n", PROGRAM);
          }
        else
          for ( b=pos-right; b<pos+left; b++)A->seq_al[a][b]=(b==pos)?'~':'*';
      }
  /*print_aln (A);*/ 
  ungap_aln (A);
  free_sequence ( A->S, A->nseq);
  A->S=aln2seq (A);
  return A;
  
}
        
Alignment * remove_gap_column ( Alignment *A, char *mode)
  {
    int   a, b;
    char *p;
    int  *seq_list;
    int   nseq=0;
    int keep_col, cl;


    seq_list =vcalloc ( A->nseq, sizeof (int));
    while (  (p=strtok(mode, ":")))
      {
      mode=NULL;
      if (p[0]=='#')
        {
          seq_list[nseq++]=atoi(p+1)-1;
        }
      else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)
        {
          seq_list[nseq++]=a;
        }
      }
    
    if ( nseq==0)
      {
      for ( a=0; a< A->nseq; a++)seq_list[a]=a;
      nseq=A->nseq;
      }

    for ( cl=0,a=0; a<=A->len_aln; a++)
      {
      for (keep_col=1, b=0; b< nseq && keep_col; b++)
        {
          keep_col=(is_gap(A->seq_al[seq_list[b]][a]))?0:keep_col;
        }
      
      if ( keep_col)
        {
          for ( b=0; b< A->nseq; b++)
            {
            A->seq_al[b][cl]=A->seq_al[b][a];
            }
          cl++;
        }
      else
        {
          for ( b=0; b< A->nseq; b++)
            {
            A->seq_al[b][cl]='-';
            }
          cl++;
        }
      }
    A->len_aln=cl;
    vfree (seq_list);
    
    return A;
  }


Alignment * ungap_sub_aln (Alignment *A, int ns, int *ls)
        {

      int a, b, c,t;
      int len;

      len=strlen ( A->seq_al[ls[0]]);

      for ( c=0,a=0; a<len; a++)
            {
            for ( t=0,b=0; b<ns; b++)
                  t+=is_gap(A->seq_al[ls[b]][a]);
            if (t==ns);
            else
                {
                for ( b=0; b<ns; b++)
                  A->seq_al[ls[b]][c]=A->seq_al[ls[b]][a];
                c++;
                }
            }
       for ( b=0; b<ns; b++)A->seq_al[ls[b]][c]='\0';  
       return A;
      }
Sequence * ungap_seq ( Sequence *S)
        {
        int a;
        
        if ( !S)return NULL;
        ungap(S->seq[0]);
        S->max_len=S->min_len=strlen (S->seq[0]);
        for ( a=0; a< S->nseq; a++)
          {
            ungap(S->seq[a]);
            S->len[a]=strlen (S->seq[a]);
            S->max_len=MAX(S->max_len,S->len[a]);
            S->min_len=MAX(S->min_len,S->len[a]);
          }
        return S;
        
      }

Alignment *ungap_aln_n ( Alignment *A, int p)
      {
/*remove all the columns of gap-only within an alignment*/  
      int a, b, c;
      int t;
      
      if ( A->nseq==0)return A;

      for ( c=0,a=0; a< A->len_aln; a++)
            {
            for ( t=0,b=0; b<A->nseq; b++)
                  t+=is_gap(A->seq_al[b][a]);
            if ((t*100)/A->nseq>=p || (t==A->nseq && p==100) || (t && p==1));
            else
                {
                for ( b=0; b<A->nseq; b++)
                  A->seq_al[b][c]=A->seq_al[b][a];
                c++;
                }
            }
       for ( b=0; b<A->nseq; b++)A->seq_al[b][c]='\0';
       A->len_aln=c; 
       return A;

       }

Alignment *ungap_aln ( Alignment *A)
{
  return ungap_aln_n (A, 100);
}
/*
Alignment *ungap_aln ( Alignment *A)
      {
      int a, b, c,t;
      
      for ( c=0,a=0; a< A->len_aln; a++)
            {
            for ( t=0,b=0; b<A->nseq; b++)
                  t+=is_gap(A->seq_al[b][a]);
            if (t==A->nseq);
            else
                {
                for ( b=0; b<A->nseq; b++)
                  A->seq_al[b][c]=A->seq_al[b][a];
                c++;
                }
            }
       for ( b=0; b<A->nseq; b++)A->seq_al[b][c]='\0';
       A->len_aln=c; 
       return A;

       }
*/


Alignment *remove_end (Alignment *A)
        {
      int a, b, d;
      int left, right;

      for (a=0; a< A->len_aln; a++)
          {
          for ( b=0, d=0; b< A->nseq; b++)
            if ( !is_gap(A->seq_al[b][a]))d++;
          if ( d>1)break;
          }
      left=a;
      for (a=A->len_aln-1; a>0; a--)
          {
          for ( b=0, d=0; b< A->nseq; b++)
            if ( !is_gap(A->seq_al[b][a]))d++;
          if ( d>1)break;
          }
      right=a;

      return extract_aln(A, left, right+1);
      }

Alignment* condense_aln (Alignment *A)
{
  /* condense complementarz columns:
     X-       X
     -X  ....>X
     X-       X

  */
  int a, b, plen, n,m, r1, r2;
  
  plen=0;
  while ( A->len_aln !=plen)
    {
      plen=A->len_aln;
      for ( a=0; a< A->len_aln-1; a++)
      {
        for ( n=m=b=0; b< A->nseq; b++)
          {
            r1=is_gap(A->seq_al[b][a]);
            r2=is_gap(A->seq_al[b][a+1]);
            n+=(r1 || r2);
            m+=r1;
          }
      
        if ( n==A->nseq && m!=A->nseq)
          {
            for (b=0; b< A->nseq; b++)
            {
              if (!is_gap(A->seq_al[b][a+1]))
                  {
                  A->seq_al[b][a]=A->seq_al[b][a+1];
                  A->seq_al[b][a+1]='-';
                  }
            }
            a++;
          }
      }
    }
  A=ungap_aln(A);  
  return A;
}
                
                  
              

void compress_aln ( Alignment *A)
        {

        /*remove all the columns of gap-only within an alignment*/  
      int a, b, c, d;
        
        

      for (c=0, a=0; a< A->len_aln; a++)
        {
          for ( b=0, d=0; b< A->nseq; b++)
            if ( A->seq_al[b][a]!='-'){d=1; break;}
          if ( d==0);
          else
            {
            for (b=0; b< A->nseq; b++)
              A->seq_al[b][c]=A->seq_al[b][a];
            c++;
            }
        }
       A->len_aln=c;
      
      for ( a=0; a< A->nseq; a++)
        A->seq_al[a][c]='\0';
      }

Alignment *seq_coor2aln ( Sequence *S, Alignment *A, int **coor, int nseq)
        {
      int a;
      char *buf;

      A=realloc_alignment2(A, nseq, return_maxlen ( S->seq, S->nseq)+1);
      for ( a=0; a< S->nseq; a++)sprintf ( A->file[a], "%s", S->file[a]);
      for ( a=0; a< nseq; a++)
          {
          sprintf (A->name[a], "Repeat_%d_%d", a, coor[a][0]);
          buf=extract_char ( S->seq[coor[a][0]], coor[a][1]-1, coor[a][2]);
          sprintf ( A->seq_al[a],"%s", buf);
          vfree(buf);
          A->order[a][0]=0;
          A->order[a][1]=coor[a][1]-1;
          }
      A->nseq=nseq;
      return A;
      }

Alignment *strings2aln (int nseq,...)
        {
        /*strings2aln(nseq, <name1>, <seq1>, <name2>, <seq2>....)*/
        va_list ap;
        char **list, **list2;
        char **name, **name2;
        Sequence *S;
        Alignment *A;
        int a, max;

        va_start(ap, nseq);
        list=vcalloc (nseq, sizeof (char*));
        name=vcalloc (nseq, sizeof (char*));
        for ( a=0; a< nseq; a++)
          {
            name[a]=va_arg(ap,char*);
            list[a]=va_arg(ap,char*);
            
          }
        va_end(ap);
        
        for ( max=0,a=0; a< nseq; a++)
          {
            max=(strlen (list[a])>max)?strlen(list[a]):max;
          }
        list2=declare_char (nseq, max+1);
        name2=declare_char (nseq, MAXNAMES+1);
        
        for ( a=0; a< nseq; a++)
          {
            sprintf ( list2[a], "%s", list[a]);
            sprintf ( name2[a], "%s", name[a]);
          }

        
        S=fill_sequence_struc(nseq,list2,name2);
        
        free_char (list2, -1);
        free_char (name2, -1);
        vfree (list);
        vfree(name);
        A=seq2aln(S,NULL, 1); 
        return A;
      }

Alignment *seq2aln ( Sequence *S, Alignment *A,int rm_gap)
      {
      int a;

      A=realloc_alignment2(A, S->nseq, S->max_len+1);       
      for ( a=0; a< S->nseq; a++)sprintf ( A->file[a], "%s", S->file[a]);
      A->nseq=S->nseq;
      A->max_len=S->max_len;
      A->min_len=S->min_len;

      for ( a=0; a< S->nseq; a++)
            {
            A->order[a][0]=a;
            A->order[a][1]=0;
            sprintf ( A->seq_comment[a], "%s", S->seq_comment[a]);
            sprintf ( A->aln_comment[a], "%s", S->aln_comment[a]);
            
            sprintf ( A->name[a], "%s", S->name[a]);
            sprintf ( A->seq_al[a], "%s", S->seq[a]);
            
            ungap ( A->seq_al[a]);
            A->len[a]=strlen ( A->seq_al[a]); 
            
            if ( rm_gap==0)sprintf ( A->seq_al[a], "%s", S->seq[a]);
            
            }
      padd_aln (A);
      A->S=S;
      return A;
      }

Alignment *padd_aln ( Alignment *A)
{
  A->seq_al=padd_string (A->seq_al, A->nseq, '-');
  A->len_aln=strlen (A->seq_al[0]);
  return A;
}

char **padd_string ( char **string, int n,char pad)
{
  /*Pads a the strings so that they all have the same length*/
  
  int max_len, a;
  char *buf;
  
  max_len=get_longest_string  (string,n, NULL, NULL);
  for (a=0; a<n; a++)
          {
          buf=generate_null (max_len-strlen (string[a]));
          strcat ( string[a], buf); 
          vfree (buf);      
          } 
  return string;
}

      
Alignment * add_align_seq2aln ( Alignment *A, char *seq, char *seq_name)
        {
        if ( !A)
          {
            A=declare_aln (NULL);
            A=realloc_aln2 ( A, 1, strlen (seq)+1);
            A->nseq=0;
            sprintf ( A->name[A->nseq], "%s", seq_name);
            sprintf ( A->seq_al[A->nseq], "%s", seq);
            A->nseq++;
            
          }   
        else if ( strlen (seq)!=A->len_aln)
          {
            fprintf ( stderr, "\nError: Attempt to stack incompatible aln and aligned sequence[FATAL]\n");
            exit (EXIT_FAILURE);
            A=NULL;
          }
        else
          {

            A=realloc_aln2 ( A, A->nseq+1, A->len_aln+1);
            sprintf ( A->name[A->nseq], "%s", seq_name);
            sprintf ( A->seq_al[A->nseq], "%s", seq);
            A->nseq++;
          }
        return A;
      }
  

Alignment *aln2number (Alignment *A)
        {
      A->seq_al=char_array2number(A->seq_al, A->nseq);
      return A;
      }
Sequence *seq2number (Sequence *A)
        {
      A->seq=char_array2number(A->seq, A->nseq);
      return A;
      }


Sequence * aln2seq (Alignment *A)
      {
      Sequence *LS;
      int a;

      LS=declare_sequence ( A->len_aln+1, A->len_aln+1, A->nseq);
      LS->nseq=A->nseq;
      for ( a=0; a< LS->nseq; a++)
            {
            sprintf (LS->file[a], A->file[a]); 
            LS->len[a]=A->len[a];
            sprintf ( LS->seq[a], A->seq_al[a]);
            if ( LS->contains_gap==0)ungap ( LS->seq[a]);
            
            LS->len[a]=strlen ( LS->seq[a]);
            sprintf ( LS->seq_comment[a], A->seq_comment[a]);
            sprintf ( LS->aln_comment[a], A->aln_comment[a]);
            sprintf ( LS->name[a], "%s", A->name[a]);
            }
      return LS;
      }

Sequence  *keep_residues_in_seq ( Sequence *S, char *list, char replacement)
{
  Alignment *A=NULL;
  int a;
  
  A=seq2aln (S, A,1);
  A=keep_residues_in_aln ( A, list, replacement);
  for ( a=0; a< A->nseq; a++)
    {
      ungap (A->seq_al[a]);
      sprintf ( S->seq[a], "%s", A->seq_al[a]);
    }
  free_aln (A);
  return S;
}


Alignment *aln2short_aln ( Alignment *A, char *list, char *new, int spacer)
{
  int a, b, r, cl, l;
  char *buf;
  
  for ( a=0; a< A->nseq; a++)
    {
      buf=vcalloc ( strlen (A->seq_al[a])+1, sizeof (char));
      
      for (l=0,cl=0, b=0; b< A->len_aln; b++)
      {
        r=A->seq_al[a][b];
        if ( is_gap(r));
        else if ( is_in_set (r, list))
          {
            if (cl){cl=0; buf[l++]=new[0];}
            buf[l++]=r;
          }
        else
          {
            if ( cl==spacer){buf[l++]=new[0];cl=0;}
            cl++;
          }
        
      }
      
      buf[l]='\0';
      sprintf (A->seq_al[a], "%s", buf);
      vfree (buf);
    }
  return A;
}
  
Alignment *keep_residues_in_aln ( Alignment *A, char *list, char replacement)
{
  return filter_keep_residues_in_aln (A,NULL, 0, -1, list, replacement);
}
Alignment *filter_keep_residues_in_aln ( Alignment *A,Alignment *ST, int use_cons, int value, char *list, char replacement)
{
  char **sl;
  int n, a;
  
  n=strlen (list);
  sl=declare_char (n+1, 256);
  for (a=0; a< n; a++)
    sprintf ( sl[a], "%c%c", list[a], list[a]);
  sprintf ( sl[a],"#%c", replacement);
  A=filter_aln_convert (A, ST,use_cons,value, n+1, sl);
  free_char (sl, -1);
  return A;
}
  

Alignment *filter_convert_aln ( Alignment *A,Alignment *ST, int use_cons, int value, int n, ...)
{
  va_list ap;
  char **sl;
  int a;
  va_start (ap, n);
  sl=vcalloc ( n,sizeof(char*));
  for ( a=0; a< n; a++)
    {
      sl[a]=va_arg(ap, char * );
    }
  va_end(ap);
  A=filter_aln_convert (A,ST,use_cons,value, n,sl);
  vfree(sl);
  return A;
}

Alignment * filter_aln ( Alignment *A, Alignment *ST, int value)
        {
        return filter_aln_convert (A, ST,0,value,DELETE, NULL);
      }
Alignment * filter_aln_upper_lower ( Alignment *A, Alignment *ST,int use_cons, int value)
        {
        
        return filter_aln_convert (A, ST,use_cons,value, LOWER, NULL);
      }
Alignment * filter_aln_lower_upper ( Alignment *A, Alignment *ST,int use_cons, int value)
        {
        
        return filter_aln_convert (A, ST,use_cons,value, UPPER, NULL);
      }
Alignment * filter_aln_convert ( Alignment *A, Alignment *ST,int use_cons, int value, int n_symbol,char **symbol_list)
        {
        int a, b, c;
        int st;
        int cons;

        if ( use_cons)
          {
            cons=name_is_in_list ("cons", ST->name,ST->nseq, 100);
            if ( cons==-1)cons=name_is_in_list ("Cons", ST->name,ST->nseq, 100);
            if ( cons==-1)use_cons=0;
          }
        
        A->residue_case=2;
        for ( a=0; a< A->nseq; a++)
              {
            if(value!=10 && ST && !use_cons)for ( c=0; c< ST->nseq; c++)if ( strm(ST->name[c], A->name[a]))break;
            for ( b=0; b< A->len_aln; b++)
                {
                  if ( value==10 || !ST)st=11;
                  else if ( ST && use_cons)
                  {
                    st=(isdigit(ST->seq_al[cons][b]))?ST->seq_al[cons][b]-'0':ST->seq_al[cons][b];
                  }
                  else st=(isdigit(ST->seq_al[c][b]))?ST->seq_al[c][b]-'0':ST->seq_al[c][b];
                  
                 
                  if ( st==value || value==-1 || st==NO_COLOR_RESIDUE) 
                  {
                    if      ( n_symbol==UPPER  && !symbol_list)A->seq_al[a][b]=toupper (A->seq_al[a][b]);
                    else if ( n_symbol==LOWER  && !symbol_list)A->seq_al[a][b]=tolower (A->seq_al[a][b]);
                    else if ( n_symbol==DELETE && !symbol_list)A->seq_al[a][b]='-';
                    else
                      {
                        A->seq_al[a][b]=convert(A->seq_al[a][b],n_symbol,symbol_list);
                      }
                  }
                 
                }
            }
        return A;
      } 
int  * count_in_aln ( Alignment *A, Alignment *ST, int value, int n_symbol,char **symbol_list, int *table)
        {
        int a, b, c, d;
        int st;
        
        if (!table)table=vcalloc (n_symbol, sizeof (int));

        A->residue_case=2;
        for ( a=0; a< A->nseq; a++)
              {
            if(value!=10 && ST)for ( c=0; c< ST->nseq; c++)if ( strm(ST->name[c], A->name[a]))break;
            for ( b=0; b< A->len_aln; b++)
                {
                  if ( value==10 || !ST)st=11;
                  else st=(isdigit(ST->seq_al[c][b]))?ST->seq_al[c][b]-'0':ST->seq_al[c][b];
                  if ( st==value || value==-1) 
                  {
                    for ( d=0; d<n_symbol; d++)table[d]+=is_in_set ( A->seq_al[a][b], symbol_list[d]);
                  }
                }
            }
        return table;
      } 

char *dna_aln2cons_seq ( Alignment *A)
        {
      int a, b, best;
      static int **column_count;
      static int **old_tot_count;
      static int **new_tot_count;
      static char *string1, *string2;
      int **count_buf;
      char r1, r2,*seq;
      int NA=0, NG=1, NC=2, NT=3, IGAP=4;
      static int   MAX_EST_SIZE=10000;
      static int   size_increment=1000;
      static int first;
      int overlap=0, best_overlap=0;
      

      seq=vcalloc ( A->len_aln+1, sizeof (char));

      if (!column_count )
        {
          column_count=vcalloc(MAX_EST_SIZE, sizeof (int*));
          for ( a=0; a< MAX_EST_SIZE; a++)
            column_count[a]=vcalloc (5, sizeof (int));
        
          old_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*));
          new_tot_count=vcalloc(MAX_EST_SIZE, sizeof (int*));
          A->P=declare_profile( "agct-",MAX_EST_SIZE);
          string1=vcalloc (MAX_EST_SIZE, sizeof (char));
          string2=vcalloc (MAX_EST_SIZE, sizeof (char));
        }
      else if (A->len_aln>MAX_EST_SIZE)
        {
          if ( column_count)
            {
            for ( a=0; a< MAX_EST_SIZE; a++)
              vfree(column_count[a]);
            vfree(column_count);
            vfree(old_tot_count);
            vfree(new_tot_count);
            vfree(string1);
            vfree(string2);
            }
          
        column_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));
        for ( a=0; a< MAX_EST_SIZE+ size_increment; a++)
            column_count[a]=vcalloc (5, sizeof (int));
        
        old_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));
        new_tot_count=vcalloc(MAX_EST_SIZE+ size_increment, sizeof (int*));
        
        for (a=0; a< MAX_EST_SIZE; a++)
          {
            old_tot_count[a]=*(column_count++);
            for ( b=0; b<5; b++)old_tot_count[a][b]=(A->P)->count[b][a];
          }
        free_int ( (A->P)->count, -1);
        
        (A->P)->count=declare_int (5, MAX_EST_SIZE+ size_increment);
        (A->P)->max_len=MAX_EST_SIZE+ size_increment;
        MAX_EST_SIZE+= size_increment;
        string1=vcalloc (MAX_EST_SIZE, sizeof (char));
        string2=vcalloc (MAX_EST_SIZE, sizeof (char));
        }
      
      
      sprintf ( string1, "%s",A->seq_al[0]);
      sprintf ( string2, "%s",A->seq_al[1]);
      

      string1=mark_internal_gaps(string1,'.');
      string2=mark_internal_gaps(string2,'.');

      
      
      for (b=0,a=0; a< A->len_aln; a++)
        {
          r1=string1[a];
          r2=string2[a];
          
          if ( r1==r2)
            {
            overlap++;
            }
          else
            {
            best_overlap=MAX(overlap, best_overlap);
            overlap=0;
            }


          if (!is_gap(r1) && first==1)new_tot_count[a]=old_tot_count[b++]; 
          else if (is_gap(r1) || first==0){new_tot_count[a]=*column_count;column_count++;};
          
          if ( first==0)
            {
            if(r1=='a')       new_tot_count[a][NA]++;
            else if ( r1=='g')new_tot_count[a][NG]++;
            else if ( r1=='c')new_tot_count[a][NC]++;
            else if ( r1=='t')new_tot_count[a][NT]++; 
            else if (is_gap(r1));
            else
              {
               new_tot_count[a][NA]++;
               new_tot_count[a][NG]++;
               new_tot_count[a][NC]++;
               new_tot_count[a][NT]++;
              }
            }
          if ( a> 0 && a<A->len_aln-1 && r1=='.')
            {
            new_tot_count[a][IGAP]+=((new_tot_count[a-1][NA]+new_tot_count[a-1][NG]+new_tot_count[a-1][NC]+new_tot_count[a-1][NT]));
            }
          

          if(r2=='a')       new_tot_count[a][NA]++;
          else if ( r2=='g')new_tot_count[a][NG]++;
          else if ( r2=='c')new_tot_count[a][NC]++;
          else if ( r2=='t')new_tot_count[a][NT]++;
          else if ( r2=='.')new_tot_count[a][IGAP]++;
          else if ( r2=='-');
          else 
            {
            new_tot_count[a][NA]++;
            new_tot_count[a][NG]++;
            new_tot_count[a][NC]++;
            new_tot_count[a][NT]++; 
            }
          (A->P)->count[0][a]=new_tot_count[a][NA];
          (A->P)->count[1][a]=new_tot_count[a][NG];
          (A->P)->count[2][a]=new_tot_count[a][NC];
          (A->P)->count[3][a]=new_tot_count[a][NT];
          (A->P)->count[4][a]=new_tot_count[a][IGAP];

          best_int(4,1, &best,new_tot_count[a][NA], new_tot_count[a][NG],new_tot_count[a][NC],new_tot_count[a][NT]); 
          if( best==0)      seq[a]='a';
          else if ( best==1)seq[a]='g';
          else if ( best==2)seq[a]='c';
          else if ( best==3)seq[a]='t';
        }

      first=1;

      seq[a]='\0';
      fprintf ( stderr, "[Best Overlap: %d Residues]", best_overlap);
      count_buf=old_tot_count;
      old_tot_count=new_tot_count;
      new_tot_count=count_buf;

      return seq;
      
      }
char *aln2cons_seq ( Alignment *A, int ns, int *ls, int n_groups, char **group_list)
        {
      char *seq;
      int a, b, c;
      int best_group=0;
      int aa_group=0;
      int *group;
      int len;
      int clean_ls=0;
      
      len=strlen  (A->seq_al[ls[0]]);
      seq=vcalloc (len+1, sizeof (char));

      if ( ns==0)
        {
          ns=A->nseq;
          ls=vcalloc ( A->nseq, sizeof (int));
          for ( a=0; a< A->nseq; a++)ls[a]=a;
          clean_ls=1;
        }


      if ( !group_list)
         {
             group_list=declare_char ( 26, 2);
             for ( a=0; a<26; a++)group_list[a][0]=a+'a';
             n_groups=26;
             aa_group=1;
         }
      
      
      for ( a=0; a<len; a++)
          {
            group=vcalloc (n_groups+1, sizeof (int));
            for (best_group=0,b=0; b< ns; b++)
                {
                if ( !is_gap(A->seq_al[ls[b]][a]))
                   {
                   for (c=0; c< n_groups; c++)
                       if ( is_in_set (tolower(A->seq_al[ls[b]][a]), group_list[c]))
                                   {group[c]++;
                                best_group=(group[c]>group[best_group])?c:best_group;
                               }
                   }
                seq[a]=group_list[best_group][0];
                }
            vfree (group);
          }
      seq[a]='\0';
      if ( aa_group) free_char (group_list, -1);

      if ( clean_ls)vfree(ls);
      
      return seq;
      }
            
char *aln2cons_seq_mat ( Alignment *A, char *mat_name)
{
  return sub_aln2cons_seq_mat (A, A->nseq, NULL, mat_name);
}
char *sub_aln2cons_seq_mat2 ( Alignment *A,int ns, char **ls, char *mat_name)
{
  char *cons;
  int *list;
  list=name_array2index_array(ls, ns, A->name, A->nseq);
  cons=sub_aln2cons_seq_mat  ( A,ns, list, mat_name);
  vfree (list);
  return cons;
}

char *sub_aln2cons_seq_mat  ( Alignment *A,int ns, int *ls, char *mat_name)
{
 int a, b, c, s;
 char *seq, r1, r2;
 int **mat;
 int score=0, best_score, best_r;
 int len;
 int naa;
 
 mat=read_matrice (mat_name);
 len=strlen ( A->seq_al[(ls==NULL)?0:ls[0]]);
 seq=vcalloc (len+1, sizeof (char));
 for ( a=0; a<len; a++)     
   {
     for (b=0; b<20; b++)
       {
       r1=AA_ALPHABET[b];
       for ( naa=0,score=0,c=0; c<ns; c++)
         {
           s=(ls==NULL)?c:ls[c];
           if ( ls && ls[c]==-1) continue;
           else if (is_gap(A->seq_al[s][a]))continue;
           else 
             {
             naa++;
             r2=A->seq_al[s][a];
             score+=mat[r1-'A'][r2-'A'];
             }
         }
       if (naa==0)best_r='-';
       if ( b==0 || score>best_score){best_score=score; best_r=r1;}
       }
     seq[a]=best_r;
   }
 free_int (mat, -1);
 return seq;
}
  
int  seq_list2fasta_file( Sequence *S,  char *list, char *file)
        {
      FILE *fp;
      int n, a, s;
      static char *buf;
      static int blen;
      int l;
      

      /*Buf is used because cmalloced functions cannot go through strtok*/
      if ( !S)return 0;
      else
        {
          fp=vfopen ( file, "w");
          if ( !list)
            {
            for ( a=0; a<S->nseq; a++)
              {
                fprintf ( fp, ">%s\n%s\n", S->name[a], S->seq[a]);
              }
            }
          else
            {
            l=strlen (list);
            if ( l>blen)
              {
                if (buf)free(buf);
                buf=vcalloc ( strlen (list)+1, sizeof (char));
                sprintf ( buf, "%s", list);
                blen=l;
              }
            
            n=atoi(strtok (list,SEPARATORS));
            for ( a=0; a< n; a++)
              {
                s=atoi(strtok (NULL, SEPARATORS));
                fprintf ( fp, ">%s\n%s\n", S->name[s], S->seq[s]);
              }
            }
          vfclose (fp);
        }
      return 1;
      }
Structure * seq2struc ( Sequence *S, Structure *ST)
        {
      int a, b;
      
      for ( a=0; a< S->nseq; a++)
          for ( b=0; b< S->len[a]; b++)
            ST->struc[a][b+1][ST->n_fields-1]=S->seq[a][b];
      return ST;
      }

void aln2struc (Alignment *A, Structure *ST) 
        {
      int a, b, c;

      for ( a=0; a< A->nseq; a++)
          for (c=0, b=0; b< A->len_aln; b++)
              {
            if ( !is_gap (A->seq_al[a][b]))
                 {
                 ST->struc[a][c][ST->n_fields-1]=A->seq_al[a][b];
                 c++;
                 }
            }
      }
Alignment *stack_aln (Alignment *A, Alignment *B)
        {
      int a,b;
      int max_len=0, max_nseq=0;
      if ( B==NULL)return A;
      if ( A==NULL)return B;
      
      max_nseq=A->nseq+B->nseq;
      for (a=0; a< A->nseq; a++)max_len=MAX(strlen(A->seq_al[a]),max_len);
      for (a=0; a< B->nseq; a++)max_len=MAX(strlen(B->seq_al[a]),max_len);
      
      A=realloc_aln2 ( A,max_nseq,max_len+1);
      
      for (a=A->nseq,b=0; b< B->nseq; b++, a++)
          {
          sprintf ( A->seq_comment[a] , "%s", B->seq_comment[b]);
          sprintf ( A->aln_comment[a] , "%s", B->aln_comment[b]);
          
          sprintf ( A->seq_al [a] , "%s", B->seq_al [b]);
          sprintf ( A->name   [a] , "%s", B->name[b]);
          sprintf ( A->file   [a], "%s" , B->file[b]);
          A->order[a][0]=B->order[b][0];
          A->order[a][1]=B->order[b][1];
          A->score_seq[a]=B->score_seq[b];
          A->len[a]=B->len[b];
          }
      
      A->len_aln=MAX(A->len_aln, B->len_aln);
      A->nseq=A->nseq+B->nseq;
      A->score_aln=A->score_aln+B->score_aln;
      
      A->finished=A->finished+B->finished;
      return A;
      }
          
Alignment *chseqIaln(char *name, int seq_n, int start,int len,Sequence *S, int seqIaln, Alignment *A)
        {
      char *seq;

      seq=extract_char ( S->seq[seq_n], start, len);
      A=realloc_aln2 (A, (A==NULL)?(seqIaln+1):MAX(A->nseq,seqIaln+1), ((A==NULL)?(strlen (seq)):MAX(strlen (seq),A->len_aln))+1);
      
      
      sprintf ( A->seq_al[seqIaln], "%s",seq);

      
      A->order[seqIaln][0]=seq_n;
      A->order[seqIaln][1]=start;
      sprintf ( A->name[seqIaln], "%s", name);
      A->nseq=MAX(A->nseq, seqIaln+1);
      A->len_aln=return_maxlen(A->seq_al, A->nseq);
      A->S=S;
      vfree (seq);
      return A;
      }

Alignment * aln_gap2random_aa(Alignment *A)
        {
       int a, b,l;
       char alp[200];
       
       if (strm ( (A->S)->type, "PROTEIN"))
         sprintf ( alp, "acefghiklmnpqrstuvwy");
       else if ( strm ( (A->S)->type, "DNA"))
         sprintf ( alp, "agct");
       l=strlen (alp);
       
       
       for (a=0; a<A->nseq; a++)
          for ( b=0; b<A->len_aln; b++)
            if ( is_gap (A->seq_al[a][b]))A->seq_al[a][b]=alp[(int)rand()%(l)];
        return A;
      }

Alignment * make_random_aln(Alignment *A,int nseq, int len, char *alphabet)
        {
      int a;
      

      A=realloc_aln2(A, nseq, len+1);

      A->nseq=0;
      A->len_aln=len;
      for ( a=0; a< A->nseq; a++)sprintf ( A->file[a], "random alignment");
      for ( a=0; a< nseq; a++)
          A=add_random_sequence2aln(A,alphabet);
      return A;
      }
Alignment * add_random_sequence2aln( Alignment *A, char *alphabet)
        {
      int a, n;

      vsrand(0);

      n=strlen(alphabet);
      A=realloc_alignment2 (A, A->nseq+1, A->len_aln+1);
      
      for ( a=0; a< A->len_aln; a++)A->seq_al[A->nseq][a]=alphabet[rand()%n];
      if (! A->name[A->nseq][0])
        {
          for ( a=0; a<10; a++)A->name[A->nseq][a]=alphabet[rand()%n];
          A->name[A->nseq][a]='\0';
        }
      
      A->nseq++;
      return A;
      }

Sequence *get_defined_residues( Alignment *A)
        {
          char *buf;
          Sequence *S;
          int a, b, s, l, r;
          if ( !A || !A->S) return NULL;

          S=duplicate_sequence (A->S);
          for ( a=0; a< S->nseq; a++)
            for ( b=0; b< S->len[a]; b++)S->seq[a][b]=UNDEFINED_RESIDUE;
          buf=vcalloc(A->len_aln+1,sizeof (char));
          for ( a=0; a< A->nseq; a++)
              {
                sprintf ( buf, "%s",A->seq_al[a]);
                ungap(buf);
                l=strlen (buf);
                s=A->order[a][0];
                
                for ( b=1; b<= l; b++)
                  {
                  r=A->seq_cache[s][b];
                  
                  if ( r>=0)S->seq[s][r-1]=(A->S)->seq[s][r-1];
                  }
            }
          vfree(buf);
          return S;
      }
Alignment *thread_defined_residues_on_aln ( Alignment *A, Sequence *S1)
      {
      int a, b;
      int gap, r,s, r2;
      for ( a=0; a< A->nseq; a++)
          {
            s=A->order[a][0];
            r=A->order[a][1];
            for (b=0;b< A->len_aln; b++)
                {
                gap=is_gap(A->seq_al[a][b]);
                
                if (!gap)
                  {
                  r+=!gap;
                  r2=A->seq_cache[s][r]-1;
                  
                  if (r2>=0 && S1->seq[s][r2]==UNDEFINED_RESIDUE)
                      A->seq_al[a][b]=UNDEFINED_RESIDUE;
                  }
                }
          }
      return A;
      }

int ** trim_aln_borders (char **seq1, char **seq2, int nseq)
        {
      int a, b, c,l1,l2;
      char *buf1;
      char *buf2;
      int max;

      

      
      max=MAX(get_longest_string (seq1,-1, NULL, NULL),get_longest_string (seq2,-1, NULL, NULL))+1;
      buf1=vcalloc ( max, sizeof(char));
      buf2=vcalloc ( max, sizeof(char));
      
      for ( a=0; a< nseq; a++)
          {
          sprintf ( buf1, "%s", seq1[a]);
          sprintf ( buf2, "%s", seq2[a]);


         
          ungap (buf1);
          ungap (buf2);

          if (str_overlap ( buf1, buf2,'*')!=0)
            {                 
            l1=strlen ( seq1[a]);
              l2=strlen ( seq2[a]);
            for ( b=0,c=0; c< l1; c++)
                if ( !is_gap(seq1[a][c]))seq1[a][c]=buf1[b++];
            seq1[a][c]='\0';
            for ( b=0,c=0; c< l2; c++)
                if ( !is_gap(seq2[a][c]))seq2[a][c]=buf2[b++]; 
            seq2[a][c]='\0';
                  }
          }
      vfree (buf1);
        vfree (buf2);
      return NULL;

      }
Sequence * merge_seq    ( Sequence *IN, Sequence *OUT)
        {
      int a;
      
      if ( OUT==NULL)return duplicate_sequence (IN);
      else
          {
           if ( IN && check_list_for_dup( IN->name, IN->nseq))
                {
                  fprintf ( stderr, "\nERROR: %s is duplicated in file %s[FATAL]\n", check_list_for_dup( IN->name, IN->nseq), IN->file[0]);
                  exit (EXIT_FAILURE);
              }
          for ( a=0; a< IN->nseq; a++)
            if ((OUT=add_sequence ( IN, OUT, a))==NULL)return NULL;
          return OUT;
          }
      }

Alignment *seq_name2removed_seq_name(Sequence *S, Alignment *NA, float **diff)
{
  int a, b, rb, s;
  float min_diff;
  for (a=0; a< S->nseq; a++)
    {
      if (name_is_in_list( S->name[a], NA->name, NA->nseq, 100)!=-1) continue;
      for ( min_diff=100, s=0, b=0; b< NA->nseq; b++)
      {
        rb=name_is_in_list ( NA->name[b], S->name, S->nseq, 100);
        if ( diff[a][rb]<min_diff)
          { 
            s=b;
            min_diff=diff[a][rb];

          }
      }
      strcat ( NA->seq_comment[s], " ");
      strcat ( NA->seq_comment[s], S->name[a]);
    }
  return NA;
}
  
      
            
     
int seq_name2index (char *name, Sequence *S)
{
  if ( !S) return -1;
  else return name_is_in_list ( name, S->name, S->nseq, MAXNAMES+1);
}
char * seq_name2coor ( char *s, int *start, int *end, char sep)
{
  /*name|start|end */
  char n1[100], n2[100];
  int a=0, b=0, c=0;
  
  n1[0]=n2[0]='\0';
  start[0]=end[0]=0;
  
  while ( s[a]!=sep && s[a]!='\0')a++;
  if ( s[a]=='\0')return s;
  else 
    s[a++]='\0';

 
  
  while ( s[a]!=sep && s[a]!='\0')n1[b++]=s[a++];
  
  if ( s[a]=='\0'){n1[b]='\0';if ( n1[0])start[0]=atoi(n1);return s;}
  else s[a++]=n1[b]='\0';
 
  
  while ( s[a]!=sep && s[a]!='\0')n2[c++]=s[a++];
  n2[c]='\0';

  
  if ( n1[0])start[0]=atoi(n1);
  if ( n2[0])end[0]=atoi(n2);


  return s;
}
  
Sequence *extract_one_seq(char *n,int start, int end, Alignment *S, int keep_name)
       {
      
       int seq, a;
       FILE*fp;
       char *name;
       Sequence *OUT_S;
       

       if ( n[0]=='#')seq=S->nseq;
       else if ( (seq=name_is_in_list (n, S->name, S->nseq, 100)+1)!=0);
       else if (is_number (n) && (seq=atoi(n))!=0) seq=atoi(n);
       else
         {
           fprintf ( stderr, "\nCould not find Sequence %s [FATAL]", n);
           exit (EXIT_FAILURE);
         }
       seq--;
       
       name=vtmpnam ( NULL);
       fp=vfopen ( name, "w");
       if ( start && end &&!keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start, end);
       else if ( start && end==0 && !keep_name)fprintf (fp, ">%s_%d_%d\n",S->name[seq],start,strlen ( S->seq_al[seq]));
       else fprintf (fp, ">%s\n", S->name[seq]);
       
       if ( start==0 && end==0){fprintf (fp, "%s\n", S->seq_al[seq]);}
       else if (end==0){fprintf (fp, "%s\n", S->seq_al[seq]+start-1);}
       else
         {
           for ( a=start-1; a<end; a++){fprintf ( fp, "%c", S->seq_al[seq][a]);}
           fprintf ( fp, "\n");
         }
       
       
       vfclose (fp);
       OUT_S=get_fasta_sequence_num (name, NULL);
       
       return OUT_S;
       }
       

       
Sequence * extract_sub_seq( Sequence  *COOR, Sequence *S)
        {
      int a, b, c,s;
      int start, end;
      
      for ( a=0; a< S->nseq; a++)
          {
          if ( (s=name_is_in_list ( S->name[a], COOR->name, COOR->nseq, 100))!=-1)
             {
             
             sscanf ( COOR->seq_comment[s], "%d %d", &start, &end);
             for (c=0,b=start-1; b< end; b++, c++)S->seq[a][c]=S->seq[a][b];
             S->seq[a][c]='\0';
             sprintf ( S->seq_comment[a], "%s",COOR->seq_comment[s]);
             
             }
          }
      S=reorder_seq ( S, COOR->name, COOR->nseq);
      return S;
      }
             



Alignment * fix_aln_seq  ( Alignment *A, Sequence *S)
        {
      int a, b, c;
      char *buf1, *buf2;
      int g0, g1, nr0, nr1;
      int id, tot;
      Alignment *B;


      /*This function establishes the correspondance between every (1..N+1) residue of each aligned sequence
        and its correspondance in S:
        A->seq_cache[a][b]=x means that residue b of aligned sequence a corresponds to residue x of the sequence with tye same index in S
        A->seq_cache[a][b]=0 means there is no correspondance.
        a is the index of the sequence
        Applying this function is needed for turning an alignment into a constraint list
      */
      

      if ( S==NULL)return A;

      A->seq_cache=declare_int ( S->nseq, MAX((A->len_aln+1), S->max_len+1));
      
      for (a=0; a< S->nseq; a++)
        for ( b=0; b< A->len_aln; b++)A->seq_cache[a][b]=-1;
      
      buf1=buf2=NULL;
      for ( a=0; a< S->nseq; a++)
          {
          for (b=0; b< A->nseq; b++) 
              {
            if (strm ( S->name[a], A->name[b]))
               {
               A->order[b][0]=a;
               
               vfree (buf1);
               buf1=vcalloc ( A->len_aln+1, sizeof (char));
               sprintf (buf1, "%s", A->seq_al[b]);
               ungap (buf1);
               upper_string (buf1);
               
               vfree(buf2);
               buf2=vcalloc (strlen(S->seq[a])+1, sizeof (char));
               sprintf (buf2, "%s",S->seq[a]);
               ungap (buf2);
               upper_string (buf2);
               
               

               if ( strm (buf1,buf2))
                   {
                     
                     for ( c=0; c<S->len[a]; c++)A->seq_cache[a][c+1]=c+1;
                   }
               else
                   {
                    
                     B=align_two_sequences (buf2,buf1,"blosum62mt",-4,-1, "myers_miller_pair_wise");
                     if ( getenv ("DEBUG_RECONCILIATION"))
                       {
                         fprintf (stderr, "\n[DEBUG_RECONCILIATION:fix_aln_seq]\nReconciliation of %s\nA=Ref_sequence\nB=New_seq", S->name[a]);
                         print_aln (B);
                       }
                     
                     for (id=0, tot=0,nr0=0,nr1=0,c=0; c<B->len_aln; c++)
                       {
                         g0=is_gap(B->seq_al[0][c]);
                         g1=is_gap(B->seq_al[1][c]);
                         nr0+=1-g0;
                         nr1+=1-g1;
                         if ( !g0 && !g1)
                         {
                           tot++;
                           id+=(B->seq_al[0][c]==B->seq_al[1][c])?1:0;
                           A->seq_cache[a][nr1]=nr0;
                         }
                         else if (g0 && !g1)
                         {
                           A->seq_cache[a][nr1]=0;
                         }
                       }
                     if ( ((id*100)/tot)<20)
                       {
                         print_aln (B);
                         fprintf ( stderr, "\nTwo different sequences have the same name: %s", S->name[a]);       
                         fprintf ( stderr, "\nIf %s is a PDB ID, Make sure it identifies the right chain (A, B, 1, 2...)", S->name[a]);
                         fprintf ( stderr, "\nChain number or index must be added to the PDB id (i.e. 1gowA)");
                         fprintf ( stderr, "\nIf You want to use %s anyway, rename it with a non-PDB identifier such as seq_%s\n",S->name[a],S->name[a]); 
                         exit (EXIT_FAILURE);
                       }

                     free_sequence ( B->S, -1);
                     free_aln (B);
                   }
               
               }
            }
          }
          vfree(buf1);vfree(buf2);
          return A;
      }

Sequence * add_prf2seq  ( char *file, Sequence *S)
    {
      char **new_seq;
      Sequence *NS;
      
      if ( !is_aln (file))return S;
      else
      {
        X_template *R;
        Alignment *A;
        R=fill_R_template(file,file, S);
        A=(R->VR)->A;
        new_seq=declare_char (1,A->len_aln+1);
        sprintf ( new_seq[0], "%s",aln2cons_seq_mat(A, "blosum62mt"));
        
        NS=fill_sequence_struc(1, new_seq,A->file);
        S=add_sequence (NS, S, 0);
        (S->T[S->nseq-1])->R=R;
        
        free_sequence (NS, NS->nseq);
        free_char( new_seq, -1);
        
        return S;
      }
    }
int prf_in_seq ( Sequence *S)
{
  int a;
  
  if ( !S) return 0;
  else 
    {
      for ( a=0; a< S->nseq; a++)
      if (seq2R_template_profile(S, a)) return 1;
    }
  return 0;
}
Sequence * add_sequence ( Sequence *IN, Sequence *OUT, int i)
    {
    int s;
   
    char *buf;
    if (OUT==NULL)
      {
      OUT=duplicate_sequence (IN);
      return OUT;
      }

    /*Adds sequence i of IN at the end of OUT*/

    if ((s=name_is_in_list ( IN->name[i], OUT->name, OUT->nseq,STRING))==-1 )
       {
       OUT=realloc_sequence (OUT, OUT->nseq+1, IN->len[i]);  
       sprintf ( OUT->name[OUT->nseq],"%s",IN->name[i]);
       sprintf ( OUT->file[OUT->nseq],"%s",IN->file[i]);
       sprintf ( OUT->seq_comment[OUT->nseq],"%s",IN->seq_comment[i]);
       sprintf ( OUT->aln_comment[OUT->nseq],"%s",IN->aln_comment[i]);
       
       sprintf ( OUT->seq[OUT->nseq],"%s",IN->seq[i]);
       OUT->len[OUT->nseq]=IN->len[i];
       OUT->T[OUT->nseq][0]=IN->T[i][0];
       OUT->nseq++;
       return OUT;
       }
    else if ( s!=-1 && !case_insensitive_strcmp ( IN->seq[i], OUT->seq[s]))
       {
             
       if ( getenv4debug("DEBUG_RECONCILIATION"))fprintf ( stderr,"[DEBUG_RECONCILIATION:add_sequence]\n%s\n%s\n", IN->seq[i], OUT->seq[s]);
       
       fprintf ( stderr, "WARNING: DISCREPANCY:%s in [%s] and  [%s]\n", IN->name[i], IN->file[i], OUT->file[s]);
       
       if (((buf=build_consensus(IN->seq[i], OUT->seq[s],"cfasta_pair_wise" ))!=NULL)||((buf=build_consensus(IN->seq[i], OUT->seq[s],"myers_miller_pair_wise" ))!=NULL))
         {
          
           OUT->max_len=MAX(OUT->max_len, strlen(buf));
           OUT->min_len=MIN(OUT->min_len, strlen(buf));
           OUT->seq    =realloc_char ( OUT->seq, -1, -1,OUT->nseq,OUT->max_len+1);
           
           sprintf ( OUT->seq[s],"%s",buf);
           OUT->len[s]=strlen (buf);
           vfree (buf);
           return OUT;
         }
       else
       {
         fprintf ( stderr, "IMPOSSIBLE TO RECONCILIATE SOME SEQUENCES[FATAL:%s]\n", PROGRAM);
             print_aln ( align_two_sequences (IN->seq[i], OUT->seq[s], "idmat", 0, 0, "fasta_pair_wise"));
             return NULL;
         }
       
       }
    else
       {
       return OUT;
       }
    }
                     

Sequence  * trim_seq       ( Sequence  *A, Sequence  *B)
        {
      int a;
      Sequence *R;
      
      R=declare_sequence (MIN(A->min_len,B->min_len), MAX(A->max_len, B->max_len), MIN(A->nseq, B->nseq));
      R->nseq=0;
      
      for (a=0; a< A->nseq; a++)
          {  
          if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING+1)!=-1)
              {
              sprintf ( R->name[R->nseq], "%s", A->name[a]);
              sprintf ( R->seq[R->nseq], "%s", A->seq[a]);
              sprintf ( R->file[R->nseq], "%s", A->file[a]);
              sprintf ( R->aln_comment[R->nseq], "%s", A->aln_comment[a]);
              sprintf ( R->seq_comment[R->nseq], "%s", A->seq_comment[a]);
              
              R->len[R->nseq]=A->len[a];
              R->nseq++;
            }
          }
      return R;
      }

Sequence * trim_aln_seq ( Alignment *A, Alignment *B)
        {
      int a;
      static char **name_list;
      int n=0;
      Sequence *SA, *SB;
      int **cache_A=NULL;
      int **cache_B=NULL;
      int * p;

      /*This function inputs two alignments A and B
        It removes sequences that are not common to both of them
        It rearange the sequences so that they are in the same order
        A decides on the order
        The Sequences (A->S) and (B->S) are treated the same way
        Sequences are also merged in order to detects discrepencies.
        A pointer to S is returned
      */
      if (name_list)free_char (name_list, -1);
      name_list=declare_char (MAX(A->nseq, B->nseq), STRING+1);
        
      for ( a=0; a< A->nseq; a++)
          {  
          if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1)
              {
            sprintf ( name_list[n++], "%s", A->name[a]);
            }
          }
      
      
      
      reorder_aln ( A, name_list, n);
      if (A->seq_cache)cache_A=duplicate_int (A->seq_cache, -1, -1);
      if (B->seq_cache)cache_B=duplicate_int (B->seq_cache, -1, -1);
      reorder_aln ( B, name_list, n);
      for ( a=0; a< n; a++)
        {
          if ( cache_A)
            {
            p=A->seq_cache[A->order[a][0]];
            A->seq_cache[A->order[a][0]]=cache_A[a];
            cache_A[a]=p;
            }
         if ( cache_B)
            {
            p=B->seq_cache[B->order[a][0]];
            B->seq_cache[B->order[a][0]]=cache_B[a];
            cache_B[a]=p;
            } 
         A->order[a][0]=B->order[a][0]=a;
        }
        free_int(A->seq_cache, -1);
      free_int(B->seq_cache, -1);

      A->seq_cache=cache_A;
      B->seq_cache=cache_B;
        

      
      SA=aln2seq(A);
      SB=aln2seq(B);
      
      A->S=B->S=merge_seq (SA, SB);
      return A->S;
      }
Sequence * trim_aln_seq_name ( Alignment *A, Alignment *B)
        {
      int a;
      Sequence *S;
      
      /*This function inputs two alignments A and B
        It removes sequences that are not common to both of them
        It rearange the sequences so that they are in the same order
        A decides on the order
      */
      S=declare_sequence ( 1, 1, A->nseq+B->nseq);
      S->nseq=0;
      for ( a=0; a< A->nseq; a++)
          {  
          if ( name_is_in_list ( A->name[a], B->name, B->nseq,STRING)!=-1)
              {
            sprintf ( S->name[S->nseq++], "%s", A->name[a]);
            }
          }
      return S;
      }



char ** rm_name_tag (char **name, int nseq, char *tag)
{
  int a , b, ntag;
  char **tag_list;
  char *s;
  char **template_list; 
  if ( !name )return NULL;

  tag_list=declare_char (10, 4);

  if ( tag)
    {
      ntag=1; sprintf ( tag_list[0], "%s", tag);
    }
  else
    {
      ntag=0;
      sprintf ( tag_list[ntag++], "_S_");
      sprintf ( tag_list[ntag++], "_G_");
    }
  template_list=declare_char (nseq, 100);
  for ( a=0; a<nseq ; a++)
    {
      for ( b=0; b<ntag; b++)
      {
      s=strstr(name[a], tag_list[b]);
      if ( s)
        {
          s[0]='\0';
            s[2]='\0';
          sprintf ( template_list[a], ">%s _%s_ %s", name[a], s+1, s+3);
          break;
        }
      }
    }
            
  free_char (tag_list, -1);   
  return template_list;
}
Sequence * swap_header ( Sequence *S, Sequence *H)
{
  int a, b, n;

  for ( a=0; a< S->nseq; a++)
    {
      if ( (n=name_is_in_list (S->name[a],H->name, H->nseq, 1000))!=-1)
         {
           char **list;
           int ll;
           
           list=string2list (H->seq_comment[n]);
           if ( list==NULL || atoi(list[0])==1)continue;
           S->seq_comment[a]='\0';
           sprintf (S->name[a], "%s%s%s",H->name[n], list[1], list[2]);
           vfree ( S->seq_comment[a]);S->seq_comment[a]=vcalloc ( strlen (H->seq_comment[n])+1, sizeof (char));
           for (b=3; b< atoi(list[0]); b++)S->seq_comment[a]=strcat (S->seq_comment[a], list[b]);
           free_char (list, -1);
         }
    }
  return S;
}


Sequence * profile_seq2template_seq ( Sequence *S, char *template_file)
{
  /*This function fetches potential templates associated with sequences within a profile*/
  int i, j;
  Alignment *A;
  static int prf_index_name;

  for ( i=0; i< S->nseq; i++)
    {
      if ( A=seq2R_template_profile (S, i))
      {
        A->S=aln2seq (A);
        A->S=seq2template_seq (A->S, template_file);    
      }
    }

  return S;
}
      
Sequence * seq2template_type(Sequence *Seq)
{
  int a, e;
  int s, p;
  struct X_template *S=NULL;
  struct X_template *P=NULL;
  struct X_template *R=NULL;
  struct X_template *G=NULL;
  Alignment *A;
  e=' ';
  for (a=0; a< Seq->nseq; a++)
    {
      if (!Seq->T[a])continue;
      P=seq_has_template (Seq, a, "_P_");
      S=seq_has_template (Seq, a, "_S_");
      R=seq_has_template (Seq, a, "_R_");
      G=seq_has_template (Seq, a, "_G_");
      
      s=(!P)?1:0;
      p=(P && (P->VP)->public_pdb)?1:0;
      sprintf ( (Seq->T[a])->seq_type, "%c%c%c%c%c%c", (P)?'P':e, (S)?'S':e, (S)?'s':e, (p)?'p':e, (R)?'R':e, (G)?'G':e);
      if (R && (A=seq2R_template_profile (Seq,a)))
      {
        A->S=seq2template_type ( A->S);
      }       
    }
  return Seq;
}

char * string_contains_template_tag (char *string)
{
  if ( strstr (string, "_P_"))return "_P_";
  if ( strstr (string, "_S_"))return "_S_";
  if ( strstr (string, "_R_"))return "_R_";
  if ( strstr (string, "_G_"))return "_G_";
  return NULL;
}

Sequence * seq2template_seq ( Sequence *S, char *template_list)
{
  /*Expected format for the template file:
    >seq_name _X_ Target_template
    X: S for Structures 
    G for genomes (Exoset)
  */

  /*Fill the sequences*/
  
  if ( (template_list && template_list[0]=='\0') || strm ( template_list, "no_template")) return S;
  else if ( string_contains_template_tag (template_list))
    {
      int a;
      char *p;
      for (a=0; a< S->nseq; a++)
      {
        if ( p=strstr (template_list,"_self"))p=S->name[a];
        else if ( p=strstr (template_list, "_seqfile_"));
        else 
          {
            fprintf ( stderr, "\nUnkown mode for Template [FATAL:%s]\n", PROGRAM);
            exit (EXIT_FAILURE);
          }
        
        if (      strstr (template_list, "_P_" ))(S->T[a])->P  =fill_P_template  ( S->name[a], p,S);
        else if ( strstr (template_list, "_S_")) (S->T[a])->S  =fill_S_template  ( S->name[a], p,S);    
        else if ( strstr (template_list, "_R_" ))(S->T[a])->R  =fill_R_template  ( S->name[a], p,S);
        else if ( strstr (template_list, "_G_" ))(S->T[a])->G  =fill_G_template  ( S->name[a], p,S);
        else if ( strstr (template_list, "_RB_"))(S->T[a])->RB =fill_RB_template ( S->name[a], p,S);
      }
    }
  else if ( template_list==NULL || format_is_fasta (template_list))
    {
      Sequence *T;
      int a, i;
      
      
      T=(template_list!=NULL)?get_fasta_sequence (template_list, NULL):S;
      for (a=0; a< T->nseq; a++)
        {
          char *p;
          
          if ((i=name_is_in_list(T->name[a], S->name, S->nseq, MAXNAMES))!=-1)
            {
            
            if (       (p=strstr (T->seq_comment[a], "_P_")))(S->T[i])->P=fill_P_template (S->name[i],p,S);
            else if (  (p=strstr (T->seq_comment[a], "_S_")))(S->T[i])->R=fill_S_template (S->name[i],p,S);
            else if (  (p=strstr (T->seq_comment[a], "_R_")))(S->T[i])->R=fill_R_template (S->name[i],p,S);
            else if (  (p=strstr (T->seq_comment[a], "_G_")))(S->T[i])->G=fill_G_template (S->name[i],p,S);
            else if (  (p=strstr (T->seq_comment[a], "_RB_")))(S->T[i])->RB=fill_RB_template (S->name[i],p,S);
            if (T!=S)strcat (S->seq_comment[i], T->seq_comment[a]);
            }
        }
      if (T!=S)free_sequence (T, -1);
    }
  else
    {
      char *tmp1, *tmp2, command[10000];
      Alignment *A;
      tmp1=vtmpnam (NULL); tmp2=vtmpnam (NULL);
      
      A=seq2aln (S,NULL, 0);
      output_fasta_seq (tmp1, A);
      sprintf ( command, "%s -infile=%s -outfile=%s", template_list, tmp1, tmp2);
      my_system ( command);
      free_aln (A);
      if ( check_file_exists (tmp2) && format_is_fasta(tmp2))
      S=seq2template_seq (S, tmp2);
      else 
      fprintf ( stderr, "\nCould not Run %s to find templates [WARNING:%s]\n", template_list, PROGRAM);
    }
  return S;
}

char* seq2template_file (Sequence *S, char *file)
{
  Alignment *A;
  int i;
  
  if (file==NULL)file=vtmpnam (NULL);
  
  seq2template_file2 (S, file, "w");
  for (i=0; i<S->nseq; i++)
    if ( A=seq2R_template_profile (S, i))
      {
      seq2template_file2 (A->S, file, "a");
      }
  return file;
}
  
int seq2template_file2 (Sequence *S, char *file, char *mode)
{
  FILE *fp;
  int i;
  char buf1[10000];
  char buf2[10000];
  struct X_template *X;
  
  fp=vfopen ( file, mode);
  for ( i=0; i< S-> nseq; i++)
    {
      buf1[0]=0;
      if ( (X=(S->T[i])->P)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if ( (X=(S->T[i])->S)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if ( (X=(S->T[i])->R)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if ( (X=(S->T[i])->G)){sprintf (buf2, " %s %s ", X->template_type, X->template_file);strcat (buf1, buf2);}
      if (buf1[0])fprintf ( fp, ">%s %s\n", S->name[i], buf1);
    }
  vfclose (fp);
}
         
      
      
      
int seq2n_X_template ( Sequence *S, char *type)
{
  int a, n;
  
  for (n=0,a=0; a< S->nseq; a++)
    {
      if ( strm2 (type, "_P_","_*_") && (S->T[a])->P)n++;
      if ( strm2 (type, "_S_","_*_") && (S->T[a])->S)n++;
      if ( strm2 (type, "_R_","_*_") && (S->T[a])->R)n++;
      if ( strm2 (type, "_G_","_*_") && (S->T[a])->G)n++;
    }
  return n;
}
struct X_template *fill_X_template ( char *name, char *p, char *token)
{
  struct X_template *X;
  struct P_template *P;
  struct S_template *S;
  struct R_template *R;
  struct G_template *G;
  char *k;
  
  X=vcalloc (1, sizeof (X_template));
  sprintf ( X->seq_name, "%s", name);
  if ( k=strstr (p, token))sscanf (k+strlen(token), "%s",X->template_name);
  else sprintf (X->template_name, "%s", p);
  
  sprintf ( X->template_type, "%s", token);
  if ( strm (token, "_P_"))X->VP=vcalloc (1, sizeof (P_template));
  if ( strm (token, "_S_"))X->VS=vcalloc (1, sizeof (S_template));
  if ( strm (token, "_R_"))X->VR=vcalloc (1, sizeof (R_template));
  if ( strm (token, "_G_"))X->VG=vcalloc (1, sizeof (G_template));
  if ( strm (token, "_RB_"))X->VRB=vcalloc (1, sizeof (R_template));
  
  return X;
}

struct X_template* free_X_template ( struct X_template *X)
{
  if (X->VP)
    {
      vfree (X->VP);
    }
  else if ( X->VS)
    {
      free_sequence ((X->VS)->S, -1);
      vfree (X->VS);
    }
  else if ( X->VR)
    {
      free_aln ((X->VR)->A);
      vfree (X->VR);
    }
  else if ( X->VG)
    {
      free_sequence ((X->VG)->S, -1);
      vfree (X->VG);
    }
  else if ( X->VRB)
    {
      free_aln ((X->VRB)->A);
      vfree (X->VG);
    }
  
  vfree (X);
  return NULL;
}

FILE * display_sequence_templates (Sequence *S,int i, FILE *io)
{
  
  io=display_X_template ( (S->T[i])->P, io);
  io=display_X_template ( (S->T[i])->S, io);
  io=display_X_template ( (S->T[i])->R, io);
  io=display_X_template ( (S->T[i])->G, io);
  io=display_X_template ( (S->T[i])->RB, io);
  
  return io;
}
 
FILE * display_X_template (struct X_template *X, FILE *io)
{
  if ( !X) return io;
  if ( !strm (X->template_type, "_S_"))fprintf (io, "\n\t%s: Template=%s, File=%s",template_type2type_name (X->template_type), X->template_name,X->template_file);
  return io;
}
char *template_type2type_name (char *type)
{
  if ( strstr (type, "_P_"))       return "PDB struc";
  else if ( strstr (type, "_S_")) return "Sequence ";
  else if ( strstr (type, "_R_")) return "Profile  ";
  else if ( strstr (type, "_G_")) return "Genomic  ";
  else if ( strstr (type, "_RB_")) return "BlastProf";
  
  else return type;
}

struct X_template *fill_P_template ( char *name,char *p, Sequence *S)
{
  struct X_template *P;
  char *template_file;
  
  P=fill_X_template ( name, p, "_P_");  
  

  template_file=is_pdb_struc(P->template_name);

  if (!template_file)
    {
      fprintf ( stderr, "\nWarning: _P_ Template |%s| Could Not Be Found [WARNING:%s]\n", p,PROGRAM);
      free_X_template (P);
      return NULL;
    }
  else
    {

      sprintf ( P->template_file, "%s",template_file);
      sprintf ((P->VP)->pdb_id, "%s", get_pdb_id (template_file));
      
      (P->VP)->public_pdb=is_pdb_name ((P->VP)->pdb_id);
    }
  return P;
}

struct X_template *fill_S_template ( char *name,char *p, Sequence *Seq)
{
  struct X_template *S;
  S=fill_X_template ( name, p, "_S_"); 
  if ( strm (name, p))sprintf ( S->template_file, "%s",output_fasta_seqX (NULL,"w",Seq,NULL, seq_name2index (name, Seq)));
  (S->VS)->S=get_fasta_sequence (S->template_file, NULL);
  return S;
}

struct X_template *fill_R_template ( char *name,char *p, Sequence *S)
{
  /*Profile template*/
  struct X_template *R;
  
  R=fill_X_template ( name, p, "_R_");
  sprintf ( R->template_file, "%s", R->template_name);
  
  if (!is_aln(R->template_file) && !is_seq (R->template_file))
    {
      fprintf ( stderr, "\nWarning: _R_ Template %s Could Not Be Found [WARNING:%s]\n",R->template_name,PROGRAM);
      free_X_template (R);
      return NULL;
    }
  else
    {
      (R->VR)->A=main_read_aln (R->template_file, NULL);
      (R->VR)->A=aln2profile ((R->VR)->A);
    }
  return R;
}

struct X_template *fill_G_template ( char *name,char *p, Sequence *S)
{
  struct X_template *G;
  G=fill_X_template ( name, p, "_G_");  
  

  /*1: Get the sequence from another file if needed*/
  if ( strm (name, p))sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",S,NULL, seq_name2index (name, S)));
  else if ( strstr (p, "_seqfile_"))
    {
      Sequence *ST;
      int i1, i2;
      
      ST=main_read_seq (after_strstr ( p,"_seqfile_"));
      i1=seq_name2index (name, S);
      i2=seq_name2index (name, ST);
      sprintf ( G->template_file, "%s",output_fasta_seqX (NULL,"w",ST,NULL, i2));
      sprintf ( G->template_name, "%s", name);
      free_sequence (ST, -1);
    }
  else sprintf (G->template_file, "%s", G->template_name);
      
  
  /*2: Put the template in VG->S*/
  if (!is_seq (G->template_file))
    {
      fprintf ( stderr, "\nWarning: _G_ Template %s Could Not Be Found [WARNING:%s]\n",p, PROGRAM);

      free_X_template (G);
      return NULL;
    }
  else
    {
      (G->VG)->S=get_fasta_sequence (G->template_file, NULL);
    }
  return G;
}

struct X_template *fill_RB_template ( char *name,char *p, Sequence *S)
{
 
  /*Profile template*/
  return fill_R_template (name, p, S);
}

char *seq2T_value ( Sequence *S, int n, char *value, char *type)
{
  static char *rv_buf;
  X_template *X;

  if ( !rv_buf)rv_buf=vcalloc (100, sizeof(char));
  if (!(X=seq_has_template (S, n, type)))return NULL;
  else
    {
      if (strm (value, "template_file"))return X->template_file;
      else if ( strm (value, "template_name"))return X->template_name;
      else if ( strm (value, "seq_name"))return X->seq_name;
      else if (strm (type, "_P_"))
      {
        if ( strm (value, "pdb_id"))return (X->VP)->pdb_id;
        else if ( strm (value, "public_pdb"))
          {
            if ((X->VP)->public_pdb){sprintf ( rv_buf, "1");return rv_buf;}
            else return NULL;
          }
      }
      else if ( strm (type, "_R_"))
      {
        if ( strm (value, "A"))
          {
            if ((X->VR)->A){sprintf ( rv_buf, "%d", (X->VR)->A);return rv_buf;}
            else return NULL;
          }
      }
      else if ( strm (type, "_RB_"))
      {
         if ( strm (value, "A"))
          {
            if ((X->VRB)->A){sprintf ( rv_buf, "%d", (X->VRB)->A);return rv_buf;}
            else return NULL;
          }
      }
    }
  return NULL;
}

char *seq2P_template_file(Sequence *S, int n)
{
  return seq2T_value (S, n, "template_file", "_P_");
}

Alignment * seq2R_template_profile (Sequence *S, int n)
{
  char *r;
  return (Alignment *)atop(seq2T_value (S, n, "A", "_R_"));
}

struct X_template* seq_has_template ( Sequence *S, int n, char *mode)
{
  Template *T;
  
  if ( !S || !mode) return NULL;
  else if ( n<0 || n>=S->nseq)return NULL;
  else if ( !(S->T)) return NULL;
  else if ( !(S->T[n]))return NULL;

  T=S->T[n];
  if      ( strm (mode, "_P_"))return T->P;
  else if ( strm (mode, "_S_"))return T->S;
  else if ( strm (mode, "_R_"))return T->R;
  else if ( strm (mode, "_G_"))return T->G;
  else return NULL;
}  
  
Alignment * reorder_aln ( Alignment *A, char **name, int nseq)
      {
      int a,sn;
      Alignment *BUF;
      int  n=0;
      int *tpp_int;
      
      BUF=copy_aln ( A,NULL); 
      for ( a=0; a<nseq; a++)
            {
              sn =name_is_in_list ( name[a],BUF->name, A->nseq,STRING);
              if ( sn==-1)
                  {
                      ;
                  }
            else
                {
                
                  
                SWAPP(A->order[n], BUF->order[sn], tpp_int);
                sprintf ( A->name[n], "%s", BUF->name[sn]);           
                sprintf ( A->seq_al[n], "%s",BUF->seq_al[sn]);
                sprintf ( A->seq_comment[n], "%s",  BUF->seq_comment[sn]);
                
                n++;
                  
                }
            }
      
      for ( a=n; a< A->nseq; a++)A->name[a][0]=A->seq_al[a][0]='\0';
      A->nseq=n;
      
      if ( A->A)A->A=reorder_aln(A->A, name, nseq);
      
      free_aln (BUF);
      return A;
      } 

Sequence * reorder_seq ( Sequence *A, char **name, int nseq)
      {
      int a,sn;
      Sequence *nA;
      Alignment *iA;
      
      nA=duplicate_sequence (A);
      
      
      for ( a=0; a< nseq; a++)
        {
          sn=name_is_in_list (name[a] ,nA->name, nA->nseq, 100);
          if (sn==-1)continue;
          
          if ( nA->file)       sprintf ( A->file[a], "%s", nA->file[sn]);
          
          if ( nA->seq_comment)sprintf ( A->seq_comment[a], "%s", nA->seq_comment[sn]);
          if ( nA->aln_comment)sprintf ( A->aln_comment[a], "%s", nA->aln_comment[sn]);
          sprintf ( A->seq[a], "%s", nA->seq[sn]);
          A->len[a]=nA->len[sn];
          sprintf ( A->name[a], "%s", nA->name[sn]);
          A->T[a][0]=nA->T[sn][0];
        }
      A->nseq=nseq;
      free_sequence (nA, nA->nseq);
      
      return A;
} 

char * concatenate_seq ( Sequence *S, char *conc, int *order)
        {
          int a;
          
          vfree (conc);
          conc=vcalloc ( S->nseq*S->max_len, sizeof (char));

          for ( a=0; a< S->nseq; a++)
              {
                conc=strcat ( conc, S->seq[order[a]]);
            }
          return conc;

      }

Alignment * rotate_aln ( Alignment *A, char *name)
{
  Alignment *B;
  int a, b;
  
  B=declare_aln2 (A->len_aln, A->nseq+1);
  for ( a=0; a< A->nseq; a++)
    for ( b=0; b< A->len_aln; b++)
      {
      B->seq_al[b][a]=A->seq_al[a][b];
      }
  for (a=0; a< A->len_aln; a++)sprintf ( B->name[a], "%s_%s%d", name, (a<9)?"0":"",a+1);
  for (a=0; a< A->len_aln; a++)B->seq_al[a][A->nseq]='\0';
  B->len_aln=A->nseq;
  B->nseq=A->len_aln;
  free_aln (A);
  return B;
}
  
Alignment * invert_aln ( Alignment *A)
{
  char *buf;
  int l, a, b, c;
  
  l=strlen ( A->seq_al[0]);
  buf=vcalloc ( l,sizeof (char) );
  for ( a=0; a< A->nseq; a++)
    {
      
      for ( c=l-1,b=0; b< l; b++, c--)
      {
        buf[c]=A->seq_al[a][b];
       }
      buf[l]='\0';
      sprintf ( A->seq_al[a], "%s", buf);
    }
  vfree(buf);
  return A;
}
Alignment * extract_nol_local_aln(Alignment *A, int start, int max_end)
     {
     A=extract_aln ( A, start, max_end);
     A=trunkate_local_aln (A);
     return A;
     }
Alignment * extract_aln ( Alignment *A, int start, int end)
{
  return extract_aln2 ( A, start, end, NULL);
}

Alignment * extract_aln2 ( Alignment *A, int in_start, int in_end, char *seq)
     {
       char *tmp;
       FILE *fp;
       

       tmp=vtmpnam (NULL);
       fp=vfopen (tmp, "w");
       fprintf ( fp, "%s %d %d\n", seq, in_start, in_end);
       vfclose (fp);
       return extract_aln3 (A,tmp);
     }
Alignment * extract_aln3 ( Alignment *B, char *file)
     {
     int a, b, c;
     int start, end;
     int n, i, s, nline=0;
     FILE *fp;
     Alignment *A=NULL;
     int *col;
     char name[MAXNAMES];
     char line[VERY_LONG_STRING];
     
     /*Reads in a file
       #comment
       seqname pos
       OR
       seqname start end[
       modifies the incoming alignment
     */
     
     A=copy_aln (B, A);     
     col=vcalloc ( A->len_aln, sizeof (int));
     
     fp=vfopen ( file, "r");
     while ( (c=fgetc(fp))!=EOF)
       {
       nline++;
       if ( c=='#')fgets ( line, VERY_LONG_STRING,fp);
       else
         {
           ungetc(c, fp);
           fgets ( line, VERY_LONG_STRING,fp);
           
           
           if (sscanf (line, "%s %d %d\n", name, &start, &end)==3);
           else if (sscanf (line, "%s %d\n", name, &start)==2)
             {
             end=start+1;
             }
           else
             {
             fprintf ( stderr, "\nWARNING: wrong format in coordinate file (line=%d) [NON FATAL:%s]\n", nline, PROGRAM);
             continue;
             }
           s=name_is_in_list (name,A->name,A->nseq,MAXNAMES);
       
           if ( s==-1 && !strm (name, "cons"))
             {
             fprintf ( stderr, "\nWARNING: Seq %s does not belong to the alignment (line %d)  [NON FATAL:%s]\n", name,nline, PROGRAM);
             continue;
             }
           else if ( start>end)
             {
             fprintf ( stderr, "\nWARNING: Illegal coordinates [%s %d %d] (line %d)  [NON FATAL:%s]\n", name,start, end,nline, PROGRAM);
             continue;
             }
           else
             {
             for (n=0, a=0; a< A->len_aln; a++)
               {
                 
                 i=(strm (name, "cons"))?1:!is_gap(A->seq_al[s][a]);
                 n+=i;
                 if (n>=start && n<end && !(i==0 && n==end-1))
                   {
                   col[a]=1;
                   }
                 else if ( n>=end)a=A->len_aln;
               }
             }
         }
       }
     vfclose ( fp);
     

             
     /*Extract [start-end[*/
     for ( b=0,a=0; a< A->len_aln; a++)
       {
       if ( col[a])
         {
           for (c=0; c< A->nseq; c++)A->seq_al[c][b]=A->seq_al[c][a];
           b++;
         }
       }
     A->len_aln=b;
     for (c=0; c< A->nseq; c++)A->seq_al[c][A->len_aln]='\0';
     vfree (col);
     
     return A;

     }
Alignment * trunkate_local_aln ( Alignment *A)
     {
     int a, b;
     int **pos;
     int **cache;
     int seq;
   
     
     cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),1)+A->len_aln+1);    
     pos=aln2pos_simple(A,A->nseq);
     
     for ( b=0; b<A->len_aln; b++)
       for ( a=0; a< A->nseq; a++)   
           {
           seq=A->order[a][0];
           if ( pos[a][b]<=0);
           else if ( pos[a][b]>0)
             {
             
             if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++;
             else if ( cache[seq][pos[a][b]]>=1)
                  {          
                  cache[seq][pos[a][b]]++;
                  A->seq_al[a][b]='\0';
                  }
             }
           }
     
     A->len_aln=get_shortest_string ( A->seq_al, A->nseq, NULL, NULL);
     pad_string_array ( A->seq_al, A->nseq, A->len_aln, '-');
     
     free_int (pos, -1);
     free_int ( cache,-1);
     
     
     return A;
     }

int get_nol_aln_border ( Alignment *A, int start, int direction)
     {
     int a, b;
     int **pos;
     int **cache;
     int seq,end;
     
     /*This Function Returns the limit position for a non overlaping alignment*/
     
     cache=declare_int (return_max_int (A->order,read_size_int ( A->order,sizeof (int*)),0)+1,return_max_int (A->order,read_size_int ( A->order,sizeof (int)),1)+A->len_aln+1);
     pos=aln2pos_simple(A,A->nseq);
     end=(direction==GO_RIGHT)?A->len_aln:-1;
     

     for ( b=start; b!=end;b+=direction)
       for ( a=0; a< A->nseq; a++)   
           {
           seq=A->order[a][0];
           if ( pos[a][b]<=0);
           else if ( pos[a][b]>0)
             {
             
             if (cache[seq][pos[a][b]]==0)cache[seq][pos[a][b]]++;
             else if ( cache[seq][pos[a][b]]>=1)
                  {          
                  cache[seq][pos[a][b]]++;
                  free_int(cache, -1);
                  return b-direction;
                  }
             }
           }
     
     free_int ( cache,-1);
     free_int (pos, -1);
     return end-direction;
     }





char * extract_defined_seq ( char *in, int in_of, int in_start, int *aa_def, int dir, int *out_start, char *out)
     {
     int start, end,l;
     int b, c, d;

     

     if ( dir==GO_LEFT){start=in_start-1;}
     else if ( dir==GO_RIGHT){start=in_start+1;}        
      
     end=start;
     while (aa_def[end]!=UNDEFINED)
       {
       end+=dir;
       }
     end-=dir;
     
     if (end<start)SWAP(end,start);
     
     l=strlen ( in);
     out_start[0]=-1;
     for (b=0,d=0,c=in_of;b<l; b++)
         {
       c+=1-is_gap(in[b]);
       if ( c>=start && c<=end)
           {
           if ( out_start[0]==-1)out_start[0]=c-!is_gap(in[b]);
           out[d++]=in[b];
           }
       }
     out[d]='\0';
     
    
     return out;
     }
Alignment * aln_cat ( Alignment *A, Alignment *B)
     { 
     int a;
     
     if ( A->nseq!=B->nseq) 
       {
       fprintf ( stderr, "\nERROR IN ALN CAT: DIFFERENT NSEQ\n");
       exit(EXIT_FAILURE);
       }

     A=realloc_alignment2(A, A->nseq,A->len_aln+B->len_aln+1);
    
     for ( a=0;a< A->nseq; a++)
         {  
       strcat ( A->seq_al[a], B->seq_al[a]);
       }
     A->len_aln+=B->len_aln;
     return A;
     }
int verify_aln ( Alignment *A, Sequence *S, char *message)
     {
     int a, b, c,s,r;


     for ( a=0;a< A->nseq; a++)
         {
       s=A->order[a][0];
       r=A->order[a][1];
       for ( b=0, c=0; b< A->len_aln; b++)
           {
           if ( !is_gap(A->seq_al[a][b]))
              {
              if (tolower(A->seq_al[a][b])!=tolower(S->seq[s][c+r]))
                  {
                  fprintf ( stderr, "\n%s\nResidue [%c %d, %c %d] line %d seq %d",message,A->seq_al[a][b], b,S->seq[s][c+r], c+r,a,s);  
                  output_Alignment_with_res_number(A, stderr);
                  exit(EXIT_FAILURE);
                  return 0;
                  }
              c++;
              }
           }
       }
     return 1;
     }

Alignment *adjust_est_aln ( Alignment *PW, Alignment *M, int s)
{
  /*This function reajusts M, threading M onto PW
   two seqences in PW
   s+1 seq in M
   
   seq 0 PW ----> 0->s-1 in M
   seq 1 PW ----> 1->s   in M;
   
   */
  int a, b;
  static char **array;

  
  int top_M=0;
  int bottom_M=0;
  
  
  if ( array==NULL)
    {
      array=declare_char (500, 100000);
    }

  for ( a=0; a< PW->len_aln; a++)
    {
      if ( is_gap(PW->seq_al[0][a]))
      {
        for ( b=0; b< s; b++)
          array[b][a]='-';
      }
      else
      {
        for ( b=0; b< s; b++)
          array[b][a]=M->seq_al[b][top_M];
      top_M++;
      }
      
      if ( is_gap(PW->seq_al[1][a]))
      {
        array[s][a]='-';
      }
      else
      {
        
        array[s][a]=M->seq_al[s][bottom_M];
      bottom_M++;
      } 
    }
  
  M->len_aln=PW->len_aln;
  for (a=0; a<s; a++)
    {
      for (b=0; b<PW->len_aln; b++)
      M->seq_al[a][b]=array[a][b];
      M->seq_al[a][b]='\0';
    }


  M->nseq=s+1;
  
  return M;
}


Alignment * rename_seq_in_aln (Alignment *A, char ***list)
{
  int n, i;
  if ( !A)return A;
  

  
  n=0;
  while ( list[n][0][0])
    {
      if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1)
      {
        sprintf ( A->name[i], "%s", list[n][1]);
      }
      n++;
    }
  
  A->S=rename_seq_in_seq (A->S, list);
  return A;
}
Sequence * rename_seq_in_seq (Sequence *A, char ***list)
{
  int n, i;
  if ( !A || !list)return A;
  
  n=0;
  while ( list[n][0][0])
    {
      if ( (i=name_is_in_list (list[n][0], A->name, A->nseq, 100))!=-1)
      {
        sprintf ( A->name[i], "%s", list[n][1]);
      }
      n++;
    }
  return A;
}
/********************************************************************/
/*                                                                  */
/*                   FLOAT SIMILARITIES                             */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/
float get_seq_fsim ( char *string1, char *string2, char *ignore, char *similarity_set,int **matrix, int MODE )
      {
      int len, a, r1, r2, nr1=0, nr2=0;
      float pos=0, sim=0;
            

      len=MIN((strlen (string1)),(strlen (string2)));
      if ( len==0)return 0;
      
      for ( a=0; a< len; a++)
            {
              
              r1=string1[a];
              r2=string2[a];
              nr1+=!is_gap(r1);
              nr2+=!is_gap(r2);
              
              if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
                  {
                  pos++;
                  if ( matrix)sim+=matrix[r1-'A'][r2-'A'];
                  else if (is_in_same_group_aa(r1,r2,0, NULL,similarity_set))
                        {
                        sim++;
                        }
                  }
            }
      if ( MODE==UNGAPED_POSITIONS)return ( sim*100)/pos;
      else if ( MODE==ALIGNED_POSITIONS)return (sim*100)/len;
      else if ( MODE==AVERAGE_POSITIONS)return (sim*200)/(nr1+nr2);
      else
        {
          return 0;
        }
      
      }


/********************************************************************/
/*                                                                  */
/*                   ALIGNMENT ANALYSES                             */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/
int **dist_array2sim_array ( int **p, int max)
{
  int s1, s2, a, b;
  s1=read_array_size ((void *)p, sizeof (void *));
  s2=read_array_size ((void*)p[0],sizeof (void *));
  for ( a=0; a< s1; a++)
    for ( b=0; b< s2; b++)
      {
      p[a][b]=max-p[a][b];
      }     
  return p;
}
int **sim_array2dist_array ( int **p, int max)
{
  int s1, s2, a, b;
  s1=read_array_size ((void *)p, sizeof (void *));
  s2=read_array_size ((void*)p[0],sizeof (void *));
  for ( a=0; a< s1; a++)
    for ( b=0; b< s2; b++)
      {
      p[a][b]=max-(int)p[a][b];
      }     
  return p;
}


int aln2most_similar_sequence ( Alignment *A, char *mode)
{
  int **w;
  int a, b;
  int avg, best_avg=0, best_seq=0;
  char *buf;
  int coverage;

  
  if ( !A) return -1;
  else if ( A->nseq==1)return 0;
  else
    {
      buf=vcalloc ( A->len_aln+1, sizeof (char));
      w=get_sim_aln_array ( A, mode);
      
      for ( a=0; a< A->nseq; a++)
      {
        sprintf ( buf, "%s", A->seq_al[a]);
        ungap(buf);
        coverage=(strlen(buf)*MAXID)/A->len_aln;
        
        for ( avg=0,b=0; b< A->nseq; b++)avg+=w[a][b]*coverage;
        if ( avg>best_avg){best_avg=avg; best_seq=a;}
      }
      free_int (w, -1);
      vfree (buf);
      return best_seq;
    }
  
}
int aln2coverage ( Alignment *A, int ref_seq)
{
  int a,b;
  int cov_pos=0, npos=0;

  
  for ( a=0; a< A->len_aln; a++)
    {
      if ( !is_gap ( A->seq_al[ref_seq][a]))
      {
        npos++;
        for ( b=0; b< A->nseq; b++)
          {
            if ( b!=ref_seq && !is_gap ( A->seq_al[b][a])){cov_pos++;break;}
          }
      }
    }
  return  (int) (npos==0)?0:(( MAXID*cov_pos)/npos);
}
 
int aln2sim ( Alignment *A, char *mode)
{
  int **w;
  float avg=0;
  int a, b, c;

  if ( !A || A->nseq<2) return -1;
  w=get_sim_aln_array ( A, mode);

  for (c=0, a=0; a< A->nseq-1; a++)
    for ( b=a+1; b< A->nseq; b++, c++)
      {
      avg+=(float)w[a][b];
      }
  free_int (w, -1);
  return (int)((float)avg/(float)c);
}

int aln_is_aligned ( Alignment *A)
{
  int a, b;
  
  if ( !A)return 0;
  for (a=0; a< A->nseq; a++)
    for ( b=A->len_aln-1; b>0; b--)
      {
      if (!is_gap(A->seq_al[a][b]) && is_gap(A->seq_al[a][b-1]))return 1;
      }
  return 0;
}
      
int* get_cdna_seq_winsim ( int *cache, char *string1, char *string2, char *ignore, char *mode,int *w )
      {
      int len1, len2;
      int a, x;

      
      len1=strlen (string1);
      len2=strlen (string2);
            
      if ( len1!=len2)
        {
          fprintf ( stderr, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM);
          crash("");
        }
      
      x=get_cdna_seq_sim(cache, string1, string2, ignore, "");
      for ( a=0; a< len1; a++)
        w[a]=x;

      fprintf (stderr, "\nWARNING: winsim not implemented for cDNA");
      return w;
      }

int get_cdna_seq_sim ( int *cache, char *string1, char *string2, char *ignore, char *mode)
      {
      int len1;
      int len2;
      int a;
      int pos=0;
      int sim=0;
      char r1, r2;
      
      len1=strlen (string1);
      len2=strlen (string2);


      
      if ( len1!=len2)
        {
          fprintf ( stderr, "\nTHE TWO cDNAs DO NOT HAVE THE SAME LENGTH [FATAL:get_cdna_seq_sim:%s", PROGRAM);
          crash("");
        }
          
      for ( a=0; a< len1;)
            {
             
              if ( cache[a]==0){a++;continue;}
              else if ( cache[a]==1)
                {
                  
                  r1=translate_dna_codon (string1+a, 'x');
                  r2=translate_dna_codon (string2+a, 'x');
                  a+=3;
                }
              
              if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
                  {
                  pos++;
                  if (is_in_same_group_aa(r1,r2,0, NULL,mode+4))
                        {
                        sim++;
                        }
                  }
            }



      if (pos==0)
             return 0;
      else  
            return (int) (sim*MAXID)/pos;
      
      }     

int* get_seq_winsim ( char *string1, char *string2, char *ignore, char *mode, int*w)
      {
      int len1, len2, len;
      int left, right;
      int a,b;
      int sim=0;
      int window;
      int r1, r2;

      len1=strlen (string1);
      len2=strlen (string2);
      window=atoi(mode);
      len=2*window+1;
      
      if ( len1!=len2)return 0;
      if (window==0 || (window*2+1)>=len1)
        {
          sim=get_seq_sim (string1, string2, ignore, "");
          for (a=0; a<len1; a++)w[a]=sim;
          return w;
        }
      

      for ( a=0; a< len1; a++)
            {
              
              left =MAX(0, a-window);
              right=MIN(len1, left+len);
              for (sim=0,b=left; b<right; b++)
                {
                  r1=string1[b];
                  r2=string2[b];
                  if (  !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
                  {
                    if (r1==r2)sim++;
                  }
                }
              w[a]=(sim*MAXID)/len;
            }
      return w;
      }


int get_seq_sim ( char *string1, char *string2, char *ignore, char *mode)
      {
      int len1;
      int a;
      int pos1, pos2, pos0, sim;
      int p1, p2;
      int r0,r1, r2;
      
      
      
      len1=strlen (string1);


      for ( sim=pos1=pos2=pos0=0,a=0; a< len1; a++)
            {
              
              r1=string1[a];
              r2=string2[a];
              p1=1-is_in_set (r1, ignore);
              p2=1-is_in_set (r2, ignore);
              pos1+=p1; pos2+=p2;
              if (p1 && p2)
                  {
                    pos0++;
                    if (is_in_same_group_aa(r1,r2,0, NULL, mode))
                      {             
                        sim++;
                      }
                  }
            }
      
      r0=(pos0==0)?0:(sim*MAXID)/pos0;
      r1=(pos1==0)?0:(sim*MAXID)/pos1;
      r2=(pos2==0)?0:(sim*MAXID)/pos2;
      

      return r0;
      
      }     
int get_seq_sim_2 ( char *string1, char *string2, char *ignore, char **gr, int ng)
      {
      int len1;
      int len2;
      int a;
      int pos=0;
      int sim=0;
      char r1, r2;
      
      
      len1=strlen (string1);
      len2=strlen (string2);
      
      if ( len1!=len2)return 0;
      
      for ( a=0; a< len1; a++)
            {
            r1=string1[a];
            r2=string2[a];
            if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
                  {
                  pos++;
                  if (is_in_same_group_aa(r1,r2,ng, gr, NULL))
                        {
                        sim++;
                        }
                  }
            }
      
      if (pos==0)
             return 0;
      else  
            return (int) (sim*MAXID)/pos;
      
      }

int get_seq_sim_3 ( char *string1, char *string2, char *ignore, int **mat)
      {
      int len1;
      int len2;
      int a;
      
      int sim=0;
      char r1, r2;
      
      
      len1=strlen (string1);
      len2=strlen (string2);
      
      if ( len1!=len2)return 0;
      
      for ( a=0; a< len1; a++)
            {
            r1=string1[a];
            r2=string2[a];
            if ( !is_in_set (r1, ignore) && !is_in_set (r2, ignore))
                  {
                  sim+=mat[r1-'A'][r2-'A'];
                  }
            }
      return sim;
      
      }     
int * get_aln_col_weight ( Alignment *A, char *mode)
      {
      int a, b;
      char *col;
      int *weight;
      
      col=vcalloc ( A->nseq, sizeof (int));
      weight=vcalloc (A->len_aln, sizeof (int));
      
      for (a=0; a< A->len_aln; a++)
            {
            for ( b=0; b< A->nseq; b++)
                  col[b]=A->seq_al[b][a];
            weight[a]=(find_group_aa_distribution (col, A->nseq,0,NULL,NULL, mode )*MAXID)/A->nseq;          
            }
      vfree (col);
      return weight;
      
      }     
                  
int analyse_aln_column ( Alignment *B, int col)
    {

    char r=' ';
    int a, b, c;
    static char *mat;
    static int ng_cw_star;
    static char **cw_star;
    int *cw_star_count;
    
    static int ng_cw_col;
    static char **cw_col;
    int *cw_col_count;
    
    static int ng_cw_dot;
    static char **cw_dot;
    int *cw_dot_count;
    

    
    if ( !B->S)B= get_aln_type (B);
    
    
    if ( !B->S || !(B->S)->type)B= get_aln_type (B);
    
    if ( !mat)mat=vcalloc ( STRING, sizeof (char));

    if ( !ng_cw_star)
       {
         cw_star=make_group_aa ( &ng_cw_star, strcpy ( mat,"idmat"));
         cw_col=make_group_aa ( &ng_cw_col, strcpy (mat,"clustalw_col"));
         cw_dot=make_group_aa ( &ng_cw_dot, strcpy (mat, "clustalw_dot"));
       }

    cw_star_count=vcalloc (ng_cw_star, sizeof (int));
    cw_col_count=vcalloc ( ng_cw_col, sizeof (int));
    cw_dot_count=vcalloc (ng_cw_dot, sizeof (int));
    
    for ( a=0; a< B->nseq; a++)
        {
      c=tolower (B->seq_al[a][col]);
      if (is_gap(c)){r=' ';break;}
        
      for ( b=0; b< ng_cw_star; b++)
          cw_star_count[b]+=is_in_set (c, cw_star[b]);      
      for ( b=0; b< ng_cw_col; b++)
          cw_col_count[b]+=is_in_set  (c, cw_col[b]);
      for ( b=0; b< ng_cw_dot; b++)
          cw_dot_count[b]+=is_in_set  (c, cw_dot[b]);
      }
    
   
   
   
    
    if ( !is_gap(c) && r==' ')
      for ( b=0; b< ng_cw_star; b++)if ( cw_star_count[b]==B->nseq){r='*'; break;}
    if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")))
      for ( b=0; b< ng_cw_col ; b++)if ( cw_col_count [b]==B->nseq){r=':'; break;}
    if ( !is_gap(c) && r==' ' && !(strm((B->S)->type, "DNA")))
      for ( b=0; b< ng_cw_dot ; b++)if ( cw_dot_count [b]==B->nseq){r='.'; break;}
    
    
    
    vfree(cw_star_count);
    vfree(cw_col_count);
    vfree(cw_dot_count);   
      
    return r;
    }


int ** get_cov_aln_array ( Alignment *A, char *mode)
{
  int **w;
  int a, b, c, t;
  
  w=declare_int ( A->nseq, A->nseq);
  
  
  for ( a=0; a< A->nseq-1; a++)
    {
      w[a][a]=100;
      for ( t=0,b=a+1; b< A->nseq; b++)
      {
        for ( c=0; c< A->len_aln; c++)
          {
            t+=(!is_gap(A->seq_al[a][c]) &&!is_gap(A->seq_al[b][c]));
          }
        w[a][b]=w[b][a]=(t*100)/A->len_aln;
      }
    }
  return w;
}

int ** get_cov_master_aln_array ( Alignment *A,int n, char *mode)
{
  int **w;
  int    b, c, t;
  
  w=declare_int ( A->nseq, A->nseq);
  
  
  for (b=0; b< A->nseq; b++)
      {
        
        for (t=0, c=0; c< A->len_aln; c++)
          {
            t+=(!is_gap(A->seq_al[n][c]) &&!is_gap(A->seq_al[n][c]));
          }
        w[n][b]=w[b][n]=(t*100)/A->len_aln;
      }
    
  return w;
}
int ** get_sim_master_aln_array ( Alignment *A,int n, char *mode)
      {
      int **w;
      int a, b;
      
      w=declare_int ( A->nseq, A->nseq);
      
      
      for ( a=0; a< A->nseq; a++)
        {
          if ( strm (mode, "cdna"))
          w[n][a]=w[a][n]=get_cdna_seq_sim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[n], "-", mode);                  
        else
          w[n][a]=w[a][n]=get_seq_sim ( A->seq_al[n], A->seq_al[a], "-", mode);
        }
      return w;
      }
int ** get_dist_aln_array ( Alignment *A, char *mode)
{
  int a, b;
  int **w;

  w=get_sim_aln_array ( A, mode);
  return sim_array2dist_array(w,MAXID);
}

int ** get_sim_aln_array ( Alignment *A, char *mode)
      {
      int **w;
      int a, b;
      
      
      w=declare_int ( A->nseq, A->nseq);
      
      for ( a=0; a< A->nseq-1; a++)
        {
          for ( b=a+1; b< A->nseq; b++)
                  {
                    if ( strm (mode, "cdna"))
                      w[a][b]=w[b][a]=get_cdna_seq_sim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[b], "-", mode);                  
                    else if ( strnm (mode, "ktup",4))
                      w[a][b]=w[b][a]=ktup_comparison (A->seq_al[a], A->seq_al[b],atoi(mode+4));
                    else
                      w[a][b]=w[b][a]=get_seq_sim ( A->seq_al[a], A->seq_al[b], "-", mode);
                  }
        }
      return w;
      }

int *** get_winsim_aln_array ( Alignment *A,char *mode, int ***w)
      {
      int a, b;
      for ( a=0; a< A->nseq; a++)
            for ( b=0; b< A->nseq; b++)
                  {
                    if ( strm (mode, "cdna"))
                      w[a][b]=get_cdna_seq_winsim ( A->cdna_cache[0], A->seq_al[a], A->seq_al[b], "-", mode, w[a][b]);              
                    else
                      w[a][b]=get_seq_winsim ( A->seq_al[a], A->seq_al[b], "-", mode, w[a][b]);
                  }
      return w;
      }

Alignment * seq2profile (Sequence *S, int i)
{
  Alignment *A;
  
  if (A=seq2R_template_profile (S, i))return A;
  else
    {
      char *tmp;
      FILE *fp;
      
      tmp=vtmpnam (NULL);
      fp=vfopen ( tmp, "w");
      fprintf (fp, ">%s\n%s\n", S->name[i], S->seq[i]);
      vfclose (fp);
      
      (S->T[i])->R=fill_R_template (S->name[i], tmp, S);
      
      return  seq2R_template_profile (S, i);
    }
}

Alignment* aln2sub_aln_file (Alignment *A, int n, char **string)
{
  char ***list;
  int a;

  list=vcalloc (A->nseq, sizeof (char***));
  if ( n==0)return A;
  else if (n>1)
    {
      int l;
      char *buf;
      
      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
      buf=vcalloc ( 2*n+l+1, sizeof (char));
      for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}     
      list[0]=string2list (buf);
      vfree (buf);
    }
  else if ( file_exists (string[0]))
    {
      list=read_group (string[0]);

    }
  else
    {
      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
      exit (EXIT_FAILURE);
    }  

  
  a=0;
  while (list[a])
    {
      int i, b;
      FILE *fp;
      n=atoi (list[a][0]);
      fp=vfopen (list[a][1], "w");
      for (b=2; b<n; b++)
      {
        i=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);
        if (n==3)ungap (A->seq_al[i]);
        fprintf (fp, ">%s\n%s\n", A->name[i], A->seq_al[i]);        
      }
      vfclose (fp);
      free_char (list[a], -1);
      a++;
    }
  vfree(list);
  return A;
}

Alignment* aln2sub_seq (Alignment *A, int n, char **string)
{
  char ***list;
  int a;
  Sequence *S=NULL;
    
  list=vcalloc (A->nseq, sizeof (char***));
  if ( n==0)return A;
  else if (n>1)
    {
      int l;
      char *buf;
      
      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
      buf=vcalloc ( 2*n+l+1, sizeof (char));
      for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}     
      list[0]=string2list (buf);
      vfree (buf);
    }
  else if ( file_exists (string[0]))
    {
      list=read_group (string[0]);

    }
  else
    {
      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
      exit (EXIT_FAILURE);
    }  

 
  
  a=0;
  while (list[a])
    {
      int t;
      Alignment *B;
      Sequence *subS;
      
      
      B=main_read_aln (list[a][1], NULL);
      t=aln2most_similar_sequence(B, "idmat");
      subS=extract_one_seq(B->name[t],0,0,B,KEEP_NAME);
      S=add_sequence (subS,S,0);
      free_aln (B);free_sequence (subS, -1);
      vremove (list[a][1]);
      a++;
    }
  vfree(list);
  return seq2aln (S, NULL, RM_GAP);
}

Alignment * aln2collapsed_aln (Alignment * A, int n, char **string)
{
  Alignment *B;
  char ***list;
  char **list2;
  char *buf;
  FILE *fp;
  int a, b,c, ns, m, l;
  int *collapsed;
  
  list=vcalloc (A->nseq, sizeof (char***));
  ns=0;
  if ( n==0)return A;
  else if (n>1)
    {
      for (l=0,a=0; a< n; a++)l+=strlen (string[a]);
      buf=vcalloc ( 2*n+l+1, sizeof (char));
      for (a=0; a< n; a++){buf=strcat (buf,string[a]), buf=strcat ( buf, " ");}
     
      list[0]=string2list (buf);ns=1;
      
    }
  else if ( file_exists (string[0]))
    {
      /*Format: Fasta like, the name fo the group followed with the name of the sequences
      ><Group name> <First Seq> <second seq> ....
      Groups must NOT be overlaping
      */
      l=measure_longest_line_in_file (string[0])+1; 
      buf=vcalloc (l, sizeof (char));
      ns=0;
      fp=vfopen (string[0], "r");
      while ((c=fgetc(fp))!=EOF)
      {
        buf=fgets (buf,l-1, fp);
        if ( c=='>')list[ns++]=string2list (buf);
      }
      vfclose (fp);
    }
  else
    {
      fprintf (stderr, "\nERROR: file <%s> does not exist [FATAL:%s]\n",string[0], PROGRAM);
      exit (EXIT_FAILURE);
    }
  
  vfree (buf); buf=NULL;

  /*Identify lost sequences*/
  collapsed=vcalloc (A->nseq, sizeof (int));
  for ( a=0; a< ns; a++)
      {
      m=atoi (list[a][0]); 
      for (b=2; b<m ; b++)
        {
          c=name_is_in_list (list[a][b], A->name, A->nseq, MAXNAMES);
          if ( c>=0)collapsed[c]=1;
        }
      }
  for ( a=0; a< A->nseq; a++)
    {
      if ( collapsed[a]==0)
      {
        list[ns]=declare_char (3, MAXNAMES);
        sprintf ( list[ns][0], "3");
        sprintf ( list[ns][1], "%s", A->name[a]);
        sprintf ( list[ns][2], "%s", A->name[a]);
        ns++;
      }
    }
  vfree (collapsed);
  
  
  


  list2=declare_char (A->nseq, 100);
  /*1 Collapse the alignment*/
  for ( a=0; a< ns; a++)
    {
      sprintf ( list2[a], "%s", list[a][2]);
    }
   B=extract_sub_aln2 ( A, ns, list2);
  /*2 Rename the sequences*/
  for ( a=0; a< ns; a++)
    {
      sprintf ( B->name[a], "%s", list[a][1]);
    }
  /*replace sequence with consensus*/
  
  for ( a=0; a< ns; a++)
    {
      m=atoi (list[a][0]);
      for (c=0, b=2; b<m;c++, b++)
      {
        sprintf ( list2[c], "%s", list[a][b]);
      }
      buf=sub_aln2cons_seq_mat2 ( A,m-2,list2, "blosum62mt");
      sprintf (B->seq_al[a], "%s", buf);     
    }
  vfree (buf);

  free_aln (A);
  B->S=aln2seq(B);
  return B;
}
Alignment * aln2profile (Alignment * A)
    {
      Alignment *B=NULL;
      char *cons;
      
      if (!A->P)
      {
        A->P=declare_profile (AA_ALPHABET,A->len_aln+1);
      }
      B=copy_aln (A, B);
      free_int ((A->P)->count, -1);
      free_int ((A->P)->count2, -1);
      free_int ((A->P)->count3, -1);
      (A->P)->count=aln2count_mat (A);
      (A->P)->count2=aln2count_mat2 (A);
      
      cons=aln2cons_seq_mat (A, "blosum62mt");
      
      sprintf (B->seq_al[0], "%s", cons);
      B->nseq=1;
      (A->P)->count3=aln2count_mat2 (B);
      vfree (cons);
      free_aln (B);
      
      

      return A;
        
    }

int** aln2count_mat2 ( Alignment *A)
{
  return sub_aln2count_mat2 (A, 0, NULL);
}
int** sub_aln2count_mat2 ( Alignment *A, int ns, int *l_s)
{
  int **count;
  int used[1000];
  int a, b;
  char r;
  int l_s_flag=0;
  int len;
  int us;
  
  /*count[x][0]=n symbols in column
    count[x][1]=total_size of line
    count[x][2]=Gap frequency
    count[x][n]=symbol n
    count[x][n+1]=N occurence symbol n;
    count[x][n+1]=N frequence symbol n*100;
 
    special multi-channeling
    count[x][count[x][1]]=Nseq
    count[x][count[x][1]+s]=residue col x, sequence s
  */

  for (a=0; a< 1000; a++)used[a]=0;
  if ( ns==0)
    {
      ns=A->nseq;
      l_s=vcalloc ( A->nseq, sizeof (int));
      for ( a=0; a< A->nseq; a++)l_s[a]=a;
      l_s_flag=1;
    }
  len=strlen (A->seq_al[l_s[0]]);
            
  count=declare_int (len,100+ns+2);
  for (a=0; a<len; a++)
    {
      
      for (us=ns, b=0; b<ns; b++)
      {
        
        r=tolower (A->seq_al[l_s[b]][a]);
        
        if (is_gap(r))us--;
        else if (used[r])
          {
            count[a][used[r]*3+1]++;
          }
        else
          {
            used[r]=++count[a][0];
            count[a][used[r]*3]=r;
            count[a][used[r]*3+1]++;
          }
      }
      count[a][1]=count[a][0]*3+2;
      count[a][2]=(A->nseq-us)*100/A->nseq;
      
      for (b=3; b<count[a][1]; b+=3)
      {
        count[a][b+2]=(count[a][b+1]*100)/us;
        used[count[a][b]]=0;
      }
      /*Option for multi channeling*/
      count[a][count[a][1]]=A->nseq;
      for (b=1; b<=A->nseq; b++)
      count [a][count[a][1]+b]=(is_gap(A->seq_al[b-1][a]))?0:A->seq_al[b-1][a];
      
    }

  if ( l_s_flag)vfree(l_s);
  return count;
}
        
int** aln2count_mat ( Alignment *A)
    { /*
      function documentation: start
      
      int output_freq_mat ( char *outfile, Aligmnent *A)

      This function counts the number of residues in each column of an alignment (Prot/NA)
      It outputs these values in the following format

      This format can be piped into:
      The routine used for computing the p-value  gmat-inf-gc-v2c
      
      function documentation: end
      */
      
    int a, b,x;
    int **freq_mat;
    int alp_size;

    alp_size=sizeof (AA_ALPHABET); 
    freq_mat=declare_int (alp_size+2, A->len_aln);
      

    for ( a=0; a<A->len_aln; a++)
      {
      for ( b=0; b< A->nseq; b++)
        {
          if ( is_gap ( A->seq_al[b][a]))freq_mat[alp_size][a]++;
          else
            {
            x=tolower(A->seq_al[b][a]);
            freq_mat[x-'a'][a]++;
            freq_mat[alp_size+1][a]++;
            
            }
        }
      }
    
    return freq_mat;
    }
char *aln2random_seq (Alignment *A, int pn1, int pn2, int pn3, int gn)
    {

      /* 

       
       Given the frequencies in A ( read as total counts of each Residue in
       freq[A->nseq][A->len_aln], and pn1, pn2 and pn3:
        
                        1-Generate a new amino-acid at each position
                    2-Insert Gaps, using a HMM.

       
       pn3=Weight of the noise induced with sub mat.

       pn1=% noise type 1 ( Varies with entropi)
        n1=Ratio noise type 1
        
       T =Nseq 
       t1=Noise 1 expressed in Nseq
       al=alphabet size;
       ncat=number of non 0 cat for a given position
       ICi initial count for residue i

       Ci=freq[seq][AA]
       t1=T*n1*(1-1/ncat);
       t2=T*n2;
       
       Ci= ICi*(T-(t1+t2))/T +(t1)/al+(t2)/al
       
      */
      
      int **freq;
      int **count;
      float T, tot_t1, tot_t2,tot_t3, n1, n2, n3;
      float ncat;
      
      double gf;
      double *init_freq;
      double *blur_freq;
      double *t1, *t2,*t3;
      int a, b, c, x;
      char *seq;
      int tot;
      /*Viterbi  Parameters */
      
      int p;
      int AL=0;        /*Allowed Transition*/
      int F=-100000; /*Forbiden Transition*/

      int GAP_TRANSITION;
      int IGAP=0, IAA=1;
      
      int state,best_state, score, best_score;
      int p_state;
      int e;
      int **score_tab;
      int **state_tab;
      int nstate=2;
      int **transitions;
      
      int max;

      seq=vcalloc ( A->len_aln+1, sizeof (char));     
      count=aln2count_mat(A);
      freq=aln2count_mat(A);

      T=100;

      n1=(float)pn1/100;
      n2=(float)pn2/100;
      n3=(float)pn3/100;
      
      for ( a=0; a< A->len_aln; a++)
      {
        for ( b=0; b<26; b++)
          freq[b][a]=freq[b][a]*((T)/(A->nseq-freq[26][a]));
        freq[26][a]= (freq[26][a]*T)/A->nseq;
      }

      
      init_freq=vcalloc ( 26, sizeof (double));
      blur_freq=vcalloc ( 26, sizeof (double));
      
      tot_t1=tot_t2=tot_t3=0;
      
      t1=vcalloc ( 27, sizeof (double));
      t2=vcalloc ( 27, sizeof (double));
      t3=vcalloc ( 27, sizeof (double));
      for (a=0; a< A->len_aln; a++)
      {

        /*Compute Frequencies*/
        for (tot=0, b=0; b<26; b++)
            {
              if ( is_aa(b+'A'))
                {
                  init_freq[b]=freq[b][a];
                  tot+=freq[b][a];
                }
            }
        /*Count the number of  different amino acids*/
      for ( ncat=0, b=0; b<=26; b++)
        {
          ncat+=(freq[b][a]!=0)?1:0;          
        }
      /*Blurr the distribution using */
            blur_freq=compute_matrix_p (init_freq,tot);     
            
      
      /*compute noise 1: biased with blurred content * enthropy--> keeps prosite motifs*/
      tot_t1=T*n1*(1-1/ncat);
      for (  b=0; b< 26; b++)if ( is_aa(b+'A')){t1[b]=blur_freq[b]*(1-1/ncat)*n1;}
      
      /*Compute noise 2: completely random*/
      tot_t2=T*n2;
      for (  b=0; b< 26; b++)if ( is_aa(b+'A')){t2[b]=tot_t2/21;}
      
      /*compute noise 3: biased with the sole content(pam250mt)*/
      tot_t3=T*n3;
      for (  b=0; b<26; b++)if ( is_aa(b+'A')){t3[b]=blur_freq[b]*n3;}
      
      for ( b=0; b<26; b++)
        {
          if ( is_aa('A'+b))
            freq[b][a]=freq[b][a]*(T-(tot_t1+tot_t2+(tot_t3)))/T+t1[b]+t2[b]+t3[b];
        }
      
      /*end of the loop that mutates position a*/
      }
     
        vfree (blur_freq);
      vfree (init_freq);
      vfree ( t3);
      
      /*1-Generate the amino acids of the new sequence new*/
      
      
      vsrand (0);
      
      for ( a=0; a< A->len_aln; a++)
      {

        for (T=0,b=0; b<26; b++)T+=freq[b][a];
        x=rand ()%((int)T);
        for (c=0,b=0; b<26; b++)
          {
           c+=freq[b][a];
           if ( c>=x)
             {
             seq[a]='A'+b;
             c=-1;
             break;
             }
          }
        if ( c!=-1)seq[a]='-';
      }
      seq[a]='\0';
      

      /*2 Generate the gaps in the new sequence*/
      
      

      if ( gn<0);
      else
      {

        transitions=declare_int ( nstate, nstate);
        score_tab=declare_int ( A->len_aln+2, nstate       );
        state_tab=declare_int ( A->len_aln+2, nstate       );
        
        
        
        for (a=0; a<nstate;a++)
          for (b=0; b<nstate;b++)
            {transitions[a][b]=F;}
        
        GAP_TRANSITION=AL-gn;
        
        

       



        transitions[IGAP ][IGAP ]=AL;
        transitions[IAA][IAA]=AL;
        transitions[IAA ][IGAP]=GAP_TRANSITION;
        transitions[IGAP][IAA ]=GAP_TRANSITION;
        
        
        for ( p=1; p<=A->len_aln; p++){for (state=0; state< nstate; state++){score_tab[p][state]=F;state_tab[p][state]=-1;} }
        
        for (p=1; p<= A->len_aln; p++)
          {
            for (max=0,a=0; a<26; a++)max=MAX(max, freq[a][p-1]);
            max=(max*(A->nseq-count[26][p-1]))/A->nseq;
            
            for (state=0; state< nstate; state++)
            {
              
              
              gf=freq[26][p-1];
              if      ( state==IGAP)  e=gf-50;
              else if ( state==IAA )  e=max-50;
              for (p_state=0; p_state<nstate; p_state++)
                {
                  score=(score_tab[p-1][p_state]==F)?F:(e+transitions[p_state][state]+score_tab[p-1][p_state]);
                  if(p_state==0 || score>best_score){ best_score=score;best_state=p_state;}
                }
              score_tab[p][state]=best_score;
              state_tab[p][state]=best_state;
            }
          }
        
        for (state=0; state<nstate; state++)
          {
            if (state==0 || score_tab[p-1][state]>best_score){best_score=score_tab[p-1][state]; best_state=state;}
          }
        
        for (p=A->len_aln; p>0;)
          {
            if ( best_state==IGAP)
            {
              seq[p-1]='-';
            }
            else if ( best_state==IAA)
            {
              seq[p-1]=seq[p-1];
            }
            best_state=state_tab[p][best_state];
            p--;
          }
        }
      
      free_int (freq, -1);
      return seq;
    }

/********************************************************************/
/*                                                                  */
/*                Weighting functions                         */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/
Alignment * master_trimseq( Alignment *A, Sequence *S,char *mode)
     {
       Alignment *NA;
       char *p;
       int a, b;
       int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0;
       float f_upper_sim, f_lower_sim;
       char weight_mode[1000];
       char method[1000];
       int statistics=0;
       int trim_direction=TOP;
       float **sim_weight;
       int *seq_list;
       int table=0;
       
       
     

     /*
       mode: 
           (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>
     */
     

     
     seq_list=vcalloc ( S->nseq, sizeof (int));
     for ( a=0; a< A->nseq; a++)
       {
       seq_list[a]=1;
       }
     

     use_aln=aln_is_aligned(A);
     
     if ( mode[0]=='\0')
       {
      
       upper_sim=50;
       lower_sim=0;
       min_nseq=0;
       sprintf (weight_mode, "pwsim");
       sprintf ( method, "clustering2");
       }
     else 
       {
      
       upper_sim=lower_sim=min_nseq;
       sprintf (weight_mode, "pwsim");
       sprintf ( method, "clustering2");
       }

     /*
      U or % (deprecated) Upper bound for pairwise similarity
      L or m (depercated) Lower  bound for pairwise similarity
      n max number of sequences
      N max number of sequences as a fraction of thet total
      S print Statistics
      T print Table of distances
     */

     

     while ( (p=strtok(mode, "_")))
         {       
           mode=NULL;
           if (strm (p, "seq"))use_aln=0;
           else if ( strm(p,"aln"))use_aln=1;
           else if  (p[0]=='s')statistics=1;
           else if  (p[0]=='t')table=1;
           else if  (p[0]=='U')upper_sim=atoi(p+1);
           else if  (p[0]=='L')lower_sim=atoi(p+1);
           else if  (p[0]=='n')min_nseq=atoi(p+1);
           else if  (p[0]=='N')min_nseq=atoi(p+1)*-1;
           else if  (p[0]=='B')trim_direction=BOTTOM;
           else if  (p[0]=='T')trim_direction=TOP;
           else if  (p[0]=='W')sprintf (weight_mode, "%s", p+1);
           else if  (p[0]=='M')sprintf (method, "%s", p+1);
           else if  (p[0]=='K')
             {
             
             while (p=strtok(NULL, ":"))
               {
                 
                 if ( p[0]=='#')
                   {
                   seq_list[atoi(p+1)-1]=2;
                   }
                 else if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)

                   {
                   seq_list[a]=2;
                   } 
               }
             }
         }
     
     if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50;
     
     

     if  (!S)
       {
       fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM);
       crash("");
       }
     
     else if ( min_nseq> S->nseq)
       {
       min_nseq=S->nseq;
       }
     else if ( min_nseq<0)
       {
       if ( min_nseq<-100)
         {
           fprintf ( stderr, "\nWARNING: trimseq: Nseq(N)  max_val=100%% [Automatic reset]\n");
           min_nseq=-100;
         }
       
       min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1);
       }


     NA=seq2subseq3 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list );
     
     if ( table)
       {
       fprintf ( stderr, "\nSIMILARITY MATRIX\n");
       for ( a=0; a< A->nseq-1; a++)
         for ( b=a+1; b< A->nseq; b++)
           {
             fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]);
           }
       }
     if ( statistics)
       {
       f_upper_sim=(upper_sim>100)?((float)upper_sim/(float)100):upper_sim;
       f_lower_sim=(upper_sim>100)?((float)lower_sim/(float)100):lower_sim;
       
       fprintf ( stderr, "\nTRIM Informations:\n");
       fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");
       fprintf ( stderr, "\tcluster_mode..: %s\n"  ,method);
       fprintf ( stderr, "\tsim_mode......: %s\n"  ,weight_mode);
       fprintf ( stderr, "\tlower_id_bound: %.2f%%\n"  ,(f_lower_sim==0)?-1:f_lower_sim);
       fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim);
       fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n"  ,NA->nseq, S->nseq);
       fprintf ( stderr, "\treduction.....: %d%% of original set\n"  ,(NA->nseq*100)/S->nseq);
       fprintf ( stderr, "\tTrim_direction: From %s \n"  ,(trim_direction==BOTTOM)?"Bottom":"Top");
       }

     return NA;
   }

Alignment * trimseq( Alignment *A, Sequence *S,char *mode)
   {
     Alignment *NA;
     char *p;
     int a, b;
     int use_aln=0, upper_sim=0, min_nseq=0, lower_sim=0;
     char weight_mode[1000];
     char method[1000];
     int statistics=0;
     int trim_direction=TOP;
     float **sim_weight;
     int *seq_list;
     int table=0;
     int print_name=0;
     float f_lower_sim, f_upper_sim;
     
         

     /*
       mode: 
           (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>
     */
     

     
     seq_list=vcalloc ( S->nseq, sizeof (int));
     for ( a=0; a< A->nseq; a++)
       {
       seq_list[a]=1;
       }
     

     use_aln=aln_is_aligned(A);
     
     
     if ( mode[0]=='\0')
       {
      
       upper_sim=50;
       lower_sim=0;
       min_nseq=0;
       sprintf (weight_mode, "pwsim_fragment");
       sprintf ( method, "clustering2");
       }
     else 
       {
      
       upper_sim=lower_sim=min_nseq;
       sprintf (weight_mode, "pwsim_fragment");
       sprintf ( method, "clustering2");
       }

     /*
      U or % (deprecated) Upper bound for pairwise similarity
      L or m (depercated) Lower  bound for pairwise similarity
      n max number of sequences
      N max number of sequences as a fraction of thet total
      S print Statistics
      T print Table of distances
     */

     

     while ( (p=strtok(mode, "_")))
         {       
           mode=NULL;
           if (strm (p, "seq"))use_aln=0;
           else if ( strm(p,"aln"))use_aln=1;
           else if  (p[0]=='s')statistics=1;
           else if  (p[0]=='t')table=1;
           else if  (p[0]=='p')print_name=1;
           else if  (p[0]=='U')upper_sim=atoi(p+1);
           else if  (p[0]=='L')lower_sim=atoi(p+1);
           else if  (p[0]=='n')min_nseq=atoi(p+1);
           else if  (p[0]=='N')min_nseq=atoi(p+1)*-1;
           else if  (p[0]=='B')trim_direction=BOTTOM;
           else if  (p[0]=='T')trim_direction=TOP;
           else if  (p[0]=='W')sprintf (weight_mode, "%s", p+1);
           else if  (p[0]=='M')sprintf (method, "%s", p+1);
           else if  (p[0]=='K')
             {
             
             while (p=strtok(NULL, ":"))
               {
                 
                 if ( (a=name_is_in_list (p, A->name, A->nseq, 100))!=-1)
                   {
                   seq_list[a]=2;
                   } 
               }
             }
         }
     
     if ( !upper_sim && !min_nseq && !lower_sim)upper_sim=50;
     
     

     if  (!S)
       {
       fprintf ( stderr, "\ntrimseq requires a set of sequences[FATAL:%s]\n", PROGRAM);
       crash("");
       }
     
     else if ( min_nseq> S->nseq)
       {
       min_nseq=S->nseq;
       }
     else if ( min_nseq<0)
       {
       if ( min_nseq<-100)
         {
           fprintf ( stderr, "\nWARNING: trimseq: Nseq(N)  max_val=100%% [Automatic reset]\n");
           min_nseq=-100;
         }
       
       min_nseq=(int)((float)S->nseq*((float)min_nseq/100)*-1);
       }


     NA=seq2subseq2 (A, S,use_aln,lower_sim,upper_sim,min_nseq,trim_direction, weight_mode,&sim_weight, seq_list );
     
     if ( table)
       {
       fprintf ( stderr, "\nSIMILARITY MATRIX\n");
       for ( a=0; a< A->nseq-1; a++)
         for ( b=a+1; b< A->nseq; b++)
           {
             fprintf ( stderr, "%15s Vs %15s : %3.2f %% id\n", A->name[a], A->name[b], 100-sim_weight[a][b]);
           }
       }
     
     NA=seq_name2removed_seq_name(S, NA,sim_weight);

     if ( print_name)
       {
       fprintf ( stderr, "\nList of sequences with their closest removed neighbors\n");
       for ( a=0; a< NA->nseq; a++)fprintf ( stderr, "\n%s: %s\n", NA->name[a], NA->seq_comment[a]);
       }
     
     if ( statistics)
       {
       f_lower_sim=(lower_sim>100)?(float)lower_sim/100:lower_sim;
       f_upper_sim=(upper_sim>100)?(float)upper_sim/100:upper_sim;

       fprintf ( stderr, "\nTRIM seq Informations:\n");
       fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");
       fprintf ( stderr, "\tcluster_mode..: %s\n"  ,method);
       fprintf ( stderr, "\tsim_mode......: %s\n"  ,weight_mode);
       fprintf ( stderr, "\tlower_id_bound: %.2f%%\n"  ,(f_lower_sim==0)?-1:f_lower_sim);
       fprintf ( stderr, "\tupper_id_bound: %.2f%%\n",(f_upper_sim==0)?-1:f_upper_sim);
       fprintf ( stderr, "\tnseq_kept.....: %d (out of %d)\n"  ,NA->nseq, S->nseq);
       fprintf ( stderr, "\treduction.....: %d%% of original set\n"  ,(NA->nseq*100)/S->nseq);
       fprintf ( stderr, "\tTrim_direction: From %s \n"  ,(trim_direction==BOTTOM)?"Bottom":"Top");
       }

     return NA;
   }

Alignment * tc_trimseq( Alignment *A, Sequence *S,char *mode)
   {
     Alignment *NA;
     Sequence  *TS;
     char *trimfile, *alnfile;
     int *seq_list;
     int a, nseq=0, sim=0;
     char *p;
     char command[100000];
     char keep_list[10000];
     
     int top, bottom, middle, pmiddle;
     
     keep_list[0]='\0';
    
     seq_list=vcalloc ( S->nseq, sizeof (int));
     for ( a=0; a< A->nseq; a++)
       {
       seq_list[a]=1;
       }
     
     trimfile=vtmpnam (NULL);
     alnfile=vtmpnam (NULL);
     if ( !aln_is_aligned (A))
       {
       fprintf ( stderr, "\ntrimTC: computation of an Approximate MSA  [");
       A=compute_tcoffee_aln_quick ( A, NULL);
       fprintf ( stderr, "DONE]\n");
       }
     output_clustal_aln (alnfile, A);
     
    
     while ( (p=strtok(mode, "#")))
         {       
           mode=NULL;

                 
           if (p[0]=='%' || p[0]=='S')sim=(p[1]=='%')?atoi(p+2):atoi(p+1);
           else if  (p[0]=='n' || p[0]=='N')nseq=atoi(p+1);
           else if  (p[0]=='K')
             {
             if ( (a=name_is_in_list (p+1, A->name, A->nseq, 100))!=-1)
               {
                 seq_list[a]=2;
               }  
             
             }
         }
     if ( nseq ==0 && sim ==0)
       {
       fprintf ( stderr, "\nERROR: trimTC\nIndicate the maximum number of sequences Nnseq\nOR the maximum average similarity of the chosen sequencesSx\nEX: +trimTC S20 OR +trimTC N5"); 
       fprintf ( stderr, "\n[FATAL:%s]", PROGRAM);
       exit (EXIT_FAILURE);
       }
     
     for ( a=0; a<A->nseq; a++)if (seq_list[a]==2){strcat ( keep_list, A->name[a]);strcat ( keep_list," ");}
     
     if ( sim)
       {
       sprintf ( command , "t_coffee -infile %s -trim  -trimfile=%s  -split_score_thres %d -convert -iterate 0 ", alnfile, trimfile,sim);
       if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}
       my_system ( command);
       TS=read_sequences (trimfile);
       }
     else if ( nseq && A->nseq>nseq)
       {
       
       top=100;bottom=0;
       pmiddle=0;middle=50;
       
       sprintf ( command , "t_coffee -infile %s -trim  -trimfile=%s  -split_score_thres %d -convert -iterate 0", alnfile, trimfile,middle);
       if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}
       my_system ( command);
       
       TS=read_sequences (trimfile);
       fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t",middle, TS->nseq);
       
       if ( TS->nseq>nseq)top=middle;
       else if ( TS->nseq<nseq)bottom=middle;
       pmiddle=middle;
       middle=(top-bottom)/2+bottom;
       
       while (TS->nseq!=nseq && pmiddle!=middle)
         {
           
           sprintf ( command , "t_coffee -infile %s -trim  -trimfile=%s  -split_score_thres %d -convert -iterate 0 ", alnfile, trimfile,middle);
           if ( keep_list[0]){strcat ( command, " -seq_to_keep ");strcat ( command, keep_list);}
           my_system ( command);
           free_sequence (TS, -1);
           TS=read_sequences (trimfile);
           fprintf ( stderr, "\n\tTrimTC: Sim %d Nseq %d\t", middle, TS->nseq);
           
           if ( TS->nseq>nseq)top=middle;
           else if ( TS->nseq<nseq)bottom=middle;
           pmiddle=middle;
           middle=(top-bottom)/2+bottom;
         }
       }
     else
       {
       TS=aln2seq (A);
       }
     NA=seq2aln (TS, NULL, 1);
     vremove ( alnfile);
     fprintf ( stderr, "\n");
     
     return NA;
   }   

Alignment* seq2subseq3( Alignment *A, Sequence *S,int use_aln, int int_lower_sim,int int_upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list)
{
  int a, b;
  int new_nseq;

  /*OUTPUT*/
  char **seq, **name;
  Sequence *NS;
  Alignment *NA;
  float sim, lower_sim, upper_sim;
  
  lower_sim=(int_lower_sim>100)?(float)int_lower_sim/100:int_lower_sim;
  upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim;

  sim_weight[0]=get_weight   ((use_aln)?A:NULL, S, weight_mode);
  
  name=declare_char (S->nseq, (MAXNAMES+1));
  seq= declare_char (S->nseq, S->max_len+1);
  
  /*
    Remove every sequence that is more than upper_sim and less than lower_sim similar to the master sequences
    the master sequence(s) are those for which seq_list[x]==2
  */


  

  new_nseq=A->nseq;
  

  for (a=0; a< A->nseq; a++)
    {
      if ( seq_list[a]==2)
      {
      
        for ( b=0; b< A->nseq;b++)
          {
            sim=100-sim_weight[0][a][b];
            if (seq_list[b]==1 && (sim>upper_sim || sim<lower_sim))
            {
             seq_list[b]=0;
             new_nseq--;
            }
          }
        
      }
    }
  
  /*Prepare the new sequence List*/

  for (b=0, a=0; a<S->nseq; a++) 
        {
          if ( seq_list[a])
            {
            sprintf ( name[b], "%s", S->name[a]);
            sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] );
            b++;
            }
        }
  
  
  NS=fill_sequence_struc (new_nseq,seq,name);
  NA=seq2aln(NS,NULL,1);       
  
  if ( use_aln && A)
    {
      NA=realloc_aln2  ( NA,A->max_n_seq,A->len_aln+1);
      
      for (b=0, a=0; a<S->nseq; a++) 
      {
        if ( seq_list[a])
          {
            sprintf ( NA->seq_al[b] , "%s",A->seq_al[a]);
            b++;
            }
      }

      NA->len_aln=A->len_aln;
      ungap_aln(NA);
    }
  

  return NA;
}
Alignment* seq2subseq2( Alignment *A, Sequence *S,int use_aln, int int_lower_sim,int int_upper_sim, int min_nseq, int trim_direction, char *weight_mode, float ***sim_weight, int *seq_list)
{
  int a, b;
  int new_nseq;
  int seq_index=0;
  /*OUTPUT*/
  char **seq, **name;
  Sequence *NS;
  Alignment *NA;
  float lower_sim, upper_sim;
  
  lower_sim=(int_lower_sim>100)?(float)int_lower_sim/100:int_lower_sim;
  upper_sim=(int_upper_sim>100)?(float)int_upper_sim/100:int_upper_sim;

 
  sim_weight[0]=get_weight   ((use_aln)?A:NULL, S, weight_mode);
  
  name=declare_char (S->nseq, (MAXNAMES+1));
  seq= declare_char (S->nseq, S->max_len+1);
  
  /*
    1 REMOVE OUTLAYERS
    2 REMOVE CLOSELY RELATED SEQUENCES
    3 IF STILL TOO MANY SEQUENCES:
      REMOVE THE MOST CLOSELY RELATED ONES
  */


  /*1 Remove outlayers*/
  
  new_nseq=A->nseq;
 
 
  /*1 Remove outlayers*/
  while ( lower_sim && (extreme_seq(BOTTOM,A,sim_weight[0],seq_list, &seq_index) <lower_sim) && ((new_nseq)>min_nseq) && seq_index!=-1)
    {
  
      if ( seq_list[seq_index]==1)
      {
        seq_list[seq_index]=0;
        new_nseq--;
      }
    }
   /*2 Remove close relative*/
  
 
  while ( upper_sim && (extreme_seq(TOP, A,sim_weight[0],seq_list, &seq_index)>upper_sim) && ((new_nseq)>min_nseq)&& seq_index!=-1)
    { 
  
      if ( seq_list[seq_index]==1)
      {
        seq_list[seq_index]=0;
        new_nseq--;
      }
    }


  /*Remove extra sequences*/
  
  while ( min_nseq>0 && new_nseq>min_nseq && seq_index!=-1)
    {
     
      extreme_seq(trim_direction, A,sim_weight[0],seq_list, &seq_index);
  
      if ( seq_index==-1)break;
      if ( seq_list[seq_index]==1)
      {
        seq_list[seq_index]=0;
        new_nseq--;
      }
    }


  /*Prepare the new sequence List*/

  for (b=0, a=0; a<S->nseq; a++) 
        {
          if ( seq_list[a])
            {
            sprintf ( name[b], "%s", S->name[a]);
            sprintf ( seq[b] , "%s",(use_aln)?A->seq_al[a]: S->seq[a] );
            b++;
            }
        }
  
  
  NS=fill_sequence_struc (new_nseq,seq,name);
  NA=seq2aln(NS,NULL,1);       
  
  if ( use_aln && A)
    {
      NA=realloc_aln2  ( NA,A->max_n_seq,A->len_aln+1);
      
      for (b=0, a=0; a<S->nseq; a++) 
      {
        if ( seq_list[a])
          {
            sprintf ( NA->seq_al[b] , "%s",A->seq_al[a]);
            b++;
            }
      }

      NA->len_aln=A->len_aln;
      ungap_aln(NA);
    }
  

  return NA;
}

float extreme_seq (int direction, Alignment *A,float **sim_weight,int *seq_list, int *seq_index)
{
  
  /*find the closest relative of each sequence
    Return:
          Direction= BOTTOM: the sequence whose closest relative is the most distant
        Direction= TOP:    the sequence whose closest relative is the closest
  weight: different sequences=100
          similar sequences  =0
  */
  int a, b;
 
  float top_sim,bottom_sim, best_sim, sim;
  int   top_seq, bottom_seq;
  
  bottom_seq=top_seq=seq_index[0]=-1;
  top_sim=-1;
  bottom_sim=101;
  
  for (a=0; a< A->nseq; a++)
    {
      if (seq_list[a]!=1)continue;

      for ( best_sim=0, b=0; b< A->nseq; b++)
      {
        if ( a==b || !seq_list[b])continue;
        
        sim=100-sim_weight[a][b];
        if (sim>best_sim)
          {
            best_sim=sim;
          }
      }

      if ( best_sim>top_sim)
      {
        top_seq=a;
        top_sim=best_sim;
      }
     
      if ( best_sim<bottom_sim)
      {
        bottom_seq=a;
        bottom_sim=best_sim;
      }
      
    }
  if ( direction==BOTTOM  ){seq_index[0]= bottom_seq; return bottom_sim;}
  else if ( direction==TOP){seq_index[0]= top_seq; return top_sim;}
  else
    {
      seq_index[0]=-1;
      return -1;
    }
}
  
  
      
      
Alignment* seq2subseq1( Alignment *A, Sequence *S,int use_aln, int percent,int max_nseq, int ms,char *weight_mode)
   {
     float **pw_weight,**sim_weight, **seq_weight;
     int a,b,c,d;
     float sum, chosen,last_chosen, last_nchosen,nchosen;
     int condition1, condition2;
     Sequence *NS;
     Alignment *NA;
     char **name, **seq;
     float score, best_score;
     int best_seq;
     int *seq_list, *used_seq_list;
     
     /*
       mode: 
           (trim)_<seq or aln>_%<percentage of tot weight to keep>_n<number of seq to keep>_w<weight mode>
     */
     
     sim_weight=get_weight   ((use_aln)?A:NULL, S, weight_mode);
     pw_weight=declare_float (S->nseq, S->nseq);
     seq_weight=declare_float ( S->nseq, 2);

     
     for (best_score=0,a=0; a<S->nseq; a++)
       {
       for ( b=0; b<S->nseq; b++)
         {
           if ( a==b)continue;
           seq_weight[a][0]+=sim_weight[a][b];
         }
       seq_weight[a][0]=seq_weight[a][0]/(S->nseq-1);
       score=seq_weight[a][0]=100-seq_weight[a][0];
       
       if ( score>best_score)
         {
           best_seq=a;
           best_score=score;
         }

       }
      for (a=0; a<S->nseq; a++)
       {
       for ( b=0; b<S->nseq; b++)
         {
           if ( a==b)continue;
           pw_weight[a][b]=sim_weight[a][b]*seq_weight[a][0]*seq_weight[b][0]/(100*100);
           
         }
       }
      
 
     seq_list=vcalloc ( S->nseq, sizeof (int));
     used_seq_list=vcalloc ( S->nseq, sizeof (int));

    

     name=declare_char (S->nseq, (MAXNAMES+1));
     seq= declare_char (S->nseq, S->max_len+1);
     
     /*compute the normalization factor*/
     for (sum=0,d=0; d< S->nseq; d++)
         {
           for (score=0,c=0; c<S->nseq; c++)
             {
             if ( c!=d)
               score=MAX(score, 100-sim_weight[c][d]);
             }
           sum+=score;
         }
     sum=sum/S->nseq;
     /*chose the first sequence */
     for ( best_score=0,a=0; a< S->nseq; a++)
       {
       for (score=0, b=0; b< S->nseq; b++)
         {
           score+=100-sim_weight[a][b];
         }
       if ( score>best_score)
         {
           best_seq=a;
           best_score=score;
         }
       
       }


     last_chosen=chosen=((best_score/S->nseq)*100)/sum;
     nchosen=last_nchosen=1;
     seq_list[0]=best_seq;
     used_seq_list[best_seq]=1;

     sprintf ( name[0],"%s", S->name[seq_list[0]]);
     sprintf ( seq[0],"%s", S->seq[seq_list[0]]);
     nchosen=last_nchosen=1;
     

     fprintf ( stderr, "\nTRIM:\n");
     fprintf ( stderr, "\n1-Chosen Sequences\n");
     /*Assemble the list of sequences*/
     for (a=1; a< S->nseq; a++)
       {
       for (best_score=0,b=0; b< S->nseq; b++)
         {
           if (used_seq_list[b]);
           else
             {
             score=pw_weight[seq_list[0]][b]+1;
             for (c=0; c<a; c++)
               score=MIN(score,pw_weight[seq_list[c]][b]);
               
             if ( score>=best_score)
               {
               best_seq=b;
               best_score=score;
               }
             
             }
         }
       seq_list[a]=best_seq;
       used_seq_list[best_seq]=1;
       
       

       for ( chosen=0,d=0; d< S->nseq; d++)
         {
           for (score=0, c=0; c<=a; c++)
             {
             if ( seq_list[c]!=d)
               score=MAX(score, 100-sim_weight[seq_list[c]][d]);
             }
           chosen+=score;
           
         }
       
       chosen=((chosen/S->nseq)*100)/sum;
       nchosen=a+1;
             
       condition1= (int)chosen<=(int)percent || !percent;
       condition2=(nchosen)<=max_nseq     || !max_nseq;
       
       if (condition1 && condition2)
         {
           fprintf ( stderr, "\tADD %s (set score: %.2f %%)\n", S->name[seq_list[a]], chosen);
           sprintf ( name[a],"%s", S->name[seq_list[a]]);
           sprintf ( seq[a],"%s", S->seq[seq_list[a]]);
           
         }
       else
         {
           break;  
         }
       last_chosen=chosen;
       last_nchosen=nchosen;
       }
       
     NS=fill_sequence_struc (last_nchosen,seq,name);
     NA=seq2aln(NS,NULL,1);    
     fprintf ( stderr, "\n2-Informations:\n");
     fprintf ( stderr, "\tUse...........: %s\n",(use_aln)?"multiple_aln":"pairwise_aln");
     fprintf ( stderr, "\tweight_mode...: %s\n"  ,weight_mode);
     fprintf ( stderr, "\tpercent_weight: %.2f%% (max=%d%%)\n",last_chosen,percent);
     fprintf ( stderr, "\tn_seq.........: %d\n"  ,NS->nseq);
     fprintf ( stderr, "\treduction.....: %d%% of original set\n"  ,(NS->nseq*100)/S->nseq);
     
     return NA;
   }   
float ** get_weight ( Alignment *A, Sequence *S, char *mode)
{
 char *aln_name;
 char *weight_name;
 char *seq_name;
 char command[LONG_STRING];
 char program[LONG_STRING];
 float **weight;
 FILE *fp;
 int c;
  
 if ( !mode || !mode[0] || strm (mode, "msa"))
      {
      if ( getenv ( "SEQ2MSA_WEIGHT")==NULL)sprintf (program, "%s",SEQ2MSA_WEIGHT);
      else sprintf ( program, "%s", (getenv ( "SEQ2MSA_WEIGHT")));   
      }
 else if ( strm(mode, "pwsim") ||strm(mode, "pwsim_fragment") )
      {
      return seq2pwsim (A, S, mode);
      }
 else
     {
       if (getenv (mode))sprintf ( program, "%s", (getenv (mode)));
       else fprintf ( stderr, "\nERROR: %s is not a valid mode for weight computation [FATAL:%s]", mode, PROGRAM);
     }

 /*MSA weights*/
 seq_name=vtmpnam(NULL);
 aln_name=vtmpnam(NULL);
 weight_name=vtmpnam(NULL);
 weight=declare_float (S->nseq+1, 2);
 

  
  if (A)
    {
      output_clustal_aln (seq_name,A);
      output_fasta_seq   (aln_name,A);
      sprintf ( command, "%s %s -i %s -w %s", program, seq_name, aln_name, weight_name);
    }
  else
    {
      A=seq2aln(S,A,1);
      output_fasta_seq   (seq_name,A);
      sprintf ( command, "%s %s -w %s", program, seq_name, weight_name);
    }
  
 
  my_system ( command);
  
  fp=vfopen( weight_name, "r");
  while ( (c=fgetc(fp))!='$');
  c=fgetc(fp);
  c=0;
  while ( (fscanf (fp, "%*s %f\n",&(weight[c][1])))==1)
    {weight[c][0]=c;c++;}
  vfclose (fp);
  
  
  return weight;
}

float **seq2pwsim (        Alignment *A, Sequence *S, char *mode)
{
  int a, b, c;
  float d,t;
  float  **W;
  Alignment *B;
  W=declare_float (S->nseq, S->nseq);



  for (a=0; a< S->nseq; a++)
      for ( b=a; b<S->nseq; b++)
        {
          if ( a==b){d=1;}
          else if (!A)
            {

            B=align_two_sequences ((S)->seq[a], (S)->seq[b],"pam250mt", -10, -1, "fasta_pair_wise");
            for (t=0,d=0,c=0; c<B->len_aln; c++)
              {
                d+=(B->seq_al[0][c]==B->seq_al[1][c] && !is_gap(B->seq_al[0][c]));
                t+=(!is_gap(B->seq_al[0][c]) && !is_gap(B->seq_al[1][c]));
              }
            t=(strm ( mode, "pwsim_fragment"))?B->len_aln:t;
            
            d=d/((t==0)?1:t);
            free_aln(B);
            }
          else
            {
            for (t=0,d=0,c=0; c<A->len_aln; c++)
              {
                d+=(A->seq_al[a][c]==A->seq_al[b][c] && !is_gap(A->seq_al[a][c]));
                t+=(!is_gap(A->seq_al[a][c]) && !is_gap(A->seq_al[b][c]));
              }
            d=d/((t==0)?1:t);
            }
        

          W[a][b]=W[b][a]=(1-d)*100;
        }
 
  
  return W;

}
 
float **seq2pwsim_fragment (     Alignment *A, Sequence *S, char *mode)
{

  
  int a, b, c;
  float d,t;
  float  **W;
  Alignment *B;
  W=declare_float (S->nseq, S->nseq);




  for (a=0; a< S->nseq; a++)
      for ( b=a; b<S->nseq; b++)
        {
          if ( a==b){d=1;}
          else if (!A)
            {

            B=align_two_sequences ((S)->seq[a], (S)->seq[b],"pam250mt", -10, -1, "fasta_pair_wise");
            for (t=0,d=0,c=0; c<B->len_aln; c++)
              {
                d+=(B->seq_al[0][c]==B->seq_al[1][c] && !is_gap(B->seq_al[0][c]));
                t+=(!is_gap(B->seq_al[0][c]) && !is_gap(B->seq_al[1][c]));
              }
      
            d=d/((t==0)?1:t);
            free_aln(B);
            }
          else
            {
            for (t=0,d=0,c=0; c<A->len_aln; c++)
              {
                d+=(A->seq_al[a][c]==A->seq_al[b][c] && !is_gap(A->seq_al[a][c]));
                t+=(!is_gap(A->seq_al[a][c]) && !is_gap(A->seq_al[b][c]));
              }
            d=d/((t==0)?1:t);
            }
        

          W[a][b]=W[b][a]=(1-d)*100;
        }
 
  
  return W;

} 

/********************************************************************/
/*                                                                  */
/*                AMINO ACID FUNCTIONS                        */
/*                                                                  */
/*                                                                  */
/*                                                                  */
/********************************************************************/

char** make_group_aa (int *ngroup, char *mode)
      {
/*mode:         indicates which matrix will be used for the grouping*/
/*n_group:      pointer to the number of groups                     */
/*return value: an array of strings containing the AA of each group */


      int **matrix;
      int a, b,c,is_in;
      char buf[28];
      char **group_list;
      char *matrix_name;
      matrix_name=vcalloc ( 100, sizeof (char));

      
      ngroup[0]=0;
      group_list=declare_char ( 100, 27);
            
      if ( mode[0]=='_'){mode++;sprintf ( matrix_name, "%s", mode);}

      if (mode==NULL || mode[0]=='\0')sprintf ( matrix_name, "idmat");
      else if ( strm (mode, "simple"))
           {
             ngroup[0]=6;
             sprintf ( group_list[0], "avilmAVILM");
             sprintf ( group_list[1], "dekrDEKR");
             sprintf ( group_list[2], "stcnqhSTCNQH");
             sprintf ( group_list[3], "wfyWFY");
             sprintf ( group_list[4], "gG");
             sprintf ( group_list[5], "pP");
             vfree (matrix_name);
             return group_list;
           }
      else if ( strm (mode, "clustalw"))
           {
             ngroup[0]=11;
             sprintf ( group_list[0] ,"astaASTA");
             sprintf ( group_list[1] ,"bneqkBNEQK");
             sprintf ( group_list[2] ,"cnhqkCNHQK");
             sprintf ( group_list[3] ,"dndeqDNDEQ");
             sprintf ( group_list[4] ,"eqhrkEQHRK");
             sprintf ( group_list[5] ,"fmilvFMILV");
             sprintf ( group_list[6] ,"gmilfGMILF");
             sprintf ( group_list[7] ,"hhyHHY");
             sprintf ( group_list[8] ,"ifywIFYW");
             sprintf ( group_list[9] ,"jcJC");
             sprintf ( group_list[10],"kpKP");
             vfree (matrix_name);
             return group_list;
           }
      else if ( strm (mode, "polarity"))
           {
             ngroup[0]=6;
             sprintf ( group_list[0], "eqrsdnkhtEQRSDNKHT");
             sprintf ( group_list[1], "pP");
             sprintf ( group_list[2], "gG");
             sprintf ( group_list[3], "cC");
             sprintf ( group_list[4], "fywFYW");
             sprintf ( group_list[5], "iavlmIAVLM");
             vfree (matrix_name);
             return group_list;
           }
      else if ( strm (mode, "vasiliky"))
           {
             ngroup[0]=0;
             sprintf ( group_list[ngroup[0]++], "rkRK");
             sprintf ( group_list[ngroup[0]++], "deDE");
             sprintf ( group_list[ngroup[0]++], "qhQH");
             sprintf ( group_list[ngroup[0]++], "vilmVILM");
             sprintf ( group_list[ngroup[0]++], "fyFY");
             sprintf ( group_list[ngroup[0]++], "sS");
             sprintf ( group_list[ngroup[0]++], "wW");
             sprintf ( group_list[ngroup[0]++], "aA");
             sprintf ( group_list[ngroup[0]++], "cC");
             sprintf ( group_list[ngroup[0]++], "gG");
             sprintf ( group_list[ngroup[0]++], "nN");
             sprintf ( group_list[ngroup[0]++], "pP");
             sprintf ( group_list[ngroup[0]++], "tT");
             vfree (matrix_name);
             return group_list;
           }
      else if ( strm (mode, "clustalw_col"))
           {
             sprintf ( group_list[ngroup[0]++], "staSTA");
             sprintf ( group_list[ngroup[0]++], "neqkNEQK");
             sprintf ( group_list[ngroup[0]++], "nhqkNHQK");
             sprintf ( group_list[ngroup[0]++], "ndeqNDEQ");
             sprintf ( group_list[ngroup[0]++], "qhrkQHRK");
             sprintf ( group_list[ngroup[0]++], "milvMILV");
             sprintf ( group_list[ngroup[0]++], "milfMILF");
             sprintf ( group_list[ngroup[0]++], "hyHY");
             sprintf ( group_list[ngroup[0]++], "fywFYW");
             sprintf ( group_list[ngroup[0]++], "gG");
             sprintf ( group_list[ngroup[0]++], "pP");
             sprintf ( group_list[ngroup[0]++], "cC");
             vfree (matrix_name);
             
             return group_list;
           }
      else if ( strm (mode, "clustalw_dot"))
           {
             sprintf ( group_list[ngroup[0]++], "csaCSA");
             sprintf ( group_list[ngroup[0]++], "atvATV");
             sprintf ( group_list[ngroup[0]++], "sagSAG");
             sprintf ( group_list[ngroup[0]++], "stnkSTNK");
             sprintf ( group_list[ngroup[0]++], "stpaSTPA");
             sprintf ( group_list[ngroup[0]++], "sgndSGND");
             sprintf ( group_list[ngroup[0]++], "sndeqkSNDEQK");
             sprintf ( group_list[ngroup[0]++], "ndeqhkNDEQHK");
             sprintf ( group_list[ngroup[0]++], "neqhrkNEQHRK");
             sprintf ( group_list[ngroup[0]++], "fvlimFVLIM");
             sprintf ( group_list[ngroup[0]++], "hfyHFY");
             vfree (matrix_name);
             return group_list;
           }
      else if ( strm (mode, "make_all"))
           {
             ngroup[0]=1;
             sprintf ( group_list[0], "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz");
             vfree (matrix_name);
             return group_list;
           }
      else sprintf ( matrix_name, "%s", mode);
      
      matrix=read_matrice ( matrix_name); 
      
      for ( a=0;a< 26; a++)
            {
            if ( matrix[a][a]>0)
                  {
                  for ( c=0,b=0;b< 26; b++)
                        {
                        
                        if ( matrix[a][b]>0 && matrix[b][b]>0)
                              {
                              buf[c++]=b+'A';
                              buf[c++]=b+'a';
                              }
                        }
                  buf[c]='\0';
                  for ( is_in=0,b=0; b< ngroup[0]; b++)if ( strcmp (buf, group_list[b])==0)is_in=1;
                  if (is_in==0)sprintf ( group_list[ngroup[0]++], "%s", buf);
                              
                  }
            }
      free_int (matrix, -1); 
      vfree (matrix_name);
             
      return group_list;
      }

int find_group_aa_distribution (char *col, int nseq,int n_group, char **gl,  int *distrib, char *mode )
      {
      static int *distribution;
      static char **lgl;
      static int ln_group;
      int a, b, c;
      int *d;
      char **gl2;
      int n_group2;
      
      
      
      if ( lgl==NULL)
            lgl=make_group_aa ( &ln_group, mode);
            
            if ( gl==NULL)
            {
            gl2=lgl;
            n_group2=ln_group;
            }
      else
            {
            gl2=gl;
            n_group2=n_group;
            }
      
      if ( distribution==NULL || ln_group<n_group)distribution=vcalloc ( n_group2, sizeof (int));
      if ( distrib==NULL)d=distribution;
      else d=distrib;
      
      
      for ( a=0; a< n_group2; a++)d[a]=0;
      
      for ( a=0; a< nseq; a++)
            {
            for ( b=0; b< n_group2; b++)
                  d[b]+=is_in_set (col[a], gl2[b]);
            }
      c=d[0];
      for ( a=0; a< n_group2; a++)
            c=(d[a]>c)?d[a]:c;
      return c;
      }



int is_in_same_group_aa ( char r1, char r2, int n_group, char **gl, char *mode)
      {
      int a;
      static char **lgl;
      static int ln_group;
      
      char **gl2;
      int n_group2;
      
      /*use mode=idmat for similarity based on id*/

      r1=toupper(r1);
      r2=toupper(r2);
      if (mode==NULL)return (r1==r2)?1:0;
      
      if ( strm (mode, "clean"))
           {
           free_char (lgl, -1);
           lgl=NULL;
           ln_group=0;
           return 0;
           }
      
      if ( lgl==NULL)
            {
              lgl=make_group_aa ( &ln_group, mode);
            }
            
      if ( gl==NULL)
            {
            gl2=lgl;
            n_group2=ln_group;
            }
      else
            {
            gl2=gl;
            n_group2=n_group;
            }
      
      for ( a=0; a< n_group2; a++)
            if ( is_in_set ( r1, gl2[a]) && is_in_set ( r2, gl2[a]))return 1;
            return 0;
      }
                  

Alignment * gene2prot (Alignment *A){return A; }
char * test_gene2prot (Constraint_list *CL, int s1)
       {
         int a, b,q, nal;
         int F=-10000000; /*FORBIDEN STATE*/
         int AL=0;       /*ALLOWED STATE*/
         int SPLICE_PENALTY=1000;
         int FRAME_PENALTY=1000;
         

         int START,  ORF1,  ORF2, ORF3, s5NC; 
         int s3NC,ORF3_G1, ORF3_T2, ORF3_NC, ORF3_A3, ORF3_T4;
         int U1_G1,   U1_T2,   U1_NC,   U1_A3,   U1_T4;
         int U2_G1,   U2_T2,   U2_NC,   U2_A3,   U2_T4;
         int U1,     U2, U3,  U4, U5, END;
         
         int nstate=0;
         int **transitions;
         int **v_tab;
         int **v_tab_p;
         int **last_coding;
         int **last_t4;
         int *potential;
         int v;

         int orf1, orf2, orf3, ncp, p, state, pstate, e, best_state_p, best_state_v, best_pstate_p, best_pstate_v;
         char *seq, *seq2, *seq3;
         int l;
         int  *is_coding;
         int *is_t4;
         char *codon;

         static int *entry;
         int tot=0;
         
         seq=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));
         seq2=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));
         seq3=vcalloc ( strlen ((CL->S)->seq[s1])+1, sizeof (char));
         sprintf ( seq, "%s", (CL->S)->seq[s1]);
         ungap (seq);

         l=strlen (seq);
         for ( a=0; a< l; a++) seq[a]=tolower ( seq[a]);
         for ( a=0; a< l; a++) seq[a]=(seq[a]=='t')?'u': seq[a];
         

         potential=vcalloc (l+1, sizeof (int));
         CL=index_constraint_list ( CL);
         for (nal=0, a=0; a<(CL->S)->nseq; a++)
             for ( b=CL->start_index[s1][a]; b< CL->end_index[s1][a];b++)
                 {
                   entry=extract_entry(entry, b, CL);
                   if ( entry[SEQ1]==s1)potential[entry[R1]-1]+=entry[WE];
                   else if (  entry[SEQ2]==s1)potential[entry[R2]-1]+=entry[WE];
                   tot+=entry[WE];
                   nal++;
               }


         SPLICE_PENALTY=10000;
         FRAME_PENALTY=1000;

         
         nstate=0;
         START=nstate++;  ORF1=nstate++;  ORF2=nstate++; ORF3=nstate++; s5NC=nstate++; 
         s3NC=nstate++;
         ORF3_G1=nstate++;U1_G1=nstate++;U2_G1=nstate++; 
         ORF3_T2=nstate++;U1_T2=nstate++;U2_T2=nstate++;
         ORF3_NC=nstate++;U1_NC=nstate++;U2_NC=nstate++; 
         ORF3_A3=nstate++;U1_A3=nstate++;U2_A3=nstate++; 
         ORF3_T4=nstate++;U1_T4=nstate++;U2_T4=nstate++;
         
         
         U1=nstate++;     U2=nstate++; U3=nstate++;  U4=nstate++; U5=nstate++;  
         END=nstate++;
         
         is_coding=vcalloc ( nstate, sizeof (int));
         is_coding[ORF1]=is_coding[ORF2]=is_coding[ORF3]=is_coding[U1]=is_coding[U2]=1;
         is_coding[U3]=is_coding[U4]=is_coding[U5]=1;
         
         is_t4=vcalloc ( nstate, sizeof (int));
         is_t4[ORF3_T4]=is_t4[U1_T4]=is_t4[U2_T4]=1;
         transitions=declare_int ( nstate, nstate);
         for (a=0; a< nstate; a++)
               for ( b=0; b< nstate; b++)transitions[a][b]=F;
         
         transitions[START][ORF1]=AL;
         transitions[START][s5NC]=AL-FRAME_PENALTY;
         transitions[s5NC][s5NC]=AL;

         transitions[s5NC][ORF1]=AL-FRAME_PENALTY;

         transitions[ORF1][ORF2]=AL;
         transitions[ORF2][ORF3]=AL;
         transitions[ORF3][U1]=AL;
         transitions[ORF3][ORF1]=AL;
         transitions[ORF3][ORF3_G1]=AL-SPLICE_PENALTY;
         
         
         transitions[ORF3_G1][ORF3_T2]=AL;
         transitions[ORF3_T2][ORF3_NC]=AL;
         transitions[ORF3_NC][ORF3_NC]=AL;
         transitions[ORF3_NC][ORF3_A3]=AL;
         transitions[ORF3_A3][ORF3_T4]=AL;
         transitions[ORF3_T4][ORF1]=AL-SPLICE_PENALTY;

         transitions[U1][U2]=AL;
         transitions[U1][U1_G1]=AL-SPLICE_PENALTY;
         transitions[U1_G1][U1_T2]=AL;
         transitions[U1_T2][U1_NC]=AL;
         transitions[U1_NC][U1_NC]=AL;
         transitions[U1_NC][U1_A3]=AL;
         transitions[U1_A3][U1_T4]=AL;
         transitions[U1_T4][U3]=AL-SPLICE_PENALTY;
         transitions[U3][U4]=AL;
         transitions[U4][ORF1]=AL;
         
         transitions[U2][U2_G1]=AL-SPLICE_PENALTY;
         transitions[U2_G1][U2_T2]=AL;
         transitions[U2_T2][U2_NC]=AL;
         transitions[U2_NC][U2_NC]=AL;
         transitions[U2_NC][U2_A3]=AL;
         transitions[U2_A3][U2_T4]=AL;
         transitions[U2_T4][U5]=AL-SPLICE_PENALTY;
         transitions[U5][ORF1]=AL;
         
         transitions[ORF3][s3NC]=AL-FRAME_PENALTY;
         transitions[ORF3][END]=AL;
         transitions[s3NC][END]=AL;

               
         v_tab=declare_int ( l+1,nstate);
         v_tab_p=declare_int ( l+1,nstate);
         last_coding=declare_int ( l+1,nstate);
         last_t4=declare_int ( l+1,nstate);

         for (a=0; a< l; a++) potential[a]-=200;

         codon=vcalloc ( 4, sizeof (char));
         best_pstate_p=START;
         best_pstate_v=0;
         nal=0;
         for ( p=1; p<=l; p++)
             {
             if  (translate_dna_codon (seq+(p-1), 'x')=='x' || p>(l-2))orf1=F;
             else orf1=potential[p-1];

             if  (p<2 || translate_dna_codon (seq+(p-2), 'x')=='x' || p>(l-1))orf2=F;
             else orf2=potential[p-1];

             
             if  (p<3 || translate_dna_codon (seq+(p-3), 'x')=='x' || p>l)orf3=F;
             else orf3=potential[p-1];
             
             if ( best_int (3, 1, &a, orf1, orf2, orf3)!=F)ncp=-best_int (3, 1, &a, orf1, orf2, orf3);
             else ncp=1000;
             
             for ( state=0; state< nstate; state++)
                 {
                   
                   if      ( state==ORF1)e=orf1;
                   else if ( state==ORF2)e=orf2;
                   else if ( state==ORF3)e=orf3;
                   else if ( state>=U1 && state<=U3)
                     {
                     e=0;
                     }
                   else if ( state==U4)
                      {
                        codon[2]=seq[p-1];
                        codon[1]=seq[last_coding[p-1][U3]-1];
                        codon[0]=seq[last_coding[p-2][U1_T4]-1];
                        if ( translate_dna_codon (codon, 'x')=='x')e=F;
                        else e=0;
                    }
                   else if ( state==U5)
                      {
                        codon[2]=seq[p-1];
                        codon[1]=seq[last_coding[p-1][U2_T4]-1];
                        q=seq[last_coding[p-1][U2_T4]];
                        codon[0]=seq[last_coding[q-1][U1]-1];
                        if ( translate_dna_codon (codon, 'x')=='x')e=F;
                        else e=0;
                    }

                   else if (state>=ORF3_G1 && state<=U2_G1)e=(p<l-1 && seq[p-1]=='g' && seq[p]=='u')?ncp:F;
                   else if ( state>=ORF3_T2 && state<=U2_T2)
                     {
                     e=(p>1 && seq[p-2]=='g' && seq[p-1]=='u')?ncp:F;
                     }
                   else if ( state>=ORF3_A3 && state<=U2_A3)e=(seq[p-1]=='a')?ncp:F;
                   else if ( state>=ORF3_T4 && state<=U2_T4)e=(seq[p-1]=='u')?ncp:F;
                   else e=ncp;
                   
                   for ( pstate=0; pstate<nstate; pstate++)
                       {
                         if (e==F ||  transitions[pstate][state]==F || v_tab[p-1][pstate]==F)v=F;
                         else v=e+transitions[pstate][state]+v_tab[p-1][pstate];
                            
                         if ( pstate==0 || v>best_pstate_v)
                            {best_pstate_v=v;best_pstate_p=pstate;}
                     }
                  v_tab[p][state]=best_pstate_v;
                  v_tab_p[p][state]=best_pstate_p; 
                  
                  if (!is_coding[state])last_coding[p][state]=last_coding[p-1][best_pstate_p];
                  else if (is_coding[state])last_coding[p][state]=p;
                  
                  if (!is_t4[state])
                     {
                       if (is_coding[state] && last_t4[p-1][best_pstate_p]==0)last_t4[p][state]=p;
                       else last_t4[p][state]=last_t4[p-1][best_pstate_p];
                   }
                  else if (is_t4[state])last_t4[p][state]=p;
                  
                  if (state==0 ||best_pstate_v>best_state_v ){best_state_p=state; best_state_v=best_pstate_v;}
               }
             }
         tot=0;
         for ( p=l; p>0; p--)
                 {
                   if ( best_state_p>=ORF1 &&  best_state_p<=ORF3){seq2[tot++]=tolower (seq[p-1]);}
                   else if ( best_state_p>=U1 && best_state_p<=U5){seq2[tot++]=tolower (seq[p-1]);}
                   if (best_state_p==ORF1)seq[p-1]=toupper (seq[p-1]);
                   else if (best_state_p==ORF2 || best_state_p==ORF3)seq[p-1]=tolower (seq[p-1]);
                   else if ( best_state_p==ORF3_NC || best_state_p==U1_NC ||  best_state_p==U2_NC) seq[p-1]='.';
                   else if ( best_state_p==U1 || best_state_p==U2 || best_state_p==U3 || best_state_p==U4 || best_state_p==U5) seq[p-1]=best_state_p-U1+'1';
                   else seq[p-1]=toupper (seq[p-1]);
                   best_state_p=v_tab_p[p][best_state_p];
               }

         for ( a=0, b=tot-1; b>=0; b--, a++)
             seq3[a]=seq2[b];
         
         fprintf ( stderr, "\n%s\n", seq);
         fprintf ( stderr, "\nN coding=%d\n", tot);
         for ( a=0; a< tot; a+=3)
              {
            b=translate_dna_codon (seq3+a, 'x');
            fprintf ( stderr, "%c",b);
            if ( b=='x'){fprintf ( stderr, "\n");exit (EXIT_SUCCESS);}
            }
          
          fprintf ( stderr, "\n");    
          exit (EXIT_SUCCESS);
          return 0;
          
             
          
       }
Alignment * dna_aln2_3frame_cdna_aln(Alignment *A,int *ns,int **l_s)
{
  Alignment *B;
  int a;
  B=realloc_aln2 (NULL,6,strlen(A->seq_al[l_s[0][0]])+strlen(A->seq_al[l_s[1][0]]));
  for ( a=0; a< 3; a++)  
    {  
      B->seq_al[a]=translate_dna_seq (A->seq_al[l_s[0][0]]+a, 0, 'o',B->seq_al[a]);
      B->seq_al[a+3]=translate_dna_seq (A->seq_al[l_s[1][0]]+a, 0, 'o',B->seq_al[a+3]);
    }
  for ( a=1; a<3; a++)
    {
      if ( strlen(B->seq_al[a])<strlen(B->seq_al[0])) B->seq_al[a]=strcat ( B->seq_al[a], "x");
      if ( strlen(B->seq_al[a+3])<strlen(B->seq_al[3])) B->seq_al[a+3]=strcat ( B->seq_al[a+3], "x");
    }
  
  B->nseq=6;
  B->len_aln=strlen (B->seq_al[0]);
  return B;
}

/************************************************************************************/
/*                ALIGNMENT ANALYZE     : SAR                                            */
/************************************************************************************/
int **analyze_sar_compound1 ( char *name, char *seq, Alignment *A, char *mode);
int **analyze_sar_compound2 ( char *name, char *seq, Alignment *A, char *mode);

double evaluate_sar_score ( int len, int n11, int n1a, int n1b);

Alignment *sar_analyze  (Alignment *A, Alignment *SAR, char *mode)
{
  int a;
  int **list;
  
  for ( a=0; a< SAR->nseq; a++)
    {
      if (strm (mode, "method1") || strm (mode, "method2") ||strm (mode, "method3")||strm (mode, "method4") )
      list=analyze_sar_compound1 (SAR->name[a], SAR->seq_al[a], A, mode);
      else if ( strm (mode, "method5"))
      list=analyze_sar_compound2 (SAR->name[a], SAR->seq_al[a], A, mode);
      free_int (list, -1);
    }
  exit (EXIT_SUCCESS);
  return A;
}

int **analyze_sar_compound2 ( char *name, char *seq, Alignment *A, char *mode)
{
  int **list;
  int a, b;
  int true;
  int C1, C2, r1, r2;
  int sim, diff;
  int npairs=0;
  int N1,NGAP,N00, N11,N10, N01, MSA1,MSA0, SAR1, SAR0, LEN, SAR_SCORE, COL;
  int Nval=0;
  int score;
  int AA0[26], AA1[26];
  static int **pos, p;
  
  if ( !pos)pos=aln2pos_simple(A, A->nseq);

  COL=Nval++;NGAP=Nval++;N00=Nval++;N11=Nval++;N10=Nval++;N01=Nval++; MSA0=Nval++;MSA1=Nval++;SAR0=Nval++;SAR1=Nval++;LEN=Nval++;SAR_SCORE=Nval++;
      
  list=declare_int (A->len_aln, Nval+1);
  
  for ( N1=0,a=0; a< A->len_aln; a++)
    list[a][COL]=a;  
  for ( a=0; a< strlen (seq); a++) N1+=(seq[a]=='I')?1:0;
  for ( a=0; a< 26; a++)AA0[a]=AA1[a]=0;
  
  for ( a=0; a<A->len_aln; a++)
    {
      for ( b=0; b<A->nseq; b++)
      {
        C1=seq[b];
        
        
        r1=tolower(A->seq_al[b][a]);
        if (is_gap(r1))r1='z';
        r1-='a';
        if (C1=='O')AA0[r1]++;
        if (C1=='I')AA1[r1]++;
      }
      
      for (b=0; b< 26; b++)
      {
        if (AA1[b])list[a][MSA1]+=AA0[b]+AA1[b];
        AA0[b]=AA1[b]=0;
      }
      
      list[a][N11]=MIN(N1,list[a][MSA1]);
    }
 

  for ( a=0; a< A->len_aln; a++)
    {
      if (list[a][NGAP]==0 )
      {
        list[a][SAR_SCORE]=100*evaluate_sar_score(A->nseq,list[a][N11], list[a][MSA1],N1);
      }
    }

  sort_int_inv (list,Nval, SAR_SCORE, 0, A->len_aln-1);
  
  for ( a=0; a<1;a++)
    {
      if ( list[a][SAR_SCORE]<0)continue;
      
      fprintf ( stdout, "\nPROF %11s %s ", name, seq);
      fprintf ( stdout, " COL: %-3d SCORE %3d N11 %4d N00 %4d N10 %4d N01 %4d LEN %4d SEQ ", list[a][COL]+1, list[a][SAR_SCORE], list[a][N11], list[a][N00], list[a][N10],  list[a][N01], list[a][LEN]);
      
      for ( b=0; b< A->nseq; b++)fprintf ( stdout, "%c", A->seq_al[b][list[a][COL]]);
      for ( b=0; b< A->nseq; b++)
      {
        if (seq[b]=='I')
          {
            p=list[a][COL];
            fprintf ( stdout, "\n%-11s %10s %c POS %4d COL %4d SCORE %d", name,A->name[b], A->seq_al[b][p],pos[b][p],list[a][COL]+1, list[a][SAR_SCORE]);
          }
      }
      
    }
  return list;
}
int **analyze_sar_compound1 ( char *name, char *seq, Alignment *A, char *mode)
{
  int **list;
  int a, b, c;
  int true;
  int C1, C2, r1, r2;
  int sim, diff;
  int npairs=0;
  int N1,NGAP,N00, N11,N10, N01, MSA1,MSA0, SAR1, SAR0, LEN, SAR_SCORE, COL;
  int Nval=0;
  int score;
  int n, SKIP=-1, p;
  static int **pos;
  static int header;
  
  if ( !pos)pos=aln2pos_simple (A, A->nseq);
  
  COL=Nval++;NGAP=Nval++;N00=Nval++;N11=Nval++;N10=Nval++;N01=Nval++; MSA0=Nval++;MSA1=Nval++;SAR0=Nval++;SAR1=Nval++;LEN=Nval++;SAR_SCORE=Nval++;
      
  list=declare_int (A->len_aln, Nval+1);
  
  for ( a=0; a< A->len_aln; a++)
    list[a][COL]=a;
  
  for ( N1=0,a=0; a< strlen (seq); a++) N1+=(seq[a]=='I')?1:0;
  
  
  
  for ( npairs=0, a=0; a< A->nseq-1; a++)    
    for ( b=a+1; b< A->nseq; b++)
      {
      C1=seq[a];
      C2=seq[b];
      sim=get_seq_sim ( A->seq_al[a],A->seq_al[b],"-.","idmat");
      if (C1=='O' && C2=='O')continue;
      score=(C1==C2)?100-sim:sim;
      npairs++;
      for (c=0; c< A->len_aln; c++)
        {
          r1=A->seq_al[a][c];
          r2=A->seq_al[b][c];
          
          if ( (is_gap(r1) || is_gap(r2)))
            list[c][NGAP]+=score;
          
          else
            {
          
            list[c][LEN]+=score;
            if ( (C1==C2) && (r1==r2))list[c][N11]+=score;
            if ( (C1!=C2) && (r1!=r2))list[c][N00]+=score;
            if ( (C1!=C2) && (r1==r2))list[c][N01]+=score;
            if ( (C1==C2) && (r1!=r2))list[c][N10]+=score;
            
            if ( (C1==C2))list[c][SAR1]+=score;
            else list[c][SAR0]+=score;
            
            if ( (r1==r2))list[c][MSA1]+=score;
            else list[c][MSA0]+=score;
            }
        }
      }
  
  
  for ( c=0; c< A->len_aln; c++)
    {
      list[c][SAR_SCORE]=SKIP;
      if ( list[c][NGAP]>100);
      else 
      {
        list[c][SAR_SCORE]=SKIP;
        if ( strm (mode, "method3"))
             {
             if ( list[c][LEN])list[c][SAR_SCORE]+=(1000*list[c][N00])/list[c][LEN];
             }
          
        else if (strm (mode, "method1"))
          {
            /*method 1: combine (same reac, same seq)+(diff reac, diff seq)*/
            if ( list[c][LEN])list[c][SAR_SCORE]+=100*evaluate_sar_score(list[c][LEN], list[c][N11], list[c][MSA1], list[c][SAR1]);
          }
        else if  ( strm (mode, "method2"))
          {
            /*method 2: combine (same reac, same seq)*/
            list[c][SAR_SCORE]=(100*(list[c][N11]+list[c][N00])/(list[c][LEN]));
          }
        else if ( strm (mode, "method4"))
          {
            if ( list[c][N01]==0)
            list[c][SAR_SCORE]=list[c][SAR1];
            else
            list[c][SAR_SCORE]=SKIP;
          }
      }
    }
  if ( !header)
    {
      header=1;
      fprintf ( stdout, "#SAR_ANALYSIS sar_analysis_format_01 Method %s",mode);
    }
  
  sort_int_inv (list,Nval, SAR_SCORE, 0, A->len_aln-1);
  
  
  for (n=0, a=0;n<1 && a<A->len_aln;a++)
    {
      if ( list[a][SAR_SCORE]==SKIP)continue;
      
      fprintf ( stdout, "\nPROF %11s %s ", name, seq);
      fprintf ( stdout, " COL: %-3d SCORE %3d N11 %4d N00 %4d N10 %4d N01 %4d LEN %4d SEQ ", list[a][COL]+1, list[a][SAR_SCORE], list[a][N11], list[a][N00], list[a][N10],  list[a][N01], list[a][LEN]);
      
      for ( b=0; b< A->nseq; b++)fprintf ( stdout, "%c", A->seq_al[b][list[a][COL]]);
      for ( b=0; b< A->nseq; b++)
      {
        if (seq[b]=='I')
          {
            p=list[a][COL];
            fprintf ( stdout, "\n%-11s %10s %c POS %4d COL %4d SCORE %d", name,A->name[b], A->seq_al[b][p],pos[b][p],list[a][COL]+1, list[a][SAR_SCORE]);
          }
      }
      n++;
    }
  return list;
}
double evaluate_sar_score ( int N, int n11, int n1msa, int n1sar)
{
  double p;
  
  if ( n11==0)return 0;
  p  = M_chooses_Nlog (n1msa, N) + M_chooses_Nlog (n1sar-n11, N-n1msa) + M_chooses_Nlog (n11, n1msa);
  p-=(M_chooses_Nlog (n1msa, N)+M_chooses_Nlog (n1sar, N));
  return -p;
}
int aln2sar_column_list ( Alignment *A, char *filter)
{
  int a, b, c;
  int one, zero;
  int keep, strict;
  int r;

 
  for ( a=0; a< A->len_aln; a++)
    {
      for ( b=0; b< A->nseq; b++)
      {
        if (  filter[b]=='1')one=A->seq_al[b][a];
        if (  filter[b]=='0')zero=A->seq_al[b][a];
      }

      for (keep=1, strict=1, b=0; b< A->nseq; b++)
      {
        r= A->seq_al[b][a];
        if ( (filter[b]=='1' && r!=one) ||(filter[b]=='0' && r==one) )
          {
            keep=0;
            continue;
          }
        if ( filter[b]=='0' && (r!=zero))strict=0;
        
      }
      if ( keep==1)
      {
        fprintf ( stdout, "\nColumn %4d ", a+1);
        for ( b=0; b< A->nseq; b++)
          fprintf ( stdout, "%c", A->seq_al[b][a]);
        fprintf ( stdout, " %s", (strict)?"STRICT":"UNSTRICT");
      }
    }
  return 1;
}

/*********************************COPYRIGHT NOTICE**********************************/
/* Centre National de la Recherche Scientifique (CNRS) */
/*and */
/*Cedric Notredame */
/*Tue May 10 12:08:44     2005. */
/*All rights reserved.*/
/*This file is part of T-COFFEE.*/
/**/
/*    T-COFFEE is free software; you can redistribute it and/or modify*/
/*    it under the terms of the GNU General Public License as published by*/
/*    the Free Software Foundation; either version 2 of the License, or*/
/*    (at your option) any later version.*/
/**/
/*    T-COFFEE is distributed in the hope that it will be useful,*/
/*    but WITHOUT ANY WARRANTY; without even the implied warranty of*/
/*    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the*/
/*    GNU General Public License for more details.*/
/**/
/*    You should have received a copy of the GNU General Public License*/
/*    along with Foobar; if not, write to the Free Software*/
/*    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA*/
/*...............................................                                                                                      |*/
/*  If you need some more information*/
/*  cedric.notredame@europe.com*/
/*...............................................                                                                                                                                     |*/
/**/
/**/
/*    */
/*********************************COPYRIGHT NOTICE**********************************/

Generated by  Doxygen 1.6.0   Back to index